R aggregate data in one column based on 2 other co

2020-02-07 07:53发布

问题:

So, I have these data given below, and my goal is to aggregate column v3 in terms of columns v1 and v2 and add the v3 values for each bin of v1 and v2. For example, the first line correspond to interval v1=21, v2=16, so the value of v3 will be aggregated over its (v1,v2) interval. And repeat this for the rest of rows. I want to use the mean as the aggregation function!

> df
         v1      v2     v3
1    21.359  16.234 24.283
2    47.340   9.184 21.328
3    35.363 -13.258 14.556
4   -29.888  14.154 17.718
5   -10.109 -16.994 20.200
6   -32.387   1.722 15.735
7    49.240  -5.266 17.601
8   -38.933   2.558 16.377
9    41.213   5.937 21.654
10  -33.287  -4.028 19.525
11  -10.223  11.961 16.756
12  -48.652  16.558 20.800
13   44.778  27.741 17.793
14  -38.546  29.708 13.948
15  -45.622   4.729 17.793
16  -36.290  12.383 18.014
17  -19.626  19.767 18.182
18  -32.248  29.480 15.108
19  -41.859  35.502  8.490
20  -36.058  21.191 16.714
21  -23.588   0.524 21.471
22  -24.423  39.963 18.257
23   -0.042 -45.899 17.654
24  -35.479  32.049  9.294
25  -24.632  20.603 17.757
26  -26.591  25.882 18.968
27  -34.364  43.959 13.905
28  -19.334  29.728 20.102
29   12.304 -39.997 17.002
30    0.958  37.162 20.779
31  -35.475 -40.611 14.719
32  -39.268  44.382 11.247
33  -10.154  39.053 19.458
34  -12.612  32.056 17.759
35    2.730  -1.473 20.228
36  -45.326 -52.299  9.305
37   -1.996 -15.551 13.295
38  -26.655 -37.319 19.148
39  -18.509 -30.047 18.889
40  -22.705 -25.577 19.007
41  -15.705 -15.397 19.112
42   -2.637   9.790 10.548
43  -14.107  -3.145 19.654
44  -29.272 -19.906 18.503
45   -9.569  -4.632 11.334
46    2.114  18.048 14.744
47   -4.241  16.073 15.420
48   31.869  -3.394 21.559
49   20.425  35.205 22.250
50  -18.605  -8.866 20.082
51  -26.677  -7.690 21.850
52   -5.240   4.805 11.399
53   -6.766   2.538  6.292
54    4.567  22.554 19.682
55  -20.701   6.430 20.996
56  -23.972  16.141 17.976
57   -6.651  24.048 18.082
58  -32.243  -6.100 19.517
59    2.236  29.736 19.667
60   18.830  15.586 15.969
61   -9.598  28.414 17.806
62  -30.825  12.194 22.346
63  -17.415  15.795 18.135
64  -14.823   5.931 17.915
65  -14.234  12.882 13.001
66    9.937  18.368 20.421
67  -38.766   9.590 21.648
68  -30.896  27.047 16.453
69   -4.432 -10.562 10.061
70   -4.290  33.170 22.942
71    7.285  41.416 23.906
72   24.411  40.531 23.584
73   45.409 -32.420 20.831
74   49.341 -34.047 15.269
75   -7.730 -47.724 21.692
76  -10.563 -29.082 17.984
77    4.412 -41.182 16.845
78   31.822 -37.297 19.665
79  -43.355  31.093 17.688
80  -44.353 -44.723 13.832
81  -16.961  38.438 20.715
82  -21.225 -39.244 18.156
83  -42.022  -8.686 20.362
84  -42.904 -25.498 18.394
85   43.822 -25.990 21.287
86   43.013  -9.071 19.285
87  -36.901 -24.185 21.938
88  -28.251 -36.583 19.330
89  -19.830 -22.412 21.677
90   -3.789 -15.663 17.439
91   40.453 -21.796 17.432
92  -40.778 -31.188 18.762
93  -27.072 -48.609 18.913
94  -18.035  -1.791 19.909
95  -20.781  -7.912 22.563
96   47.307 -15.432 19.101
97   30.700   5.097 22.801
98   46.453   0.171 17.810
99  -27.439  -5.860 22.626
100 -30.526 -18.007 23.219
101 -18.280 -15.187 25.302
102 -18.367   6.044 18.864
103  41.265  -1.686 22.743
104  29.227 -14.814 19.196
105 -36.080 -32.715 18.930
106   7.475   7.061 25.002
107 -18.586 -45.207 21.864
108  35.227  11.148 21.388
109  -7.581  38.773 22.048
110 -43.685  14.083 22.037
111 -29.533  39.735 17.613
112   8.760 -39.400 22.421
113 -14.962  24.624 12.030
114  18.627 -32.888 23.036
115 -31.300  33.612 15.608
116 -38.024  45.839 16.567
117 -15.104  36.893 18.162
118 -12.809 -23.029 21.589
119 -21.614  36.264 16.680
120  42.917 -36.838 18.738
121   6.104 -14.961 14.468
122  44.032 -41.556 17.618
123 -24.493  21.886 17.366
124 -24.361  29.941 14.374
125 -25.060  43.383 16.437
126  -6.017 -24.640 19.207
127 -32.617 -40.549 18.059
128 -43.285 -43.364 18.827
129 -29.856 -46.089 16.881
130 -16.547 -43.619 22.547
131 -16.257  42.814 18.932
132  -9.236 -11.694 14.455
133  13.488 -35.422 24.436
134 -47.456 -32.714 18.123
135  39.476 -28.008 16.087
136 -21.933 -43.522 15.390
137 -17.347 -38.250 16.738
138  -4.948 -39.747 21.598
139 -31.018 -28.912 21.332
140 -36.364  30.461 17.542
141 -39.639  18.272 23.663
142 -24.162 -13.582 19.136
143  -8.935 -32.699 22.108
144   0.001 -19.219 17.888
145  -6.912 -24.885 20.683
146   7.785 -31.229 15.972
147  22.176  -7.478 21.335
148   8.755 -13.323 20.831
149  44.081  41.160 11.938
150  -8.451 -37.721 17.465
151  18.671  -2.776 23.374
152  12.668 -26.749 18.071
153   1.582 -21.252 20.750
154  20.832 -27.718 16.190
155  44.220 -45.690 12.598
156  -0.226 -37.737 17.634
157 -25.130 -19.197 23.170
158   2.086 -31.271 18.180
159 -20.445 -33.083 19.984
160  23.801   1.116 24.230
161  18.283 -17.922 20.256
162 -38.985 -13.770 20.702
163 -26.264 -27.413 20.276
164  10.396 -19.375 20.415
165 -16.343 -22.847 16.516
166  29.992  -8.215 21.661
167  35.052 -19.475 16.953
168   3.052  -6.800 22.509
169 -10.350  -5.413 19.222
170  14.371 -10.383 23.471
171  11.896  -4.191 21.773
172  18.152   8.741 23.669
173  25.748 -47.786 18.578
174  31.613  -0.735 23.898
175  12.660  25.645 23.549
176   2.933  29.345 25.170
177   9.369  18.791 26.817
178  15.805   4.798 27.866
179  27.556 -25.571 14.796
180  -5.112  -7.835 21.201
181 -30.571   3.471 20.496
182  19.816 -22.114 21.210
183   2.826  47.437 22.911
184  25.488 -33.064 21.442
185  44.826  42.162 22.994
186  25.208 -48.487 25.325
187  14.635 -17.430 17.083
188  -1.901 -33.370 22.163
189  12.306 -47.265 20.052
190  42.552  35.750 23.213
191  37.318 -46.069 22.599
192   4.725 -22.289 21.600
193 -40.815 -37.793 17.371
194  11.890 -12.862 14.286
195  35.251 -31.746 17.816
196  27.121 -27.638 19.677
197  36.024 -39.105 20.202
198 -47.119  41.940 17.526
199   0.837 -40.694 23.063
200  23.797 -39.795 20.198
201 -42.859 -21.372 23.554
202  39.407 -20.211 21.246
203  25.782 -18.892 20.423
204  34.529  -9.576 20.411
205  44.397 -13.247 23.180
206   5.534   6.856 14.248
207  31.598 -18.085 22.350
208   7.250  -0.481 15.453
209 -43.458 -15.204 23.193
210 -38.296 -31.524 21.776
211   4.276  -3.483 12.145
212  25.757 -11.708 22.360
213  15.634  37.478 24.624
214 -43.669  -3.197 20.742
215  45.381   6.365 21.351
216 -38.755  -6.877 20.879
217  -6.925   3.994 21.120
218   8.059  12.831 26.032
219   3.572  22.105 26.920
220  16.042  30.267 21.039
221  26.629  13.042 23.633
222 -12.126  -0.151 21.261
223 -11.981  24.600 19.236
224  29.480  28.362 21.838
225  -2.500  22.858 23.177
226 -41.163  19.863 20.059
227  35.953  27.401 19.101
228 -16.641  13.248 17.984
229  -3.778  14.090 18.943
230  11.643  34.817 21.621
231  34.921  38.666 17.359
232  25.621  22.451 22.866
233  34.936  17.384 19.836
234  40.017  37.599 13.987
235  19.547  33.838 22.575
236  11.197  39.977 19.347
237  16.972 -33.927 14.205
238  22.938  38.064 20.351
239  40.234  18.672 23.030
240  -0.846  42.320 18.383
241 -11.437  18.284 16.502
242  19.552  43.222 21.370
243  13.925 -46.486 18.917
244  41.709 -39.559 16.143
245  19.014 -44.563 17.796
246  32.260  33.114 18.402
247  -4.693  29.228 18.622
248  21.765 -38.452 15.147
249  39.157 -31.135 19.800
250  32.638  46.241 18.943
251   2.797  10.089 21.330
252   8.256  46.910 18.834
253  38.634  -2.429 20.413
254  28.642   2.763 19.580
255   0.456   1.422  7.452
256   3.050  11.792 14.196
257  24.736  14.532 17.886
258  16.787 -10.155 18.607
259  12.676  11.651 18.656
260  13.184   1.081 15.385
261  27.365  26.576 25.486
262  -7.878 -18.191 14.547
263 -42.112  32.576 20.865
264  15.069  21.684 17.986
265  33.045  27.166 25.252
266  21.810  -0.186 19.477
267  18.227  26.690 20.415
268  33.759  18.366 21.255
269  39.491  13.272 23.036
270  30.662   9.368 20.192
271   5.470  35.303 22.685
272  21.663 -44.343 20.999
273  31.261  33.178 24.335
274  21.854  22.665 20.876
275  21.853   7.932 18.588
276 -40.168   3.682 19.642
277 -42.292  23.997 22.199
278  10.233  28.731 21.263
279  17.745  41.831 19.536
280  38.406  25.165 26.534
281 -49.329  -0.465 20.887
282  40.398  -8.120 21.362
283  -2.531  46.118 22.933
284   7.959 -30.856 20.497
285 -34.467 -23.724 22.206
286  30.541  44.284 25.878
287  45.682  29.897 21.964
288 -22.251  -0.089 20.756
289  21.484  16.532 23.513
290  46.912  10.195 21.908
291  35.320 -13.352 16.102
292 -30.431  14.048 17.362
293  -8.976 -17.325 21.645
294 -32.661   2.301 16.805
295  49.317  -5.509 17.711
296 -37.756   4.459 16.054
297  41.445   6.158 21.442
298 -33.148  -3.499 19.543
299 -10.065  12.238 16.649
300 -48.323  17.153 20.974
301  45.010  28.147 17.838
302 -39.630  29.183 13.254
303 -45.191   5.065 18.214
304 -35.936  11.953 16.540
305 -19.816  19.624 18.279
306 -32.055  29.757 15.358
307 -41.533  36.169 10.005
308 -35.448  20.960 16.720
309 -23.384   0.511 20.005
310 -25.101  40.569 18.180
311  -0.547 -45.779 17.603
312 -35.291  32.643  9.548
313 -25.109  20.826 17.494
314 -26.202  27.012 18.678
315 -34.805  43.850 14.006
316 -18.819  30.611 20.309
317  13.019 -40.248 16.874
318  -0.655  37.112 20.924
319 -34.142 -41.553 15.237
320 -39.509  43.886 12.464
321  -9.491  38.639 18.839
322 -12.164  31.977 17.598
323   3.437  -1.596 20.318
324 -45.713 -52.599  9.918
325  -2.062 -15.946 12.847
326 -27.435 -37.600 18.257
327 -18.094 -29.624 18.791
328 -22.647 -26.123 18.746
329 -16.775 -15.505 19.204
330  -2.628   9.599 11.219
331 -15.718  -1.797 19.491
332 -29.476 -20.107 17.485
333 -10.618  -4.938 12.227
334   1.423  17.458 14.706
335  -4.503  16.630 14.718
336  32.450  -2.029 21.591
337  20.529  35.464 21.630
338 -19.348  -7.844 19.464
339 -26.760  -6.856 21.422
340  -4.539   4.393 11.819
341  -5.741   1.934  7.121
342   4.781  21.919 18.908
343 -19.797   6.928 20.928
344 -24.555  16.834 19.796
345  -5.664  24.465 18.432
346 -32.891  -6.571 18.691
347   2.354  28.462 19.825
348  18.058  16.251 16.335
349  -9.603  28.582 17.743
350 -31.282  11.454 22.342
351 -17.580  16.428 18.401
352 -13.884   6.206 17.270
353 -13.631  13.767 11.761
354   9.712  18.008 18.896
355 -37.987   9.024 21.309
356 -29.969  27.506 16.964
357  -4.248 -10.813  9.284
358  -5.755  32.673 22.541
359   6.675  41.952 24.227
360  24.564  41.173 23.241
361  45.314 -32.299 20.778
362 -45.890 -33.510 16.314
363  -8.277 -47.943 21.573
364 -11.044 -29.464 17.708
365   3.972 -41.396 17.411
366  31.776 -36.643 19.998
367 -43.072  31.311 17.828
368 -45.805 -43.071 14.477
369 -15.628  39.837 19.709
370 -21.129 -39.101 18.814
371 -41.628  -8.980 19.850
372 -42.244 -23.659 18.856
373  44.149 -25.710 21.099
374  42.623  -9.185 20.147
375 -35.949 -23.979 22.255
376 -28.512 -36.367 19.378
377 -19.827 -21.781 21.621
378  -3.429 -15.706 18.677
379  39.741 -20.721 18.670
380 -41.663 -29.499 19.260
381 -26.931 -48.467 18.185
382 -17.571  -1.467 19.770
383 -20.039  -7.591 22.737
384  46.370 -14.790 19.922
385  30.710   4.167 22.987
386  46.755   0.417 18.088
387 -27.293  -4.398 22.168
388 -30.364 -17.573 23.869
389 -16.870 -14.893 25.817
390 -18.152   6.546 18.392
391  40.134   0.160 23.661
392  28.179 -14.323 19.301
393 -35.907 -32.647 19.306
394   8.486   7.101 24.551
395 -17.155 -45.435 22.745
396  34.226  10.748 19.773
397  -7.760  38.754 22.211
398 -42.899  13.804 22.628
399 -29.972  40.435 17.784
400   8.764 -39.195 22.070
401 -15.624  25.585 12.291
402  18.620 -33.314 23.282
403 -30.436  34.219 15.102
404 -37.665  44.955 15.257
405 -15.861  37.488 18.956
406 -13.375 -22.408 20.312
407 -20.972  36.906 17.387
408  43.162 -35.948 19.695
409   6.639 -15.783 14.608
410  44.186 -41.037 17.398
411 -23.917  22.236 18.702
412 -23.957  30.033 14.725
413 -25.056  43.824 15.489
414  -6.795 -24.375 18.537
415 -33.485 -40.651 17.538
416 -43.186 -43.071 17.481
417 -30.325 -46.122 16.440
418 -17.489 -43.551 22.006
419 -16.376  43.928 18.992
420  -9.076 -10.921 14.131
421  13.704 -36.352 23.812
422 -47.302 -31.918 18.719
423  39.459 -27.814 15.558
424 -22.509 -42.660 14.366
425 -17.920 -37.614 16.572
426  -5.780 -39.212 21.667
427 -30.519 -28.942 21.931
428 -35.937  31.435 17.106
429 -38.680  18.435 23.342
430 -24.796 -13.279 18.543
431  -9.283 -32.388 21.895
432   0.493 -19.505 17.276
433  -7.046 -25.243 20.741
434   7.884 -32.006 16.727
435  22.451  -7.834 21.082
436   8.379 -13.690 22.002
437  43.730  41.697 11.894
438  -9.040 -38.086 17.500
439  18.831  -2.759 23.252
440  12.732 -27.410 18.948
441   0.739 -21.091 21.354
442  20.339 -27.959 16.514
443  44.688 -46.449 12.356
444  -0.402 -36.951 17.891
445 -24.790 -18.139 23.337
446   2.173 -30.577 18.023
447 -18.995 -33.799 20.730
448  23.372   0.223 24.855
449  17.835 -17.372 19.878
450 -38.915 -13.815 20.923
451 -26.241 -27.800 19.877
452  11.074 -18.156 19.249
453 -16.478 -22.928 16.386
454  29.646  -8.349 21.115
455  33.910 -20.809 16.629
456   3.306  -6.830 22.059
457 -10.512  -5.322 19.876
458  14.024 -10.406 23.456
459  12.365  -3.699 21.818
460  18.186   8.532 23.951
461  25.140 -47.653 18.592
462  32.288  -2.117 23.423
463  10.836  24.937 23.310
464   4.531  28.913 25.238
465   9.944  18.397 26.661
466  16.274   4.852 27.837
467  27.316 -26.007 15.934
468  -4.508  -8.010 20.906
469 -29.858   2.412 19.958
470  20.376 -21.957 21.306
471   2.077  47.431 23.248
472  25.777 -33.367 21.695
473  44.854  42.801 22.904
474  25.356 -48.833 25.402
475  15.322 -16.926 17.318
476  -2.656 -33.400 20.365
477  11.950 -47.390 20.328
478  42.961  36.955 22.919
479  35.726 -45.402 24.272
480   4.675 -21.758 21.780
481 -40.568 -36.931 16.934
482  11.758 -12.859 14.206
483  35.483 -31.760 16.975
484  27.336 -27.577 19.429
485  36.689 -39.218 19.668
486 -46.357  41.618 17.456
487   0.002 -40.589 22.558
488  23.525 -39.918 21.247
489 -43.269 -21.304 22.699
490  40.191 -20.594 21.145
491  25.728 -18.024 20.298
492  34.964 -10.441 20.189
493  43.627 -13.279 23.038
494   5.766   6.876 14.077
495  32.432 -18.172 21.848
496   7.087  -1.122 15.098
497 -44.110 -14.034 23.080
498 -39.474 -31.289 22.312
499   4.118  -4.077 11.067
500  26.597 -11.667 22.641

so, using these commands I can find the intervals, as below x.bin <- seq(floor(min(d[,1])), ceiling(max(df[,1])), by=2) y.bin <- seq(floor(min(d[,2])), ceiling(max(df[,2])), by=2)

> x.bin
[1] -50 -48 -46 -44 -42 -40 -38 -36 -34 -32 -30 -28 -26 -24 -22 -20 -18 -16 -14
[20] -12 -10  -8  -6  -4  -2   0   2   4   6   8  10  12  14  16  18  20  22  24
[39]  26  28  30  32  34  36  38  40  42  44  46  48  50
> y.bin
[1] -53 -51 -49 -47 -45 -43 -41 -39 -37 -35 -33 -31 -29 -27 -25 -23 -21 -19 -17
[20] -15 -13 -11  -9  -7  -5  -3  -1   1   3   5   7   9  11  13  15  17  19  21
[39]  23  25  27  29  31  33  35  37  39  41  43  45  47

But, then I don't know how to assign each row of the raw data (df) to each x.bin and y.bin and calculate the aggregate (sum) of each bin.

回答1:

library(plyr) 

#I am using cut function with 50 breaks for both v1 and v2 and ddply from plyr package for computing the mean

newdata<-ddply(df,.(cut(v1,50),cut(v2,50)),summarise,mean.v3=mean(v3))
    > head(newdata)
        cut(v1, 50)   cut(v2, 50) mean.v3
    1 (-49.4,-47.5] (-34.7,-32.7]  18.123
    2 (-49.4,-47.5] (-0.576,1.43]  20.887
    3 (-49.4,-47.5]   (15.5,17.5]  20.887
    4 (-47.5,-45.5] (-52.7,-50.7]   9.918
    5 (-47.5,-45.5] (-44.7,-42.7]  14.477
    6 (-47.5,-45.5] (-34.7,-32.7]  16.314

Updated as per the comments: If you want the lower, middle and mid-points, you can use the following function or use with details as follow(you need to use the sub function to deal with ( and ]):

    df$newv1<-with(df,cut(v1,50)) 
    df$newv2<-with(df,cut(v2,50))
    df$lowerv1<-with(df,as.numeric( sub("\\((.+),.*", "\\1", newv1))) #lower value
    df$upperv1<-with(df,as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", newv1))) # upper value
    df$midv1<-with(df,(lowerv1+upperv1)/2) #mid value
    df$lowerv2<-with(df,as.numeric( sub("\\((.+),.*", "\\1",newv2))) #lower value
    df$upperv2<-with(df,as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", newv2))) # upper value
    df$midv2<-with(df,(lowerv2+upperv2)/2)#mid value
    newdata<-ddply(df,.(newv1,newv2),transform,mean.v3=mean(v3))

   > head(newdata)
       v1      v2     v3         newv1         newv2 lowerv1 upperv1  midv1 lowerv2 upperv2   midv2 mean.v3
1 -47.456 -32.714 18.123 (-49.4,-47.5] (-34.7,-32.7]   -49.4   -47.5 -48.45 -34.700  -32.70 -33.700  18.123
2 -49.329  -0.465 20.887 (-49.4,-47.5] (-0.576,1.43]   -49.4   -47.5 -48.45  -0.576    1.43   0.427  20.887
3 -48.652  16.558 20.800 (-49.4,-47.5]   (15.5,17.5]   -49.4   -47.5 -48.45  15.500   17.50  16.500  20.887
4 -48.323  17.153 20.974 (-49.4,-47.5]   (15.5,17.5]   -49.4   -47.5 -48.45  15.500   17.50  16.500  20.887
5 -45.713 -52.599  9.918 (-47.5,-45.5] (-52.7,-50.7]   -47.5   -45.5 -46.50 -52.700  -50.70 -51.700   9.918
6 -45.805 -43.071 14.477 (-47.5,-45.5] (-44.7,-42.7]   -47.5   -45.5 -46.50 -44.700  -42.70 -43.700  14.477


标签: r 2d aggregate bin