可以将文章内容翻译成中文,广告屏蔽插件可能会导致该功能失效(如失效,请关闭广告屏蔽插件后再试):
问题:
I have a situation where I have two data frames I would like to join. The table
params
describes the param for a unit in terms time and angle ranges. The table data
is longer and contains id, time and angle parameters.
I would like to join the param value from params
when id match and time is in the range between valid_from and valid_to and ang is between angle_begin angle_end in the data
table.
Below is an example of the tables.
params <- data.frame(id = 1:4
,valid_from = 1
,valid_to = c(10, 20, 30, 40)
,angle_begin = c(120, 90, 0, 50)
,angle_end = c(180, 170, 160, 150)
,param = c("A", "B", "C", "D"))
data <- data.frame(id = rep(1:4, each=100)
,time = rep(seq(from = 0.5, to = 50, by = 0.5), 4)
,ang = rep(runif(100, 0, 360), 4))
回答1:
with tidyverse
, you can try something like:
data %>%
inner_join(params) %>%
filter( time > valid_from & time < valid_to) %>%
filter( ang > angle_begin & ang < angle_end)
回答2:
With data.table
this is a non-equi join:
library(data.table)
# coerce to data.table
setDT(params)
setDT(data)
# keep only rows of data with matches in params
data[params,
on = .(id, time >= valid_from, time <= valid_to, ang >= angle_begin, ang <= angle_end),
.(id, time = x.time, ang = x.ang, param)]
id time ang param
1: 1 2.0 140.383052 A
2: 1 3.5 152.772925 A
3: 1 8.0 141.039548 A
4: 2 1.0 104.434264 B
5: 2 2.0 140.383052 B
6: 2 3.5 152.772925 B
7: 2 8.0 141.039548 B
8: 2 16.0 150.424306 B
9: 2 16.5 92.201187 B
10: ...
41: 4 22.0 89.813795 D
42: 4 22.5 131.004229 D
43: 4 26.0 79.839443 D
44: 4 27.5 128.291356 D
45: 4 29.0 127.942287 D
46: 4 30.0 136.388594 D
47: 4 32.0 140.092817 D
48: 4 32.5 108.346831 D
49: 4 37.0 140.732844 D
id time ang param
If all rows of data
should be kept
params[data,
on = .(id, valid_from <= time, valid_to >= time, angle_begin <= ang, angle_end >= ang),
.(id, time = i.time, ang = i.ang, param)]
id time ang param
1: 1 0.5 106.62639 NA
2: 1 1.0 104.43426 NA
3: 1 1.5 15.77429 NA
4: 1 2.0 140.38305 A
5: 1 2.5 322.31929 NA
---
396: 4 48.0 131.17405 NA
397: 4 48.5 335.47857 NA
398: 4 49.0 181.64450 NA
399: 4 49.5 90.96224 NA
400: 4 50.0 60.04268 NA
回答3:
Given your wording, I interpret your question differently. That is, I read that you want to keep all rows but join only when the values of valid_
and angle_
are within the specified range.
Also note that depending on whether you want to include values at the boundaries of the ranges you may need >=
and <=
instead of >
and <
.
Thus, starting from Aramis7d's answer:
data %>%
inner_join(params, by = "id") %>%
mutate(param = ifelse(
time >= valid_from & time <= valid_to &
ang >= angle_begin & ang <= angle_end,
param,
NA))
回答4:
Another tidyverse way of doing this is using the fuzzyjoin
package.
library(tidyverse)
library(fuzzyjoin)
params <- data.frame(id = 1:4
,valid_from = 1
,valid_to = c(10, 20, 30, 40)
,angle_begin = c(120, 90, 0, 50)
,angle_end = c(180, 170, 160, 150)
,param = c("A", "B", "C", "D"))
data <- data.frame(id = rep(1:4, each=100)
,time = rep(seq(from = 0.5, to = 50, by = 0.5), 4)
,ang = rep(runif(100, 0, 360), 4))
fuzzy_left_join(data, params,
by = c("id" = "id",
"time" = "valid_from",
"time" = "valid_to",
"ang" = "angle_begin",
"ang" = "angle_end"),
match_fun = list(`==`, `>`, `<`, `>`, `<`)) %>%
slice(1:10)
Credit to mfherman.