I am trying to convert a tidy table (eg. example below) into a nested list using R/tidyverse. Using some tidyverse magic I was able to convert it to a list nested of depth three, but I cannot figure out how to nest it deeper.
Take the following example input:
library(tidyverse)
library(stringi)
n_patient = 2
n_samples = 3
n_readgroup = 4
n_mate = 2
df = data.frame(patient = rep(rep(LETTERS[1:n_patient], n_samples),2),
sample = rep(rep(seq(1:n_samples), each = n_patient),2),
readgroup = rep(stri_rand_strings(n_patient * n_samples * n_readgroup, 6, '[A-Z]'),2),
mate = rep(1:n_mate, each = n_patient * n_samples * n_readgroup)) %>%
mutate(file = sprintf("%s.%s.%s_%s", patient, sample, readgroup, mate)) %>%
arrange(file)
json = df %>%
nest(-patient, .key = samples) %>%
mutate(samples = map(samples, nest, -sample, .key=readgroups))
jsonlite::toJSON(json, pretty = T)
Which looks for example like this
> head(df)
patient sample readgroup mate file
1 A 1 FCSDRJ 1 A.1.FCSDRJ_1
2 A 1 FCSDRJ 2 A.1.FCSDRJ_2
3 A 1 IAXDPR 1 A.1.IAXDPR_1
4 A 1 IAXDPR 2 A.1.IAXDPR_2
5 A 1 MLDBKZ 1 A.1.MLDBKZ_1
6 A 1 MLDBKZ 2 A.1.MLDBKZ_2
The output looks like this:
[
{
"patient": "A",
"samples": [
{
"sample": 1,
"readgroups": [
{
"readgroup": "FCSDRJ",
"mate": 1,
"file": "A.1.FCSDRJ_1"
},
{
"readgroup": "FCSDRJ",
"mate": 2,
"file": "A.1.FCSDRJ_2"
},
{
"readgroup": "IAXDPR",
"mate": 1,
"file": "A.1.IAXDPR_1"
},
{
"readgroup": "IAXDPR",
"mate": 2,
"file": "A.1.IAXDPR_2"
},
{
"readgroup": "MLDBKZ",
"mate": 1,
"file": "A.1.MLDBKZ_1"
},
{
"readgroup": "MLDBKZ",
"mate": 2,
"file": "A.1.MLDBKZ_2"
},
{
"readgroup": "OMTWHK",
"mate": 1,
"file": "A.1.OMTWHK_1"
},
{
"readgroup": "OMTWHK",
"mate": 2,
"file": "A.1.OMTWHK_2"
}
]
},
{
"sample": 2,
"readgroups": [
{
"readgroup": "BHAEFA",
"mate": 1,
"file": "A.2.BHAEFA_1"
},
{
"readgroup": "BHAEFA",
"mate": 2,
"file": "A.2.BHAEFA_2"
},
{
"readgroup": "DIBRHT",
"mate": 1,
"file": "A.2.DIBRHT_1"
},
{
"readgroup": "DIBRHT",
"mate": 2,
"file": "A.2.DIBRHT_2"
},
{
"readgroup": "HHMOSV",
"mate": 1,
"file": "A.2.HHMOSV_1"
},
{
"readgroup": "HHMOSV",
"mate": 2,
"file": "A.2.HHMOSV_2"
},
{
"readgroup": "KJXTPN",
"mate": 1,
"file": "A.2.KJXTPN_1"
},
{
"readgroup": "KJXTPN",
"mate": 2,
"file": "A.2.KJXTPN_2"
}
]
},
{
"sample": 3,
"readgroups": [
{
"readgroup": "CHXJMM",
"mate": 1,
"file": "A.3.CHXJMM_1"
},
{
"readgroup": "CHXJMM",
"mate": 2,
"file": "A.3.CHXJMM_2"
},
{
"readgroup": "MDWRBS",
"mate": 1,
"file": "A.3.MDWRBS_1"
},
{
"readgroup": "MDWRBS",
"mate": 2,
"file": "A.3.MDWRBS_2"
},
{
"readgroup": "RHHKGK",
"mate": 1,
"file": "A.3.RHHKGK_1"
},
{
"readgroup": "RHHKGK",
"mate": 2,
"file": "A.3.RHHKGK_2"
},
{
"readgroup": "VVVJFD",
"mate": 1,
"file": "A.3.VVVJFD_1"
},
{
"readgroup": "VVVJFD",
"mate": 2,
"file": "A.3.VVVJFD_2"
}
]
}
]
},
{
"patient": "B",
"samples": [
{
"sample": 1,
"readgroups": [
{
"readgroup": "QAFCOS",
"mate": 1,
"file": "B.1.QAFCOS_1"
},
{
"readgroup": "QAFCOS",
"mate": 2,
"file": "B.1.QAFCOS_2"
},
{
"readgroup": "TJYYMQ",
"mate": 1,
"file": "B.1.TJYYMQ_1"
},
{
"readgroup": "TJYYMQ",
"mate": 2,
"file": "B.1.TJYYMQ_2"
},
{
"readgroup": "YMHWOI",
"mate": 1,
"file": "B.1.YMHWOI_1"
},
{
"readgroup": "YMHWOI",
"mate": 2,
"file": "B.1.YMHWOI_2"
},
{
"readgroup": "ZOMSBU",
"mate": 1,
"file": "B.1.ZOMSBU_1"
},
{
"readgroup": "ZOMSBU",
"mate": 2,
"file": "B.1.ZOMSBU_2"
}
]
},
{
"sample": 2,
"readgroups": [
{
"readgroup": "CZWHXP",
"mate": 1,
"file": "B.2.CZWHXP_1"
},
{
"readgroup": "CZWHXP",
"mate": 2,
"file": "B.2.CZWHXP_2"
},
{
"readgroup": "MIMMNH",
"mate": 1,
"file": "B.2.MIMMNH_1"
},
{
"readgroup": "MIMMNH",
"mate": 2,
"file": "B.2.MIMMNH_2"
},
{
"readgroup": "RCWMQY",
"mate": 1,
"file": "B.2.RCWMQY_1"
},
{
"readgroup": "RCWMQY",
"mate": 2,
"file": "B.2.RCWMQY_2"
},
{
"readgroup": "WDMLHE",
"mate": 1,
"file": "B.2.WDMLHE_1"
},
{
"readgroup": "WDMLHE",
"mate": 2,
"file": "B.2.WDMLHE_2"
}
]
},
{
"sample": 3,
"readgroups": [
{
"readgroup": "DWITMU",
"mate": 1,
"file": "B.3.DWITMU_1"
},
{
"readgroup": "DWITMU",
"mate": 2,
"file": "B.3.DWITMU_2"
},
{
"readgroup": "GCLWMA",
"mate": 1,
"file": "B.3.GCLWMA_1"
},
{
"readgroup": "GCLWMA",
"mate": 2,
"file": "B.3.GCLWMA_2"
},
{
"readgroup": "QZZKQB",
"mate": 1,
"file": "B.3.QZZKQB_1"
},
{
"readgroup": "QZZKQB",
"mate": 2,
"file": "B.3.QZZKQB_2"
},
{
"readgroup": "WJKGRB",
"mate": 1,
"file": "B.3.WJKGRB_1"
},
{
"readgroup": "WJKGRB",
"mate": 2,
"file": "B.3.WJKGRB_2"
}
]
}
]
}
]
Which is great, except that I also want to nest by "mate" (and in theory nest by any number of variables, without any depth limitation). Any suggestions how to accomplish this?
Thanks!
1st step is to notice you're doing it backward, this gives the same output as yours but is simpler:
Then we can expand it:
output:
And if necessary, generalize it:
output: