I have the following list of list. It contains two variables: pair and genes. The contain of pair
is always vector with two strings. And the variable genes
is a vector which can contain more than 1 values.
lol <- list(structure(list(pair = c("BoneMarrow", "Pulmonary"), genes = "PRR11"), .Names = c("pair",
"genes")), structure(list(pair = c("BoneMarrow", "Umbilical"),
genes = "GNB2L1"), .Names = c("pair", "genes")), structure(list(
pair = c("Pulmonary", "Umbilical"), genes = "ATP1B1"), .Names = c("pair",
"genes")))
lol
#> [[1]]
#> [[1]]$pair
#> [1] "BoneMarrow" "Pulmonary"
#>
#> [[1]]$genes
#> [1] "PRR11"
#>
#>
#> [[2]]
#> [[2]]$pair
#> [1] "BoneMarrow" "Umbilical"
#>
#> [[2]]$genes
#> [1] "GNB2L1"
#>
#>
#> [[3]]
#> [[3]]$pair
#> [1] "Pulmonary" "Umbilical"
#>
#> [[3]]$genes
#> [1] "ATP1B1"
How can I convert it into this dataframe:
pair1 pair2 genes_vec
BoneMarrow Pulmonary PRR11
BoneMarrow Umbilical GNB2L1
Pulmonary Umbilical ATP1B1
Note that the genes
variable is a vector not single string.
My best try is this which doesn't give what I want:
> do.call(rbind, lapply(lol, data.frame, stringsAsFactors=FALSE))
pair genes
1 BoneMarrow PRR11
2 Pulmonary PRR11
3 BoneMarrow GNB2L1
4 Umbilical GNB2L1
5 Pulmonary ATP1B1
6 Umbilical ATP1B1
Update:
With new example to show vector content of genes
lol2 <- list(structure(list(pair = c("BoneMarrow", "Pulmonary"), genes = c("GNB2L1",
"PRR11")), .Names = c("pair", "genes")), structure(list(pair = c("BoneMarrow",
"Umbilical"), genes = "GNB2L1"), .Names = c("pair", "genes")),
structure(list(pair = c("Pulmonary", "Umbilical"), genes = "ATP1B1"), .Names = c("pair",
"genes")))
lol2
#> [[1]]
#> [[1]]$pair
#> [1] "BoneMarrow" "Pulmonary"
#>
#> [[1]]$genes
#> [1] "GNB2L1" "PRR11"
#>
#>
#> [[2]]
#> [[2]]$pair
#> [1] "BoneMarrow" "Umbilical"
#>
#> [[2]]$genes
#> [1] "GNB2L1"
#>
#>
#> [[3]]
#> [[3]]$pair
#> [1] "Pulmonary" "Umbilical"
#>
#> [[3]]$genes
#> [1] "ATP1B1"
The expected output is:
pair1 pair2 genes_vec
BoneMarrow Pulmonary PRR11,GNB2L1
BoneMarrow Umbilical GNB2L1
Pulmonary Umbilical ATP1B1
Using tidyverse
, you could use purrr
to help you
library(dplyr)
library(purrr)
tibble(
pair = map(lol, "pair"),
genes_vec = map_chr(lol, "genes")
) %>%
mutate(
pair1 = map_chr(pair, 1),
pair2 = map_chr(pair, 2)
) %>%
select(pair1, pair2, genes_vec)
#> # A tibble: 3 x 3
#> pair1 pair2 genes_vec
#> <chr> <chr> <chr>
#> 1 BoneMarrow Pulmonary PRR11
#> 2 BoneMarrow Umbilical GNB2L1
#> 3 Pulmonary Umbilical ATP1B1
with the second example, just replace map_chr(lol, "genes")
with map(lol2, "genes")
as you want to keep a nested dataframe with a list column.
tibble(
pair = map(lol2, "pair"),
genes_vec = map(lol2, "genes")
) %>%
mutate(
pair1 = map_chr(pair, 1),
pair2 = map_chr(pair, 2)
) %>%
select(pair1, pair2, genes_vec)
#> # A tibble: 3 x 3
#> pair1 pair2 genes_vec
#> <chr> <chr> <list>
#> 1 BoneMarrow Pulmonary <chr [2]>
#> 2 BoneMarrow Umbilical <chr [1]>
#> 3 Pulmonary Umbilical <chr [1]>
And a more generic approach would be to work with nested tibbles and unnest them as needed
library(dplyr)
library(purrr)
library(tidyr)
tab1 <-lol %>%
transpose() %>%
as_tibble() %>%
mutate(pair = map(pair, ~as_tibble(t(.x)))) %>%
mutate(pair = map(pair, ~set_names(.x, c("pair1", "pair2"))))
tab1
#> # A tibble: 3 x 2
#> pair genes
#> <list> <list>
#> 1 <tibble [1 x 2]> <chr [1]>
#> 2 <tibble [1 x 2]> <chr [1]>
#> 3 <tibble [1 x 2]> <chr [1]>
For lol2
nothing changes unless the list lol2
instead of lol1
tab2 <- lol2 %>%
transpose() %>%
as_tibble() %>%
mutate(pair = map(pair, ~as_tibble(t(.x)))) %>%
mutate(pair = map(pair, ~set_names(.x, c("pair1", "pair2"))))
tab2
#> # A tibble: 3 x 2
#> pair genes
#> <list> <list>
#> 1 <tibble [1 x 2]> <chr [2]>
#> 2 <tibble [1 x 2]> <chr [1]>
#> 3 <tibble [1 x 2]> <chr [1]>
You can then unnest what the column you want
tab1 %>%
unnest()
#> # A tibble: 3 x 3
#> genes pair1 pair2
#> <chr> <chr> <chr>
#> 1 PRR11 BoneMarrow Pulmonary
#> 2 GNB2L1 BoneMarrow Umbilical
#> 3 ATP1B1 Pulmonary Umbilical
tab2 %>%
unnest(pair)
#> # A tibble: 3 x 3
#> genes pair1 pair2
#> <list> <chr> <chr>
#> 1 <chr [2]> BoneMarrow Pulmonary
#> 2 <chr [1]> BoneMarrow Umbilical
#> 3 <chr [1]> Pulmonary Umbilical
EDIT: updated to work with vector lol2.
Maybe like this:
as.data.frame(do.call(rbind,lapply(lol2, function(x) {c(unlist(x[1]),gene=paste(unlist(x[2]),collapse=","))})),stringsAsFactors = F)
pair1 pair2 genes
1 BoneMarrow Pulmonary GNB2L1, PRR11
2 BoneMarrow Umbilical GNB2L1
3 Pulmonary Umbilical ATP1B1
> lol1 <- data.frame(t(sapply(lol,c)))
> as.data.frame(t(apply(lol1, 1, unlist)))
pair1 pair2 genes
1 BoneMarrow Pulmonary PRR11
2 BoneMarrow Umbilical GNB2L1
3 Pulmonary Umbilical ATP1B1
This should work:
data.frame(do.call(rbind,lol2))
data.frame(do.call(rbind,lol2))
pair genes
1 BoneMarrow, Pulmonary GNB2L1, PRR11
2 BoneMarrow, Umbilical GNB2L1
3 Pulmonary, Umbilical ATP1B1
The same way you treat the genes as a vector is the same way you can the same way you can treat the pairs as a vector.. instead of pair 1 and 2 you just use both of them.