Automatically add p-values to a facet plot

2019-03-04 07:55发布

问题:

I have made a facet plot below using the following command:

    # Box plot of logFC for each Length, faceted by X (rows) and
    # Modification (columns). Layers must be chained with a trailing `+`:
    # a trailing comma or a line that starts with `+` is a syntax error / ends
    # the expression early.
    ggplot(data, aes(factor(Length), logFC)) +
        geom_boxplot(fill = "grey90") +
        coord_cartesian(ylim = c(-5, 5)) +
        facet_grid(X ~ Modification)

Is there a way to compute a p-value for each boxplot and add it as a geom_text label above that boxplot? For each box I want to run a one-sample t-test comparing the logFC values against y = 0.

My data looks like this:

    X Length          logFC     Modification
 Daub     26    -0.7307060811           NTA
 Daub     22    -0.3325621272           NTA
 Daub     22    -2.0579390395           NTA
 Daub     25     2.7199391457           NTA
 Daub     23    -0.0009869389           NTA
 Daub     25    -0.3318842493           NTA
 ...

My error message:

> data <- structure(list(Experiment = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
+                                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
+                                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
+                                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
+                                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
+                                                 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
+                                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
+                                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
+                                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
+                                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
+                                                 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
+                                                 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
+                                                 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
+                                                 3L, 3L, 3L, 3L, 3L, 
+                                                 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
+                                                 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Daub", "Marie", 
+                                                                                                 "Meister"), class = "factor"), Length = c(26L, 22L, 22L, 25L, 
+                                                                                                                                           23L, 25L, 23L, 25L, 24L, 23L, 24L, 26L, 24L, 21L, 20L, 21L, 22L, 
+                                                   22L, 21L, 21L, 21L, 22L, 21L, 22L, 21L, 21L, 20L, 20L, 21L, 25L, 
+                                                                                                                                           20L, 22L, 24L, 22L, 23L, 24L, 23L, 23L, 22L, 22L, 22L, 22L, 21L, 
+                                                                                                                                           19L, 21L, 20L, 20L, 20L, 19L, 19L, 19L, 22L, 23L, 23L, 22L, 23L, 
+                                                                                                                                           22L, 20L, 21L, 24L, 24L, 24L, 25L, 24L, 21L, 20L, 23L, 23L, 20L, 
+                                                                                                                                           23L, 23L, 24L, 20L, 21L, 22L, 24L, 23L, 22L, 23L, 22L, 23L, 23L, 
+                                                                                                                                           19L, 21L, 23L, 24L, 22L, 23L, 23L, 21L, 22L, 20L, 22L, 23L, 25L, 
+                                                                                                                                           22L, 22L, 23L, 22L, 23L, 25L, 25L, 24L, 24L, 23L, 22L, 22L, 25L, 
+                                                                                                                                           23L, 24L, 23L, 23L, 22L, 22L, 25L, 23L, 22L, 25L, 21L, 19L, 21L, 
+                                                                                                                                           23L, 22L, 22L, 20L, 20L, 20L, 23L, 22L, 21L, 21L, 23L, 23L, 23L, 
+                                                                                                                                           21L, 25L, 23L, 24L, 24L, 23L, 23L, 23L, 21L, 22L, 21L, 21L, 23L, 
+                                                                                                                                           23L, 22L, 22L, 21L, 22L, 22L, 25L, 24L, 24L, 22L, 24L, 24L, 23L, 
+                                                                                                                                           22L, 21L, 22L, 23L, 20L, 22L, 23L, 24L, 25L, 24L, 25L, 22L, 23L, 
+                                                                                                                                           24L, 21L, 25L, 23L, 19L, 21L, 21L, 22L, 20L, 21L, 18L, 20L, 20L, 
+                                                                                                                                           21L, 20L, 23L, 19L, 19L, 22L, 22L, 22L, 22L, 22L, 21L, 22L, 24L, 
+                                                                                                                                           20L, 21L, 22L, 22L, 21L, 21L, 21L, 21L, 21L, 23L, 23L, 23L, 25L, 
+                                                                                                                                           25L, 25L, 23L, 24L, 24L, 24L, 24L, 24L, 24L, 25L, 25L), logFC = c(-0.7307060811, 
+                                                                                                                                                                                                             -0.3325621272, -2.0579390395, 2.7199391457, -0.0009869389, -0.3318842493, 
+                                                                                                                                                                                                             -2.1922199037, -1.8907961065, -1.9059255014, -0.2815081355, -0.2040330335, 
+                                                                                                                                                                                                             3.661469505, 0.6489955587, -0.0261245467, -1.4312409441, -1.1199604078, 
+                                                                                                                                                                                                             -1.6528592355, -2.8208936451, -0.7207549269, -1.6528592355, -1.2540377475, 
+                                                                                                                                                                                                             -2.1088724443, -2.1088724443, -1.5556550771, -1.5556550771, -0.2899601367, 
+                                                                                                                                                                                                             0.36449851, -1.7787723427, -1.5556550771, -1.5556550771, -1.5556550771, 
+                                                                                                                                                                                                             -2.1092566794, 0.0417776477, -3.0768675589, -4.2573082637, -1.5556550771, 
+                                                                                                                                                                                                             -1.8493703566, -0.7310899725, -2.8201262449, -0.7203706918, -2.1088724443, 
+                                                                                                                                                                                                             -3.5714106365, -1.5556550771, -1.2144625017, 1.6608916211, -0.3147141406, 
+                                                                                                                                                                                                             1.2344697053, 1.2303596917, 1.2138067782, 0.9409846988, 0.5270928206, 
+                                                                                                                                                                                                             -1.0435216994, -1.4320081419, -1.1644217165, -1.1478237529, -0.9941196613, 
+                                                                                                                                                                                                             0.0762668692, 1.0076747803, 0.0679302699, -0.4852244221, 0.7792467457, 
+                                                                                                                                                                                                             0.4902414285, 1.6172022872, 0.5270928206, -1.5403877099, -0.3322684844, 
+                                                                                                                                                                                                             0.0965099283, 0.8067662712, -0.3322684844, -1.2928579903, 0.6067208763, 
+                                                                                                                                                                                                             0.0247576412, -0.0291609233, -0.4737578429, 0.0743062433, 0.1126554177, 
+                                                                                                                                                                                                             -0.0156954476, 1.1069888258, -0.956482117, -0.2829742145, 0.8511530937, 
+                                                                                                                                                                                                             -0.1571780266, -1.2033199926, -1.1883052896, -0.0619556757, -0.7813018565, 
+                                                                                                                                                                                                             2.2467468049, 2.8382841074, 0.5658773933, -0.4461699001, -0.7409548873, 
+                                                                                                                                                                                                             -0.992979577, -1.0966445642, -0.8035321174, 0.4586171366, -0.2760821893, 
+                                                                                                                                                                                                             0.0585422656, 0.0328935437, 0.3858231436, -0.4374188039, 1.1166538873, 
+                                                                                                                                                                                                             -1.6539303789, 0.2027459981, -0.2193112677, -0.3939953745, -1.6726108643, 
+                                                                                                                                                                                                             1.1518720793, 2.2517568637, -0.561147283, -2.1625509666, -1.65562751, 
+                                                                                                                                                                                                             -0.9048469063, -1.0759388341, 0.4938537603, 1.8754485108, -1.5944759871, 
+                                                                                                                                                                                                             1.0688499798, 2.6559945275, -1.908097968, -1.9214219995, -2.9675169126, 
+                                                                                                                                                                                                             0.0365892303, -0.8345258687, -1.0535567925, -2.0036191122, -1.6843791204, 
+                                                                                                                                                                                                             -2.5554312825, -1.5778268888, -1.576142107, -0.9398408101, 2.4453250675, 
+                                                                                                                                                                                                             -1.5434092122, -0.794414515, -0.6200158513, 0.5556353409, -1.0772272444, 
+                                                                                                                                                                                                             -0.8720587283, -0.8082062813, -0.7353916189, 0.1072543637, 0.5658773933, 
+                                                                                                                                                                                                             0.13043531, -0.0154958912, -0.868710614, -0.1922496916, 1.0682890388, 
+                                                                                                                                                                                                             -1.673413308, -0.9581901784, -1.9575141988, -1.8973257122, 1.4967046965, 
+                                                                                                                                                                                                             -2.456068976, -1.4577030552, -4.2692094743, -1.9124787897, -1.4993411082, 
+                                                                                                                                                                                                             -0.6409837734, 0.6369441273, -0.9960964825, -5.9703084924, -1.97960268, 
+                                                                                                                                                                                                             -1.2422870608, -1.5170124157, -1.9021683731, 3.4029417731, 0.1812972171, 
+                                                                                                                                                                                                             -1.6370149729, -1.749015407, -2.1677341592, -1.4942545905, -1.1137758818, 
+                                                                                                                                                                                                             -1.2428452903, -1.3014446584, 0.0287537402, -0.8721416458, -2.4062762035, 
+                                                                                                                                                                                                             -4.0278899462, -2.2229120764, -1.5950383235, -3.6098212725, -2.5979636046, 
+                                                                                                                                                                                                             0.3631424981, 1.1377073609, 0.5151459494, 0.0640542096, -0.7715375264, 
+                                                                                                                                                                                                             -1.0361077101, -0.2462753448, -2.3058140776, -0.0847179004, -0.518970228, 
+                                                                                                                                                                                                             0.8519432911, 1.9516260022, -0.5706154628, 1.240812729, 0.336736001, 
+                                                                                                                                                                                                             2.2509464232, -0.322918086, -4.4019571741, -0.5618441487, 3.4700721641, 
+                                                                                                                                                                                                             -3.9220135953, -2.1968879291, -0.1362995026, 2.164094913, -1.0688563363, 
+                                                                                                                                                                                                             0.4302583643, 2.6411096027, -3.020513717, -1.5395519303, -2.2219591633, 
+                                                                                                                                                                                                             -3.8891956255, 0.9602784132, -0.6470571429, 1.853151793, -0.3271268741, 
+                                                                                                                                                                                                             -0.9870872828, -2.516770073, -1.2898235194, -1.7246627604, -0.61328192, 
+                                                                                                                                                                                                             -3.5457352204, -2.5068717697), Modification = structure(c(1L, 
+                                                                                                                                                                                                                                                                       1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 
+                                                                                                                                                                                                                                                                       4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 
+                                                                                                                                                                                                                                                                       5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
+                                                                                                                                                                                                                                                                       3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 
+                                                                                                                                                                                                                                                                       2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
+                                                                                                                                                                                                                                                                       1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
+                                                                                                                                                                                                                                                                       5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
+                                                                                                                                                                                                                                                                       4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
+                                                                                                                                                                                                                                                                       3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
+                                                                                                                                                                                                                                                                       2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
+                                                                                                                                                                                                                                                                       5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
+                                                                                                                                                                                                                                                                       4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 
+                                                                                                                                                                                                                                                                       3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 
+                                                                                                                                                                                                                                                                       2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NTA", 
+                                                                                                                                                                                                                                                                                                                                           "t3-d", "t3-u", "t5-d", "t5-u"), class = "factor")), .Names = c("Experiment", 
+                                                                                                                                                                                                                                                                                                                                                                                                           "Length", "logFC", "Modification"), class = "data.frame", row.names = c(NA, 
+                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   -223L))
> library(dplyr)
> pvalues <- data %>% group_by(Experiment, Modification, Length) %>%
+ filter(n() > 1) %>%
+ summarize(p.value = (t.test(logFC, mu = 0)$p.value))
Error in t.test(logFC, mu = 0) : object 'logFC' not found

回答1:

You can do this by summarizing the data into a table of p-values. This can be done using dplyr:

library(dplyr)
# One-sample t-test of logFC against mu = 0 for every
# Experiment x Modification x Length combination that has more than one row.
pvalues <- data %>%
    group_by(Experiment, Modification, Length) %>%
    filter(n() > 1) %>%
    summarize(p.value = t.test(logFC, mu = 0)$p.value)

(The line filter(n() > 1) removes any groups of size 1, for which a p-value cannot be calculated.) This produces a table that looks like:

# Experiment Modification Length   p.value
# 1       Daub          NTA     22 0.3980043
# 2       Daub          NTA     23 0.3535590
# 3       Daub          NTA     24 0.5831962
# 4       Daub          NTA     25 0.9137644
# 5       Daub          NTA     26 0.6254004
# 6       Daub         t3-d     20 0.1493108

Now you can add that text to your plot using a geom_text layer, choosing some y such as y = 3:

library(ggplot2)

# Box plots of logFC per Length, faceted by Experiment (rows) and
# Modification (columns); the p-value of each group is drawn as text at y = 3.
# The text layer takes its x position from the inherited factor(Length)
# mapping, evaluated on the pvalues table.
ggplot(data = data, mapping = aes(x = factor(Length), y = logFC)) +
    geom_boxplot(fill = "grey90") +
    coord_cartesian(ylim = c(-5, 5)) +
    facet_grid(Experiment ~ Modification) +
    geom_text(data = pvalues, mapping = aes(y = 3, label = p.value), size = 1)

You will probably have to manipulate the size (and possibly angle) of your geom_text to make the plot readable. Note also that since you are performing many tests, you should probably look at the adjusted p-values rather than the raw p-values. You can compute that column with

# Bonferroni-adjust across ALL tests at once. summarize() only drops the last
# grouping level, so a pvalues table built with
# group_by(Experiment, Modification, Length) is still grouped by Experiment
# and Modification; without ungroup() the correction would be applied
# separately inside each of those groups and under-correct.
pvalues <- pvalues %>%
    ungroup() %>%
    mutate(p.adjusted = p.adjust(p.value, method = "bonferroni"))

The function format.pval will also come in handy, especially if some of your p-values are close to 0.