Adding legend and structuring data for ggplot

2019-08-15 03:26发布

问题:

In the data included below I have three sites (AAA,BBB,CCC) and individuals within each site (7, 12, 7 respectively). For each individual I have observed values (ObsValues) and three sets of predicted values each with a standard error. I have 26 rows (i.e. 26 individuals) and 9 columns.

The data is included here through dput()

# Example data as produced by dput(): a data.frame with 26 rows (one per
# individual) and 9 columns.
#   StudyArea  factor with levels AAA, BBB, CCC
#   Ind        factor holding the individual's ID
#   ObsValues  observed value for the individual
#   AAAPred / BBBPred / CCCPred        the three sets of predicted values
#   AAAPredSE / BBBPredSE / CCCPredSE  standard error of each prediction
help <- structure(list(StudyArea = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L), .Label = c("AAA", "BBB", "CCC"), class = "factor"), 
    Ind = structure(1:26, .Label = c("AAA_F01", "AAA_F17", "AAA_F33", 
    "AAA_F49", "AAA_F65", "AAA_F81", "AAA_F97", "BBB_P01", "BBB_P02", 
    "BBB_P03", "BBB_P04", "BBB_P05", "BBB_P06", "BBB_P07", "BBB_P08", 
    "BBB_P09", "BBB_P10", "BBB_P11", "BBB_P12", "CCC_F02", "CCC_F03", 
    "CCC_F04", "CCC_F05", "CCC_F06", "CCC_F07", "CCC_F08"), class = "factor"), 
    ObsValues = c(22L, 50L, 8L, 15L, 54L, 30L, 11L, 90L, 6L, 
    53L, 9L, 42L, 72L, 40L, 60L, 58L, 1L, 20L, 37L, 2L, 50L, 
    68L, 20L, 19L, 58L, 5L), AAAPred = c(28L, 52L, 6L, 15L, 35L, 
    31L, 13L, 79L, 6L, 58L, 5L, 42L, 88L, 49L, 68L, 60L, 1L, 
    26L, 46L, 0L, 34L, 71L, 20L, 15L, 35L, 5L), AAAPredSE = c(3.5027829, 
    4.7852191, 1.231803, 2.5244013, 4.873907, 3.8854192, 2.3532752, 
    6.3444402, 1.7387295, 5.605111, 1.667818, 4.4709107, 7.0437967, 
    5.447496, 6.0840486, 5.4371275, 0.8156916, 3.5153847, 4.698754, 
    0, 3.8901103, 5.993616, 3.1720272, 2.6777869, 4.5647313, 
    1.4864128), BBBPred = c(14L, 43L, 5L, 13L, 26L, 32L, 14L, 
    80L, 5L, 62L, 4L, 44L, 67L, 44L, 55L, 42L, 1L, 20L, 47L, 
    0L, 26L, 51L, 15L, 16L, 34L, 6L), BBBPredSE = c(3.1873435, 
    4.8782831, 1.3739863, 2.5752273, 4.4155679, 3.8102168, 2.3419518, 
    6.364606, 1.7096028, 5.6333421, 1.5861323, 4.4951428, 6.6046699, 
    5.302902, 5.9244328, 5.1887055, 0.8268689, 3.4014041, 4.6600598, 
    0, 3.8510512, 5.5776686, 3.0569531, 2.6358433, 4.5273782, 
    1.4263518), CCCPred = c(29L, 53L, 7L, 15L, 44L, 32L, 15L, 
    86L, 8L, 61L, 5L, 46L, 99L, 54L, 74L, 67L, 1L, 30L, 51L, 
    1L, 37L, 94L, 21L, 17L, 36L, 6L), CCCPredSE = c(3.4634488, 
    4.7953389, 0.9484051, 2.5207022, 5.053452, 3.8072731, 2.2764727, 
    6.3605968, 1.6044067, 5.590048, 1.6611899, 4.4183913, 7.0124638, 
    5.6495918, 6.1091934, 5.4797929, 0.8135164, 3.4353934, 4.6261147, 
    0.8187396, 3.7936333, 5.6512378, 3.1686123, 2.633179, 4.5841921, 
    1.3989955)), .Names = c("StudyArea", "Ind", "ObsValues", 
"AAAPred", "AAAPredSE", "BBBPred", "BBBPredSE", "CCCPred", "CCCPredSE"
), class = "data.frame", row.names = c(NA, -26L))

The head() and dim() of help are below too

    head(help)
  StudyArea     Ind ObsValues AAAPred AAAPredSE BBBPred BBBPredSE CCCPred CCCPredSE
1       AAA AAA_F01        22      28  3.502783      14  3.187343      29 3.4634488
2       AAA AAA_F17        50      52  4.785219      43  4.878283      53 4.7953389
3       AAA AAA_F33         8       6  1.231803       5  1.373986       7 0.9484051
4       AAA AAA_F49        15      15  2.524401      13  2.575227      15 2.5207022
5       AAA AAA_F65        54      35  4.873907      26  4.415568      44 5.0534520
6       AAA AAA_F81        30      31  3.885419      32  3.810217      32 3.8072731

dim(help)
> dim(help)
[1] 26  9

I am a relative newcomer to ggplot and am trying to make a plot that displays the observed and predicted values for each individual with a different color for each StudyArea. I can manually add points and force the color with the code below; however, this feels rather clunky and also does not produce a legend, as I have not specified color in aes().

# Baseline attempt on the wide data: each series is added as its own layer
# with a hard-coded colour. The colours are constants set outside aes(), so
# they are not mapped to a scale and no legend is produced.
require(ggplot2)
ggplot(help, aes(x=Ind, y=ObsValues))+
    # observed value per individual, drawn as a large red asterisk
    geom_point(color="red", pch = "*", cex = 10)+
    # AAA predictions with +/- one standard error (blue)
    geom_point(aes(y = AAAPred), color="blue")+
    geom_errorbar(aes(ymin=AAAPred-AAAPredSE, ymax=AAAPred+AAAPredSE), color = "blue")+
    # BBB predictions with +/- one standard error (dark green)
    geom_point(aes(y = BBBPred), color="darkgreen")+
    geom_errorbar(aes(ymin=BBBPred-BBBPredSE, ymax=BBBPred+BBBPredSE), color = "darkgreen")+
    # CCC predictions with +/- one standard error (black)
    geom_point(aes(y = CCCPred), color="black")+
    geom_errorbar(aes(ymin=CCCPred-CCCPredSE, ymax=CCCPred+CCCPredSE), color = "black")+
    # tilt the individual IDs on the x axis
    theme(axis.text.x=element_text(angle=30, hjust=1))

In the figure above, the asterisks are the observed values and the points with error bars are the predicted values, one set from each StudyArea.

I tried to melt() the data, but ran into more problems plotting. That being said, I suspect melt()ing or reshape()ing is the best option.

Any suggestions on how to best alter/restructure the help data so that I can plot the observed and predicted values for each individual with a different color for each StudyArea would be greatly appreciated.

I also hope to produce a legend - the likely default once the data is correctly formatted

Note: Indeed, the resulting figure is very busy and will likely be simplified once I get a better handle on ggplot.

thanks in advance.

回答1:

Try this:

library(reshape2)
# Melt the predictions and their standard errors separately. Both calls use
# the same id columns and the same series order, so row i of x.se is the
# standard error belonging to row i of x.value.
x.value <- melt(help,
                id.vars = c("StudyArea", "Ind", "ObsValues"),
                measure.vars = c("AAAPred", "BBBPred", "CCCPred"))
x.se <- melt(help,
             id.vars = c("StudyArea", "Ind", "ObsValues"),
             measure.vars = c("AAAPredSE", "BBBPredSE", "CCCPredSE"))
# Long table: one row per individual and prediction series, with its SE.
gg <- cbind(x.value, se = x.se$value)
# Observed values as filled diamonds; predictions as open circles with
# +/- one SE bars, coloured by prediction series so a legend is generated.
ggplot(gg, aes(x = Ind)) +
  geom_point(aes(y = ObsValues), size = 5, shape = 18) +
  geom_point(aes(y = value, color = variable), size = 3, shape = 1) +
  geom_errorbar(aes(ymin = value - se, ymax = value + se, color = variable)) +
  theme(axis.text.x = element_text(angle = -90))

Produces this:

Edit:: Response to @B.Davis' questions below:

You have to group the ObsValues by StudyArea, not variable. But when you do that you get six colors, three for StudyArea and three for the predictor groups (variable). If we give the predictor groups (e.g., AAAPred, etc.) the same names as the StudyArea groups (e.g. AAA, etc.), then ggplot just generates three colors.

# Keep only the first three characters of each series name, which removes
# "Pred" ("AAAPred" -> "AAA"). The series labels then coincide with the
# StudyArea labels, so both colour mappings share the same three colours.
gg$variable <- substr(as.character(gg$variable), 1, 3)
ggplot(gg, aes(x = Ind)) +
  # observed values, coloured by the individual's own study area
  geom_point(aes(y = ObsValues, color = StudyArea), size = 5, shape = 18) +
  # predictions and +/- one SE bars, coloured by the predicting study area
  geom_point(aes(y = value, color = variable), size = 3, shape = 1) +
  geom_errorbar(aes(ymin = value - se, ymax = value + se, color = variable)) +
  theme(axis.text.x = element_text(angle = -90))

Produces this:



回答2:

Similar to @jlhoward's solution, but I chose to treat ObsValues as a variable so that it appears in the legend.

# Works directly on the question's data frame `help`. (The original snippet
# began with `help <- dat`, but no object `dat` is defined anywhere, so that
# line would fail or overwrite the data.)
# Requires reshape2 (melt) and ggplot2 to be loaded.

# Treat ObsValues as one more series so that it gets its own legend entry.
x.value <- melt(help,
                id.vars = c("StudyArea", "Ind"),
                measure.vars = c("ObsValues", "AAAPred", "BBBPred", "CCCPred"))
# ObsValues is melted here as well, only as a placeholder so that x.se has
# the same number and order of rows as x.value.
x.se <- melt(help,
             id.vars = c("StudyArea", "Ind"),
             measure.vars = c("ObsValues", "AAAPredSE", "BBBPredSE", "CCCPredSE"))
gg <- data.frame(x.value, se = x.se$value)
# Observed values have no standard error; blank out the placeholder so the
# table never carries an observed value disguised as an SE.
gg$se[gg$variable == "ObsValues"] <- NA
ggplot(gg) +
  geom_point(aes(x = Ind, y = value, color = variable), size = 3, shape = 1) +
  # error bars only for the predicted series
  geom_errorbar(data = subset(gg, variable != "ObsValues"),
                aes(x = Ind, ymin = value - se, ymax = value + se,
                    color = variable)) +
  theme(axis.text.x = element_text(angle = -90))



回答3:

This is a little clumsy, but gets you what you want:

# jlhoward's melting is more elegant.
require(reshape2)
melted.points<-melt(help[,c('Ind','ObsValues','AAAPred','BBBPred','CCCPred')])
melted.points$observed<-ifelse(melted.points$variable=='ObsValues','observed','predicted')
melted.points.se<-melt(help[,c('Ind','AAAPredSE','BBBPredSE','CCCPredSE')])
melted.points.se$variable<-gsub('SE','',melted.points.se$variable,)
help2<-merge(melted.points,melted.points.se,by=c('Ind','variable'),all.x=TRUE)
help2<-rename(help2,c(value.x='value',value.y='se'))

And now the actual plot:

# One set of aesthetics shared by both layers: colour identifies the series,
# while size and shape distinguish observed from predicted points.
ggplot(
  help2,
  aes(
    x = Ind,
    y = value,
    color = variable,
    size = observed,
    shape = observed,
    ymin = value - se,
    ymax = value + se
  )
) +
  geom_point() +
  # fixed size for the error bars, overriding the size mapping above
  geom_errorbar(size = 1) +
  scale_colour_manual(values = c("red", "blue", "darkgreen", "black")) +
  scale_size_manual(values = c(observed = 4, predicted = 3)) +
  scale_shape_manual(values = c(observed = 8, predicted = 16))



标签: r ggplot2