How to Make a Grouped Barplot for a Factor with Ma

2019-03-06 15:19发布

问题:

The dataframe named 'temp' (below) has three columns: (1) Canopy_index; (2) Under_tree; and (3) Open_Canopy. The columns Under_tree and Open_Canopy are factors with 5 named levels each (the dput output further below also lists an empty-string level "").

         # Show the first five rows of 'temp'. data() is not used because it
         # only loads datasets shipped with packages and prints nothing.
         head(temp, 5)

            Canopy_index  Under_tree Open_Canopy
          1           75 Undergrowth       Grass
          2           85      Litter       Grass
          3           75      Litter       Grass
          4           35      Litter       Grass
          5           85 Undergrowth       Grass

The dataframe 'temp' was reshaped into long format as df.melt (below). The aim is a barplot whose y-axis is Canopy_Index and whose x-axis is the factor Topography (3rd column, five levels), with the bars grouped by the two conditions in the 2nd column, Under_Open: (1) Under_tree; and (2) Open_Canopy.

Problem

I would like to produce a barplot that looks like example 1 (below) with a key for: (1) Under the Canopy; and (2) Open Canopy.

To produce the barplot, I attempted to use ggplot; however, the result appears to be erroneous because each bar is exactly the same height (example 2). In summary, I am unsure how to fix the error.

If anyone can help, then many thanks in advance.

Code to convert the dataframe into long format:

   ## Response variable (y): Canopy_index
   ## Explanatory variables: Under_tree and Open_Canopy
   ## Reshape 'temp' to long format: the two canopy columns are stacked into
   ## one key column (source column name) and one value column.

     library(reshape2)
     library(ggplot2)

     ## 'temp' must already exist in the workspace. data() only loads
     ## datasets shipped with packages, so it is not used here.
     ## id.vars is spelled out in full rather than relying on partial
     ## argument matching of 'id'.
     df.melt <- melt(temp, id.vars = "Canopy_index")

     ## melt() names the stacked columns 'variable' and 'value'; rename all
     ## three columns to the names used by the plotting code.
     colnames(df.melt) <- c("Canopy_Index", "Under_Open", "Topography")

     ## Preview the first rows of the reshaped data
     head(df.melt, 5)

             Canopy_Index Under_Open  Topography
           1           75 Under_tree Undergrowth
           2           85 Under_tree      Litter
           3           75 Under_tree      Litter
           4           35 Under_tree      Litter
           5           85 Under_tree Undergrowth

Code for barplot

 ## Grouped barplot of Canopy_Index per Topography level, dodged by Under_Open.
 ## NOTE: every row of df.melt is drawn as its own bar, so bars that share an
 ## x/fill cell are overlaid and only the tallest one remains visible.

 ggplot(
   df.melt,
   aes(x = Topography, y = Canopy_Index, fill = factor(Under_Open))
 ) +
   geom_col(position = "dodge") +
   scale_fill_discrete(
     name = "Topographical Feature",
     breaks = c(1, 2),
     labels = c("Open_Canopy", "Under_tree")
   ) +
   labs(x = "Topographical Feature", y = "Canopy Index")

EXAMPLE (1):

EXAMPLE (2)

DATAFRAME (temp) - dput() output; assign the expression below to `temp` to recreate the data:

       structure(list(Canopy_index = c(75, 85, 75, 35, 85, 95, 85, 65, 
       75, 95, 75, 95, 85, 75, 85, 95, 75, 85, 85, 85, 75, 75, 85, 85, 
       65, 85, 75, 85, 95, 95, 85, 55, 75, 95, 75, 95, 95, 65, 65, 55, 
       95, 85, 85, 45, 85, 85, 35, 95, 85, 85, 35, 85, 45, 85, 85, 85, 
       95, 85, 85, 75, 85, 35, 85, 85, 65, 65, 85, 45, 55, 95, 75, 95, 
       45, 75, 75, 95, 95, 85, 75, 95, 75, 65, 85, 75, 75, 55, 75, 85, 
       85, 85, 15, 75, 85, 85, 85, 95, 85, 85, 75, 85, 85, 95, 65, 75, 
       95, 55, 75, 85, 85, 85, 95, 55, 85, 75, 75, 85, 85, 85, 85, 55, 
       75, 55, 75, 85, 75, 85, 85, 75, 85, 75, 95, 25, 95, 95, 25, 75, 
       75, 85, 35, 55, 85, 65, 85, 75, 85, 85, 85, 75, 65, 85, 85, 95, 
       65, 55, 95, 95, 85, 95, 85, 65, 55, 65, 55, 95, 75, 85, 85, 35, 
       75, 75, 85, 65, 85, 65, 65, 95, 85, 95, 75, 75, 55, 95, 65, 85, 
       65, 15, 35, 55, 95, 15, 15, 75, 65, 85, 5, 5, 35, 35, 85, 65, 
       45, 35, 65, 65, 75, 65, 15, 75, 65, 45, 25, 65, 85, 45, 85, 75, 
       15, 65, 45, 55, 45, 15, 45, 75, 65, 75, 65, 35, 95, 65, 35, 35, 
       65, 45, 75, 35, 75, 85, 35, 55, 65, 85, 65, 65, 85, 55, 15, 75, 
       65, 45, 45, 85, 55, 15, 85, 15, 95, 75, 5, 55, 15, 35, 45, 85, 
       65, 65, 65, 65, 25, 85, 35, 55, 65, 75, 5, 45, 65, 15, 75, 55, 
       65, 55, 35, 75, 65, 65, 85, 35, 65, 55, 75, 15, 55, 65, 75, 55, 
       85, 35, 55, 55, 25, 75, 15, 55, 75, 75, 65, 55, 45, 75, 25, 45, 
       95, 55, 75, 45, 25, 35, 55, 15, 15, 75, 35, 55, 55, 65, 45, 65, 
       25, 55, 45, 65, 65, 25, 25, 65, 45, 95, 55, 25, 55, 85, 45, 85, 
       15, 75, 65, 35, 75, 15, 55, 85, 35, 55, 45, 85, 45, 65, 55, 75, 
       65, 85), Under_tree = structure(c(6L, 5L, 5L, 5L, 6L, 4L, 6L, 
       5L, 5L, 5L, 4L, 6L, 3L, 6L, 4L, 6L, 4L, 5L, 6L, 5L, 5L, 3L, 5L, 
       6L, 5L, 5L, 6L, 4L, 6L, 5L, 4L, 4L, 5L, 4L, 5L, 4L, 6L, 6L, 4L, 
       4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 6L, 5L, 5L, 6L, 4L, 6L, 4L, 
       4L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 5L, 5L, 4L, 4L, 4L, 4L, 5L, 3L, 
       4L, 5L, 5L, 5L, 4L, 4L, 5L, 6L, 4L, 4L, 5L, 4L, 5L, 6L, 6L, 4L, 
       4L, 4L, 5L, 4L, 6L, 4L, 4L, 5L, 4L, 6L, 5L, 5L, 4L, 6L, 5L, 6L, 
       4L, 3L, 6L, 6L, 6L, 3L, 5L, 6L, 6L, 6L, 5L, 5L, 3L, 4L, 4L, 6L, 
       4L, 3L, 5L, 6L, 4L, 2L, 5L, 5L, 5L, 5L, 6L, 5L, 4L, 4L, 4L, 4L, 
       6L, 5L, 6L, 6L, 4L, 6L, 6L, 4L, 5L, 4L, 6L, 5L, 6L, 6L, 5L, 6L, 
       6L, 4L, 5L, 4L, 5L, 4L, 6L, 5L, 4L, 6L, 3L, 3L, 4L, 4L, 4L, 4L, 
       3L, 4L, 5L, 4L, 5L, 4L, 5L, 6L, 4L, 5L, 4L, 4L, 6L, 4L, 4L, 6L, 
       6L, 5L, 5L, 5L, 4L, 4L, 6L, 5L, 5L, 5L, 4L, 6L, 3L, 4L, 5L, 4L, 
       4L, 5L, 6L, 5L, 5L, 3L, 5L, 6L, 6L, 5L, 6L, 6L, 4L, 4L, 5L, 5L, 
       4L, 5L, 4L, 5L, 5L, 4L, 4L, 5L, 4L, 3L, 4L, 5L, 5L, 3L, 5L, 5L, 
       5L, 6L, 4L, 6L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 2L, 4L, 4L, 5L, 4L, 
       4L, 6L, 4L, 3L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 5L, 6L, 4L, 4L, 5L, 
       4L, 5L, 4L, 4L, 5L, 5L, 6L, 5L, 3L, 6L, 5L, 5L, 6L, 5L, 6L, 6L, 
       5L, 4L, 6L, 6L, 5L, 4L, 4L, 5L, 6L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 
       5L, 4L, 5L, 4L, 6L, 4L, 5L, 5L, 5L, 4L, 2L, 5L, 5L, 5L, 6L, 5L, 
       5L, 5L, 4L, 6L, 4L, 3L, 6L, 5L, 6L, 6L, 5L, 6L, 6L, 4L, 5L, 5L, 
       6L, 5L, 5L, 4L, 5L, 5L, 6L, 5L, 6L, 4L, 4L, 5L, 4L, 3L, 3L, 4L, 
       4L, 3L, 6L, 4L, 3L, 6L, 4L, 5L, 4L, 4L, 5L, 5L, 4L, 4L, 4L, 4L, 
       4L, 6L, 5L), .Label = c("", "Artificial_Surface", "Bare_soil", 
       "Grass", "Litter", "Undergrowth"), class = "factor"),      Open_Canopy = structure(c(4L, 
       4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 5L, 6L, 3L, 4L, 4L, 4L, 4L, 4L, 
       4L, 4L, 4L, 5L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 4L, 4L, 3L, 
       4L, 4L, 4L, 6L, 6L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 3L, 6L, 
       2L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 6L, 4L, 5L, 4L, 4L, 4L, 
       4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 4L, 4L, 4L, 
       5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 6L, 4L, 4L, 4L, 3L, 
       4L, 3L, 4L, 4L, 4L, 3L, 4L, 4L, 6L, 3L, 4L, 4L, 4L, 4L, 6L, 6L, 
       4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 5L, 4L, 4L, 2L, 4L, 5L, 4L, 4L, 
       4L, 4L, 4L, 4L, 4L, 4L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
       4L, 4L, 6L, 4L, 6L, 6L, 4L, 4L, 6L, 4L, 4L, 4L, 4L, 6L, 4L, 4L, 
       4L, 3L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
       4L, 4L, 6L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 
       4L, 3L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 3L, 4L, 4L, 5L, 
       4L, 2L, 4L, 4L, 6L, 4L, 4L, 5L, 4L, 6L, 6L, 4L, 4L, 3L, 3L, 4L, 
       4L, 5L, 5L, 2L, 5L, 2L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 6L, 
       6L, 2L, 4L, 4L, 6L, 4L, 4L, 4L, 3L, 4L, 3L, 5L, 5L, 4L, 4L, 4L, 
       4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 
       6L, 5L, 5L, 4L, 3L, 4L, 5L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 2L, 
       4L, 6L, 4L, 4L, 6L, 5L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 6L, 4L, 4L, 
       2L, 4L, 5L, 4L, 4L, 4L, 5L, 4L, 4L, 4L, 4L, 4L, 6L, 4L, 4L, 4L, 
       4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 4L, 4L, 
       4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 6L, 4L, 4L, 4L, 4L, 
       4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 3L), .Label = c("",      "Artificial_Surface", 
       "Bare_soil", "Grass", "Litter", "Undergrowth"), class =    "factor")), .Names = c("Canopy_index", 
        "Under_tree", "Open_Canopy"), row.names = c(1L, 2L, 3L, 4L, 5L, 
        6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
       19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 
       32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 
       45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 
       58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 
       71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 
       84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 
       97L, 99L, 100L, 101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 
      109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L, 117L, 118L, 119L, 
      120L, 121L, 122L, 123L, 124L, 125L, 126L, 127L, 128L, 129L, 130L, 
      131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L, 139L, 140L, 141L, 
      142L, 143L, 144L, 145L, 146L, 147L, 148L, 149L, 150L, 151L, 152L, 
      153L, 154L, 155L, 156L, 157L, 158L, 159L, 160L, 161L, 162L, 163L, 
      164L, 165L, 166L, 167L, 168L, 169L, 170L, 171L, 172L, 173L, 174L, 
      175L, 176L, 177L, 178L, 179L, 180L, 181L, 182L, 183L, 184L, 185L, 
      186L, 187L, 188L, 189L, 190L, 191L, 192L, 193L, 194L, 195L, 196L, 
      197L, 198L, 199L, 200L, 201L, 202L, 203L, 204L, 205L, 206L, 207L, 
      208L, 209L, 210L, 211L, 212L, 213L, 214L, 215L, 216L, 217L, 218L, 
      219L, 220L, 221L, 222L, 223L, 224L, 225L, 226L, 227L, 228L, 229L, 
      230L, 231L, 232L, 233L, 234L, 235L, 236L, 237L, 238L, 239L, 240L, 
      241L, 242L, 243L, 244L, 245L, 246L, 247L, 248L, 249L, 250L, 251L, 
      252L, 253L, 254L, 255L, 256L, 257L, 258L, 259L, 260L, 261L, 262L, 
      263L, 264L, 265L, 266L, 267L, 268L, 269L, 270L, 271L, 272L, 273L, 
      274L, 275L, 276L, 277L, 278L, 279L, 280L, 281L, 282L, 283L, 284L, 
      285L, 286L, 287L, 288L, 289L, 290L, 291L, 292L, 293L, 294L, 295L, 
      296L, 297L, 298L, 299L, 300L, 301L, 302L, 303L, 304L, 305L, 306L, 
      307L, 308L, 309L, 310L, 311L, 312L, 313L, 314L, 315L, 316L, 317L, 
      318L, 319L, 320L, 321L, 322L, 323L, 324L, 325L, 326L, 327L, 328L, 
      329L, 330L, 331L, 332L, 333L, 334L, 335L, 336L, 337L, 338L, 339L, 
      340L, 341L, 342L, 343L, 344L, 345L, 346L, 347L, 348L, 349L, 350L, 
      351L, 352L, 353L, 354L, 355L, 356L, 357L, 358L, 359L, 360L, 361L, 
      362L, 363L), class = "data.frame")

回答1:

Assuming you want to plot means of Canopy_Index for each Under_Open, Topography cell, you can form means first:

# Mean Canopy_Index for every Under_Open x Topography combination
df.means <- aggregate(
  Canopy_Index ~ Under_Open + Topography,
  data = df.melt,
  FUN = mean
)

Then, plot df.means using the code from your question:

# Dodged bars of the per-cell means, one fill colour per canopy type
ggplot(df.means, aes(x = Topography, y = Canopy_Index, fill = Under_Open)) +
  geom_col(position = "dodge") +
  scale_fill_discrete(
    name = "Canopy Type",
    labels = c("Under_tree" = "Under Canopy", "Open_Canopy" = "Open Canopy")
  ) +
  labs(x = "Topographical Feature", y = "Canopy Index")

Result:

The reason why the bars are currently almost all of the same height is that you overlay multiple values per cell (as pointed out in the comments by Marijn Stevering), effectively plotting the max:

# Largest Canopy_Index in each Under_Open x Topography cell
df.max <- aggregate(
  Canopy_Index ~ Under_Open + Topography,
  data = df.melt,
  FUN = max
)
# Printed df.max:
#     Under_Open         Topography Canopy_Index
# 1   Under_tree Artificial_Surface           75
# 2  Open_Canopy Artificial_Surface           95
# 3   Under_tree          Bare_soil           95
# 4  Open_Canopy          Bare_soil           95
# 5   Under_tree              Grass           95
# 6  Open_Canopy              Grass           95
# 7   Under_tree             Litter           95
# 8  Open_Canopy             Litter           95
# 9   Under_tree        Undergrowth           95
# 10 Open_Canopy        Undergrowth           95