ggplot Stacked Barplot with Negative Values

2019-09-10 01:10发布

问题:

I've been perusing DOT airline data and am trying to create a stacked bar graph of the year over year (YOY) change in each airline's passengers from a specific airport to all other stations.

I also want to order the x-axis by the total number of people (market.ppd) traveling from the specified airport to each station. For example, this data set's origin airport is PHL; its top destination is MCO, followed by Miami, LAS, etc.

The x-axis stays ordered when the YOY data is solely positive or negative but defaults back to alphabetical order once I try to stack the bar with both. Some stations only experience a positive YOY change or a negative YOY change, whereas the example in this post has positive and negative values for each category.

My hunch is ggplot reverts the levels to alphabetical order once it finds that some of the stations don't have corresponding positive/negative values. Is there any way to retain the ordered levels once I append the negative values to the positive for each station?

Plot with only positive values

Plot with both positive and negative values

library(ggplot2)

# Year-over-year (YOY) passenger change per carrier for each destination.
# One row per destination/carrier pair. Rows are already sorted by market.ppd
# in descending order, so the order in which destinations first appear is the
# desired x-axis order.
#   destination     - destination station/market name
#   carrier         - two-letter airline code
#   market.ppd      - total market size for the destination (repeated per row)
#   YOY             - year-over-year change for that carrier (may be negative)
#   label.placement - y coordinate at which the carrier label is drawn
OD <- data.frame(
  destination = c('MCO', 'MCO', 'MCO', 'MCO', 'MCO', 'MCO', 'MCO', 'Miami', 'Miami', 'Miami', 'Miami', 'Miami', 'Miami', 'LAS', 'LAS', 'LAS', 'LAS', 'Chicago', 'Chicago', 'Chicago', 'Chicago', 'Chicago', 'Chicago', 'Los Angeles', 'Los Angeles', 'Bay Area', 'Bay Area', 'Bay Area', 'Bay Area', 'Bay Area', 'BOS', 'BOS', 'BOS', 'ATL', 'ATL', 'ATL', 'ATL', 'ATL', 'ATL', 'TPA', 'TPA', 'TPA', 'TPA', 'Dallas', 'Dallas', 'Dallas', 'DEN', 'DEN', 'DEN', 'PHX', 'PHX', 'PHX', 'PHX', 'PHX', 'CUN', 'CUN', 'RSW', 'RSW', 'RSW', 'SAN', 'SAN', 'SJU', 'Houston', 'Houston', 'MSY', 'MSP', 'MSP', 'CLT', 'CLT', 'CLT', 'MBJ', 'MBJ', 'PUJ', 'PUJ', 'PUJ'),
  carrier = c('US', 'F9', 'WN', 'AA', 'FL', 'UA', 'DL', 'F9', 'US', 'DL', 'WN', 'UA', 'FL', 'AA', 'US', 'UA', 'NK', 'US', 'NK', 'F9', 'WN', 'UA', 'AA', 'WN', 'VX', 'US', 'DL', 'AA', 'VX', 'UA', 'US', 'B6', 'AA', 'US', 'WN', 'F9', 'DL', 'AA', 'FL', 'US', 'F9', 'WN', 'UA', 'DL', 'WN', 'US', 'US', 'WN', 'DL', 'US', 'WN', 'AA', 'DL', 'UA', 'US', 'F9', 'US', 'WN', 'UA', 'US', 'AA', 'AA', 'DL', 'WN', 'DL', 'F9', 'DL', 'F9', 'US', 'UA', 'AA', 'US', 'AA', 'F9', 'US'),
  market.ppd = c(1242, 1242, 1242, 1242, 1242, 1242, 1242, 1056, 1056, 1056, 1056, 1056, 1056, 645, 645, 645, 645, 641, 641, 641, 641, 641, 641, 526, 526, 498, 498, 498, 498, 498, 492, 492, 492, 482, 482, 482, 482, 482, 482, 478, 478, 478, 478, 399, 399, 399, 333, 333, 333, 298, 298, 298, 298, 298, 243, 243, 232, 232, 232, 213, 213, 205, 198, 198, 173, 163, 163, 160, 160, 160, 152, 152, 147, 147, 147),
  YOY = c(110, 96, 26, 15, -39, -23, -18, 52, 47, 11, -48, -22, -10, 8, -49, -11, -6, 15, 10, 8, 8, -12, -9, 9, -56, 35, 8, 6, -32, -12, 9, 7, 6, 47, 43, 16, 8, 7, -34, 44, 39, 8, -9, 13, 7, -28, 21, 7, 6, 37, 7, 6, -10, -7, 16, 9, 60, -37, -6, 19, -9, 6, 9, -6, -6, 16, -7, 20, 11, -6, 9, -24, 8, -11, -7),
  label.placement = c(55, 158, 219, 239, -20, -50, -71, 26, 75, 105, -24, -59, -75, 4, -25, -55, -63, 8, 20, 30, 38, -6, -17, 4, -28, 17, 39, 46, -16, -38, 4, 12, 19, 23, 68, 98, 110, 118, -17, 22, 64, 87, -5, 6, 17, -14, 10, 24, 31, 18, 40, 47, -5, -14, 8, 20, 30, -19, -40, 9, -5, 3, 4, -3, -3, 8, -4, 10, 26, -3, 5, -12, 4, -6, -15))

# Turn destination into a factor whose levels follow first-appearance order
# (i.e. descending market.ppd) instead of alphabetical order.
# The levels must be unique: passing the raw column, which repeats each
# destination once per carrier, makes factor() fail with a "factor level is
# duplicated" error in modern R. as.character() keeps this working even if
# the column was already converted to a factor by data.frame().
OD$destination <- factor(
  OD$destination,
  levels = unique(as.character(OD$destination))
)

# Positive and negative year-over-year changes are drawn as separate layers.
# Rows with YOY == 0 match neither subset.
gains <- OD[OD$YOY > 0, ]
losses <- OD[OD$YOY < 0, ]

# Mappings shared by both signs: bars are filled by carrier, and each carrier
# label is placed at its precomputed label.placement height.
bar_mapping <- aes(x = destination, y = YOY, fill = carrier)
label_mapping <- aes(x = destination, y = label.placement, label = carrier)

ggplot() +
  geom_bar(data = gains, mapping = bar_mapping, stat = 'identity') +
  geom_text(data = gains, mapping = label_mapping, size = 2) +
  geom_bar(data = losses, mapping = bar_mapping, stat = 'identity') +
  geom_text(data = losses, mapping = label_mapping, size = 2) +
  # Rotate the destination names so they fit, and hide the legend because
  # the carriers are labelled directly on the bars.
  theme(
    legend.position = 'none',
    axis.text.x = element_text(angle = 90, vjust = .5, size = 10)
  )

回答1:

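You can store the desired order of the destinations in a vector and then pass it to `scale_x_discrete(limits = ...)`, which tells ggplot to display the x-axis in exactly that order: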

library(ggplot2)

# Year-over-year (YOY) passenger change per carrier for each destination.
# One row per destination/carrier pair. Rows are already sorted by market.ppd
# in descending order, so the order in which destinations first appear is the
# desired x-axis order.
#   destination     - destination station/market name
#   carrier         - two-letter airline code
#   market.ppd      - total market size for the destination (repeated per row)
#   YOY             - year-over-year change for that carrier (may be negative)
#   label.placement - y coordinate at which the carrier label is drawn
OD <- data.frame(
  destination = c('MCO', 'MCO', 'MCO', 'MCO', 'MCO', 'MCO', 'MCO', 'Miami', 'Miami', 'Miami', 'Miami', 'Miami', 'Miami', 'LAS', 'LAS', 'LAS', 'LAS', 'Chicago', 'Chicago', 'Chicago', 'Chicago', 'Chicago', 'Chicago', 'Los Angeles', 'Los Angeles', 'Bay Area', 'Bay Area', 'Bay Area', 'Bay Area', 'Bay Area', 'BOS', 'BOS', 'BOS', 'ATL', 'ATL', 'ATL', 'ATL', 'ATL', 'ATL', 'TPA', 'TPA', 'TPA', 'TPA', 'Dallas', 'Dallas', 'Dallas', 'DEN', 'DEN', 'DEN', 'PHX', 'PHX', 'PHX', 'PHX', 'PHX', 'CUN', 'CUN', 'RSW', 'RSW', 'RSW', 'SAN', 'SAN', 'SJU', 'Houston', 'Houston', 'MSY', 'MSP', 'MSP', 'CLT', 'CLT', 'CLT', 'MBJ', 'MBJ', 'PUJ', 'PUJ', 'PUJ'),
  carrier = c('US', 'F9', 'WN', 'AA', 'FL', 'UA', 'DL', 'F9', 'US', 'DL', 'WN', 'UA', 'FL', 'AA', 'US', 'UA', 'NK', 'US', 'NK', 'F9', 'WN', 'UA', 'AA', 'WN', 'VX', 'US', 'DL', 'AA', 'VX', 'UA', 'US', 'B6', 'AA', 'US', 'WN', 'F9', 'DL', 'AA', 'FL', 'US', 'F9', 'WN', 'UA', 'DL', 'WN', 'US', 'US', 'WN', 'DL', 'US', 'WN', 'AA', 'DL', 'UA', 'US', 'F9', 'US', 'WN', 'UA', 'US', 'AA', 'AA', 'DL', 'WN', 'DL', 'F9', 'DL', 'F9', 'US', 'UA', 'AA', 'US', 'AA', 'F9', 'US'),
  market.ppd = c(1242, 1242, 1242, 1242, 1242, 1242, 1242, 1056, 1056, 1056, 1056, 1056, 1056, 645, 645, 645, 645, 641, 641, 641, 641, 641, 641, 526, 526, 498, 498, 498, 498, 498, 492, 492, 492, 482, 482, 482, 482, 482, 482, 478, 478, 478, 478, 399, 399, 399, 333, 333, 333, 298, 298, 298, 298, 298, 243, 243, 232, 232, 232, 213, 213, 205, 198, 198, 173, 163, 163, 160, 160, 160, 152, 152, 147, 147, 147),
  YOY = c(110, 96, 26, 15, -39, -23, -18, 52, 47, 11, -48, -22, -10, 8, -49, -11, -6, 15, 10, 8, 8, -12, -9, 9, -56, 35, 8, 6, -32, -12, 9, 7, 6, 47, 43, 16, 8, 7, -34, 44, 39, 8, -9, 13, 7, -28, 21, 7, 6, 37, 7, 6, -10, -7, 16, 9, 60, -37, -6, 19, -9, 6, 9, -6, -6, 16, -7, 20, 11, -6, 9, -24, 8, -11, -7),
  label.placement = c(55, 158, 219, 239, -20, -50, -71, 26, 75, 105, -24, -59, -75, 4, -25, -55, -63, 8, 20, 30, 38, -6, -17, 4, -28, 17, 39, 46, -16, -38, 4, 12, 19, 23, 68, 98, 110, 118, -17, 22, 64, 87, -5, 6, 17, -14, 10, 24, 31, 18, 40, 47, -5, -14, 8, 20, 30, -19, -40, 9, -5, 3, 4, -3, -3, 8, -4, 10, 26, -3, 5, -12, 4, -6, -15))

# Turn destination into a factor whose levels follow first-appearance order
# (i.e. descending market.ppd) instead of alphabetical order.
# The levels must be unique: passing the raw column, which repeats each
# destination once per carrier, makes factor() fail with a "factor level is
# duplicated" error in modern R. as.character() keeps this working even if
# the column was already converted to a factor by data.frame().
OD$destination <- factor(
  OD$destination,
  levels = unique(as.character(OD$destination))
)

# Character vector of destinations in display order; the levels are already
# unique, so no further de-duplication is needed.
neworder <- levels(OD$destination)

# Positive and negative year-over-year changes are drawn as separate layers.
# Rows with YOY == 0 match neither subset.
gains <- OD[OD$YOY > 0, ]
losses <- OD[OD$YOY < 0, ]

# Mappings shared by both signs: bars are filled by carrier, and each carrier
# label is placed at its precomputed label.placement height.
bar_mapping <- aes(x = destination, y = YOY, fill = carrier)
label_mapping <- aes(x = destination, y = label.placement, label = carrier)

ggplot() +
  geom_bar(data = gains, mapping = bar_mapping, stat = 'identity') +
  geom_text(data = gains, mapping = label_mapping, size = 2) +
  geom_bar(data = losses, mapping = bar_mapping, stat = 'identity') +
  geom_text(data = losses, mapping = label_mapping, size = 2) +
  # Rotate the destination names so they fit, and hide the legend because
  # the carriers are labelled directly on the bars.
  theme(
    legend.position = 'none',
    axis.text.x = element_text(angle = 90, vjust = .5, size = 10)
  ) +
  # Pin the x-axis to the explicit destination order so it does not depend
  # on which destinations occur in each layer's data subset.
  scale_x_discrete(limits = neworder)


标签: r ggplot2