I've been perusing DOT airline data and am trying to create a stacked bar graph of the year over year (YOY) change in each airline's passengers from a specific airport to all other stations.
I also want to order the x-axis by the total number of people (market.ppd) traveling from the specified airport to each station (e.g., this set's origin airport is PHL and its top destination is MCO, followed by Miami, LAS, etc.).
The x-axis stays ordered when the YOY data is solely positive or negative but defaults back to alphabetical order once I try to stack the bar with both. Some stations only experience a positive YOY change or a negative YOY change, whereas the example in this post has positive and negative values for each category.
My hunch is that ggplot reverts the levels to alphabetical order once it finds that some of the stations don't have corresponding positive/negative values. Is there any way to retain the ordered levels when I add the negative values alongside the positive ones for each station?
Plot with only positive values
Plot with both positive and negative values
library(ggplot2)
# Example data: one row per (destination, carrier) pair for a single origin
# airport. Rows are listed in descending order of market.ppd, i.e. the
# busiest destination comes first.
OD <- data.frame(
# Destination station; repeated once for every carrier serving it.
destination = c('MCO', 'MCO', 'MCO', 'MCO', 'MCO', 'MCO', 'MCO', 'Miami', 'Miami', 'Miami', 'Miami', 'Miami', 'Miami', 'LAS', 'LAS', 'LAS', 'LAS', 'Chicago', 'Chicago', 'Chicago', 'Chicago', 'Chicago', 'Chicago', 'Los Angeles', 'Los Angeles', 'Bay Area', 'Bay Area', 'Bay Area', 'Bay Area', 'Bay Area', 'BOS', 'BOS', 'BOS', 'ATL', 'ATL', 'ATL', 'ATL', 'ATL', 'ATL', 'TPA', 'TPA', 'TPA', 'TPA', 'Dallas', 'Dallas', 'Dallas', 'DEN', 'DEN', 'DEN', 'PHX', 'PHX', 'PHX', 'PHX', 'PHX', 'CUN', 'CUN', 'RSW', 'RSW', 'RSW', 'SAN', 'SAN', 'SJU', 'Houston', 'Houston', 'MSY', 'MSP', 'MSP', 'CLT', 'CLT', 'CLT', 'MBJ', 'MBJ', 'PUJ', 'PUJ', 'PUJ'),
# Airline code for the row; used for both the bar fill and the text label.
carrier = c('US', 'F9', 'WN', 'AA', 'FL', 'UA', 'DL', 'F9', 'US', 'DL', 'WN', 'UA', 'FL', 'AA', 'US', 'UA', 'NK', 'US', 'NK', 'F9', 'WN', 'UA', 'AA', 'WN', 'VX', 'US', 'DL', 'AA', 'VX', 'UA', 'US', 'B6', 'AA', 'US', 'WN', 'F9', 'DL', 'AA', 'FL', 'US', 'F9', 'WN', 'UA', 'DL', 'WN', 'US', 'US', 'WN', 'DL', 'US', 'WN', 'AA', 'DL', 'UA', 'US', 'F9', 'US', 'WN', 'UA', 'US', 'AA', 'AA', 'DL', 'WN', 'DL', 'F9', 'DL', 'F9', 'US', 'UA', 'AA', 'US', 'AA', 'F9', 'US'),
# Total passengers from the origin to this destination; the value is the same
# on every row of a given destination and defines the desired x-axis order.
market.ppd = c(1242, 1242, 1242, 1242, 1242, 1242, 1242, 1056, 1056, 1056, 1056, 1056, 1056, 645, 645, 645, 645, 641, 641, 641, 641, 641, 641, 526, 526, 498, 498, 498, 498, 498, 492, 492, 492, 482, 482, 482, 482, 482, 482, 478, 478, 478, 478, 399, 399, 399, 333, 333, 333, 298, 298, 298, 298, 298, 243, 243, 232, 232, 232, 213, 213, 205, 198, 198, 173, 163, 163, 160, 160, 160, 152, 152, 147, 147, 147),
# Year-over-year change for this carrier on this route; may be positive or
# negative, and some destinations have rows of only one sign.
YOY = c(110, 96, 26, 15, -39, -23, -18, 52, 47, 11, -48, -22, -10, 8, -49, -11, -6, 15, 10, 8, 8, -12, -9, 9, -56, 35, 8, 6, -32, -12, 9, 7, 6, 47, 43, 16, 8, 7, -34, 44, 39, 8, -9, 13, 7, -28, 21, 7, 6, 37, 7, 6, -10, -7, 16, 9, 60, -37, -6, 19, -9, 6, 9, -6, -6, 16, -7, 20, 11, -6, 9, -24, 8, -11, -7),
# y coordinate at which the carrier label is drawn.
# NOTE(review): values look like the midpoint of each carrier's segment within
# the positive or negative stack (e.g. 110 -> 55, then 110 + 96/2 -> 158);
# confirm how they were generated before changing the stacking order.
label.placement = c(55, 158, 219, 239, -20, -50, -71, 26, 75, 105, -24, -59, -75, 4, -25, -55, -63, 8, 20, 30, 38, -6, -17, 4, -28, 17, 39, 46, -16, -38, 4, 12, 19, 23, 68, 98, 110, 118, -17, 22, 64, 87, -5, 6, 17, -14, 10, 24, 31, 18, 40, 47, -5, -14, 8, 20, 30, -19, -40, 9, -5, 3, 4, -3, -3, 8, -4, 10, 26, -3, 5, -12, 4, -6, -15))
# Order destinations by total passengers (market.ppd), busiest first.
# Factor levels must be unique: the raw column repeats each destination once
# per carrier, and current versions of R reject duplicated levels with an
# error, so collapse to unique values after sorting.
destination_order <- unique(OD$destination[order(-OD$market.ppd)])
OD$destination <- factor(OD$destination, levels = destination_order)

# Positive and negative changes are drawn as separate layers so that each sign
# stacks away from zero in its own direction.
gains <- OD[OD$YOY > 0, ]
losses <- OD[OD$YOY < 0, ]

ggplot() +
  geom_bar(
    data = gains,
    aes(x = destination, y = YOY, fill = carrier),
    stat = "identity"
  ) +
  geom_text(
    data = gains,
    aes(x = destination, y = label.placement, label = carrier),
    size = 2
  ) +
  geom_bar(
    data = losses,
    aes(x = destination, y = YOY, fill = carrier),
    stat = "identity"
  ) +
  geom_text(
    data = losses,
    aes(x = destination, y = label.placement, label = carrier),
    size = 2
  ) +
  # Pin the axis to the full, ordered set of levels. Without this, a
  # destination that appears in only one of the two subsets (e.g. only
  # negative changes) is added to the scale after it was first trained, and
  # the combined axis falls back to alphabetical order.
  scale_x_discrete(limits = levels(OD$destination)) +
  theme(
    axis.text.x = element_text(size = 10, vjust = 0.5, angle = 90),
    legend.position = "none"
  )