I am trying to add means using geom_segment
to a geom_density_ridges
plot made in ggplot2.
library(dplyr)
library(ggplot2)
library(ggridges)
Fig1 <- ggplot(Figure3Data, aes(x = `hairchange`, y = `EffortGroup`)) +
geom_density_ridges_gradient(aes(fill = ..x..), scale = 0.9, size = 1)
ingredients <- ggplot_build(Fig1) %>% purrr::pluck("data", 1)
density_lines <- ingredients %>%
group_by(group) %>% filter(density == mean(density)) %>% ungroup()
p <- ggplot(Figure3Data, aes(x = `hairchange`, y = `EffortGroup`)) +
geom_density_ridges_gradient(aes(fill = ..x..), scale = 0.9, size = 1) +
scale_fill_gradientn( colours = c("#0000FF", "#FFFFFF", "#FF0000"),name =
NULL, limits=c(-2,2))+ coord_flip() +
theme_ridges(font_size = 20, grid=TRUE, line_size=1,
center_axis_labels=TRUE) +
scale_x_continuous(name='Average Self-Perceived Hair Change', limits=c(-2,2))+
ylab('Total SSM Effort (hours)')+
geom_segment(data =density_lines,
aes(x = x, y = ymin, xend = x, yend = ymin+density*scale*iscale))
print(p)
However, I am am getting a "Error: data
must be uniquely named but has duplicate elements". Below is a plot without the means for the dataset I have. Any suggestions on how to fix the code?
The first 35 rows of data are below:
structure(list(MonthsMassage = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
2, 2, 1, 1), MinutesPerDayMassage = c("0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "11-20 minutes daily",
"11-20 minutes daily", "11-20 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "0-10 minutes daily", "0-10 minutes daily",
"0-10 minutes daily", "11-20 minutes daily", "11-20 minutes daily"
), Minutes = c(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15, 15, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 15),
hairchange = c(-1, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0, -1,
-1, 0, 0, -1, 0, -1, 0, -1, -1, -1, -1, -1, 0, -1, -1, -1,
0, 1, -1, 0, 0, -1, 0), HairType1 = c("Templefrontal", "Templefrontal",
"Templefrontal", "Templefrontal", "Templefrontal", "Templefrontal",
"Templefrontal", "other", "Templefrontal", "Templefrontal",
"Templefrontal", "Templefrontal", "Templefrontal", "Templefrontal",
"Templefrontal", "Templefrontal", "Templefrontal", "Templefrontal",
"Templefrontal", "Templefrontal", "Templefrontal", "Templefrontal",
"Templefrontal", "Templefrontal", "Templefrontal", "other",
"other", "other", "Templefrontal", "Templefrontal", "other",
"Templefrontal", "other", "Templefrontal", "Templefrontal"
), HairType2 = c("other", "other", "other", "other", "other",
"other", "other", "other", "other", "Vertexthinning", "Vertexthinning",
"other", "Vertexthinning", "other", "other", "Vertexthinning",
"other", "Vertexthinning", "Vertexthinning", "other", "other",
"other", "Vertexthinning", "other", "Vertexthinning", "other",
"other", "other", "other", "other", "other", "Vertexthinning",
"other", "other", "other"), HairType3 = c("other", "Diffusethinning",
"other", "Diffusethinning", "other", "other", "Diffusethinning",
"Diffusethinning", "Diffusethinning", "other", "Diffusethinning",
"Diffusethinning", "other", "other", "Diffusethinning", "Diffusethinning",
"other", "Diffusethinning", "Diffusethinning", "Diffusethinning",
"other", "other", "other", "other", "other", "other", "other",
"other", "other", "Diffusethinning", "other", "other", "other",
"other", "other"), Effort = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5,
2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 5, 5, 5, 5, 5, 7.5, 7.5), EffortGroup = c("<5",
"<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5",
"<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5", "<5",
"<5", "<5", "<5", "<5", "<5", "<5", "<5", "12.5", "12.5",
"12.5", "12.5", "12.5", "12.5", "12.5")), row.names = c(NA,
-35L), class = c("tbl_df", "tbl", "data.frame"))
Plotting horizontal lines
If I understand correctly, the OP wants to plot a horizontal line at a position where the density equals the mean density for each of the ridgelines.
The expression
density_lines <- ingredients %>%
group_by(group) %>% filter(density == mean(density)) %>% ungroup()
returns an empty dataset as there is no record where the density
value exactly matches mean(density)
.
However, it does work for the overall maximum (but not for all of the local maxima)
density_lines <- ingredients %>%
group_by(group) %>% filter(density == max(density)) %>% ungroup()
which gives
Find closest value
As there is no exactly match, the closest value can be picked by
density_lines <- ingredients %>%
group_by(group) %>%
top_n(1, -abs(density - mean(density)))
which plots as
This plots one segment per ridgeline but we expect to see 4 segments in each of the curve branches (those where the maximum of the adjacent peak is greater than the mean). With
density_lines <- ingredients %>%
group_by(group) %>%
top_n(4, -abs(density - mean(density)))
we get
You can play around with the n
parameter to top_n()
but IMHO the correct way would be to group each ridgeline from peak to valley and from valley to peak to get one segment for each of the curve branches.
Find value nearby
Alternatively, we can filter using the near()
function. This function requires to specify a tolerance tol
which we need to compute from the dataset:
density_lines <- ingredients %>%
group_by(group) %>%
filter(near(
density, mean(density),
tol = ingredients %>% summarise(0.25 * max(abs(diff(density)))) %>% pull()
))
For the carefully selected factor 0.25
(try and error) we do get
EDIT: Plotting vertical lines
It seems I had misinterpreted OP's intentions. Now, we will try to plot a vertical line at mean(density)
using geom_hline
(with coord_flip()
, geom_hline()
creates a vertical line).
Again, we follow OP's clever approach to extract densities and scale factors from the created plot.
# create plot object
Fig1 <- ggplot(Figure3Data, aes(x = hairchange, y = EffortGroup)) +
geom_density_ridges_gradient(aes(fill = ..x..), scale = 0.9, size = 1) +
scale_fill_gradientn(
colours = c("#0000FF", "#FFFFFF", "#FF0000"),
name =
NULL,
limits = c(-2, 2)
) + coord_flip() +
theme_ridges(
font_size = 20,
grid = TRUE,
line_size = 1,
center_axis_labels = TRUE
) +
scale_x_continuous(name = 'Average Self-Perceived Hair Change', limits =
c(-2, 2)) +
ylab('Total SSM Effort (hours)')
# extract plot data and summarise
mean_density <-
ggplot_build(Fig1) %>%
purrr::pluck("data", 1) %>%
group_by(group) %>%
summarise(density = mean(density), scale = first(scale), iscale = first(iscale))
# add hline and plot
Fig1 +
geom_hline(aes(yintercept = group + density * scale * iscale),
data = mean_density)
EDIT 2: Plot horizontal lines at position of mean self perceived hair change
The OP has clarified that
I want was the mean self perceived hair change (y axis data) for each
of the 10 ridgelines.
This can be achieved in the following steps:
- Create ridgeplot object.
- Compute the mean self perceived hair change for each
EffortGroup
.
- Pick the values of the created density values from the plot data.
- Join both datasets.
- Compute the density values at the locations of the means using
approx()
- Draw the line segments.
The mean self perceived hair change for each EffortGroup
is computed by
Figure3Data %>%
group_by(EffortGroup) %>%
summarise(x_mean = mean(hairchange))
which yields (for the posted subset of OP's data):
EffortGroup x_mean
<chr> <dbl>
1 <5 -0.643
2 12.5 -0.143
All steps together:
# create plot object
Fig1 <- ggplot(Figure3Data, aes(x = hairchange, y = EffortGroup)) +
geom_density_ridges_gradient(aes(fill = ..x..), scale = 0.9, size = 1) +
scale_fill_gradientn(
colours = c("#0000FF", "#FFFFFF", "#FF0000"),
name = NULL,
limits = c(-2, 2)) +
coord_flip() +
theme_ridges(
font_size = 20,
grid = TRUE,
line_size = 1,
center_axis_labels = TRUE) +
scale_x_continuous(name = 'Average Self-Perceived Hair Change',
limits = c(-2, 2)) +
ylab('Total SSM Effort (hours)')
density_lines <-
Figure3Data %>%
group_by(EffortGroup) %>%
summarise(x_mean = mean(hairchange)) %>%
mutate(group = as.integer(factor(EffortGroup))) %>%
left_join(ggplot_build(Fig1) %>% purrr::pluck("data", 1),
on = "group") %>%
group_by(group) %>%
summarise(x_mean = first(x_mean),
density = approx(x, density, first(x_mean))$y,
scale = first(scale),
iscale = first(iscale))
# add segments and plot
Fig1 +
geom_segment(aes(x = x_mean,
y = group,
xend = x_mean,
yend = group + density * scale * iscale),
data = density_lines)
EDIT 3: Reorder horizontal axis
The OP has asked to reorder the horizontal axis appropriately. This can be done by coercing EffortGroup
from type character
to factor
beforehand where the factor levels are explicitly specified in the expected order:
# turn EffortGroup into factor with levels in desired order
lvls <- c("<5", "12.5", "22.5", "35", "50", "75", "105", "152", "210", "210+")
Figure3Data <-
Figure3Data %>%
mutate(EffortGroup = factor(EffortGroup, levels = lvls))
Alternatively, EffortGroup
can be derived directly from the given Effort
values by
# create Effort Group from scratch
lvls <- c("<5", "12.5", "22.5", "35", "50", "75", "105", "152", "210", "210+")
brks <- c(-Inf, 5, 12.5, 22.5, 35, 50, 75, 105, 152, 210, Inf)
Figure3Data <-
Figure3Data %>%
mutate(EffortGroup = cut(Effort, brks, lvls, right = FALSE))
In any case, the computation of density_lines
has to be amended as EffortGroup
is a factor already:
density_lines <-
Figure3Data %>%
group_by(EffortGroup) %>%
summarise(x_mean = mean(hairchange)) %>%
mutate(group = as.integer(EffortGroup)) %>% # remove call to factor() here
left_join( ...
With the full dataset supplied by the OP (link) the plot finally becomes
The locations of the mean self perceived hair change for each EffortGroup
are given by
Figure3Data %>%
group_by(EffortGroup) %>%
summarise(x_mean = mean(hairchange))
# A tibble: 10 x 2
EffortGroup x_mean
<fct> <dbl>
1 <5 -0.643
2 12.5 -0.393
3 22.5 -0.118
4 35 -0.0606
5 50 0.286
6 75 0
7 105 0.152
8 152 0.167
9 210 0.379
10 210+ 0.343