Labelling points with ggplot2 and directlabels

2020-02-06 17:30发布

问题:

This is a follow-up to my earlier question. While the answer by krlmlr goes some way towards solving my problem, one issue remains: the labels are moved so far away from the points that it becomes impossible to see which label corresponds to which point. Does anyone know how to fix this?

Test data

# Example data: 21 points identified by `ID`, each with a `group` factor
# (26 levels "a".."z", of which only "a", "j" and "k" occur) and x/y
# coordinates.
test <- data.frame(
  ID = as.numeric(c(183:200, 202:204)),
  group = factor(
    letters[c(rep(1L, 11), 10L, 1L, 1L, 11L, 1L, 10L, 10L, 1L, 1L, 1L)],
    levels = letters
  ),
  x = c(
    27.4921834914348, 25.9627681619548, 30.4138361188149,
    29.7795838507818, 32.33351964819, 31.9669266268744,
    35.4433172141507, 37.8161067968601, 43.1590654001956,
    44.2747819266045, 46.3829222044342, 42.2074195880057,
    45.4532493462957, 48.393281430824, 51.7145681408198,
    51.4911582677409, 51.9582538485293, 51.3328966791848,
    36.4638478746633, 36.6113033420253, 39.3476493044906
  ),
  y = c(
    -40.6667236544384, -39.2640436250506, -40.6403794753022,
    -40.907139119954, -39.9691483441288, -39.7485517513382,
    -38.595662907188, -38.2106224386729, -37.6418188651769,
    -32.7096448721895, -34.1161006958616, -32.2821584775403,
    -30.8436917254975, -30.3865899667262, -30.3910690518699,
    -26.1013343566452, -23.8437232732877, -21.5548787351057,
    5.50922747751602, 5.64434551903915, 5.01263995541617
  )
)

library(ggplot2)
library(directlabels)

Plot using geom_text - problem: labels overlap

# Scatter plot of the points, coloured by group, with every point labelled
# by its ID directly at the point's coordinates.
# `show.legend = FALSE` keeps the text layer out of the legend; it replaces
# the `show_guide` argument, which was renamed in ggplot2 2.0.0.
ggplot(test, aes(x = x, y = y)) +
  geom_point(aes(colour = group)) +
  geom_text(aes(label = ID), show.legend = FALSE)

Plot as suggested by krlmlr - problem: labels are far away from points

# Same scatter plot, coloured by group, but the ID labels are drawn with
# directlabels' geom_dl() instead of geom_text().
# The label positioning method is whatever defaultpf.ggplot("point",,,)
# returns; its three trailing arguments are left empty in the call.
# NOTE(review): presumably this picks directlabels' default positioning
# method for point geoms - confirm against the directlabels documentation.
ggplot(test, aes(x=x, y=y)) + 
geom_point(aes(colour=group)) + 
geom_dl(aes(label = ID), method = defaultpf.ggplot("point",,,))

回答1:

One way to avoid overlapping (to some degree at least) would be to offset each label by an amount which is determined by the closest point to it. So for example if a point's closest neighbouring point is directly to the right of it, its label would be placed to the left, etc.

# Rescale x and y to [0, 1] (min-max scaling) so that the nearest neighbour
# is judged on comparable scales rather than in raw data units.
test$yy <- (test$y - min(test$y)) / (max(test$y) - min(test$y))
test$xx <- (test$x - min(test$x)) / (max(test$x) - min(test$x))

# For every point, find the direction towards its nearest neighbour in the
# rescaled space and store the opposite direction (+ pi), so that the label
# is pushed away from that neighbour.
test$angle <- vapply(seq_len(nrow(test)), function(i) {
  dx <- test$xx[-i] - test$xx[i]
  dy <- test$yy[-i] - test$yy[i]
  j <- which.min(dx^2 + dy^2)
  if (length(j) == 0L) {
    # No usable neighbour (single point, or all distances undefined):
    # fall back to angle 0, i.e. the label goes to the right of the point.
    return(0)
  }
  atan2(dy[j], dx[j]) + pi
}, numeric(1))

# Length of the label offset, applied in data units; larger values move the
# labels further away from their points.
sc <- 0.5
test$nudge.x <- cos(test$angle) * sc
test$nudge.y <- sin(test$angle) * sc

# Points coloured by group; each ID label is drawn at the point's position
# shifted by the precomputed nudge.x / nudge.y offsets.
ggplot(data = test, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(colour = group)) +
  geom_text(
    mapping = aes(x = x + nudge.x, y = y + nudge.y, label = ID),
    size = 3,
    show.legend = FALSE
  )

You can try playing around with the scaling parameter sc (the larger it is, the further away the labels will be from the points) to avoid overlapping labels. (It may turn out that no single value of sc avoids overlaps for all points - in that case you need to vary the scaling parameter per point, for example by defining sc in terms of dx and dy.)



回答2:

It could be that ggrepel is better suited to the labelling of points in a scatterplot.

library(ggplot2)  # ggrepel requires ggplot2 v2.0.0
library(ggrepel)

# Repelled ID labels are added first and the points second (so the points
# are drawn on top); both layers are coloured by group, and the text layer
# is kept out of the legend.
ggplot(data = test, mapping = aes(x = x, y = y)) +
  geom_text_repel(
    mapping = aes(label = ID, colour = group),
    show.legend = FALSE,
    box.padding = unit(0.45, "lines")
  ) +
  geom_point(mapping = aes(colour = group))



回答3:

Maybe hjust and vjust are what you are looking for?

# Points coloured by group, with each ID label shifted off its point via
# hjust / vjust so the text does not sit directly on top of the marker.
# `show.legend = FALSE` replaces the `show_guide` argument, which was
# renamed in ggplot2 2.0.0.
ggplot(test, aes(x = x, y = y)) +
  geom_point(aes(colour = group)) +
  geom_text(aes(label = ID), show.legend = FALSE, hjust = 1.2, vjust = 0.5)



回答4:

I think you can do this just by adjusting the jittering parameter -- ... position = position_jitter.... You may have to play around with it a tad since you've only given us 10% of your data:

# Jitter the points and their labels to spread out crowded regions.
# Both layers share one seeded position object: two independent
# position_jitter() calls would draw different random offsets, so a label
# would no longer sit next to the point it belongs to.
# The `seed` argument requires ggplot2 >= 3.0.0.
jitter_pos <- position_jitter(width = 1.5, height = 1, seed = 1)

# `show.legend = FALSE` replaces the `show_guide` argument, which was
# renamed in ggplot2 2.0.0.
ggplot(test, aes(x = x, y = y)) +
  geom_point(aes(colour = group), position = jitter_pos) +
  geom_text(aes(label = ID), show.legend = FALSE, hjust = 1.2, vjust = -0.5,
            position = jitter_pos)