I am trying to add two "shadows" to the background of the plot below. Each shadow should represent the density of the orange points and of the blue points, respectively. Does that make sense?
Here is the ggplot to improve:
Here are the code and the data (matrix `df`) I used to create this plot:
PC1 PC2 aa
A_akallopisos 0.043272525 0.0151023307 2
A_akindynos -0.020707141 -0.0158198405 1
A_allardi -0.020277664 -0.0221016281 2
A_barberi -0.023165596 0.0389906701 2
A_bicinctus -0.025354572 -0.0059122384 2
A_chrysogaster 0.012608835 -0.0339330213 2
A_chrysopterus -0.022402365 -0.0092476009 1
A_clarkii -0.014474658 -0.0127024469 1
A_ephippium -0.016859412 0.0320034231 2
A_frenatus -0.024190876 0.0238499714 2
A_latezonatus -0.010718845 -0.0289904165 1
A_latifasciatus -0.005645811 -0.0183202248 2
A_mccullochi -0.031664307 -0.0096059126 2
A_melanopus -0.026915545 0.0308399009 2
A_nigripes 0.023420045 0.0293801537 2
A_ocellaris 0.052042539 0.0126144250 2
A_omanensis -0.020387101 0.0010944998 2
A_pacificus 0.042406273 -0.0260308092 2
A_percula 0.034591721 0.0071153133 2
A_perideraion 0.052830132 0.0064495142 2
A_polymnus 0.030902254 -0.0005091421 2
A_rubrocinctus -0.033318659 0.0474995722 2
A_sandaracinos 0.055839755 0.0093724082 2
A_sebae 0.021767793 -0.0218640814 2
A_tricinctus -0.016230301 -0.0018526482 1
P_biaculeatus -0.014466403 0.0024864574 2
# Scatter plot of PC1 against PC2, coloured by mutation class (`aa`), with one
# linear trend line per class and every point labelled with its row name.
# `df`, `Sites_names` and `j` must already exist in the calling environment.
ggplot(
  data = df,
  aes(x = PC1, y = PC2, color = factor(aa), label = rownames(df))
) +
  ggtitle(paste0("Site n° ", Sites_names[j])) +
  # Spell out FALSE: the shorthand `F` is an ordinary variable that can be
  # reassigned, which would silently switch the confidence band back on.
  geom_smooth(se = FALSE, method = "lm") +
  geom_point() +
  scale_color_manual(
    name = "mutation",
    values = c("darkorange2", "cornflowerblue"),
    labels = c("A", "S")
  ) +
  geom_text(hjust = 0.5, vjust = -1, size = 3) +
  xlim(-0.05, 0.07)
Here are some possible approaches using `stat_density2d()` with `geom="polygon"` and mapping or setting `alpha` transparency for the density fill regions. If you are willing to experiment with some of the parameters, I think you can get some very useful plots. Specifically, you may want to adjust the following:
- `n` controls the smoothness of the density polygon.
- `h` is the bandwidth of the density estimation.
- `bins` controls the number of density levels.
# PCA scores per species. The header names three columns while every data row
# has four fields, so read.table() uses the first field (species) as row names.
# `aa` is the mutation class (1 or 2).
df <- read.table(
  header = TRUE,
  text =
" PC1 PC2 aa
A_akallopisos 0.043272525 0.0151023307 2
A_akindynos -0.020707141 -0.0158198405 1
A_allardi -0.020277664 -0.0221016281 2
A_barberi -0.023165596 0.0389906701 2
A_bicinctus -0.025354572 -0.0059122384 2
A_chrysogaster 0.012608835 -0.0339330213 2
A_chrysopterus -0.022402365 -0.0092476009 1
A_clarkii -0.014474658 -0.0127024469 1
A_ephippium -0.016859412 0.0320034231 2
A_frenatus -0.024190876 0.0238499714 2
A_latezonatus -0.010718845 -0.0289904165 1
A_latifasciatus -0.005645811 -0.0183202248 2
A_mccullochi -0.031664307 -0.0096059126 2
A_melanopus -0.026915545 0.0308399009 2
A_nigripes 0.023420045 0.0293801537 2
A_ocellaris 0.052042539 0.0126144250 2
A_omanensis -0.020387101 0.0010944998 2
A_pacificus 0.042406273 -0.0260308092 2
A_percula 0.034591721 0.0071153133 2
A_perideraion 0.052830132 0.0064495142 2
A_polymnus 0.030902254 -0.0005091421 2
A_rubrocinctus -0.033318659 0.0474995722 2
A_sandaracinos 0.055839755 0.0093724082 2
A_sebae 0.021767793 -0.0218640814 2
A_tricinctus -0.016230301 -0.0018526482 1
P_biaculeatus -0.014466403 0.0024864574 2"
)
library(ggplot2)
# Variant 1: density "shadows" whose opacity follows the density level.
# One filled 2-D density estimate is drawn per mutation class (`aa`) beneath
# the trend lines, points and labels. Expects `df` with columns PC1, PC2, aa
# and species as row names.
p1 <- ggplot(
  data = df,
  aes(x = PC1, y = PC2, color = factor(aa), label = rownames(df))
) +
  ggtitle("Site n° ") +
  # alpha is mapped to the contour level, so denser regions look more opaque;
  # color = NA removes the polygon outlines.
  stat_density2d(
    aes(fill = factor(aa), alpha = ..level..),
    geom = "polygon", color = NA, n = 200, h = 0.03, bins = 4
  ) +
  # Spell out FALSE: the shorthand `F` is an ordinary variable that can be
  # reassigned, which would silently switch the confidence band back on.
  geom_smooth(se = FALSE, method = "lm") +
  geom_point() +
  scale_color_manual(
    name = "mutation",
    values = c("darkorange2", "cornflowerblue"),
    labels = c("A", "S")
  ) +
  scale_fill_manual(
    name = "mutation",
    values = c("darkorange2", "cornflowerblue"),
    labels = c("A", "S")
  ) +
  geom_text(hjust = 0.5, vjust = -1, size = 3, color = "black") +
  # Zoom out so that the density polygons don't reach the edges of the plot...
  scale_x_continuous(expand = c(0.3, 0)) +
  scale_y_continuous(expand = c(0.3, 0)) +
  # ...then zoom back in for the final plot.
  coord_cartesian(
    xlim = c(-0.05, 0.07),
    ylim = c(-0.04, 0.05)
  )
# Variant 2: flat density "shadows" with a fixed transparency (alpha = 0.2)
# and only two density levels per mutation class (`aa`). Expects `df` with
# columns PC1, PC2, aa and species as row names.
p2 <- ggplot(
  data = df,
  aes(x = PC1, y = PC2, color = factor(aa), label = rownames(df))
) +
  ggtitle("Site n° ") +
  # alpha is set (not mapped), so every density band has the same opacity;
  # color = NA removes the polygon outlines.
  stat_density2d(
    aes(fill = factor(aa)),
    alpha = 0.2,
    geom = "polygon", color = NA, n = 200, h = 0.045, bins = 2
  ) +
  # Spell out FALSE: the shorthand `F` is an ordinary variable that can be
  # reassigned, which would silently switch the confidence band back on.
  geom_smooth(se = FALSE, method = "lm", size = 1) +
  geom_point(size = 2) +
  scale_color_manual(
    name = "mutation",
    values = c("darkorange2", "cornflowerblue"),
    labels = c("A", "S")
  ) +
  scale_fill_manual(
    name = "mutation",
    values = c("darkorange2", "cornflowerblue"),
    labels = c("A", "S")
  ) +
  geom_text(hjust = 0.5, vjust = -1, size = 3) +
  # Zoom out so that the density polygons don't reach the edges of the plot...
  scale_x_continuous(expand = c(0.3, 0)) +
  scale_y_continuous(expand = c(0.3, 0)) +
  # ...then zoom back in for the final plot.
  coord_cartesian(
    xlim = c(-0.05, 0.07),
    ylim = c(-0.04, 0.05)
  )
library(gridExtra)
# Stack p1 above p2 in a single column and write the result to plots.png.
ggsave(
  filename = "plots.png",
  plot = arrangeGrob(p1, p2, ncol = 1),
  width = 8,
  height = 11,
  dpi = 120
)
Here's my suggestion. Using shadows or polygons is going to get pretty ugly when you overlay two colors and densities. A contour plot could be nicer to look at and is certainly easier to work with.
I've created some random data as a reproducible example and used a simple density function that uses the average distance of the nearest 5 points.
# Random example data: 20 points in the unit square, each assigned to one of
# two types (aa = 0 or 1) with equal probability.
# The seed is fixed so that the example yields the same data on every run.
set.seed(42)
df <- data.frame(
  PC1 = runif(20),
  PC2 = runif(20),
  aa = rbinom(20, 1, 0.5)
)
# Crude local density estimate: the inverse of the mean Euclidean distance
# from a query location to its `k` nearest data points of the same type.
#
# row:  numeric vector c(x, y, type), e.g. one grid row handed over by apply().
# data: data frame with columns PC1, PC2 and aa; defaults to the global `df`.
# k:    number of nearest neighbours to average over (default 5).
#
# Returns a single number: 0 when `data` holds no point of the requested type,
# Inf when the query location coincides with all of its nearest neighbours.
point.density <- function(row, data = df, k = 5) {
  points <- data[data$aa == row[[3]], ]
  if (nrow(points) == 0) {
    return(0)
  }
  # Compute the distance per point *before* ranking. Ranking the x and y
  # offsets separately would pair the x-offset of one point with the y-offset
  # of another, which is not a distance to any actual point.
  dists <- sqrt((points$PC1 - row[[1]])^2 + (points$PC2 - row[[2]])^2)
  # Use at most as many neighbours as exist, so a sparse type never adds NA.
  nearest <- sort(dists)[seq_len(min(k, length(dists)))]
  1 / mean(nearest)
}
# The density has to be evaluated over the whole plotting area, so build a
# regular grid of locations first.
res <- seq_len(100) / 100 # grid resolution: 100 x 100 locations in (0, 1]

# One copy of the grid per point type, because each type gets its own density
# surface: x varies slowest, y varies fastest.
plot.data0 <- data.frame(
  x.val = rep(res, each = length(res)),
  y.val = rep(res, times = length(res)),
  type = rep(0, times = length(res)^2)
)
plot.data1 <- plot.data0
plot.data1$type <- rep(1, times = length(res)^2)
plot.data <- rbind(plot.data0, plot.data1)

# Evaluate the density at every grid location (rows are passed to
# point.density as c(x, y, type)) and attach it as column z.val.
densities <- apply(plot.data, 1, point.density)
plot.data <- cbind(plot.data, z.val = densities)
library(ggplot2)
# Draw the per-type density contours from the grid (`plot.data`), then overlay
# the observed points from `df`. The two layers read from different data
# frames, so each one names its dataset explicitly.
ggplot() +
  stat_contour(
    data = plot.data,
    mapping = aes(x = x.val, y = y.val, z = z.val, colour = factor(type)),
    bins = 20,
    alpha = 0.4
  ) +
  geom_point(
    data = df,
    mapping = aes(x = PC1, y = PC2, colour = factor(aa))
  )
contour plot http://img34.imageshack.us/img34/6215/1yvb.png
Tags: r, contour, ggplot2