A werid-looking density plot using stat_density_2d

2019-08-02 10:28发布

问题:

I have a data frame like below

 > dput(test_data)
structure(list(X206.204 = c(18.28, 18.32, 18.453, 18.55, 18.24, 
18.39, 18.36, 18.26, 18.23, 18.42, 18.35, 18.64, 18.59, 18.28, 
18.56, 18.72, 18.6, 18.59, 18.63, 18.19, 18.73, 18.36, 18.71, 
18.66, 18.66, 18.61, 18.68, 18.741, 17.1758, 17.18, 17.1709, 
17.1748, 17.1774, 17.1756, 17.156, 17.074, 17.1837, 17.1806, 
17.1904, 17.1849, 17.2025, 17.1802, 17.92, 17.9, 17.94, 17.93, 
17.91, 17.92, 17.906, 17.92, 17.887, 17.935, 17.905, 17.867, 
17.957, 17.957, 18.3794, 18.3667, 18.3777, 18.3672, 18.387, 18.3765, 
18.4, 17.905, 17.886, 17.906, 18.144, 17.998, 18.394, 18.339, 
18.437, 18.349, 18.41, 18.407, 18.442, 18.309, 18.348, 18.193, 
18.419, 18.363), X207.204 = c(15.45, 15.574, 15.464, 15.64, 15.47, 
15.55, 15.61, 15.48, 15.47, 15.64, 15.56, 15.9, 15.74, 15.44, 
15.65, 15.77, 15.81, 15.77, 15.85, 15.63, 15.97, 15.59, 15.59, 
15.89, 15.88, 15.89, 15.91, 15.706, 15.5249, 15.5277, 15.5214, 
15.5245, 15.5261, 15.5252, 15.522, 15.422, 15.5276, 15.5274, 
15.5322, 15.5259, 15.5274, 15.5238, 15.44, 15.42, 15.36, 15.47, 
15.47, 15.51, 15.471, 15.465, 15.47, 15.463, 15.473, 15.462, 
15.449, 15.445, 15.6314, 15.6307, 15.6332, 15.6332, 15.6323, 
15.6336, 15.6324, 15.465, 15.465, 15.506, 15.489, 15.484, 15.612, 
15.618, 15.735, 15.619, 15.665, 15.649, 15.698, 15.554, 15.606, 
15.491, 15.693, 15.6), X208.204 = c(38.42, 38.099, 38.076, 39.27, 
38.19, 38.37, 38.57, 38.22, 38.19, 38.56, 38.01, 38.93, 38.77, 
37.97, 38.63, 39.01, 39.03, 38.86, 39.11, 38.25, 39.51, 38.22, 
39.45, 39.24, 39.2, 39.17, 39.33, 37.969, 37.002, 37.013, 36.9921, 
37, 37.008, 37.003, 37.005, 36.685, 37.0207, 37.0144, 37.0351, 
37.0187, 37.0713, 37.013, 37.58, 37.55, 37.62, 37.69, 37.58, 
37.72, 37.431, 37.418, 37.426, 37.46, 37.467, 37.396, 37.443, 
37.385, 38.3435, 38.3215, 38.3393, 38.3166, 38.3508, 38.3333, 
38.3692, 37.426, 37.398, 37.522, 37.684, 37.325, 38.478, 38.342, 
38.585, 38.294, 38.482, 38.457, 38.592, 38.3, 38.372, 38.945, 
38.527, 38.396)), .Names = c("X206.204", "X207.204", "X208.204"
), row.names = c(NA, -80L), class = "data.frame")

I tried to implement the following code to produce a 2-dimensional density plot using stat_denisty_2d

    ggplot(data=test_data,aes(x = X206.204 , y = X207.204))+ stat_density_2d(geom="polygon",n=800,bins=20,
                                                                      aes(fill  = ..level..,
                                                                          alpha = ..level..)) +
  geom_point(color="red")+
    labs( x = expression({}^206*"Pb/"*{}^204*"Pb"),
        y  = expression({}^207*"Pb/"*{}^204*"Pb")
  )+
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.text.x = element_text(margin = margin(t = 5, unit = "pt")))+
  theme(axis.text=element_text(size=18))+  # adjust x y axis tick mark
  theme(axis.title = element_text(size=22))+ # adjust x y axis title 
  theme(legend.position="none")+
  scale_fill_gradient(low = "cyan1",high = "cyan4")+
  scale_x_continuous(breaks = seq(17, 19.6, by = 0.2))+
  scale_y_continuous(breaks = seq(15.4, 15.9, by = 0.1))+
  coord_cartesian(xlim = c(17,19.6),ylim=c(15.4, 15.9))

This gives me a diagram whose edges got cut off and there is a big area in the middle containing no data at all. How can I fix these problems?

回答1:

If I understand correctly, you need to pass an additional argument defining the bandwidth - argument h to stat_density_2d.

h
Bandwidth (vector of length two). If NULL, estimated using bandwidth.nrd.

ggplot(data = df,
       aes(x = X206.204,
           y = X207.204))+
  stat_density_2d(geom = "polygon",
                  n = 800,
                  bins = 20,
                  aes(fill  = ..level..,
                      alpha = ..level..),
                  h = c(0.2, 0.2)) + #change this to your liking
  geom_point(color="red")+
  labs( x = expression({}^206*"Pb/"*{}^204*"Pb"),
        y  = expression({}^207*"Pb/"*{}^204*"Pb")) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.text.x = element_text(margin = margin(t = 5, unit = "pt")))+
  theme(axis.text = element_text(size = 18))+  
  theme(axis.title = element_text(size = 22))+ 
  theme(legend.position = "none")+
  scale_fill_gradient(low = "cyan1",high = "cyan4")+
  scale_x_continuous(breaks = seq(17, 19.6, by = 0.2))+
  scale_y_continuous(breaks = seq(15.4, 15.9, by = 0.1))+
  coord_cartesian(xlim = c(17, 19.6),ylim = c(15.4, 15.9))

by default it is estimated using the function bandwidth.nrd from MASS

library(MASS)
bandwidth.nrd(c(df$X206.204))
#output
0.6671485
bandwidth.nrd(c(df$X207.204))
#output
0.2143042

and:

ggplot(data = df,
       aes(x = X206.204,
           y = X207.204))+
  stat_density_2d(geom = "polygon",
                  n = 800,
                  bins = 15,
                  aes(fill  = ..level..,
                      alpha = ..level..),
                  h = c(bandwidth.nrd(c(df$X206.204)),
                        bandwidth.nrd(c(df$X207.204)))) +
  geom_point(color="red")+
  labs( x = expression({}^206*"Pb/"*{}^204*"Pb"),
        y  = expression({}^207*"Pb/"*{}^204*"Pb")) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.text.x = element_text(margin = margin(t = 5, unit = "pt")))+
  theme(axis.text = element_text(size = 18))+  
  theme(axis.title = element_text(size = 22))+ 
  theme(legend.position = "none")+
  scale_fill_gradient(low = "cyan1",high = "cyan4")+
  scale_x_continuous(breaks = seq(17, 19.6, by = 0.2))+
  scale_y_continuous(breaks = seq(15.4, 15.9, by = 0.1))+
  coord_cartesian(xlim = c(17, 19.6),ylim = c(15.4, 15.9)) 

looks like just the plot without h defined: