I am trying to obtain the following plot from a pandas data frame.
I am not sure how to combine seaborn with pandas for that task.
This is the dataframe I want to use:
import pandas as pd
import numpy as np

# Three columns drawn from different distributions: 'a' ~ N(1, 1),
# 'b' ~ N(0, 1) and 'c' uniform on [10, 11).
data = pd.DataFrame(
    {
        'a': np.random.randn(1000) + 1,
        'b': np.random.randn(1000),
        'c': np.random.rand(1000) + 10,
    },
    columns=['a', 'b', 'c'],
)
# Blank out 100 random rows of 'a' and 800 random rows of 'b' so the columns
# end up with very different numbers of valid observations.
# Assign through .loc on the frame itself: chained assignment
# (data.a[idx] = ...) may write to a temporary copy and leave `data` unchanged.
data.loc[data.a.sample(100).index, 'a'] = np.nan
data.loc[data.b.sample(800).index, 'b'] = np.nan
Note that the histogram heights (frequencies) will need to be normalized: the columns differ significantly in their number of data points and in their distributions, so the raw counts would end up on very different 'y scales'.
# Baseline attempt: histogram of every column via the pandas plotting API
# (the trailing semicolon suppresses the echoed return value in a notebook).
data.plot(kind="hist");
This is the seaborn code that generates the figure shown at the beginning.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# White seaborn style with a fully transparent axes background (RGBA alpha 0),
# so stacked facets do not paint over one another when they overlap.
sns.set(
    rc={"axes.facecolor": (0, 0, 0, 0)},
    style="white",
)
# Build a reproducible long-form frame: 150 standard-normal draws in "x",
# each tagged with a repeating A/B/C group label in "g".
rs = np.random.RandomState(1979)
x = rs.standard_normal(150)
g = np.array(list("ABC") * 50)
df = pd.DataFrame({"x": x, "g": g})
# Code point of each group letter (A=65, B=66, C=67).
m = df["g"].map(ord)
# Initialize the FacetGrid object: one short, wide row per value of "g",
# with each row coloured from a cubehelix palette.
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=5, height=1, palette=pal)
# Draw the densities in a few steps: a filled KDE, a white outline drawn over
# it, and finally a horizontal baseline at y=0.
# `fill` and `bw_method` are the current spellings of the deprecated
# `shade` and `bw` keywords of kdeplot.
g.map(sns.kdeplot, "x", clip_on=False, fill=True, alpha=1, lw=1.5, bw_method=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_method=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)
# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write the facet's label text onto the current axes.

    Intended to be passed to ``FacetGrid.map``, which is expected to supply
    ``color`` and ``label`` as keyword arguments for each facet.

    Parameters
    ----------
    x : unused
        The mapped data column; accepted only because the grid passes it.
    color :
        Colour used for the text.
    label :
        Text to draw.
    """
    ax = plt.gca()
    # Position is given in axes coordinates (transform=ax.transAxes):
    # left edge, 30% of the axes height.
    ax.text(0, .3, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)
# Apply the labelling function to the grid, using the "x" column
g.map(label, "x")
# Set the subplots to overlap: a negative hspace pulls the rows together
# NOTE(review): -.0025 gives almost no overlap; the upstream seaborn ridge-plot
# example appears to use -.25 — confirm which value is intended
g.fig.subplots_adjust(hspace=-.0025)
# Remove axes details that don't play well with overlap:
# empty facet titles, no y ticks, and no bottom/left spines
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)