I would like to add data labels to factor plots generated by Seaborn. Here is an example:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Load the Titanic training data; 'train.csv' is expected in the working directory.
titanic_df = pd.read_csv('train.csv')
# `factorplot` was renamed to `catplot` in seaborn 0.9 (the old name is
# deprecated), and newer releases expect `x` to be passed by keyword.
# kind='count' draws one bar per category showing its number of rows.
sns.catplot(x='Sex', data=titanic_df, kind='count')
How can I add the 'count' values to the top of each bar on the graph?
You could do it this way:
import math
# Apply seaborn's 'whitegrid' style to the plots drawn below
sns.set_style('whitegrid')
def roundup(x):
    """Return *x* rounded up to the next multiple of 100, plus 100 of headroom.

    Used to pick a y-axis upper limit that leaves room above the tallest bar
    for its data label, e.g. ``roundup(577) == 700``.
    """
    hundreds = int(math.ceil(x / 100.0))
    return hundreds * 100 + 100
# Load the data; 'train.csv' is expected in the working directory.
df = pd.read_csv('train.csv')
# `catplot` is the current name of the deprecated `factorplot`; its `height`
# parameter replaces the old `size`, and `x` must be given by keyword in
# newer seaborn releases.
sns.catplot(x='Sex', data=df, kind='count', alpha=0.7, height=4, aspect=1)
# Get current axis on current figure (the one the plot above was drawn on)
ax = plt.gca()
# ylim max value to be set: the tallest bar, rounded up with headroom so the
# label above it is not clipped
y_max = df['Sex'].value_counts().max()
ax.set_ylim([0, roundup(y_max)])
# Iterate through the list of axes' patches (one rectangle per bar)
for p in ax.patches:
    bar_height = p.get_height()
    # Centre the label horizontally on the bar and sit it on the bar's top
    # edge (va='bottom' puts the bottom of the text at the given y).
    ax.text(p.get_x() + p.get_width() / 2., bar_height, '%d' % int(bar_height),
            fontsize=12, color='red', ha='center', va='bottom')
plt.show()
The above answer from @nickil-maveli is simply great.
This is just to add some clarity about the parameters when you are adding the data labels to the barplot (as requested in the comments by @user27074):
# loop through all bars of the barplot
for p in ax.patches:
    # height of bar, which is basically the data value
    height = p.get_height()
    # add text to specified position
    ax.text(
        # x-coordinate of the data label: the horizontal centre of the bar.
        # It is taken from the bar's own geometry rather than from its index
        # in ax.patches, so the label stays on the right bar even when bars
        # are not drawn at integer positions (e.g. grouped bars with hue).
        p.get_x() + p.get_width() / 2.,
        # height of data label: height / 2. is in the middle of the bar
        # so this is the y-coordinate of the data label
        height / 2.,
        # formatting of data label
        u'{:0.1f}%'.format(height),
        # color of data label
        color='black',
        # size of data label
        fontsize=18,
        # horizontal alignment: possible values are center, right, left
        ha='center',
        # vertical alignment: possible values are top, bottom, center, baseline
        va='center'
    )
You could do something even simpler:
# catplot is a figure-level function: it creates its own figure, so a
# preceding plt.figure(figsize=...) would only leave an empty extra figure.
# The size is set through height/aspect instead (width = height * aspect,
# i.e. 4 x 3 inches here).
# kind='count' makes seaborn compute the per-category counts itself, so the
# data does not need a pre-aggregated 'count' column.
plot = sns.catplot(x='Sex', kind='count', data=titanic_df, height=3, aspect=4 / 3.)
# plot.ax gives the axis object
# plot.ax.patches gives the list of bars (one rectangle per bar)
for bar in plot.ax.patches:
    h = bar.get_height()
    plot.ax.text(
        # x coordinate of text: horizontal centre of the bar, read from the
        # bar itself so it does not depend on the bar's index
        bar.get_x() + bar.get_width() / 2.,
        h + 10,  # y coordinate of text: a little above the top of the bar
        '{}'.format(int(h)),  # label text: the bar's value as an integer
        ha='center',
        va='center',
        fontweight='bold',
        size=14)