Standard deviation and error bars in seaborn tsplot

2020-02-23 07:42发布

# Minimal example: draw a group of noisy sine traces with tsplot, using
# error bars (err_style="ci_bars") instead of the default error band.
import matplotlib.pyplot as plt  # required for plt.show() below
import numpy as np
import seaborn as sns

np.random.seed(22)  # fixed seed so the random data is reproducible
sns.set(color_codes=True)

x = np.linspace(0, 15, 31)
# Array of shape (10, 31): a shared sine curve, plus noise for every point,
# plus one constant random offset per row.
data = np.sin(x) + np.random.rand(10, 31) + np.random.randn(10, 1)

ax = sns.tsplot(data=data, err_style="ci_bars")
plt.show()

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Create a group of time series: a shared rising sine curve, plus noise for
# every point, plus one constant random offset per series.
num_samples = 90
group_size = 10
x = np.linspace(0, 10, num_samples)
group = (
    np.sin(x)
    + np.linspace(0, 2, num_samples)
    + np.random.rand(group_size, num_samples)
    + np.random.randn(group_size, 1)
)
# Transposed so that each DataFrame row is a time point and each column is
# one series.
df = pd.DataFrame(group.T, index=range(0, num_samples))

# Plot the time series with seaborn; tsplot receives the data transposed
# back to (group_size, num_samples).
# Optional: pass err_style="unit_traces" as well.
ax = sns.tsplot(data=df.T.values)

# Add standard deviation bars to the previous plot.
mean = df.mean(axis=1)
std = df.std(axis=1)
# Use fmt='none' instead of '-o' to plot the bars only.
ax.errorbar(df.index, mean, yerr=std, fmt='-o')
plt.show()

回答2:

Since the tsplot function does not provide a way to set the error bar values directly or to change the method used to calculate them, the only solution I found was to monkey patch the timeseries module:

import seaborn.timeseries

def _plot_std_bars(*args, central_data=None, ci=None, data=None, **kwargs):
    """Draw error bars spanning one standard deviation around the centre.

    The incoming ``ci`` is ignored and replaced by the pair
    ``(central_data - std, central_data + std)``, where ``std`` is
    ``data.std(axis=0)``. All other arguments are forwarded unchanged to
    ``seaborn.timeseries._plot_ci_bars``.
    """
    spread = data.std(axis=0)
    lower = central_data - spread
    upper = central_data + spread
    seaborn.timeseries._plot_ci_bars(
        *args,
        **kwargs,
        central_data=central_data,
        ci=np.asarray((lower, upper)),
        data=data,
    )

def _plot_std_band(*args, central_data=None, ci=None, data=None, **kwargs):
    """Draw a band spanning one standard deviation around the centre.

    The incoming ``ci`` is ignored and replaced by the pair
    ``(central_data - std, central_data + std)``, where ``std`` is
    ``data.std(axis=0)``. All other arguments are forwarded unchanged to
    ``seaborn.timeseries._plot_ci_band``.
    """
    spread = data.std(axis=0)
    lower = central_data - spread
    upper = central_data + spread
    seaborn.timeseries._plot_ci_band(
        *args,
        **kwargs,
        central_data=central_data,
        ci=np.asarray((lower, upper)),
        data=data,
    )

# Monkey patch: attach the helpers to the seaborn.timeseries module under
# `_plot_<err_style>` names so that err_style="std_bars" / "std_band" can be
# used with tsplot — presumably tsplot looks its plotting function up by
# that name; confirm against the installed seaborn version.
seaborn.timeseries._plot_std_bars = _plot_std_bars
seaborn.timeseries._plot_std_band = _plot_std_band

Then, to plot with standard deviation error bars, use the first call below:

# Standard deviation error bars via the patched "std_bars" style.
ax = sns.tsplot(data, err_style="std_bars", n_boot=0)

# Standard deviation band via the patched "std_band" style.
ax = sns.tsplot(data, err_style="std_band", n_boot=0)

Use the second call (err_style="std_band") to plot with a standard deviation band instead.

Edit: Inspired by this answer on SO, another (probably more sensible) approach would be to use the following instead of tsplot:

import matplotlib.pyplot as plt  # plt.errorbar is passed to g.map below
import pandas as pd
import seaborn as sns

# Mean and standard deviation along axis 0 of `data`; reset_index() turns
# the positional index into an "index" column used as the x values below.
df = pd.DataFrame.from_dict({
    "mean": data.mean(axis=0),
    "std": data.std(axis=0)
}).reset_index()

# `height` is the current name of the keyword formerly called `size`
# (renamed in seaborn 0.9).
g = sns.FacetGrid(df, height=6)
# The three column names are handed to plt.errorbar as x, y and yerr.
ax = g.map(plt.errorbar, "index", "mean", "std")
ax.set(xlabel="", ylabel="")

Edit2: Since you asked about how tsplot calculates its confidence intervals: it uses bootstrapping to estimate the distribution of the mean value at each time point and then finds the low and high percentile values (that correspond to the confidence interval being used) from these distributions. The default confidence interval is 68% – equivalent to ± one standard deviation of the mean, assuming normal distribution. The respective low and high percentiles are 16% and 84%. You can change the confidence interval via the ci keyword argument.