Using Datetimes with Seaborn's Regplot

2019-03-29 20:00发布

问题:

I'm working in Jupyter/IPython to plot an amount of Words per Day, but am having trouble using datetimes with Regplot in Seaborn. Regplot by itself apparently does not support regression against date data, though what I am trying to accomplish does not necessarily require a workaround for Regplot - perhaps just a way of formatting the x-axis labels.

A minimal working example, using simple timestamps:

%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt 
import matplotlib.dates as dates
import seaborn as sns
import time
import datetime
import radar
sns.set(style="whitegrid", color_codes=True)

data = pd.DataFrame([])

for i in np.arange(1, 10):
    date =  radar.random_datetime(start='2016-05-20', stop='2016-05-25')
    data = data.append(pd.DataFrame({'Date': time.mktime(date.timetuple()), 'Words': i + 100}, index=[0]), ignore_index=True)

points = plt.scatter(x = data['Date'], y = data["Words"], c=data["Words"], s=75, cmap="BrBG")
plt.colorbar(points)
sns.regplot(x = data['Date'], y = data["Words"], data=data, scatter=False, color='r')

Which renders a scatterplot with an overlaid trend-line:

But with the dates as datetimes:

points = plt.scatter(x = pd.to_datetime(data['Date'], unit='s').dt.to_pydatetime(), y = data["Words"], c=data["Words"], s=75, cmap="BrBG")
plt.colorbar(points)
sns.regplot(x = pd.to_datetime(data['Date'], unit='s').dt.to_pydatetime(), y = data["Words"], data=data, scatter=False, color='r')

it returns with the following errors:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-7-d6488afe3dcb> in <module>()
      1 points = plt.scatter(x = pd.to_datetime(data['Date'], unit='s').dt.to_pydatetime(), y = data["Words"], c=data["Words"], s=75, cmap="BrBG")
      2 plt.colorbar(points)
----> 3 sns.regplot(x = pd.to_datetime(data['Date'], unit='s').dt.to_pydatetime(), y = data["Words"], data=data, scatter=False, color='r')

C:\Python\WinPython-64bit-3.5.2.2Qt5\python-3.5.2.amd64\lib\site-packages\seaborn\linearmodels.py in regplot(x, y, data, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, dropna, x_jitter, y_jitter, label, color, marker, scatter_kws, line_kws, ax)
    777     scatter_kws["marker"] = marker
    778     line_kws = {} if line_kws is None else copy.copy(line_kws)
--> 779     plotter.plot(ax, scatter_kws, line_kws)
    780     return ax
    781 

C:\Python\WinPython-64bit-3.5.2.2Qt5\python-3.5.2.amd64\lib\site-packages\seaborn\linearmodels.py in plot(self, ax, scatter_kws, line_kws)
    330             self.scatterplot(ax, scatter_kws)
    331         if self.fit_reg:
--> 332             self.lineplot(ax, line_kws)
    333 
    334         # Label the axes

C:\Python\WinPython-64bit-3.5.2.2Qt5\python-3.5.2.amd64\lib\site-packages\seaborn\linearmodels.py in lineplot(self, ax, kws)
    375 
    376         # Fit the regression model
--> 377         grid, yhat, err_bands = self.fit_regression(ax)
    378 
    379         # Get set default aesthetics

C:\Python\WinPython-64bit-3.5.2.2Qt5\python-3.5.2.amd64\lib\site-packages\seaborn\linearmodels.py in fit_regression(self, ax, x_range, grid)
    207             yhat, yhat_boots = self.fit_logx(grid)
    208         else:
--> 209             yhat, yhat_boots = self.fit_fast(grid)
    210 
    211         # Compute the confidence interval at each grid point

C:\Python\WinPython-64bit-3.5.2.2Qt5\python-3.5.2.amd64\lib\site-packages\seaborn\linearmodels.py in fit_fast(self, grid)
    222         grid = np.c_[np.ones(len(grid)), grid]
    223         reg_func = lambda _x, _y: np.linalg.pinv(_x).dot(_y)
--> 224         yhat = grid.dot(reg_func(X, y))
    225         if self.ci is None:
    226             return yhat, None

C:\Python\WinPython-64bit-3.5.2.2Qt5\python-3.5.2.amd64\lib\site-packages\seaborn\linearmodels.py in <lambda>(_x, _y)
    221         X, y = np.c_[np.ones(len(self.x)), self.x], self.y
    222         grid = np.c_[np.ones(len(grid)), grid]
--> 223         reg_func = lambda _x, _y: np.linalg.pinv(_x).dot(_y)
    224         yhat = grid.dot(reg_func(X, y))
    225         if self.ci is None:

C:\Python\WinPython-64bit-3.5.2.2Qt5\python-3.5.2.amd64\lib\site-packages\numpy\linalg\linalg.py in pinv(a, rcond)
   1614     a, wrap = _makearray(a)
   1615     _assertNoEmpty2d(a)
-> 1616     a = a.conjugate()
   1617     u, s, vt = svd(a, 0)
   1618     m = u.shape[0]

AttributeError: 'datetime.datetime' object has no attribute 'conjugate'

Though the scatterplot does render with the datetimes well-formatted:

Is there a way to use datetimes with Regplot, or to use the timestamps but format the labels on the x-axis as dates?

回答1:

You can get the values of the timestamps at the locations of the xticks, and then convert them to your desired format.

ax = plt.gca()
xticks = ax.get_xticks()
xticks_dates = [datetime.datetime.fromtimestamp(x).strftime('%Y-%m-%d %H:%M:%S') for x in xticks]
ax.set_xticklabels(xticks_dates)