Python: Leave Numpy NaN values from matplotlib heatmap

2019-01-19 08:53发布

问题:

This question already has an answer here:

  • Matplotlib grayscale heatmap with visually distinct “NA” squares fields 2 answers

I have a numpy array that I need to plot as a heatmap. The numpy array also contains NaN values, which I need to exclude from plotting. I was told in other posts that numpy automatically masks the NaN values in the plot, but it's somehow not working for me. Here is some sample code:

# Reproduction script for the problem: an 8x4 array with one all-NaN row is
# drawn with pcolor and a colorbar.
# NOTE: `np` and `plt` are used without being imported in this snippet;
# presumably `import numpy as np` and `import matplotlib.pyplot as plt`
# were run beforehand.
# NOTE: the label names are swapped relative to how they are used below:
# column_labels (8 entries) is applied to the y axis and row_labels
# (4 entries) to the x axis, which matches the 8-row by 4-column data.
column_labels = list('ABCDEFGH')
row_labels = list('WXYZ')
fig, ax = plt.subplots()
data = np.array([[ 0.96753494,  0.52349944,  0.0254628 ,  0.5104103 ],
         [ 0.07320069,  0.91278731,  0.97094436,  0.70533351],
         [ 0.30162006,  0.49068337,  0.41837729,  0.71139215],
         [ 0.19786101,  0.15882713,  0.59028841,  0.06242765],
         [ 0.51505872,  0.07798389,  0.58790067,  0.44782683],
         [ 0.68975694,  0.53535385,  0.15696023,  0.35641951],
         [ 0.66481995,  0.03576846,  0.9623601 ,  0.96006395],
         [ 0.45865404,  0.50433582,  0.18182575,  0.35126449],])

# overwrite the whole row at index 3 with NaN to simulate missing values
data[3,:] = np.nan
# no vmin/vmax is passed, so the colour range has to be derived from data
# that now contains NaN
heatmap = ax.pcolor(data, cmap=plt.cm.seismic)

fig.colorbar(heatmap)
# put the major ticks at the middle of each cell
ax.set_xticks(np.arange(data.shape[1])+0.5, minor=False)
ax.set_yticks(np.arange(data.shape[0])+0.5, minor=False)

# want a more natural, table-like display
ax.invert_yaxis()
ax.xaxis.tick_top()

# x axis gets the 4 labels, y axis the 8 labels (see the naming note above)
ax.set_xticklabels(row_labels, minor=False)
ax.set_yticklabels(column_labels, minor=False)
plt.show()

The plot looks like this:

Clearly this is very different from the plot without the NaN values, which looks like this:

I want to exclude the NaN values from the legend entirely and preferably mark them with some symbol such as X. How can I achieve this?

回答1:

The nans interfere with pcolor determining the range of values contained in data since

In [72]: data.min(), data.max()
Out[72]: (nan, nan)

You can work around the problem by declaring the range of values yourself using np.nanmin and np.nanmax to find the minimum and maximum non-NaN values in data:

# Pin the colour range to the smallest and largest non-NaN values so the
# NaN entries cannot influence it.
heatmap = ax.pcolor(data, cmap=plt.cm.seismic, 
                    vmin=np.nanmin(data), vmax=np.nanmax(data))

since

In [73]: np.nanmin(data), np.nanmax(data)
Out[73]: (0.025462800000000001, 0.97094435999999995)

import copy

import numpy as np
import matplotlib.pyplot as plt

# Tick labels. The data below has 8 rows (y axis) and 4 columns (x axis).
y_labels = list('ABCDEFGH')
x_labels = list('WXYZ')

fig, ax = plt.subplots()
data = np.array([
    [0.96753494, 0.52349944, 0.0254628, 0.5104103],
    [0.07320069, 0.91278731, 0.97094436, 0.70533351],
    [0.30162006, 0.49068337, 0.41837729, 0.71139215],
    [0.19786101, 0.15882713, 0.59028841, 0.06242765],
    [0.51505872, 0.07798389, 0.58790067, 0.44782683],
    [0.68975694, 0.53535385, 0.15696023, 0.35641951],
    [0.66481995, 0.03576846, 0.9623601, 0.96006395],
    [0.45865404, 0.50433582, 0.18182575, 0.35126449],
])

# Overwrite one whole row with NaN to simulate missing values.
data[3, :] = np.nan

# Work on a private copy of the colormap: plt.cm.seismic is a shared
# module-level object, so calling set_under()/set_bad() on it directly would
# also change every later plot in the same process that uses "seismic".
cmap = copy.copy(plt.cm.seismic)
# Paint missing cells black. Depending on the Matplotlib version, NaN cells
# are coloured as below-range ("under") or as invalid ("bad") values, so both
# are set.
# NOTE(review): verify against the installed Matplotlib; some releases may
# skip NaN cells entirely, in which case the axes background shows through.
cmap.set_under('black')
cmap.set_bad('black')

# data.min()/data.max() would both be NaN here, so give pcolor the range of
# the finite values explicitly.
heatmap = ax.pcolor(
    data,
    cmap=cmap,
    vmin=np.nanmin(data),
    vmax=np.nanmax(data),
)

# extend='both' adds the out-of-range end caps to the colorbar.
fig.colorbar(heatmap, extend='both')

# put the major ticks at the middle of each cell
ax.set_xticks(np.arange(data.shape[1]) + 0.5, minor=False)
ax.set_yticks(np.arange(data.shape[0]) + 0.5, minor=False)

# want a more natural, table-like display
ax.invert_yaxis()
ax.xaxis.tick_top()

ax.set_xticklabels(x_labels, minor=False)
ax.set_yticklabels(y_labels, minor=False)
plt.show()


Another option (based on Joe Kington's solution) would be to draw rectangular patches with hatch marks wherever data is NaN.

The above example shows that pcolor colors in cells with NaN values as though the NaNs were very negative numbers. In contrast, if you pass pcolor a masked array, pcolor leaves the masked areas transparent. Thus, you can draw hatches on the axes background patch, ax.patch, to show hatch marks on the masked areas.

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# Tick labels. The data below has 8 rows (y axis) and 4 columns (x axis).
y_labels = list('ABCDEFGH')
x_labels = list('WXYZ')

fig, ax = plt.subplots()
data = np.array([
    [0.96753494, 0.52349944, 0.0254628, 0.5104103],
    [0.07320069, 0.91278731, 0.97094436, 0.70533351],
    [0.30162006, 0.49068337, 0.41837729, 0.71139215],
    [0.19786101, 0.15882713, 0.59028841, 0.06242765],
    [0.51505872, 0.07798389, 0.58790067, 0.44782683],
    [0.68975694, 0.53535385, 0.15696023, 0.35641951],
    [0.66481995, 0.03576846, 0.9623601, 0.96006395],
    [0.45865404, 0.50433582, 0.18182575, 0.35126449],
])

# Overwrite one whole row with NaN, then mask every invalid (NaN/inf) entry
# so that pcolor leaves those cells undrawn.
data[3, :] = np.nan
data = np.ma.masked_invalid(data)

# `data` is a masked array now, so use its own mask-aware min()/max() rather
# than the nan-functions, which are written for plain ndarrays.
heatmap = ax.pcolor(
    data,
    cmap=plt.cm.seismic,
    vmin=data.min(),
    vmax=data.max(),
)

# https://stackoverflow.com/a/16125413/190597 (Joe Kington)
# Hatch the axes background patch; it is visible only where pcolor drew
# nothing, i.e. in the masked cells.
ax.patch.set(hatch='x', edgecolor='black')
fig.colorbar(heatmap)

# put the major ticks at the middle of each cell
ax.set_xticks(np.arange(data.shape[1]) + 0.5, minor=False)
ax.set_yticks(np.arange(data.shape[0]) + 0.5, minor=False)

# want a more natural, table-like display
ax.invert_yaxis()
ax.xaxis.tick_top()

ax.set_xticklabels(x_labels, minor=False)
ax.set_yticklabels(y_labels, minor=False)
plt.show()


If you wish to use more than one type of hatch mark, say one for NaNs and another for negative values, then you could use a loop to add the hatched Rectangles:

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# Tick labels. The data below has 8 rows (y axis) and 4 columns (x axis).
y_labels = list('ABCDEFGH')
x_labels = list('WXYZ')

fig, ax = plt.subplots()
data = np.array([
    [0.96753494, 0.52349944, 0.0254628, 0.5104103],
    [0.07320069, 0.91278731, 0.97094436, 0.70533351],
    [0.30162006, 0.49068337, 0.41837729, 0.71139215],
    [0.19786101, 0.15882713, 0.59028841, 0.06242765],
    [0.51505872, 0.07798389, 0.58790067, 0.44782683],
    [0.68975694, 0.53535385, 0.15696023, 0.35641951],
    [0.66481995, 0.03576846, 0.9623601, 0.96006395],
    [0.45865404, 0.50433582, 0.18182575, 0.35126449],
])

# Shift the values so that some of them are negative, overwrite one whole row
# with NaN, then mask every invalid entry so pcolor leaves those cells undrawn.
data -= 0.5
data[3, :] = np.nan
data = np.ma.masked_invalid(data)

# `data` is a masked array now, so use its own mask-aware min()/max() rather
# than the nan-functions, which are written for plain ndarrays.
heatmap = ax.pcolor(
    data,
    cmap=plt.cm.seismic,
    vmin=data.min(),
    vmax=data.max(),
)

# https://stackoverflow.com/a/16125413/190597 (Joe Kington)
# Hatch the axes background patch; it is visible only where pcolor drew
# nothing, i.e. in the masked cells.
ax.patch.set(hatch='x', edgecolor='black')

# draw a hatched rectangle wherever the data is negative
# http://matthiaseisen.com/pp/patterns/p0203/
# Comparing a masked array yields a masked result; fill the masked entries
# with False so the missing cells are never treated as negative.
negative = np.ma.filled(data < 0, False)
for row, col in np.argwhere(negative):
    ax.add_patch(
        mpatches.Rectangle(
            (col, row),  # (x, y) anchor of the cell in data coordinates
            1,  # width
            1,  # height
            fill=False,
            edgecolor='blue',
            snap=False,
            # repeating the hatch character (e.g. 'xx') gives denser lines
            hatch='x',
        )
    )

fig.colorbar(heatmap)

# put the major ticks at the middle of each cell
ax.set_xticks(np.arange(data.shape[1]) + 0.5, minor=False)
ax.set_yticks(np.arange(data.shape[0]) + 0.5, minor=False)

# want a more natural, table-like display
ax.invert_yaxis()
ax.xaxis.tick_top()

ax.set_xticklabels(x_labels, minor=False)
ax.set_yticklabels(y_labels, minor=False)
plt.show()