Converting a mixture of gaussians to PyMC3

I am trying to learn PyMC3, I want to make a simple mixture of gaussians example. I found this example and want to convert it to pymc3 but I'm currently getting an error when trying to plot the traceplot.

n1 = 500
n2 = 200
n = n1+n2

mean1 = 21.8
mean2 = 42.0

precision = 0.1

sigma = np.sqrt(1 / precision)

# precision = 1/sigma^2
print "sigma1: %s" % sigma1
print "sigma2: %s" % sigma2

data1 = np.random.normal(mean1,sigma,n1)
data2 = np.random.normal(mean2,sigma,n2)

data = np.concatenate([data1 , data2])
#np.random.shuffle(data)

fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(111, xlabel='x', ylabel='y', title='mixture of 2    guassians')
ax.plot(range(0,n1+n2), data, 'x', label='data')
plt.legend(loc=0)

with pm.Model() as model:
    #priors
    p = pm.Uniform( "p", 0 , 1) #this is the fraction that come from mean1 vs mean2

    ber = pm.Bernoulli( "ber", p = p) # produces 1 with proportion p.

    precision = pm.Gamma('precision', alpha=0.1, beta=0.1)

    mean1 = pm.Normal( "mean1", 0, 0.01 ) #better to use normals versus Uniforms (unless you are certain the value is  truncated at 0 and 200 
    mean2 = pm.Normal( "mean2", 0, 0.01 )

    mean = pm.Deterministic('mean', ber*mean1 + (1-ber)*mean2)

    process = pm.Normal('process', mu=mean, tau=precision, observed=data)

    # inference
    step = pm.Metropolis()
    trace = pm.sample(10000, step)
    pm.traceplot(trace)

Error:

sigma1: 3.16227766017
sigma2: 1.69030850946
 [-----------------100%-----------------] 10000 of 10000 complete in 4.4 sec
---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
<ipython-input-10-eb728824de83> in <module>()
     44     step = pm.Metropolis()
     45     trace = pm.sample(10000, step)
---> 46     pm.traceplot(trace)

/usr/lib/python2.7/site-packages/pymc-3.0-py2.7.egg/pymc/plots.pyc in traceplot(trace, vars, figsize, lines, combined, grid)
     70                 ax[i, 0].set_xlim(mind - .5, maxd + .5)
     71             else:
---> 72                 kdeplot_op(ax[i, 0], d)
     73             ax[i, 0].set_title(str(v))
     74             ax[i, 0].grid(grid)

/usr/lib/python2.7/site-packages/pymc-3.0-py2.7.egg/pymc/plots.pyc in kdeplot_op(ax, data)
     94     for i in range(data.shape[1]):
     95         d = data[:, i]
---> 96         density = kde.gaussian_kde(d)
     97         l = np.min(d)
     98         u = np.max(d)

/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in __init__(self, dataset, bw_method)
    186 
    187         self.d, self.n = self.dataset.shape
--> 188         self.set_bandwidth(bw_method=bw_method)
    189 
    190     def evaluate(self, points):

/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in set_bandwidth(self, bw_method)
    496             raise ValueError(msg)
    497 
--> 498         self._compute_covariance()
    499 
    500     def _compute_covariance(self):

/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in _compute_covariance(self)
    507             self._data_covariance = atleast_2d(np.cov(self.dataset, rowvar=1,
    508                                                bias=False))
--> 509             self._data_inv_cov = linalg.inv(self._data_covariance)
    510 
    511         self.covariance = self._data_covariance * self.factor**2

/usr/lib64/python2.7/site-packages/scipy/linalg/basic.pyc in inv(a, overwrite_a, check_finite)
    381         inv_a, info = getri(lu, piv, lwork=lwork, overwrite_lu=1)
    382     if info > 0:
--> 383         raise LinAlgError("singular matrix")
    384     if info < 0:
    385         raise ValueError('illegal value in %d-th argument of internal '

LinAlgError: singular matrix

标签： python mcmc pymc

3条回答

戒情不戒烟

2楼-- · 2019-06-07 00:51

I know this issue is old, but I am trying differente examples of PyMC3 usages to get used to modeling in PyMC3. The answer as given above does not work in current version 1.0 of PyMC3 (It does not distringuish the two means correctly). The minimum changes I had to do in order to make it work were the following:

1)
# mean = pm.Normal("mean", 0, 0.01, shape=2 )
mean = pm.Uniform('mean', 15, 60, shape=2)
2)
# step2 = pm.BinaryMetropolis([ber])
step2 = pm.ElemwiseCategorical(vars=[ber], values=[0, 1])

Just in case anybody else is having a similar problem.

0人赞添加讨论(0) 举报

地球回转人心会变

3楼-- · 2019-06-07 00:54

And an even simpler and quicker version is as follows:

with pm.Model() as model2:
    p = pm.Beta( "p", 1., 1.)    
    means = pm.Uniform('mean', 15, 60, shape=2)
    sigma = pm.Uniform('sigma', 0, 20, testval=5)

    process = pm.NormalMixture('obs', tt.stack([p, 1-p]), means, sd=sigma, observed=data)

with model2:
    step = pm.Metropolis()
    trace = pm.sample(10000, step=step)

0人赞添加讨论(0) 举报

不美不萌又怎样

4楼-- · 2019-06-07 00:58

Thanks to Fonnesbeck for answering this on the github issue tracker:

https://github.com/pymc-devs/pymc3/issues/452

here is the updated code:

with pm.Model() as model:
    #priors
    p = pm.Uniform( "p", 0 , 1) #this is the fraction that come from mean1 vs mean2

    ber = pm.Bernoulli( "ber", p = p, shape=len(data)) # produces 1 with proportion p.

    sigma = pm.Uniform('sigma', 0, 100)
    precision = sigma**-2

    mean = pm.Normal( "mean", 0, 0.01, shape=2 )

    mu = pm.Deterministic('mu', mean[ber])

    process = pm.Normal('process', mu=mu, tau=precision, observed=data)

with model:
    step1 = pm.Metropolis([p, sigma, mean])
    step2 = pm.BinaryMetropolis([ber])
    trace = pm.sample(10000, [step1, step2])

You need to use BinaryMetropolis when inferring a Bernoulli random variable

0人赞添加讨论(0) 举报

Converting a mixture of gaussians to PyMC3

采纳回答

编辑标签

举报内容

检举类型

检举原因

检举说明(必填)

打开微信“扫一扫”，打开网页后点击屏幕右上角分享按钮

付费偷看金额在0.1-10元之间