I have implemented the Kalman Smoothing with Expectation Maximization based on the Paper Parameter Estimation for Linear dynamical system. All notations are based on this paper. The model is an IIR (AR(2)) filter
y(t) = 0.195 *y(t-1) - 0.95*y(t-2) + w(t)
The state space representation:
x(t+1) = a^Tx(t) + w(t)
y(t) = C(t) + v(t)
The state space model :
x(t+1) = Ax(t) + w(t)
y(t) = Cx(t) + v(t)
w(t) = N(0,Q) is the driving process noise
v(t) = N(0,R) is the measurement noise
Re-writing the AR model as State Space representation:
Can somebody please point out where I have done wrong so that the code works. I have followed most of the sequence and structure from https://github.com/cswetenham/pmr/blob/master/toolboxes/lds/lds.m#L110
(1) Eq(26) needs an initial value for $x0$. I supplied x0 = mean(x,2)
to the function Predict
. I have a doubt in this. Will x0
and hence initx
be the mean of the observation y
which gives a scalar or will it be 2 values (2 rows by 1 column) since the state space is AR(2). I am not sure about this.
(2) If I take x0 = mean(x,2)
and Commenting off the Code after Kalman Filtering gives proper results for state estimation. It is only from smoothing that the parameter estimation is not correct. It is not correct because the new x0 = initx = x1sum/N
becomes a scalar whereas when initializing it was a 2 by 1 matrix, where each row is the state.
%%% Matlab script to simulate data and process usiung Kalman for the state
%%% estimation of AR(2) time series.: y(t) = 0.195 *y(t-1) - 0.95*y(t-2) + excite_input(t);
% Linear system representation
% x_n+1 = A x_n + Bw_n
% y_n = Cx_n + v_n
% w = N(0,Q); v = N(0,R)
clc
clear all
T = 100;
order = 2;
a1 = 0.195;
a2 = -0.95;
A = [ a1 a2;
1 0 ];
C = [ 1 0 ];
B = [1;
0];
x =[ rand(order,1) zeros(order,T-1)];% a sequence of 100 2-d vectors
sigma_2_w =1;
sigma_2_v = 0.01;
Q=eye(order);
P=Q;
%Simulate the steady state covariance matrix P
%P = A*P*A' + B*sqrt(sigma_2_w)*B';
P = dlyap(A,B*B');
%Simulate AR model time series, x;
sqrtW=sqrtm(sigma_2_w);
excite_input=B*sqrtW*randn(1,T);
for t = 1:T-1
x(:,t+1) = A*x(:,t) + excite_input(t+1);
end
%noisy observation
y = C*x + sqrt(sigma_2_v)*randn(1,T);
R = sigma_2_v ;
z = y;
%X= x';
x0=mean(x,2);
YHAT = zeros(1,T);
XHAT = zeros(2,T+1);
LL=[];
converged = 0;
previous_loglik = -Inf;
Y =y;
z = Y;
N = T;
max_iter = 500;
num_iter = 0;
initx = x0;
% V1 = var(initx);
loglik = 0;
V1 = P;
while ~converged & (num_iter <= max_iter)
initx = x0;
k = length(initx);
I=eye(k);
xtt=zeros(2,T); Vtt=zeros(2,2,T); xtt1=zeros(2,T); Vtt1=zeros(2,2,T); xhat_s = zeros(2,T);
xtT=zeros(2,T); VtT=zeros(2,2,T); J=zeros(2,2,T); Vtt1T=zeros(2,2,T); Ptsum = 0;
x1sum = 0;
P1sum = 0;
A1=zeros(k);
A2=zeros(k);
XPred = zeros(2,T);
Ptsum=zeros(k);
xhat = zeros(2,1);
Pcov = zeros(k,k);
Kcur = 0;
YX = 0;
%KAlman Filtering
for i =1:T
[xpred, Ppred] = predict(x0,V1, A, Q);
[nu, S] = innovation(xpred, Ppred, z(i), C, R);
[xnew, P, yhat, KalmanGain] = innovation_update_LDS(A, xpred, Ppred, V1, nu, S, C);
YHAT(i) = yhat;
Phat(i) = sqrt(C*P*C');
xtt(:,i) = xnew; %xtt is the filtered state
Vtt(:,:,i) = P; %filtered covariance
Vtt1(:,:,i) = Ppred;
XPred(:,i) = A*xtt(:,i);
end
KC = KalmanGain*C;
%
% %Kalman Smoothing
%
%
KT = KalmanGain;
% %backward pass gets you E[x(t)|y(1:T)] from E[x(t)|y(1:t)]
t=T;
xtT(:,t) = xtt(:,t);
VtT(:,:,t) = Vtt(:,:,t);
% %SMOOTHING
for t=(T-1):-1:1
Vtt1(:,:,t) = A*Vtt(:,:,t)*A' + Q;
J(:,:,t) = Vtt(:,:,t)*A'*inv(Vtt1(:,:,t+1)); %Eq(31)
xtT(:,t) = xtt(:,t) + ((xtT(:,t+1)- XPred(:,t))'*J(:,:,t))'; % Eq(32) xsmooth modified the transpose
VtT(:,:,t) = Vtt(:,:,t) + J(:,:,t)*(VtT(:,:,t+1)-Vtt1(:,:,t+1))*J(:,:,t)'; % Eq(33) Vsmooth or Psmooth
Pt=VtT(:,:,t) + xtT(:,t)*xtT(:,t)';
Ptsum=Ptsum+Pt;
YX = YX+Y(:,t)'*xtT(:,t); %For Eq(14)
x1sum = x1sum + xtT(:,1);
% gama2 = gama2 + Pt - xtT(:,1)*xtT(:,1)' - VtT(:,:,1);
end
% Pt = VtT + xtT'*xtT;
% Pt = VtT(:,:,t) + xtT(:,t)'*xtT(:,t); %P_t,t-1 = V_t,t-1^T + x_t^T * x_t^T'
Sum_Pt_2T= Ptsum - Pt; %A3 gama2
A2= Ptsum + A2; %gama1
xhat_s = xtT; %smoothed estimate of x(t)
t= T;
Pcov=(eye(2)-KC)*A*Vtt(:,:,t-1);
A1=A1+Pcov+xtT(:,t)'*xtT(:,t-1);
for t= (T-1):-1:2
Pcov =(Vtt(:,:,t) + J(:,:,t)*(Pcov - A*Vtt(:,:,t)))*J(:,:,t-1)'; %Eq(34)
A1 = A1+Pcov+xtT(:,t)'*xtT(:,t-1);
end;
Rterm = (Y - C*xtt);
R_result = 0.5*Rterm' * inv(R)* Rterm;
R_sum_result = sum(sum(R_result));
Qterm = xtt(:,2:T)-(A*xtt(:,1:(T-1)));
Q_result = 0.5*Qterm' * inv(Q) * Qterm;
Q_sum_result = sum(sum(Q_result));
V1term = (xtt(:,1) -initx);
V1_result = 0.5 * V1term' * inv(V1) * V1term;
loglik_t = - R_sum_result - 0.5*T*log(det(R)) - Q_sum_result - 0.5*(T-1)*log(det(Q)) - V1_result - 0.5*log(det(V1)) - 0.5*T*log(2*pi);
%STEP 2 Re-estimate B,Q,R,initx,initV1 via ML given x(t) estimate
C=YX'*inv(Ptsum)/N;
A=A1*inv(A2);
R1term = sum(Y.*Y)'/(T);
R2term = diag(C*YX)/T;
R = R1term - R2term; % R = (1/T)*sum(Y.*Y - C.*xhat_s.*Y');
Q=(1/(T-1))*diag(diag((Sum_Pt_2T-A*(A1'))));
initx = x1sum/N;
x0 = initx;
V1 = Pt(:,:,1) - initx*initx';
LL=[LL loglik_t];
num_iter = num_iter+1
converged = em_converged(loglik, previous_loglik); %subfunction below
previous_loglik = loglik_t;
end %while not converged
A
C
Q
R
function [xpred, Ppred] = predict(x0,P, A, Q)
xpred = A*x0;
Ppred = A*P*A' + Q;
end
function [nu, S] = innovation(xpred, Ppred, y, C, R)
nu = y - C*xpred; %% innovation
S = R + C*Ppred*C'; %% innovation covariance
end
function [xnew, Pnew, yhat, K] = innovation_update_LDS(A,xpred, Ppred,V1, nu, S, C)
% invP=inv(S);
% K = Ppred*C'*invP; %% Kalman gain
% xnew = xpred + K*nu; %% new state
% Pnew = Ppred - Ppred*K*C; %% new covariance
% yhat = C*xnew; % Observed value at time step i, assuming inferred state xnew
% xhat = A*xnew + K*nu;
K = Ppred*C'*inv(S); %% Kalman gain 2 rows 1 col (scalar
xnew = xpred + K*nu; %% new state
Pnew = Ppred - Ppred*K*C; %% new covariance
yhat = C*xnew;
VVnew = (eye(2) - K*C)*A*V1;
end
function converged = em_converged(loglik, previous_loglik, threshold)
% EM_CONVERGED Has EM converged?
% [converged, decrease] = em_converged(loglik, previous_loglik, threshold)
%
% We have converged if
% |f(t) - f(t-1)| / avg < threshold,
% where avg = (|f(t)| + |f(t-1)|)/2 and f is log lik.
% threshold defaults to 1e-4.
% This stopping criterion is from Numerical Recipes in C p42
if nargin < 3
threshold = 1e-4;
end
%log likelihood should increase
if loglik - previous_loglik < -1e-3 % allow for a little imprecision
fprintf(1, '******likelihood decreased from %6.4f to %6.4f!\n', previous_loglik,loglik);
end
delta_loglik = abs(loglik - previous_loglik);
avg_loglik = (abs(loglik) + abs(previous_loglik) + eps)/2;
if (delta_loglik / avg_loglik) < threshold
converged = 1
else converged = 0
end