Matlab: Program returns garbage values, Help in pr

2020-06-28 15:51发布

问题:

I have implemented the Kalman Smoothing with Expectation Maximization based on the Paper Parameter Estimation for Linear dynamical system. All notations are based on this paper. The model is an IIR (AR(2)) filter

y(t) =  0.195 *y(t-1) - 0.95*y(t-2) + w(t) 

The state space representation:

x(t+1) = a^Tx(t) + w(t)

y(t) = C(t) + v(t)

The state space model :

x(t+1) = Ax(t) + w(t)

y(t) = Cx(t) + v(t)

w(t) = N(0,Q) is the driving process noise 

v(t) = N(0,R)  is the measurement noise

Re-writing the AR model as State Space representation:

Can somebody please point out where I have done wrong so that the code works. I have followed most of the sequence and structure from https://github.com/cswetenham/pmr/blob/master/toolboxes/lds/lds.m#L110

(1) Eq(26) needs an initial value for $x0$. I supplied x0 = mean(x,2) to the function Predict. I have a doubt in this. Will x0 and hence initx be the mean of the observation y which gives a scalar or will it be 2 values (2 rows by 1 column) since the state space is AR(2). I am not sure about this.

(2) If I take x0 = mean(x,2) and Commenting off the Code after Kalman Filtering gives proper results for state estimation. It is only from smoothing that the parameter estimation is not correct. It is not correct because the new x0 = initx = x1sum/N becomes a scalar whereas when initializing it was a 2 by 1 matrix, where each row is the state.

%%% Matlab script to simulate data and process usiung Kalman for the state
%%% estimation of AR(2) time series.:  y(t) =  0.195 *y(t-1) - 0.95*y(t-2) + excite_input(t);
% Linear system representation
% x_n+1 = A x_n + Bw_n
% y_n = Cx_n + v_n
% w = N(0,Q); v = N(0,R)
clc
clear all

T = 100;
order = 2;
  a1 = 0.195;
  a2 = -0.95;

A = [ a1  a2;
      1 0 ];
C = [ 1 0 ];
B = [1;
      0];


 x =[ rand(order,1) zeros(order,T-1)];% a sequence of 100 2-d vectors



sigma_2_w =1;
sigma_2_v = 0.01;



Q=eye(order);
P=Q;



%Simulate the steady state covariance matrix P
%P = A*P*A' + B*sqrt(sigma_2_w)*B';
 P = dlyap(A,B*B');

%Simulate AR model time series, x;


sqrtW=sqrtm(sigma_2_w);
excite_input=B*sqrtW*randn(1,T);
for t = 1:T-1
    x(:,t+1) = A*x(:,t) + excite_input(t+1);
end

%noisy observation

y = C*x + sqrt(sigma_2_v)*randn(1,T);



R  = sigma_2_v ;

z = y;
%X= x';
 x0=mean(x,2);
 YHAT = zeros(1,T);
 XHAT = zeros(2,T+1);

LL=[];
converged = 0;
previous_loglik = -Inf;
Y =y;
z = Y;


N = T;
max_iter = 500;
num_iter = 0;
initx = x0;
% V1 = var(initx);
loglik = 0;
V1 = P;
 while ~converged & (num_iter <= max_iter)
  initx = x0;

  k = length(initx);
  I=eye(k);
  xtt=zeros(2,T);   Vtt=zeros(2,2,T); xtt1=zeros(2,T); Vtt1=zeros(2,2,T); xhat_s = zeros(2,T);
  xtT=zeros(2,T); VtT=zeros(2,2,T); J=zeros(2,2,T); Vtt1T=zeros(2,2,T);  Ptsum = 0;
  x1sum = 0;
  P1sum = 0;
  A1=zeros(k);
  A2=zeros(k);
  XPred = zeros(2,T);
  Ptsum=zeros(k);
  xhat = zeros(2,1);
  Pcov = zeros(k,k);
  Kcur = 0;
  YX = 0;

%KAlman Filtering

for i =1:T

[xpred, Ppred] = predict(x0,V1, A, Q);
[nu, S] = innovation(xpred, Ppred, z(i), C, R);
[xnew, P, yhat, KalmanGain] = innovation_update_LDS(A, xpred, Ppred, V1, nu, S, C);
YHAT(i) = yhat;
Phat(i) = sqrt(C*P*C');
xtt(:,i) = xnew;  %xtt is the filtered state
Vtt(:,:,i) = P; %filtered covariance
Vtt1(:,:,i) = Ppred;
XPred(:,i) = A*xtt(:,i);


end 

KC = KalmanGain*C;

% 
% %Kalman Smoothing
% 
% 

KT = KalmanGain;

% %backward pass gets you E[x(t)|y(1:T)] from E[x(t)|y(1:t)]
t=T;
xtT(:,t) = xtt(:,t);
VtT(:,:,t) = Vtt(:,:,t);


% %SMOOTHING
 for t=(T-1):-1:1
     Vtt1(:,:,t) = A*Vtt(:,:,t)*A' + Q;
     J(:,:,t) = Vtt(:,:,t)*A'*inv(Vtt1(:,:,t+1)); %Eq(31)
     xtT(:,t) =  xtt(:,t) + ((xtT(:,t+1)- XPred(:,t))'*J(:,:,t))';  % Eq(32) xsmooth  modified the transpose
     VtT(:,:,t) = Vtt(:,:,t) + J(:,:,t)*(VtT(:,:,t+1)-Vtt1(:,:,t+1))*J(:,:,t)';  % Eq(33) Vsmooth or Psmooth
     Pt=VtT(:,:,t) + xtT(:,t)*xtT(:,t)'; 
     Ptsum=Ptsum+Pt;
     YX = YX+Y(:,t)'*xtT(:,t);  %For Eq(14)
      x1sum = x1sum + xtT(:,1);
    %  gama2 = gama2 + Pt - xtT(:,1)*xtT(:,1)' - VtT(:,:,1);

end
% Pt = VtT + xtT'*xtT;

% Pt = VtT(:,:,t) + xtT(:,t)'*xtT(:,t);  %P_t,t-1 = V_t,t-1^T + x_t^T * x_t^T'


 Sum_Pt_2T= Ptsum - Pt;  %A3  gama2
 A2= Ptsum + A2; %gama1

xhat_s = xtT; %smoothed estimate of x(t)


t= T;
 Pcov=(eye(2)-KC)*A*Vtt(:,:,t-1);
 A1=A1+Pcov+xtT(:,t)'*xtT(:,t-1);


for t= (T-1):-1:2
 Pcov =(Vtt(:,:,t) + J(:,:,t)*(Pcov - A*Vtt(:,:,t)))*J(:,:,t-1)';  %Eq(34)
 A1 = A1+Pcov+xtT(:,t)'*xtT(:,t-1);
 end; 

Rterm = (Y - C*xtt);
R_result = 0.5*Rterm' * inv(R)* Rterm;
R_sum_result = sum(sum(R_result));

Qterm = xtt(:,2:T)-(A*xtt(:,1:(T-1)));
Q_result = 0.5*Qterm' * inv(Q) * Qterm;
Q_sum_result = sum(sum(Q_result));

V1term = (xtt(:,1) -initx);
V1_result = 0.5 * V1term' * inv(V1) * V1term;

loglik_t = - R_sum_result - 0.5*T*log(det(R)) - Q_sum_result - 0.5*(T-1)*log(det(Q)) -  V1_result - 0.5*log(det(V1)) - 0.5*T*log(2*pi);



%STEP 2 Re-estimate B,Q,R,initx,initV1 via ML given x(t) estimate
 C=YX'*inv(Ptsum)/N;
 A=A1*inv(A2); 
 R1term = sum(Y.*Y)'/(T);
 R2term = diag(C*YX)/T;
 R = R1term - R2term;  % R = (1/T)*sum(Y.*Y - C.*xhat_s.*Y');
 Q=(1/(T-1))*diag(diag((Sum_Pt_2T-A*(A1')))); 
initx = x1sum/N;
x0 = initx;
V1 = Pt(:,:,1) - initx*initx';
  LL=[LL loglik_t];
  num_iter = num_iter+1
converged = em_converged(loglik, previous_loglik); %subfunction below
previous_loglik = loglik_t;


 end %while not converged
A

C
Q
R

function [xpred, Ppred] = predict(x0,P, A, Q)
xpred = A*x0;
Ppred = A*P*A' + Q;
end

function [nu, S] = innovation(xpred, Ppred, y, C, R)
nu = y - C*xpred; %% innovation

S = R + C*Ppred*C'; %% innovation covariance

end

function [xnew, Pnew, yhat, K] = innovation_update_LDS(A,xpred, Ppred,V1, nu, S, C)
% invP=inv(S);
% K = Ppred*C'*invP; %% Kalman gain
% xnew = xpred + K*nu; %% new state
% Pnew = Ppred - Ppred*K*C; %% new covariance
% yhat = C*xnew; % Observed value at time step i, assuming inferred state xnew
% xhat = A*xnew + K*nu;

K = Ppred*C'*inv(S); %% Kalman gain 2 rows 1 col (scalar
xnew = xpred + K*nu; %% new state
Pnew = Ppred - Ppred*K*C; %% new covariance
 yhat = C*xnew;
VVnew = (eye(2) - K*C)*A*V1;

end

function converged = em_converged(loglik, previous_loglik, threshold)
% EM_CONVERGED Has EM converged?
% [converged, decrease] = em_converged(loglik, previous_loglik, threshold)
%
% We have converged if
% |f(t) - f(t-1)| / avg < threshold,
% where avg = (|f(t)| + |f(t-1)|)/2 and f is log lik.
% threshold defaults to 1e-4.
% This stopping criterion is from Numerical Recipes in C p42
if nargin < 3
threshold = 1e-4;
end
%log likelihood should increase
if loglik - previous_loglik < -1e-3 % allow for a little imprecision
fprintf(1, '******likelihood decreased from %6.4f to %6.4f!\n', previous_loglik,loglik);
end
delta_loglik = abs(loglik - previous_loglik);
avg_loglik = (abs(loglik) + abs(previous_loglik) + eps)/2;
if (delta_loglik / avg_loglik) < threshold
converged = 1
else converged = 0
end 

回答1:

The first place I always start when looking at kalman code is the update step, specifically the covariance update. In your code its innovation_update_LDS

The standard form you are using is Pnew = Ppred - Ppred*K*C; %% new covariance this is incorrect, it should be Pnew = Ppred - K*C*Ppred or more commonly Pnew = (I - K*C)*Ppred; where I=eye(len(K));

besides that point, I would never use that form of the equation either.Use "Josephs form"

Pnew = (eye(2) - K*C) * Ppred * (eye(2)-K*C)' + K*R*K'; 

This form is computationally stable. It guarantees the matrix will stay symmetric as it should. Using the standard form this isn't guaranteed, it has to do with rounding errors in computers but after many iterations, or whenusing a state space with a large number of features, the covariance matrix becomes nonsymmetric and incurs huge errors and ultimately cause the filter not to follow the expected trajectory at all.

Three also seems to be a few errors in you %KAlman Filtering section. I think it should look more like this

%KAlman Filtering

for i =1:T
    if (i==1)
        [xpred, Ppred] = predict(x0,V1, A, Q);
    else
        [xpred, Ppred] = predict(xtt(:,i-1),Vtt(:,:,i-1), A, Q);
    end
    [nu, S] = innovation(xpred, Ppred, z(i), C, R);
    [xnew, Pnew, yhat, KalmanGain] = innovation_update_LDS(A, xpred, Ppred, V1, nu, S, C, R);
    YHAT(i) = yhat;
    Phat(i) = sqrt(C*Pnew*C');
    xtt(:,i) = xnew;  %xtt is the filtered state
    Vtt(:,:,i) = Pnew(:,:); %filtered covariance
end

there may have been more errors, but this is all I have time to find. Good luck