% [lambda,phi,scores,llh,Sigma,code,i,gradn] = fa(S,N,L[,me,g,s,tol,max_it,lambda0,phi0])
%
% Computes maximum likelihood factor analysis by one of these methods:
%
% - Rao (1953). Reference: D. Morrison: Multivariate Statistical Methods
%   3rd ed., pp. 357-362. McGraw-Hill, 1990.
%
% - EM algorithm (Rubin and Thayer 1982).
%
% In:
%   S: DxD covariance matrix (>0).
%   N: number of data vectors (necessary to compute the log-likelihood).
%   L: number of factors (>=0).
%   me: method used (0: Rao, 1: EM; default 1).
%   g: starting point for the iterative procedure (0: principal component
%      solution, 1: principal component with structure removal, 2: random
%      scaled, 3: LISREL guess, 4: provided in lambda0, phi0; default
%      3).
%   s: factor scores (0: pseudoinverse, 1: Thomson, 2: Bartlett).
%      Default 1.
%   tol: minimum relative increase in log-likelihood to keep iterating
%      (default 1e-7 = 7 decimal places).
%   max_it: maximum number of iterations (default 400).
%   lambda0: initial loadings (only used if g=4).
%   phi0: initial uniquenesses (only used if g=4).
% Out:
%   lambda: DxL matrix of loadings.
%   phi: Dx1 vector of uniquenesses.
%   scores: LxD factor score matrix.
%   llh: log-likelihood curve of the parameters lambda and phi given the
%      data as a function of the iteration number.
%   Sigma: DxD symmetric matrix containing the model covariance matrix 
%      for factor analysis, i.e. lambda*lambda'+diag(phi). This is
%      useful to compute the log-likelihood under the same model of a
%      different data set using the function llh_normal.
%   code: stopping reason (0: tolerance achieved, 1: maximum number of
%      iterations reached, 2: Heywood case; 3: log-likelihood decreases
%      during learning, probably due to precision loss near maximum).
%   i: iterations performed.
%   gradn: square root of the average squared value of the log-likelihood
%      gradient (only computed when this output is requested).
%
% See also fa_scores, fa_guess, lin_proj_error, llh_normal.

% Copyright (c) 1997 by Miguel A. Carreira-Perpinan
%
% Ghahramani's ffa.m function performs exactly the same EM algorithm but:
%   1) He uses a random starting point.
%   2) He implements the EM iteration in a different way to make it
%      faster (my EM iteration takes 1.5 times the time his takes).

function [lambda,phi,scores,llh,Sigma,code,i,gradn] = fa(S,N,L,me,g,s,tol,max_it,lambda0,phi0)
% Maximum likelihood factor analysis of the covariance matrix S with L
% factors, fitted by Rao's iteration (me=0) or by EM (me=1).
%
% Implementation notes:
%   - An optional argument that is omitted or passed as [] takes its default.
%   - The log-determinant in the log-likelihood is obtained from Cholesky
%     factors instead of log(det(.)), because det() overflows/underflows for
%     moderately large D and would turn the log-likelihood into +-Inf/NaN,
%     which defeats every stopping test except max_it.
%   - gradn is only assigned when it is requested as an output.

% Argument defaults (omitted or empty => default value)
if nargin<4 || isempty(me), me = 1; end
if nargin<5 || isempty(g), g = 3; end
if nargin<6 || isempty(s), s = 1; end
if nargin<7 || isempty(tol), tol = 1e-7; end
if nargin<8 || isempty(max_it), max_it = 400; end

% S must be a square matrix; D is its dimension
[D,Dcols] = size(S);
if ndims(S)~=2 || D~=Dcols
  error('fa: S must be a square DxD covariance matrix.');
end

% No loadings: the model covariance is just the diagonal of S
if L==0
  lambda = [];
  scores = [];
  phi = diag(S);
  code = 0;
  i = 0;
  Sigma = diag(phi);
  llh = -N/2*(D*(1+log(2*pi))+sum(log(diag(S))));
  gradn = 0;
  return;
end

% Starting point
if g<4
  [lambda,phi] = fa_guess(S,L,g);
else
  if nargin<10
    error('fa: g=4 requires the starting point lambda0 and phi0.');
  end
  lambda = lambda0;
  phi = phi0;
end

% Log-likelihood of the starting point (this is also the E step for the
% first EM iteration when me=1 and all uniquenesses are positive)
[llh,Sigma_inv,Delta,delta] = fa_eval_llh(S,N,L,lambda,phi,me);

% Check abnormal conditions before starting the iterative procedure
if any(phi<=0)		% Any uniqueness<=0 ? => Heywood case
  code = 2;
  i = 0;
elseif max_it<1		% Do not iterate?
  code = 1;
  i = 0;
else			% Start iterating
  code = -1;
  i = 1;
end

while code<0
  llh_old = llh(end);

  % New parameters
  if me==0
    % Rao's update. The original code wrote [A,J,A] = svd(...), which
    % depends on the order in which repeated outputs are assigned; the
    % right singular vectors are requested explicitly here (U is unused).
    % The argument is symmetric, so U and V generically agree up to the
    % sign of their columns and lambda*lambda' does not depend on the choice.
    [U,J,V] = svd( diag(1./sqrt(phi)) * (S-diag(phi)) * diag(1./sqrt(phi)) );
    lambda = diag(sqrt(phi)) * V(:,1:L) * sqrt(J(1:L,1:L));
    % diag(S-lambda*lambda') without forming the DxD product
    phi = diag(S) - sum(lambda.^2,2);
  else				% M step
    Sd = S * delta;		% Shared by both updates
    lambda = Sd / ( Delta + delta' * Sd );
    % diag(S-S*delta*lambda') without forming the DxD product
    phi = diag(S) - sum(Sd.*lambda,2);
  end

  % Log-likelihood of the new parameters (and E step for the next EM
  % iteration)
  [llh_new,Sigma_inv,Delta,delta] = fa_eval_llh(S,N,L,lambda,phi,me);
  llh = [llh llh_new];

  if any(phi<=0)
    code = 2;			% Heywood case
  elseif llh_new<llh_old
    code = 3;			% Log-likelihood not monotonic
  elseif abs(llh_new-llh_old)<tol*abs(llh_new)
    code = 0;			% Relative error < tol => Tolerance achieved
  elseif i>=max_it
    code = 1;			% Max. no. iterations reached
  else
    i = i + 1;			% Continue iterating
  end
end

% Factor scores
scores = fa_scores(S,lambda,s,phi);

% Model covariance matrix
Sigma = lambda*lambda'+diag(phi);

% Log-likelihood gradient (only when the caller asks for it)
if nargout > 7
  W = Sigma_inv*(eye(D)-S*Sigma_inv);
  % First L columns: gradient wrt lambda; last column: gradient wrt phi
  grad = -N * [ W*lambda 0.5*diag(W) ];

  % Root mean square of the D*(L+1) gradient entries
  gradn = sqrt(sum(grad(:).^2)/(D*(L+1)));
end


function [llh,Sigma_inv,Delta,delta] = fa_eval_llh(S,N,L,lambda,phi,me)
% Local helper: log-likelihood of (lambda,phi) given S and N, together with
% the inverse model covariance Sigma_inv. For EM with all phi>0 it also
% returns the E-step quantities Delta (LxL) and delta (DxL); otherwise
% Delta and delta are returned empty.

D = size(S,1);
Delta = [];
delta = [];

if me==0 || any(phi<=0)		% Invert the DxD model covariance directly
  Sigma = lambda*lambda'+diag(phi);
  Sigma_inv = inv(Sigma);
  [R,p] = chol(Sigma);
  if p==0			% log det(Sigma_inv) = -2*sum(log(diag(R)))
    logdet_inv = -2*sum(log(diag(R)));
  else				% Not positive definite: original expression
    logdet_inv = log(det(Sigma_inv));
  end
else				% E step; only an LxL matrix is inverted
  phi_inv = diag(1./phi);
  M = eye(L) + lambda' * phi_inv * lambda;
  Delta = inv(M);
  delta = phi_inv * lambda * Delta;
  Sigma_inv = ( eye(D) - delta * lambda' ) * phi_inv;
  [R,p] = chol(M);
  if p==0			% det(Sigma) = prod(phi)*det(M)
    logdet_inv = -sum(log(phi)) - 2*sum(log(diag(R)));
  else				% Factorization failed: original expression
    logdet_inv = log(det(Sigma_inv));
  end
end

llh = -N/2 * ( D*log(2*pi) - logdet_inv + trace(S*Sigma_inv) );

