% [llh,e2,Xrec,B,R,eoo] = mix_bernoulli_llh_error(X,P,Pi) Reconstruction error
% and log-likelihood in Bernoulli mixture
%
% Given a mixture of multivariate Bernoulli distributions with
% parameters P and Pi, mix_bernoulli_llh_error computes the direct (B) and
% posterior probabilities or "responsibilities" (R) under that mixture
% model of a data set X. From them, it computes the reconstructed data
% set Xrec as an average of the parameters P weighted by the
% responsibilities R, the reconstruction mean squared (and "maximum
% norm") error and the log-likelihood of the model for that data set.
%
% In:
%   X: NxD binary data matrix.
%   P: MxD matrix containing the Bernoulli probabilities.
%   Pi: Mx1 vector containing the mixing parameters.
% Out:
%   llh: log-likelihood.
%   e2: reconstruction mean squared error.
%   Xrec: NxD real matrix containing the reconstructed data set (R*P, so
%      its entries are weighted averages of P and generally not binary).
%   B: NxM matrix containing the direct probabilities. B(n,i) = p(X_n|i).
%   R: NxM matrix containing the posterior probabilities. R(n,i) = p(i|X_n).
%   eoo: reconstruction mean "maximum norm" error.
%
% See also mix_bernoulli, mix_bernoulli_sample, mix_bernoulli_distrib.

% Copyright (c) 1997 by Miguel A. Carreira-Perpinan

function [llh,e2,Xrec,B,R,eoo] = mix_bernoulli_llh_error(X,P,Pi)
% Log-likelihood, responsibilities and reconstruction errors of the NxD
% data set X under a Bernoulli mixture with MxD probabilities P and M
% mixing proportions Pi. Outputs beyond llh are only computed when
% requested (nargout > 1), and eoo only when all six are requested.

% Accept Pi as a row or a column vector; the product B*Pi below needs an
% Mx1 column.
Pi = Pi(:);

N = size(X,1);
M = length(Pi);

% ------------------------- Direct probabilities -------------------------
% B(n,i) = prod_d P(i,d)^X(n,d) * (1-P(i,d))^(1-X(n,d)).
% The complements are hoisted out of the loops since they do not depend on
% the loop indices.
Q = 1-P;
Xc = 1-X;
B = zeros(N,M);
for n=1:N
  for i=1:M
    B(n,i)=prod(P(i,:).^X(n,:)) * prod(Q(i,:).^Xc(n,:));
  end
end
% Note that for the training set p(n)=sum{B(n,i)Pi(i)} will always be
% positive (otherwise the log-likelihood, which we are trying to
% maximise, would be minus infinity). But for the test set, B(n,i) can
% be exactly 0 (not due to precision loss) for all components i in a
% given point n. This is an unavoidable consequence of degenerate
% Bernoulli distributions (having the Bernoulli parameter equal to 0
% or 1).

% ------------------------- Log-likelihood -------------------------
% B*Pi is the Nx1 vector of mixture densities p(X_n).
llh = sum(log(B*Pi));

if nargout > 1
  % -------- Posterior probabilities or responsibilities --------
  if M==1
    % Single component: every responsibility is set to 1 directly.
    R=ones(N,1);
  else
    % cprod and cdiv are helpers defined outside this file; presumably
    % they scale / divide column-wise by a vector -- TODO confirm.
    R = cprod(B',Pi)';
    % Row sums taken along dimension 2 explicitly, so the result is Nx1
    % whatever the shape of R.
    Rsum = sum(R,2);
    R = cdiv(R,Rsum);
  end

  % Reconstructed vectors
  Xrec = R*P;

  % Residuals, shared by both error measures.
  Err = X-Xrec;

  % Reconstruction mean squared error
  % This is much faster than doing e = sum(diag(Err'*Err))/N;
  e2 = sum(sum(Err.^2))/N;

  if nargout > 5
    % Reconstruction mean "maximum norm" error: the largest ABSOLUTE
    % residual of each data point, averaged over the N points. The max is
    % taken along dimension 2 explicitly so that D==1 is handled too.
    eoo = sum(max(abs(Err),[],2))/N;
  end
end
