function [loggamma,loglik] = phmm_fb2(seq,delta,logA) % [loggamma,loglik] = phmm_fb2(seq,delta,logA) % % does forward-backward in a profile HMM given a single sequence seq % input parameters: logs of emission parameters in logA % assumes transition structure is prob(enter a match state)=delta % output parameters: loggamma holds the log the marginal posteriors % loglik holds the log likelihood of each sequence % % if there are Q symbols (e.g. Q=4 for {A,T,C,G}) and % the template size is N, then % each element of seq should be between 1 and Q % and logA should be size Q by 2N (the first N cols correspond to % match states, the last N correspond to insert states) T=length(seq); [M,N2]=size(logA); N=N2/2; % N is template size match=1:N; ins=match+N; del=ins+N; % hold the state indices ld1=log(delta); ld2=log((1-delta)/2); % since we use these a lot % ALPHA (FORWARD) RECURSION logalpha=NaN+zeros(3*N,T); % initialize with NaN as a debugging trick % ...if any NaN's remain, that is BAD! % tt=1 is a bit tricky... %to generate seq(1) only, and end up in a match/ins, you must %traverse only delete states (zero or more) before the match/ins logalpha(match(1),1) = ld1+logA(seq(1),match(1)); logalpha(ins(1),1) = logsum(ld2+logA(seq(1),ins(1))+[0,ld2],2); for nn=2:N logalpha(match(nn),1)=ld2*(nn-1)+ld1+logA(seq(1),match(nn)); logalpha(ins(nn),1)=ld2*(nn+1)+logA(seq(1),ins(nn)); end % to generate seq(1) only, and end up in a del state, you must % visit exactly one match/ins, with zero or more del states % before and zero or more del states after logalpha(del(1),1)=-Inf; % you can't generate seq(1) and end in del(1) for nn=2:N logalpha(del(nn),1)=logsum(ld2+ [... logalpha(match(nn-1),1),logalpha(ins(nn-1),1),... logalpha(del(nn-1),1) ],2); end % tt=2 and above are more straightforward... for tt=2:T logalpha(match(1),tt)=-Inf; % can't end in match(1) if tt>1 logalpha(ins(1),tt)=logsum(logA(seq(tt),ins(1))+[... logalpha(match(1),tt-1)+ld2,logalpha(ins(1),tt-1)+ld2 ],2); logalpha(del(1),tt)=-Inf; % you can't end in del(1) if tt>0 for nn=2:N %---match states--- logalpha(match(nn),tt)=logsum(logA(seq(tt),match(nn))+ld1+ [... logalpha(match(nn-1),tt-1), logalpha(ins(nn-1),tt-1), ... logalpha(del(nn-1),tt-1) ],2); %---insert states--- logalpha(ins(nn),tt)=logsum(logA(seq(tt),ins(nn))+ld2+ [... logalpha(match(nn),tt-1), logalpha(ins(nn),tt-1), ... logalpha(del(nn),tt-1) ],2); %---delete states--- logalpha(del(nn),tt)=logsum(ld2+ [... logalpha(match(nn-1),tt),logalpha(ins(nn-1),tt),... logalpha(del(nn-1),tt) ],2); end if(any(isnan(logalpha(:,tt)))) fprintf(1,'WARNING: Nan in logalpha at time %d\n',tt); %keyboard end end % compute log lik based on log alphas loglik=logsum([logalpha(match(N),T),logalpha(ins(N),T),logalpha(del(N),T)],2); % BETA (BACKWARD) RECURSION logbeta=NaN+zeros(3*N,T); % initialize with NaN as a debugging trick logbeta(del(N),T)=log((1+delta)/2); logbeta(match(N),T)=log((1+delta)/2); logbeta(ins(N),T)=log((1+delta)/2); for nn=(N-1):-1:1 logbeta(del(nn),T)=ld2+logbeta(del(nn+1),T); logbeta(match(nn),T)=ld2+logbeta(del(nn+1),T); logbeta(ins(nn),T)=ld2+logbeta(del(nn+1),T); end for tt=(T-1):-1:1 logbeta(del(N),tt)=logbeta(ins(N),tt+1) + ld2 + logA(seq(tt+1),ins(N)); logbeta(match(N),tt)=logbeta(ins(N),tt+1) + ld2 + logA(seq(tt+1),ins(N)); logbeta(ins(N),tt)=logbeta(ins(N),tt+1) + ld2 + logA(seq(tt+1),ins(N)); for nn=(N-1):-1:1 %---delete states--- logbeta(del(nn),tt) = logsum([logbeta(match(nn+1),tt+1)+... ld1+logA(seq(tt+1),match(nn+1)), ... logbeta(ins(nn),tt+1)+ld2+logA(seq(tt+1),ins(nn)),... logbeta(del(nn+1),tt)+ld2 ],2); %---match states--- logbeta(match(nn),tt) = logsum([logbeta(match(nn+1),tt+1)+... ld1+logA(seq(tt+1),match(nn+1)), ... logbeta(ins(nn),tt+1)+ld2+logA(seq(tt+1),ins(nn)),... logbeta(del(nn+1),tt)+ld2 ],2); %---insert states--- logbeta(ins(nn),tt) = logsum([logbeta(match(nn+1),tt+1)+... ld1+logA(seq(tt+1),match(nn+1)), ... logbeta(ins(nn),tt+1)+ld2+logA(seq(tt+1),ins(nn)),... logbeta(del(nn+1),tt)+ld2 ],2); end end % finally, compute log gammas loggamma = logalpha+logbeta-loglik;