/*
 * online(P, S, H, Pol, U)
 *
 * Interleaves planning and real execution of program P starting in
 * situation S with horizon H.
 *
 * Each step asks incrBestDo/7 for the remaining program, a policy and a
 * value.  If final/5 accepts that result, the policy and value are returned
 * unchanged as Pol and U.  Otherwise the first action A of the policy is
 * carried out with doReally/1, the resulting situation is determined,
 * senseExo/2 is consulted, the horizon is passed through decrement/2, and
 * the predicate recurses on the remaining program.
 *
 * On that second path Pol is A followed by the policy returned by the
 * recursive call, and U is the R given by reward(R, S) plus the value
 * returned by the recursive call.
 *
 * NOTE(review): the exact contracts of incrBestDo/7, final/5, senseExo/2,
 * senseEffect/3 and diagnose/3 are defined elsewhere - confirm there.
 */
online(P,S,H,Pol,U) :-
    incrBestDo(P,S,H,Remaining,Plan,PlanU,_Prob),
    (   /* Nothing left to execute: hand back the computed policy and value. */
        final(Remaining,S,H,Plan,PlanU),
        Pol = Plan,
        U = PlanU
    ;   reward(R,S),
        Plan = (A : _Tail),
        /*
         * Work out the situation Next that follows the real execution of A.
         * Every alternative cuts right after doReally/1.  A cut inside a
         * disjunction cuts the whole clause, so once the action has been
         * performed no earlier choice point (the other alternatives here,
         * incrBestDo/7, further online/5 clauses) can be retried.
         */
        (   /* Deterministic agent action: the successor is do(A,S). */
            agentAction(A), deterministic(A,S),
            doReally(A),
            !,
            Next = do(A,S)
        ;   /* Sensing action: performed the same way, successor is do(A,S). */
            senseAction(A),
            doReally(A),
            !,
            Next = do(A,S)
        ;   /* Stochastic agent action: sense the effect, then let diagnose/3
               pick the successor using the list given by stochastic/3. */
            agentAction(A), stochastic(A,S,Outcomes),
            doReally(A),
            !,
            senseEffect(A,S,Observed),
            diagnose(Observed,Outcomes,Next)
        ),
        /* Common continuation shared by all three kinds of action. */
        senseExo(Next,SE),              /* exogenous actions, if any */
        decrement(H,Hor),
        online(Remaining,SE,Hor,PolFut,UFut),
        Pol = (A : PolFut),
        U is R + UFut
    ).