/* online(P,S,H,Pol,U)
 *
 * One step of the on-line loop: ask incrBestDo/7 for the next piece of
 * policy, then either stop (final/5 holds) or carry out the first action
 * of that policy for real and recurse on what is left.
 *
 *   P   - passed to incrBestDo/7 as its first argument
 *         (presumably the program still to be run -- confirm)
 *   S   - passed to incrBestDo/7, reward/2, etc.
 *         (presumably the current situation -- confirm)
 *   H   - passed to incrBestDo/7 and reduced by decrement/2 before the
 *         recursive call (presumably the remaining horizon -- confirm)
 *   Pol - result: Policy0 when final/5 holds, otherwise (A : FutPolicy)
 *   U   - result: Util0 when final/5 holds, otherwise Reward + FutUtil
 *
 * The seventh result of incrBestDo/7 and the tail of Policy0 are not used
 * here.  Each '!' sits directly in this clause body (cut is transparent to
 * ;/2), so once doReally/1 has succeeded it also discards the choice
 * points left by incrBestDo/7 and by the alternative branches.
 */
online(P,S,H,Pol,U) :-
    incrBestDo(P,S,H,RestProg,Policy0,Util0,_Prob),
    (
        /* Nothing left to do: hand back what incrBestDo computed. */
        final(RestProg,S,H,Policy0,Util0),
        Pol = Policy0,
        U = Util0
    ;
        reward(Reward,S),
        Policy0 = (A : _Tail),
        (
            /* Deterministic agent action, or else a sensing action. */
            (   agentAction(A), deterministic(A,S)
            ;   senseAction(A)
            ),
            doReally(A),                 /* execute A / do sensing in reality */
            !,                           /* commit to the result */
            Next = do(A,S)
        ;
            /* Stochastic agent action. */
            agentAction(A),
            stochastic(A,S,Outcomes),
            doReally(A),                 /* execute A in reality */
            !,                           /* commit to the result */
            senseEffect(A,S,Effect),
            diagnose(Effect,Outcomes,Next)   /* find what happened */
        ),
        /* Tail shared by all three kinds of action. */
        senseExo(Next,SAfterExo),        /* exogenous actions, if any */
        decrement(H,HLeft),
        online(RestProg,SAfterExo,HLeft,FutPolicy,FutUtil),
        Pol = (A : FutPolicy),
        U is Reward + FutUtil
    ).