# DEMO #1 OF Q LEARNING.
#
# Runs a Q-learning simulation on a small circular world and writes plots of
# the run to "Qplots.ps".  The learner itself (simulate) and the plotting
# helper (hplot) are not defined here; they are expected to come from
# Qlearn.r, which is sourced below.

source("Qlearn.r")

# Run length and the three tuning values handed to simulate().
# NOTE(review): gamma / alpha / epsilon are presumably the discount factor,
# learning rate and exploration probability - confirm against Qlearn.r.

n.steps = 500000
gamma = 0.95
alpha = 0.05
epsilon = 0.1

# Size of the world.  n.actions is not used in this file and neither value is
# passed to simulate() explicitly, so Qlearn.r presumably reads them as
# globals - do not rename them without checking there.

n.states = 10     # Positions arranged in a circle
n.actions = 3     # 1 = move to lower, 2 = stay in place, 3 = move to higher


# FUNCTION TO GENERATE INITIAL STATE.
#
# Returns a single state drawn uniformly from 1..n.states.

init1 = function () {
  sample(n.states, 1)
}


# FUNCTION TO GENERATE REWARDS AND TRANSITION TO NEXT STATE.
#
# Arguments:
#   s  current state, an integer in 1..n.states
#   a  action: 1 = move to lower, 2 = stay in place, 3 = move to higher
#
# Returns a list with elements
#   s  the next state
#   r  the reward received, 0 or 1
#
# The order of the random draws below is significant: changing it would change
# the results obtained for a given seed.

world1 = function (s, a) {

  if (runif(1) < 0.05) {
    # With probability 0.05 the action is ignored and the next state is
    # drawn uniformly at random.
    s = sample(n.states, 1)
  } else {
    # Otherwise move by a-2, i.e. -1, 0 or +1, wrapping around the circle.
    s = s + (a - 2)
    if (s < 1) s = n.states
    if (s > n.states) s = 1
  }

  if (a == 2) {
    # Staying in place never earns a reward.
    r = 0
  } else {
    # A move earns reward 1 with probability (new state) / n.states,
    # so higher-numbered states pay off more often.
    r = as.numeric(runif(1) < s / n.states)
  }

  list(s = s, r = r)
}


# DO SIMULATION AND DISPLAY RESULTS.

set.seed(1)   # fixed seed so that the run is reproducible

result1 = simulate(init1, world1, gamma, alpha, epsilon, n.steps)

# Plots go to a portrait-orientation PostScript file.  The argument is spelled
# out as horizontal = FALSE rather than horiz = F: F is an ordinary variable
# that can be reassigned, and abbreviated argument names rely on partial
# matching.

postscript("Qplots.ps", horizontal = FALSE, width = 6.5, height = 4,
           pointsize = 9)

# 3 x 3 grid of panels, filled column by column.
# NOTE(review): presumably each hplot() call draws three panels and so fills
# one column - confirm against Qlearn.r.

par(mfcol = c(3, 3), mar = c(4.1, 4.1, 1, 1))

hplot(result1$history[1:200, ])                        # first 200 rows
hplot(result1$history[seq(500, n.steps, by = 500), ])  # every 500th row
hplot(result1$history[(n.steps - 200):n.steps, ])      # last 201 rows

dev.off()

print(round(result1$Q, 2))