# DEMO #1 OF Q LEARNING.

source("Qlearn.r")

# Length of the simulation, passed as the last argument to simulate() below.
n.steps <- 500000

# Learning parameters handed to simulate().  By their names these are
# presumably the discount factor, the learning rate and the exploration
# probability -- confirm against Qlearn.r.
gamma   <- 0.95
alpha   <- 0.05
epsilon <- 0.1

# Size of the problem.  The states are positions arranged in a circle.  The
# actions are: 1 = move to lower, 2 = stay in place, 3 = move to higher.
n.states  <- 10
n.actions <- 3


# FUNCTION TO GENERATE INITIAL STATE.

# Draw the starting state: a single value sampled from 1..n.states, where
# n.states is the global giving the number of positions on the circle.
init1 <- function() {
  start <- sample(n.states, size = 1)
  start
}


# FUNCTION TO GENERATE REWARDS AND TRANSITION TO NEXT STATE.

# One step of the environment.
#
# Arguments:
#   s  current state (a position in 1..n.states on the circle)
#   a  action taken (1 = move to lower, 2 = stay in place, 3 = move to higher)
#
# Returns a list with elements:
#   s  the next state
#   r  the reward received (0 or 1)
#
# The random draws are made in a fixed order (jump test, then the jump target
# if needed, then the reward draw if needed), so results are reproducible
# under set.seed().
world1 <- function(s, a) {
  # With probability 0.05 the action is ignored and the state jumps to a
  # position drawn at random from 1..n.states.
  jumped <- runif(1) < 0.05

  if (!jumped) {
    # Normal move: a - 2 is -1, 0 or +1; stepping off either end of the
    # range wraps around to the other end of the circle.
    pos <- s + (a - 2)
    if (pos < 1) {
      pos <- n.states
    }
    if (pos > n.states) {
      pos <- 1
    }
  } else {
    pos <- sample(n.states, size = 1)
  }

  # Staying in place never pays.  Any other action pays 1 with probability
  # equal to (new state) / n.states, and 0 otherwise.
  reward <- if (a == 2) {
    0
  } else {
    as.numeric(runif(1) < pos / n.states)
  }

  list(s = pos, r = reward)
}


# DO SIMULATION AND DISPLAY RESULTS.

# Fix the random seed so that repeated runs of this demo give the same results.
set.seed(1)

# Run the simulation.  simulate() is not defined in this file (presumably it
# comes from Qlearn.r); the result is used below through its 'history' and
# 'Q' elements.
result1 <- simulate(init1, world1, gamma, alpha, epsilon, n.steps)

# Write the plots to a PostScript file in portrait orientation.  The argument
# name 'horizontal' is spelled out in full and the constant FALSE is used
# rather than the reassignable variable F.
postscript("Qplots.ps", horizontal = FALSE, width = 6.5, height = 4,
           pointsize = 9)

# 3 x 3 grid of panels, filled column by column.  Presumably each hplot()
# call (not defined in this file) fills one column -- confirm in Qlearn.r.
par(mfcol = c(3, 3), mar = c(4.1, 4.1, 1, 1))

# Beginning of the run: history rows 1 to 200.
hplot(result1$history[1:200, ])

# Whole run, thinned to every 500th row.
hplot(result1$history[seq(500, n.steps, by = 500), ])

# End of the run: rows n.steps-200 to n.steps (201 rows).
hplot(result1$history[(n.steps - 200):n.steps, ])

dev.off()

# Print the Q element of the result, rounded to two decimal places.
print(round(result1$Q, 2))