# Commit 240251c0, authored by hoschar
# Commit message: "my half solution"
# Parent commit: efdd32b3
import numpy as np
import random
def q_learning(env, *, epsilon=0.2, learning_rate=0.055, discount=0.9,
               max_steps=10000):
    """Learn a tabular action-value function with epsilon-greedy Q-learning.

    Episodes are started with ``env.reset()`` and advanced with
    ``env.step(action)`` until the environment reports ``done`` or the step
    budget is exhausted. The budget is shared across all episodes.

    Args:
        env: Environment object providing ``getNumStates()``,
            ``getNumActions()``, ``reset()`` (returns the start state) and
            ``step(action)`` (returns ``(reward, next_state, done)``).
            States and actions are used as integer indices into the table.
        epsilon: Probability of picking a uniformly random action instead of
            the currently greedy one.
        learning_rate: Step size of the temporal-difference update.
        discount: Discount factor applied to the best next-state value.
        max_steps: Total number of ``env.step`` calls across all episodes.

    Returns:
        A ``numpy`` array of shape ``(num_states, num_actions)`` holding the
        learned Q-values.
    """
    num_states = env.getNumStates()
    num_actions = env.getNumActions()
    q_values = np.zeros((num_states, num_actions))

    steps_taken = 0
    while steps_taken < max_steps:
        state = env.reset()
        done = False
        while not done and steps_taken < max_steps:
            # Explore with probability epsilon, otherwise exploit. argmax
            # returns the first maximal index, so ties go to the lowest
            # action number.
            if random.uniform(0, 1) < epsilon:
                action = random.randint(0, num_actions - 1)
            else:
                action = int(np.argmax(q_values[state]))

            reward, next_state, done = env.step(action)
            steps_taken += 1

            # The bootstrap term is applied even when `done` is set. A state
            # that is only ever reached as the last step of an episode is
            # never updated itself, so its row stays at zero.
            td_target = reward + discount * np.max(q_values[next_state])
            q_values[state, action] += learning_rate * (
                td_target - q_values[state, action]
            )
            state = next_state

    return q_values
# --- GitLab page footer (not part of the source file) ---
# Supports Markdown
# 0% or .
# You are about to add 0 people to the discussion. Proceed with caution.
# Finish editing this message first!
# Please register or sign in to comment