김성훈 교수님의 Reinforcement Learning 강의 lab4의 add random noise가 적용된 Q-learning 실습 예제를 구현한 소스입니다.
강의가 필요하신 분을 위해 link 남겨드립니다.
https://www.youtube.com/watch?v=VYOq-He90bE&index=7&list=PLlMkM4tgfjnKsCWav-Z2F-MMFRx-2gMGG
# Tabular Q-learning on a deterministic 4x4 FrozenLake, where exploration is
# done by adding decaying Gaussian noise to the Q-values before taking argmax.
import gym
import numpy as np
import matplotlib.pyplot as plt
from gym.envs.registration import register

# Register a FrozenLake variant under a new id with is_slippery=False on the
# 4x4 map, so that gym.make() below can build it by name.
register(
    id='FrozenLake-v3',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name': '4x4', 'is_slippery': False}
)

env = gym.make('FrozenLake-v3')

# Q-table: one row per state, one column per action, all zeros to start.
Q = np.zeros([env.observation_space.n, env.action_space.n])

dis = .99            # discount factor applied to the best next-state value
num_episodes = 2000  # number of training episodes

# Total reward collected in each episode, in episode order.
rList = []

for i in range(num_episodes):
    # NOTE(review): this uses reset() as returning the state directly and
    # step() as returning a 4-tuple; newer Gym/Gymnasium releases changed
    # both signatures -- confirm against the installed version.
    state = env.reset()
    rAll = 0
    done = False

    while not done:
        # Pick the action with the highest noisy Q-value. The noise is
        # divided by (i + 1), so it shrinks as the episode index grows and
        # the choice becomes increasingly greedy.
        action = np.argmax(Q[state, :] + np.random.randn(1, env.action_space.n) / (i + 1))

        new_state, reward, done, _ = env.step(action)

        # Overwrite the entry with the immediate reward plus the discounted
        # best value of the next state (no learning-rate blending).
        Q[state, action] = reward + dis * np.max(Q[new_state, :])

        rAll += reward
        state = new_state

    rList.append(rAll)

print("Success rate: " + str(sum(rList)/num_episodes))
print("Final Q-Table Values")
print(Q)

# One bar per episode; bar height is that episode's total reward.
plt.bar(range(len(rList)), rList, color="blue")
plt.show()