import numpy as np
import matplotlib.pyplot as plt
# Fixed step size; this is the value returned by const() below.
alpha = 0.1
# NOTE(review): q_A is not referenced anywhere in the visible code -- confirm
# whether it is still needed.
q_A = 0
class Arm:
    """One bandit arm whose rewards are sampled from a normal distribution."""

    def __init__(self, loc=0, scale=1):
        """Store the mean (loc) and standard deviation (scale) of the rewards."""
        self.loc, self.scale = loc, scale

    def play(self):
        """Return a single reward drawn from Normal(self.loc, self.scale)."""
        reward = np.random.normal(loc=self.loc, scale=self.scale)
        return reward

    def change(self):
        """Nudge the mean by a Normal(0, 0.01) increment."""
        drift = np.random.normal(0, 0.01)
        self.loc += drift
class Env:
    """Bandit environment holding a list of identically initialised arms."""

    def __init__(self, arm_num=10, loc=0, scale=1):
        """Create arm_num arms, each constructed as Arm(loc, scale)."""
        self.arm_list = [Arm(loc, scale) for _ in range(arm_num)]

    def act(self, action):
        """Pull the arm at index action and return its reward.

        The chosen arm's mean is perturbed (Arm.change) before the reward
        is sampled; the other arms are not touched by this call.
        """
        chosen = self.arm_list[action]
        chosen.change()
        return chosen.play()
class Act:
    """Epsilon-greedy agent that keeps one value estimate and one pull count per action."""

    def __init__(self, step_size=lambda x:0.1, act_num=10, epsilon=0.1):
        """Set up the agent.

        step_size is a callable mapping an action's pull count to the step
        size used in update(); act_num is the number of actions; epsilon is
        the probability of picking a uniformly random action in play().
        """
        self.step_size = step_size
        self.epsilon = epsilon
        self.act_num = act_num
        estimates = np.zeros(act_num)
        counts = np.zeros(act_num).astype('int')
        # Record array with default field names: f0 = estimate, f1 = pull count.
        self.act_value = np.rec.fromarrays([estimates, counts])

    def play(self):
        """Choose an action epsilon-greedily, bump its pull count, and return it."""
        explore = np.random.choice([0, 1], p=[1 - self.epsilon, self.epsilon])
        if explore:
            action = np.random.choice(range(self.act_num))
        else:
            # Greedy choice: first index holding the largest estimate.
            action = np.argmax(self.act_value['f0'])
        # The count is incremented here, so update() sees a count that
        # already includes the current pull.
        self.act_value['f1'][action] += 1
        return action

    def update(self, action, r):
        """Move the estimate for action towards reward r by step_size(count)."""
        record = self.act_value[action]
        estimate, count = record[0], record[1]
        record[0] = estimate + self.step_size(count) * (r - estimate)
def average(n):
    """Return the reciprocal of n.

    Passed to Act as its step_size callable, where n is the pull count of
    the action being updated.
    """
    step = 1 / n
    return step
def const(n):
    """Return the module-level constant alpha; the argument n is ignored.

    Passed to Act as its step_size callable, so every update uses the same
    step size.
    """
    fixed_step = alpha
    return fixed_step
def _run_experiment(step_size, runs=10, steps=10000):
    """Run independent bandit runs and return their per-step running curves.

    Args:
        step_size: callable passed to Act as its step-size schedule.
        runs: number of independent runs.
        steps: number of actions taken in each run.

    Returns:
        (reward_runs, accuracy_runs): two lists with one entry per run. Each
        entry is a list of length steps holding, respectively, the running
        mean reward and the running fraction of steps on which the agent
        chose the arm with the highest current mean.
    """
    reward_runs = []
    accuracy_runs = []
    for _ in range(runs):
        # A fresh agent and environment per run: reusing them would carry
        # value estimates, pull counts and drifted arm means from one run
        # into the next, so the runs being averaged would not be independent.
        agent = Act(step_size)
        env = Env()
        total_reward = 0
        correct = 0
        reward_curve = []
        accuracy_curve = []
        for step in range(1, steps + 1):
            # The best arm is determined before the pull, because Env.act
            # perturbs the chosen arm's mean.
            best_act = np.argmax([arm.loc for arm in env.arm_list])
            action = agent.play()
            if action == best_act:
                correct += 1
            accuracy_curve.append(correct / step)
            reward = env.act(action)
            total_reward += reward
            reward_curve.append(total_reward / step)
            agent.update(action, reward)
        reward_runs.append(reward_curve)
        accuracy_runs.append(accuracy_curve)
    return reward_runs, accuracy_runs


def _plot_results(curves, show_legend=False):
    """Draw reward (top) and accuracy (bottom) curves on figure 1 and show it.

    Args:
        curves: iterable of (label, reward_curve, accuracy_curve) tuples.
        show_legend: when True, add a legend to both subplots.
    """
    plt.figure(1)
    plt.subplots_adjust(hspace=0.5)
    reward_ax = plt.subplot(2, 1, 1)
    accuracy_ax = plt.subplot(2, 1, 2)
    reward_ax.set_title('Reward')
    accuracy_ax.set_title('Accuracy')
    for label, reward_curve, accuracy_curve in curves:
        reward_ax.plot(reward_curve, label=label)
        accuracy_ax.plot(accuracy_curve, label=label)
    if show_legend:
        reward_ax.legend(loc=5, ncol=2)
        accuracy_ax.legend(loc=5, ncol=2)
    plt.show()


# Sample-average step size (1 / n).
avg_all_sum_list, avg_all_acc_list = _run_experiment(average)
res_avg_sum = np.mean(avg_all_sum_list, axis=0)
res_avg_acc = np.mean(avg_all_acc_list, axis=0)
_plot_results([('Average', res_avg_sum, res_avg_acc)])

# Constant step size (alpha).
const_all_sum_list, const_all_acc_list = _run_experiment(const)
res_const_sum = np.mean(const_all_sum_list, axis=0)
res_const_acc = np.mean(const_all_acc_list, axis=0)
_plot_results([('Const', res_const_sum, res_const_acc)])

# Both step-size schedules on the same axes for comparison.
_plot_results(
    [
        ('Average', res_avg_sum, res_avg_acc),
        ('Const', res_const_sum, res_const_acc),
    ],
    show_legend=True,
)