In [33]:
import numpy as np
import random
import time
import copy
# Single-character markers used to describe and draw the racetrack grid.
# Off-track cell: moving onto it sends the car back to the start (see Env.move).
none_area = ' '
# Finish cell: the cells carrying this marker define the finish line.
end_area = '-'
# Start cell: the car is placed on a randomly chosen cell carrying this marker.
begin_area = '+'
# Ordinary drivable cell; the track is filled with it before the other markers are drawn.
equ_area = 'O'
# Symbol printed in place of the cell the car occupies when the map is shown.
car_size = '@'
In [2]:
def cross(line1, line2):
    """
    Decide whether two 2-D line segments intersect.

    Parameters
    ----------
    line1, line2 : sequence of two points
        Each segment is given as ``[start, end]``; every point is a
        length-2 array-like of coordinates.

    Returns
    -------
    bool
        True when the segments share at least one point (touching end
        points and overlapping collinear segments count). False otherwise,
        and always False when either segment has zero length.
    """
    p1, p2 = np.asarray(line1[0]), np.asarray(line1[1])
    q1, q2 = np.asarray(line2[0]), np.asarray(line2[1])
    vec1 = p2 - p1
    vec2 = q2 - q1

    # A zero-length segment (e.g. a car that did not move) never crosses anything.
    if (vec1 == 0).all() or (vec2 == 0).all():
        return False

    def det(a, b):
        # z-component of the 2-D cross product, written out by hand because
        # np.cross on 2-element vectors is deprecated since NumPy 2.0.
        return a[0] * b[1] - a[1] * b[0]

    # Straddle test: the end points of each segment must not lie strictly on
    # the same side of the line through the other segment.
    if det(q1 - p1, vec1) * det(q2 - p1, vec1) > 0:
        return False
    if det(p1 - q1, vec2) * det(p2 - q1, vec2) > 0:
        return False

    # Bounding-box rejection: the straddle test alone accepts collinear
    # segments that do not overlap (every product above is 0), so the
    # axis-aligned extents must overlap as well.
    if (np.maximum(p1, p2) < np.minimum(q1, q2)).any():
        return False
    if (np.maximum(q1, q2) < np.minimum(p1, p2)).any():
        return False
    return True
def printtrack(track):
    """
    Print a 2-D grid of cells to standard output.

    Every row of ``track`` becomes one text line; the cells of a row are
    written back to back with no separator.
    """
    for row in track:
        print("".join(str(cell) for cell in row))
In [68]:
class Env:
    """
    Racetrack environment: a car with an integer position and speed on a
    character grid.

    Attributes
    ----------
    track : 2-D array of single-character cell markers.
    begin, end : (n, 2) arrays with the (row, col) of every start / finish cell.
    begin_line, end_line : [first, last] cell of the start / finish cells,
        used as the end points of a line segment.
    pos, speed : length-2 integer arrays (row, col).
    border : stored as given; not used by any method of this class.
    space_num : total number of grid cells.
    """

    # Largest absolute value either speed component may take.
    MAX_SPEED = 5

    def __init__(self, track, border=None):
        """
        Parameters
        ----------
        track : 2-D array of cell markers; must contain start and finish cells.
        border : optional, kept on the instance unchanged.
        """
        self.track = track
        self.border = border
        # np.where yields (rows, cols); dstack + [0] turns that into (n, 2) pairs.
        self.begin = np.dstack(np.where(track == begin_area))[0]
        self.end = np.dstack(np.where(track == end_area))[0]
        self.begin_line = [self.begin[0], self.begin[-1]]
        self.end_line = [self.end[0], self.end[-1]]
        self.space_num = np.prod(track.shape)
        # Builtin int: the np.int alias was removed in NumPy 1.24.
        self.speed = np.zeros(2, dtype=int)
        self.pos = self.rand_ele(self.begin)

    def rand_ele(self, ele):
        """Return one uniformly chosen row of the 2-D array ``ele``."""
        return ele[np.random.choice(ele.shape[0])]

    def _restart(self):
        """Put the car on a random start cell with zero speed."""
        self.pos = self.rand_ele(self.begin)
        self.speed = np.zeros(2, dtype=int)

    def move(self):
        """
        Advance the car by its current speed.

        Returns
        -------
        int
            0 when the step crosses the finish line (the car is then put
            back on a start cell), -1 for every other step. Leaving the
            grid or entering an off-track cell also restarts the car.
        """
        res_pos = self.pos + self.speed
        # The finish test comes first: a finishing step may end outside the grid.
        if cross([res_pos, self.pos], self.end_line):
            self._restart()
            return 0
        if self.jud_out(res_pos) or self.track[tuple(res_pos)] == none_area:
            self._restart()
        else:
            self.pos = res_pos
        return -1

    def jud_out(self, pos):
        """Return True when ``pos`` lies outside the grid on any axis."""
        return (pos >= self.track.shape).any() or (pos < 0).any()

    def act(self, acc):
        """
        Apply the acceleration ``acc`` (length-2, row/col) and move one step.

        The acceleration is ignored when it would push either speed
        component beyond ``MAX_SPEED`` in absolute value. The limit is
        checked on the resulting speed, so the car can always slow down
        again.

        Returns
        -------
        int
            The reward reported by ``move``.
        """
        res = self.speed + acc
        if not (np.abs(res) > self.MAX_SPEED).any():
            self.speed = res
        return self.move()

    def showmap(self):
        """Print the track with the car symbol drawn at the car's position."""
        for r, line in enumerate(self.track):
            for c, element in enumerate(line):
                if r == self.pos[0] and c == self.pos[1]:
                    print(car_size, end="")
                else:
                    print(element, end="")
            print()
In [4]:
class Act:
    """
    Tabular off-policy Monte Carlo learner over (position, speed, action).

    Attributes
    ----------
    Q : action-value table indexed by
        ``[row, col, speed_row, speed_col, acc_row, acc_col]``;
        entries start at -inf and are set to 0 on the first update.
    C : cumulative importance weights, same shape as ``Q``.
    act_space : (9, 2) array of every acceleration pair with components in
        {-1, 0, 1}.

    Speeds and accelerations are used directly as indices, so negative
    values address slots from the end of their axis.
    """

    def __init__(self, r, c, max_speed=6):
        """
        Parameters
        ----------
        r, c : number of grid rows and columns.
        max_speed : largest absolute speed component that will be looked up.
            Each speed axis gets ``2 * max_speed + 1`` slots so that a
            negative speed, which wraps around the axis, never shares a
            slot with a positive one.
        """
        speed_slots = 2 * max_speed + 1
        shape = [r, c, speed_slots, speed_slots, 3, 3]
        self.Q = np.full(shape, -np.inf)
        self.C = np.zeros(shape)
        self.act_space = np.array(
            np.meshgrid(np.arange(-1, 2), np.arange(-1, 2))
        ).T.reshape(-1, 2)

    def act_g(self, state):
        """
        Return the greedy acceleration for ``state = (pos, speed)``.

        The result is a length-2 integer array with components in {-1, 0, 1}.
        """
        values = self.Q[tuple(np.array(state).flatten())]
        res = np.array(np.unravel_index(np.argmax(values), values.shape))
        # Slot 2 of an acceleration axis is where index -1 lands.
        res[res == 2] = -1
        return res

    def act_b(self, state, epsilon=0.1):
        """
        Behaviour policy: with probability ``epsilon`` return a uniformly
        random acceleration, otherwise the greedy one.
        """
        state = np.array(state)
        if np.random.choice([True, False], p=[epsilon, 1 - epsilon]):
            return self.act_space[np.random.choice(self.act_space.shape[0])]
        return self.act_g(state)

    def train_screen(self, screen, gamma, epsilon):
        """
        Update ``Q`` and ``C`` from one recorded episode.

        Parameters
        ----------
        screen : list of ``(pos, speed, act, reward)`` steps in time order.
        gamma : discount factor applied to the return.
        epsilon : exploration rate the episode was generated with; used for
            the importance weight.

        The episode is walked backwards and the walk stops at the first step
        whose action differs from the current greedy action.
        """
        G = 0
        W = 1
        # Probability used for the greedy action in the importance weight.
        greedy_prob = 1 - epsilon + (epsilon / self.act_space.shape[0])
        for con in screen[::-1]:
            state = np.array(con[:-1])
            key = tuple(state.flatten())
            if self.Q[key] == -np.inf:
                # First visit: replace the "unseen" marker so arithmetic works.
                self.Q[key] = 0
            G = gamma * G + con[3]
            self.C[key] += W
            self.Q[key] += W / self.C[key] * (G - self.Q[key])
            # state[:-1] drops the action and leaves (pos, speed).
            if (con[2] != self.act_g(state[:-1])).any():
                break
            W = W / greedy_prob

    def train(self, env, times=10000, deadline=100000, gamma=1, epsilon=0.1):
        """
        Generate ``times`` episodes with the behaviour policy and learn from
        each of them.

        Parameters
        ----------
        env : object exposing ``pos``, ``speed`` and ``act(acc) -> reward``.
        times : number of episodes.
        deadline : maximum number of steps recorded per episode.
        gamma, epsilon : passed on to ``train_screen`` / ``act_b``.

        An episode ends when ``env.act`` returns 0 or the deadline is hit.
        """
        for _ in range(times):
            screen = []
            for _ in range(deadline):
                # Remember the state the action was chosen in.
                pos, speed = env.pos, env.speed
                act = self.act_b((pos, speed), epsilon=epsilon)
                r = env.act(act)
                screen.append((pos, speed, act, r))
                if r == 0:
                    break
            self.train_screen(screen, gamma, epsilon)
In [5]:
# Build a 32 x 17 racetrack as a grid of single-character markers.
# Builtin str is used for the dtype: the np.str alias was removed in NumPy 1.24.
track1 = np.zeros([32, 17], dtype=str)
track1[:] = equ_area
# Start cells along the bottom row, finish cells down the last column.
# Order matters: later assignments overwrite earlier ones where they overlap.
track1[-1] = begin_area
track1[:, -1] = end_area
# Carve the off-track regions on the left edge ...
track1[0, 0:3] = none_area
track1[1, 0:2] = none_area
track1[2, 0:2] = none_area
track1[3, 0] = none_area
track1[14:, 0] = none_area
track1[22:, 1] = none_area
track1[29:, 2] = none_area
# ... and the large off-track block on the lower right.
track1[7:, 9] = none_area
track1[6:, 10:] = none_area
In [6]:
# Show the freshly built track, one text line per grid row.
printtrack(track1)
In [7]:
# Env's constructor takes a `border` argument in addition to the track.
# The visible code only stores it, so None is passed explicitly.
env1 = Env(track1, border=None)
# The learner's table is sized to the 32 x 17 grid of track1.
act1 = Act(32, 17)
In [8]:
# Train on env1 with exploration rate 0.5; the episode count, step limit and
# discount keep the defaults of Act.train (times=10000, deadline=100000, gamma=1).
act1.train(env1, epsilon=0.5)
In [65]:
# Roll out the greedy policy and mark every visited cell on a copy of the track.
show_track1 = track1.copy()
for _ in range(100000):
    # Mark the current cell before moving on.
    show_track1[tuple(env1.pos)] = car_size
    act = act1.act_g((env1.pos, env1.speed))
    r = env1.act(act)
    if r == 0:
        # A reward of 0 means the finish line was crossed.
        break
printtrack(show_track1)