赛道问题
In [33]:
import numpy as np
import random
import time
import copy

# Single-character markers used for the cells of the track grid.
# Off-track cell: Env.move sends the car back to the start when it lands here.
none_area = ' '
# Finish-line cell.
end_area = '-'
# Start-line cell.
begin_area = '+'
# Ordinary drivable cell.
equ_area = 'O'
# Glyph drawn at the car's position when the track is rendered.
car_size = '@'
In [2]:
def cross(line1, line2):
    """Return True when two 2-D line segments intersect or touch.

    Args:
        line1: Pair of end points ``[p1, p2]``, each a length-2 array-like.
        line2: Pair of end points ``[q1, q2]``, each a length-2 array-like.

    Returns:
        bool: True if the segments share at least one point.  A segment of
        zero length (both end points equal) never intersects anything.
    """
    p1, p2 = np.asarray(line1[0]), np.asarray(line1[1])
    q1, q2 = np.asarray(line2[0]), np.asarray(line2[1])
    vec1 = p2 - p1
    vec2 = q2 - q1

    # Degenerate (point-like) segments are treated as non-intersecting.
    if not vec1.any() or not vec2.any():
        return False

    # Bounding-box rejection.  The straddle test alone reports collinear but
    # disjoint segments as crossing, because every cross product is zero.
    lo1, hi1 = np.minimum(p1, p2), np.maximum(p1, p2)
    lo2, hi2 = np.minimum(q1, q2), np.maximum(q1, q2)
    if (hi1 < lo2).any() or (hi2 < lo1).any():
        return False

    def cross2d(a, b):
        # z-component of the 2-D cross product; written out because np.cross
        # on 2-element vectors is deprecated since NumPy 2.0.
        return a[0] * b[1] - a[1] * b[0]

    # Straddle test: each segment's end points must lie on opposite sides of
    # (or on) the line carrying the other segment.
    side_q1 = cross2d(q1 - p1, vec1)
    side_q2 = cross2d(q2 - p1, vec1)
    side_p1 = cross2d(p1 - q1, vec2)
    side_p2 = cross2d(p2 - q1, vec2)
    return bool(side_q1 * side_q2 <= 0 and side_p1 * side_p2 <= 0)

def printtrack(track):
    """Write the grid to stdout, one row per line with cells concatenated."""
    for row in track:
        # str() mirrors what print() would do for each individual cell.
        print("".join(map(str, row)))
In [68]:
class Env:
    """Racetrack environment on a 2-D grid of cell markers.

    The car has an integer position ``pos`` (row, column) and an integer
    velocity ``speed``.  Each call to :meth:`act` adds an acceleration to the
    velocity and then advances the car by one step.

    Attributes:
        track: 2-D array of cell markers.
        begin, end: ``(k, 2)`` arrays holding the coordinates of the start
            and finish cells, in row-major order.
        begin_line, end_line: First and last coordinate of ``begin`` / ``end``,
            used as the end points of a line segment.
        pos: Current position of the car.
        speed: Current velocity of the car.
        border: Stored as given; no method of this class reads it.
        space_num: Total number of cells in ``track``.
    """

    # Largest magnitude allowed for either velocity component.
    max_speed = 5

    def __init__(self, track, border=None):
        """Locate start/finish cells and place the car on a random start cell.

        Args:
            track: 2-D NumPy array of cell markers.
            border: Optional value kept on the instance; it defaults to None
                so that ``Env(track)`` is a valid call.

        Raises:
            ValueError: If ``track`` has no start cell or no finish cell.
        """
        self.track = track
        self.begin = np.argwhere(track == begin_area)
        self.end = np.argwhere(track == end_area)
        if self.begin.shape[0] == 0 or self.end.shape[0] == 0:
            raise ValueError("track needs at least one start and one finish cell")
        # Built-in int: the np.int alias was removed in NumPy 1.24.
        self.speed = np.zeros(2, dtype=int)
        self.pos = self.rand_ele(self.begin)
        self.begin_line = [self.begin[0], self.begin[-1]]
        self.end_line = [self.end[0], self.end[-1]]
        self.border = border
        self.space_num = np.prod(track.shape)

    def rand_ele(self, ele):
        """Return a copy of a uniformly chosen row of ``ele``."""
        # Index the argument itself (not self.begin) and copy, so the caller
        # never holds a view into the coordinate table.
        return ele[np.random.choice(ele.shape[0])].copy()

    def _reset(self):
        """Put the car back on a random start cell with zero velocity."""
        self.pos = self.rand_ele(self.begin)
        self.speed = np.zeros(2, dtype=int)

    def move(self):
        """Advance the car by its current velocity.

        Returns:
            int: 0 when the step crosses the finish line, otherwise -1.
            After finishing, leaving the grid, or landing on an off-track
            cell the car is reset to the start.
        """
        res_pos = self.pos + self.speed
        # Finish is tested first, so a step that crosses the line counts even
        # if its end point lies outside the grid.
        if cross([res_pos, self.pos], self.end_line):
            self._reset()
            return 0
        if self.jud_out(res_pos) or self.track[tuple(res_pos)] == none_area:
            self._reset()
            return -1
        self.pos = res_pos
        return -1

    def jud_out(self, pos):
        """Return True when ``pos`` lies outside the grid on any axis."""
        return (pos >= self.track.shape).any() or (pos < 0).any()

    def act(self, acc):
        """Apply acceleration ``acc`` and move one step.

        The acceleration is ignored when it would push a velocity component
        beyond ``max_speed`` in magnitude.  The limit is checked on the
        resulting velocity: checking the current one lets a component
        overshoot the limit and then blocks every later change.

        Args:
            acc: Length-2 integer array added to the velocity.

        Returns:
            int: The reward produced by :meth:`move`.
        """
        res = self.speed + acc
        if (np.abs(res) <= self.max_speed).all():
            self.speed = res
        return self.move()

    def showmap(self):
        """Print the track with the car glyph drawn at the current position."""
        for r, line in enumerate(self.track):
            cells = (
                car_size if (r == self.pos[0] and c == self.pos[1]) else str(element)
                for c, element in enumerate(line)
            )
            print("".join(cells))
In [4]:
class Act:
    """Tabular off-policy Monte Carlo control agent with weighted importance sampling.

    ``Q`` and ``C`` are indexed by
    ``(row, col, speed_row, speed_col, action_row, action_col)``.  Negative
    velocity components and the action ``-1`` address the tables through
    NumPy's negative-index wrap-around, so each signed axis must be long
    enough that negative and positive values never share a slot.

    Attributes:
        Q: Action-value estimates; ``-inf`` marks entries never visited.
        C: Cumulative importance weights per entry.
        act_space: ``(9, 2)`` array of all accelerations with components in
            ``{-1, 0, 1}``.
        max_speed: Largest velocity magnitude the tables can hold.
    """

    def __init__(self, r, c, max_speed=6):
        """Allocate the value tables.

        Args:
            r: Number of grid rows.
            c: Number of grid columns.
            max_speed: Largest magnitude of a velocity component that will be
                looked up.  The velocity axes get ``2 * max_speed + 1`` slots
                so that ``-max_speed..max_speed`` map to distinct entries; a
                fixed length of 10 made e.g. -6 and +4 collide.
        """
        self.max_speed = max_speed
        speed_dim = 2 * max_speed + 1
        shape = [r, c, speed_dim, speed_dim, 3, 3]
        self.Q = np.full(shape, -np.inf)
        self.C = np.zeros(shape)
        self.act_space = np.array(np.meshgrid(np.arange(-1, 2), np.arange(-1, 2))).T.reshape(-1, 2)

    @staticmethod
    def _index(parts):
        """Flatten a sequence of length-2 integer vectors into an index tuple."""
        return tuple(np.asarray(parts).ravel())

    def act_g(self, state):
        """Return the greedy acceleration for ``state``.

        Args:
            state: ``(pos, speed)`` pair of length-2 integer arrays.

        Returns:
            Length-2 integer array with components in ``{-1, 0, 1}``.
        """
        values = self.Q[self._index(state)]
        res = np.array(np.unravel_index(np.argmax(values), values.shape))
        # Slot 2 is where the action -1 lands through negative indexing.
        res[res == 2] = -1
        return res

    def act_b(self, state, epsilon=0.1):
        """Return an epsilon-greedy acceleration (the behavior policy).

        With probability ``epsilon`` a uniformly random acceleration is
        returned, otherwise the greedy one.
        """
        if np.random.random() < epsilon:
            return self.act_space[np.random.choice(self.act_space.shape[0])]
        return self.act_g(state)

    def train_screen(self, screen, gamma, epsilon):
        """Update ``Q`` and ``C`` from one recorded episode.

        Args:
            screen: Sequence of ``(pos, speed, action, reward)`` steps in the
                order they happened.
            gamma: Discount factor applied to the return.
            epsilon: Exploration rate the behavior policy used, needed for
                the importance-sampling ratio.
        """
        G = 0
        W = 1
        # Probability the epsilon-greedy behavior policy gives the greedy action.
        greedy_prob = 1 - epsilon + epsilon / self.act_space.shape[0]
        for pos, speed, action, reward in reversed(screen):
            idx = self._index((pos, speed, action))
            if self.Q[idx] == -np.inf:
                # First visit: start the running average from zero.
                self.Q[idx] = 0
            G = gamma * G + reward
            self.C[idx] += W
            self.Q[idx] += W / self.C[idx] * (G - self.Q[idx])
            # The target policy is greedy, so earlier steps carry zero weight
            # as soon as a non-greedy action is encountered.
            if (np.asarray(action) != self.act_g((pos, speed))).any():
                break
            W = W / greedy_prob

    def train(self, env, times=10000, deadline=100000, gamma=1, epsilon=0.1):
        """Run ``times`` episodes in ``env`` and learn from each one.

        An episode ends when ``env.act`` returns 0 or after ``deadline``
        steps, whichever comes first.

        Args:
            env: Environment exposing ``pos``, ``speed`` and ``act(acc)``.
            times: Number of episodes.
            deadline: Maximum number of steps per episode.
            gamma: Discount factor.
            epsilon: Exploration rate of the behavior policy.
        """
        for _ in range(times):
            screen = []
            for _step in range(deadline):
                act = self.act_b((env.pos, env.speed), epsilon=epsilon)
                # Record the state the action was chosen in, before stepping.
                speed = env.speed
                pos = env.pos
                r = env.act(act)
                screen.append((pos, speed, act, r))
                if r == 0:
                    break
            self.train_screen(screen, gamma, epsilon)
In [5]:
# Build the 32x17 track as a grid of single-character cell markers.
# Built-in str is used as dtype: the np.str alias was removed in NumPy 1.24.
track1 = np.zeros([32, 17], dtype=str)
track1[:] = equ_area
# Bottom row is the start line, right-most column the finish line; the
# off-track assignments below trim both down to their final extent.
track1[-1] = begin_area
track1[:, -1] = end_area
# Top-left corner.
track1[0, 0:3] = none_area
track1[1, 0:2] = none_area
track1[2, 0:2] = none_area
track1[3, 0] = none_area
# Left edge, widening towards the bottom.
track1[14:, 0] = none_area
track1[22:, 1] = none_area
track1[29:, 2] = none_area
# Everything right of the vertical straight, below the horizontal one.
track1[7:, 9] = none_area
track1[6:, 10:] = none_area
In [6]:
# Render the freshly built track to stdout.
printtrack(track1)
   OOOOOOOOOOOOO-
  OOOOOOOOOOOOOO-
  OOOOOOOOOOOOOO-
 OOOOOOOOOOOOOOO-
OOOOOOOOOOOOOOOO-
OOOOOOOOOOOOOOOO-
OOOOOOOOOO       
OOOOOOOOO        
OOOOOOOOO        
OOOOOOOOO        
OOOOOOOOO        
OOOOOOOOO        
OOOOOOOOO        
OOOOOOOOO        
 OOOOOOOO        
 OOOOOOOO        
 OOOOOOOO        
 OOOOOOOO        
 OOOOOOOO        
 OOOOOOOO        
 OOOOOOOO        
 OOOOOOOO        
  OOOOOOO        
  OOOOOOO        
  OOOOOOO        
  OOOOOOO        
  OOOOOOO        
  OOOOOOO        
  OOOOOOO        
   OOOOOO        
   OOOOOO        
   ++++++        
In [7]:
# Env.__init__ takes a border argument; it is only stored, so None is passed.
env1 = Env(track1, None)
# Size the agent's tables from the track instead of repeating its dimensions.
act1 = Act(*track1.shape)
In [8]:
# Train with the default episode count; epsilon=0.5 makes the behavior policy
# pick a random acceleration on half of the steps.
act1.train(env1, epsilon=0.5)
In [65]:
# Replay the greedy policy and draw the visited cells on a copy of the track.
show_track1 = track1.copy()
# Step cap so the replay stops even if the greedy policy never finishes.
for _ in range(100000):
    # Mark the current cell before stepping; the position after the last
    # step is therefore not drawn.
    show_track1[tuple(env1.pos)] = car_size
    act = act1.act_g((env1.pos, env1.speed))
    r = env1.act(act)
    # A reward of 0 means the finish line was crossed.
    if r == 0:
        break
printtrack(show_track1)
   OOOOOOOOOOOOO-
  OOOOOOOOOOOOOO-
  OOOOOOOOOOO@OO-
 OOOOOOOOOOOOOOO-
OOOOOOOOOOOOOOOO-
OOOOOOOOOO@OOOOO-
OOOOOOOOOO       
OOOOOOOOO        
OOOOOOOOO        
OOOOOOOO@        
OOOOOOOOO        
OOOOOOOOO        
OOOOOOOOO        
OOOOOOO@O        
 OOOOOOOO        
 OOOOOOOO        
 OOOOOOOO        
 OOOOO@OO        
 OOOOOOOO        
 OOOOOOOO        
 OOOOOOOO        
 OOOO@OOO        
  OOOOOOO        
  OOOOOOO        
  OOOOOOO        
  OOO@OOO        
  OOOOOOO        
  OOOOOOO        
  OOOO@OO        
   OOOOOO        
   OOO@OO        
   +++@++