-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsolver.py
More file actions
123 lines (108 loc) · 4.77 KB
/
solver.py
File metadata and controls
123 lines (108 loc) · 4.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import numpy as np
import random
import math
import cv2
random.seed(4)
np.random.seed(4)
MIN_EXPLORE_RATE = 0.001
MIN_LEARNING_RATE = 0.2
class MazeSolver:
def __init__(self, maze_size):
self.optimal_U = np.ones((maze_size[0] + 2, maze_size[1] + 2)) # the maze solution
self.road_U = np.ones((maze_size[0] + 2, maze_size[1] + 2)) # penalty to restrict agent from going where it has already been
self.init_square_borders(maze_size)
self.optimal_U = self.inf_padding(self.optimal_U)
self.road_U = self.inf_padding(self.road_U)
self.encoding = np.array(['N', 'S', 'E', 'W'])
self.path = []
self.explore_rate = 0
self.learning_rate = 0
self.step_number = 0
self.maze_size = maze_size
self.last_expected_position = (0,0)
self.min_found_step_count = maze_size[0] * maze_size[1]
self.inference = False
def save_state(self):
self.best_U = self.optimal_U
def load_state(self):
self.explore_rate = 0
self.optimal_U = self.best_U
self.inference = True
def init_square_borders(self, maze_size):
self.borders_dict = {}
for x in range(maze_size[0] + 2):
for y in range(maze_size[1] + 2):
self.borders_dict[str((x, y))] = []
@staticmethod
def inf_padding(map):
map[:, 0] = np.inf
map[:, -1] = np.inf
map[0, :] = np.inf
map[-1, :] = np.inf
return map
def optimal_step(self, x, y):
# north, south, east, west
neighbouring_potentials = np.ones(4) * np.inf
for i in range(len(neighbouring_potentials)):
next_position = self.encode_action_to_next_position(i, x, y)
if not str(next_position) in self.borders_dict[str((x, y))]:
neighbouring_potentials[i] = self.optimal_U[next_position] + self.road_U[next_position]
# q'' + dU/dq = 0
if self.inference:
print(x, y, neighbouring_potentials, self.optimal_U[x, y], self.optimal_U[x+1, y], self.optimal_U[x, y+1], self.optimal_U[x-1, y], self.optimal_U[x, y-1], self.borders_dict[str((x, y))])
optimal_steps = np.argwhere(neighbouring_potentials == np.min(neighbouring_potentials))[:, 0]
optimal_step = np.random.choice(optimal_steps)
return optimal_step
def select_action(self, state, env):
x, y = state
x = int(x) + 1 # padding
y = int(y) + 1 # padding
# self.road_U[x, y] += np.max(self.optimal_U[self.optimal_U != np.inf])
self.road_U[x, y] += 1#self.step_number
# self.step_number += 1
# Select a random action
if random.random() < self.explore_rate:
while True:
action = env.action_space.sample()
next_position = self.encode_action_to_next_position(action, x, y)
# do not try to go into borders
if self.optimal_U[next_position] != np.inf and str(next_position) not in self.borders_dict[str((x, y))]:
break
# Select the action with the highest q
else:
action = self.optimal_step(x, y)
self.last_expected_position = self.encode_action_to_next_position(action, x, y)
return self.encoding[action]
def save_env_reaction(self, new_q):
x_new, y_new = new_q
x_new += 1
y_new += 1
self.path.append([x_new, y_new])
if x_new != self.last_expected_position[0] or y_new != self.last_expected_position[1]:
self.borders_dict[str((x_new, y_new))].append(str(self.last_expected_position))
def save_path_and_reset(self, step_count):
self.step_number = 0
self.road_U *= step_count / self.min_found_step_count
self.road_U[self.road_U == 1] = np.max(self.road_U[self.road_U != np.inf])
self.optimal_U *= self.road_U
self.optimal_U /= np.max(self.optimal_U[self.optimal_U != np.inf])
self.road_U = np.ones((self.maze_size[0] + 2, self.maze_size[1] + 2))
self.road_U = self.inf_padding(self.road_U)
if step_count < self.min_found_step_count:
self.save_state()
self.min_found_step_count = step_count
@staticmethod
def encode_action_to_next_position(action, x, y):
if action == 0:
y -= 1
elif action == 1:
y += 1
elif action == 2:
x += 1
elif action == 3:
x -= 1
return x, y
def get_explore_rate(t, decay_factor):
return max(MIN_EXPLORE_RATE, min(0.8, 1.0 - math.log10((t+1)/decay_factor)))
def get_learning_rate(t, decay_factor):
return max(MIN_LEARNING_RATE, min(0.8, 1.0 - math.log10((t+1)/decay_factor)))