6 changes: 5 additions & 1 deletion .gitignore
@@ -1,2 +1,6 @@
 *.pyc
-*.vscode
+*.vscode
+env
+env/*
+__pycache__
+__pycache__/*
Binary file added Agents_Moves_Per_Game_input6_output6.png
Binary file added Agents_Score_Per_Game_input6_output6.png
26 changes: 26 additions & 0 deletions ExperiementResults/BatchSize/Paramters.txt
@@ -0,0 +1,26 @@
Experiment: Batch Size
-- 2, 4, 8, 16, 32

DQN Layers
- input: 5
- hidden1: 16
- hidden2: 16
- output: 5

board size: 3x3
num cans: 9 (full board)

num games: 256 / 512 / 768 / 1024

epsilon_dec = 0.9 / (0.8 * num_games)
-- decays over the first 80% of games, then holds at 0.1

learning rate = 0.003

rewards
- pickup
- can: 10000
- no can: -10
- move
- wall: -50
- empty: -5
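The epsilon schedule described above can be sketched as follows (a minimal sketch; the function name and the 1.0 starting epsilon are assumptions, not taken from the repo):

```python
def epsilon_for_game(game_index, num_games, eps_start=1.0, eps_min=0.1, frac=0.8):
    """Linear epsilon decay: reach eps_min after the first `frac` of games."""
    eps_dec = 0.9 / (frac * num_games)  # e.g. 0.9 / (0.8 * num_games)
    return max(eps_start - eps_dec * game_index, eps_min)

# With num_games = 256: epsilon starts at 1.0, reaches 0.1 around game 205,
# and stays at 0.1 for the remaining ~20% of games.
```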
@@ -0,0 +1,30 @@
Experiment: EpsilonDecrease --> 0.9 / ( x * num_games )
- x: 0.4, 0.6, 0.8, 0.99

learning rate: 0.003

DQN Layers
- input: 5
- hidden1: 16
- hidden2: 16
- output: 5

Board Size: 3x3
Num cans: 9 (full board)

gamma = 0.99

num games = 200

epsilon_dec = 0.9 / (0.8 * num_games)
-- decays over the first 80% of games, then holds at 0.1

batch size = 8

rewards
- pickup
- can: 10000
- no can: -10
- move
- wall: -50
- empty: -5
@@ -0,0 +1,4 @@
No useful results; the model's performance still plummets.

Sadly, this shows that once the model chooses actions deliberately, it very frequently chooses moving over picking up cans.
@@ -0,0 +1,28 @@
Experiment: Learning Rate
- 0.001, 0.003, 0.005

DQN Layers
- input: 5
- hidden1: 16
- hidden2: 16
- output: 5

Board Size: 3x3
Num cans: 9 (full board)

gamma = 0.99

num games = 200

epsilon_dec = 0.9 / (0.8 * num_games)
-- decays over the first 80% of games, then holds at 0.1

batch size = 8

rewards
- pickup
- can: 10000
- no can: -10
- move
- wall: -50
- empty: -5
@@ -0,0 +1 @@
Varying the learning rate between 0.001, 0.003, and 0.005 did not produce a significant change in the model's behavior.
@@ -0,0 +1,26 @@
Experiment: Reward Structure

key: pick up can, pick up no can, move empty, move wall
1) 10, -5, 0.1, -5
2) 10, -5, -0.1, -5
3) 10, -5, -1, -5

learning rate: 0.003

DQN Layers
- input: 5
- hidden1: 16
- hidden2: 16
- output: 5

Board Size: 4x4
Num cans: 16 (full board)

gamma = 0.99

num games = 768

epsilon_dec = 0.9 / (0.8 * num_games)
-- decays over the first 80% of games, then holds at 0.1

batch size = 8
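The three variants above differ only in the move-onto-empty value; as a table-in-code sketch (the dict layout and names are mine, not the repo's):

```python
# Reward variants from the key above: pick up can, pick up no can, move empty, move wall.
REWARD_VARIANTS = {
    1: {"pickup_can": 10, "pickup_no_can": -5, "move_empty": 0.1,  "move_wall": -5},
    2: {"pickup_can": 10, "pickup_no_can": -5, "move_empty": -0.1, "move_wall": -5},
    3: {"pickup_can": 10, "pickup_no_can": -5, "move_empty": -1,   "move_wall": -5},
}

def reward_for(variant, event):
    """Look up the reward an event earns under a given variant."""
    return REWARD_VARIANTS[variant][event]
```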
26 changes: 26 additions & 0 deletions ExperiementResults/BoardFullOfcans/4x4 board/Rewards/Paramters.txt
@@ -0,0 +1,26 @@
Experiment: Reward Structure

key: pick up can, pick up no can, move empty, move wall
1) 10, -5, 0.1, -5
2) 10, -5, -0.1, -5
3) 10, -5, -1, -5

learning rate: 0.003

DQN Layers
- input: 5
- hidden1: 16
- hidden2: 16
- output: 5

Board Size: 4x4
Num cans: 16 (full board)

gamma = 0.99

num games = 768

epsilon_dec = 0.9 / (0.8 * num_games)
-- decays over the first 80% of games, then holds at 0.1

batch size = 8
33 changes: 33 additions & 0 deletions GraphUtils.py
@@ -0,0 +1,33 @@
import numpy as np
from matplotlib import pyplot as plt


def PlotScorePerGame(x_axis, scores, filename):
    fig, ax = plt.subplots()
    ax.plot(x_axis, scores)

    ax.set(xlabel='Games', ylabel='Score', title='Agent\'s Score Per Game')
    ax.grid()

    fig.savefig(filename)
    plt.show()

def PlotMovesPerGame(x_axis, moves, filename):
    fig, ax = plt.subplots()
    ax.plot(x_axis, moves)

    ax.set(xlabel='Games', ylabel='Moves', title='Agent\'s Moves Per Game')
    ax.grid()

    fig.savefig(filename)
    plt.show()

def PlotAverageScorePerMovePerGame(x_axis, avg_scores_per_move_per_game, filename):
    fig, ax = plt.subplots()
    ax.plot(x_axis, avg_scores_per_move_per_game)

    ax.set(xlabel='Games', ylabel='Average Score Per Move', title='Agent\'s Average Score Per Move Per Game')
    ax.grid()

    fig.savefig(filename)
    plt.show()
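The three helpers above differ only in their labels; a single parameterized version (a sketch — the Agg backend and function name are my choices, not the repo's) would avoid the duplication:

```python
import numpy as np
import matplotlib
matplotlib.use("Agg")  # headless backend: save to file without opening a window
from matplotlib import pyplot as plt

def plot_metric_per_game(x_axis, values, ylabel, title, filename):
    # One generic helper covering score, moves, and average-score-per-move plots.
    fig, ax = plt.subplots()
    ax.plot(x_axis, values)
    ax.set(xlabel='Games', ylabel=ylabel, title=title)
    ax.grid()
    fig.savefig(filename)
    plt.close(fig)

x = np.arange(10)
plot_metric_per_game(x, x ** 2, 'Score', "Agent's Score Per Game", 'score.png')
```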
4 changes: 2 additions & 2 deletions agent.py
Owner Author commented:

change number_of_actions to 5
@@ -40,7 +40,7 @@ def __init__(self, gamma, epsilon, lr, input_dims, batch_size, n_actions, max_me
         self.lr = lr

         # q network for action choice
-        self.Q_eval = DeepQNetwork(learning_rate=lr, input_dims=input_dims, fullyConnected1_dims=16, fullyConnected2_dims=16, number_of_actions=5)
+        self.Q_eval = DeepQNetwork(learning_rate=lr, input_dims=input_dims, fullyConnected1_dims=64, fullyConnected2_dims=64, number_of_actions=5)

         # replay memory setup
         self.action_space = [i for i in range(n_actions)]
@@ -53,7 +53,7 @@ def __init__(self, gamma, epsilon, lr, input_dims, batch_size, n_actions, max_me
         self.new_state_memory = np.zeros((self.mem_size, *input_dims), dtype=np.float32)
         self.action_memory = np.zeros(self.mem_size, dtype=np.int32)
         self.reward_memory = np.zeros(self.mem_size, dtype=np.float32)
-        self.terminal_memory = np.zeros(self.mem_size, dtype=np.bool)
+        self.terminal_memory = np.zeros(self.mem_size, dtype=np.bool_)
69 changes: 43 additions & 26 deletions game.py
@@ -6,51 +6,61 @@ class Game:
     robotLocation = []
     numCans = 0

-    def __init__(self, size=8, canDensity=0.25):
-        self.size = size
-        self.canDensity = canDensity
+    def __init__(self, size=5, num_cans=15):
+        self.size = size + 2
+        #self.canDensity = canDensity
+        self.numCans = num_cans
+        self.numCansLeft = self.numCans
         self.initializeBoard()
         self.initializeStartPointOfRobot()
         return

     def initializeBoard(self):
         for i in range(0, self.size):
             row = []

             for j in range(0, self.size):
                 if(self.isBorderWallTile(i, j)):
                     row.append(BoardState.Wall)
-                elif(self.isCanTile()):
-                    row.append(BoardState.Can)
-                    self.numCans += 1
                 else:
                     row.append(BoardState.Empty)
             self.map.append(row.copy())

             row.clear()
+        self.addCansToMap()
         return

     def isBorderWallTile(self, i, j):
         if(i == 0 or j == 0 or i == self.size - 1 or j == self.size - 1):
             return True
         return False

+    def addCansToMap(self):
+        self.numCansLeft = self.numCans
+        cans_to_add = self.numCansLeft
+        while cans_to_add > 0:
+            y = randint(1, self.size-2)
+            x = randint(1, self.size-2)
+            if(self.map[y][x] == BoardState.Can): continue
+            self.map[y][x] = BoardState.Can
+            cans_to_add -= 1

-    def isCanTile(self):
-        r = randint(0,100)/100
-        if(r <= self.canDensity):
-            return True
-        return False
+    '''
+    def isCanTile(self):
+        r = randint(0,100)/100
+        if(r <= self.canDensity):
+            return True
+        return False
+    '''

     def initializeStartPointOfRobot(self):
         for i in range(0,2):
             self.robotLocation.append(randint(1,self.size-2))
         print("Robot start at: ", end=" ")
         print(self.robotLocation)

     def reset(self):
         self.map.clear()
         self.robotLocation.clear()
         self.numCans = 0
         self.initializeBoard()
         self.initializeStartPointOfRobot()
         return self.getState()
@@ -81,45 +91,45 @@ def step(self, action):

     def determineReward(self, action):
         loc = self.robotLocation
-        if action == Actions.PICK_UP.value:
+        if action == 0:
             if self.map[loc[0]][loc[1]] == BoardState.Can:
                 reward = 10
             else:
                 reward = -5
         else:
             ymod = 0
             xmod = 0
-            if action == Actions.MOVE_NORTH.value: ymod -= 1
-            elif action == Actions.MOVE_SOUTH.value: ymod += 1
-            elif action == Actions.MOVE_EAST.value: xmod -= 1
-            elif action == Actions.MOVE_WEST.value: xmod += 1
+            if action == 1: ymod -= 1
+            elif action == 2: ymod += 1
+            elif action == 3: xmod -= 1
+            elif action == 4: xmod += 1
             if self.map[loc[0]+ymod][loc[1]+xmod] == BoardState.Wall:
                 reward = -5
             else:
-                reward = 0.25
+                reward = -1
         return reward

     def applyAction(self, action):
         y = self.robotLocation[0]
         x = self.robotLocation[1]

-        if action == Actions.PICK_UP.value:
+        if action == 0:
             if self.map[y][x] == BoardState.Can:
-                self.map[y][x] == BoardState.Empty
-                self.numCans -= 1
-        elif action == Actions.MOVE_NORTH.value and self.map[y-1][x] != BoardState.Wall:
+                self.map[y][x] = BoardState.Empty
+                self.numCansLeft -= 1
+        elif action == 1 and self.map[y-1][x] != BoardState.Wall:
             self.robotLocation[0] -= 1
-        elif action == Actions.MOVE_SOUTH.value and self.map[y+1][x] != BoardState.Wall:
+        elif action == 2 and self.map[y+1][x] != BoardState.Wall:
             self.robotLocation[0] += 1
-        elif action == Actions.MOVE_EAST.value and self.map[y][x-1] != BoardState.Wall:
+        elif action == 3 and self.map[y][x-1] != BoardState.Wall:
             self.robotLocation[1] -= 1
-        elif action == Actions.MOVE_WEST.value and self.map[y][x+1] != BoardState.Wall:
+        elif action == 4 and self.map[y][x+1] != BoardState.Wall:
             self.robotLocation[1] += 1

         return self.getState()

     def isDone(self):
-        return False if self.numCans > 0 else True
+        return False if self.numCansLeft > 0 else True

     def displayBoard(self):
         print('\n')
@@ -131,4 +141,11 @@ def displayBoard(self):
             else:
                 print(self.map[i][j].value, end=" ")
             else:
-                print(self.map[i][j].value)
+                print(self.map[i][j].value)
+
+
+
+'''# test area
+game = Game(4,16)
+game.displayBoard()
+print(game.numCans)'''
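The integer action encoding that replaces the Actions enum in this diff can be summarized with a hypothetical helper (not in the repo); the deltas deliberately mirror the branches in determineReward/applyAction, where action 3 decrements x and action 4 increments it:

```python
# 0 = pick up; 1-4 = moves, matching the integer branches in game.py.
MOVE_DELTAS = {1: (-1, 0), 2: (1, 0), 3: (0, -1), 4: (0, 1)}

def next_location(y, x, action):
    """Where the robot would end up, ignoring walls (action 0 stays put)."""
    dy, dx = MOVE_DELTAS.get(action, (0, 0))
    return y + dy, x + dx
```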