diff --git a/Car.png b/Car.png new file mode 100644 index 0000000..64a1be7 Binary files /dev/null and b/Car.png differ diff --git a/Drawer.py b/Drawer.py index 99466b6..536e16b 100644 --- a/Drawer.py +++ b/Drawer.py @@ -5,7 +5,7 @@ class Drawer: def __init__(self): - self.color = [0, 0, 0] + self.color = [100, 0, 0] self.lineThickness = 1 def setLineThinkness(self, thinkness): @@ -42,4 +42,4 @@ def circle(self, x, y, radius): for i in range(iterations + 1): glVertex2f(x + dx, y + dy) dx, dy = (dx * c - dy * s), (dy * c + dx * s) - glEnd() \ No newline at end of file + glEnd() diff --git a/Game.py b/Game.py index 137b65d..41a5c4f 100644 --- a/Game.py +++ b/Game.py @@ -1,7 +1,6 @@ import numpy as np -import pyglet -from Globals import displayWidth, displayHeight -from Drawer import Drawer +from Global import * +from Draw import Drawer from ShapeObjects import * from PygameAdditionalMethods import * import pygame @@ -9,15 +8,13 @@ drawer = Drawer() vec2 = pygame.math.Vector2 - class Game: no_of_actions = 9 - state_size = 15 + state_size = 20 #self.nbVect + 4 def __init__(self): - trackImg = pyglet.image.load('images/track.png') + trackImg = pyglet.image.load('Track.png') self.trackSprite = pyglet.sprite.Sprite(trackImg, x=0, y=0) - # initiate car # initiate walls self.walls = [] @@ -93,56 +90,47 @@ def set_walls(self): self.walls.append(Wall(1157, 528, 1233, 478)) def set_gates(self): - self.gates.append(RewardGate(314, 345, 200, 326)) - self.gates.append(RewardGate(187, 435, 311, 451)) - self.gates.append(RewardGate(307, 537, 171, 555)) - self.gates.append(RewardGate(234, 681, 345, 628)) - self.gates.append(RewardGate(408, 682, 363, 788)) - self.gates.append(RewardGate(428, 816, 481, 712)) - self.gates.append(RewardGate(568, 733, 543, 854)) - self.gates.append(RewardGate(678, 858, 675, 710)) - self.gates.append(RewardGate(852, 708, 855, 848)) - self.gates.append(RewardGate(995, 836, 985, 705)) - self.gates.append(RewardGate(1059, 710, 1076, 821)) - 
self.gates.append(RewardGate(1078, 667, 1172, 572)) - self.gates.append(RewardGate(997, 616, 1076, 532)) - self.gates.append(RewardGate(967, 492, 909, 566)) - self.gates.append(RewardGate(788, 512, 839, 438)) - self.gates.append(RewardGate(790, 405, 781, 285)) - self.gates.append(RewardGate(891, 302, 899, 427)) - self.gates.append(RewardGate(1004, 434, 1027, 334)) - self.gates.append(RewardGate(1139, 344, 1084, 452)) - self.gates.append(RewardGate(1171, 502, 1233, 416)) - self.gates.append(RewardGate(1305, 454, 1243, 556)) - self.gates.append(RewardGate(1365, 588, 1408, 480)) - self.gates.append(RewardGate(1487, 472, 1524, 587)) - self.gates.append(RewardGate(1642, 508, 1575, 432)) - self.gates.append(RewardGate(1608, 360, 1709, 419)) - self.gates.append(RewardGate(1744, 324, 1625, 296)) - self.gates.append(RewardGate(1609, 231, 1727, 190)) - self.gates.append(RewardGate(1617, 66, 1541, 163)) - self.gates.append(RewardGate(1487, 135, 1510, 14)) - self.gates.append(RewardGate(1344, 16, 1328, 150)) - self.gates.append(RewardGate(1077, 142, 1067, 14)) - self.gates.append(RewardGate(909, 16, 900, 130)) - self.gates.append(RewardGate(718, 138, 698, 20)) - self.gates.append(RewardGate(551, 18, 567, 132)) - self.gates.append(RewardGate(445, 138, 413, 13)) - self.gates.append(RewardGate(379, 154, 243, 80)) - self.gates.append(RewardGate(357, 221, 203, 182)) + + self.gates.append(RewardGate(212, 645, 288, 634)) + self.gates.append(RewardGate(206, 518, 279, 526)) + self.gates.append(RewardGate(224, 390, 286, 416)) + self.gates.append(RewardGate(302, 261, 369, 314)) + self.gates.append(RewardGate(545, 175, 561, 236)) + self.gates.append(RewardGate(846, 182, 841, 259)) + self.gates.append(RewardGate(1114, 203, 1100, 282)) + self.gates.append(RewardGate(1217, 297, 1113, 300)) + self.gates.append(RewardGate(1185, 403, 1102, 339)) + self.gates.append(RewardGate(1042, 462, 979, 408)) + self.gates.append(RewardGate(876, 543, 807, 482)) + self.gates.append(RewardGate(765, 598, 693, 
545)) + self.gates.append(RewardGate(801, 596, 815, 694)) + self.gates.append(RewardGate(883, 587, 904, 680)) + self.gates.append(RewardGate(1102, 567, 1128, 640)) + self.gates.append(RewardGate(1261, 452, 1304, 514)) + self.gates.append(RewardGate(1461, 412, 1454, 499)) + self.gates.append(RewardGate(1615, 480, 1572, 535)) + self.gates.append(RewardGate(1722, 680, 1655, 698)) + self.gates.append(RewardGate(1693, 873, 1623, 815)) + self.gates.append(RewardGate(1510, 966, 1495, 886)) + self.gates.append(RewardGate(1297, 970, 1282, 888)) + self.gates.append(RewardGate(1054, 971, 1045, 887)) + self.gates.append(RewardGate(925, 969, 907, 885)) + self.gates.append(RewardGate(742, 969, 733, 884)) + self.gates.append(RewardGate(549, 965, 537, 880)) + self.gates.append(RewardGate(295, 920, 361, 864)) + self.gates.append(RewardGate(238, 766, 309, 754)) def new_episode(self): self.car.reset() - def get_state(self): return self.car.getState() pass def make_action(self, action): # returns reward - actionNo = np.argmax(action) - self.car.updateWithAction(actionNo) + #actionNo = np.argmax(action) + self.car.updateWithAction(action) return self.car.reward def is_episode_finished(self): @@ -156,18 +144,14 @@ def get_lifespan(self): def render(self): glPushMatrix() - # - # glTranslatef(-1, -1, 0) - # glScalef(1 / (displayWidth / 2), 1 / (displayHeight / 2), 1) - - # self.clear() self.trackSprite.draw() - self.car.show() - # for w in self.walls: - # w.draw() - # for g in self.gates: - # g.draw() + for w in self.walls: + w.draw() + for g in self.gates: + g.draw() + self.car.show() + #self.car.showCollisionVectors() glPopMatrix() @@ -181,15 +165,14 @@ def __init__(self, x1, y1, x2, y2): self.y2 = displayHeight - y2 self.line = Line(self.x1, self.y1, self.x2, self.y2) - self.line.setLineThinkness(2) + self.line.setLineThinkness(5) + self.line.setColor([255, 0, 0]) """ draw the line """ - def draw(self): self.line.draw() - """ returns true if the car object has hit this wall """ @@ 
-197,8 +180,7 @@ def draw(self): def hitCar(self, car): global vec2 cw = car.width - # since the car sprite isn't perfectly square the hitbox is a little smaller than the width of the car - ch = car.height - 4 + ch = car.height rightVector = vec2(car.direction) upVector = vec2(car.direction).rotate(-90) carCorners = [] @@ -213,6 +195,7 @@ def hitCar(self, car): j = j % 4 if linesCollided(self.x1, self.y1, self.x2, self.y2, carCorners[i].x, carCorners[i].y, carCorners[j].x, carCorners[j].y): + #print("u ded") return True return False @@ -227,9 +210,9 @@ class RewardGate: def __init__(self, x1, y1, x2, y2): global vec2 self.x1 = x1 - self.y1 = y1 + self.y1 = displayHeight - y1 self.x2 = x2 - self.y2 = y2 + self.y2 = displayHeight - y2 self.active = True self.line = Line(self.x1, self.y1, self.x2, self.y2) @@ -256,8 +239,7 @@ def hitCar(self, car): global vec2 cw = car.width - # since the car sprite isn't perfectly square the hitbox is a little smaller than the width of the car - ch = car.height - 4 + ch = car.height rightVector = vec2(car.direction) upVector = vec2(car.direction).rotate(-90) carCorners = [] @@ -276,10 +258,13 @@ def hitCar(self, car): return False + class Car: def __init__(self, walls, rewardGates): global vec2 + self.nbVect = 16 + self.angles = np.linspace(-180, 180, self.nbVect) self.x = 258 self.y = 288 self.vel = 0 @@ -291,16 +276,16 @@ def __init__(self, walls, rewardGates): self.turningRate = 5.0 / self.width self.friction = 0.98 self.maxSpeed = self.width / 4.0 - self.maxReverseSpeed = -1 * self.maxSpeed / 2.0 + self.maxReverseSpeed = self.maxSpeed / 16.0 #used as a minimum for speed self.accelerationSpeed = self.width / 160.0 self.dead = False self.driftMomentum = 0 self.driftFriction = 0.87 self.lineCollisionPoints = [] self.collisionLineDistances = [] - self.vectorLength = 300 + self.vectorLength = 600 - self.carPic = pyglet.image.load('images/car.png') + self.carPic = pyglet.image.load('Car.png') self.carSprite = 
pyglet.sprite.Sprite(self.carPic, x=self.x, y=self.y) self.carSprite.update(rotation=0, scale_x=self.width / self.carSprite.width, scale_y=self.height / self.carSprite.height) @@ -349,6 +334,7 @@ def reset(self): g.active = True def show(self): + #print(self.x,self.y) # first calculate the center of the car in order to allow the # rotation of the car to be anchored around the center upVector = self.direction.rotate(90) @@ -375,6 +361,7 @@ def getPositionOnCarRelativeToCenter(self, right, up): return vec2(self.x, self.y) + ((rightVector * right) + (upVector * up)) def updateWithAction(self, actionNo): + #print("action number : " + str(actionNo)) self.turningLeft = False self.turningRight = False self.accelerating = False @@ -407,11 +394,13 @@ def updateWithAction(self, actionNo): for i in range(1): if not self.dead: self.lifespan+=1 - self.move() + self.updateControls() + self.move() if self.hitAWall(): self.dead = True + #print("dead at x: " + str(self.x) + " y : " + str(displayHeight - self.y) + "u lived for : " + str(self.lifespan) + " reward : " + str(self.score)) # return self.checkRewardGates() totalReward += self.reward @@ -520,9 +509,10 @@ def updateControls(self): self.acc = self.accelerationSpeed elif self.reversing: if self.vel > 0: - self.acc = -3 * self.accelerationSpeed + self.acc = -2 * self.accelerationSpeed else: - self.acc = -1 * self.accelerationSpeed + self.acc = 0 + self.vel = 0 """ checks every wall and if the car has hit a wall returns true @@ -563,8 +553,7 @@ def getState(self): self.setVisionVectors() normalizedVisionVectors = [1 - (max(1.0, line) / self.vectorLength) for line in self.collisionLineDistances] - normalizedForwardVelocity = max(0.0, self.vel / self.maxSpeed) - normalizedReverseVelocity = max(0.0, self.vel / self.maxReverseSpeed) + normalizedForwardVelocity = max(0, (self.vel-self.maxReverseSpeed) / (self.maxSpeed-self.maxReverseSpeed)) if self.driftMomentum > 0: normalizedPosDrift = self.driftMomentum / 5 normalizedNegDrift = 0 
@@ -578,27 +567,15 @@ def getState(self): normalizedAngleOfNextGate /= 180 - normalizedState = [*normalizedVisionVectors, normalizedForwardVelocity, normalizedReverseVelocity, + normalizedState = [*normalizedVisionVectors, normalizedForwardVelocity, normalizedPosDrift, normalizedNegDrift, normalizedAngleOfNextGate] return np.array(normalizedState) def setVisionVectors(self): - h = self.height - 4 - w = self.width self.collisionLineDistances = [] self.lineCollisionPoints = [] - self.setVisionVector(w / 2, 0, 0) - self.setVisionVector(w / 2, -h / 2, -180 / 16) - self.setVisionVector(w / 2, -h / 2, -180 / 4) - self.setVisionVector(w / 2, -h / 2, -4 * 180 / 8) - - self.setVisionVector(w / 2, h / 2, 180 / 16) - self.setVisionVector(w / 2, h / 2, 180 / 4) - self.setVisionVector(w / 2, h / 2, 4 * 180 / 8) - - self.setVisionVector(-w / 2, -h / 2, -6 * 180 / 8) - self.setVisionVector(-w / 2, h / 2, 6 * 180 / 8) - self.setVisionVector(-w / 2, 0, 180) + for i in self.angles: + self.setVisionVector(0, 0, i) """ calculates and stores the distance to the nearest wall given a vector diff --git a/Games_Solo.py b/Games_Solo.py new file mode 100644 index 0000000..2392ad3 --- /dev/null +++ b/Games_Solo.py @@ -0,0 +1,606 @@ +import numpy as np +from Global import * +from Draw import Drawer +from ShapeObjects import * +from PygameAdditionalMethods import * +import pygame + +drawer = Drawer() +vec2 = pygame.math.Vector2 + +class Game: + no_of_actions = 9 + state_size = 15 + + def __init__(self): + trackImg = pyglet.image.load('Track.png') + self.trackSprite = pyglet.sprite.Sprite(trackImg, x=0, y=0) + + # initiate walls + self.walls = [] + self.gates = [] + + self.set_walls() + self.set_gates() + self.firstClick = True + + self.car = Car(self.walls, self.gates) + + def set_walls(self): + self.walls.append(Wall(240, 809, 200, 583)) + self.walls.append(Wall(200, 583, 218, 395)) + self.walls.append(Wall(218, 395, 303, 255)) + self.walls.append(Wall(303, 255, 548, 173)) + 
self.walls.append(Wall(548, 173, 764, 179)) + self.walls.append(Wall(764, 179, 1058, 198)) + self.walls.append(Wall(1055, 199, 1180, 215)) + self.walls.append(Wall(1177, 215, 1220, 272)) + self.walls.append(Wall(1222, 273, 1218, 367)) + self.walls.append(Wall(1218, 367, 1150, 437)) + self.walls.append(Wall(1150, 437, 1044, 460)) + self.walls.append(Wall(1044, 460, 757, 600)) + self.walls.append(Wall(757, 600, 1099, 570)) + self.walls.append(Wall(1100, 570, 1187, 508)) + self.walls.append(Wall(1187, 507, 1288, 443)) + self.walls.append(Wall(1288, 443, 1463, 415)) + self.walls.append(Wall(1463, 415, 1615, 478)) + self.walls.append(Wall(1617, 479, 1727, 679)) + self.walls.append(Wall(1727, 679, 1697, 874)) + self.walls.append(Wall(1694, 872, 1520, 964)) + self.walls.append(Wall(1520, 964, 1100, 970)) + self.walls.append(Wall(1105, 970, 335, 960)) + self.walls.append(Wall(339, 960, 264, 899)) + self.walls.append(Wall(263, 897, 238, 803)) + self.walls.append(Wall(317, 782, 274, 570)) + self.walls.append(Wall(275, 569, 284, 407)) + self.walls.append(Wall(284, 407, 363, 317)) + self.walls.append(Wall(363, 317, 562, 240)) + self.walls.append(Wall(562, 240, 1114, 284)) + self.walls.append(Wall(1114, 284, 1120, 323)) + self.walls.append(Wall(1120, 323, 1045, 377)) + self.walls.append(Wall(1045, 378, 682, 548)) + self.walls.append(Wall(682, 548, 604, 610)) + self.walls.append(Wall(604, 612, 603, 695)) + self.walls.append(Wall(605, 695, 702, 713)) + self.walls.append(Wall(703, 712, 1128, 642)) + self.walls.append(Wall(1129, 642, 1320, 512)) + self.walls.append(Wall(1323, 512, 1464, 497)) + self.walls.append(Wall(1464, 497, 1579, 535)) + self.walls.append(Wall(1579, 535, 1660, 701)) + self.walls.append(Wall(1660, 697, 1634, 818)) + self.walls.append(Wall(1634, 818, 1499, 889)) + self.walls.append(Wall(1499, 889, 395, 883)) + self.walls.append(Wall(395, 883, 330, 838)) + self.walls.append(Wall(330, 838, 315, 782)) + self.walls.append(Wall(319, 798, 306, 725)) + 
self.walls.append(Wall(276, 580, 277, 543)) + self.walls.append(Wall(603, 639, 622, 590)) + self.walls.append(Wall(599, 655, 621, 704)) + self.walls.append(Wall(1074, 571, 1115, 558)) + self.walls.append(Wall(1314, 516, 1333, 511)) + self.walls.append(Wall(1692, 875, 1706, 830)) + self.walls.append(Wall(277, 912, 255, 872)) + self.walls.append(Wall(1214, 262, 1225, 288)) + self.walls.append(Wall(1601, 470, 1625, 490)) + self.walls.append(Wall(1119, 644, 1139, 634)) + self.walls.append(Wall(687, 710, 719, 710)) + self.walls.append(Wall(1721, 664, 1727, 696)) + self.walls.append(Wall(1015, 392, 1065, 362)) + self.walls.append(Wall(1091, 572, 1104, 568)) + self.walls.append(Wall(1157, 528, 1233, 478)) + + def set_gates(self): + + self.gates.append(RewardGate(212, 645, 288, 634)) + self.gates.append(RewardGate(206, 518, 279, 526)) + self.gates.append(RewardGate(224, 390, 286, 416)) + self.gates.append(RewardGate(302, 261, 369, 314)) + self.gates.append(RewardGate(545, 175, 561, 236)) + self.gates.append(RewardGate(846, 182, 841, 259)) + self.gates.append(RewardGate(1114, 203, 1100, 282)) + self.gates.append(RewardGate(1217, 297, 1113, 300)) + self.gates.append(RewardGate(1185, 403, 1102, 339)) + self.gates.append(RewardGate(1042, 462, 979, 408)) + self.gates.append(RewardGate(876, 543, 807, 482)) + self.gates.append(RewardGate(765, 598, 693, 545)) + self.gates.append(RewardGate(801, 596, 815, 694)) + self.gates.append(RewardGate(883, 587, 904, 680)) + self.gates.append(RewardGate(1102, 567, 1128, 640)) + self.gates.append(RewardGate(1261, 452, 1304, 514)) + self.gates.append(RewardGate(1461, 412, 1454, 499)) + self.gates.append(RewardGate(1615, 480, 1572, 535)) + self.gates.append(RewardGate(1722, 680, 1655, 698)) + self.gates.append(RewardGate(1693, 873, 1623, 815)) + self.gates.append(RewardGate(1510, 966, 1495, 886)) + self.gates.append(RewardGate(1297, 970, 1282, 888)) + self.gates.append(RewardGate(1054, 971, 1045, 887)) + self.gates.append(RewardGate(925, 969, 
907, 885)) + self.gates.append(RewardGate(742, 969, 733, 884)) + self.gates.append(RewardGate(549, 965, 537, 880)) + self.gates.append(RewardGate(295, 920, 361, 864)) + self.gates.append(RewardGate(238, 766, 309, 754)) + + + def new_episode(self): + self.car.reset() + + def get_state(self): + return self.car.getState() + + def make_action(self, action): + # returns reward + actionNo = np.argmax(action) + self.car.updateWithAction(actionNo) + return self.car.reward + + def is_episode_finished(self): + return self.car.dead + + def get_score(self): + return self.car.score + + def get_lifespan(self): + return self.car.lifespan + + def render(self): + glPushMatrix() + self.trackSprite.draw() + + for w in self.walls: + w.draw() + for g in self.gates: + g.draw() + self.car.update() + self.car.show() + self.car.showCollisionVectors() + + glPopMatrix() + +class Wall: + + def __init__(self, x1, y1, x2, y2): + self.x1 = x1 + self.y1 = displayHeight - y1 + self.x2 = x2 + self.y2 = displayHeight - y2 + + self.line = Line(self.x1, self.y1, self.x2, self.y2) + self.line.setLineThinkness(5) + + """ + draw the line + """ + def draw(self): + self.line.draw() + """ + returns true if the car object has hit this wall + """ + + def hitCar(self, car): + global vec2 + cw = car.width + ch = car.height + rightVector = vec2(car.direction) + upVector = vec2(car.direction).rotate(-90) + carCorners = [] + cornerMultipliers = [[1, 1], [1, -1], [-1, -1], [-1, 1]] + carPos = vec2(car.x, car.y) + for i in range(4): + carCorners.append(carPos + (rightVector * cw / 2 * cornerMultipliers[i][0]) + + (upVector * ch / 2 * cornerMultipliers[i][1])) + + for i in range(4): + j = i + 1 + j = j % 4 + if linesCollided(self.x1, self.y1, self.x2, self.y2, carCorners[i].x, carCorners[i].y, carCorners[j].x, + carCorners[j].y): + return True + return False + + +""" +class containing all the game logic for moving and displaying the car +""" + + +class RewardGate: + + def __init__(self, x1, y1, x2, y2): + global vec2 
+ self.x1 = x1 + self.y1 = displayHeight - y1 + self.x2 = x2 + self.y2 = displayHeight - y2 + self.active = True + + self.line = Line(self.x1, self.y1, self.x2, self.y2) + self.line.setLineThinkness(5) + self.line.setColor([0, 255, 0]) + + self.center = vec2((self.x1 + self.x2) / 2, (self.y1 + self.y2) / 2) + + """ + draw the line + """ + + def draw(self): + if self.active: + self.line.draw() + + """ + returns true if the car object has hit this wall + """ + + def hitCar(self, car): + + if not self.active: + return False + + global vec2 + cw = car.width + ch = car.height + rightVector = vec2(car.direction) + upVector = vec2(car.direction).rotate(-90) + carCorners = [] + cornerMultipliers = [[1, 1], [1, -1], [-1, -1], [-1, 1]] + carPos = vec2(car.x, car.y) + for i in range(4): + carCorners.append(carPos + (rightVector * cw / 2 * cornerMultipliers[i][0]) + + (upVector * ch / 2 * cornerMultipliers[i][1])) + + for i in range(4): + j = i + 1 + j = j % 4 + if linesCollided(self.x1, self.y1, self.x2, self.y2, carCorners[i].x, carCorners[i].y, carCorners[j].x, + carCorners[j].y): + return True + return False + + + + + +class Car: + + def __init__(self, walls, rewardGates): + global vec2 + self.nbVect = 16 + self.angles = np.linspace(-180, 180, self.nbVect) + self.x = 256 + self.y = 288 + self.vel = 0 + self.direction = vec2(0, 1) + self.direction = self.direction.rotate(180 / 12) + self.acc = 0 + self.width = 40 + self.height = 20 + self.turningRate = 5.0 / self.width + self.friction = 0.98 + self.maxSpeed = self.width / 4.0 + self.maxReverseSpeed = -1 #self.maxSpeed / 16.0 is used as a minimum for speed + self.accelerationSpeed = self.width / 160.0 + self.dead = False + self.driftMomentum = 0 + self.driftFriction = 0.87 + self.lineCollisionPoints = [] + self.collisionLineDistances = [] + self.vectorLength = 600 + + self.carPic = pyglet.image.load('Car.png') + self.carSprite = pyglet.sprite.Sprite(self.carPic, x=self.x, y=self.y) + self.carSprite.update(rotation=0, 
scale_x=self.width / self.carSprite.width, + scale_y=self.height / self.carSprite.height) + + self.turningLeft = False + self.turningRight = False + self.accelerating = False + self.reversing = False + self.walls = walls + self.rewardGates = rewardGates + self.rewardNo = 0 + + self.directionToRewardGate = self.rewardGates[self.rewardNo].center - vec2(self.x, self.y) + + self.reward = 0 + + self.score = 0 + self.lifespan = 0 + """ + draws the car to the screen + """ + + def reset(self): + global vec2 + self.x = 256 + self.y = 288 + self.vel = 0 + self.direction = vec2(0, 1) + self.direction = self.direction.rotate(180 / 12) + self.acc = 0 + self.dead = False + self.driftMomentum = 0 + self.lineCollisionPoints = [] + self.collisionLineDistances = [] + + self.turningLeft = False + self.turningRight = False + self.accelerating = False + self.reversing = False + self.rewardNo = 0 + self.reward = 0 + + self.lifespan = 0 + self.score = 0 + for g in self.rewardGates: + g.active = True + + def show(self): + # first calculate the center of the car in order to allow the + # rotation of the car to be anchored around the center + upVector = self.direction.rotate(90) + drawX = self.direction.x * self.width / 2 + upVector.x * self.height / 2 + drawY = self.direction.y * self.width / 2 + upVector.y * self.height / 2 + self.carSprite.update(x=self.x - drawX, y=self.y - drawY, rotation=-get_angle(self.direction)) + self.carSprite.draw() + # self.showCollisionVectors() + + """ + returns a vector of where a point on the car is after rotation + takes the position desired relative to the center of the car when the car is facing to the right + """ + + def getPositionOnCarRelativeToCenter(self, right, up): + global vec2 + w = self.width + h = self.height + rightVector = vec2(self.direction) + rightVector.normalize() + upVector = self.direction.rotate(90) + upVector.normalize() + + return vec2(self.x, self.y) + ((rightVector * right) + (upVector * up)) + + def updateWithAction(self, 
actionNo): + self.turningLeft = False + self.turningRight = False + self.accelerating = False + self.reversing = False + + if actionNo == 0: + self.turningLeft = True + elif actionNo == 1: + self.turningRight = True + elif actionNo == 2: + self.accelerating = True + elif actionNo == 3: + self.reversing = True + elif actionNo == 4: + self.accelerating = True + self.turningLeft = True + elif actionNo == 5: + self.accelerating = True + self.turningRight = True + elif actionNo == 6: + self.reversing = True + self.turningLeft = True + elif actionNo == 7: + self.reversing = True + self.turningRight = True + elif actionNo == 8: + pass + totalReward = 0 + + for i in range(1): + if not self.dead: + self.lifespan+=1 + + self.updateControls() + self.move() + + if self.hitAWall(): + self.dead = True + # return + self.checkRewardGates() + totalReward += self.reward + + self.setVisionVectors() + + # self.update() + + self.reward = totalReward + + """ + called every frame + """ + + def update(self): + if not self.dead: + self.updateControls() + self.move() + + if self.hitAWall(): + self.dead = True + # return + self.checkRewardGates() + self.setVisionVectors() + + def checkRewardGates(self): + global vec2 + self.reward = -1 + if self.rewardGates[self.rewardNo].hitCar(self): + self.rewardGates[self.rewardNo].active = False + self.rewardNo += 1 + self.score += 1 + self.reward = 10 + if self.rewardNo == len(self.rewardGates): + self.rewardNo = 0 + for g in self.rewardGates: + g.active = True + self.directionToRewardGate = self.rewardGates[self.rewardNo].center - vec2(self.x, self.y) + + """ + changes the position of the car to account for acceleration, velocity, friction and drift + """ + + def move(self): + global vec2 + self.vel += self.acc + self.vel *= self.friction + self.constrainVel() + + driftVector = vec2(self.direction) + driftVector = driftVector.rotate(90) + + addVector = vec2(0, 0) + addVector.x += self.vel * self.direction.x + addVector.x += self.driftMomentum * 
driftVector.x + addVector.y += self.vel * self.direction.y + addVector.y += self.driftMomentum * driftVector.y + self.driftMomentum *= self.driftFriction + + if addVector.length() != 0: + addVector.normalize() + + addVector.x * abs(self.vel) + addVector.y * abs(self.vel) + + self.x += addVector.x + self.y += addVector.y + + """ + keeps the velocity of the car within the maximum and minimum speeds + """ + + def constrainVel(self): + if self.maxSpeed < self.vel: + self.vel = self.maxSpeed + elif self.vel < self.maxReverseSpeed: + self.vel = self.maxReverseSpeed + + """ + changes the cars direction and acceleration based on the users inputs + """ + + def updateControls(self): + multiplier = 1 + if abs(self.vel) < 5: + multiplier = abs(self.vel) / 5 + if self.vel < 0: + multiplier *= -1 + + driftAmount = self.vel * self.turningRate * self.width / (9.0 * 8.0) + if self.vel < 5: + driftAmount = 0 + + if self.turningLeft: + self.direction = self.direction.rotate(radiansToAngle(self.turningRate) * multiplier) + + self.driftMomentum -= driftAmount + elif self.turningRight: + self.direction = self.direction.rotate(-radiansToAngle(self.turningRate) * multiplier) + self.driftMomentum += driftAmount + self.acc = 0 + if self.accelerating: + if self.vel < 0: + self.acc = 3 * self.accelerationSpeed + else: + self.acc = self.accelerationSpeed + elif self.reversing: + if self.vel > 0: + self.acc = -2 * self.accelerationSpeed + else: + self.acc = 0 + self.vel = 0 + + """ + checks every wall and if the car has hit a wall returns true + """ + + def hitAWall(self): + for wall in self.walls: + if wall.hitCar(self): + #print(self.x,self.y) + return True + + return False + + """ + returns the point of collision of a line (x1,y1,x2,y2) with the walls, + if multiple walls are hit it returns the closest collision point + """ + + def getCollisionPointOfClosestWall(self, x1, y1, x2, y2): + global vec2 + minDist = 2 * displayWidth + closestCollisionPoint = vec2(0, 0) + for wall in self.walls: + 
collisionPoint = getCollisionPoint(x1, y1, x2, y2, wall.x1, wall.y1, wall.x2, wall.y2) + if collisionPoint is None: + continue + if dist(x1, y1, collisionPoint.x, collisionPoint.y) < minDist: + minDist = dist(x1, y1, collisionPoint.x, collisionPoint.y) + closestCollisionPoint = vec2(collisionPoint) + return closestCollisionPoint + + """ + by creating lines in many directions from the car and getting the closest collision point of that line + we create "vision vectors" which will allow the car to 'see' + kinda like a sonar system + """ + + def getState(self): + self.setVisionVectors() + normalizedVisionVectors = [1 - (max(1.0, line) / self.vectorLength) for line in self.collisionLineDistances] + + normalizedForwardVelocity = max(0.0, self.vel / self.maxSpeed) + normalizedReverseVelocity = max(0.0, self.vel / self.maxReverseSpeed) + if self.driftMomentum > 0: + normalizedPosDrift = self.driftMomentum / 5 + normalizedNegDrift = 0 + else: + normalizedPosDrift = 0 + normalizedNegDrift = self.driftMomentum / -5 + + normalizedAngleOfNextGate = (get_angle(self.direction) - get_angle(self.directionToRewardGate)) % 360 + if normalizedAngleOfNextGate > 180: + normalizedAngleOfNextGate = -1 * (360 - normalizedAngleOfNextGate) + + normalizedAngleOfNextGate /= 180 + + normalizedState = [*normalizedVisionVectors, normalizedForwardVelocity, normalizedReverseVelocity, + normalizedPosDrift, normalizedNegDrift, normalizedAngleOfNextGate] + return np.array(normalizedState) + + def setVisionVectors(self): + self.collisionLineDistances = [] + self.lineCollisionPoints = [] + for i in self.angles : + self.setVisionVector(0,0,i) + + """ + calculates and stores the distance to the nearest wall given a vector + """ + + def setVisionVector(self, startX, startY, angle): + collisionVectorDirection = self.direction.rotate(angle) + collisionVectorDirection = collisionVectorDirection.normalize() * self.vectorLength + startingPoint = self.getPositionOnCarRelativeToCenter(startX, startY) + 
collisionPoint = self.getCollisionPointOfClosestWall(startingPoint.x, startingPoint.y, + startingPoint.x + collisionVectorDirection.x, + startingPoint.y + collisionVectorDirection.y) + if collisionPoint.x == 0 and collisionPoint.y == 0: + self.collisionLineDistances.append(self.vectorLength) + else: + self.collisionLineDistances.append( + dist(startingPoint.x, startingPoint.y, collisionPoint.x, collisionPoint.y)) + self.lineCollisionPoints.append(collisionPoint) + + """ + shows dots where the collision vectors detect a wall + """ + + def showCollisionVectors(self): + global drawer + for point in self.lineCollisionPoints: + drawer.setColor([255, 0, 0]) + drawer.circle(point.x, point.y, 5) \ No newline at end of file diff --git a/Globals.py b/Globals.py index 79b9eca..1675c8f 100644 --- a/Globals.py +++ b/Globals.py @@ -1,2 +1,3 @@ displayWidth = 1800 displayHeight = 1000 +frameRate = 60 diff --git a/Main_Solo.py b/Main_Solo.py new file mode 100644 index 0000000..766851e --- /dev/null +++ b/Main_Solo.py @@ -0,0 +1,91 @@ +import pyglet +from pyglet.window import key +from pyglet.gl import * +from Global import * +import pygame +from Games_Solo import Game, Wall, RewardGate + +vec2 = pygame.math.Vector2 + +class MyWindow(pyglet.window.Window): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.set_minimum_size(400, 300) + # set background color + backgroundColor = [0, 0, 0, 255] + backgroundColor = [i / 255 for i in backgroundColor] + glClearColor(*backgroundColor) + self.clear() + self.game = Game() + + + self.firstClick = True + + def on_key_press(self, symbol, modifiers): + if symbol == key.RIGHT: + self.game.car.turningRight = True + + if symbol == key.LEFT: + self.game.car.turningLeft = True + + if symbol == key.UP: + self.game.car.accelerating = True + + if symbol == key.DOWN: + self.game.car.reversing = True + + def on_key_release(self, symbol, modifiers): + if symbol == key.RIGHT: + self.game.car.turningRight = False + + if symbol 
== key.LEFT: + self.game.car.turningLeft = False + + if symbol == key.UP: + self.game.car.accelerating = False + + if symbol == key.DOWN: + self.game.car.reversing = False + + if symbol == key.SPACE: + self.ai.training = not self.ai.training + + def on_mouse_press(self, x, y, button, modifiers): + if self.firstClick: + self.clickPos = [x, y] + else: + print("self.gates.append(RewardGate({}, {}, {}, {}))".format(self.clickPos[0],displayHeight - self.clickPos[1],x, displayHeight - y)) + self.game.gates.append(RewardGate(self.clickPos[0], displayHeight - self.clickPos[1], x, displayHeight - y)) + + #print("self.walls.append(Wall({}, {}, {}, {}))".format(self.clickPos[0],displayHeight - self.clickPos[1],x, displayHeight - y)) + #self.game.walls.append(Wall(self.clickPos[0], self.clickPos[1], x, y)) + + self.firstClick = not self.firstClick + pass + + + def on_draw(self): + + window.set_size(width=displayWidth,height=displayHeight) + self.clear() + self.game.render() + vision = self.game.car.getState() + for i in range(len(vision)): + + label = pyglet.text.Label("{}: {}".format(i,vision[i]), + font_name='Times New Roman', + font_size=12, + x=10, y=20*i+50, + anchor_x='left', anchor_y='center') + label.draw() + + + def update(self,dt): + pass + + + +if __name__ == "__main__": + window = MyWindow(displayWidth, displayHeight, "AI Learns to Drive", resizable=False) + pyglet.clock.schedule_interval(window.update, 1 / frameRate) + pyglet.app.run() \ No newline at end of file diff --git a/README.md b/README.md index bb7bf69..f5a0054 100644 --- a/README.md +++ b/README.md @@ -1 +1,75 @@ -# Car-QLearning \ No newline at end of file +# Car-QLearning + +Requirements (versions are mostly here as an indication): + +``` +Pyglet (1.5.27) +Pygame (2.1.3) +numpy (1.24.1) +tensorflow (2.10.1) (no 1.X tensorflow) +``` + +How to use : + +I) Download the files + + +II) If you want to create your own track + + +1) Create your own .png file (I recommend not to change size if you don't want to 
touch the code). +When you are done, name it Track.png. + +3) Once done designing, in Games_Solo.py empty the set_walls function and the set_gates function but leave this line in set_gates : + self.gates.append(RewardGate(0, 1, 2, 3)) + +4) You can now run the Main_Solo.py program; you should see your track. + +5) Time to set up the gates. You can set up gates with the left mouse button (each gate takes two clicks: its start point, then its end point). These gates are where the AI gains points. Once you are done, close the program; you should see in the console a lot of text similar to : +``` +self.gates.append(RewardGate(343, 379, 524, 405)) +self.gates.append(RewardGate(488, 326, 626, 421)) +self.gates.append(RewardGate(626, 309, 701, 411)) +self.gates.append(RewardGate(232, 309, 267, 399)) +``` +Copy this text and paste it in the set_gates function you emptied earlier. Be careful: the order of the gates is important. + +6) Time to set up the walls. In the Main_Solo.py program, go to the "on_mouse_press" function, then comment out the active gate lines and uncomment the wall lines. You should get: + +``` + def on_mouse_press(self, x, y, button, modifiers): + if self.firstClick: + self.clickPos = [x, y] + else: + #print("self.gates.append(RewardGate({}, {}, {}, {}))".format(self.clickPos[0],displayHeight - self.clickPos[1],x, displayHeight - y)) + #self.game.gates.append(RewardGate(self.clickPos[0], displayHeight - self.clickPos[1], x, displayHeight - y)) + + print("self.walls.append(Wall({}, {}, {}, {}))".format(self.clickPos[0],displayHeight - self.clickPos[1],x, displayHeight - y)) + self.game.walls.append(Wall(self.clickPos[0], self.clickPos[1], x, y)) + + self.firstClick = not self.firstClick + pass + +``` +You can now set up walls with the left mouse button (two clicks per wall, as for gates). Once you are done, close the program; you should see in the console a lot of text similar to before but saying : + +``` +self.walls.append(Wall(343, 379, 524, 405)) ... +``` + +Once again, copy this text and paste it in the set_walls function of Games_Solo.py. 
+ +7) Change the start and reset position/direction of the car in Games_Solo.py, in the car class, in both the __init__ function and the reset function (change self.x, self.y). + +8) Check that everything works by trying your track in the Main_Solo program: you should die if you touch the walls, and you should see gates disappear when passing them (if not, you probably didn't place them in the right order). + +9) Copy your set_walls and set_gates functions into Games.py, replacing the old ones (don't forget to change your start and reset position/direction too). + +10) You are now done creating your personal track. + + +III) You now want to train the AI to perform on your track, so run the main.py program. You should see the car moving on its own and slowly learning the track. + +PS: It seems that the load and save functions aren't working properly, so don't close your program until you are satisfied =) (I personally got results at around Model 5000, so be patient) + +If you need some help, you can contact me on Discord at Aquinox#4429. I'll try to help you as best I can. 
diff --git a/images/track.png b/Track.png similarity index 100% rename from images/track.png rename to Track.png diff --git a/images/car.png b/images/car.png deleted file mode 100644 index 116a3a6..0000000 Binary files a/images/car.png and /dev/null differ diff --git a/main.py b/main.py index 4f060e6..443a0e2 100644 --- a/main.py +++ b/main.py @@ -1,27 +1,19 @@ -import pyglet +from pyglet.window import key from pyglet.gl import * +import pyglet +from Global import * import pygame -import math -from pyglet.window import key -from Drawer import Drawer -# from PygameAdditionalMethods import * -from ShapeObjects import Line -import tensorflow as tf # Deep Learning library -import numpy as np # Handle matrices -from collections import deque +from Games import Game import random import os -from Globals import displayHeight, displayWidth -from Game import Game +import numpy as np +from collections import deque +import tensorflow as tf -frameRate = 30.0 +tf.compat.v1.disable_eager_execution() vec2 = pygame.math.Vector2 -""" -a line which the car object cannot touch -""" - class QLearning: def __init__(self, game): @@ -31,7 +23,7 @@ def __init__(self, game): self.stateSize = [game.state_size] self.actionSize = game.no_of_actions - self.learningRate = 0.00025 + self.learningRate = 0.00030 #default 0.00025 self.possibleActions = np.identity(self.actionSize, dtype=int) self.totalTrainingEpisodes = 100000 @@ -51,10 +43,10 @@ def __init__(self, game): self.maxTau = 10000 self.tau = 0 - # reset the graph i guess, I don't know why therefore is already a graph happening but who cares - tf.reset_default_graph() + # reset the graph i guess, I don't know why there is already a graph happening but who cares + tf.compat.v1.reset_default_graph() - self.sess = tf.Session() + self.sess = tf.compat.v1.Session() self.DQNetwork = DQN(self.stateSize, self.actionSize, self.learningRate, name='DQNetwork') self.TargetNetwork = DQN(self.stateSize, self.actionSize, self.learningRate, 
name='TargetNetwork') @@ -68,15 +60,15 @@ def __init__(self, game): self.newEpisode = False self.stepNo = 0 self.episodeNo = 0 - self.saver = tf.train.Saver() + self.saver = tf.compat.v1.train.Saver() load = False - loadFromEpisodeNo = 6300 + loadFromEpisodeNo = 15800 if load: self.episodeNo = loadFromEpisodeNo - self.saver.restore(self.sess, "./allModels/model{}/models/model.ckpt".format(self.episodeNo)) + self.saver.restore(self.sess, "./allModels/modelMatin{}/models/model.ckpt".format(self.episodeNo)) else: - self.sess.run(tf.global_variables_initializer()) + self.sess.run(tf.compat.v1.global_variables_initializer()) # self.sess.graph.finalize() self.sess.run(self.update_target_graph()) @@ -86,10 +78,10 @@ def __init__(self, game): def update_target_graph(self): # Get the parameters of our DQNNetwork - from_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "DQNetwork") + from_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "DQNetwork") # Get the parameters of our Target_network - to_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "TargetNetwork") + to_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "TargetNetwork") op_holder = [] @@ -107,13 +99,15 @@ def pretrain(self): # choice = random.randInt(self.actionSize) # action = self.possibleActions[choice] action = random.choice(self.possibleActions) + #print(action) actionNo = np.argmax(action) + #print(actionNo) # now we need to get next state reward = self.game.make_action(actionNo) nextState = self.game.get_state() self.newEpisode = False - if self.game.is_episode_finished(): + if self.game.is_episode_finished(): #if car is dead reward = -100 self.memoryBuffer.store((state, action, reward, nextState, True)) self.game.new_episode() @@ -121,6 +115,7 @@ def pretrain(self): self.newEpisode = True else: self.memoryBuffer.store((state, action, reward, nextState, False)) + self.game.render() state = nextState print("pretrainingDone") @@ 
-159,14 +154,16 @@ def train(self): reward = self.game.make_action(actionNo) nextState = self.game.get_state() - + #window.clear() + #self.game.render() if (reward > 0): - print("Hell YEAH, Reward {}".format(reward)) + #print("Hell YEAH, Reward {}".format(reward)) + pass # if car is dead then finish episode if self.game.is_episode_finished(): reward = -100 self.stepNo = self.maxSteps - print("DEAD!! Reward = -100") + #print("DEAD!! Reward = -100") # print("Episode {} Step {} Action {} reward {} epsilon {} experiences stored {}" # .format(self.episodeNo, self.stepNo, actionNo, reward, epsilon, self.trainingStepNo)) @@ -272,32 +269,32 @@ def __init__(self, stateSize, actionSize, learningRate, name): self.learningRate = learningRate self.name = name - with tf.variable_scope(self.name): + with tf.compat.v1.variable_scope(self.name): # the inputs describing the state - self.inputs_ = tf.placeholder(tf.float32, [None, *self.stateSize], name="inputs") + self.inputs_ = tf.compat.v1.placeholder(tf.float32, [None, *self.stateSize], name="inputs") # the one hotted action that we took # e.g. 
if we took the 3rd action action_ = [0,0,1,0,0,0,0] - self.actions_ = tf.placeholder(tf.float32, [None, self.actionSize], name="actions") + self.actions_ = tf.compat.v1.placeholder(tf.float32, [None, self.actionSize], name="actions") # the target = reward + the discounted maximum possible q value of hte next state - self.targetQ = tf.placeholder(tf.float32, [None], name="target") + self.targetQ = tf.compat.v1.placeholder(tf.float32, [None], name="target") - self.ISWeights_ = tf.placeholder(tf.float32, [None, 1], name='ISWeights') + self.ISWeights_ = tf.compat.v1.placeholder(tf.float32, [None, 1], name='ISWeights') - self.dense1 = tf.layers.dense(inputs=self.inputs_, + self.dense1 = tf.compat.v1.layers.dense(inputs=self.inputs_, units=16, activation=tf.nn.elu, - kernel_initializer=tf.contrib.layers.xavier_initializer(), + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), name="dense1") - self.dense2 = tf.layers.dense(inputs=self.dense1, + self.dense2 = tf.compat.v1.layers.dense(inputs=self.dense1, units=16, activation=tf.nn.elu, - kernel_initializer=tf.contrib.layers.xavier_initializer(), + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), name="dense2") - self.output = tf.layers.dense(inputs=self.dense2, + self.output = tf.compat.v1.layers.dense(inputs=self.dense2, units=self.actionSize, - kernel_initializer=tf.contrib.layers.xavier_initializer(), + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), activation=None, name="outputs") @@ -311,7 +308,7 @@ def __init__(self, stateSize, actionSize, learningRate, name): self.loss = tf.reduce_mean(self.ISWeights_ * tf.square(self.targetQ - self.QValue)) # use adam optimiser (its good shit) - self.optimizer = tf.train.AdamOptimizer(self.learningRate).minimize(self.loss) + self.optimizer = 
tf.compat.v1.train.AdamOptimizer(self.learningRate).minimize(self.loss) class DDQN: @@ -321,53 +318,53 @@ def __init__(self, stateSize, actionSize, learningRate, name): self.learningRate = learningRate self.name = name - with tf.variable_scope(self.name): + with tf.compat.v1.variable_scope(self.name): # the inputs describing the state - self.inputs_ = tf.placeholder(tf.float32, [None, *self.stateSize], name="inputs") + self.inputs_ = tf.compat.v1.placeholder(tf.float32, [None, *self.stateSize], name="inputs") # the one hotted action that we took # e.g. if we took the 3rd action action_ = [0,0,1,0,0,0,0] - self.actions_ = tf.placeholder(tf.float32, [None, self.actionSize], name="actions") + self.actions_ = tf.compat.v1.placeholder(tf.float32, [None, self.actionSize], name="actions") # the target = reward + the discounted maximum possible q value of hte next state - self.targetQ = tf.placeholder(tf.float32, [None], name="target") + self.targetQ = tf.compat.v1.placeholder(tf.float32, [None], name="target") - self.ISWeights_ = tf.placeholder(tf.float32, [None, 1], name='ISWeights') + self.ISWeights_ = tf.compat.v1.placeholder(tf.float32, [None, 1], name='ISWeights') - self.dense1 = tf.layers.dense(inputs=self.inputs_, + self.dense1 = tf.compat.v1.layers.dense(inputs=self.inputs_, units=16, activation=tf.nn.elu, - kernel_initializer=tf.contrib.layers.xavier_initializer(), + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), name="dense1") ## Here we separate into two streams # The one that calculate V(s) which is the value of the input state # in other words how good this state is - self.valueLayer = tf.layers.dense(inputs=self.dense1, + self.valueLayer = tf.compat.v1.layers.dense(inputs=self.dense1, units=16, activation=tf.nn.elu, - kernel_initializer=tf.contrib.layers.xavier_initializer(), + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", 
distribution="uniform"), name="valueLayer") - self.value = tf.layers.dense(inputs=self.valueLayer, + self.value = tf.compat.v1.layers.dense(inputs=self.valueLayer, units=1, activation=None, - kernel_initializer=tf.contrib.layers.xavier_initializer(), + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), name="value") # The one that calculate A(s,a) # which is the advantage of taking each action in this given state - self.advantageLayer = tf.layers.dense(inputs=self.dense1, + self.advantageLayer = tf.compat.v1.layers.dense(inputs=self.dense1, units=16, activation=tf.nn.elu, - kernel_initializer=tf.contrib.layers.xavier_initializer(), + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), name="advantageLayer") - self.advantage = tf.layers.dense(inputs=self.advantageLayer, + self.advantage = tf.compat.v1.layers.dense(inputs=self.advantageLayer, units=self.actionSize, activation=None, - kernel_initializer=tf.contrib.layers.xavier_initializer(), + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), name="advantages") # Aggregating layer @@ -386,7 +383,7 @@ def __init__(self, stateSize, actionSize, learningRate, name): self.loss = tf.reduce_mean(self.ISWeights_ * tf.square(self.targetQ - self.QValue)) # use adam optimiser (its good shit) - self.optimizer = tf.train.AdamOptimizer(self.learningRate).minimize(self.loss) + self.optimizer = tf.compat.v1.train.AdamOptimizer(self.learningRate).minimize(self.loss) class PrioritisedMemory: @@ -529,132 +526,52 @@ def total_priority(self): """ - a class inheriting from the pyglet window class which controls the game window and acts as the main class of the program """ - class MyWindow(pyglet.window.Window): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.set_minimum_size(400, 300) # set background color - 
backgroundColor = [0, 0, 0, 255] - backgroundColor = [i / 255 for i in backgroundColor] + backgroundColor = [0,0,0,1] glClearColor(*backgroundColor) - # load background image + self.game = Game() self.ai = QLearning(self.game) - """ - called when a key is hit - """ + self.firstClick = True def on_key_press(self, symbol, modifiers): pass - # if symbol == key.RIGHT: - # self.car.turningRight = True - # - # if symbol == key.LEFT: - # self.car.turningLeft = True - # - # if symbol == key.UP: - # self.car.accelerating = True - # - # if symbol == key.DOWN: - # self.car.reversing = True - - """ - called when a key is released - """ def on_close(self): self.ai.sess.close() + pass def on_key_release(self, symbol, modifiers): - pass - # if symbol == key.RIGHT: - # self.car.turningRight = False - # - # if symbol == key.LEFT: - # self.car.turningLeft = False - # - # if symbol == key.UP: - # self.car.accelerating = False - # - # if symbol == key.DOWN: - # self.car.reversing = False - # - # if symbol == key.SPACE: - # self.ai.training = not self.ai.training + if symbol == key.SPACE: + self.ai.training = not self.ai.training def on_mouse_press(self, x, y, button, modifiers): - # # print(x,y) - # if self.firstClick: - # self.clickPos = [x, y] - # else: - # # print("self.walls.append(Wall({}, {}, {}, {}))".format(self.clickPos[0], - # # displayHeight - self.clickPos[1], - # # x, displayHeight - y)) - # - # # self.gates.append(RewardGate(self.clickPos[0], self.clickPos[1], x, y)) - # - # self.firstClick = not self.firstClick pass - """ - called every frame - """ - def on_draw(self): + window.set_size(width=displayWidth, height=displayHeight) + self.clear() self.game.render() - # - # glPushMatrix() - # - # glTranslatef(-1, -1, 0) - # glScalef(1 / (displayWidth / 2), 1 / (displayHeight / 2), 1) - # - # self.clear() - # self.trackSprite.draw() - # self.car.show() - # - # for w in self.walls: - # w.draw() - # # for g in self.gates: - # # g.draw() - # vision = self.car.getState() - # - # 
for i in range(len(vision)): - # - # label = pyglet.text.Label("{}: {}".format(i,vision[i]), - # font_name='Times New Roman', - # font_size=24, - # x=10, y=50*i+250, - # anchor_x='left', anchor_y='center') - # label.draw() - # glPopMatrix() - - """ - called when window resized - """ - def on_resize(self, width, height): - glViewport(0, 0, width, height) - - """ - called every frame - """ def update(self, dt): for i in range(5): - if self.ai.training: self.ai.train() else: self.ai.test() return - # self.car.update() + pass + if __name__ == "__main__":