class Drawer:
    """Immediate-mode drawing helper built on pyglet's legacy OpenGL calls.

    The drawer keeps a current colour that every primitive uses. The colour
    must be a list (or tuple) of three 0-255 components, because the draw
    methods repeat it once per vertex with ``self.color * n``.
    """

    # Fewest segments used to approximate a circle. Without a floor, a very
    # small radius yields zero segments and a division by zero.
    MIN_CIRCLE_SEGMENTS = 8

    def __init__(self):
        self.color = [0, 0, 0]
        # Stored for callers; no draw method in this class reads it.
        self.lineThickness = 1

    def setLineThinkness(self, thinkness):
        """Store the line thickness (the misspelt name is the public API)."""
        self.lineThickness = thinkness

    def setColor(self, newColor):
        """Set the colour used by subsequent draw calls (sequence of 3 ints)."""
        self.color = newColor

    def line(self, x1, y1, x2, y2):
        """Draw a line segment from (x1, y1) to (x2, y2) in the current colour."""
        pyglet.graphics.draw(2, pyglet.gl.GL_LINES,
                             ("v2f", (x1, y1, x2, y2)),
                             ('c3B', self.color * 2))

    def rect(self, x, y, w, h):
        """Draw a filled rectangle with corner (x, y), width w and height h."""
        # Two triangles (0, 1, 2) and (0, 2, 3) over the four corners.
        pyglet.graphics.draw_indexed(4, pyglet.gl.GL_TRIANGLES, [0, 1, 2, 0, 2, 3],
                                     ('v2f', [x, y, x + w, y, x + w, y + h, x, y + h]),
                                     ('c3B', self.color * 4))

    def triangle(self, x1, y1, x2, y2, x3, y3):
        """Draw a filled triangle through the three given points."""
        pyglet.graphics.draw(3, pyglet.gl.GL_TRIANGLES,
                             ('v2f', [x1, y1, x2, y2, x3, y3]),
                             ('c3B', self.color * 3))

    def circle(self, x, y, radius):
        """Draw a filled circle centred on (x, y) as a triangle fan.

        The number of segments grows with the circumference but never drops
        below ``MIN_CIRCLE_SEGMENTS``. A non-positive radius draws nothing.
        """
        if radius <= 0:
            # Nothing visible to draw, and the segment maths would divide by 0.
            return

        iterations = max(self.MIN_CIRCLE_SEGMENTS, int(2 * radius * math.pi))
        s = math.sin(2 * math.pi / iterations)
        c = math.cos(2 * math.pi / iterations)

        # Start on the positive x axis and rotate by one segment per step.
        dx, dy = radius, 0

        glBegin(GL_TRIANGLE_FAN)
        gl.glColor4f(self.color[0] / 255, self.color[1] / 255, self.color[2] / 255, 1.0)
        glVertex2f(x, y)
        # iterations + 1 vertices so the fan closes back on its first point.
        for _ in range(iterations + 1):
            glVertex2f(x + dx, y + dy)
            dx, dy = (dx * c - dy * s), (dy * c + dx * s)
        glEnd()
np +import pyglet +from Globals import displayWidth, displayHeight +from Drawer import Drawer +from ShapeObjects import * +from PygameAdditionalMethods import * +import pygame + +drawer = Drawer() +vec2 = pygame.math.Vector2 + + +class Game: + no_of_actions = 9 + state_size = 15 + + def __init__(self): + trackImg = pyglet.image.load('images/track.png') + self.trackSprite = pyglet.sprite.Sprite(trackImg, x=0, y=0) + # initiate car + + # initiate walls + self.walls = [] + self.gates = [] + + self.set_walls() + self.set_gates() + self.firstClick = True + + self.car = Car(self.walls, self.gates) + + def set_walls(self): + self.walls.append(Wall(240, 809, 200, 583)) + self.walls.append(Wall(200, 583, 218, 395)) + self.walls.append(Wall(218, 395, 303, 255)) + self.walls.append(Wall(303, 255, 548, 173)) + self.walls.append(Wall(548, 173, 764, 179)) + self.walls.append(Wall(764, 179, 1058, 198)) + self.walls.append(Wall(1055, 199, 1180, 215)) + self.walls.append(Wall(1177, 215, 1220, 272)) + self.walls.append(Wall(1222, 273, 1218, 367)) + self.walls.append(Wall(1218, 367, 1150, 437)) + self.walls.append(Wall(1150, 437, 1044, 460)) + self.walls.append(Wall(1044, 460, 757, 600)) + self.walls.append(Wall(757, 600, 1099, 570)) + self.walls.append(Wall(1100, 570, 1187, 508)) + self.walls.append(Wall(1187, 507, 1288, 443)) + self.walls.append(Wall(1288, 443, 1463, 415)) + self.walls.append(Wall(1463, 415, 1615, 478)) + self.walls.append(Wall(1617, 479, 1727, 679)) + self.walls.append(Wall(1727, 679, 1697, 874)) + self.walls.append(Wall(1694, 872, 1520, 964)) + self.walls.append(Wall(1520, 964, 1100, 970)) + self.walls.append(Wall(1105, 970, 335, 960)) + self.walls.append(Wall(339, 960, 264, 899)) + self.walls.append(Wall(263, 897, 238, 803)) + self.walls.append(Wall(317, 782, 274, 570)) + self.walls.append(Wall(275, 569, 284, 407)) + self.walls.append(Wall(284, 407, 363, 317)) + self.walls.append(Wall(363, 317, 562, 240)) + self.walls.append(Wall(562, 240, 1114, 284)) + 
self.walls.append(Wall(1114, 284, 1120, 323)) + self.walls.append(Wall(1120, 323, 1045, 377)) + self.walls.append(Wall(1045, 378, 682, 548)) + self.walls.append(Wall(682, 548, 604, 610)) + self.walls.append(Wall(604, 612, 603, 695)) + self.walls.append(Wall(605, 695, 702, 713)) + self.walls.append(Wall(703, 712, 1128, 642)) + self.walls.append(Wall(1129, 642, 1320, 512)) + self.walls.append(Wall(1323, 512, 1464, 497)) + self.walls.append(Wall(1464, 497, 1579, 535)) + self.walls.append(Wall(1579, 535, 1660, 701)) + self.walls.append(Wall(1660, 697, 1634, 818)) + self.walls.append(Wall(1634, 818, 1499, 889)) + self.walls.append(Wall(1499, 889, 395, 883)) + self.walls.append(Wall(395, 883, 330, 838)) + self.walls.append(Wall(330, 838, 315, 782)) + self.walls.append(Wall(319, 798, 306, 725)) + self.walls.append(Wall(276, 580, 277, 543)) + self.walls.append(Wall(603, 639, 622, 590)) + self.walls.append(Wall(599, 655, 621, 704)) + self.walls.append(Wall(1074, 571, 1115, 558)) + self.walls.append(Wall(1314, 516, 1333, 511)) + self.walls.append(Wall(1692, 875, 1706, 830)) + self.walls.append(Wall(277, 912, 255, 872)) + self.walls.append(Wall(1214, 262, 1225, 288)) + self.walls.append(Wall(1601, 470, 1625, 490)) + self.walls.append(Wall(1119, 644, 1139, 634)) + self.walls.append(Wall(687, 710, 719, 710)) + self.walls.append(Wall(1721, 664, 1727, 696)) + self.walls.append(Wall(1015, 392, 1065, 362)) + self.walls.append(Wall(1091, 572, 1104, 568)) + self.walls.append(Wall(1157, 528, 1233, 478)) + + def set_gates(self): + self.gates.append(RewardGate(314, 345, 200, 326)) + self.gates.append(RewardGate(187, 435, 311, 451)) + self.gates.append(RewardGate(307, 537, 171, 555)) + self.gates.append(RewardGate(234, 681, 345, 628)) + self.gates.append(RewardGate(408, 682, 363, 788)) + self.gates.append(RewardGate(428, 816, 481, 712)) + self.gates.append(RewardGate(568, 733, 543, 854)) + self.gates.append(RewardGate(678, 858, 675, 710)) + self.gates.append(RewardGate(852, 708, 855, 
848)) + self.gates.append(RewardGate(995, 836, 985, 705)) + self.gates.append(RewardGate(1059, 710, 1076, 821)) + self.gates.append(RewardGate(1078, 667, 1172, 572)) + self.gates.append(RewardGate(997, 616, 1076, 532)) + self.gates.append(RewardGate(967, 492, 909, 566)) + self.gates.append(RewardGate(788, 512, 839, 438)) + self.gates.append(RewardGate(790, 405, 781, 285)) + self.gates.append(RewardGate(891, 302, 899, 427)) + self.gates.append(RewardGate(1004, 434, 1027, 334)) + self.gates.append(RewardGate(1139, 344, 1084, 452)) + self.gates.append(RewardGate(1171, 502, 1233, 416)) + self.gates.append(RewardGate(1305, 454, 1243, 556)) + self.gates.append(RewardGate(1365, 588, 1408, 480)) + self.gates.append(RewardGate(1487, 472, 1524, 587)) + self.gates.append(RewardGate(1642, 508, 1575, 432)) + self.gates.append(RewardGate(1608, 360, 1709, 419)) + self.gates.append(RewardGate(1744, 324, 1625, 296)) + self.gates.append(RewardGate(1609, 231, 1727, 190)) + self.gates.append(RewardGate(1617, 66, 1541, 163)) + self.gates.append(RewardGate(1487, 135, 1510, 14)) + self.gates.append(RewardGate(1344, 16, 1328, 150)) + self.gates.append(RewardGate(1077, 142, 1067, 14)) + self.gates.append(RewardGate(909, 16, 900, 130)) + self.gates.append(RewardGate(718, 138, 698, 20)) + self.gates.append(RewardGate(551, 18, 567, 132)) + self.gates.append(RewardGate(445, 138, 413, 13)) + self.gates.append(RewardGate(379, 154, 243, 80)) + self.gates.append(RewardGate(357, 221, 203, 182)) + + def new_episode(self): + self.car.reset() + + + def get_state(self): + return self.car.getState() + pass + + def make_action(self, action): + # returns reward + actionNo = np.argmax(action) + self.car.updateWithAction(actionNo) + return self.car.reward + + def is_episode_finished(self): + return self.car.dead + + def get_score(self): + return self.car.score + + def get_lifespan(self): + return self.car.lifespan + + def render(self): + glPushMatrix() + # + # glTranslatef(-1, -1, 0) + # glScalef(1 / 
class Wall:
    """A track wall: a line segment the car must not touch.

    Constructor y coordinates are subtracted from ``displayHeight`` before
    being stored, so the stored segment is vertically flipped relative to the
    values passed in.
    """

    def __init__(self, x1, y1, x2, y2):
        self.x1, self.x2 = x1, x2
        self.y1 = displayHeight - y1
        self.y2 = displayHeight - y2

        wall_line = Line(self.x1, self.y1, self.x2, self.y2)
        wall_line.setLineThinkness(2)
        self.line = wall_line

    def draw(self):
        """Draw the wall's line."""
        self.line.draw()

    def hitCar(self, car):
        """Return True if any edge of the car's hitbox crosses this wall.

        The hitbox is a rectangle centred on (car.x, car.y), aligned with
        car.direction, car.width long and car.height - 4 across.
        """
        # The hitbox is 4 units narrower than the sprite height.
        half_forward = vec2(car.direction) * car.width / 2
        half_side = vec2(car.direction).rotate(-90) * (car.height - 4) / 2
        centre = vec2(car.x, car.y)

        # Corners in order round the rectangle so consecutive pairs are edges.
        corners = [centre + (half_forward * along) + (half_side * across)
                   for along, across in ((1, 1), (1, -1), (-1, -1), (-1, 1))]

        # Pair each corner with the next one, wrapping back to the first.
        edges = zip(corners, corners[1:] + corners[:1])
        return any(
            linesCollided(self.x1, self.y1, self.x2, self.y2,
                          start.x, start.y, end.x, end.y)
            for start, end in edges
        )
since the car sprite isn't perfectly square the hitbox is a little smaller than the width of the car + ch = car.height - 4 + rightVector = vec2(car.direction) + upVector = vec2(car.direction).rotate(-90) + carCorners = [] + cornerMultipliers = [[1, 1], [1, -1], [-1, -1], [-1, 1]] + carPos = vec2(car.x, car.y) + for i in range(4): + carCorners.append(carPos + (rightVector * cw / 2 * cornerMultipliers[i][0]) + + (upVector * ch / 2 * cornerMultipliers[i][1])) + + for i in range(4): + j = i + 1 + j = j % 4 + if linesCollided(self.x1, self.y1, self.x2, self.y2, carCorners[i].x, carCorners[i].y, carCorners[j].x, + carCorners[j].y): + return True + return False + + +class Car: + + def __init__(self, walls, rewardGates): + global vec2 + self.x = 258 + self.y = 288 + self.vel = 0 + self.direction = vec2(0, 1) + self.direction = self.direction.rotate(180 / 12) + self.acc = 0 + self.width = 40 + self.height = 20 + self.turningRate = 5.0 / self.width + self.friction = 0.98 + self.maxSpeed = self.width / 4.0 + self.maxReverseSpeed = -1 * self.maxSpeed / 2.0 + self.accelerationSpeed = self.width / 160.0 + self.dead = False + self.driftMomentum = 0 + self.driftFriction = 0.87 + self.lineCollisionPoints = [] + self.collisionLineDistances = [] + self.vectorLength = 300 + + self.carPic = pyglet.image.load('images/car.png') + self.carSprite = pyglet.sprite.Sprite(self.carPic, x=self.x, y=self.y) + self.carSprite.update(rotation=0, scale_x=self.width / self.carSprite.width, + scale_y=self.height / self.carSprite.height) + + self.turningLeft = False + self.turningRight = False + self.accelerating = False + self.reversing = False + self.walls = walls + self.rewardGates = rewardGates + self.rewardNo = 0 + + self.directionToRewardGate = self.rewardGates[self.rewardNo].center - vec2(self.x, self.y) + + self.reward = 0 + + self.score = 0 + self.lifespan = 0 + """ + draws the car to the screen + """ + + def reset(self): + global vec2 + self.x = 258 + self.y = 288 + self.vel = 0 + 
self.direction = vec2(0, 1) + self.direction = self.direction.rotate(180 / 12) + self.acc = 0 + self.dead = False + self.driftMomentum = 0 + self.lineCollisionPoints = [] + self.collisionLineDistances = [] + + self.turningLeft = False + self.turningRight = False + self.accelerating = False + self.reversing = False + self.rewardNo = 0 + self.reward = 0 + + self.lifespan = 0 + self.score = 0 + for g in self.rewardGates: + g.active = True + + def show(self): + # first calculate the center of the car in order to allow the + # rotation of the car to be anchored around the center + upVector = self.direction.rotate(90) + drawX = self.direction.x * self.width / 2 + upVector.x * self.height / 2 + drawY = self.direction.y * self.width / 2 + upVector.y * self.height / 2 + self.carSprite.update(x=self.x - drawX, y=self.y - drawY, rotation=-get_angle(self.direction)) + self.carSprite.draw() + # self.showCollisionVectors() + + """ + returns a vector of where a point on the car is after rotation + takes the position desired relative to the center of the car when the car is facing to the right + """ + + def getPositionOnCarRelativeToCenter(self, right, up): + global vec2 + w = self.width + h = self.height + rightVector = vec2(self.direction) + rightVector.normalize() + upVector = self.direction.rotate(90) + upVector.normalize() + + return vec2(self.x, self.y) + ((rightVector * right) + (upVector * up)) + + def updateWithAction(self, actionNo): + self.turningLeft = False + self.turningRight = False + self.accelerating = False + self.reversing = False + + if actionNo == 0: + self.turningLeft = True + elif actionNo == 1: + self.turningRight = True + elif actionNo == 2: + self.accelerating = True + elif actionNo == 3: + self.reversing = True + elif actionNo == 4: + self.accelerating = True + self.turningLeft = True + elif actionNo == 5: + self.accelerating = True + self.turningRight = True + elif actionNo == 6: + self.reversing = True + self.turningLeft = True + elif actionNo == 7: + 
def move(self):
    """Advance the car one step using velocity, friction and drift.

    The velocity is updated from the acceleration, damped by friction and
    clamped by ``constrainVel``. The position then moves by ``vel`` along
    ``direction`` plus ``driftMomentum`` along the axis 90 degrees from it.
    Finally the drift momentum decays by ``driftFriction``.

    The earlier version also called ``normalize()`` on the displacement and
    multiplied its components by ``abs(vel)``, but discarded every result, so
    those statements had no effect. They are removed here; the movement is
    unchanged.
    NOTE(review): if normalise-and-rescale was really intended, enabling it
    would change the car physics - confirm before doing so.
    """
    self.vel += self.acc
    self.vel *= self.friction
    self.constrainVel()

    # Sideways axis the drift momentum pushes the car along.
    drift_axis = self.direction.rotate(90)

    # Use the drift momentum from before this step's decay, as before.
    step_x = self.vel * self.direction.x + self.driftMomentum * drift_axis.x
    step_y = self.vel * self.direction.y + self.driftMomentum * drift_axis.y
    self.driftMomentum *= self.driftFriction

    self.x += step_x
    self.y += step_y
+ + """ + changes the cars direction and acceleration based on the users inputs + """ + + def updateControls(self): + multiplier = 1 + if abs(self.vel) < 5: + multiplier = abs(self.vel) / 5 + if self.vel < 0: + multiplier *= -1 + + driftAmount = self.vel * self.turningRate * self.width / (9.0 * 8.0) + if self.vel < 5: + driftAmount = 0 + + if self.turningLeft: + self.direction = self.direction.rotate(radiansToAngle(self.turningRate) * multiplier) + + self.driftMomentum -= driftAmount + elif self.turningRight: + self.direction = self.direction.rotate(-radiansToAngle(self.turningRate) * multiplier) + self.driftMomentum += driftAmount + self.acc = 0 + if self.accelerating: + if self.vel < 0: + self.acc = 3 * self.accelerationSpeed + else: + self.acc = self.accelerationSpeed + elif self.reversing: + if self.vel > 0: + self.acc = -3 * self.accelerationSpeed + else: + self.acc = -1 * self.accelerationSpeed + + """ + checks every wall and if the car has hit a wall returns true + """ + + def hitAWall(self): + for wall in self.walls: + if wall.hitCar(self): + return True + + return False + + """ + returns the point of collision of a line (x1,y1,x2,y2) with the walls, + if multiple walls are hit it returns the closest collision point + """ + + def getCollisionPointOfClosestWall(self, x1, y1, x2, y2): + global vec2 + minDist = 2 * displayWidth + closestCollisionPoint = vec2(0, 0) + for wall in self.walls: + collisionPoint = getCollisionPoint(x1, y1, x2, y2, wall.x1, wall.y1, wall.x2, wall.y2) + if collisionPoint is None: + continue + if dist(x1, y1, collisionPoint.x, collisionPoint.y) < minDist: + minDist = dist(x1, y1, collisionPoint.x, collisionPoint.y) + closestCollisionPoint = vec2(collisionPoint) + return closestCollisionPoint + + """ + by creating lines in many directions from the car and getting the closest collision point of that line + we create "vision vectors" which will allow the car to 'see' + kinda like a sonar system + """ + + def getState(self): + 
self.setVisionVectors() + normalizedVisionVectors = [1 - (max(1.0, line) / self.vectorLength) for line in self.collisionLineDistances] + + normalizedForwardVelocity = max(0.0, self.vel / self.maxSpeed) + normalizedReverseVelocity = max(0.0, self.vel / self.maxReverseSpeed) + if self.driftMomentum > 0: + normalizedPosDrift = self.driftMomentum / 5 + normalizedNegDrift = 0 + else: + normalizedPosDrift = 0 + normalizedNegDrift = self.driftMomentum / -5 + + normalizedAngleOfNextGate = (get_angle(self.direction) - get_angle(self.directionToRewardGate)) % 360 + if normalizedAngleOfNextGate > 180: + normalizedAngleOfNextGate = -1 * (360 - normalizedAngleOfNextGate) + + normalizedAngleOfNextGate /= 180 + + normalizedState = [*normalizedVisionVectors, normalizedForwardVelocity, normalizedReverseVelocity, + normalizedPosDrift, normalizedNegDrift, normalizedAngleOfNextGate] + return np.array(normalizedState) + + def setVisionVectors(self): + h = self.height - 4 + w = self.width + self.collisionLineDistances = [] + self.lineCollisionPoints = [] + self.setVisionVector(w / 2, 0, 0) + self.setVisionVector(w / 2, -h / 2, -180 / 16) + self.setVisionVector(w / 2, -h / 2, -180 / 4) + self.setVisionVector(w / 2, -h / 2, -4 * 180 / 8) + + self.setVisionVector(w / 2, h / 2, 180 / 16) + self.setVisionVector(w / 2, h / 2, 180 / 4) + self.setVisionVector(w / 2, h / 2, 4 * 180 / 8) + + self.setVisionVector(-w / 2, -h / 2, -6 * 180 / 8) + self.setVisionVector(-w / 2, h / 2, 6 * 180 / 8) + self.setVisionVector(-w / 2, 0, 180) + + """ + calculates and stores the distance to the nearest wall given a vector + """ + + def setVisionVector(self, startX, startY, angle): + collisionVectorDirection = self.direction.rotate(angle) + collisionVectorDirection = collisionVectorDirection.normalize() * self.vectorLength + startingPoint = self.getPositionOnCarRelativeToCenter(startX, startY) + collisionPoint = self.getCollisionPointOfClosestWall(startingPoint.x, startingPoint.y, + startingPoint.x + 
def get_angle(vec):
    """Return the angle of ``vec`` in degrees, or 0 for a zero-length vector.

    ``vec`` must provide ``length()``, ``x`` and ``y``. The angle is measured
    with ``atan2(y, x)``, so it lies in (-180, 180].
    """
    if vec.length() == 0:
        return 0
    return math.degrees(math.atan2(vec.y, vec.x))


def angleToRadians(angle):
    """Convert an angle in degrees to radians."""
    return angle / (180 / math.pi)


def radiansToAngle(rads):
    """Convert an angle in radians to degrees."""
    return rads * 180 / math.pi


def _segment_intersection_params(x1, y1, x2, y2, x3, y3, x4, y4):
    """Return (uA, uB) for the crossing of two infinite lines, or None.

    ``uA`` is the fractional position of the crossing along segment
    (x1, y1)-(x2, y2) and ``uB`` the position along (x3, y3)-(x4, y4).
    None is returned when the denominator is zero, i.e. the segments are
    parallel, collinear, or one of them has zero length.
    """
    denominator = (y4 - y3) * (x2 - x1) - (x4 - x3) * (y2 - y1)
    if denominator == 0:
        # No single crossing point exists; dividing would raise.
        return None
    uA = ((x4 - x3) * (y1 - y3) - (y4 - y3) * (x1 - x3)) / denominator
    uB = ((x2 - x1) * (y1 - y3) - (y2 - y1) * (x1 - x3)) / denominator
    return uA, uB


def linesCollided(x1, y1, x2, y2, x3, y3, x4, y4):
    """Return True if segment (x1, y1)-(x2, y2) crosses (x3, y3)-(x4, y4).

    Parallel, collinear and zero-length segments are reported as not
    colliding instead of raising ZeroDivisionError.
    """
    params = _segment_intersection_params(x1, y1, x2, y2, x3, y3, x4, y4)
    if params is None:
        return False
    uA, uB = params
    # The crossing must lie within both segments, end points included.
    return 0 <= uA <= 1 and 0 <= uB <= 1


def getCollisionPoint(x1, y1, x2, y2, x3, y3, x4, y4):
    """Return the crossing point of the two segments as a vec2, or None.

    None is returned when the segments do not cross, including the parallel,
    collinear and zero-length cases that used to raise ZeroDivisionError.
    """
    params = _segment_intersection_params(x1, y1, x2, y2, x3, y3, x4, y4)
    if params is None:
        return None
    uA, uB = params
    if 0 <= uA <= 1 and 0 <= uB <= 1:
        # Walk uA of the way along the first segment.
        intersectionX = x1 + (uA * (x2 - x1))
        intersectionY = y1 + (uA * (y2 - y1))
        return vec2(intersectionX, intersectionY)
    return None


def dist(x1, y1, x2, y2):
    """Return the Euclidean distance between (x1, y1) and (x2, y2)."""
    return math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
Memory can keep +pretrain_length = memory_size # Number of experiences stored in the Memory when initialized for the first time + +### MODIFY THIS TO FALSE IF YOU JUST WANT TO SEE THE TRAINED AGENT +training = False + +## TURN THIS TO TRUE IF YOU WANT TO RENDER THE ENVIRONMENT +episode_render = True + +load = True + +starting_episode = 0 + +load_traing_model = False + +load_training_model_number = 300 + + +class DDDQNNet: + def __init__(self, state_size, action_size, learning_rate, name): + self.state_size = state_size + self.action_size = action_size + self.learning_rate = learning_rate + self.name = name + + # We use tf.variable_scope here to know which network we're using (DQN or target_net) + # it will be useful when we will update our w- parameters (by copy the DQN parameters) + with tf.compat.v1.variable_scope(self.name): + # We create the placeholders + # *state_size means that we take each elements of state_size in tuple hence is like if we wrote + # [None, 100, 120, 4] + self.inputs_ = tf.compat.v1.placeholder(tf.float32, [None, *state_size], name="inputs") + + # + self.ISWeights_ = tf.compat.v1.placeholder(tf.float32, [None, 1], name='IS_weights') + + self.actions_ = tf.compat.v1.placeholder(tf.float32, [None, action_size], name="actions_") + + # Remember that target_Q is the R(s,a) + ymax Qhat(s', a') + self.target_Q = tf.compat.v1.placeholder(tf.float32, [None], name="target") + + self.dense1 = tf.compat.v1.layers.dense(inputs=self.inputs_, + units=256, + activation=tf.nn.elu, + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), + name="dense1") + + self.dense2 = tf.compat.v1.layers.dense(inputs=self.dense1, + units=256, + activation=tf.nn.elu, + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), + name="dense2") + + ## Here we separate into two streams + # The one that calculate V(s) + self.value_fc = 
tf.compat.v1.layers.dense(inputs=self.dense2, + units=256, + activation=tf.nn.elu, + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), + name="value_fc") + + self.value = tf.compat.v1.layers.dense(inputs=self.value_fc, + units=1, + activation=None, + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), + name="value") + + # The one that calculate A(s,a) + self.advantage_fc = tf.compat.v1.layers.dense(inputs=self.dense2, + units=256, + activation=tf.nn.elu, + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), + name="advantage_fc") + + self.advantage = tf.compat.v1.layers.dense(inputs=self.advantage_fc, + units=self.action_size, + activation=None, + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), + name="advantages") + + # Agregating layer + # Q(s,a) = V(s) + (A(s,a) - 1/|A| * sum A(s,a')) + self.output = self.value + tf.subtract(self.advantage, + tf.reduce_mean(input_tensor=self.advantage, axis=1, keepdims=True)) + + # Q is our predicted Q value. 
+ self.Q = tf.reduce_sum(input_tensor=tf.multiply(self.output, self.actions_), axis=1) + + # The loss is modified because of PER + self.absolute_errors = tf.abs(self.target_Q - self.Q) # for updating Sumtree + + self.loss = tf.reduce_mean(input_tensor=self.ISWeights_ * tf.math.squared_difference(self.target_Q, self.Q)) + + self.optimizer = tf.compat.v1.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss) + + +# Reset the graph +tf.compat.v1.reset_default_graph() + +# Instantiate the DQNetwork +DQNetwork = DDDQNNet(state_size, action_size, learning_rate, name="DQNetwork") + +# Instantiate the target network +TargetNetwork = DDDQNNet(state_size, action_size, learning_rate, name="TargetNetwork") + + +class SumTree(object): + """ + This SumTree code is modified version of Morvan Zhou: + https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow/blob/master/contents/5.2_Prioritized_Replay_DQN/RL_brain.py + """ + data_pointer = 0 + + """ + Here we initialize the tree with all nodes = 0, and initialize the data with all values = 0 + """ + + def __init__(self, capacity): + self.capacity = capacity # Number of leaf nodes (final nodes) that contains experiences + + # Generate the tree with all nodes values = 0 + # To understand this calculation (2 * capacity - 1) look at the schema above + # Remember we are in a binary node (each node has max 2 children) so 2x size of leaf (capacity) - 1 (root node) + # Parent nodes = capacity - 1 + # Leaf nodes = capacity + self.tree = np.zeros(2 * capacity - 1) + + """ tree: + 0 + / \ + 0 0 + / \ / \ + 0 0 0 0 [Size: capacity] it's at this line that there is the priorities score (aka pi) + """ + + # Contains the experiences (so the size of data is capacity) + self.data = np.zeros(capacity, dtype=object) + + """ + Here we add our priority score in the sumtree leaf and add the experience in data + """ + + def add(self, priority, data): + # Look at what index we want to put the experience + tree_index = self.data_pointer + 
self.capacity - 1 + + """ tree: + 0 + / \ + 0 0 + / \ / \ +tree_index 0 0 0 We fill the leaves from left to right + """ + + # Update data frame + self.data[self.data_pointer] = data + + # Update the leaf + self.update(tree_index, priority) + + # Add 1 to data_pointer + self.data_pointer += 1 + + if self.data_pointer >= self.capacity: # If we're above the capacity, you go back to first index (we overwrite) + self.data_pointer = 0 + + """ + Update the leaf priority score and propagate the change through tree + """ + + def update(self, tree_index, priority): + # Change = new priority score - former priority score + change = priority - self.tree[tree_index] + self.tree[tree_index] = priority + + # then propagate the change through tree + while tree_index != 0: # this method is faster than the recursive loop in the reference code + + """ + Here we want to access the line above + THE NUMBERS IN THIS TREE ARE THE INDEXES NOT THE PRIORITY VALUES + + 0 + / \ + 1 2 + / \ / \ + 3 4 5 [6] + + If we are in leaf at index 6, we updated the priority score + We need then to update index 2 node + So tree_index = (tree_index - 1) // 2 + tree_index = (6-1)//2 + tree_index = 2 (because // round the result) + """ + tree_index = (tree_index - 1) // 2 + self.tree[tree_index] += change + + """ + Here we get the leaf_index, priority value of that leaf and experience associated with that index + """ + + def get_leaf(self, v): + """ + Tree structure and array storage: + Tree index: + 0 -> storing priority sum + / \ + 1 2 + / \ / \ + 3 4 5 6 -> storing priority for experiences + Array type for storing: + [0,1,2,3,4,5,6] + """ + parent_index = 0 + + while True: # the while loop is faster than the method in the reference code + left_child_index = 2 * parent_index + 1 + right_child_index = left_child_index + 1 + + # If we reach bottom, end the search + if left_child_index >= len(self.tree): + leaf_index = parent_index + break + + else: # downward search, always search for a higher priority node + 
class Memory(object):  # stored as ( s, a, r, s_ ) in SumTree
    """Prioritized experience replay buffer backed by a SumTree.

    Adapted from:
    https://github.com/jaara/AI-blog/blob/master/Seaquest-DDQN-PER.py

    Priorities live in the leaves of ``self.tree`` and experiences in its
    data array, which is overwritten in a ring once the capacity is reached.
    """
    PER_e = 0.01  # Added to every error so no experience has zero priority
    PER_a = 0.6  # Trade-off between priority-only (1) and uniform (0) sampling
    PER_b = 0.4  # Importance-sampling exponent, annealed up towards 1

    PER_b_increment_per_sampling = 0.001

    absolute_error_upper = 1.  # clipped abs error

    def __init__(self, capacity):
        """Create an empty buffer holding at most ``capacity`` experiences."""
        self.tree = SumTree(capacity)

    def _leaf_priorities(self):
        """Return the slice of the tree array that holds the leaf priorities."""
        return self.tree.tree[-self.tree.capacity:]

    def store(self, experience):
        """Store ``experience`` with the highest priority currently in use.

        New experiences get the current maximum leaf priority so they are
        likely to be sampled at least once. If every leaf is still 0 the
        clip value ``absolute_error_upper`` is used instead, because a zero
        priority could never be selected.
        """
        max_priority = np.max(self._leaf_priorities())
        if max_priority == 0:
            max_priority = self.absolute_error_upper

        self.tree.add(max_priority, experience)  # set the max p for new p

    def sample(self, n):
        """Sample a minibatch of ``n`` experiences in proportion to priority.

        The range [0, total_priority] is split into ``n`` equal segments and
        one value is drawn uniformly from each; the leaf each value falls in
        is the sampled experience.

        Returns:
            (b_idx, memory_b, b_ISWeights): an int32 array of ``n`` tree
            indices, a list of ``n`` one-element lists ``[experience]``, and
            an ``(n, 1)`` float32 array of importance-sampling weights
            normalised so the largest possible weight is 1.

        Raises:
            ValueError: if the buffer holds no priority mass (it is empty).
        """
        total_priority = self.tree.total_priority
        if not total_priority > 0:
            raise ValueError("cannot sample from an empty Memory (total priority is 0)")

        memory_b = []
        b_idx = np.empty((n,), dtype=np.int32)
        b_ISWeights = np.empty((n, 1), dtype=np.float32)

        priority_segment = total_priority / n

        # Anneal the importance-sampling exponent towards 1 on every call.
        self.PER_b = min(1., self.PER_b + self.PER_b_increment_per_sampling)

        # Normalise by the largest weight, which belongs to the smallest
        # stored priority. Unfilled leaves are 0 and must be ignored, or the
        # maximum becomes infinite and every weight collapses to 0.
        leaf_priorities = self._leaf_priorities()
        p_min = leaf_priorities[leaf_priorities > 0].min() / total_priority
        max_weight = (p_min * n) ** (-self.PER_b)

        for i in range(n):
            # A value is uniformly sampled from the i-th segment.
            a, b = priority_segment * i, priority_segment * (i + 1)
            value = np.random.uniform(a, b)

            # Retrieve the experience whose priority range contains the value.
            index, priority, data = self.tree.get_leaf(value)

            # P(j)
            sampling_probabilities = priority / total_priority

            # IS = (1/N * 1/P(i))**b / max wi == (N*P(i))**-b / max wi
            b_ISWeights[i, 0] = np.power(n * sampling_probabilities, -self.PER_b) / max_weight
            b_idx[i] = index

            memory_b.append([data])

        return b_idx, memory_b, b_ISWeights

    def batch_update(self, tree_idx, abs_errors):
        """Set new priorities for the sampled leaves from their TD errors.

        Each priority is ``min(abs_error + PER_e, absolute_error_upper) **
        PER_a``. ``abs_errors`` is not modified; earlier code added ``PER_e``
        to the caller's array in place.
        """
        shifted_errors = np.asarray(abs_errors) + self.PER_e  # avoid 0 priority
        clipped_errors = np.minimum(shifted_errors, self.absolute_error_upper)
        ps = np.power(clipped_errors, self.PER_a)

        for ti, p in zip(tree_idx, ps):
            self.tree.update(ti, p)
memory.store(experience) + + # Our state is now the next_state + state = next_state + + +def predict_action(explore_start, explore_stop, decay_rate, decay_step, state, actions): + ## EPSILON GREEDY STRATEGY + # Choose action a from state s using epsilon greedy. + ## First we randomize a number + exp_exp_tradeoff = np.random.rand() + + # Here we'll use an improved version of our epsilon greedy strategy used in Q-learning notebook + explore_probability = explore_stop + (explore_start - explore_stop) * np.exp(-decay_rate * decay_step) + + if (explore_probability > exp_exp_tradeoff): + # Make a random action (exploration) + action = random.choice(possible_actions) + + else: + # Get action from Q-network (exploitation) + # Estimate the Qs values state + Qs = sess.run(DQNetwork.output, feed_dict={DQNetwork.inputs_: state.reshape((1, *state.shape))}) + + # Take the biggest Q value (= the best action) + choice = np.argmax(Qs) + action = possible_actions[int(choice)] + + return action, explore_probability + + +# This function helps us to copy one set of variables to another +# In our case we use it when we want to copy the parameters of DQN to Target_network +# Thanks of the very good implementation of Arthur Juliani https://github.com/awjuliani +def update_target_graph(): + # Get the parameters of our DQNNetwork + from_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "DQNetwork") + + # Get the parameters of our Target_network + to_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "TargetNetwork") + + op_holder = [] + + # Update our target_network parameters with DQNNetwork parameters + for from_var, to_var in zip(from_vars, to_vars): + op_holder.append(to_var.assign(from_var)) + return op_holder + + +saver = tf.compat.v1.train.Saver() + + +class MyWindow(pyglet.window.Window): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.set_minimum_size(400, 300) + + # set background color + 
backgroundColor = [10, 0, 0, 255] + backgroundColor = [i / 255 for i in backgroundColor] + glClearColor(*backgroundColor) + # load background image + self.sess = tf.compat.v1.Session() + game.new_episode() + self.state = game.get_state() + self.nextState = [] + self.loadSession() + + def loadSession(self): + # if load_traing_model: + # directory = "./allModels/model{}/models/model.ckpt".format(load_training_model_number) + # saver.restore(self.sess, directory) + # else: + saver.restore(self.sess, "./models/model.ckpt") + + def on_draw(self): + game.render() + + def update(self, dt): + exp_exp_tradeoff = np.random.rand() + + if load_traing_model: + explore_probability = explore_stop + (explore_start - explore_stop) * np.exp(-decay_rate * load_training_model_number* 100) + else: + explore_probability = 0.0001 + + if explore_probability > exp_exp_tradeoff: + # Make a random action (exploration) + action = random.choice(possible_actions) + + else: + # Get action from Q-network (exploitation) + # Estimate the Qs values state + Qs = self.sess.run(DQNetwork.output, + feed_dict={DQNetwork.inputs_: self.state.reshape((1, *self.state.shape))}) + + # Take the biggest Q value (= the best action) + choice = np.argmax(Qs) + action = possible_actions[int(choice)] + + game.make_action(action) + # game.render() + done = game.is_episode_finished() + + if done: + game.new_episode() + self.state = game.get_state() + else: + self.next_state = game.get_state() + self.state = self.next_state + + +# Saver will help us to save our model +print("training") +if training: + with tf.compat.v1.Session() as sess: + # Initialize the variables + # if load: + + if load: + saver.restore(sess, "./models/model.ckpt") + else: + sess.run(tf.compat.v1.global_variables_initializer()) + + # Initialize the decay rate (that will use to reduce epsilon) + decay_step = 0 + + # Set tau = 0 + tau = 0 + + # Init the game + game.new_episode() + + # Update the parameters of our TargetNetwork with DQN_weights + 
update_target = update_target_graph() + sess.run(update_target) + + for episode in range(starting_episode, total_episodes): + # Set step to 0 + step = 0 + + # Initialize the rewards of the episode + episode_rewards = [] + + # Make a new episode and observe the first state + game.new_episode() + + state = game.get_state() + + while step < max_steps: + step += 1 + + # Increase the C step + tau += 1 + + # Increase decay_step + decay_step += 1 + + # With ϵ select a random action atat, otherwise select a = argmaxQ(st,a) + action, explore_probability = predict_action(explore_start, explore_stop, decay_rate, decay_step, state, + possible_actions) + + # Do the action + reward = game.make_action(action) + + # Look if the episode is finished + done = game.is_episode_finished() + + # Add the reward to total reward + episode_rewards.append(reward) + if step >= max_steps: + print("fuckin nice mate") + print('Episode: {}'.format(episode), + 'Total reward: {}'.format(np.sum(episode_rewards)), + 'Explore P: {:.4f}'.format(explore_probability)) + # If the game is finished + if done: + # the episode ends so no next state + next_state = np.zeros(state.shape, dtype=np.int) # changed + + # Set step = max_steps to end the episode + step = max_steps + + # Get the total reward of the episode + total_reward = np.sum(episode_rewards) + + print('Episode: {}'.format(episode), + '\tTotal reward: {:.4f}'.format(total_reward), + # '\tTraining loss: {:.4f}'.format(loss), + '\tExplore P: {:.4f}'.format(explore_probability), + '\tScore: {}'.format(game.get_score()), + '\tlifespan: {}'.format(game.get_lifespan()), + '\tactions per reward gate: {:.4f}'.format(game.get_lifespan() / (max(1, game.get_score())))) + + # Add experience to memory + experience = state, action, reward, next_state, done + memory.store(experience) + + else: + # Get the next state + next_state = game.get_state() + + # Add experience to memory + experience = state, action, reward, next_state, done + memory.store(experience) + + # 
st+1 is now our current state + state = next_state + + ### LEARNING PART + # Obtain random mini-batch from memory + tree_idx, batch, ISWeights_mb = memory.sample(batch_size) + + states_mb = np.array([each[0][0] for each in batch], ndmin=2) + actions_mb = np.array([each[0][1] for each in batch]) + rewards_mb = np.array([each[0][2] for each in batch]) + next_states_mb = np.array([each[0][3] for each in batch], ndmin=2) + dones_mb = np.array([each[0][4] for each in batch]) + + target_Qs_batch = [] + + # DOUBLE DQN Logic + # Use DQNNetwork to select the action to take at next_state (a') (action with the highest Q-value) + # Use TargetNetwork to calculate the Q_val of Q(s',a') + + # Get Q values for next_state + q_next_state = sess.run(DQNetwork.output, feed_dict={DQNetwork.inputs_: next_states_mb}) + + # Calculate Qtarget for all actions that state + q_target_next_state = sess.run(TargetNetwork.output, feed_dict={TargetNetwork.inputs_: next_states_mb}) + + # Set Q_target = r if the episode ends at s+1, otherwise set Q_target = r + gamma * Qtarget(s',a') + for i in range(0, len(batch)): + terminal = dones_mb[i] + + # We got a' + action = np.argmax(q_next_state[i]) + + # If we are in a terminal state, only equals reward + if terminal: + target_Qs_batch.append(rewards_mb[i]) + + else: + # Take the Qtarget for action a' + target = rewards_mb[i] + gamma * q_target_next_state[i][action] + target_Qs_batch.append(target) + + targets_mb = np.array([each for each in target_Qs_batch]) + + _, loss, absolute_errors = sess.run([DQNetwork.optimizer, DQNetwork.loss, DQNetwork.absolute_errors], + feed_dict={DQNetwork.inputs_: states_mb, + DQNetwork.target_Q: targets_mb, + DQNetwork.actions_: actions_mb, + DQNetwork.ISWeights_: ISWeights_mb}) + if loss == 0: + print(ISWeights_mb) + + # Update priority + memory.batch_update(tree_idx, absolute_errors) + + # Write TF Summaries + # summary = sess.run(write_op, feed_dict={DQNetwork.inputs_: states_mb, + # DQNetwork.target_Q: targets_mb, + # 
DQNetwork.actions_: actions_mb, + # DQNetwork.ISWeights_: ISWeights_mb}) + # writer.add_summary(summary, episode) + # writer.flush() + + if tau > max_tau: + # Update the parameters of our TargetNetwork with DQN_weights + update_target = update_target_graph() + sess.run(update_target) + tau = 0 + print("Model updated") + + if (episode < 100 and episode % 5 == 0) or (episode % 1000 == 0): + directory = "./allModels/model{}".format(episode) + if not os.path.exists(directory): + os.makedirs(directory) + save_path = saver.save(sess, "./allModels/model{}/models/model.ckpt".format(episode)) + # print("Model Saved") + + # Save model every 5 episodes + if episode % 5 == 0: + save_path = saver.save(sess, "./models/model.ckpt") + print("Model Saved") +else: + print("setting up window") + window = MyWindow(displayWidth, displayHeight, "AI Learns to Drive", resizable=False) + pyglet.clock.schedule_interval(window.update, 1 / frameRate) + pyglet.app.run() + +# print("testing") +# with tf.Session() as sess: +# +# # Load the model +# saver.restore(sess, "./models/model.ckpt") +# +# for i in range(10): +# print(i) +# game.new_episode() +# state = game.get_state() +# +# while not game.is_episode_finished(): +# ## EPSILON GREEDY STRATEGY +# # Choose action a from state s using epsilon greedy. 
+# ## First we randomize a number +# exp_exp_tradeoff = np.random.rand() +# +# explore_probability = 0.01 +# +# if (explore_probability > exp_exp_tradeoff): +# # Make a random action (exploration) +# action = random.choice(possible_actions) +# +# else: +# # Get action from Q-network (exploitation) +# # Estimate the Qs values state +# Qs = sess.run(DQNetwork.output, feed_dict={DQNetwork.inputs_: state.reshape((1, *state.shape))}) +# +# # Take the biggest Q value (= the best action) +# choice = np.argmax(Qs) +# action = possible_actions[int(choice)] +# +# game.make_action(action) +# window.draw(game) +# # game.render() +# done = game.is_episode_finished() +# +# if done: +# break +# +# else: +# next_state = game.get_state() +# state = next_state +# +# diff --git a/Converted_to_tensorflow_2/README.md b/Converted_to_tensorflow_2/README.md new file mode 100644 index 0000000..c672f29 --- /dev/null +++ b/Converted_to_tensorflow_2/README.md @@ -0,0 +1,14 @@ +# Car-QLearning + +Ensure you are using tensorflow version 2 before trying this: https://gist.github.com/off99555/fd59f204c02b5f704287227d67744d6f + +`pip install -r requirements.txt` +`python main.py` + +For handling eager exection: + + +code used to convert to tensorflow v2: https://www.tensorflow.org/guide/upgrade + +However currently it doesn't run still: +![error](error.png) \ No newline at end of file diff --git a/Converted_to_tensorflow_2/ShapeObjects.py b/Converted_to_tensorflow_2/ShapeObjects.py new file mode 100644 index 0000000..cb3ee3f --- /dev/null +++ b/Converted_to_tensorflow_2/ShapeObjects.py @@ -0,0 +1,58 @@ +import pyglet +from pyglet.gl import * +import math + + + + +class Triangle: + def __init__(self, x1, y1, x2, y2, x3, y3, col=[255, 255, 255]): + self.vertices = pyglet.graphics.vertex_list(3, ('v3f', [x1, y1, 0, x2, y2, 0, x3, y3, 0]), + ('c3B', [*col, *col, *col])) + + def show(self): + self.vertices.draw(GL_TRIANGLES) + + +class Rect: + def __init__(self, x, y, w, h): + self.x = x + self.y = y + 
self.w = w + self.h = h + self.col = [255, 0, 0] * 4 + # self.vertices = pyglet.graphics.vertex_list('v3f') + + def setColor(self, newColor): + self.col = newColor * 4 + + def draw(self): + x = self.x + y = self.y + w = self.w + h = self.h + pyglet.graphics.draw_indexed(4, pyglet.gl.GL_TRIANGLES, [0, 1, 2, 0, 2, 3], + ('v2f', [x, y, x + w, y, x + w, y + h, x, y + h]), + ('c3B', self.col)) + + +class Line: + def __init__(self, x1, y1, x2, y2): + self.x1 = x1 + self.y1 = y1 + self.x2 = x2 + self.y2 = y2 + self.color = [0, 0, 0] * 2 + self.lineThinkness = 1 + + def draw(self): + pyglet.gl.glLineWidth(self.lineThinkness) + pyglet.graphics.draw(2, pyglet.gl.GL_LINES, + ("v2f", (self.x1, self.y1, self.x2, self.y2)) + , ('c3B', self.color)) + + def setColor(self, newColor): + self.color = newColor * 2 + + def setLineThinkness(self, thinkness): + self.lineThinkness = thinkness diff --git a/Converted_to_tensorflow_2/__pycache__/Drawer.cpython-37.pyc b/Converted_to_tensorflow_2/__pycache__/Drawer.cpython-37.pyc new file mode 100644 index 0000000..96241db Binary files /dev/null and b/Converted_to_tensorflow_2/__pycache__/Drawer.cpython-37.pyc differ diff --git a/Converted_to_tensorflow_2/__pycache__/Game.cpython-37.pyc b/Converted_to_tensorflow_2/__pycache__/Game.cpython-37.pyc new file mode 100644 index 0000000..c98eb90 Binary files /dev/null and b/Converted_to_tensorflow_2/__pycache__/Game.cpython-37.pyc differ diff --git a/Converted_to_tensorflow_2/__pycache__/Globals.cpython-37.pyc b/Converted_to_tensorflow_2/__pycache__/Globals.cpython-37.pyc new file mode 100644 index 0000000..0d68b45 Binary files /dev/null and b/Converted_to_tensorflow_2/__pycache__/Globals.cpython-37.pyc differ diff --git a/Converted_to_tensorflow_2/__pycache__/PygameAdditionalMethods.cpython-37.pyc b/Converted_to_tensorflow_2/__pycache__/PygameAdditionalMethods.cpython-37.pyc new file mode 100644 index 0000000..3125262 Binary files /dev/null and 
b/Converted_to_tensorflow_2/__pycache__/PygameAdditionalMethods.cpython-37.pyc differ diff --git a/Converted_to_tensorflow_2/__pycache__/ShapeObjects.cpython-37.pyc b/Converted_to_tensorflow_2/__pycache__/ShapeObjects.cpython-37.pyc new file mode 100644 index 0000000..5898e53 Binary files /dev/null and b/Converted_to_tensorflow_2/__pycache__/ShapeObjects.cpython-37.pyc differ diff --git a/Converted_to_tensorflow_2/error.png b/Converted_to_tensorflow_2/error.png new file mode 100644 index 0000000..7621740 Binary files /dev/null and b/Converted_to_tensorflow_2/error.png differ diff --git a/Converted_to_tensorflow_2/images/car.png b/Converted_to_tensorflow_2/images/car.png new file mode 100644 index 0000000..116a3a6 Binary files /dev/null and b/Converted_to_tensorflow_2/images/car.png differ diff --git a/Converted_to_tensorflow_2/images/track.png b/Converted_to_tensorflow_2/images/track.png new file mode 100644 index 0000000..2f2279d Binary files /dev/null and b/Converted_to_tensorflow_2/images/track.png differ diff --git a/Converted_to_tensorflow_2/main.py b/Converted_to_tensorflow_2/main.py new file mode 100644 index 0000000..3959e7b --- /dev/null +++ b/Converted_to_tensorflow_2/main.py @@ -0,0 +1,664 @@ +import pyglet +from pyglet.gl import * +import pygame +import math +from pyglet.window import key +from Drawer import Drawer +# from PygameAdditionalMethods import * +from ShapeObjects import Line +import tensorflow as tf # Deep Learning library +import numpy as np # Handle matrices +from collections import deque +import random +import os +from Globals import displayHeight, displayWidth +from Game import Game + +frameRate = 30.0 + +vec2 = pygame.math.Vector2 + +tf.compat.v1.disable_eager_execution() + +""" +a line which the car object cannot touch +""" + +class QLearning: + def __init__(self, game): + + self.game = game + self.game.new_episode() + + self.stateSize = [game.state_size] + self.actionSize = game.no_of_actions + self.learningRate = 0.00025 + 
self.possibleActions = np.identity(self.actionSize, dtype=int) + + self.totalTrainingEpisodes = 100000 + self.maxSteps = 3600 + + self.batchSize = 64 + self.memorySize = 100000 + + self.maxEpsilon = 1 + self.minEpsilon = 0.01 + self.decayRate = 0.00001 + self.decayStep = 0 + self.gamma = 0.9 + self.training = True + + self.pretrainLength = self.batchSize + + self.maxTau = 10000 + self.tau = 0 + # reset the graph i guess, I don't know why therefore is already a graph happening but who cares + tf.compat.v1.reset_default_graph() + + self.sess = tf.compat.v1.Session() + + self.DQNetwork = DQN(self.stateSize, self.actionSize, self.learningRate, name='DQNetwork') + self.TargetNetwork = DQN(self.stateSize, self.actionSize, self.learningRate, name='TargetNetwork') + + self.memoryBuffer = PrioritisedMemory(self.memorySize) + self.pretrain() + + self.state = [] + self.trainingStepNo = 0 + + self.newEpisode = False + self.stepNo = 0 + self.episodeNo = 0 + self.saver = tf.compat.v1.train.Saver() + + load = False + loadFromEpisodeNo = 6300 + if load: + self.episodeNo = loadFromEpisodeNo + self.saver.restore(self.sess, "./allModels/model{}/models/model.ckpt".format(self.episodeNo)) + else: + self.sess.run(tf.compat.v1.global_variables_initializer()) + # self.sess.graph.finalize() + self.sess.run(self.update_target_graph()) + + # This function helps us to copy one set of variables to another + # In our case we use it when we want to copy the parameters of DQN to Target_network + # Thanks of the very good implementation of Arthur Juliani https://github.com/awjuliani + def update_target_graph(self): + + # Get the parameters of our DQNNetwork + from_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "DQNetwork") + + # Get the parameters of our Target_network + to_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "TargetNetwork") + + op_holder = [] + + # Update our target_network parameters with DQNNetwork parameters + for 
from_var, to_var in zip(from_vars, to_vars): + op_holder.append(to_var.assign(from_var)) + return op_holder + + def pretrain(self): + for i in range(self.pretrainLength): + if i == 0: + state = self.game.get_state() + + # pick a random movement and do it to populate the memory thing + # choice = random.randInt(self.actionSize) + # action = self.possibleActions[choice] + action = random.choice(self.possibleActions) + actionNo = np.argmax(action) + # now we need to get next state + reward = self.game.make_action(actionNo) + nextState = self.game.get_state() + self.newEpisode = False + + if self.game.is_episode_finished(): + reward = -100 + self.memoryBuffer.store((state, action, reward, nextState, True)) + self.game.new_episode() + state = self.game.get_state() + self.newEpisode = True + else: + self.memoryBuffer.store((state, action, reward, nextState, False)) + state = nextState + + print("pretrainingDone") + + def train(self): + + if self.trainingStepNo == 0: + self.state = self.game.get_state() + + if self.newEpisode: + self.state = self.game.get_state() + + if self.stepNo < self.maxSteps: + self.stepNo += 1 + self.decayStep += 1 + self.trainingStepNo += 1 + self.tau += 1 + + # choose best action if not exploring choose random otherwise + + epsilon = self.minEpsilon + (self.maxEpsilon - self.minEpsilon) * np.exp( + -self.decayRate * self.decayStep) + + if np.random.rand() < epsilon: + choice = random.randint(1, len(self.possibleActions)) - 1 + action = self.possibleActions[choice] + + else: + QValues = self.sess.run(self.DQNetwork.output, + feed_dict={self.DQNetwork.inputs_: np.array([self.state])}) + choice = np.argmax(QValues) + action = self.possibleActions[choice] + + actionNo = np.argmax(action) + # now we need to get next state + reward = self.game.make_action(actionNo) + + nextState = self.game.get_state() + + if (reward > 0): + print("Hell YEAH, Reward {}".format(reward)) + # if car is dead then finish episode + if self.game.is_episode_finished(): + 
reward = -100 + self.stepNo = self.maxSteps + print("DEAD!! Reward = -100") + + # print("Episode {} Step {} Action {} reward {} epsilon {} experiences stored {}" + # .format(self.episodeNo, self.stepNo, actionNo, reward, epsilon, self.trainingStepNo)) + + # add the experience to the memory buffer + self.memoryBuffer.store((self.state, action, reward, nextState, self.game.is_episode_finished())) + + self.state = nextState + + # learning part + # first we are gonna need to grab a random batch of experiences from out memory + treeIndexes, batch, ISWeights = self.memoryBuffer.sample(self.batchSize) + + statesFromBatch = np.array([exp[0][0] for exp in batch]) + actionsFromBatch = np.array([exp[0][1] for exp in batch]) + rewardsFromBatch = np.array([exp[0][2] for exp in batch]) + nextStatesFromBatch = np.array([exp[0][3] for exp in batch]) + carDieBooleansFromBatch = np.array([exp[0][4] for exp in batch]) + + targetQsFromBatch = [] + + # predict the q values of the next state for each experience in the batch + QValueOfNextStates = self.sess.run(self.TargetNetwork.output, + feed_dict={self.TargetNetwork.inputs_: nextStatesFromBatch}) + + for i in range(self.batchSize): + action = np.argmax(QValueOfNextStates[i]) # double DQN + terminalState = carDieBooleansFromBatch[i] + if terminalState: + targetQsFromBatch.append(rewardsFromBatch[i]) + else: + # target = rewardsFromBatch[i] + self.gamma * np.max(QValueOfNextStates[i]) + target = rewardsFromBatch[i] + self.gamma * QValueOfNextStates[i][action] # double DQN + targetQsFromBatch.append(target) + + targetsForBatch = np.array([t for t in targetQsFromBatch]) + + loss, _, absoluteErrors = self.sess.run( + [self.DQNetwork.loss, self.DQNetwork.optimizer, self.DQNetwork.absoluteError], + feed_dict={self.DQNetwork.inputs_: statesFromBatch, + self.DQNetwork.actions_: actionsFromBatch, + self.DQNetwork.targetQ: targetsForBatch, + self.DQNetwork.ISWeights_: ISWeights}) + + # update priorities + 
self.memoryBuffer.batchUpdate(treeIndexes, absoluteErrors) + + if self.stepNo >= self.maxSteps: + self.episodeNo += 1 + self.stepNo = 0 + self.newEpisode = True + self.game.new_episode() + if self.episodeNo >= self.totalTrainingEpisodes: + self.training = False + if self.episodeNo % 100 == 0: + directory = "./allModels/model{}".format(self.episodeNo) + if not os.path.exists(directory): + os.makedirs(directory) + save_path = self.saver.save(self.sess, + "./allModels/model{}/models/model.ckpt".format(self.episodeNo)) + print("Model Saved") + if self.tau > self.maxTau: + self.sess.run(self.update_target_graph()) + self.tau = 0 + print("Target Network Updated") + + def test(self): + + self.state = self.game.get_state() + + QValues = self.sess.run(self.DQNetwork.output, + feed_dict={self.DQNetwork.inputs_: np.array([self.state])}) + choice = np.argmax(QValues) + action = self.possibleActions[choice] + + actionNo = np.argmax(action) + # now we need to get next state + self.game.make_action(actionNo) + + if self.game.is_episode_finished(): + self.game.new_episode() + + +class Memory: + def __init__(self, maxSize): + self.buffer = deque(maxlen=maxSize) + + def add(self, experience): + self.buffer.append(experience) + + def sample(self, batchSize): + buffer_size = len(self.buffer) + index = np.random.choice(np.arange(buffer_size), + size=batchSize, + replace=False) + return [self.buffer[i] for i in index] + + +class DQN: + def __init__(self, stateSize, actionSize, learningRate, name): + self.stateSize = stateSize + self.actionSize = actionSize + self.learningRate = learningRate + self.name = name + + with tf.compat.v1.variable_scope(self.name): + # the inputs describing the state + self.inputs_ = tf.compat.v1.placeholder(tf.float32, [None, *self.stateSize], name="inputs") + + # the one hotted action that we took + # e.g. 
if we took the 3rd action action_ = [0,0,1,0,0,0,0] + self.actions_ = tf.compat.v1.placeholder(tf.float32, [None, self.actionSize], name="actions") + + # the target = reward + the discounted maximum possible q value of hte next state + self.targetQ = tf.compat.v1.placeholder(tf.float32, [None], name="target") + + self.ISWeights_ = tf.compat.v1.placeholder(tf.float32, [None, 1], name='ISWeights') + + self.dense1 = tf.compat.v1.layers.dense(inputs=self.inputs_, + units=16, + activation=tf.nn.elu, + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), + name="dense1") + self.dense2 = tf.compat.v1.layers.dense(inputs=self.dense1, + units=16, + activation=tf.nn.elu, + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), + name="dense2") + self.output = tf.compat.v1.layers.dense(inputs=self.dense2, + units=self.actionSize, + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), + activation=None, + name="outputs") + + # by multiplying the output by the one hotted action space we only get the q value we desire + # all other values are 0, therefore taking the sum of these values gives us our qValue + self.QValue = tf.reduce_sum(input_tensor=tf.multiply(self.output, self.actions_)) + + self.absoluteError = abs(self.QValue - self.targetQ) # used for prioritising experiences + + # calculate the loss by using mean squared error + self.loss = tf.reduce_mean(input_tensor=self.ISWeights_ * tf.square(self.targetQ - self.QValue)) + + # use adam optimiser (its good shit) + self.optimizer = tf.compat.v1.train.AdamOptimizer(self.learningRate).minimize(self.loss) + + +class DDQN: + def __init__(self, stateSize, actionSize, learningRate, name): + self.stateSize = stateSize + self.actionSize = actionSize + self.learningRate = learningRate + self.name = name + + with 
tf.compat.v1.variable_scope(self.name): + # the inputs describing the state + self.inputs_ = tf.compat.v1.placeholder(tf.float32, [None, *self.stateSize], name="inputs") + + # the one hotted action that we took + # e.g. if we took the 3rd action action_ = [0,0,1,0,0,0,0] + self.actions_ = tf.compat.v1.placeholder(tf.float32, [None, self.actionSize], name="actions") + + # the target = reward + the discounted maximum possible q value of hte next state + self.targetQ = tf.compat.v1.placeholder(tf.float32, [None], name="target") + + self.ISWeights_ = tf.compat.v1.placeholder(tf.float32, [None, 1], name='ISWeights') + + self.dense1 = tf.compat.v1.layers.dense(inputs=self.inputs_, + units=16, + activation=tf.nn.elu, + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), + name="dense1") + + ## Here we separate into two streams + # The one that calculate V(s) which is the value of the input state + # in other words how good this state is + + self.valueLayer = tf.compat.v1.layers.dense(inputs=self.dense1, + units=16, + activation=tf.nn.elu, + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), + name="valueLayer") + + self.value = tf.compat.v1.layers.dense(inputs=self.valueLayer, + units=1, + activation=None, + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), + name="value") + + # The one that calculate A(s,a) + # which is the advantage of taking each action in this given state + self.advantageLayer = tf.compat.v1.layers.dense(inputs=self.dense1, + units=16, + activation=tf.nn.elu, + kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), + name="advantageLayer") + + self.advantage = tf.compat.v1.layers.dense(inputs=self.advantageLayer, + units=self.actionSize, + activation=None, + 
class PrioritisedMemory:
    """Prioritised experience replay memory backed by a ``SumTree``.

    Experiences are stored as leaves of a sum tree whose leaf values are the
    sampling priorities. ``sample`` draws one experience from each of ``n``
    equal slices of the total priority and returns importance-sampling
    weights alongside them; ``batchUpdate`` refreshes priorities from new
    absolute errors.
    """

    # hyperparameters
    e = 0.01  # added to every error so a stored priority is never 0
    a = 0.06  # exponent turning a clipped error into a priority
    b = 0.04  # importance-sampling exponent, annealed towards 1 by sample()
    bIncreaseRate = 0.001  # how much b grows on every sample() call
    errorsClippedAt = 1.0  # upper bound for errors; also the very first priority

    def __init__(self, capacity):
        """Create an empty memory able to hold ``capacity`` experiences."""
        self.sumTree = SumTree(capacity)
        self.capacity = capacity

    def store(self, experience):
        """Add ``experience`` with the highest priority currently in the tree.

        New experiences get the maximum priority so each one is replayed at
        least once before its priority is corrected by ``batchUpdate``.
        """
        leaves = self.sumTree.tree[self.sumTree.indexOfFirstData:]
        maxPriority = np.max(leaves)

        # an all-zero tree means this is the first element ever stored
        if maxPriority == 0:
            maxPriority = self.errorsClippedAt

        self.sumTree.add(maxPriority, experience)

    def sample(self, n):
        """Draw ``n`` experiences, one from each equal slice of the priority mass.

        Returns:
            (batchIndexes, batch, batchISWeights) where ``batchIndexes`` is an
            int32 array of tree indexes (to be passed back to ``batchUpdate``),
            ``batch`` is a list of one-element lists ``[experience]`` and
            ``batchISWeights`` is a float32 ``(n, 1)`` array of
            importance-sampling weights divided by the maximum weight.
        """
        batch = []
        batchIndexes = np.zeros([n], dtype=np.int32)
        batchISWeights = np.zeros([n, 1], dtype=np.float32)

        # the priority space is split into n segments of equal size
        totalPriority = self.sumTree.total_priority()
        prioritySegmentSize = totalPriority / n

        # anneal b towards 1 on every call
        self.b = min(self.b + self.bIncreaseRate, 1)

        # The largest weight belongs to the least likely experience; dividing
        # by it normalises the weights. Leaf priorities are floored at e so
        # empty (zero) leaves do not produce an infinite maximum weight.
        leaves = self.sumTree.tree[self.sumTree.indexOfFirstData:]
        minPriority = np.min(np.maximum(leaves, self.e))
        minProbability = minPriority / totalPriority
        maxWeight = (minProbability * n) ** (-self.b)

        for i in range(n):
            segmentMin = prioritySegmentSize * i
            segmentMax = segmentMin + prioritySegmentSize
            value = np.random.uniform(segmentMin, segmentMax)

            treeIndex, priority, data = self.sumTree.getLeaf(value)
            samplingProbability = priority / totalPriority

            # IS weight = (N * P(i)) ** -b / max_weight
            batchISWeights[i, 0] = np.power(n * samplingProbability, -self.b) / maxWeight
            batchIndexes[i] = treeIndex
            batch.append([data])

        return batchIndexes, batch, batchISWeights

    def batchUpdate(self, treeIndexes, absoluteErrors):
        """Recompute the priorities of the leaves at ``treeIndexes``.

        ``absoluteErrors`` may be any array-like of non-negative errors; it is
        NOT modified (the offset is applied to a copy, where the previous
        in-place ``+=`` silently altered the caller's array and rejected
        plain lists).
        """
        shiftedErrors = np.asarray(absoluteErrors) + self.e  # avoid 0 priorities
        clippedErrors = np.minimum(shiftedErrors, self.errorsClippedAt)
        priorities = np.power(clippedErrors, self.a)

        for treeIndex, priority in zip(treeIndexes, priorities):
            self.sumTree.update(treeIndex, priority)


class SumTree:
    """Binary tree stored in a flat array where each node holds the sum of its children.

    The last ``capacity`` entries of ``tree`` are the leaves (priorities);
    ``data`` holds the object attached to each leaf. Writes wrap around, so
    once full the oldest entry is overwritten.
    """

    def __init__(self, capacity):
        """Allocate a zeroed tree with room for ``capacity`` leaves."""
        self.capacity = capacity
        self.size = 2 * capacity - 1
        self.tree = np.zeros(self.size)
        self.data = np.zeros(capacity, dtype=object)
        self.dataPointer = 0
        self.indexOfFirstData = capacity - 1

    def add(self, priority, data):
        """Write ``data`` at the next slot (overwriting an old entry when full)
        and propagate its ``priority`` up the tree."""
        treeIndex = self.indexOfFirstData + self.dataPointer

        self.data[self.dataPointer] = data
        self.update(treeIndex, priority)

        # advance the write position, wrapping around at capacity
        self.dataPointer = (self.dataPointer + 1) % self.capacity

    def update(self, index, priority):
        """Set the node at tree ``index`` to ``priority`` and add the
        difference to every ancestor up to the root."""
        change = priority - self.tree[index]
        self.tree[index] = priority

        while index != 0:
            index = (index - 1) // 2  # move to the parent
            self.tree[index] += change

    def getLeaf(self, value):
        """Descend from the root to the leaf whose cumulative priority range
        contains ``value``.

        Returns:
            (treeIndex, priority, data) for the selected leaf.
        """
        node = 0
        left = 1

        while left < self.size:
            if self.tree[left] >= value:
                node = left
            else:
                # skip the left subtree's mass and continue on the right
                value -= self.tree[left]
                node = left + 1
            left = 2 * node + 1

        dataIndex = node - self.indexOfFirstData
        return node, self.tree[node], self.data[dataIndex]

    def total_priority(self):
        """Return the sum of all leaf priorities (the root node)."""
        return self.tree[0]
class MyWindow(pyglet.window.Window):
    """Main application window: owns the game and the Q-learning agent.

    Keyboard and mouse handlers are intentionally no-ops; the car is driven
    by ``self.ai`` from ``update``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.set_minimum_size(400, 300)

        # opaque black background; glClearColor takes components in [0, 1]
        backgroundColor = [channel / 255 for channel in (0, 0, 0, 255)]
        glClearColor(*backgroundColor)

        self.game = Game()
        self.ai = QLearning(self.game)

    def on_key_press(self, symbol, modifiers):
        """Called when a key is hit; manual driving is disabled."""
        pass

    def on_key_release(self, symbol, modifiers):
        """Called when a key is released; manual driving is disabled."""
        pass

    def on_mouse_press(self, x, y, button, modifiers):
        """Called on a mouse click; the click-to-place wall/gate tool is disabled."""
        pass

    def on_close(self):
        """Close the agent's TensorFlow session, then let pyglet close the window.

        Overriding ``on_close`` replaces pyglet's default handler, so it must
        be invoked explicitly or the window never closes and the app loop
        keeps running.
        """
        try:
            self.ai.sess.close()
        finally:
            super().on_close()

    def on_draw(self):
        """Called every frame: render the game."""
        self.game.render()

    def on_resize(self, width, height):
        """Called when the window is resized: make the viewport fill it."""
        glViewport(0, 0, width, height)

    def update(self, dt):
        """Scheduled tick: run up to five training steps, or one test step.

        ``dt`` (seconds since the last tick, supplied by the pyglet clock) is
        not used.
        """
        for _ in range(5):
            if self.ai.training:
                self.ai.train()
            else:
                # NOTE(review): assumes the original `return` sat in this
                # branch (one test step per tick) - confirm against the
                # unflattened source.
                self.ai.test()
                return


if __name__ == "__main__":
    window = MyWindow(displayWidth, displayHeight, "AI Learns to Drive", resizable=False)
    pyglet.clock.schedule_interval(window.update, 1 / frameRate)
    pyglet.app.run()
to install tensorflow before this, check this guide: + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..de034ee --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +pyglet +pygame \ No newline at end of file