diff --git a/Converted_to_tensorflow_2/.vscode/settings.json b/Converted_to_tensorflow_2/.vscode/settings.json
new file mode 100644
index 0000000..386827a
--- /dev/null
+++ b/Converted_to_tensorflow_2/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "python.pythonPath": "Z:\\Installs\\anaconda\\envs\\tf-gpu\\python.exe"
+}
\ No newline at end of file
diff --git a/Converted_to_tensorflow_2/Drawer.py b/Converted_to_tensorflow_2/Drawer.py
new file mode 100644
index 0000000..d4c3cb9
--- /dev/null
+++ b/Converted_to_tensorflow_2/Drawer.py
@@ -0,0 +1,45 @@
+import pyglet
+from pyglet.gl import *
+import math
+
+class Drawer:
+    """Thin pyglet helper that draws primitives in the currently selected colour."""
+
+    def __init__(self):
+        self.color = [0, 0, 0]  # RGB, one 0-255 value per channel
+        self.lineThickness = 1  # stored only; no draw call in this class reads it
+
+    def setLineThinkness(self, thinkness):  # (sic) the misspelling is the public name
+        self.lineThickness = thinkness
+
+    def setColor(self, newColor):
+        self.color = newColor
+
+    def line(self, x1, y1, x2, y2):
+        """Draw a segment from (x1, y1) to (x2, y2)."""
+        pyglet.graphics.draw(2, pyglet.gl.GL_LINES,
+                             ('v2f', (x1, y1, x2, y2)), ('c3B', self.color * 2))
+
+    def rect(self, x, y, w, h):
+        """Fill the axis-aligned w x h rectangle whose first corner is (x, y)."""
+        corners = [x, y, x + w, y, x + w, y + h, x, y + h]
+        pyglet.graphics.draw_indexed(4, pyglet.gl.GL_TRIANGLES, [0, 1, 2, 0, 2, 3],
+                                     ('v2f', corners), ('c3B', self.color * 4))
+
+    def triangle(self, x1, y1, x2, y2, x3, y3):
+        """Fill the triangle through the three given points."""
+        pyglet.graphics.draw(3, pyglet.gl.GL_TRIANGLES,
+                             ('v2f', [x1, y1, x2, y2, x3, y3]), ('c3B', self.color * 3))
+
+    def circle(self, x, y, radius):
+        """Fill a circle as a triangle fan, about one rim segment per unit of circumference."""
+        iterations = int(2 * radius * math.pi) or 1  # 0 segments (radius < ~0.16) divided by zero
+        s, c = math.sin(2 * math.pi / iterations), math.cos(2 * math.pi / iterations)
+        dx, dy = radius, 0
+        glBegin(GL_TRIANGLE_FAN)
+        gl.glColor4f(self.color[0] / 255, self.color[1] / 255, self.color[2] / 255, 1.0)
+        glVertex2f(x, y)
+        for i in range(iterations + 1):
+            glVertex2f(x + dx, y + dy)
+            dx, dy = (dx * c - dy * s), (dy * c + dx * s)  # rotate the rim point one step
+        glEnd()
\ No newline at end of file
diff --git a/Converted_to_tensorflow_2/Game.py b/Converted_to_tensorflow_2/Game.py
new file mode 100644
index 0000000..2579cb0
--- /dev/null
+++ b/Converted_to_tensorflow_2/Game.py
@@ -0,0 +1,629 @@
+import numpy as np
+import pyglet
+from Globals import displayWidth, displayHeight
+from Drawer import Drawer
+from ShapeObjects import *
+from PygameAdditionalMethods import *
+import pygame
+
+drawer = Drawer()  # shared module-level Drawer, used by Car.showCollisionVectors
+vec2 = pygame.math.Vector2  # short alias used for all 2-D vector math in this module
+
+
+class Game:
+    no_of_actions = 9  # action ids 0-8, as handled by Car.updateWithAction
+    state_size = 15  # length of Car.getState(): 10 ray readings + 5 motion/gate values
+
+    def __init__(self):
+        """Load the track image, build the walls and reward gates, then create the car."""
+        background = pyglet.image.load('images/track.png')
+        self.trackSprite = pyglet.sprite.Sprite(background, x=0, y=0)
+
+        # Both lists start empty and are filled in place by the set_* helpers.
+        self.walls, self.gates = [], []
+        self.set_walls()
+        self.set_gates()
+
+        self.firstClick = True  # not read anywhere in Game.py
+        # Car.__init__ indexes the first gate, so the car must be created only
+        # after the lists have been populated.
+        self.car = Car(self.walls, self.gates)
+
+    def set_walls(self):
+        """Append every wall segment; each tuple is (x1, y1, x2, y2) (Wall mirrors the y values)."""
+        segments = (
+            # First closed loop - the wider one, presumably the outer edge of the track.
+            (240, 809, 200, 583),
+            (200, 583, 218, 395),
+            (218, 395, 303, 255),
+            (303, 255, 548, 173),
+            (548, 173, 764, 179),
+            (764, 179, 1058, 198),
+            (1055, 199, 1180, 215),
+            (1177, 215, 1220, 272),
+            (1222, 273, 1218, 367),
+            (1218, 367, 1150, 437),
+            (1150, 437, 1044, 460),
+            (1044, 460, 757, 600),
+            (757, 600, 1099, 570),
+            (1100, 570, 1187, 508),
+            (1187, 507, 1288, 443),
+            (1288, 443, 1463, 415),
+            (1463, 415, 1615, 478),
+            (1617, 479, 1727, 679),
+            (1727, 679, 1697, 874),
+            (1694, 872, 1520, 964),
+            (1520, 964, 1100, 970),
+            (1105, 970, 335, 960),
+            (339, 960, 264, 899),
+            (263, 897, 238, 803),
+            # Second closed loop - lies inside the first, presumably the inner edge.
+            (317, 782, 274, 570),
+            (275, 569, 284, 407),
+            (284, 407, 363, 317),
+            (363, 317, 562, 240),
+            (562, 240, 1114, 284),
+            (1114, 284, 1120, 323),
+            (1120, 323, 1045, 377),
+            (1045, 378, 682, 548),
+            (682, 548, 604, 610),
+            (604, 612, 603, 695),
+            (605, 695, 702, 713),
+            (703, 712, 1128, 642),
+            (1129, 642, 1320, 512),
+            (1323, 512, 1464, 497),
+            (1464, 497, 1579, 535),
+            (1579, 535, 1660, 701),
+            (1660, 697, 1634, 818),
+            (1634, 818, 1499, 889),
+            (1499, 889, 395, 883),
+            (395, 883, 330, 838),
+            (330, 838, 315, 782),
+            # Short extra segments near the joints above (presumably closing small gaps).
+            (319, 798, 306, 725), (276, 580, 277, 543),
+            (603, 639, 622, 590), (599, 655, 621, 704),
+            (1074, 571, 1115, 558), (1314, 516, 1333, 511),
+            (1692, 875, 1706, 830), (277, 912, 255, 872),
+            (1214, 262, 1225, 288), (1601, 470, 1625, 490),
+            (1119, 644, 1139, 634), (687, 710, 719, 710),
+            (1721, 664, 1727, 696), (1015, 392, 1065, 362),
+            (1091, 572, 1104, 568), (1157, 528, 1233, 478),
+        )
+        for x1, y1, x2, y2 in segments:
+            self.walls.append(Wall(x1, y1, x2, y2))
+
+    def set_gates(self):
+        """Append the reward gates in crossing order; each tuple is (x1, y1, x2, y2), y stored as-is."""
+        gate_lines = (
+            (314, 345, 200, 326),
+            (187, 435, 311, 451),
+            (307, 537, 171, 555),
+            (234, 681, 345, 628),
+            (408, 682, 363, 788),
+            (428, 816, 481, 712),
+            (568, 733, 543, 854),
+            (678, 858, 675, 710),
+            (852, 708, 855, 848),
+            (995, 836, 985, 705),
+            (1059, 710, 1076, 821),
+            (1078, 667, 1172, 572),
+            (997, 616, 1076, 532),
+            (967, 492, 909, 566),
+            (788, 512, 839, 438),
+            (790, 405, 781, 285),
+            (891, 302, 899, 427),
+            (1004, 434, 1027, 334),
+            (1139, 344, 1084, 452),
+            (1171, 502, 1233, 416),
+            (1305, 454, 1243, 556),
+            (1365, 588, 1408, 480),
+            (1487, 472, 1524, 587),
+            (1642, 508, 1575, 432),
+            (1608, 360, 1709, 419),
+            (1744, 324, 1625, 296),
+            (1609, 231, 1727, 190),
+            (1617, 66, 1541, 163),
+            (1487, 135, 1510, 14),
+            (1344, 16, 1328, 150), (1077, 142, 1067, 14),
+            (909, 16, 900, 130), (718, 138, 698, 20),
+            (551, 18, 567, 132), (445, 138, 413, 13),
+            (379, 154, 243, 80), (357, 221, 203, 182),
+        )
+        self.gates.extend(RewardGate(*coords) for coords in gate_lines)
+
+    def new_episode(self):
+        """Start a fresh episode by resetting the car (see Car.reset)."""
+        self.car.reset()
+
+    def get_state(self):
+        """Return the car's current observation vector (see Car.getState)."""
+        return self.car.getState()
+
+    def make_action(self, action):
+        """Step the car with the arg-max entry of `action` and return that step's reward."""
+        chosen = np.argmax(action)  # index of the largest entry selects the action id
+        self.car.updateWithAction(chosen)
+        return self.car.reward
+
+    def is_episode_finished(self):  # True once the car has hit a wall (Car.dead)
+        return self.car.dead
+
+    def get_score(self):  # reward gates crossed since the last reset
+        return self.car.score
+
+    def get_lifespan(self):  # steps survived since the last reset (counted in updateWithAction)
+        return self.car.lifespan
+
+    def render(self):
+        """Draw the track image and then the car on top of it."""
+        # Push/pop brackets the drawing so any transform enabled below stays local.
+        glPushMatrix()
+        # Disabled transform (looks like a pixel -> normalised coordinate mapping):
+        # glTranslatef(-1, -1, 0)
+        # glScalef(1 / (displayWidth / 2), 1 / (displayHeight / 2), 1)
+        self.trackSprite.draw()
+        self.car.show()
+
+        # Disabled debug overlay: draw the collision walls and the reward gates.
+        # for w in self.walls:
+        #     w.draw()
+        # for g in self.gates:
+        #     g.draw()
+        glPopMatrix()
+
+
+class Wall:
+    """A solid track edge; the car is marked dead when its hitbox crosses one.
+
+    The y coordinates are mirrored (displayHeight - y) on construction,
+    presumably because the track was traced with a top-left origin - confirm.
+    """
+
+    def __init__(self, x1, y1, x2, y2):
+        """Store the endpoints (y mirrored) and the Line used to draw the wall."""
+        self.x1, self.y1 = x1, displayHeight - y1
+        self.x2, self.y2 = x2, displayHeight - y2
+
+        self.line = Line(self.x1, self.y1, self.x2, self.y2)
+        self.line.setLineThinkness(2)
+
+    def draw(self):
+        """Draw the wall segment."""
+        self.line.draw()
+
+    def hitCar(self, car):
+        """Return True if any edge of the car's hitbox crosses this wall.
+
+        The hitbox is a rectangle centred on (car.x, car.y), car.width long
+        along car.direction and car.height - 4 across it; the sprite isn't a
+        perfect rectangle, hence the slightly narrower box.
+        """
+
+        length, breadth = car.width, car.height - 4
+        forward = vec2(car.direction)
+        sideways = vec2(car.direction).rotate(-90)
+        centre = vec2(car.x, car.y)
+
+        # Corners in perimeter order, so consecutive entries share an edge.
+        corners = [centre + (forward * length / 2 * along) + (sideways * breadth / 2 * across)
+                   for along, across in ((1, 1), (1, -1), (-1, -1), (-1, 1))]
+
+        # Test each hitbox edge against the wall segment.
+        for idx, start in enumerate(corners):
+            end = corners[(idx + 1) % 4]  # wraps so the last corner joins the first
+            if linesCollided(self.x1, self.y1, self.x2, self.y2,
+                             start.x, start.y, end.x, end.y):
+                return True
+        return False
+
+
+"""
+a checkpoint line; the car is rewarded for crossing the gates one after another in list order
+"""
+
+
+class RewardGate:
+    """A checkpoint segment; the car scores by crossing it while it is active.
+
+    Unlike Wall, the coordinates are stored exactly as given (no y mirroring).
+
+    NOTE(review): hitCar repeats Wall.hitCar's geometry; keep the two in sync.
+    """
+
+    def __init__(self, x1, y1, x2, y2):
+        """Store the endpoints, start active, and build the green Line used for drawing."""
+        self.x1, self.y1 = x1, y1
+        self.x2, self.y2 = x2, y2
+        self.active = True
+
+        self.line = Line(self.x1, self.y1, self.x2, self.y2)
+        self.line.setLineThinkness(1)
+        self.line.setColor([0, 255, 0])
+
+        # Midpoint of the gate; Car measures its direction-to-next-gate to it.
+        self.center = vec2((self.x1 + self.x2) / 2, (self.y1 + self.y2) / 2)
+
+    def draw(self):
+        """Draw the gate's line, but only while the gate is still active."""
+        if self.active:
+            self.line.draw()
+
+    def hitCar(self, car):
+        """Return True if the gate is active and an edge of the car's hitbox crosses it.
+
+        The hitbox is a rectangle centred on (car.x, car.y), car.width long
+        along car.direction and car.height - 4 across it; the sprite isn't a
+        perfect rectangle, hence the slightly narrower box.
+        """
+        if not self.active:
+            return False
+
+        length, breadth = car.width, car.height - 4
+        forward = vec2(car.direction)
+        sideways = vec2(car.direction).rotate(-90)
+        centre = vec2(car.x, car.y)
+
+        # Corners in perimeter order, so consecutive entries share an edge.
+        corners = [centre + (forward * length / 2 * along) + (sideways * breadth / 2 * across)
+                   for along, across in ((1, 1), (1, -1), (-1, -1), (-1, 1))]
+
+        # Test each hitbox edge against the gate segment.
+        for idx, start in enumerate(corners):
+            end = corners[(idx + 1) % 4]
+            if linesCollided(self.x1, self.y1, self.x2, self.y2,
+                             start.x, start.y, end.x, end.y):
+                return True
+        return False
+
+
+class Car:
+
+    def __init__(self, walls, rewardGates):
+        """Create a car at the start pose; `walls` and `rewardGates` are kept by reference."""
+        # Pose and motion state (reset() re-applies the same start values).
+        self.x, self.y = 258, 288
+        self.direction = vec2(0, 1).rotate(180 / 12)  # (0, 1) rotated by 15 degrees
+        self.vel = self.acc = 0
+        self.driftMomentum = 0
+        self.dead = False
+
+        # Geometry and handling constants.
+        self.width, self.height = 40, 20
+        self.turningRate = 5.0 / self.width
+        self.maxSpeed = self.width / 4.0
+        self.maxReverseSpeed = -1 * self.maxSpeed / 2.0
+        self.accelerationSpeed = self.width / 160.0
+        self.friction = 0.98
+        self.driftFriction = 0.87
+
+        # Sensor rays ("vision vectors"): results of the latest cast, and the ray length.
+        self.lineCollisionPoints = []
+        self.collisionLineDistances = []
+        self.vectorLength = 300
+
+        self.carPic = pyglet.image.load('images/car.png')
+        self.carSprite = pyglet.sprite.Sprite(self.carPic, x=self.x, y=self.y)
+        # Scale the image so the drawn sprite measures width x height.
+        self.carSprite.update(rotation=0, scale_x=self.width / self.carSprite.width,
+                              scale_y=self.height / self.carSprite.height)
+
+        # Control flags; updateWithAction() overwrites all four on every call.
+        self.turningLeft = self.turningRight = False
+        self.accelerating = self.reversing = False
+
+        self.walls = walls
+        self.rewardGates = rewardGates
+        self.rewardNo = 0  # index of the next gate to cross
+        self.directionToRewardGate = self.rewardGates[self.rewardNo].center - vec2(self.x, self.y)
+
+        self.reward = 0  # reward produced by the most recent step
+        self.score = 0  # gates crossed this episode
+        self.lifespan = 0  # steps survived this episode
+    """
+    puts the car back at its start pose and clears all per-episode state
+    """
+
+    def reset(self):
+        """Return the car to its start pose and clear all per-episode state.
+
+        Also re-aims directionToRewardGate at the first gate: the original left
+        it pointing from the previous (crash) position at the previously pending
+        gate, so the first getState() of a new episode reported a stale angle.
+        """
+        self.x, self.y = 258, 288
+        self.direction = vec2(0, 1).rotate(180 / 12)
+        self.vel = self.acc = 0
+        self.driftMomentum = 0
+        self.dead = False
+        self.lineCollisionPoints = []
+        self.collisionLineDistances = []
+
+        self.turningLeft = self.turningRight = False
+        self.accelerating = self.reversing = False
+
+        self.rewardNo = self.reward = 0
+        self.score = self.lifespan = 0
+        for gate in self.rewardGates:
+            gate.active = True
+        # Same expression as in __init__, evaluated at the restored start pose.
+        self.directionToRewardGate = self.rewardGates[self.rewardNo].center - vec2(self.x, self.y)
+
+    def show(self):
+        """Place the sprite so it appears to rotate about the car's centre, then draw it."""
+        # Shift the sprite back by half the car's length along the heading and half
+        # its height along the perpendicular (presumably its anchor is a corner).
+        side = self.direction.rotate(90)
+        shiftX = self.direction.x * self.width / 2 + side.x * self.height / 2
+        shiftY = self.direction.y * self.width / 2 + side.y * self.height / 2
+        self.carSprite.update(x=self.x - shiftX, y=self.y - shiftY, rotation=-get_angle(self.direction))
+        self.carSprite.draw()
+
+    """
+     returns a vector of where a point on the car is after rotation 
+     takes the position desired relative to the center of the car when the car is facing to the right
+    """
+
+    def getPositionOnCarRelativeToCenter(self, right, up):
+        """Return the world position of a point given in the car's own frame.
+
+        `right` is measured along the heading and `up` along the heading rotated
+        by +90 degrees. The original also called normalize() on both axes and
+        discarded the results (normalize() returns a new vector), which was a
+        no-op; self.direction is only ever rotated, so the axes stay unit length.
+        """
+        forward = vec2(self.direction)
+        return vec2(self.x, self.y) + ((forward * right) + (self.direction.rotate(90) * up))
+
+    def updateWithAction(self, actionNo):
+        """Apply discrete action `actionNo`, advance one step and store its reward.
+
+        Action ids: 0 left, 1 right, 2 accelerate, 3 reverse, 4 accelerate+left,
+        5 accelerate+right, 6 reverse+left, 7 reverse+right; 8 (or any other
+        value) coasts with no input. The step reward is left in self.reward and
+        is 0 when the car was already dead.
+        Game.make_action() calls this once per agent decision.
+        """
+        # (turningLeft, turningRight, accelerating, reversing) per action id.
+        controls = {
+            0: (True, False, False, False),
+            1: (False, True, False, False),
+            2: (False, False, True, False),
+            3: (False, False, False, True),
+            4: (True, False, True, False),
+            5: (False, True, True, False),
+            6: (True, False, False, True),
+            7: (False, True, False, True),
+        }
+
+        # Ids missing from the table clear every flag, like the old if/elif chain.
+        left, right, forward, backward = controls.get(actionNo, (False, False, False, False))
+        self.turningLeft = left
+        self.turningRight = right
+        self.accelerating = forward
+        self.reversing = backward
+
+        stepReward = 0
+        if not self.dead:
+            self.lifespan += 1
+            # Order matters: the car first moves with the acceleration chosen on
+            # the previous step, and only then are the new controls applied.
+            self.move()
+            self.updateControls()
+
+            if self.hitAWall():
+                self.dead = True
+
+            # Gates are still checked on the fatal step, so that step's reward is
+            # the usual -1 / 10 rather than a separate crash penalty.
+            self.checkRewardGates()
+            stepReward += self.reward
+
+        # The sensor rays are refreshed even when the car is dead.
+        self.setVisionVectors()
+        self.reward = stepReward
+
+    """
+    called every frame
+    """
+
+    def update(self):
+        """Advance one frame using the current control flags; a dead car does nothing."""
+        if self.dead:
+            return
+        self.updateControls()
+        self.move()
+        if self.hitAWall():
+            self.dead = True
+        self.checkRewardGates()
+        self.setVisionVectors()
+
+    def checkRewardGates(self):
+        """Set the step reward (10 for crossing the pending gate, else -1) and re-aim at the next gate."""
+        self.reward = -1
+        pending = self.rewardGates[self.rewardNo]
+        if pending.hitCar(self):
+            pending.active = False
+            self.score += 1
+            self.reward = 10
+            self.rewardNo = (self.rewardNo + 1) % len(self.rewardGates)
+            if self.rewardNo == 0:  # wrapped past the last gate: re-arm them all
+                for gate in self.rewardGates:
+                    gate.active = True
+        self.directionToRewardGate = self.rewardGates[self.rewardNo].center - vec2(self.x, self.y)
+
+    """
+    changes the position of the car to account for acceleration, velocity, friction and drift
+    """
+
+    def move(self):
+        """Integrate velocity and drift momentum into the car's position for one step.
+
+        The displacement is vel * heading + driftMomentum * (heading rotated +90).
+        The original also called normalize() on that displacement and multiplied
+        its components by abs(vel), but discarded both results, so neither had
+        any effect. Those dead statements are removed; the motion is unchanged.
+        NOTE(review): if rescaling the step to length abs(vel) was intended it
+        needs normalize_ip() and `*=`, which would change the physics (and any
+        agent trained on them), so it is deliberately not done here.
+        """
+        # Accelerate, apply friction, then clamp to the speed limits.
+        self.vel += self.acc
+        self.vel *= self.friction
+        self.constrainVel()
+
+        sideways = self.direction.rotate(90)
+        # Drift contributes with its momentum from before this step's decay.
+        stepX = self.vel * self.direction.x + self.driftMomentum * sideways.x
+        stepY = self.vel * self.direction.y + self.driftMomentum * sideways.y
+        self.driftMomentum *= self.driftFriction
+
+        self.x += stepX
+        self.y += stepY
+
+    """
+    keeps the velocity of the car within the maximum and minimum speeds
+    """
+
+    def constrainVel(self):
+        """Clamp self.vel into the range [maxReverseSpeed, maxSpeed]."""
+        # A single min/max clamp; relies on maxReverseSpeed <= maxSpeed, which
+        # holds for the values set in __init__.
+        self.vel = min(self.maxSpeed, max(self.maxReverseSpeed, self.vel))
+
+    """
+    changes the cars direction and acceleration based on the users inputs
+    """
+
+    def updateControls(self):
+        """Turn the control flags into a new heading, drift momentum and acceleration.
+
+        Steering scales linearly with speed below a speed of 5 and is mirrored
+        when reversing; drift only builds up at forward speeds of 5 or more.
+        """
+        speed = abs(self.vel)
+        steerScale = speed / 5 if speed < 5 else 1
+        if self.vel < 0:
+            steerScale *= -1
+        # Sideways push added per step while steering.
+        driftGain = 0 if self.vel < 5 else self.vel * self.turningRate * self.width / (9.0 * 8.0)
+
+        # Left wins if both steering flags happen to be set.
+        if self.turningLeft:
+            self.direction = self.direction.rotate(radiansToAngle(self.turningRate) * steerScale)
+            self.driftMomentum -= driftGain
+        elif self.turningRight:
+            self.direction = self.direction.rotate(-radiansToAngle(self.turningRate) * steerScale)
+            self.driftMomentum += driftGain
+
+        # Thrust applied against the current direction of travel is three times as strong.
+        thrust = self.accelerationSpeed
+        if self.accelerating:
+            self.acc = 3 * thrust if self.vel < 0 else thrust
+        elif self.reversing:
+            self.acc = -3 * thrust if self.vel > 0 else -1 * thrust
+        else:
+            self.acc = 0
+
+    """
+    checks every wall and if the car has hit a wall returns true    
+    """
+
+    def hitAWall(self):
+        """Return True if the car's hitbox touches at least one wall.
+
+        Walls are tested in list order and the scan stops at the first hit.
+        """
+        return any(wall.hitCar(self) for wall in self.walls)
+
+    """
+    returns the point of collision of a line (x1,y1,x2,y2) with the walls, 
+    if multiple walls are hit it returns the closest collision point
+    """
+
+    def getCollisionPointOfClosestWall(self, x1, y1, x2, y2):
+        """Return the wall hit nearest to (x1, y1) along the segment, or vec2(0, 0) if none."""
+        nearest = vec2(0, 0)
+        nearestDist = 2 * displayWidth  # starting bound that any real hit must beat
+        for wall in self.walls:
+            hit = getCollisionPoint(x1, y1, x2, y2, wall.x1, wall.y1, wall.x2, wall.y2)
+            if hit is None:
+                continue
+            hitDist = dist(x1, y1, hit.x, hit.y)
+            if hitDist < nearestDist:
+                nearest, nearestDist = vec2(hit), hitDist
+        return nearest
+
+    """
+    by creating lines in many directions from the car and getting the closest collision point of that line
+    we create  "vision vectors" which will allow the car to 'see' 
+    kinda like a sonar system
+    """
+
+    def getState(self):
+        """Return the observation as a NumPy array of 15 values.
+
+        Layout: the 10 ray readings (close to 1 = wall near, 0 = nothing within
+        vectorLength), forward speed, reverse speed, drift to each side, and
+        the signed angle to the next gate scaled into (-1, 1].
+        """
+        self.setVisionVectors()
+        proximity = [1 - (max(1.0, d) / self.vectorLength) for d in self.collisionLineDistances]
+
+        forwardSpeed = max(0.0, self.vel / self.maxSpeed)
+        reverseSpeed = max(0.0, self.vel / self.maxReverseSpeed)
+        # driftMomentum is signed; split it into two non-negative channels.
+        posDrift = self.driftMomentum / 5 if self.driftMomentum > 0 else 0
+        negDrift = 0 if self.driftMomentum > 0 else self.driftMomentum / -5
+        # Heading minus bearing-to-gate, wrapped into (-180, 180], then divided by 180.
+        gateAngle = (get_angle(self.direction) - get_angle(self.directionToRewardGate)) % 360
+        if gateAngle > 180:
+            gateAngle = -1 * (360 - gateAngle)
+        gateAngle /= 180
+
+        return np.array([*proximity, forwardSpeed, reverseSpeed, posDrift, negDrift, gateAngle])
+
+    def setVisionVectors(self):
+        """Recast all ten sensor rays, rebuilding both result lists from scratch."""
+        h = self.height - 4
+        w = self.width
+        self.collisionLineDistances = []
+        self.lineCollisionPoints = []
+        # Each entry: (start along heading, start across heading, ray angle in degrees).
+        # The order is significant: getState() exposes the readings in this order.
+        rays = (
+            (w / 2, 0, 0),
+            (w / 2, -h / 2, -180 / 16), (w / 2, -h / 2, -180 / 4), (w / 2, -h / 2, -4 * 180 / 8),
+            (w / 2, h / 2, 180 / 16), (w / 2, h / 2, 180 / 4), (w / 2, h / 2, 4 * 180 / 8),
+            (-w / 2, -h / 2, -6 * 180 / 8), (-w / 2, h / 2, 6 * 180 / 8),
+            (-w / 2, 0, 180),
+        )
+        for startX, startY, angle in rays:
+            self.setVisionVector(startX, startY, angle)
+
+    """
+    calculates and stores the distance to the nearest wall given a vector 
+    """
+
+    def setVisionVector(self, startX, startY, angle):
+        """Cast one ray from a point on the car and append its reading to both lists."""
+        ray = self.direction.rotate(angle).normalize() * self.vectorLength
+        origin = self.getPositionOnCarRelativeToCenter(startX, startY)
+        hit = self.getCollisionPointOfClosestWall(origin.x, origin.y,
+                                                  origin.x + ray.x, origin.y + ray.y)
+        # vec2(0, 0) is the helper's "no wall within range" sentinel.
+        if hit.x == 0 and hit.y == 0:
+            reading = self.vectorLength
+        else:
+            reading = dist(origin.x, origin.y, hit.x, hit.y)
+        self.collisionLineDistances.append(reading)
+        self.lineCollisionPoints.append(hit)
+
+    """
+    shows dots where the collision vectors detect a wall 
+    """
+
+    def showCollisionVectors(self):
+        """Debug aid: mark each ray's stored collision point with a small red dot."""
+        for hit in self.lineCollisionPoints:
+            drawer.setColor([255, 0, 0])
+            drawer.circle(hit.x, hit.y, 5)
diff --git a/Converted_to_tensorflow_2/Globals.py b/Converted_to_tensorflow_2/Globals.py
new file mode 100644
index 0000000..41dfa30
--- /dev/null
+++ b/Converted_to_tensorflow_2/Globals.py
@@ -0,0 +1,2 @@
+displayWidth = 1800  # presumably the window width in pixels - confirm; Game.py uses 2x this as a distance bound
+displayHeight = 1000  # presumably the window height in pixels - confirm; Wall mirrors y as displayHeight - y
diff --git a/Converted_to_tensorflow_2/PygameAdditionalMethods.py b/Converted_to_tensorflow_2/PygameAdditionalMethods.py
new file mode 100644
index 0000000..7e310a4
--- /dev/null
+++ b/Converted_to_tensorflow_2/PygameAdditionalMethods.py
@@ -0,0 +1,41 @@
+import math
+import pygame
+
+vec2 = pygame.math.Vector2  # short alias; getCollisionPoint returns instances of it
+
+
+def get_angle(vec):
+    """Return vec's angle in degrees within [-180, 180], measured from the +x axis."""
+    # A zero-length vector yields the int 0 rather than a float, as before.
+    return math.degrees(math.atan2(vec.y, vec.x)) if vec.length() != 0 else 0
+
+
+def angleToRadians(angle):
+    """Convert an angle from degrees to radians."""
+    return angle / (180 / math.pi)
+
+def radiansToAngle(rads):
+    """Convert an angle from radians to degrees."""
+    return rads * 180 / math.pi
+
+def linesCollided(x1, y1, x2, y2, x3, y3, x4, y4):
+    """Return True if segment (x1,y1)-(x2,y2) crosses segment (x3,y3)-(x4,y4)."""
+    return getCollisionPoint(x1, y1, x2, y2, x3, y3, x4, y4) is not None
+
+
+def getCollisionPoint(x1, y1, x2, y2, x3, y3, x4, y4):
+    """Return the two segments' intersection as a vec2, or None if they do not cross."""
+    denominator = (y4 - y3) * (x2 - x1) - (x4 - x3) * (y2 - y1)
+    if denominator == 0:
+        # Parallel or zero-length segments: dividing here raised ZeroDivisionError.
+        return None
+    # uA / uB: fractional position of the crossing along segment 1-2 / segment 3-4.
+    uA = ((x4 - x3) * (y1 - y3) - (y4 - y3) * (x1 - x3)) / denominator
+    uB = ((x2 - x1) * (y1 - y3) - (y2 - y1) * (x1 - x3)) / denominator
+    if 0 <= uA <= 1 and 0 <= uB <= 1:
+        return vec2(x1 + (uA * (x2 - x1)), y1 + (uA * (y2 - y1)))
+    return None
+
+
+def dist(x1, y1, x2, y2):  # Euclidean distance between (x1, y1) and (x2, y2)
+    return math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
diff --git a/Converted_to_tensorflow_2/QlearningFromOldMate.py b/Converted_to_tensorflow_2/QlearningFromOldMate.py
new file mode 100644
index 0000000..5f71589
--- /dev/null
+++ b/Converted_to_tensorflow_2/QlearningFromOldMate.py
@@ -0,0 +1,781 @@
+import pyglet
+from pyglet.gl import *
+import pygame
+import math
+from pyglet.window import key
+from Drawer import Drawer
+# from PygameAdditionalMethods import *
+from ShapeObjects import Line
+import tensorflow as tf  # Deep Learning library
+import numpy as np  # Handle matrices
+from collections import deque
+import random
+import os
+from Globals import displayHeight, displayWidth
+from Game import Game
+
+# Replay mode schedules MyWindow.update every 1 / frameRate seconds
+frameRate = 30.0
+
+vec2 = pygame.math.Vector2
+
+# Single game environment shared by pretraining, training and replay
+game = Game()
+# One one-hot row per action; an "action" throughout this file is one of these lists
+possible_actions = np.identity(game.no_of_actions, dtype=int).tolist()
+
+### MODEL HYPERPARAMETERS
+state_size = [game.state_size]  # Shape of one state without the batch dimension (unpacked into the input placeholder)
+action_size = game.no_of_actions  # Number of actions = width of the network's Q-value output
+learning_rate = 0.00025  # Alpha (aka learning rate)
+
+### TRAINING HYPERPARAMETERS
+total_episodes = 50000  # Total episodes for training
+max_steps = 5000  # Max possible steps in an episode
+batch_size = 64  # Number of experiences sampled from memory per training step
+
+# FIXED Q TARGETS HYPERPARAMETERS
+max_tau = 10000  # Number of training steps between copies of the DQNetwork weights into TargetNetwork
+
+# EXPLORATION HYPERPARAMETERS for epsilon greedy strategy
+explore_start = 1.0  # exploration probability at start
+explore_stop = 0.01  # minimum exploration probability
+decay_rate = 0.00005  # exponential decay rate for exploration prob
+
+# Q LEARNING hyperparameters
+gamma = 0.95  # Discounting rate
+
+### MEMORY HYPERPARAMETERS
+## If you have GPU change to 1million
+memory_size = 100000  # Number of experiences the Memory can keep
+pretrain_length = memory_size  # Number of random-action experiences stored before training starts
+
+### True: run pretraining and the training loop. False: open a window and replay the saved agent.
+training =  False
+
+## TURN THIS TO TRUE IF YOU WANT TO RENDER THE ENVIRONMENT
+## NOTE(review): not read anywhere in the visible part of this file
+episode_render = True
+
+# When training, restore ./models/model.ckpt instead of starting from freshly initialised variables
+load = True
+
+# Index of the first episode of the training loop
+starting_episode = 0
+
+# Replay mode only: True derives the exploration probability from load_training_model_number,
+# False uses a fixed 0.0001
+load_traing_model = False
+
+# Number of the archived model; used in the replay-mode exploration probability
+load_training_model_number = 300
+
+
+class DDDQNNet:
+    """Dueling Double Deep Q-Network, built as a graph with the tf.compat.v1 API.
+
+    All ops and variables are created inside a variable scope called ``name``,
+    so several networks can live in one graph and their variables can be
+    looked up by scope.
+
+    Args:
+        state_size: shape of a single state as a list/tuple (batch dimension excluded).
+        action_size: number of actions, i.e. the width of the Q-value output.
+        learning_rate: learning rate given to the RMSProp optimizer.
+        name: variable scope name for this network.
+
+    Graph tensors/ops exposed as attributes:
+        inputs_: float32 placeholder [batch, *state_size] with the states.
+        ISWeights_: float32 placeholder [batch, 1] with importance-sampling weights.
+        actions_: float32 placeholder [batch, action_size] with one-hot actions.
+        target_Q: float32 placeholder [batch] with the training targets.
+        output: Q-values of every action, [batch, action_size].
+        Q: Q-value of the action selected by actions_, [batch].
+        absolute_errors: |target_Q - Q| per sample (used as replay priorities).
+        loss: importance-weighted mean squared error.
+        optimizer: RMSProp op that minimises loss.
+    """
+
+    def __init__(self, state_size, action_size, learning_rate, name):
+        self.state_size = state_size
+        self.action_size = action_size
+        self.learning_rate = learning_rate
+        self.name = name
+
+        def dense(inputs, units, layer_name, activation=tf.nn.elu):
+            # Every layer uses the same initializer (VarianceScaling with
+            # fan_avg/uniform, i.e. Glorot/Xavier uniform); a fresh instance
+            # is created per layer.
+            initializer = tf.compat.v1.keras.initializers.VarianceScaling(
+                scale=1.0, mode="fan_avg", distribution="uniform")
+            return tf.compat.v1.layers.dense(inputs=inputs,
+                                             units=units,
+                                             activation=activation,
+                                             kernel_initializer=initializer,
+                                             name=layer_name)
+
+        # The variable scope tells the two networks (DQN and target) apart; it
+        # is what allows copying one network's parameters into the other.
+        with tf.compat.v1.variable_scope(self.name):
+            # *state_size unpacks the state shape behind the batch dimension
+            self.inputs_ = tf.compat.v1.placeholder(tf.float32, [None, *state_size], name="inputs")
+
+            self.ISWeights_ = tf.compat.v1.placeholder(tf.float32, [None, 1], name='IS_weights')
+
+            self.actions_ = tf.compat.v1.placeholder(tf.float32, [None, action_size], name="actions_")
+
+            # target_Q is R(s,a) + gamma * Qhat(s', a')
+            self.target_Q = tf.compat.v1.placeholder(tf.float32, [None], name="target")
+
+            # Shared trunk
+            self.dense1 = dense(self.inputs_, 256, "dense1")
+            self.dense2 = dense(self.dense1, 256, "dense2")
+
+            # Value stream: V(s)
+            self.value_fc = dense(self.dense2, 256, "value_fc")
+            self.value = dense(self.value_fc, 1, "value", activation=None)
+
+            # Advantage stream: A(s,a)
+            self.advantage_fc = dense(self.dense2, 256, "advantage_fc")
+            self.advantage = dense(self.advantage_fc, self.action_size, "advantages", activation=None)
+
+            # Aggregating layer
+            # Q(s,a) = V(s) + (A(s,a) - 1/|A| * sum A(s,a'))
+            self.output = self.value + tf.subtract(self.advantage,
+                                                   tf.reduce_mean(input_tensor=self.advantage, axis=1, keepdims=True))
+
+            # Predicted Q-value of the action that was actually taken
+            self.Q = tf.reduce_sum(input_tensor=tf.multiply(self.output, self.actions_), axis=1)
+
+            # Per-sample TD error magnitude, used to update the SumTree priorities
+            self.absolute_errors = tf.abs(self.target_Q - self.Q)
+
+            # ISWeights_ is [batch, 1] while the squared errors are [batch].
+            # Multiplying them directly broadcasts to [batch, batch] and mixes
+            # every weight with every error, so drop the trailing axis first
+            # to weight each sample with its own importance-sampling weight.
+            squared_errors = tf.math.squared_difference(self.target_Q, self.Q)
+            per_sample_weights = tf.squeeze(self.ISWeights_, axis=1)
+            self.loss = tf.reduce_mean(input_tensor=per_sample_weights * squared_errors)
+
+            self.optimizer = tf.compat.v1.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss)
+
+
+# The tf.compat.v1 placeholders and sessions used in this file only work in
+# graph mode. TensorFlow 2 starts in eager mode, so switch it off before any
+# part of the graph is built.
+tf.compat.v1.disable_eager_execution()
+
+# Reset the graph
+tf.compat.v1.reset_default_graph()
+
+# Instantiate the DQNetwork (the network that is trained and chooses actions)
+DQNetwork = DDDQNNet(state_size, action_size, learning_rate, name="DQNetwork")
+
+# Instantiate the target network (receives periodic copies of the DQNetwork weights)
+TargetNetwork = DDDQNNet(state_size, action_size, learning_rate, name="TargetNetwork")
+
+
+class SumTree(object):
+    r"""Binary sum tree stored in a flat array, used for prioritized replay.
+
+    This SumTree code is a modified version of Morvan Zhou's:
+    https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow/blob/master/contents/5.2_Prioritized_Replay_DQN/RL_brain.py
+
+    Every leaf holds the priority of one stored experience and every inner
+    node holds the sum of its two children, so the root is the total priority.
+    Layout for capacity 4 (the numbers are array indexes, not priorities)::
+
+              0            <- root: sum of all priorities
+            /   \
+           1     2
+          / \   / \
+         3   4 5   6       <- leaves: one priority per experience
+
+    ``tree`` has ``2 * capacity - 1`` entries (``capacity - 1`` inner nodes
+    followed by ``capacity`` leaves) and ``data`` has ``capacity`` entries;
+    leaf ``i`` of the tree belongs to ``data[i - capacity + 1]``.
+    """
+
+    # Data slot the next add() writes to. The class-level default is kept for
+    # backward compatibility; every instance gets its own value in __init__.
+    data_pointer = 0
+
+    def __init__(self, capacity):
+        """Create an empty tree: all priorities 0 and all data slots 0."""
+        self.capacity = capacity  # Number of leaf nodes, i.e. of experiences that can be stored
+        self.tree = np.zeros(2 * capacity - 1)
+        self.data = np.zeros(capacity, dtype=object)
+        self.data_pointer = 0
+
+    def add(self, priority, data):
+        """Store ``data`` with ``priority`` in the next slot.
+
+        Slots are filled from left to right; once the tree is full the oldest
+        slot is overwritten.
+        """
+        tree_index = self.data_pointer + self.capacity - 1
+
+        self.data[self.data_pointer] = data
+        self.update(tree_index, priority)
+
+        # Advance and wrap around to the first slot when the end is reached
+        self.data_pointer = (self.data_pointer + 1) % self.capacity
+
+    def update(self, tree_index, priority):
+        """Set the priority of leaf ``tree_index`` and fix up all sums above it."""
+        change = priority - self.tree[tree_index]
+        self.tree[tree_index] = priority
+
+        # Walk up to the root; the parent of node i is (i - 1) // 2,
+        # e.g. leaf 6 -> node 2 -> root 0.
+        while tree_index != 0:
+            tree_index = (tree_index - 1) // 2
+            self.tree[tree_index] += change
+
+    def get_leaf(self, v):
+        """Find the leaf whose priority interval contains the value ``v``.
+
+        Starting at the root, the search goes left when ``v`` is not larger
+        than the left child's sum; otherwise it subtracts that sum from ``v``
+        and goes right.
+
+        Returns:
+            (leaf_index, priority of that leaf, experience stored for it).
+        """
+        parent_index = 0
+
+        while True:
+            left_child_index = 2 * parent_index + 1
+            right_child_index = left_child_index + 1
+
+            # No children left: parent_index is a leaf, the search ends
+            if left_child_index >= len(self.tree):
+                leaf_index = parent_index
+                break
+
+            if v <= self.tree[left_child_index]:
+                parent_index = left_child_index
+            else:
+                v -= self.tree[left_child_index]
+                parent_index = right_child_index
+
+        data_index = leaf_index - self.capacity + 1
+
+        return leaf_index, self.tree[leaf_index], self.data[data_index]
+
+    @property
+    def total_priority(self):
+        """Sum of all leaf priorities (the value of the root node)."""
+        return self.tree[0]
+
+
+class Memory(object):  # stored as ( s, a, r, s_ ) in SumTree
+    """Prioritized experience replay memory backed by a SumTree.
+
+    This code is a modified version and the original code is from:
+    https://github.com/jaara/AI-blog/blob/master/Seaquest-DDQN-PER.py
+
+    Experiences live in a fixed-size array that is overwritten once full,
+    not in a deque, so an experience keeps its index while it is stored.
+    """
+    PER_e = 0.01  # Added to every error so that no experience ends up with probability 0
+    PER_a = 0.6  # Trade-off between sampling only high-priority experiences and sampling uniformly
+    PER_b = 0.4  # Importance-sampling exponent, increased towards 1 on every sample() call
+
+    PER_b_increment_per_sampling = 0.001
+
+    absolute_error_upper = 1.  # clipped abs error
+
+    def __init__(self, capacity):
+        """Create a memory that can hold ``capacity`` experiences."""
+        self.tree = SumTree(capacity)
+
+    def store(self, experience):
+        """Store a new experience with the highest priority currently in the tree.
+
+        The priority is refined later by batch_update() once the experience
+        has been used for training.
+        """
+        max_priority = np.max(self.tree.tree[-self.tree.capacity:])
+
+        # With priority 0 the experience could never be selected, so fall
+        # back to the upper bound while the tree holds no priorities yet.
+        if max_priority == 0:
+            max_priority = self.absolute_error_upper
+
+        self.tree.add(max_priority, experience)
+
+    def sample(self, n):
+        """Sample a minibatch of ``n`` experiences according to their priority.
+
+        The range [0, total_priority] is split into ``n`` equal segments, one
+        value is drawn uniformly from each segment and the experience whose
+        priority interval contains that value is retrieved.
+
+        Returns:
+            (b_idx, memory_b, b_ISWeights): int32 array [n] of tree indexes,
+            list of n one-element lists ``[experience]``, and float32 array
+            [n, 1] of importance-sampling weights normalised by the largest
+            possible weight.
+
+        Raises:
+            ValueError: if nothing has been stored yet.
+        """
+        total_priority = self.tree.total_priority
+        if total_priority <= 0:
+            raise ValueError("cannot sample from an empty Memory")
+
+        memory_b = []
+        b_idx = np.empty((n,), dtype=np.int32)
+        b_ISWeights = np.empty((n, 1), dtype=np.float32)
+
+        priority_segment = total_priority / n
+
+        # The exponent grows with every minibatch, capped at 1
+        self.PER_b = min(1., self.PER_b + self.PER_b_increment_per_sampling)
+
+        # Smallest probability among the leaves that hold an experience.
+        # Unfilled leaves have priority 0; including them would make
+        # max_weight infinite and every importance-sampling weight 0.
+        leaf_priorities = self.tree.tree[-self.tree.capacity:]
+        p_min = np.min(leaf_priorities[leaf_priorities > 0]) / total_priority
+
+        max_weight = (p_min * n) ** (-self.PER_b)
+
+        for i in range(n):
+            # One value drawn uniformly from the i-th segment
+            a, b = priority_segment * i, priority_segment * (i + 1)
+            value = np.random.uniform(a, b)
+
+            index, priority, data = self.tree.get_leaf(value)
+
+            # P(j)
+            sampling_probability = priority / total_priority
+
+            #  IS = (1/N * 1/P(i))**b /max wi == (N*P(i))**-b  /max wi
+            b_ISWeights[i, 0] = np.power(n * sampling_probability, -self.PER_b) / max_weight
+            b_idx[i] = index
+
+            memory_b.append([data])
+
+        return b_idx, memory_b, b_ISWeights
+
+    def batch_update(self, tree_idx, abs_errors):
+        """Update the priorities of the leaves ``tree_idx`` from ``abs_errors``.
+
+        priority = min(abs_error + PER_e, absolute_error_upper) ** PER_a.
+        ``abs_errors`` itself is left unmodified.
+        """
+        shifted_errors = abs_errors + self.PER_e  # avoid priority 0
+        clipped_errors = np.minimum(shifted_errors, self.absolute_error_upper)
+        ps = np.power(clipped_errors, self.PER_a)
+
+        for ti, p in zip(tree_idx, ps):
+            self.tree.update(ti, p)
+
+
+# Replay memory shared by the pretraining pass and the training loop
+memory = Memory(memory_size)
+
+# Put the environment into its starting state
+game.new_episode()
+
+# PRETRAIN: fill the replay memory with experiences produced by random actions
+print("pretraining")
+if training:
+    for i in range(pretrain_length):
+        # The very first step has no previous state to carry over
+        if i == 0:
+            state = game.get_state()
+
+        # Take a random action and observe its reward and whether the episode ended
+        action = random.choice(possible_actions)
+        reward = game.make_action(action)
+        done = game.is_episode_finished()
+
+        # A finished episode has no successor state; an all-zero state stands in for it
+        next_state = np.zeros(state.shape) if done else game.get_state()
+
+        experience = state, action, reward, next_state, done
+        memory.store(experience)
+
+        if done:
+            # Restart and continue from the first state of the new episode
+            game.new_episode()
+            state = game.get_state()
+        else:
+            state = next_state
+
+
+def predict_action(explore_start, explore_stop, decay_rate, decay_step, state, actions):
+    """Choose an action with an epsilon-greedy policy whose epsilon decays exponentially.
+
+    The exploration probability is
+    ``explore_stop + (explore_start - explore_stop) * exp(-decay_rate * decay_step)``.
+    With that probability a random entry of the module-level ``possible_actions``
+    is taken; otherwise the action with the largest Q-value of ``DQNetwork`` for
+    ``state`` is taken, evaluated with the module-level session ``sess``.
+
+    ``actions`` is accepted but not read.
+
+    Returns:
+        (action, explore_probability)
+    """
+    # Drawn first, before any other use of the random generators
+    draw = np.random.rand()
+
+    epsilon_span = explore_start - explore_stop
+    explore_probability = explore_stop + epsilon_span * np.exp(-decay_rate * decay_step)
+
+    if explore_probability > draw:
+        # Exploration: random action
+        return random.choice(possible_actions), explore_probability
+
+    # Exploitation: feed the state as a batch of one and take the best action
+    batch_of_one = state.reshape((1, *state.shape))
+    Qs = sess.run(DQNetwork.output, feed_dict={DQNetwork.inputs_: batch_of_one})
+    best = int(np.argmax(Qs))
+    return possible_actions[best], explore_probability
+
+
+# This function helps us to copy one set of variables to another
+# In our case we use it when we want to copy the parameters of DQN to Target_network
+# Thanks of the very good implementation of Arthur Juliani https://github.com/awjuliani
+def update_target_graph():
+    """Build the ops that copy the DQNetwork parameters into TargetNetwork.
+
+    The trainable variables of the two scopes are paired in collection order
+    and one assign op is created per pair.
+
+    Returns:
+        List of assign ops; running them in a session performs the copy.
+    """
+    trainable = tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES
+    online_vars = tf.compat.v1.get_collection(trainable, "DQNetwork")
+    target_vars = tf.compat.v1.get_collection(trainable, "TargetNetwork")
+
+    return [target_var.assign(online_var)
+            for online_var, target_var in zip(online_vars, target_vars)]
+
+
+# Saver used to write and restore the model checkpoints. It is created after
+# both networks have been built and is given no explicit variable list.
+saver = tf.compat.v1.train.Saver()
+
+
+class MyWindow(pyglet.window.Window):
+    """Pyglet window that lets the saved agent play the game.
+
+    The window owns its own TensorFlow session, restores the checkpoint into
+    it on construction, and advances the game by one action per update() call.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.set_minimum_size(400, 300)
+
+        # Background colour, converted from 0-255 RGBA to the 0-1 floats passed to glClearColor
+        backgroundColor = [channel / 255 for channel in [10, 0, 0, 255]]
+        glClearColor(*backgroundColor)
+
+        self.sess = tf.compat.v1.Session()
+        game.new_episode()
+        self.state = game.get_state()
+        # Same attribute that update() assigns; nothing has been observed yet
+        self.next_state = None
+        self.loadSession()
+
+    def loadSession(self):
+        """Restore the network weights from ./models/model.ckpt into this window's session."""
+        # if load_traing_model:
+        #     directory = "./allModels/model{}/models/model.ckpt".format(load_training_model_number)
+        #     saver.restore(self.sess, directory)
+        # else:
+        saver.restore(self.sess, "./models/model.ckpt")
+
+    def on_draw(self):
+        """Draw the current game state."""
+        game.render()
+
+    def update(self, dt):
+        """Let the agent take one action; ``dt`` is accepted but not used.
+
+        The action is random with a small exploration probability and
+        otherwise the one with the largest predicted Q-value. When the episode
+        ends, a new one is started.
+        """
+        exp_exp_tradeoff = np.random.rand()
+
+        if load_traing_model:
+            explore_probability = explore_stop + (explore_start - explore_stop) * np.exp(-decay_rate * load_training_model_number* 100)
+        else:
+            explore_probability = 0.0001
+
+        if explore_probability > exp_exp_tradeoff:
+            # Exploration: random action
+            action = random.choice(possible_actions)
+        else:
+            # Exploitation: feed the state as a batch of one and take the best action
+            Qs = self.sess.run(DQNetwork.output,
+                               feed_dict={DQNetwork.inputs_: self.state.reshape((1, *self.state.shape))})
+            choice = np.argmax(Qs)
+            action = possible_actions[int(choice)]
+
+        game.make_action(action)
+        done = game.is_episode_finished()
+
+        if done:
+            game.new_episode()
+            self.state = game.get_state()
+        else:
+            self.next_state = game.get_state()
+            self.state = self.next_state
+
+
+# Either train the agent (training = True) or open a window and replay the saved agent
+print("training")
+if training:
+    with tf.compat.v1.Session() as sess:
+        # Continue from the latest checkpoint or start from freshly initialised variables
+        if load:
+            saver.restore(sess, "./models/model.ckpt")
+        else:
+            sess.run(tf.compat.v1.global_variables_initializer())
+
+        # Number of steps taken so far; drives the decay of the exploration probability
+        decay_step = 0
+
+        # Steps since the target network was last synchronised
+        tau = 0
+
+        # Init the game
+        game.new_episode()
+
+        # Build the copy ops once and reuse them for every synchronisation;
+        # calling update_target_graph() again would add a new set of assign
+        # ops to the graph each time.
+        update_target = update_target_graph()
+        sess.run(update_target)
+
+        for episode in range(starting_episode, total_episodes):
+            step = 0
+
+            # Rewards collected in this episode
+            episode_rewards = []
+
+            # Make a new episode and observe the first state
+            game.new_episode()
+            state = game.get_state()
+
+            while step < max_steps:
+                step += 1
+                tau += 1
+                decay_step += 1
+
+                # With probability epsilon select a random action, otherwise a = argmax Q(s, a)
+                action, explore_probability = predict_action(explore_start, explore_stop, decay_rate, decay_step, state,
+                                                             possible_actions)
+
+                # Do the action
+                reward = game.make_action(action)
+
+                # Look if the episode is finished
+                done = game.is_episode_finished()
+
+                episode_rewards.append(reward)
+                if step >= max_steps:
+                    # The episode reached the step limit
+                    print("fuckin nice mate")
+                    print('Episode: {}'.format(episode),
+                          'Total reward: {}'.format(np.sum(episode_rewards)),
+                          'Explore P: {:.4f}'.format(explore_probability))
+
+                if done:
+                    # The episode ends, so there is no next state. np.int was only an
+                    # alias of the builtin int and no longer exists in current NumPy.
+                    next_state = np.zeros(state.shape, dtype=int)
+
+                    # Set step = max_steps to end the episode
+                    step = max_steps
+
+                    total_reward = np.sum(episode_rewards)
+
+                    print('Episode: {}'.format(episode),
+                          '\tTotal reward: {:.4f}'.format(total_reward),
+                          # '\tTraining loss: {:.4f}'.format(loss),
+                          '\tExplore P: {:.4f}'.format(explore_probability),
+                          '\tScore: {}'.format(game.get_score()),
+                          '\tlifespan: {}'.format(game.get_lifespan()),
+                          '\tactions per reward gate: {:.4f}'.format(game.get_lifespan() / (max(1, game.get_score()))))
+
+                    # Add experience to memory
+                    experience = state, action, reward, next_state, done
+                    memory.store(experience)
+
+                else:
+                    # Get the next state
+                    next_state = game.get_state()
+
+                    # Add experience to memory
+                    experience = state, action, reward, next_state, done
+                    memory.store(experience)
+
+                    # st+1 is now our current state
+                    state = next_state
+
+                ### LEARNING PART
+                # Obtain a prioritized mini-batch from memory; every entry is [experience]
+                tree_idx, batch, ISWeights_mb = memory.sample(batch_size)
+
+                states_mb = np.array([each[0][0] for each in batch], ndmin=2)
+                actions_mb = np.array([each[0][1] for each in batch])
+                rewards_mb = np.array([each[0][2] for each in batch])
+                next_states_mb = np.array([each[0][3] for each in batch], ndmin=2)
+                dones_mb = np.array([each[0][4] for each in batch])
+
+                # DOUBLE DQN Logic
+                # Use DQNetwork to select the action a' to take at next_state (highest Q-value)
+                # Use TargetNetwork to calculate the Q-value of Q(s', a')
+                q_next_state = sess.run(DQNetwork.output, feed_dict={DQNetwork.inputs_: next_states_mb})
+                q_target_next_state = sess.run(TargetNetwork.output, feed_dict={TargetNetwork.inputs_: next_states_mb})
+
+                # Q_target = r if the episode ends at s+1, otherwise r + gamma * Qtarget(s', a')
+                target_Qs_batch = []
+                for i in range(len(batch)):
+                    if dones_mb[i]:
+                        target_Qs_batch.append(rewards_mb[i])
+                    else:
+                        best_next_action = np.argmax(q_next_state[i])
+                        target = rewards_mb[i] + gamma * q_target_next_state[i][best_next_action]
+                        target_Qs_batch.append(target)
+
+                targets_mb = np.array(target_Qs_batch)
+
+                _, loss, absolute_errors = sess.run([DQNetwork.optimizer, DQNetwork.loss, DQNetwork.absolute_errors],
+                                                    feed_dict={DQNetwork.inputs_: states_mb,
+                                                               DQNetwork.target_Q: targets_mb,
+                                                               DQNetwork.actions_: actions_mb,
+                                                               DQNetwork.ISWeights_: ISWeights_mb})
+                if loss == 0:
+                    print(ISWeights_mb)
+
+                # Update priority
+                memory.batch_update(tree_idx, absolute_errors)
+
+                # Write TF Summaries
+                # summary = sess.run(write_op, feed_dict={DQNetwork.inputs_: states_mb,
+                #                                         DQNetwork.target_Q: targets_mb,
+                #                                         DQNetwork.actions_: actions_mb,
+                #                                         DQNetwork.ISWeights_: ISWeights_mb})
+                # writer.add_summary(summary, episode)
+                # writer.flush()
+
+                if tau > max_tau:
+                    # Copy the DQNetwork weights into TargetNetwork
+                    sess.run(update_target)
+                    tau = 0
+                    print("Model updated")
+
+            # Keep an archived copy of the model for selected episodes
+            if (episode < 100 and episode % 5 == 0) or (episode % 1000 == 0):
+                # Create the directory the checkpoint is actually written into
+                directory = "./allModels/model{}/models".format(episode)
+                os.makedirs(directory, exist_ok=True)
+                saver.save(sess, directory + "/model.ckpt")
+
+            # Save model every 5 episodes
+            if episode % 5 == 0:
+                saver.save(sess, "./models/model.ckpt")
+                print("Model Saved")
+else:
+    print("setting up window")
+    window = MyWindow(displayWidth, displayHeight, "AI Learns to Drive", resizable=False)
+    pyglet.clock.schedule_interval(window.update, 1 / frameRate)
+    pyglet.app.run()
+
+# print("testing")
+# with tf.Session() as sess:
+#
+#     # Load the model
+#     saver.restore(sess, "./models/model.ckpt")
+#
+#     for i in range(10):
+#         print(i)
+#         game.new_episode()
+#         state = game.get_state()
+#
+#         while not game.is_episode_finished():
+#             ## EPSILON GREEDY STRATEGY
+#             # Choose action a from state s using epsilon greedy.
+#             ## First we randomize a number
+#             exp_exp_tradeoff = np.random.rand()
+#
+#             explore_probability = 0.01
+#
+#             if (explore_probability > exp_exp_tradeoff):
+#                 # Make a random action (exploration)
+#                 action = random.choice(possible_actions)
+#
+#             else:
+#                 # Get action from Q-network (exploitation)
+#                 # Estimate the Qs values state
+#                 Qs = sess.run(DQNetwork.output, feed_dict={DQNetwork.inputs_: state.reshape((1, *state.shape))})
+#
+#                 # Take the biggest Q value (= the best action)
+#                 choice = np.argmax(Qs)
+#                 action = possible_actions[int(choice)]
+#
+#             game.make_action(action)
+#             window.draw(game)
+#             # game.render()
+#             done = game.is_episode_finished()
+#
+#             if done:
+#                 break
+#
+#             else:
+#                 next_state = game.get_state()
+#                 state = next_state
+#
+#
diff --git a/Converted_to_tensorflow_2/README.md b/Converted_to_tensorflow_2/README.md
new file mode 100644
index 0000000..c672f29
--- /dev/null
+++ b/Converted_to_tensorflow_2/README.md
@@ -0,0 +1,14 @@
+# Car-QLearning
+
+Ensure you are using tensorflow version 2 before trying this: https://gist.github.com/off99555/fd59f204c02b5f704287227d67744d6f
+
+`pip install -r requirements.txt`
+`python main.py`
+
+For handling eager execution:
+<https://stackoverflow.com/questions/56561734/runtimeerror-tf-placeholder-is-not-compatible-with-eager-execution>
+
+code used to convert to tensorflow v2: https://www.tensorflow.org/guide/upgrade
+
+However, it currently still doesn't run:
+![error](error.png)
\ No newline at end of file
diff --git a/Converted_to_tensorflow_2/ShapeObjects.py b/Converted_to_tensorflow_2/ShapeObjects.py
new file mode 100644
index 0000000..cb3ee3f
--- /dev/null
+++ b/Converted_to_tensorflow_2/ShapeObjects.py
@@ -0,0 +1,58 @@
+import pyglet
+from pyglet.gl import *
+import math
+
+
+
+
+class Triangle:  # a filled triangle kept as a pyglet vertex list (2D corners stored with z = 0)
+    def __init__(self, x1, y1, x2, y2, x3, y3, col=(255, 255, 255)):  # tuple: no shared mutable default
+        self.vertices = pyglet.graphics.vertex_list(3, ('v3f', [x1, y1, 0, x2, y2, 0, x3, y3, 0]),
+                                                    ('c3B', [*col, *col, *col]))  # same colour on all 3 vertices
+
+    def show(self):  # draws the stored vertices as GL_TRIANGLES
+        self.vertices.draw(GL_TRIANGLES)
+
+
+class Rect:
+    # Axis-aligned rectangle drawn as two indexed triangles; red until setColor is called.
+    def __init__(self, x, y, w, h):
+        self.x, self.y = x, y
+        self.w, self.h = w, h
+        # one colour triple per corner (4 corners), matching the 'c3B' attribute used in draw
+        self.col = [255, 0, 0] * 4
+
+    def setColor(self, newColor):
+        # repeat the colour once per corner
+        self.col = newColor * 4
+
+    def draw(self):
+        x0, y0 = self.x, self.y
+        x1, y1 = x0 + self.w, y0 + self.h
+        # corners 0..3 in the order the index list expects: triangles (0, 1, 2) and (0, 2, 3)
+        corners = [x0, y0, x1, y0, x1, y1, x0, y1]
+        pyglet.graphics.draw_indexed(4, pyglet.gl.GL_TRIANGLES, [0, 1, 2, 0, 2, 3],
+                                     ('v2f', corners),
+                                     ('c3B', self.col))
+
+
+class Line:
+    # A straight segment from (x1, y1) to (x2, y2); black with line width 1 by default.
+    def __init__(self, x1, y1, x2, y2):
+        self.x1, self.y1 = x1, y1
+        self.x2, self.y2 = x2, y2
+        self.color = [0, 0, 0] * 2  # one colour triple per endpoint
+        self.lineThinkness = 1  # (sic) the attribute name is kept because callers may read it
+
+    def draw(self):
+        # apply this line's own width before issuing the draw call
+        pyglet.gl.glLineWidth(self.lineThinkness)
+        endpoints = (self.x1, self.y1, self.x2, self.y2)
+        pyglet.graphics.draw(2, pyglet.gl.GL_LINES, ("v2f", endpoints), ('c3B', self.color))
+
+    def setColor(self, newColor):
+        # repeat the colour for both endpoints
+        self.color = newColor * 2
+
+    def setLineThinkness(self, thinkness):
+        self.lineThinkness = thinkness
diff --git a/Converted_to_tensorflow_2/__pycache__/Drawer.cpython-37.pyc b/Converted_to_tensorflow_2/__pycache__/Drawer.cpython-37.pyc
new file mode 100644
index 0000000..96241db
Binary files /dev/null and b/Converted_to_tensorflow_2/__pycache__/Drawer.cpython-37.pyc differ
diff --git a/Converted_to_tensorflow_2/__pycache__/Game.cpython-37.pyc b/Converted_to_tensorflow_2/__pycache__/Game.cpython-37.pyc
new file mode 100644
index 0000000..c98eb90
Binary files /dev/null and b/Converted_to_tensorflow_2/__pycache__/Game.cpython-37.pyc differ
diff --git a/Converted_to_tensorflow_2/__pycache__/Globals.cpython-37.pyc b/Converted_to_tensorflow_2/__pycache__/Globals.cpython-37.pyc
new file mode 100644
index 0000000..0d68b45
Binary files /dev/null and b/Converted_to_tensorflow_2/__pycache__/Globals.cpython-37.pyc differ
diff --git a/Converted_to_tensorflow_2/__pycache__/PygameAdditionalMethods.cpython-37.pyc b/Converted_to_tensorflow_2/__pycache__/PygameAdditionalMethods.cpython-37.pyc
new file mode 100644
index 0000000..3125262
Binary files /dev/null and b/Converted_to_tensorflow_2/__pycache__/PygameAdditionalMethods.cpython-37.pyc differ
diff --git a/Converted_to_tensorflow_2/__pycache__/ShapeObjects.cpython-37.pyc b/Converted_to_tensorflow_2/__pycache__/ShapeObjects.cpython-37.pyc
new file mode 100644
index 0000000..5898e53
Binary files /dev/null and b/Converted_to_tensorflow_2/__pycache__/ShapeObjects.cpython-37.pyc differ
diff --git a/Converted_to_tensorflow_2/error.png b/Converted_to_tensorflow_2/error.png
new file mode 100644
index 0000000..7621740
Binary files /dev/null and b/Converted_to_tensorflow_2/error.png differ
diff --git a/Converted_to_tensorflow_2/images/car.png b/Converted_to_tensorflow_2/images/car.png
new file mode 100644
index 0000000..116a3a6
Binary files /dev/null and b/Converted_to_tensorflow_2/images/car.png differ
diff --git a/Converted_to_tensorflow_2/images/track.png b/Converted_to_tensorflow_2/images/track.png
new file mode 100644
index 0000000..2f2279d
Binary files /dev/null and b/Converted_to_tensorflow_2/images/track.png differ
diff --git a/Converted_to_tensorflow_2/main.py b/Converted_to_tensorflow_2/main.py
new file mode 100644
index 0000000..3959e7b
--- /dev/null
+++ b/Converted_to_tensorflow_2/main.py
@@ -0,0 +1,664 @@
+import pyglet
+from pyglet.gl import *
+import pygame
+import math
+from pyglet.window import key
+from Drawer import Drawer
+# from PygameAdditionalMethods import *
+from ShapeObjects import Line
+import tensorflow as tf  # Deep Learning library
+import numpy as np  # Handle matrices
+from collections import deque
+import random
+import os
+from Globals import displayHeight, displayWidth
+from Game import Game
+
+frameRate = 30.0  # MyWindow.update is scheduled every 1 / frameRate seconds (see the bottom of the file)
+# short alias for pygame's 2D vector class
+vec2 = pygame.math.Vector2
+# the networks below use TF1-style placeholders and sessions, which do not work under eager execution
+tf.compat.v1.disable_eager_execution()
+ 
+"""
+a line which the car object cannot touch
+"""
+
+class QLearning:
+    def __init__(self, game):
+        # Sets up the agent for `game`: hyper-parameters, TF1 session, both networks, replay memory, saver.
+        self.game = game
+        self.game.new_episode()
+        # the sizes of the network input and output are read from the game
+        self.stateSize = [game.state_size]
+        self.actionSize = game.no_of_actions
+        self.learningRate = 0.00025
+        self.possibleActions = np.identity(self.actionSize, dtype=int)
+        # an episode is cut off after maxSteps steps; training stops after totalTrainingEpisodes episodes
+        self.totalTrainingEpisodes = 100000
+        self.maxSteps = 3600
+        # experiences sampled per learning step / capacity of the replay memory
+        self.batchSize = 64
+        self.memorySize = 100000
+        # epsilon-greedy exploration: epsilon decays from maxEpsilon towards minEpsilon as decayStep grows
+        self.maxEpsilon = 1
+        self.minEpsilon = 0.01
+        self.decayRate = 0.00001
+        self.decayStep = 0
+        self.gamma = 0.9
+        self.training = True
+        # number of random-action experiences pretrain() stores before learning starts
+        self.pretrainLength = self.batchSize
+        # tau counts training steps; once it exceeds maxTau the target network is refreshed (see train)
+        self.maxTau = 10000
+        self.tau = 0
+        # start from an empty default graph before the two networks are built
+        tf.compat.v1.reset_default_graph()
+
+        self.sess = tf.compat.v1.Session()
+        # online network (optimised in train) and target network (only updated by copying weights)
+        self.DQNetwork = DQN(self.stateSize, self.actionSize, self.learningRate, name='DQNetwork')
+        self.TargetNetwork = DQN(self.stateSize, self.actionSize, self.learningRate, name='TargetNetwork')
+        # fill the replay memory with random play so the first training batch can be sampled
+        self.memoryBuffer = PrioritisedMemory(self.memorySize)
+        self.pretrain()
+
+        self.state = []
+        self.trainingStepNo = 0
+
+        self.newEpisode = False
+        self.stepNo = 0
+        self.episodeNo = 0
+        self.saver = tf.compat.v1.train.Saver()
+        # set `load` to True to resume from the checkpoint saved at episode loadFromEpisodeNo
+        load = False
+        loadFromEpisodeNo = 6300
+        if load:
+            self.episodeNo = loadFromEpisodeNo
+            self.saver.restore(self.sess, "./allModels/model{}/models/model.ckpt".format(self.episodeNo))
+        else:
+            self.sess.run(tf.compat.v1.global_variables_initializer())
+        # self.sess.graph.finalize()
+        self.sess.run(self.update_target_graph())  # start with the target network equal to the online one
+
+    # This function helps us to copy one set of variables to another
+    # In our case we use it when we want to copy the parameters of DQN to Target_network
+    # Thanks of the very good implementation of Arthur Juliani https://github.com/awjuliani
+    def update_target_graph(self):
+        # Returns the list of assign ops that copy every trainable DQNetwork variable into the
+        # matching TargetNetwork variable; pass the list to self.sess.run(...) to perform the copy.
+        # The ops are built once and cached on the instance: creating them again on every call
+        # would add new nodes to the TF1 graph each time the target network is refreshed.
+        cachedOps = getattr(self, "_targetUpdateOps", None)
+        if cachedOps is None:
+            trainable = tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES
+            # both collections are filtered by variable-scope name and are paired up positionally
+            from_vars = tf.compat.v1.get_collection(trainable, "DQNetwork")
+            to_vars = tf.compat.v1.get_collection(trainable, "TargetNetwork")
+            cachedOps = [to_var.assign(from_var) for from_var, to_var in zip(from_vars, to_vars)]
+            self._targetUpdateOps = cachedOps
+        return cachedOps
+
+    def pretrain(self):
+        # Seeds the replay memory with self.pretrainLength experiences gathered by taking
+        # uniformly random actions, restarting the game whenever the episode ends.
+        for stepIndex in range(self.pretrainLength):
+            if stepIndex == 0:
+                state = self.game.get_state()
+
+            # a random row of the identity matrix is a random one-hot action
+            action = random.choice(self.possibleActions)
+            reward = self.game.make_action(np.argmax(action))
+            nextState = self.game.get_state()
+            episodeOver = bool(self.game.is_episode_finished())
+            self.newEpisode = episodeOver
+
+            if episodeOver:
+                # ending the episode overrides whatever reward the game returned
+                reward = -100
+            self.memoryBuffer.store((state, action, reward, nextState, episodeOver))
+
+            if episodeOver:
+                self.game.new_episode()
+                state = self.game.get_state()
+            else:
+                state = nextState
+
+        print("pretrainingDone")
+
+    def train(self):
+        # Performs ONE environment step followed by ONE learning step (MyWindow.update calls this
+        # repeatedly). Also handles end-of-episode bookkeeping, a checkpoint every 100 episodes and
+        # the periodic copy of the online weights into the target network.
+        if self.trainingStepNo == 0 or self.newEpisode:
+            self.state = self.game.get_state()
+
+        if self.stepNo < self.maxSteps:
+            self.stepNo += 1
+            self.decayStep += 1
+            self.trainingStepNo += 1
+            self.tau += 1
+
+            # epsilon-greedy: explore with probability epsilon, which decays exponentially from
+            # maxEpsilon towards minEpsilon as decayStep grows
+            epsilon = self.minEpsilon + (self.maxEpsilon - self.minEpsilon) * np.exp(
+                -self.decayRate * self.decayStep)
+
+            if np.random.rand() < epsilon:
+                choice = random.randint(1, len(self.possibleActions)) - 1
+            else:
+                QValues = self.sess.run(self.DQNetwork.output,
+                                        feed_dict={self.DQNetwork.inputs_: np.array([self.state])})
+                choice = np.argmax(QValues)
+            action = self.possibleActions[choice]
+
+            actionNo = np.argmax(action)
+            # now we need to get next state
+            reward = self.game.make_action(actionNo)
+            nextState = self.game.get_state()
+            # query the game once so the message below and the stored flag cannot disagree
+            episodeFinished = self.game.is_episode_finished()
+
+            if reward > 0:
+                print("Hell YEAH, Reward {}".format(reward))
+            # if car is dead then finish episode
+            if episodeFinished:
+                reward = -100
+                self.stepNo = self.maxSteps
+                print("DEAD!! Reward =  -100")
+
+            # add the experience to the memory buffer
+            self.memoryBuffer.store((self.state, action, reward, nextState, episodeFinished))
+            self.state = nextState
+
+            # learning part
+            # sample a prioritised batch; every entry is a one-element list wrapping the stored
+            # (state, action, reward, nextState, done) tuple, hence the exp[0][k] indexing
+            treeIndexes, batch, ISWeights = self.memoryBuffer.sample(self.batchSize)
+
+            statesFromBatch = np.array([exp[0][0] for exp in batch])
+            actionsFromBatch = np.array([exp[0][1] for exp in batch])
+            rewardsFromBatch = np.array([exp[0][2] for exp in batch])
+            nextStatesFromBatch = np.array([exp[0][3] for exp in batch])
+            carDieBooleansFromBatch = np.array([exp[0][4] for exp in batch])
+
+            # Double DQN: the ONLINE network chooses the best next action and the TARGET network
+            # evaluates it. Choosing with the target network as well would reduce to np.max(...),
+            # i.e. plain DQN without the over-estimation correction.
+            onlineQOfNextStates = self.sess.run(self.DQNetwork.output,
+                                                feed_dict={self.DQNetwork.inputs_: nextStatesFromBatch})
+            QValueOfNextStates = self.sess.run(self.TargetNetwork.output,
+                                               feed_dict={self.TargetNetwork.inputs_: nextStatesFromBatch})
+
+            targetQsFromBatch = []
+            for i in range(self.batchSize):
+                if carDieBooleansFromBatch[i]:
+                    # terminal transition: there is no next state to bootstrap from
+                    targetQsFromBatch.append(rewardsFromBatch[i])
+                else:
+                    bestNextAction = np.argmax(onlineQOfNextStates[i])
+                    target = rewardsFromBatch[i] + self.gamma * QValueOfNextStates[i][bestNextAction]
+                    targetQsFromBatch.append(target)
+
+            targetsForBatch = np.array(targetQsFromBatch)
+
+            # one optimiser step; the returned absolute errors become the new priorities
+            _, absoluteErrors = self.sess.run(
+                [self.DQNetwork.optimizer, self.DQNetwork.absoluteError],
+                feed_dict={self.DQNetwork.inputs_: statesFromBatch,
+                           self.DQNetwork.actions_: actionsFromBatch,
+                           self.DQNetwork.targetQ: targetsForBatch,
+                           self.DQNetwork.ISWeights_: ISWeights})
+
+            # update priorities
+            self.memoryBuffer.batchUpdate(treeIndexes, absoluteErrors)
+
+        if self.stepNo >= self.maxSteps:
+            self.episodeNo += 1
+            self.stepNo = 0
+            self.newEpisode = True
+            self.game.new_episode()
+            if self.episodeNo >= self.totalTrainingEpisodes:
+                self.training = False
+            if self.episodeNo % 100 == 0:
+                checkpointPath = "./allModels/model{}/models/model.ckpt".format(self.episodeNo)
+                # create the directory the checkpoint is actually written to (".../models"),
+                # not just its parent
+                os.makedirs(os.path.dirname(checkpointPath), exist_ok=True)
+                self.saver.save(self.sess, checkpointPath)
+                print("Model Saved")
+
+        # refresh the target network once more than maxTau training steps have passed
+        if self.tau > self.maxTau:
+            self.sess.run(self.update_target_graph())
+            self.tau = 0
+            print("Target Network Updated")
+
+    def test(self):
+        # Plays one greedy step (no exploration, no learning): feeds the current game state to
+        # the online network, performs the action with the highest predicted Q value and
+        # starts a new episode if that step ended the current one.
+        self.state = self.game.get_state()
+        stateBatch = np.array([self.state])  # batch containing just the current state
+        QValues = self.sess.run(self.DQNetwork.output, feed_dict={self.DQNetwork.inputs_: stateBatch})
+
+        # look the one-hot row up and convert it back to an action number
+        greedyAction = self.possibleActions[np.argmax(QValues)]
+        self.game.make_action(np.argmax(greedyAction))
+
+        if not self.game.is_episode_finished():
+            return
+        self.game.new_episode()
+
+
+class Memory:
+    # Plain (non-prioritised) replay buffer: a bounded FIFO sampled uniformly without replacement.
+    def __init__(self, maxSize):
+        # a deque with maxlen drops its oldest entry once maxSize is reached
+        self.buffer = deque(maxlen=maxSize)
+
+    def add(self, experience):
+        self.buffer.append(experience)
+
+    def sample(self, batchSize):
+        # distinct positions, so batchSize must not exceed the number of stored experiences
+        positions = np.random.choice(np.arange(len(self.buffer)), size=batchSize, replace=False)
+        return [self.buffer[position] for position in positions]
+
+
+class DQN:
+    # Fully connected Q-network (state -> 16 -> 16 -> one Q value per action) together with its
+    # TF1 loss/optimiser ops. Everything is created inside the variable scope called `name`.
+    def __init__(self, stateSize, actionSize, learningRate, name):
+        self.stateSize = stateSize
+        self.actionSize = actionSize
+        self.learningRate = learningRate
+        self.name = name
+
+        with tf.compat.v1.variable_scope(self.name):
+            # the inputs describing the state
+            self.inputs_ = tf.compat.v1.placeholder(tf.float32, [None, *self.stateSize], name="inputs")
+            # the one hotted action that we took
+            # e.g. if we took the 3rd action action_ = [0,0,1,0,0,0,0]
+            self.actions_ = tf.compat.v1.placeholder(tf.float32, [None, self.actionSize], name="actions")
+            # the target = reward + the discounted maximum possible q value of the next state
+            self.targetQ = tf.compat.v1.placeholder(tf.float32, [None], name="target")
+            # importance-sampling weight of every sample, fed as a [batch, 1] column
+            self.ISWeights_ = tf.compat.v1.placeholder(tf.float32, [None, 1], name='ISWeights')
+
+            self.dense1 = tf.compat.v1.layers.dense(inputs=self.inputs_,
+                                          units=16,
+                                          activation=tf.nn.elu,
+                                          kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"),
+                                          name="dense1")
+            self.dense2 = tf.compat.v1.layers.dense(inputs=self.dense1,
+                                          units=16,
+                                          activation=tf.nn.elu,
+                                          kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"),
+                                          name="dense2")
+            self.output = tf.compat.v1.layers.dense(inputs=self.dense2,
+                                          units=self.actionSize,
+                                          kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"),
+                                          activation=None,
+                                          name="outputs")
+
+            # by multiplying the output by the one hotted action space we only get the q value we desire
+            # all other values are 0; summing over the action axis (axis=1) gives one q value per
+            # sample. Without the axis the sum would collapse the whole batch into a single scalar.
+            self.QValue = tf.reduce_sum(input_tensor=tf.multiply(self.output, self.actions_), axis=1)
+            # per-sample absolute TD error, used for prioritising experiences
+            self.absoluteError = abs(self.QValue - self.targetQ)
+
+            # importance-weighted mean squared error. The weights are flattened to [batch] so each
+            # error is scaled by its own weight rather than broadcast into a [batch, batch] matrix.
+            self.loss = tf.reduce_mean(input_tensor=tf.squeeze(self.ISWeights_, axis=1) * tf.square(self.targetQ - self.QValue))
+            self.optimizer = tf.compat.v1.train.AdamOptimizer(self.learningRate).minimize(self.loss)
+
+
+class DDQN:
+    # Dueling variant of the Q-network: one shared hidden layer feeds a state-value stream and an
+    # advantage stream that are recombined into Q values. Same placeholders and training ops as DQN.
+    def __init__(self, stateSize, actionSize, learningRate, name):
+        self.stateSize = stateSize
+        self.actionSize = actionSize
+        self.learningRate = learningRate
+        self.name = name
+
+        with tf.compat.v1.variable_scope(self.name):
+            # the inputs describing the state
+            self.inputs_ = tf.compat.v1.placeholder(tf.float32, [None, *self.stateSize], name="inputs")
+            # the one hotted action that we took
+            # e.g. if we took the 3rd action action_ = [0,0,1,0,0,0,0]
+            self.actions_ = tf.compat.v1.placeholder(tf.float32, [None, self.actionSize], name="actions")
+            # the target = reward + the discounted maximum possible q value of the next state
+            self.targetQ = tf.compat.v1.placeholder(tf.float32, [None], name="target")
+            # importance-sampling weight of every sample, fed as a [batch, 1] column
+            self.ISWeights_ = tf.compat.v1.placeholder(tf.float32, [None, 1], name='ISWeights')
+
+            self.dense1 = tf.compat.v1.layers.dense(inputs=self.inputs_,
+                                          units=16,
+                                          activation=tf.nn.elu,
+                                          kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"),
+                                          name="dense1")
+
+            ## Here we separate into two streams
+            # The one that calculate V(s) which is the value of the input state
+            # in other words how good this state is
+            self.valueLayer = tf.compat.v1.layers.dense(inputs=self.dense1,
+                                              units=16,
+                                              activation=tf.nn.elu,
+                                              kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"),
+                                              name="valueLayer")
+
+            self.value = tf.compat.v1.layers.dense(inputs=self.valueLayer,
+                                         units=1,
+                                         activation=None,
+                                         kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"),
+                                         name="value")
+
+            # The one that calculate A(s,a)
+            # which is the advantage of taking each action in this given state
+            self.advantageLayer = tf.compat.v1.layers.dense(inputs=self.dense1,
+                                                  units=16,
+                                                  activation=tf.nn.elu,
+                                                  kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"),
+                                                  name="advantageLayer")
+
+            self.advantage = tf.compat.v1.layers.dense(inputs=self.advantageLayer,
+                                             units=self.actionSize,
+                                             activation=None,
+                                             kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"),
+                                             name="advantages")
+
+            # Aggregating layer
+            # Q(s,a) = V(s) + (A(s,a) - 1/|A| * sum A(s,a'))
+            # output  = value of the state + the advantage of taking the given action relative to other actions
+            self.output = self.value + tf.subtract(self.advantage,
+                                                   tf.reduce_mean(input_tensor=self.advantage, axis=1, keepdims=True))
+
+            # by multiplying the output by the one hotted action space we only get the q value we desire
+            # all other values are 0; summing over the action axis (axis=1) gives one q value per
+            # sample. Without the axis the sum would collapse the whole batch into a single scalar.
+            self.QValue = tf.reduce_sum(input_tensor=tf.multiply(self.output, self.actions_), axis=1)
+            # per-sample absolute TD error, used for prioritising experiences
+            self.absoluteError = abs(self.QValue - self.targetQ)
+
+            # importance-weighted mean squared error. The weights are flattened to [batch] so each
+            # error is scaled by its own weight rather than broadcast into a [batch, batch] matrix.
+            self.loss = tf.reduce_mean(input_tensor=tf.squeeze(self.ISWeights_, axis=1) * tf.square(self.targetQ - self.QValue))
+            # Adam optimiser minimising the weighted loss
+            self.optimizer = tf.compat.v1.train.AdamOptimizer(self.learningRate).minimize(self.loss)
+
+
+class PrioritisedMemory:
+    # Prioritised experience replay on top of a SumTree: experiences are drawn with probability
+    # proportional to their stored priority and come with importance-sampling (IS) weights.
+    e = 0.01  # added to every error so that no experience ends up with priority 0
+    a = 0.06  # priority exponent (0 = uniform sampling). NOTE(review): 0.6 is the common choice - confirm
+    b = 0.04  # IS-weight exponent, annealed towards 1. NOTE(review): 0.4 is the common start - confirm
+    bIncreaseRate = 0.001
+    errorsClippedAt = 1.0  # upper bound for errors; also the priority given to the very first entry
+
+    def __init__(self, capacity):
+        self.sumTree = SumTree(capacity)
+        self.capacity = capacity
+
+    def store(self, experience):
+        """ when an experience is first added to memory it has the highest priority
+            so each experience is run through at least once
+        """
+        # get max priority
+        maxPriority = np.max(self.sumTree.tree[self.sumTree.indexOfFirstData:])
+
+        # if the max is 0 then this means that this is the first element
+        # so might as well give it the highest priority possible
+        if maxPriority == 0:
+            maxPriority = self.errorsClippedAt
+
+        self.sumTree.add(maxPriority, experience)
+
+    def sample(self, n):
+        # Draws n experiences. Returns (tree indexes [n], batch, IS weights [n, 1]); every batch
+        # entry is a one-element list holding the stored experience.
+        batch = []
+        batchIndexes = np.zeros([n], dtype=np.int32)
+        batchISWeights = np.zeros([n, 1], dtype=np.float32)
+
+        # so we divide the priority space up into n different priority segments
+        totalPriority = self.sumTree.total_priority()
+        prioritySegmentSize = totalPriority / n
+
+        # also we need to increase b with every value to anneal it towards 1
+        self.b = min(self.b + self.bIncreaseRate, 1)
+
+        # ok very nice now in order to normalize all the weights in order to ensure they are all within 0 and 1
+        # we are going to need to get the maximum weight and divide all weights by that
+        # the largest weight will have the lowest priority and thus the lowest probability of being chosen
+        # (leaves are floored at e, so still-empty slots count as priority e here)
+        minPriority = np.min(np.maximum(self.sumTree.tree[self.sumTree.indexOfFirstData:], self.e))
+        minProbability = minPriority / totalPriority
+
+        # formula
+        maxWeight = (minProbability * n) ** (-self.b)
+        for i in range(n):
+            # get the upper and lower bounds of the segment
+            segmentMin = prioritySegmentSize * i
+            segmentMax = segmentMin + prioritySegmentSize
+
+            # one uniformly random point per segment spreads the batch over the priority range
+            value = np.random.uniform(segmentMin, segmentMax)
+            treeIndex, priority, data = self.sumTree.getLeaf(value)
+            samplingProbability = priority / totalPriority
+
+            #  IS = (1/N * 1/P(i))**b /max wi == (N*P(i))**-b  /max wi
+            batchISWeights[i, 0] = np.power(n * samplingProbability, -self.b) / maxWeight
+            batchIndexes[i] = treeIndex
+            batch.append([data])
+
+        return batchIndexes, batch, batchISWeights
+
+    def batchUpdate(self, treeIndexes, absoluteErrors):
+        # Re-prioritises the sampled leaves from their new absolute errors.
+        # Work on a shifted copy so the caller's array is not modified in place;
+        # the offset e avoids 0 values.
+        shiftedErrors = np.asarray(absoluteErrors) + self.e
+        clippedErrors = np.minimum(shiftedErrors, self.errorsClippedAt)
+        priorities = np.power(clippedErrors, self.a)
+        for treeIndex, priority in zip(treeIndexes, priorities):
+            self.sumTree.update(treeIndex, priority)
+
+
+class SumTree:
+    # Binary tree stored in a flat array: each leaf holds the priority of one experience and each
+    # inner node holds the sum of its two children, so the root is the total priority.
+    def __init__(self, capacity):
+        self.capacity = capacity
+        # capacity leaves need capacity - 1 inner nodes above them
+        self.size = 2 * capacity - 1
+        self.tree = np.zeros(self.size)
+        # the experiences themselves, stored in the same order as the leaves
+        self.data = np.zeros(capacity, dtype=object)
+        self.dataPointer = 0  # slot the next add() writes to
+        self.indexOfFirstData = capacity - 1  # the leaves occupy the tail of self.tree
+
+    # adds a new element to the sum tree (or overwrites an old one once the tree is full)
+    # and updates all affected nodes
+    def add(self, priority, data):
+        slot = self.dataPointer
+        # overwrite data
+        self.data[slot] = data
+        self.update(self.indexOfFirstData + slot, priority)
+
+        # advance circularly so the oldest entry is the next one to be replaced
+        self.dataPointer = (slot + 1) % self.capacity
+
+    # updates the priority of the indexed leaf as well as updating the value of all affected
+    # elements in the sum tree
+    def update(self, index, priority):
+        change = priority - self.tree[index]
+        self.tree[index] = priority
+
+        # propagate the difference to every ancestor up to and including the root
+        node = index
+        while node != 0:
+            node = (node - 1) // 2
+            self.tree[node] += change
+
+    def getLeaf(self, value):
+        # Walks down from the root to the leaf whose cumulative-priority interval contains
+        # `value`; returns (tree index, priority, stored data) of that leaf.
+
+        node = 0
+        left = 1
+
+        while left < self.size:
+            leftSum = self.tree[left]
+            if leftSum >= value:
+                node = left
+            else:
+                # skip the whole left subtree and continue in the right one
+                value -= leftSum
+                node = left + 1
+
+            left = 2 * node + 1
+
+        return node, self.tree[node], self.data[node - self.indexOfFirstData]
+
+    def total_priority(self):
+        # the root node holds the sum of all leaf priorities
+        return self.tree[0]
+
+
+"""
+
+a class inheriting from the pyglet window class which controls the game window and acts as the main class of the program
+"""
+
+
+class MyWindow(pyglet.window.Window):
+    # The application window and main class of the program: it owns the Game and the QLearning
+    # agent, lets the agent act in update() and asks the game to draw itself in on_draw().
+    # Keyboard and mouse handlers are intentionally empty; their old bodies are kept as comments.
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.set_minimum_size(400, 300)
+
+        # set background color (the 0..255 components are scaled to 0..1 for glClearColor)
+        backgroundColor = [0, 0, 0, 255]
+        backgroundColor = [i / 255 for i in backgroundColor]
+        glClearColor(*backgroundColor)
+        # create the game and the agent that learns to play it
+        self.game = Game()
+        self.ai = QLearning(self.game)
+
+    def on_key_press(self, symbol, modifiers):
+        # called when a key is hit; manual driving is disabled, the commented code below is
+        # the earlier keyboard handling.
+        # NOTE(review): this override also replaces pyglet's default key handler - confirm intended
+        pass
+        # if symbol == key.RIGHT:
+        #     self.car.turningRight = True
+        #
+        # if symbol == key.LEFT:
+        #     self.car.turningLeft = True
+        #
+        # if symbol == key.UP:
+        #     self.car.accelerating = True
+        #
+        # if symbol == key.DOWN:
+        #     self.car.reversing = True
+
+    def on_close(self):
+        # called when the user asks to close the window: release the TensorFlow session, then
+        # run pyglet's default handler, which is what actually closes the window. Without the
+        # super() call the window would stay open while its session is already closed.
+        self.ai.sess.close()
+        super().on_close()
+
+    def on_key_release(self, symbol, modifiers):
+        # called when a key is released; disabled for the same reason as on_key_press
+        pass
+        # if symbol == key.RIGHT:
+        #     self.car.turningRight = False
+        #
+        # if symbol == key.LEFT:
+        #     self.car.turningLeft = False
+        #
+        # if symbol == key.UP:
+        #     self.car.accelerating = False
+        #
+        # if symbol == key.DOWN:
+        #     self.car.reversing = False
+        #
+        # if symbol == key.SPACE:
+        #     self.ai.training = not self.ai.training
+
+    def on_mouse_press(self, x, y, button, modifiers):
+        # does nothing; the commented code below paired up two clicks and printed/added a
+        # Wall or RewardGate between them (apparently a helper for laying out the track)
+        # # print(x,y)
+        # if self.firstClick:
+        #     self.clickPos = [x, y]
+        # else:
+        #     # print("self.walls.append(Wall({}, {}, {}, {}))".format(self.clickPos[0],
+        #     #                                                        displayHeight - self.clickPos[1],
+        #     #                                                        x, displayHeight - y))
+        #
+        #     # self.gates.append(RewardGate(self.clickPos[0], self.clickPos[1], x, y))
+        #
+        # self.firstClick = not self.firstClick
+        pass
+
+    def on_draw(self):
+        # called every frame: all drawing is delegated to the game
+        # (the commented code below is the earlier drawing code that lived in this class)
+        self.game.render()
+        #
+        # glPushMatrix()
+        #
+        # glTranslatef(-1, -1, 0)
+        # glScalef(1 / (displayWidth / 2), 1 / (displayHeight / 2), 1)
+        #
+        # self.clear()
+        # self.trackSprite.draw()
+        # self.car.show()
+        #
+        # for w in self.walls:
+        #     w.draw()
+        # # for g in self.gates:
+        # #     g.draw()
+        # vision = self.car.getState()
+        #
+        # for i in range(len(vision)):
+        #
+        #     label = pyglet.text.Label("{}:  {}".format(i,vision[i]),
+        #                               font_name='Times New Roman',
+        #                               font_size=24,
+        #                               x=10, y=50*i+250,
+        #                               anchor_x='left', anchor_y='center')
+        #     label.draw()
+        # glPopMatrix()
+
+    def on_resize(self, width, height):
+        # called when window resized: keep the GL viewport covering the whole window
+        # NOTE(review): this replaces pyglet's default on_resize; only the viewport is set here
+        glViewport(0, 0, width, height)
+
+    def update(self, dt):
+        # Scheduled with pyglet.clock at the bottom of the file. While the agent is training it
+        # takes up to 5 train steps per call; once training has finished it plays a single
+        # greedy step per call instead.
+        # dt is the argument pyglet.clock passes to scheduled functions; it is not used here
+        for _ in range(5):
+            if self.ai.training:
+                self.ai.train()
+            else:
+                self.ai.test()
+                return
+        # self.car.update()
+
+
+if __name__ == "__main__":
+    window = MyWindow(displayWidth, displayHeight, "AI Learns to Drive", resizable=False)  # also builds the game and the agent
+    pyglet.clock.schedule_interval(window.update, 1 / frameRate)  # call window.update every 1 / frameRate seconds
+    pyglet.app.run()  # start the pyglet event loop
diff --git a/Converted_to_tensorflow_2/models/checkpoint b/Converted_to_tensorflow_2/models/checkpoint
new file mode 100644
index 0000000..febd7d5
--- /dev/null
+++ b/Converted_to_tensorflow_2/models/checkpoint
@@ -0,0 +1,2 @@
+model_checkpoint_path: "model.ckpt"
+all_model_checkpoint_paths: "model.ckpt"
diff --git a/Converted_to_tensorflow_2/models/model.ckpt.data-00000-of-00001 b/Converted_to_tensorflow_2/models/model.ckpt.data-00000-of-00001
new file mode 100644
index 0000000..9e686fe
Binary files /dev/null and b/Converted_to_tensorflow_2/models/model.ckpt.data-00000-of-00001 differ
diff --git a/Converted_to_tensorflow_2/models/model.ckpt.data-00000-of-00001.tempstate7083288698834630731 b/Converted_to_tensorflow_2/models/model.ckpt.data-00000-of-00001.tempstate7083288698834630731
new file mode 100644
index 0000000..40480dc
Binary files /dev/null and b/Converted_to_tensorflow_2/models/model.ckpt.data-00000-of-00001.tempstate7083288698834630731 differ
diff --git a/Converted_to_tensorflow_2/models/model.ckpt.index b/Converted_to_tensorflow_2/models/model.ckpt.index
new file mode 100644
index 0000000..5d8cf68
Binary files /dev/null and b/Converted_to_tensorflow_2/models/model.ckpt.index differ
diff --git a/Converted_to_tensorflow_2/models/model.ckpt.meta b/Converted_to_tensorflow_2/models/model.ckpt.meta
new file mode 100644
index 0000000..0ce1d29
Binary files /dev/null and b/Converted_to_tensorflow_2/models/model.ckpt.meta differ
diff --git a/Converted_to_tensorflow_2/requirements.txt b/Converted_to_tensorflow_2/requirements.txt
new file mode 100644
index 0000000..de034ee
--- /dev/null
+++ b/Converted_to_tensorflow_2/requirements.txt
@@ -0,0 +1,2 @@
+pyglet
+pygame
\ No newline at end of file
diff --git a/README.md b/README.md
index bb7bf69..e18f66b 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,7 @@
-# Car-QLearning
\ No newline at end of file
+# Car-QLearning
+
+`pip install -r requirements.txt`
+`python main.py`
+
+Note: install TensorFlow before running the commands above; see this guide: <https://gist.github.com/off99555/fd59f204c02b5f704287227d67744d6f>
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..de034ee
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+pyglet
+pygame
\ No newline at end of file