nnDriver/driver.py at master · Pilleow/nnDriver · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
import math
import random
import numpy as np

from car import Car
from track import Track


class Driver:
    """
    Feed-forward neural-network controller for a car, evolved genetically.

    The network has two tanh hidden layers of equal width followed by a tanh
    output layer. ``think`` feeds it three car-state features plus one
    relative bearing per look-ahead track point and reads two outputs:
    a steering command and an acceleration command.
    """

    # Names of every trainable parameter array, in network order.
    _PARAM_NAMES = ("W1", "b1", "W2", "b2", "W3", "b3")
    # Leading inputs built by think() before the look-ahead bearings
    # (speed, steering angle, squared distance to the track).
    _STATE_INPUTS = 3

    def __init__(self, input_size: int = 20, hidden_size: int = 40,
                 output_size: int = 2):
        """
        Create a driver with randomly initialised (standard normal) weights.

        Args:
            input_size (int): Number of network inputs; ``think`` uses
                ``input_size - 3`` look-ahead points.
            hidden_size (int): Width of both hidden layers.
            output_size (int): Number of outputs; ``think`` reads the first two.

        Raises:
            ValueError: If the sizes are too small for ``think`` to work.
        """
        if input_size < self._STATE_INPUTS:
            raise ValueError(
                f"input_size must be at least {self._STATE_INPUTS}")
        if hidden_size < 1:
            raise ValueError("hidden_size must be at least 1")
        if output_size < 2:
            raise ValueError("output_size must be at least 2")

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.W1 = np.random.randn(self.hidden_size, self.input_size)
        self.b1 = np.random.randn(self.hidden_size)
        self.W2 = np.random.randn(self.hidden_size, self.hidden_size)
        self.b2 = np.random.randn(self.hidden_size)
        self.W3 = np.random.randn(self.output_size, self.hidden_size)
        self.b3 = np.random.randn(self.output_size)

        # Track points examined during the most recent think() call.
        self.visiblePoints = []
        self.fitness = 0

    def copyweights(self, other):
        """
        Replace this driver's parameters with independent copies of ``other``'s.

        The size attributes are re-derived from the copied arrays so they can
        never disagree with the weights.
        """
        for name in self._PARAM_NAMES:
            setattr(self, name, getattr(other, name).copy())
        self.hidden_size, self.input_size = self.W1.shape
        self.output_size = self.W3.shape[0]

    @staticmethod
    def _wrap_angle(angle: float) -> float:
        """Shift ``angle`` by whole turns until it lies within [-pi, pi]."""
        while angle > math.pi:
            angle -= 2 * math.pi
        while angle < -math.pi:
            angle += 2 * math.pi
        return angle

    def think(self, car: "Car", track: "Track"):
        """
        Given a Car instance and a Track instance, sense the environment
        and compute control commands using the neural network.

        Side effect: ``self.visiblePoints`` is rebuilt with the look-ahead
        track points used as inputs.

        Returns:
            steering_command (float): network output scaled by the mean
                magnitude of the car's steering limits
            acceleration_command (float): raw network output in [-1, 1]
        """
        self.visiblePoints = []
        nearest_index = track.get_nearest(car.position)[1]
        offset = car.position - track.points[nearest_index][0]
        nearest_dist_sqr = offset.dist_sqr()

        features = [
            car.speed / car.max_speed,
            car.steering_angle / car.steering_angle_limit[1],
            # Squared distance normalised by the squared track width.
            nearest_dist_sqr / track.trackWidth / track.trackWidth,
        ]

        # Every remaining input is the bearing to one upcoming track point.
        for i in range(self.input_size - self._STATE_INPUTS):
            point = track.get_next_point_after(nearest_index, i)[0]
            self.visiblePoints.append(point)
            to_track = point - car.position
            # NOTE(review): atan2 is called as (x, y), so the bearing is
            # measured from the +y axis; confirm this matches Car.angle.
            desired_angle = math.atan2(to_track.x, to_track.y)
            angle_diff = self._wrap_angle(desired_angle - car.angle)
            features.append(angle_diff / math.pi)

        x = np.array(features)
        hidden1 = np.tanh(self.W1.dot(x) + self.b1)
        hidden2 = np.tanh(self.W2.dot(hidden1) + self.b2)
        output = np.tanh(self.W3.dot(hidden2) + self.b3)

        limits = car.steering_angle_limit
        steering_command = output[0] * (abs(limits[1]) + abs(limits[0])) / 2
        acceleration_command = output[1]

        return steering_command, acceleration_command

    def drive(self, car: "Car", track: "Track"):
        """
        Use the neural network to decide on control actions and apply them to the car.

        A non-negative acceleration output is passed to ``car.accel_set``;
        a negative one is passed, as a magnitude, to ``car.brake``.
        """
        steering_command, acceleration_command = self.think(car, track)
        car.steer(steering_command)
        if acceleration_command >= 0:
            car.accel_set(acceleration_command)
        else:
            car.brake(abs(acceleration_command))

    def modifyFitness(self, car: "Car", time_not_moving_penalty: float = 2):
        """
        Recompute ``self.fitness`` from the car's progress.

        Fitness is ``car.distance_travelled`` minus
        ``time_not_moving_penalty * car.raw_fitness``.
        NOTE(review): the parameter name suggests ``car.raw_fitness`` measures
        time spent not moving - confirm against Car.
        """
        penalty = time_not_moving_penalty * car.raw_fitness
        self.fitness = car.distance_travelled - penalty

    def get_color(self):
        """
        Generates an RGB color based on the driver's neural network parameters.
        This version boosts contrast so that the colors are more vibrant.

        All parameters are squashed into [0, 1], split into three consecutive
        segments (the last one absorbs any remainder) and each segment's mean
        becomes one channel.

        Returns:
            tuple: (R, G, B) with each component as an integer in [0, 255]
        """
        params = np.concatenate(
            [getattr(self, name).flatten() for name in self._PARAM_NAMES])
        normalized = (np.tanh(params) + 1) / 2
        segment_length = len(normalized) // 3

        channel_means = (
            np.mean(normalized[:segment_length]),
            np.mean(normalized[segment_length:2 * segment_length]),
            np.mean(normalized[2 * segment_length:]),
        )

        def boost(x, contrast=10.0):
            # Stretch around the midpoint, then clamp back into [0, 1].
            boosted = (x - 0.5) * contrast + 0.5
            return max(0.0, min(1.0, boosted))

        red, green, blue = (int(boost(m) * 255) for m in channel_means)
        return red, green, blue

    def mutate(self, mutation_rate=0.001, mutation_std=0.5):
        """
        Mutates the neural network parameters of the driver in place.

        For each parameter (gene) in W1, b1, W2, b2, W3 and b3, with
        probability mutation_rate, add a random value sampled from a normal
        distribution with mean 0 and standard deviation mutation_std.
        """
        for name in self._PARAM_NAMES:
            param = getattr(self, name)
            # One uniform draw per gene decides which genes are mutated.
            mask = np.random.rand(*param.shape) < mutation_rate
            hits = int(np.count_nonzero(mask))
            if hits:
                param[mask] += np.random.randn(hits) * mutation_std

    @staticmethod
    def crossover(parent1, parent2):
        """
        Creates a child driver by averaging the parameters of two parents.

        Every gene (parameter) of the child is the arithmetic mean of the
        corresponding genes of the two parents; no random choice is involved.

        Args:
            parent1 (Driver): The first parent driver.
            parent2 (Driver): The second parent driver.

        Returns:
            Driver: A new driver whose neural network parameters are the
            element-wise mean of the two parents' parameters.
        """
        hidden_size, input_size = parent1.W1.shape
        output_size = parent1.W3.shape[0]
        child = Driver(input_size, hidden_size, output_size)

        for name in Driver._PARAM_NAMES:
            blended = (getattr(parent1, name) + getattr(parent2, name)) / 2
            setattr(child, name, blended)

        return child

    @staticmethod
    def tournamentSelection(drivers: list, p: int = 20):
        """
        Selects and returns one driver from the given list using tournament selection.

        Args:
            drivers (list[Driver]): The list of drivers to select from.
            p (int): The tournament size (capped at the population size).

        Returns:
            Driver: The driver with the highest fitness in the tournament.

        Raises:
            ValueError: If the tournament would be empty (no drivers, or
                p < 1).
        """
        tournament_size = min(p, len(drivers))
        tournament = random.sample(drivers, tournament_size)
        return max(tournament, key=lambda d: d.fitness)

    @staticmethod
    def crossoverAndMutate(drivers: list, percent_of_new_drivers: float = 0):
        """
        Builds the next generation from the current population.

        The new population has exactly ``len(drivers)`` members: children bred
        from tournament-selected parents (up to four children per parent
        pair, each mutated), then ``percent_of_new_drivers`` of the population
        as freshly randomised drivers. Slot 0 is always overwritten with the
        current best driver (the same object, not a copy).

        Args:
            drivers (list[Driver]): The current population.
            percent_of_new_drivers (float): Fraction (0..1) of the population
                to fill with brand-new random drivers.

        Returns:
            list[Driver]: The next generation; empty if ``drivers`` is empty.
        """
        count = len(drivers)
        if count == 0:
            return []

        best = Driver.getBestDriver(drivers)
        bred_target = count * (1 - percent_of_new_drivers)
        new_drivers = []

        while len(new_drivers) < bred_target:
            p1 = Driver.tournamentSelection(drivers, 10)
            p2 = Driver.tournamentSelection(drivers, 50)
            for _ in range(4):
                if len(new_drivers) >= bred_target:
                    break
                child = Driver.crossover(p1, p2)
                child.mutate()
                new_drivers.append(child)

        while len(new_drivers) < count:
            new_drivers.append(
                Driver(best.input_size, best.hidden_size, best.output_size))

        # Trim any overshoot (possible when percent_of_new_drivers < 0) so the
        # population size stays constant from one generation to the next.
        del new_drivers[count:]

        # Elitism: the best driver always survives unchanged.
        new_drivers[0] = best
        return new_drivers

    @staticmethod
    def getFitnessInfo(drivers: list):
        """
        Summarises the fitness of a population.

        Returns:
            tuple: (best, average, worst) fitness over ``drivers``.

        Raises:
            ValueError: If ``drivers`` is empty.
        """
        if not drivers:
            raise ValueError("drivers must not be empty")
        fitnesses = [d.fitness for d in drivers]
        return max(fitnesses), sum(fitnesses) / len(fitnesses), min(fitnesses)

    @staticmethod
    def getBestDriver(drivers: list):
        """
        Returns the driver with the highest fitness.

        Raises:
            ValueError: If ``drivers`` is empty.
        """
        return max(drivers, key=lambda d: d.fitness)