From 47c6c948deec8fc5d53871ce080b4b2f11693b67 Mon Sep 17 00:00:00 2001 From: tommymarkstein Date: Wed, 9 Aug 2023 11:09:46 +0200 Subject: [PATCH 01/11] fix: remove dangling nodes --- neat/genome.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/neat/genome.py b/neat/genome.py index 2d652650..7b8a594e 100644 --- a/neat/genome.py +++ b/neat/genome.py @@ -299,6 +299,14 @@ def mutate(self, config): for ng in self.nodes.values(): ng.mutate(config) + self.remove_all_deactivated_connections(config) + while True: + if((not self.remove_all_dangling_nodes(config)) + and + (not self.remove_all_dangling_connections(config)) + ): + break + def mutate_add_node(self, config): if not self.connections: if config.check_structural_mutation_surer(): @@ -319,6 +327,40 @@ def mutate_add_node(self, config): i, o = conn_to_split.key self.add_connection(config, i, new_node_id, 1.0, True) self.add_connection(config, new_node_id, o, conn_to_split.weight, True) + #if connection.init_attributes(config) + + def remove_all_dangling_nodes(self, config): + list_of_all_currend_nodes = list(self.nodes) + input_nodes = config.input_keys + output_nodes = config.output_keys + list_of_all_non_output_input_nodes = [node for node in list_of_all_currend_nodes if (node not in input_nodes and node not in output_nodes)] + from_nodes, to_nodes = [], [] + for (p1, p2) in list(self.connections): + from_nodes.append(p1) + to_nodes.append(p2) + found_at_least_one_dangling_node = False + for node_number in list_of_all_non_output_input_nodes: + if ((node_number not in from_nodes) or (node_number not in to_nodes)): + del self.nodes[node_number] + found_at_least_one_dangling_node = True + return found_at_least_one_dangling_node + + def remove_all_deactivated_connections(self, config): + if not config.enabled_mutate_rate > 0: + for k in list(self.connections): + if (not self.connections[k].enabled): + del self.connections[k] + #del self.connections[conn_to_split.key] + + def remove_all_dangling_connections(self,config): + list_of_all_currend_nodes = list(self.nodes) + list_of_all_currend_nodes.extend(config.input_keys) # input_nodes + found_at_least_one_dangling_connection = False + for (f,t) in list(self.connections): + if ((f not in list_of_all_currend_nodes) or (t not in list_of_all_currend_nodes)): + del self.connections[(f,t)] + found_at_least_one_dangling_connection = True + return found_at_least_one_dangling_connection def add_connection(self, config, input_key, output_key, weight, enabled): # TODO: Add further validation of this connection addition? From e946dae46d601e11b92cb76e6f090e51a7925c54 Mon Sep 17 00:00:00 2001 From: markste-in Date: Mon, 27 May 2024 23:14:23 +0200 Subject: [PATCH 02/11] removing dangling nodes after n steps of not improving --- .gitignore | 2 ++ examples/openai-lander/config | 1 + examples/openai-lander/evolve.py | 11 +++++++++-- neat/config.py | 3 ++- neat/genome.py | 1 + neat/reproduction.py | 7 +++++++ 6 files changed, 22 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index abd5ce18..92b1e8db 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +.venv +.DS_Store .coveralls.yml docs/_build/** _build/** diff --git a/examples/openai-lander/config b/examples/openai-lander/config index 8f72c467..fe67d4b6 100644 --- a/examples/openai-lander/config +++ b/examples/openai-lander/config @@ -7,6 +7,7 @@ pop_size = 150 fitness_criterion = max fitness_threshold = 1000.0 reset_on_extinction = 0 +trim_dangling_after_n_generations_wo_improvment = 3 [LanderGenome] num_inputs = 8 diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py index cbde8238..77d91bee 100644 --- a/examples/openai-lander/evolve.py +++ b/examples/openai-lander/evolve.py @@ -41,6 +41,9 @@ def mutate(self, config): super().mutate(config) self.discount += random.gauss(0.0, 0.05) self.discount = max(0.01, min(0.99, self.discount)) + + def trim_network(self, config): + super().trim_network(config) def distance(self, other, config): dist = super().distance(other, config) @@ -200,6 +203,10 @@ def run(): for g in best_genomes: best_networks.append(neat.nn.FeedForwardNetwork.create(g, config)) + for n, g in enumerate(best_genomes): + name = 'network-{0}'.format(n) + visualize.draw_net(config, g, view=False, filename=name + f"_{n}-net") + solved = True best_scores = [] for k in range(100): @@ -242,8 +249,8 @@ def run(): with open(name + '.pickle', 'wb') as f: pickle.dump(g, f) - visualize.draw_net(config, g, view=False, filename=name + "-net.gv") - visualize.draw_net(config, g, view=False, filename=name + "-net-pruned.gv", prune_unused=True) + visualize.draw_net(config, g, view=False, filename=name + "-net-solved.gv") + visualize.draw_net(config, g, view=False, filename=name + "-net-pruned-solved.gv", prune_unused=True) break except KeyboardInterrupt: diff --git a/neat/config.py b/neat/config.py index b616364a..629f7be6 100644 --- a/neat/config.py +++ b/neat/config.py @@ -125,7 +125,8 @@ class Config(object): ConfigParameter('fitness_criterion', str), ConfigParameter('fitness_threshold', float), ConfigParameter('reset_on_extinction', bool), - ConfigParameter('no_fitness_termination', bool, False)] + ConfigParameter('no_fitness_termination', bool, False), + ConfigParameter('trim_dangling_after_n_generations_wo_improvment', int)] def __init__(self, genome_type, reproduction_type, species_set_type, stagnation_type, filename, config_information=None): # Check that the provided types have the required methods. diff --git a/neat/genome.py b/neat/genome.py index 7b8a594e..a43122a7 100644 --- a/neat/genome.py +++ b/neat/genome.py @@ -299,6 +299,7 @@ def mutate(self, config): for ng in self.nodes.values(): ng.mutate(config) + def trim_network(self, config): self.remove_all_deactivated_connections(config) while True: if((not self.remove_all_dangling_nodes(config)) diff --git a/neat/reproduction.py b/neat/reproduction.py index 6f6c904e..1c7c48d7 100644 --- a/neat/reproduction.py +++ b/neat/reproduction.py @@ -96,6 +96,13 @@ def reproduce(self, config, species, pop_size, generation): # interfering with the shared fitness scheme. all_fitnesses = [] remaining_species = [] + + if config.trim_dangling_after_n_generations_wo_improvment >= 0: + for specie in species.species.values(): + if generation - specie.last_improved > config.trim_dangling_after_n_generations_wo_improvment: + for member in specie.members.values(): + member.trim_network(config.genome_config) + for stag_sid, stag_s, stagnant in self.stagnation.update(species, generation): if stagnant: self.reporters.species_stagnant(stag_sid, stag_s) From 0b7bc90b5f042e38f53c2e8fc7cc82688d14a4ac Mon Sep 17 00:00:00 2001 From: markste-in Date: Mon, 27 May 2024 23:32:27 +0200 Subject: [PATCH 03/11] set default value (inactive) for new trimming parameter --- neat/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neat/config.py b/neat/config.py index 629f7be6..b589b99e 100644 --- a/neat/config.py +++ b/neat/config.py @@ -126,7 +126,7 @@ class Config(object): ConfigParameter('fitness_threshold', float), ConfigParameter('reset_on_extinction', bool), ConfigParameter('no_fitness_termination', bool, False), - ConfigParameter('trim_dangling_after_n_generations_wo_improvment', int)] + ConfigParameter('trim_dangling_after_n_generations_wo_improvment', int, -1)] def __init__(self, genome_type, reproduction_type, species_set_type, stagnation_type, filename, config_information=None): # Check that the provided types have the required methods. From 91c7dfabc9b47459cdd34994717ffc021ad3410f Mon Sep 17 00:00:00 2001 From: markste-in Date: Mon, 27 May 2024 23:52:20 +0200 Subject: [PATCH 04/11] removed unnecessary change --- examples/openai-lander/evolve.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py index 77d91bee..200ec8a8 100644 --- a/examples/openai-lander/evolve.py +++ b/examples/openai-lander/evolve.py @@ -41,9 +41,6 @@ def mutate(self, config): super().mutate(config) self.discount += random.gauss(0.0, 0.05) self.discount = max(0.01, min(0.99, self.discount)) - - def trim_network(self, config): - super().trim_network(config) def distance(self, other, config): dist = super().distance(other, config) From 6f07e2123805110488da8bf838dc3dbb2a728abc Mon Sep 17 00:00:00 2001 From: markste-in Date: Tue, 28 May 2024 00:16:13 +0200 Subject: [PATCH 05/11] removed debug statements --- examples/openai-lander/evolve.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py index 200ec8a8..e1167b93 100644 --- a/examples/openai-lander/evolve.py +++ b/examples/openai-lander/evolve.py @@ -20,8 +20,8 @@ env = gym.make('LunarLander-v2') -print("action space: {0!r}".format(env.action_space)) -print("observation space: {0!r}".format(env.observation_space)) +# print("action space: {0!r}".format(env.action_space)) +# print("observation space: {0!r}".format(env.observation_space)) class LanderGenome(neat.DefaultGenome): From bb5090f752b5399b23ee21ec357e50775085e9d5 Mon Sep 17 00:00:00 2001 From: markste-in Date: Tue, 28 May 2024 21:46:29 +0200 Subject: [PATCH 06/11] make sure an output of the network cannot act as an input to another node --- examples/openai-lander/evolve.py | 2 +- neat/genome.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py index e1167b93..2c22ced4 100644 --- a/examples/openai-lander/evolve.py +++ b/examples/openai-lander/evolve.py @@ -168,7 +168,7 @@ def run(): pop.add_reporter(stats) pop.add_reporter(neat.StdOutReporter(True)) # Checkpoint every 25 generations or 900 seconds. - pop.add_reporter(neat.Checkpointer(25, 900)) + pop.add_reporter(neat.Checkpointer(200, 3600)) # Run until the winner from a generation is able to solve the environment # or the user interrupts the process. diff --git a/neat/genome.py b/neat/genome.py index a43122a7..61bd6b8f 100644 --- a/neat/genome.py +++ b/neat/genome.py @@ -384,7 +384,7 @@ def mutate_add_connection(self, config): possible_outputs = list(self.nodes) out_node = choice(possible_outputs) - possible_inputs = possible_outputs + config.input_keys + possible_inputs = list(set(possible_outputs + config.input_keys) - set(config.output_keys)) in_node = choice(possible_inputs) # Don't duplicate connections. From 8a12f69f7c289de449a4c5bda882e8fb502684b3 Mon Sep 17 00:00:00 2001 From: markste-in Date: Tue, 4 Jun 2024 22:15:25 +0200 Subject: [PATCH 07/11] working solution for lunarlander --- examples/openai-lander/config | 47 +++--- examples/openai-lander/evolve.py | 230 +++++++++++----------------- examples/openai-lander/visualize.py | 2 +- 3 files changed, 121 insertions(+), 158 deletions(-) diff --git a/examples/openai-lander/config b/examples/openai-lander/config index fe67d4b6..5906ded7 100644 --- a/examples/openai-lander/config +++ b/examples/openai-lander/config @@ -5,28 +5,36 @@ pop_size = 150 # Note: the fitness threshold will never be reached because # we are controlling the termination ourselves based on simulation performance. fitness_criterion = max -fitness_threshold = 1000.0 +fitness_threshold = 99999 reset_on_extinction = 0 -trim_dangling_after_n_generations_wo_improvment = 3 +trim_dangling_after_n_generations_wo_improvment = 30 -[LanderGenome] +[DefaultGenome] num_inputs = 8 num_hidden = 0 num_outputs = 4 -initial_connection = partial 0.5 +initial_connection = partial_nodirect 0.5 feed_forward = True + compatibility_disjoint_coefficient = 1.0 -compatibility_weight_coefficient = 1.0 -conn_add_prob = 0.15 -conn_delete_prob = 0.1 -node_add_prob = 0.15 -node_delete_prob = 0.1 -activation_default = clamped -activation_options = clamped +compatibility_weight_coefficient = 0.6 + +conn_add_prob = 0.2 +conn_delete_prob = 0.2 + +node_add_prob = 0.2 +node_delete_prob = 0.2 +single_structural_mutation = False +structural_mutation_surer = True + +activation_default = tanh +activation_options = tanh activation_mutate_rate = 0.0 + aggregation_default = sum aggregation_options = sum aggregation_mutate_rate = 0.0 + bias_init_mean = 0.0 bias_init_stdev = 1.0 bias_replace_rate = 0.02 @@ -34,11 +42,12 @@ bias_mutate_rate = 0.8 bias_mutate_power = 0.4 bias_max_value = 30.0 bias_min_value = -30.0 + response_init_mean = 1.0 response_init_stdev = 0.0 response_replace_rate = 0.0 -response_mutate_rate = 0.1 -response_mutate_power = 0.01 +response_mutate_rate = 0.0 +response_mutate_power = 0.0 response_max_value = 30.0 response_min_value = -30.0 @@ -47,8 +56,8 @@ weight_min_value = -30 weight_init_mean = 0.0 weight_init_stdev = 1.0 weight_mutate_rate = 0.8 -weight_replace_rate = 0.02 -weight_mutate_power = 0.4 +weight_replace_rate = 0.1 +weight_mutate_power = 0.5 enabled_default = True enabled_mutate_rate = 0.01 @@ -56,11 +65,11 @@ enabled_mutate_rate = 0.01 compatibility_threshold = 3.0 [DefaultStagnation] -species_fitness_func = mean -max_stagnation = 15 -species_elitism = 4 +species_fitness_func = max +max_stagnation = 50 +species_elitism = 2 [DefaultReproduction] -elitism = 2 +elitism = 4 survival_threshold = 0.2 diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py index 2c22ced4..dafe136a 100644 --- a/examples/openai-lander/evolve.py +++ b/examples/openai-lander/evolve.py @@ -2,7 +2,7 @@ # LunarLander-v2 environment (https://gym.openai.com/envs/LunarLander-v2). # Sample run here: https://gym.openai.com/evaluations/eval_FbKq5MxAS9GlvB7W6ioJkg - +import sys import multiprocessing import os import pickle @@ -16,6 +16,11 @@ import neat import visualize + + +from warnings import filterwarnings +filterwarnings('ignore', category=DeprecationWarning) + NUM_CORES = multiprocessing.cpu_count() env = gym.make('LunarLander-v2') @@ -23,56 +28,73 @@ # print("action space: {0!r}".format(env.action_space)) # print("observation space: {0!r}".format(env.observation_space)) +# Colored printing functions for strings that use universal ANSI escape sequences. +# fail: bold red, pass: bold green, warn: bold yellow, +# info: bold blue, bold: bold white -class LanderGenome(neat.DefaultGenome): - def __init__(self, key): - super().__init__(key) - self.discount = None +class ColorPrint: - def configure_new(self, config): - super().configure_new(config) - self.discount = 0.01 + 0.98 * random.random() + @staticmethod + def print_fail(message, end = '\n'): + sys.stderr.write('\x1b[1;31m' + str(message).strip() + '\x1b[0m' + end) - def configure_crossover(self, genome1, genome2, config): - super().configure_crossover(genome1, genome2, config) - self.discount = random.choice((genome1.discount, genome2.discount)) + @staticmethod + def print_pass(message, end = '\n'): + sys.stdout.write('\x1b[1;32m' + str(message).strip() + '\x1b[0m' + end) - def mutate(self, config): - super().mutate(config) - self.discount += random.gauss(0.0, 0.05) - self.discount = max(0.01, min(0.99, self.discount)) + @staticmethod + def print_warn(message, end = '\n'): + sys.stderr.write('\x1b[1;33m' + str(message).strip() + '\x1b[0m' + end) - def distance(self, other, config): - dist = super().distance(other, config) - disc_diff = abs(self.discount - other.discount) - return dist + disc_diff + @staticmethod + def print_info(message, end = '\n'): + sys.stdout.write('\x1b[1;34m' + str(message).strip() + '\x1b[0m' + end) - def __str__(self): - return f"Reward discount: {self.discount}\n{super().__str__()}" + @staticmethod + def print_bold(message, end = '\n'): + sys.stdout.write('\x1b[1;37m' + str(message).strip() + '\x1b[0m' + end) +def compute_action_discrete(net, observation): + activation = net.activate(observation) + # Gym expects discrete actions (0, 1, 2, 3), so we need to convert the output + action = np.argmax(activation) + return action, activation[action] -def compute_fitness(genome, net, episodes, min_reward, max_reward): - m = int(round(np.log(0.01) / np.log(genome.discount))) - discount_function = [genome.discount ** (m - i) for i in range(m + 1)] - reward_error = [] - for score, data in episodes: - # Compute normalized discounted reward. - dr = np.convolve(data[:, -1], discount_function)[m:] - dr = 2 * (dr - min_reward) / (max_reward - min_reward) - 1.0 - dr = np.clip(dr, -1.0, 1.0) +def compute_action_box(net, observation): + action = net.activate(observation) + # compute the norm of the action array + norm = np.linalg.norm(action) + return action, norm - for row, dr in zip(data, dr): - observation = row[:8] - action = int(row[8]) - output = net.activate(observation) - reward_error.append(float((output[action] - dr) ** 2)) - return reward_error +def compute_fitness(genome, net, min_reward, max_reward, num_episodes = 3,penalize_inactivity = False ): + + if isinstance(env.action_space, gym.spaces.Discrete): + compute_action = compute_action_discrete + else: + compute_action = compute_action_box + + total_reward = 0.0 + for n in range(num_episodes): + observation, observation_init_info = env.reset() + + while True: + action, norm = compute_action(net, observation) + + if norm < 0.6 and penalize_inactivity: + total_reward -= 1 + observation, reward, terminated, done, info = env.step(action) + + total_reward += reward + if terminated or done: + break + + return total_reward / num_episodes class PooledErrorCompute(object): - def __init__(self, num_workers): + def __init__(self, num_workers, penalize_inactivity = False): self.num_workers = num_workers self.test_episodes = [] self.generation = 0 @@ -82,37 +104,8 @@ def __init__(self, num_workers): self.episode_score = [] self.episode_length = [] - - def simulate(self, nets): - scores = [] - for genome, net in nets: - observation_init_vals, observation_init_info = env.reset() - step = 0 - data = [] - while 1: - step += 1 - if step < 200 and random.random() < 0.2: - action = env.action_space.sample() - else: - output = net.activate(observation_init_vals) - action = np.argmax(output) - - # Note: done has been deprecated. - observation, reward, terminated, done, info = env.step(action) - data.append(np.hstack((observation, action, reward))) - - if terminated: - break - - data = np.array(data) - score = np.sum(data[:, -1]) - self.episode_score.append(score) - scores.append(score) - self.episode_length.append(step) - - self.test_episodes.append((score, data)) - - print("Score range [{:.3f}, {:.3f}]".format(min(scores), max(scores))) + self.penalize_inactivity = penalize_inactivity + self.num_episodes = 3 def evaluate_genomes(self, genomes, config): self.generation += 1 @@ -125,41 +118,31 @@ def evaluate_genomes(self, genomes, config): print("network creation time {0}".format(time.time() - t0)) t0 = time.time() - # Periodically generate a new set of episodes for comparison. - if 1 == self.generation % 10: - self.test_episodes = self.test_episodes[-300:] - self.simulate(nets) - print("simulation run time {0}".format(time.time() - t0)) - t0 = time.time() - - # Assign a composite fitness to each genome; genomes can make progress either - # by improving their total reward or by making more accurate reward estimates. - print("Evaluating {0} test episodes".format(len(self.test_episodes))) if self.num_workers < 2: for genome, net in nets: - reward_error = compute_fitness(genome, net, self.test_episodes, self.min_reward, self.max_reward) - genome.fitness = -np.sum(reward_error) / len(self.test_episodes) + reward = compute_fitness(genome, net, self.min_reward, self.max_reward, self.num_episodes, self.penalize_inactivity) + genome.fitness = reward else: with multiprocessing.Pool(self.num_workers) as pool: jobs = [] for genome, net in nets: jobs.append(pool.apply_async(compute_fitness, - (genome, net, self.test_episodes, - self.min_reward, self.max_reward))) + (genome, net, + self.min_reward, self.max_reward, self.num_episodes, self.penalize_inactivity))) for job, (genome_id, genome) in zip(jobs, genomes): - reward_error = job.get(timeout=None) - genome.fitness = -np.sum(reward_error) / len(self.test_episodes) + reward = job.get(timeout=None) + genome.fitness = reward print("final fitness compute time {0}\n".format(time.time() - t0)) -def run(): +def run(penalize_inactivity=False): # Load the config file, which is assumed to live in # the same directory as this script. local_dir = os.path.dirname(__file__) config_path = os.path.join(local_dir, 'config') - config = neat.Config(LanderGenome, neat.DefaultReproduction, + config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction, neat.DefaultSpeciesSet, neat.DefaultStagnation, config_path) @@ -168,25 +151,22 @@ def run(): pop.add_reporter(stats) pop.add_reporter(neat.StdOutReporter(True)) # Checkpoint every 25 generations or 900 seconds. - pop.add_reporter(neat.Checkpointer(200, 3600)) + pop.add_reporter(neat.Checkpointer(1000, 3600)) # Run until the winner from a generation is able to solve the environment # or the user interrupts the process. - ec = PooledErrorCompute(NUM_CORES) + ec = PooledErrorCompute(NUM_CORES, penalize_inactivity=penalize_inactivity) while 1: try: - gen_best = pop.run(ec.evaluate_genomes, 5) - - # print(gen_best) + best_gen = pop.run(ec.evaluate_genomes, 10) + best_net = neat.nn.FeedForwardNetwork.create(best_gen, config) + timestr = time.strftime("%Y%m%d-%H%M%S") + # print(best_gen) - visualize.plot_stats(stats, ylog=False, view=False, filename="fitness.svg") + visualize.plot_stats(stats, ylog=False, view=False, filename=f"output/fitness.svg") - plt.plot(ec.episode_score, 'g-', label='score') - plt.plot(ec.episode_length, 'b-', label='length') - plt.grid() - plt.legend(loc='best') - plt.savefig("scores.svg") - plt.close() + mfs = sum(stats.get_fitness_stat(max)[-5:]) / 5.0 + print("Average max fitness over last 5 generations: {0}".format(mfs)) mfs = sum(stats.get_fitness_mean()[-5:]) / 5.0 print("Average mean fitness over last 5 generations: {0}".format(mfs)) @@ -201,53 +181,27 @@ def run(): best_networks.append(neat.nn.FeedForwardNetwork.create(g, config)) for n, g in enumerate(best_genomes): - name = 'network-{0}'.format(n) - visualize.draw_net(config, g, view=False, filename=name + f"_{n}-net") + visualize.draw_net(config, g, view=False, filename=f"output/network_gen{ec.generation}_n_{n}_key{g.key}_fitness{int(g.fitness)}-{timestr}") - solved = True + solved = False best_scores = [] - for k in range(100): - observation_init_vals, observation_init_info = env.reset() - score = 0 - step = 0 - while 1: - step += 1 - # Use the total reward estimates from all five networks to - # determine the best action given the current state. - votes = np.zeros((4,)) - for n in best_networks: - output = n.activate(observation_init_vals) - votes[np.argmax(output)] += 1 - - best_action = np.argmax(votes) - # Note: done has been deprecated. - observation, reward, terminated, done, info = env.step(best_action) - score += reward - env.render() - if terminated: - break - - ec.episode_score.append(score) - ec.episode_length.append(step) - - best_scores.append(score) - avg_score = sum(best_scores) / len(best_scores) - print(k, score, avg_score) - if avg_score < 200: - solved = False - break - - if solved: - print("Solved.") + avg_score = 0 + ColorPrint.print_warn("### Checking current best model over 20x rounds ###") + for k in range(20): + best_scores.append(compute_fitness(best_gen, best_net, ec.min_reward, ec.max_reward, ec.num_episodes, ec.penalize_inactivity)) + avg_score = sum(best_scores) / len(best_scores) + ColorPrint.print_warn(avg_score) + + if avg_score > 200: + ColorPrint.print_pass("Solved.") # Save the winners. for n, g in enumerate(best_genomes): - name = 'winner-{0}'.format(n) + name = f'winner-{n}' with open(name + '.pickle', 'wb') as f: pickle.dump(g, f) - visualize.draw_net(config, g, view=False, filename=name + "-net-solved.gv") - visualize.draw_net(config, g, view=False, filename=name + "-net-pruned-solved.gv", prune_unused=True) + visualize.draw_net(config, g, view=False, filename=f"output/{name}-_gen{ec.generation}_key{g.key}_fitness{int(g.fitness)}_net_solved-{timestr}") break except KeyboardInterrupt: @@ -258,4 +212,4 @@ def run(): if __name__ == '__main__': - run() + run(penalize_inactivity=True) diff --git a/examples/openai-lander/visualize.py b/examples/openai-lander/visualize.py index 1af74aa3..5615ffd8 100644 --- a/examples/openai-lander/visualize.py +++ b/examples/openai-lander/visualize.py @@ -126,6 +126,6 @@ def draw_net(config, genome, view=False, filename=None, node_names=None, show_di width = str(0.1 + abs(cg.weight / 5.0)) dot.edge(a, b, _attributes={'style': style, 'color': color, 'penwidth': width}) - dot.render(filename, view=view) + dot.render(filename, view=view, cleanup=True) return dot From 26fd10437296adcf33459d965dc100785ead99e6 Mon Sep 17 00:00:00 2001 From: markste-in Date: Wed, 5 Jun 2024 08:58:32 +0200 Subject: [PATCH 08/11] make sure the output dir is existing --- examples/openai-lander/evolve.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py index dafe136a..82dd6756 100644 --- a/examples/openai-lander/evolve.py +++ b/examples/openai-lander/evolve.py @@ -152,7 +152,7 @@ def run(penalize_inactivity=False): pop.add_reporter(neat.StdOutReporter(True)) # Checkpoint every 25 generations or 900 seconds. pop.add_reporter(neat.Checkpointer(1000, 3600)) - + os.makedirs("output", exist_ok=True) # Run until the winner from a generation is able to solve the environment # or the user interrupts the process. ec = PooledErrorCompute(NUM_CORES, penalize_inactivity=penalize_inactivity) @@ -198,7 +198,7 @@ def run(penalize_inactivity=False): # Save the winners. for n, g in enumerate(best_genomes): name = f'winner-{n}' - with open(name + '.pickle', 'wb') as f: + with open("output/" + name + '.pickle', 'wb') as f: pickle.dump(g, f) visualize.draw_net(config, g, view=False, filename=f"output/{name}-_gen{ec.generation}_key{g.key}_fitness{int(g.fitness)}_net_solved-{timestr}") From 957e43aee7a914d8b1a962e17d65362e9cb7a79f Mon Sep 17 00:00:00 2001 From: markste-in Date: Sat, 8 Jun 2024 22:39:34 -0400 Subject: [PATCH 09/11] some refactoring and cont. config file --- examples/openai-lander/config | 4 +- .../openai-lander/config-lander-continuous | 74 +++++++++++++++++++ examples/openai-lander/evolve.py | 20 ++--- neat/config.py | 2 +- neat/reproduction.py | 8 +- 5 files changed, 93 insertions(+), 15 deletions(-) create mode 100644 examples/openai-lander/config-lander-continuous diff --git a/examples/openai-lander/config b/examples/openai-lander/config index 5906ded7..089b5d32 100644 --- a/examples/openai-lander/config +++ b/examples/openai-lander/config @@ -5,9 +5,8 @@ pop_size = 150 # Note: the fitness threshold will never be reached because # we are controlling the termination ourselves based on simulation performance. fitness_criterion = max -fitness_threshold = 99999 +fitness_threshold = 1950 reset_on_extinction = 0 -trim_dangling_after_n_generations_wo_improvment = 30 [DefaultGenome] num_inputs = 8 @@ -72,4 +71,5 @@ species_elitism = 2 [DefaultReproduction] elitism = 4 survival_threshold = 0.2 +trim_dangling_after_n_generations_wo_improvment = 30 diff --git a/examples/openai-lander/config-lander-continuous b/examples/openai-lander/config-lander-continuous new file mode 100644 index 00000000..d856d0f6 --- /dev/null +++ b/examples/openai-lander/config-lander-continuous @@ -0,0 +1,74 @@ +# neat-python configuration for the LunarLander-v2 environment on OpenAI Gym + +[NEAT] +pop_size = 150 +# Note: the fitness threshold will never be reached because +# we are controlling the termination ourselves based on simulation performance. +fitness_criterion = max +fitness_threshold = 1950 +reset_on_extinction = 0 + +[DefaultGenome] +num_inputs = 8 +num_hidden = 0 +num_outputs = 2 +initial_connection = partial_direct 0.5 +feed_forward = True + +compatibility_disjoint_coefficient = 1.0 +compatibility_weight_coefficient = 0.6 + +conn_add_prob = 0.2 +conn_delete_prob = 0.2 + +node_add_prob = 0.2 +node_delete_prob = 0.2 +single_structural_mutation = False +structural_mutation_surer = True + +activation_default = tanh +activation_options = tanh +activation_mutate_rate = 0.0 + +aggregation_default = sum +aggregation_options = sum +aggregation_mutate_rate = 0.0 + +bias_init_mean = 0.0 +bias_init_stdev = 1.0 +bias_replace_rate = 0.02 +bias_mutate_rate = 0.8 +bias_mutate_power = 0.4 +bias_max_value = 30.0 +bias_min_value = -30.0 + +response_init_mean = 1.0 +response_init_stdev = 0.0 +response_replace_rate = 0.0 +response_mutate_rate = 0.0 +response_mutate_power = 0.0 +response_max_value = 30.0 +response_min_value = -30.0 + +weight_max_value = 30 +weight_min_value = -30 +weight_init_mean = 0.0 +weight_init_stdev = 1.0 +weight_mutate_rate = 0.8 +weight_replace_rate = 0.1 +weight_mutate_power = 0.5 +enabled_default = True +enabled_mutate_rate = 0.0 + +[DefaultSpeciesSet] +compatibility_threshold = 3.0 + +[DefaultStagnation] +species_fitness_func = mean +max_stagnation = 50 +species_elitism = 2 + +[DefaultReproduction] +elitism = 2 +survival_threshold = 0.2 +trim_dangling_after_n_generations_wo_improvment = 40 diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py index 82dd6756..4ad53802 100644 --- a/examples/openai-lander/evolve.py +++ b/examples/openai-lander/evolve.py @@ -25,6 +25,7 @@ env = gym.make('LunarLander-v2') +# env = gym.make('LunarLanderContinuous-v2') # print("action space: {0!r}".format(env.action_space)) # print("observation space: {0!r}".format(env.observation_space)) @@ -56,7 +57,7 @@ def print_bold(message, end = '\n'): def compute_action_discrete(net, observation): activation = net.activate(observation) - # Gym expects discrete actions (0, 1, 2, 3), so we need to convert the output + # Gym expects discrete actions (0, 1, 2, 3), so we need to convert the results action = np.argmax(activation) return action, activation[action] @@ -137,11 +138,11 @@ def evaluate_genomes(self, genomes, config): print("final fitness compute time {0}\n".format(time.time() - t0)) -def run(penalize_inactivity=False): +def run(config_file = "config", penalize_inactivity=False): # Load the config file, which is assumed to live in # the same directory as this script. local_dir = os.path.dirname(__file__) - config_path = os.path.join(local_dir, 'config') + config_path = os.path.join(local_dir, config_file) config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction, neat.DefaultSpeciesSet, neat.DefaultStagnation, config_path) @@ -152,7 +153,7 @@ def run(penalize_inactivity=False): pop.add_reporter(neat.StdOutReporter(True)) # Checkpoint every 25 generations or 900 seconds. pop.add_reporter(neat.Checkpointer(1000, 3600)) - os.makedirs("output", exist_ok=True) + os.makedirs("results", exist_ok=True) # Run until the winner from a generation is able to solve the environment # or the user interrupts the process. ec = PooledErrorCompute(NUM_CORES, penalize_inactivity=penalize_inactivity) @@ -163,7 +164,7 @@ def run(penalize_inactivity=False): timestr = time.strftime("%Y%m%d-%H%M%S") # print(best_gen) - visualize.plot_stats(stats, ylog=False, view=False, filename=f"output/fitness.svg") + visualize.plot_stats(stats, ylog=False, view=False, filename=f"results/fitness.svg") mfs = sum(stats.get_fitness_stat(max)[-5:]) / 5.0 print("Average max fitness over last 5 generations: {0}".format(mfs)) @@ -181,7 +182,7 @@ def run(penalize_inactivity=False): best_networks.append(neat.nn.FeedForwardNetwork.create(g, config)) for n, g in enumerate(best_genomes): - visualize.draw_net(config, g, view=False, filename=f"output/network_gen{ec.generation}_n_{n}_key{g.key}_fitness{int(g.fitness)}-{timestr}") + visualize.draw_net(config, g, view=False, filename=f"results/network_gen{ec.generation}_n_{n}_key{g.key}_fitness{int(g.fitness)}-{timestr}") solved = False best_scores = [] @@ -198,10 +199,10 @@ def run(penalize_inactivity=False): # Save the winners. for n, g in enumerate(best_genomes): name = f'winner-{n}' - with open("output/" + name + '.pickle', 'wb') as f: + with open("results/" + name + '.pickle', 'wb') as f: pickle.dump(g, f) - visualize.draw_net(config, g, view=False, filename=f"output/{name}-_gen{ec.generation}_key{g.key}_fitness{int(g.fitness)}_net_solved-{timestr}") + visualize.draw_net(config, g, view=False, filename=f"results/{name}-_gen{ec.generation}_key{g.key}_fitness{int(g.fitness)}_net_solved-{timestr}") break except KeyboardInterrupt: @@ -212,4 +213,5 @@ def run(penalize_inactivity=False): if __name__ == '__main__': - run(penalize_inactivity=True) +# run(config_file="config-lander-continuous",penalize_inactivity=False) + run(config_file="config",penalize_inactivity=False) diff --git a/neat/config.py b/neat/config.py index b589b99e..054d6b41 100644 --- a/neat/config.py +++ b/neat/config.py @@ -126,7 +126,7 @@ class Config(object): ConfigParameter('fitness_threshold', float), ConfigParameter('reset_on_extinction', bool), ConfigParameter('no_fitness_termination', bool, False), - ConfigParameter('trim_dangling_after_n_generations_wo_improvment', int, -1)] + ] def __init__(self, genome_type, reproduction_type, species_set_type, stagnation_type, filename, config_information=None): # Check that the provided types have the required methods. diff --git a/neat/reproduction.py b/neat/reproduction.py index 1c7c48d7..dd02fc2f 100644 --- a/neat/reproduction.py +++ b/neat/reproduction.py @@ -28,7 +28,9 @@ def parse_config(cls, param_dict): return DefaultClassConfig(param_dict, [ConfigParameter('elitism', int, 0), ConfigParameter('survival_threshold', float, 0.2), - ConfigParameter('min_species_size', int, 1)]) + ConfigParameter('min_species_size', int, 1), + ConfigParameter('trim_dangling_after_n_generations_wo_improvment', int, -1) + ]) def __init__(self, config, reporters, stagnation): # pylint: disable=super-init-not-called @@ -97,9 +99,9 @@ def reproduce(self, config, species, pop_size, generation): all_fitnesses = [] remaining_species = [] - if config.trim_dangling_after_n_generations_wo_improvment >= 0: + if self.reproduction_config.trim_dangling_after_n_generations_wo_improvment >= 0: for specie in species.species.values(): - if generation - specie.last_improved > config.trim_dangling_after_n_generations_wo_improvment: + if generation - specie.last_improved > self.reproduction_config.trim_dangling_after_n_generations_wo_improvment: for member in specie.members.values(): member.trim_network(config.genome_config) From 72f04f216b65af1ce849a1d1f5a1591a830f1491 Mon Sep 17 00:00:00 2001 From: markste-in Date: Sun, 9 Jun 2024 10:10:23 -0400 Subject: [PATCH 10/11] removed obsolete line --- examples/openai-lander/evolve.py | 42 +++++++++++++++++++------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py index 4ad53802..411a0778 100644 --- a/examples/openai-lander/evolve.py +++ b/examples/openai-lander/evolve.py @@ -23,8 +23,6 @@ NUM_CORES = multiprocessing.cpu_count() -env = gym.make('LunarLander-v2') - # env = gym.make('LunarLanderContinuous-v2') # print("action space: {0!r}".format(env.action_space)) # print("observation space: {0!r}".format(env.observation_space)) @@ -69,8 +67,8 @@ def compute_action_box(net, observation): return action, norm -def compute_fitness(genome, net, min_reward, max_reward, num_episodes = 3,penalize_inactivity = False ): - +def compute_fitness(genome, net, min_reward, max_reward, env_name, num_episodes = 3,penalize_inactivity = False ): + env = gym.make(env_name) if isinstance(env.action_space, gym.spaces.Discrete): compute_action = compute_action_discrete else: @@ -90,19 +88,19 @@ def compute_fitness(genome, net, min_reward, max_reward, num_episodes = 3,penali total_reward += reward if terminated or done: break - + env.close() return total_reward / num_episodes class PooledErrorCompute(object): - def __init__(self, num_workers, penalize_inactivity = False): + def __init__(self, num_workers, env_name, penalize_inactivity = False): self.num_workers = num_workers self.test_episodes = [] self.generation = 0 self.min_reward = -200 self.max_reward = 200 - + self.env_name = env_name self.episode_score = [] self.episode_length = [] self.penalize_inactivity = penalize_inactivity @@ -121,15 +119,27 @@ def evaluate_genomes(self, genomes, config): if self.num_workers < 2: for genome, net in nets: - reward = compute_fitness(genome, net, self.min_reward, self.max_reward, self.num_episodes, self.penalize_inactivity) + reward = compute_fitness(genome, + net, + self.min_reward, + self.max_reward, + env_name=self.env_name, + num_episodes=self.num_episodes, + penalize_inactivity=self.penalize_inactivity) genome.fitness = reward else: with multiprocessing.Pool(self.num_workers) as pool: jobs = [] for genome, net in nets: - jobs.append(pool.apply_async(compute_fitness, - (genome, net, - self.min_reward, self.max_reward, self.num_episodes, self.penalize_inactivity))) + jobs.append(pool.apply_async(compute_fitness, (genome, + net, + self.min_reward, + self.max_reward, + self.env_name, + self.num_episodes, + self.penalize_inactivity) + ) + ) for job, (genome_id, genome) in zip(jobs, genomes): reward = job.get(timeout=None) @@ -138,7 +148,7 @@ def evaluate_genomes(self, genomes, config): print("final fitness compute time {0}\n".format(time.time() - t0)) -def run(config_file = "config", penalize_inactivity=False): +def run(env_name, config_file = "config", penalize_inactivity=False): # Load the config file, which is assumed to live in # the same directory as this script. local_dir = os.path.dirname(__file__) @@ -156,7 +166,7 @@ def run(config_file = "config", penalize_inactivity=False): os.makedirs("results", exist_ok=True) # Run until the winner from a generation is able to solve the environment # or the user interrupts the process. - ec = PooledErrorCompute(NUM_CORES, penalize_inactivity=penalize_inactivity) + ec = PooledErrorCompute(NUM_CORES, env_name=env_name, penalize_inactivity=penalize_inactivity) while 1: try: best_gen = pop.run(ec.evaluate_genomes, 10) @@ -189,7 +199,7 @@ def run(config_file = "config", penalize_inactivity=False): avg_score = 0 ColorPrint.print_warn("### Checking current best model over 20x rounds ###") for k in range(20): - best_scores.append(compute_fitness(best_gen, best_net, ec.min_reward, ec.max_reward, ec.num_episodes, ec.penalize_inactivity)) + best_scores.append(compute_fitness(best_gen, best_net, ec.min_reward, ec.max_reward, env_name, ec.num_episodes, ec.penalize_inactivity)) avg_score = sum(best_scores) / len(best_scores) ColorPrint.print_warn(avg_score) @@ -209,9 +219,7 @@ def run(config_file = "config", penalize_inactivity=False): print("User break.") break - env.close() - if __name__ == '__main__': # run(config_file="config-lander-continuous",penalize_inactivity=False) - run(config_file="config",penalize_inactivity=False) + run(env_name = 'LunarLander-v2', config_file="config",penalize_inactivity=False) From 0175ab1deeb8173865bf5727ec12a79d6dbafd8d Mon Sep 17 00:00:00 2001 From: markste-in Date: Sun, 9 Jun 2024 10:19:33 -0400 Subject: [PATCH 11/11] little fix for continuous lunarlander --- examples/openai-lander/config | 2 +- examples/openai-lander/evolve.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/openai-lander/config b/examples/openai-lander/config index 089b5d32..c0fdb68d 100644 --- a/examples/openai-lander/config +++ b/examples/openai-lander/config @@ -64,7 +64,7 @@ enabled_mutate_rate = 0.01 compatibility_threshold = 3.0 [DefaultStagnation] -species_fitness_func = max +species_fitness_func = mean max_stagnation = 50 species_elitism = 2 diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py index 411a0778..cad4934d 100644 --- a/examples/openai-lander/evolve.py +++ b/examples/openai-lander/evolve.py @@ -221,5 +221,5 @@ def run(env_name, config_file = "config", penalize_inactivity=False): if __name__ == '__main__': -# run(config_file="config-lander-continuous",penalize_inactivity=False) - run(env_name = 'LunarLander-v2', config_file="config",penalize_inactivity=False) + run(env_name = 'LunarLanderContinuous-v2', config_file="config-lander-continuous",penalize_inactivity=False) +# run(env_name = 'LunarLander-v2', config_file="config",penalize_inactivity=False)