diff --git a/.gitignore b/.gitignore index abd5ce18..92b1e8db 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +.venv +.DS_Store .coveralls.yml docs/_build/** _build/** diff --git a/examples/openai-lander/config b/examples/openai-lander/config index 8f72c467..c0fdb68d 100644 --- a/examples/openai-lander/config +++ b/examples/openai-lander/config @@ -5,27 +5,35 @@ pop_size = 150 # Note: the fitness threshold will never be reached because # we are controlling the termination ourselves based on simulation performance. fitness_criterion = max -fitness_threshold = 1000.0 +fitness_threshold = 1950 reset_on_extinction = 0 -[LanderGenome] +[DefaultGenome] num_inputs = 8 num_hidden = 0 num_outputs = 4 -initial_connection = partial 0.5 +initial_connection = partial_nodirect 0.5 feed_forward = True + compatibility_disjoint_coefficient = 1.0 -compatibility_weight_coefficient = 1.0 -conn_add_prob = 0.15 -conn_delete_prob = 0.1 -node_add_prob = 0.15 -node_delete_prob = 0.1 -activation_default = clamped -activation_options = clamped +compatibility_weight_coefficient = 0.6 + +conn_add_prob = 0.2 +conn_delete_prob = 0.2 + +node_add_prob = 0.2 +node_delete_prob = 0.2 +single_structural_mutation = False +structural_mutation_surer = True + +activation_default = tanh +activation_options = tanh activation_mutate_rate = 0.0 + aggregation_default = sum aggregation_options = sum aggregation_mutate_rate = 0.0 + bias_init_mean = 0.0 bias_init_stdev = 1.0 bias_replace_rate = 0.02 @@ -33,11 +41,12 @@ bias_mutate_rate = 0.8 bias_mutate_power = 0.4 bias_max_value = 30.0 bias_min_value = -30.0 + response_init_mean = 1.0 response_init_stdev = 0.0 response_replace_rate = 0.0 -response_mutate_rate = 0.1 -response_mutate_power = 0.01 +response_mutate_rate = 0.0 +response_mutate_power = 0.0 response_max_value = 30.0 response_min_value = -30.0 @@ -46,8 +55,8 @@ weight_min_value = -30 weight_init_mean = 0.0 weight_init_stdev = 1.0 weight_mutate_rate = 0.8 -weight_replace_rate = 0.02 
-weight_mutate_power = 0.4 +weight_replace_rate = 0.1 +weight_mutate_power = 0.5 enabled_default = True enabled_mutate_rate = 0.01 @@ -56,10 +65,11 @@ compatibility_threshold = 3.0 [DefaultStagnation] species_fitness_func = mean -max_stagnation = 15 -species_elitism = 4 +max_stagnation = 50 +species_elitism = 2 [DefaultReproduction] -elitism = 2 +elitism = 4 survival_threshold = 0.2 +trim_dangling_after_n_generations_wo_improvment = 30 diff --git a/examples/openai-lander/config-lander-continuous b/examples/openai-lander/config-lander-continuous new file mode 100644 index 00000000..d856d0f6 --- /dev/null +++ b/examples/openai-lander/config-lander-continuous @@ -0,0 +1,74 @@ +# neat-python configuration for the LunarLanderContinuous-v2 environment on OpenAI Gym + +[NEAT] +pop_size = 150 +# Note: the fitness threshold will never be reached because +# we are controlling the termination ourselves based on simulation performance. +fitness_criterion = max +fitness_threshold = 1950 +reset_on_extinction = 0 + +[DefaultGenome] +num_inputs = 8 +num_hidden = 0 +num_outputs = 2 +initial_connection = partial_direct 0.5 +feed_forward = True + +compatibility_disjoint_coefficient = 1.0 +compatibility_weight_coefficient = 0.6 + +conn_add_prob = 0.2 +conn_delete_prob = 0.2 + +node_add_prob = 0.2 +node_delete_prob = 0.2 +single_structural_mutation = False +structural_mutation_surer = True + +activation_default = tanh +activation_options = tanh +activation_mutate_rate = 0.0 + +aggregation_default = sum +aggregation_options = sum +aggregation_mutate_rate = 0.0 + +bias_init_mean = 0.0 +bias_init_stdev = 1.0 +bias_replace_rate = 0.02 +bias_mutate_rate = 0.8 +bias_mutate_power = 0.4 +bias_max_value = 30.0 +bias_min_value = -30.0 + +response_init_mean = 1.0 +response_init_stdev = 0.0 +response_replace_rate = 0.0 +response_mutate_rate = 0.0 +response_mutate_power = 0.0 +response_max_value = 30.0 +response_min_value = -30.0 + +weight_max_value = 30 +weight_min_value = -30 +weight_init_mean = 0.0
+weight_init_stdev = 1.0 +weight_mutate_rate = 0.8 +weight_replace_rate = 0.1 +weight_mutate_power = 0.5 +enabled_default = True +enabled_mutate_rate = 0.0 + +[DefaultSpeciesSet] +compatibility_threshold = 3.0 + +[DefaultStagnation] +species_fitness_func = mean +max_stagnation = 50 +species_elitism = 2 + +[DefaultReproduction] +elitism = 2 +survival_threshold = 0.2 +trim_dangling_after_n_generations_wo_improvment = 40 diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py index cbde8238..cad4934d 100644 --- a/examples/openai-lander/evolve.py +++ b/examples/openai-lander/evolve.py @@ -2,7 +2,7 @@ # LunarLander-v2 environment (https://gym.openai.com/envs/LunarLander-v2). # Sample run here: https://gym.openai.com/evaluations/eval_FbKq5MxAS9GlvB7W6ioJkg - +import sys import multiprocessing import os import pickle @@ -16,103 +16,95 @@ import neat import visualize + + +from warnings import filterwarnings +filterwarnings('ignore', category=DeprecationWarning) + NUM_CORES = multiprocessing.cpu_count() -env = gym.make('LunarLander-v2') +# env = gym.make('LunarLanderContinuous-v2') +# print("action space: {0!r}".format(env.action_space)) +# print("observation space: {0!r}".format(env.observation_space)) + +# Colored printing functions for strings that use universal ANSI escape sequences. 
+# fail: bold red, pass: bold green, warn: bold yellow, +# info: bold blue, bold: bold white -print("action space: {0!r}".format(env.action_space)) -print("observation space: {0!r}".format(env.observation_space)) +class ColorPrint: + @staticmethod + def print_fail(message, end = '\n'): + sys.stderr.write('\x1b[1;31m' + str(message).strip() + '\x1b[0m' + end) -class LanderGenome(neat.DefaultGenome): - def __init__(self, key): - super().__init__(key) - self.discount = None + @staticmethod + def print_pass(message, end = '\n'): + sys.stdout.write('\x1b[1;32m' + str(message).strip() + '\x1b[0m' + end) - def configure_new(self, config): - super().configure_new(config) - self.discount = 0.01 + 0.98 * random.random() + @staticmethod + def print_warn(message, end = '\n'): + sys.stderr.write('\x1b[1;33m' + str(message).strip() + '\x1b[0m' + end) - def configure_crossover(self, genome1, genome2, config): - super().configure_crossover(genome1, genome2, config) - self.discount = random.choice((genome1.discount, genome2.discount)) + @staticmethod + def print_info(message, end = '\n'): + sys.stdout.write('\x1b[1;34m' + str(message).strip() + '\x1b[0m' + end) - def mutate(self, config): - super().mutate(config) - self.discount += random.gauss(0.0, 0.05) - self.discount = max(0.01, min(0.99, self.discount)) + @staticmethod + def print_bold(message, end = '\n'): + sys.stdout.write('\x1b[1;37m' + str(message).strip() + '\x1b[0m' + end) - def distance(self, other, config): - dist = super().distance(other, config) - disc_diff = abs(self.discount - other.discount) - return dist + disc_diff +def compute_action_discrete(net, observation): + activation = net.activate(observation) + # Gym expects discrete actions (0, 1, 2, 3), so we need to convert the results + action = np.argmax(activation) + return action, activation[action] - def __str__(self): - return f"Reward discount: {self.discount}\n{super().__str__()}" +def compute_action_box(net, observation): + action = 
net.activate(observation) + # compute the norm of the action array + norm = np.linalg.norm(action) + return action, norm -def compute_fitness(genome, net, episodes, min_reward, max_reward): - m = int(round(np.log(0.01) / np.log(genome.discount))) - discount_function = [genome.discount ** (m - i) for i in range(m + 1)] - reward_error = [] - for score, data in episodes: - # Compute normalized discounted reward. - dr = np.convolve(data[:, -1], discount_function)[m:] - dr = 2 * (dr - min_reward) / (max_reward - min_reward) - 1.0 - dr = np.clip(dr, -1.0, 1.0) +def compute_fitness(genome, net, min_reward, max_reward, env_name, num_episodes = 3,penalize_inactivity = False ): + env = gym.make(env_name) + if isinstance(env.action_space, gym.spaces.Discrete): + compute_action = compute_action_discrete + else: + compute_action = compute_action_box - for row, dr in zip(data, dr): - observation = row[:8] - action = int(row[8]) - output = net.activate(observation) - reward_error.append(float((output[action] - dr) ** 2)) + total_reward = 0.0 + for n in range(num_episodes): + observation, observation_init_info = env.reset() - return reward_error + while True: + action, norm = compute_action(net, observation) + + if norm < 0.6 and penalize_inactivity: + total_reward -= 1 + observation, reward, terminated, done, info = env.step(action) + + total_reward += reward + if terminated or done: + break + env.close() + return total_reward / num_episodes class PooledErrorCompute(object): - def __init__(self, num_workers): + def __init__(self, num_workers, env_name, penalize_inactivity = False): self.num_workers = num_workers self.test_episodes = [] self.generation = 0 self.min_reward = -200 self.max_reward = 200 - + self.env_name = env_name self.episode_score = [] self.episode_length = [] - - def simulate(self, nets): - scores = [] - for genome, net in nets: - observation_init_vals, observation_init_info = env.reset() - step = 0 - data = [] - while 1: - step += 1 - if step < 200 and 
random.random() < 0.2: - action = env.action_space.sample() - else: - output = net.activate(observation_init_vals) - action = np.argmax(output) - - # Note: done has been deprecated. - observation, reward, terminated, done, info = env.step(action) - data.append(np.hstack((observation, action, reward))) - - if terminated: - break - - data = np.array(data) - score = np.sum(data[:, -1]) - self.episode_score.append(score) - scores.append(score) - self.episode_length.append(step) - - self.test_episodes.append((score, data)) - - print("Score range [{:.3f}, {:.3f}]".format(min(scores), max(scores))) + self.penalize_inactivity = penalize_inactivity + self.num_episodes = 3 def evaluate_genomes(self, genomes, config): self.generation += 1 @@ -125,41 +117,43 @@ def evaluate_genomes(self, genomes, config): print("network creation time {0}".format(time.time() - t0)) t0 = time.time() - # Periodically generate a new set of episodes for comparison. - if 1 == self.generation % 10: - self.test_episodes = self.test_episodes[-300:] - self.simulate(nets) - print("simulation run time {0}".format(time.time() - t0)) - t0 = time.time() - - # Assign a composite fitness to each genome; genomes can make progress either - # by improving their total reward or by making more accurate reward estimates. 
- print("Evaluating {0} test episodes".format(len(self.test_episodes))) if self.num_workers < 2: for genome, net in nets: - reward_error = compute_fitness(genome, net, self.test_episodes, self.min_reward, self.max_reward) - genome.fitness = -np.sum(reward_error) / len(self.test_episodes) + reward = compute_fitness(genome, + net, + self.min_reward, + self.max_reward, + env_name=self.env_name, + num_episodes=self.num_episodes, + penalize_inactivity=self.penalize_inactivity) + genome.fitness = reward else: with multiprocessing.Pool(self.num_workers) as pool: jobs = [] for genome, net in nets: - jobs.append(pool.apply_async(compute_fitness, - (genome, net, self.test_episodes, - self.min_reward, self.max_reward))) + jobs.append(pool.apply_async(compute_fitness, (genome, + net, + self.min_reward, + self.max_reward, + self.env_name, + self.num_episodes, + self.penalize_inactivity) + ) + ) for job, (genome_id, genome) in zip(jobs, genomes): - reward_error = job.get(timeout=None) - genome.fitness = -np.sum(reward_error) / len(self.test_episodes) + reward = job.get(timeout=None) + genome.fitness = reward print("final fitness compute time {0}\n".format(time.time() - t0)) -def run(): +def run(env_name, config_file = "config", penalize_inactivity=False): # Load the config file, which is assumed to live in # the same directory as this script. local_dir = os.path.dirname(__file__) - config_path = os.path.join(local_dir, 'config') - config = neat.Config(LanderGenome, neat.DefaultReproduction, + config_path = os.path.join(local_dir, config_file) + config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction, neat.DefaultSpeciesSet, neat.DefaultStagnation, config_path) @@ -168,25 +162,22 @@ def run(): pop.add_reporter(stats) pop.add_reporter(neat.StdOutReporter(True)) # Checkpoint every 25 generations or 900 seconds. 
- pop.add_reporter(neat.Checkpointer(25, 900)) - + pop.add_reporter(neat.Checkpointer(1000, 3600)) + os.makedirs("results", exist_ok=True) # Run until the winner from a generation is able to solve the environment # or the user interrupts the process. - ec = PooledErrorCompute(NUM_CORES) + ec = PooledErrorCompute(NUM_CORES, env_name=env_name, penalize_inactivity=penalize_inactivity) while 1: try: - gen_best = pop.run(ec.evaluate_genomes, 5) - - # print(gen_best) + best_gen = pop.run(ec.evaluate_genomes, 10) + best_net = neat.nn.FeedForwardNetwork.create(best_gen, config) + timestr = time.strftime("%Y%m%d-%H%M%S") + # print(best_gen) - visualize.plot_stats(stats, ylog=False, view=False, filename="fitness.svg") + visualize.plot_stats(stats, ylog=False, view=False, filename=f"results/fitness.svg") - plt.plot(ec.episode_score, 'g-', label='score') - plt.plot(ec.episode_length, 'b-', label='length') - plt.grid() - plt.legend(loc='best') - plt.savefig("scores.svg") - plt.close() + mfs = sum(stats.get_fitness_stat(max)[-5:]) / 5.0 + print("Average max fitness over last 5 generations: {0}".format(mfs)) mfs = sum(stats.get_fitness_mean()[-5:]) / 5.0 print("Average mean fitness over last 5 generations: {0}".format(mfs)) @@ -200,58 +191,35 @@ def run(): for g in best_genomes: best_networks.append(neat.nn.FeedForwardNetwork.create(g, config)) - solved = True + for n, g in enumerate(best_genomes): + visualize.draw_net(config, g, view=False, filename=f"results/network_gen{ec.generation}_n_{n}_key{g.key}_fitness{int(g.fitness)}-{timestr}") + + solved = False best_scores = [] - for k in range(100): - observation_init_vals, observation_init_info = env.reset() - score = 0 - step = 0 - while 1: - step += 1 - # Use the total reward estimates from all five networks to - # determine the best action given the current state. 
- votes = np.zeros((4,)) - for n in best_networks: - output = n.activate(observation_init_vals) - votes[np.argmax(output)] += 1 - - best_action = np.argmax(votes) - # Note: done has been deprecated. - observation, reward, terminated, done, info = env.step(best_action) - score += reward - env.render() - if terminated: - break - - ec.episode_score.append(score) - ec.episode_length.append(step) - - best_scores.append(score) - avg_score = sum(best_scores) / len(best_scores) - print(k, score, avg_score) - if avg_score < 200: - solved = False - break - - if solved: - print("Solved.") + avg_score = 0 + ColorPrint.print_warn("### Checking current best model over 20x rounds ###") + for k in range(20): + best_scores.append(compute_fitness(best_gen, best_net, ec.min_reward, ec.max_reward, env_name, ec.num_episodes, ec.penalize_inactivity)) + avg_score = sum(best_scores) / len(best_scores) + ColorPrint.print_warn(avg_score) + + if avg_score > 200: + ColorPrint.print_pass("Solved.") # Save the winners. 
for n, g in enumerate(best_genomes): - name = 'winner-{0}'.format(n) - with open(name + '.pickle', 'wb') as f: + name = f'winner-{n}' + with open("results/" + name + '.pickle', 'wb') as f: pickle.dump(g, f) - visualize.draw_net(config, g, view=False, filename=name + "-net.gv") - visualize.draw_net(config, g, view=False, filename=name + "-net-pruned.gv", prune_unused=True) + visualize.draw_net(config, g, view=False, filename=f"results/{name}-_gen{ec.generation}_key{g.key}_fitness{int(g.fitness)}_net_solved-{timestr}") break except KeyboardInterrupt: print("User break.") break - env.close() - if __name__ == '__main__': - run() + run(env_name = 'LunarLanderContinuous-v2', config_file="config-lander-continuous",penalize_inactivity=False) +# run(env_name = 'LunarLander-v2', config_file="config",penalize_inactivity=False) diff --git a/examples/openai-lander/visualize.py b/examples/openai-lander/visualize.py index 1af74aa3..5615ffd8 100644 --- a/examples/openai-lander/visualize.py +++ b/examples/openai-lander/visualize.py @@ -126,6 +126,6 @@ def draw_net(config, genome, view=False, filename=None, node_names=None, show_di width = str(0.1 + abs(cg.weight / 5.0)) dot.edge(a, b, _attributes={'style': style, 'color': color, 'penwidth': width}) - dot.render(filename, view=view) + dot.render(filename, view=view, cleanup=True) return dot diff --git a/neat/config.py b/neat/config.py index b616364a..054d6b41 100644 --- a/neat/config.py +++ b/neat/config.py @@ -125,7 +125,8 @@ class Config(object): ConfigParameter('fitness_criterion', str), ConfigParameter('fitness_threshold', float), ConfigParameter('reset_on_extinction', bool), - ConfigParameter('no_fitness_termination', bool, False)] + ConfigParameter('no_fitness_termination', bool, False), + ] def __init__(self, genome_type, reproduction_type, species_set_type, stagnation_type, filename, config_information=None): # Check that the provided types have the required methods. 
diff --git a/neat/genome.py b/neat/genome.py index 2d652650..61bd6b8f 100644 --- a/neat/genome.py +++ b/neat/genome.py @@ -299,6 +299,15 @@ def mutate(self, config): for ng in self.nodes.values(): ng.mutate(config) + def trim_network(self, config): + self.remove_all_deactivated_connections(config) + while True: + if((not self.remove_all_dangling_nodes(config)) + and + (not self.remove_all_dangling_connections(config)) + ): + break + def mutate_add_node(self, config): if not self.connections: if config.check_structural_mutation_surer(): @@ -319,6 +328,40 @@ def mutate_add_node(self, config): i, o = conn_to_split.key self.add_connection(config, i, new_node_id, 1.0, True) self.add_connection(config, new_node_id, o, conn_to_split.weight, True) + #if connection.init_attributes(config) + + def remove_all_dangling_nodes(self, config): + list_of_all_currend_nodes = list(self.nodes) + input_nodes = config.input_keys + output_nodes = config.output_keys + list_of_all_non_output_input_nodes = [node for node in list_of_all_currend_nodes if (node not in input_nodes and node not in output_nodes)] + from_nodes, to_nodes = [], [] + for (p1, p2) in list(self.connections): + from_nodes.append(p1) + to_nodes.append(p2) + found_at_least_one_dangling_node = False + for node_number in list_of_all_non_output_input_nodes: + if ((node_number not in from_nodes) or (node_number not in to_nodes)): + del self.nodes[node_number] + found_at_least_one_dangling_node = True + return found_at_least_one_dangling_node + + def remove_all_deactivated_connections(self, config): + if not config.enabled_mutate_rate > 0: + for k in list(self.connections): + if (not self.connections[k].enabled): + del self.connections[k] + #del self.connections[conn_to_split.key] + + def remove_all_dangling_connections(self,config): + list_of_all_currend_nodes = list(self.nodes) + list_of_all_currend_nodes.extend(config.input_keys) # input_nodes + found_at_least_one_dangling_connection = False + for (f,t) in 
list(self.connections): + if ((f not in list_of_all_currend_nodes) or (t not in list_of_all_currend_nodes)): + del self.connections[(f,t)] + found_at_least_one_dangling_connection = True + return found_at_least_one_dangling_connection def add_connection(self, config, input_key, output_key, weight, enabled): # TODO: Add further validation of this connection addition? @@ -341,7 +384,7 @@ def mutate_add_connection(self, config): possible_outputs = list(self.nodes) out_node = choice(possible_outputs) - possible_inputs = possible_outputs + config.input_keys + possible_inputs = list(set(possible_outputs + config.input_keys) - set(config.output_keys)) in_node = choice(possible_inputs) # Don't duplicate connections. diff --git a/neat/reproduction.py b/neat/reproduction.py index 6f6c904e..dd02fc2f 100644 --- a/neat/reproduction.py +++ b/neat/reproduction.py @@ -28,7 +28,9 @@ def parse_config(cls, param_dict): return DefaultClassConfig(param_dict, [ConfigParameter('elitism', int, 0), ConfigParameter('survival_threshold', float, 0.2), - ConfigParameter('min_species_size', int, 1)]) + ConfigParameter('min_species_size', int, 1), + ConfigParameter('trim_dangling_after_n_generations_wo_improvment', int, -1) + ]) def __init__(self, config, reporters, stagnation): # pylint: disable=super-init-not-called @@ -96,6 +98,13 @@ def reproduce(self, config, species, pop_size, generation): # interfering with the shared fitness scheme. all_fitnesses = [] remaining_species = [] + + if self.reproduction_config.trim_dangling_after_n_generations_wo_improvment >= 0: + for specie in species.species.values(): + if generation - specie.last_improved > self.reproduction_config.trim_dangling_after_n_generations_wo_improvment: + for member in specie.members.values(): + member.trim_network(config.genome_config) + for stag_sid, stag_s, stagnant in self.stagnation.update(species, generation): if stagnant: self.reporters.species_stagnant(stag_sid, stag_s)