From 47c6c948deec8fc5d53871ce080b4b2f11693b67 Mon Sep 17 00:00:00 2001
From: tommymarkstein <tommy.markstein@deutschebahn.com>
Date: Wed, 9 Aug 2023 11:09:46 +0200
Subject: [PATCH 01/11] fix: remove dangling nodes

---
 neat/genome.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/neat/genome.py b/neat/genome.py
index 2d652650..7b8a594e 100644
--- a/neat/genome.py
+++ b/neat/genome.py
@@ -299,6 +299,14 @@ def mutate(self, config):
         for ng in self.nodes.values():
             ng.mutate(config)
 
+        self.remove_all_deactivated_connections(config)
+        while True:
+            if((not self.remove_all_dangling_nodes(config))
+                and
+                (not self.remove_all_dangling_connections(config))
+            ):
+                break
+
     def mutate_add_node(self, config):
         if not self.connections:
             if config.check_structural_mutation_surer():
@@ -319,6 +327,40 @@ def mutate_add_node(self, config):
         i, o = conn_to_split.key
         self.add_connection(config, i, new_node_id, 1.0, True)
         self.add_connection(config, new_node_id, o, conn_to_split.weight, True)
+        #if connection.init_attributes(config)
+
+    def remove_all_dangling_nodes(self, config):
+        """Delete non-input/non-output nodes missing an incoming or outgoing connection.
+
+        Connections count whether or not they are enabled.
+        Returns True if at least one node was deleted, else False.
+        """
+        protected = set(config.input_keys) | set(config.output_keys)
+        # Sets keep each membership test O(1) instead of scanning a list per node.
+        sources = {src for src, _ in self.connections}
+        targets = {dst for _, dst in self.connections}
+        dangling = [key for key in self.nodes
+                    if key not in protected and (key not in sources or key not in targets)]
+        for key in dangling:
+            del self.nodes[key]
+        return bool(dangling)
+
+    def remove_all_deactivated_connections(self, config):
+        """Delete every disabled connection, but only if enabled_mutate_rate is not positive."""
+        if config.enabled_mutate_rate > 0:
+            return  # NOTE(review): presumably mutation could re-enable them - confirm
+        for key in [k for k, conn in self.connections.items() if not conn.enabled]:
+            del self.connections[key]
+
+    def remove_all_dangling_connections(self, config):
+        """Delete connections with an unknown endpoint; return True if any was deleted."""
+        # Valid endpoints: this genome's node keys plus the configured input keys.
+        known = set(self.nodes) | set(config.input_keys)
+        # Collect first so the dict is not mutated while it is being iterated.
+        dangling = [(src, dst) for src, dst in self.connections if src not in known or dst not in known]
+        for key in dangling:
+            del self.connections[key]
+        return bool(dangling)
 
     def add_connection(self, config, input_key, output_key, weight, enabled):
         # TODO: Add further validation of this connection addition?

From e946dae46d601e11b92cb76e6f090e51a7925c54 Mon Sep 17 00:00:00 2001
From: markste-in <mail@markste.in>
Date: Mon, 27 May 2024 23:14:23 +0200
Subject: [PATCH 02/11] removing dangling nodes after n steps of not improving

---
 .gitignore                       |  2 ++
 examples/openai-lander/config    |  1 +
 examples/openai-lander/evolve.py | 11 +++++++++--
 neat/config.py                   |  3 ++-
 neat/genome.py                   |  1 +
 neat/reproduction.py             |  7 +++++++
 6 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index abd5ce18..92b1e8db 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+.venv
+.DS_Store
 .coveralls.yml
 docs/_build/**
 _build/**
diff --git a/examples/openai-lander/config b/examples/openai-lander/config
index 8f72c467..fe67d4b6 100644
--- a/examples/openai-lander/config
+++ b/examples/openai-lander/config
@@ -7,6 +7,7 @@ pop_size              = 150
 fitness_criterion     = max
 fitness_threshold     = 1000.0
 reset_on_extinction   = 0
+trim_dangling_after_n_generations_wo_improvment = 3
 
 [LanderGenome]
 num_inputs              = 8
diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py
index cbde8238..77d91bee 100644
--- a/examples/openai-lander/evolve.py
+++ b/examples/openai-lander/evolve.py
@@ -41,6 +41,9 @@ def mutate(self, config):
         super().mutate(config)
         self.discount += random.gauss(0.0, 0.05)
         self.discount = max(0.01, min(0.99, self.discount))
+    
+    def trim_network(self, config):
+        super().trim_network(config)
 
     def distance(self, other, config):
         dist = super().distance(other, config)
@@ -200,6 +203,10 @@ def run():
             for g in best_genomes:
                 best_networks.append(neat.nn.FeedForwardNetwork.create(g, config))
 
+            for n, g in enumerate(best_genomes):
+                name = 'network-{0}'.format(n)
+                visualize.draw_net(config, g, view=False, filename=name + f"_{n}-net")
+
             solved = True
             best_scores = []
             for k in range(100):
@@ -242,8 +249,8 @@ def run():
                     with open(name + '.pickle', 'wb') as f:
                         pickle.dump(g, f)
 
-                    visualize.draw_net(config, g, view=False, filename=name + "-net.gv")
-                    visualize.draw_net(config, g, view=False, filename=name + "-net-pruned.gv", prune_unused=True)
+                    visualize.draw_net(config, g, view=False, filename=name + "-net-solved.gv")
+                    visualize.draw_net(config, g, view=False, filename=name + "-net-pruned-solved.gv", prune_unused=True)
 
                 break
         except KeyboardInterrupt:
diff --git a/neat/config.py b/neat/config.py
index b616364a..629f7be6 100644
--- a/neat/config.py
+++ b/neat/config.py
@@ -125,7 +125,8 @@ class Config(object):
                 ConfigParameter('fitness_criterion', str),
                 ConfigParameter('fitness_threshold', float),
                 ConfigParameter('reset_on_extinction', bool),
-                ConfigParameter('no_fitness_termination', bool, False)]
+                ConfigParameter('no_fitness_termination', bool, False),
+                ConfigParameter('trim_dangling_after_n_generations_wo_improvment', int)]
 
     def __init__(self, genome_type, reproduction_type, species_set_type, stagnation_type, filename, config_information=None):
         # Check that the provided types have the required methods.
diff --git a/neat/genome.py b/neat/genome.py
index 7b8a594e..a43122a7 100644
--- a/neat/genome.py
+++ b/neat/genome.py
@@ -299,6 +299,7 @@ def mutate(self, config):
         for ng in self.nodes.values():
             ng.mutate(config)
 
+    def trim_network(self, config):
         self.remove_all_deactivated_connections(config)
         while True:
             if((not self.remove_all_dangling_nodes(config))
diff --git a/neat/reproduction.py b/neat/reproduction.py
index 6f6c904e..1c7c48d7 100644
--- a/neat/reproduction.py
+++ b/neat/reproduction.py
@@ -96,6 +96,13 @@ def reproduce(self, config, species, pop_size, generation):
         # interfering with the shared fitness scheme.
         all_fitnesses = []
         remaining_species = []
+
+        if config.trim_dangling_after_n_generations_wo_improvment >= 0:
+            for specie in species.species.values():
+                if  generation - specie.last_improved > config.trim_dangling_after_n_generations_wo_improvment: 
+                    for member in specie.members.values():
+                        member.trim_network(config.genome_config)
+
         for stag_sid, stag_s, stagnant in self.stagnation.update(species, generation):
             if stagnant:
                 self.reporters.species_stagnant(stag_sid, stag_s)

From 0b7bc90b5f042e38f53c2e8fc7cc82688d14a4ac Mon Sep 17 00:00:00 2001
From: markste-in <mail@markste.in>
Date: Mon, 27 May 2024 23:32:27 +0200
Subject: [PATCH 03/11] set default value (inactive) for new trimming parameter

---
 neat/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neat/config.py b/neat/config.py
index 629f7be6..b589b99e 100644
--- a/neat/config.py
+++ b/neat/config.py
@@ -126,7 +126,7 @@ class Config(object):
                 ConfigParameter('fitness_threshold', float),
                 ConfigParameter('reset_on_extinction', bool),
                 ConfigParameter('no_fitness_termination', bool, False),
-                ConfigParameter('trim_dangling_after_n_generations_wo_improvment', int)]
+                ConfigParameter('trim_dangling_after_n_generations_wo_improvment', int, -1)]
 
     def __init__(self, genome_type, reproduction_type, species_set_type, stagnation_type, filename, config_information=None):
         # Check that the provided types have the required methods.

From 91c7dfabc9b47459cdd34994717ffc021ad3410f Mon Sep 17 00:00:00 2001
From: markste-in <mail@markste.in>
Date: Mon, 27 May 2024 23:52:20 +0200
Subject: [PATCH 04/11] removed unnecessary change

---
 examples/openai-lander/evolve.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py
index 77d91bee..200ec8a8 100644
--- a/examples/openai-lander/evolve.py
+++ b/examples/openai-lander/evolve.py
@@ -41,9 +41,6 @@ def mutate(self, config):
         super().mutate(config)
         self.discount += random.gauss(0.0, 0.05)
         self.discount = max(0.01, min(0.99, self.discount))
-    
-    def trim_network(self, config):
-        super().trim_network(config)
 
     def distance(self, other, config):
         dist = super().distance(other, config)

From 6f07e2123805110488da8bf838dc3dbb2a728abc Mon Sep 17 00:00:00 2001
From: markste-in <mail@markste.in>
Date: Tue, 28 May 2024 00:16:13 +0200
Subject: [PATCH 05/11] removed debug statements

---
 examples/openai-lander/evolve.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py
index 200ec8a8..e1167b93 100644
--- a/examples/openai-lander/evolve.py
+++ b/examples/openai-lander/evolve.py
@@ -20,8 +20,8 @@
 
 env = gym.make('LunarLander-v2')
 
-print("action space: {0!r}".format(env.action_space))
-print("observation space: {0!r}".format(env.observation_space))
+# print("action space: {0!r}".format(env.action_space))
+# print("observation space: {0!r}".format(env.observation_space))
 
 
 class LanderGenome(neat.DefaultGenome):

From bb5090f752b5399b23ee21ec357e50775085e9d5 Mon Sep 17 00:00:00 2001
From: markste-in <mail@markste.in>
Date: Tue, 28 May 2024 21:46:29 +0200
Subject: [PATCH 06/11] make sure an output of the network cannot act as an
 input to another node

---
 examples/openai-lander/evolve.py | 2 +-
 neat/genome.py                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py
index e1167b93..2c22ced4 100644
--- a/examples/openai-lander/evolve.py
+++ b/examples/openai-lander/evolve.py
@@ -168,7 +168,7 @@ def run():
     pop.add_reporter(stats)
     pop.add_reporter(neat.StdOutReporter(True))
     # Checkpoint every 25 generations or 900 seconds.
-    pop.add_reporter(neat.Checkpointer(25, 900))
+    pop.add_reporter(neat.Checkpointer(200, 3600))
 
     # Run until the winner from a generation is able to solve the environment
     # or the user interrupts the process.
diff --git a/neat/genome.py b/neat/genome.py
index a43122a7..61bd6b8f 100644
--- a/neat/genome.py
+++ b/neat/genome.py
@@ -384,7 +384,7 @@ def mutate_add_connection(self, config):
         possible_outputs = list(self.nodes)
         out_node = choice(possible_outputs)
 
-        possible_inputs = possible_outputs + config.input_keys
+        possible_inputs = list(set(possible_outputs + config.input_keys) - set(config.output_keys))
         in_node = choice(possible_inputs)
 
         # Don't duplicate connections.

From 8a12f69f7c289de449a4c5bda882e8fb502684b3 Mon Sep 17 00:00:00 2001
From: markste-in <mail@markste.in>
Date: Tue, 4 Jun 2024 22:15:25 +0200
Subject: [PATCH 07/11] working solution for lunarlander

---
 examples/openai-lander/config       |  47 +++---
 examples/openai-lander/evolve.py    | 230 +++++++++++-----------------
 examples/openai-lander/visualize.py |   2 +-
 3 files changed, 121 insertions(+), 158 deletions(-)

diff --git a/examples/openai-lander/config b/examples/openai-lander/config
index fe67d4b6..5906ded7 100644
--- a/examples/openai-lander/config
+++ b/examples/openai-lander/config
@@ -5,28 +5,36 @@ pop_size              = 150
 # Note: the fitness threshold will never be reached because
 # we are controlling the termination ourselves based on simulation performance.
 fitness_criterion     = max
-fitness_threshold     = 1000.0
+fitness_threshold     = 99999
 reset_on_extinction   = 0
-trim_dangling_after_n_generations_wo_improvment = 3
+trim_dangling_after_n_generations_wo_improvment = 30
 
-[LanderGenome]
+[DefaultGenome]
 num_inputs              = 8
 num_hidden              = 0
 num_outputs             = 4
-initial_connection      = partial 0.5
+initial_connection      = partial_nodirect 0.5
 feed_forward            = True
+
 compatibility_disjoint_coefficient = 1.0
-compatibility_weight_coefficient   = 1.0
-conn_add_prob           = 0.15
-conn_delete_prob        = 0.1
-node_add_prob           = 0.15
-node_delete_prob        = 0.1
-activation_default      = clamped
-activation_options      = clamped
+compatibility_weight_coefficient   = 0.6
+
+conn_add_prob           = 0.2
+conn_delete_prob        = 0.2
+
+node_add_prob           = 0.2
+node_delete_prob        = 0.2
+single_structural_mutation = False
+structural_mutation_surer = True
+
+activation_default      = tanh
+activation_options      = tanh
 activation_mutate_rate  = 0.0
+
 aggregation_default     = sum
 aggregation_options     = sum
 aggregation_mutate_rate = 0.0
+
 bias_init_mean          = 0.0
 bias_init_stdev         = 1.0
 bias_replace_rate       = 0.02
@@ -34,11 +42,12 @@ bias_mutate_rate        = 0.8
 bias_mutate_power       = 0.4
 bias_max_value          = 30.0
 bias_min_value          = -30.0
+
 response_init_mean      = 1.0
 response_init_stdev     = 0.0
 response_replace_rate   = 0.0
-response_mutate_rate    = 0.1
-response_mutate_power   = 0.01
+response_mutate_rate    = 0.0
+response_mutate_power   = 0.0
 response_max_value      = 30.0
 response_min_value      = -30.0
 
@@ -47,8 +56,8 @@ weight_min_value        = -30
 weight_init_mean        = 0.0
 weight_init_stdev       = 1.0
 weight_mutate_rate      = 0.8
-weight_replace_rate     = 0.02
-weight_mutate_power     = 0.4
+weight_replace_rate     = 0.1
+weight_mutate_power     = 0.5
 enabled_default         = True
 enabled_mutate_rate     = 0.01
 
@@ -56,11 +65,11 @@ enabled_mutate_rate     = 0.01
 compatibility_threshold = 3.0
 
 [DefaultStagnation]
-species_fitness_func = mean
-max_stagnation       = 15
-species_elitism      = 4
+species_fitness_func = max
+max_stagnation       = 50
+species_elitism      = 2
 
 [DefaultReproduction]
-elitism            = 2
+elitism            = 4
 survival_threshold = 0.2
 
diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py
index 2c22ced4..dafe136a 100644
--- a/examples/openai-lander/evolve.py
+++ b/examples/openai-lander/evolve.py
@@ -2,7 +2,7 @@
 # LunarLander-v2 environment (https://gym.openai.com/envs/LunarLander-v2).
 # Sample run here: https://gym.openai.com/evaluations/eval_FbKq5MxAS9GlvB7W6ioJkg
 
-
+import sys
 import multiprocessing
 import os
 import pickle
@@ -16,6 +16,11 @@
 import neat
 import visualize
 
+
+
+from warnings import filterwarnings
+filterwarnings('ignore', category=DeprecationWarning)
+
 NUM_CORES = multiprocessing.cpu_count()
 
 env = gym.make('LunarLander-v2')
@@ -23,56 +28,73 @@
 # print("action space: {0!r}".format(env.action_space))
 # print("observation space: {0!r}".format(env.observation_space))
 
+# Colored printing functions for strings that use universal ANSI escape sequences.
+# fail: bold red, pass: bold green, warn: bold yellow, 
+# info: bold blue, bold: bold white
 
-class LanderGenome(neat.DefaultGenome):
-    def __init__(self, key):
-        super().__init__(key)
-        self.discount = None
+class ColorPrint:
 
-    def configure_new(self, config):
-        super().configure_new(config)
-        self.discount = 0.01 + 0.98 * random.random()
+    @staticmethod
+    def print_fail(message, end = '\n'):
+        sys.stderr.write('\x1b[1;31m' + str(message).strip() + '\x1b[0m' + end)
 
-    def configure_crossover(self, genome1, genome2, config):
-        super().configure_crossover(genome1, genome2, config)
-        self.discount = random.choice((genome1.discount, genome2.discount))
+    @staticmethod
+    def print_pass(message, end = '\n'):
+        sys.stdout.write('\x1b[1;32m' + str(message).strip() + '\x1b[0m' + end)
 
-    def mutate(self, config):
-        super().mutate(config)
-        self.discount += random.gauss(0.0, 0.05)
-        self.discount = max(0.01, min(0.99, self.discount))
+    @staticmethod
+    def print_warn(message, end = '\n'):
+        sys.stderr.write('\x1b[1;33m' + str(message).strip() + '\x1b[0m' + end)
 
-    def distance(self, other, config):
-        dist = super().distance(other, config)
-        disc_diff = abs(self.discount - other.discount)
-        return dist + disc_diff
+    @staticmethod
+    def print_info(message, end = '\n'):
+        sys.stdout.write('\x1b[1;34m' + str(message).strip() + '\x1b[0m' + end)
 
-    def __str__(self):
-        return f"Reward discount: {self.discount}\n{super().__str__()}"
+    @staticmethod
+    def print_bold(message, end = '\n'):
+        sys.stdout.write('\x1b[1;37m' + str(message).strip() + '\x1b[0m' + end)
 
+def compute_action_discrete(net, observation):
+    """Return (index of the largest output of ``net``, the value of that output)."""
+    outputs = net.activate(observation)
+    best = np.argmax(outputs)  # Gym expects a discrete action index (0, 1, 2, 3)
+    return best, outputs[best]
 
-def compute_fitness(genome, net, episodes, min_reward, max_reward):
-    m = int(round(np.log(0.01) / np.log(genome.discount)))
-    discount_function = [genome.discount ** (m - i) for i in range(m + 1)]
 
-    reward_error = []
-    for score, data in episodes:
-        # Compute normalized discounted reward.
-        dr = np.convolve(data[:, -1], discount_function)[m:]
-        dr = 2 * (dr - min_reward) / (max_reward - min_reward) - 1.0
-        dr = np.clip(dr, -1.0, 1.0)
+def compute_action_box(net, observation):
+    """Return the raw outputs of ``net`` as the action, together with their norm."""
+    outputs = net.activate(observation)
+    magnitude = np.linalg.norm(outputs)  # default norm of the output vector
+    return outputs, magnitude
 
-        for row, dr in zip(data, dr):
-            observation = row[:8]
-            action = int(row[8])
-            output = net.activate(observation)
-            reward_error.append(float((output[action] - dr) ** 2))
 
-    return reward_error
+def compute_fitness(genome, net, min_reward, max_reward, num_episodes=3, penalize_inactivity=False):
+    """Return the mean total reward ``net`` collects over ``num_episodes`` episodes of ``env``.
+
+    ``genome``, ``min_reward`` and ``max_reward`` are accepted but not used in this body.
+    With ``penalize_inactivity`` set, every step whose second helper value (best output
+    or action norm) is below 0.6 subtracts 1 from the total.
+    """
+    is_discrete = isinstance(env.action_space, gym.spaces.Discrete)
+    pick_action = compute_action_discrete if is_discrete else compute_action_box
+
+    reward_sum = 0.0
+    for _ in range(num_episodes):
+        observation, _reset_info = env.reset()
+        finished = False
+        while not finished:
+            action, strength = pick_action(net, observation)
+            if strength < 0.6 and penalize_inactivity:
+                reward_sum -= 1
+            observation, reward, terminated, done, _step_info = env.step(action)
+            reward_sum += reward
+            finished = terminated or done
+
+    return reward_sum / num_episodes
 
 
 class PooledErrorCompute(object):
-    def __init__(self, num_workers):
+    def __init__(self, num_workers, penalize_inactivity = False):
         self.num_workers = num_workers
         self.test_episodes = []
         self.generation = 0
@@ -82,37 +104,8 @@ def __init__(self, num_workers):
 
         self.episode_score = []
         self.episode_length = []
-
-    def simulate(self, nets):
-        scores = []
-        for genome, net in nets:
-            observation_init_vals, observation_init_info = env.reset()
-            step = 0
-            data = []
-            while 1:
-                step += 1
-                if step < 200 and random.random() < 0.2:
-                    action = env.action_space.sample()
-                else:
-                    output = net.activate(observation_init_vals)
-                    action = np.argmax(output)
-
-                # Note: done has been deprecated.
-                observation, reward, terminated, done, info = env.step(action)
-                data.append(np.hstack((observation, action, reward)))
-
-                if terminated:
-                    break
-
-            data = np.array(data)
-            score = np.sum(data[:, -1])
-            self.episode_score.append(score)
-            scores.append(score)
-            self.episode_length.append(step)
-
-            self.test_episodes.append((score, data))
-
-        print("Score range [{:.3f}, {:.3f}]".format(min(scores), max(scores)))
+        self.penalize_inactivity = penalize_inactivity
+        self.num_episodes = 3
 
     def evaluate_genomes(self, genomes, config):
         self.generation += 1
@@ -125,41 +118,31 @@ def evaluate_genomes(self, genomes, config):
         print("network creation time {0}".format(time.time() - t0))
         t0 = time.time()
 
-        # Periodically generate a new set of episodes for comparison.
-        if 1 == self.generation % 10:
-            self.test_episodes = self.test_episodes[-300:]
-            self.simulate(nets)
-            print("simulation run time {0}".format(time.time() - t0))
-            t0 = time.time()
-
-        # Assign a composite fitness to each genome; genomes can make progress either
-        # by improving their total reward or by making more accurate reward estimates.
-        print("Evaluating {0} test episodes".format(len(self.test_episodes)))
         if self.num_workers < 2:
             for genome, net in nets:
-                reward_error = compute_fitness(genome, net, self.test_episodes, self.min_reward, self.max_reward)
-                genome.fitness = -np.sum(reward_error) / len(self.test_episodes)
+                reward = compute_fitness(genome, net, self.min_reward, self.max_reward, self.num_episodes, self.penalize_inactivity)
+                genome.fitness = reward
         else:
             with multiprocessing.Pool(self.num_workers) as pool:
                 jobs = []
                 for genome, net in nets:
                     jobs.append(pool.apply_async(compute_fitness,
-                                                 (genome, net, self.test_episodes,
-                                                  self.min_reward, self.max_reward)))
+                                                 (genome, net,
+                                                  self.min_reward, self.max_reward, self.num_episodes, self.penalize_inactivity)))
 
                 for job, (genome_id, genome) in zip(jobs, genomes):
-                    reward_error = job.get(timeout=None)
-                    genome.fitness = -np.sum(reward_error) / len(self.test_episodes)
+                    reward = job.get(timeout=None)
+                    genome.fitness = reward
 
         print("final fitness compute time {0}\n".format(time.time() - t0))
 
 
-def run():
+def run(penalize_inactivity=False):
     # Load the config file, which is assumed to live in
     # the same directory as this script.
     local_dir = os.path.dirname(__file__)
     config_path = os.path.join(local_dir, 'config')
-    config = neat.Config(LanderGenome, neat.DefaultReproduction,
+    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                          neat.DefaultSpeciesSet, neat.DefaultStagnation,
                          config_path)
 
@@ -168,25 +151,22 @@ def run():
     pop.add_reporter(stats)
     pop.add_reporter(neat.StdOutReporter(True))
     # Checkpoint every 25 generations or 900 seconds.
-    pop.add_reporter(neat.Checkpointer(200, 3600))
+    pop.add_reporter(neat.Checkpointer(1000, 3600))
 
     # Run until the winner from a generation is able to solve the environment
     # or the user interrupts the process.
-    ec = PooledErrorCompute(NUM_CORES)
+    ec = PooledErrorCompute(NUM_CORES, penalize_inactivity=penalize_inactivity)
     while 1:
         try:
-            gen_best = pop.run(ec.evaluate_genomes, 5)
-
-            # print(gen_best)
+            best_gen = pop.run(ec.evaluate_genomes, 10)
+            best_net = neat.nn.FeedForwardNetwork.create(best_gen, config)
+            timestr = time.strftime("%Y%m%d-%H%M%S")
+            # print(best_gen)
 
-            visualize.plot_stats(stats, ylog=False, view=False, filename="fitness.svg")
+            visualize.plot_stats(stats, ylog=False, view=False, filename=f"output/fitness.svg")
 
-            plt.plot(ec.episode_score, 'g-', label='score')
-            plt.plot(ec.episode_length, 'b-', label='length')
-            plt.grid()
-            plt.legend(loc='best')
-            plt.savefig("scores.svg")
-            plt.close()
+            mfs = sum(stats.get_fitness_stat(max)[-5:]) / 5.0
+            print("Average max fitness over last 5 generations: {0}".format(mfs))
 
             mfs = sum(stats.get_fitness_mean()[-5:]) / 5.0
             print("Average mean fitness over last 5 generations: {0}".format(mfs))
@@ -201,53 +181,27 @@ def run():
                 best_networks.append(neat.nn.FeedForwardNetwork.create(g, config))
 
             for n, g in enumerate(best_genomes):
-                name = 'network-{0}'.format(n)
-                visualize.draw_net(config, g, view=False, filename=name + f"_{n}-net")
+                visualize.draw_net(config, g, view=False, filename=f"output/network_gen{ec.generation}_n_{n}_key{g.key}_fitness{int(g.fitness)}-{timestr}")
 
-            solved = True
+            solved = False
             best_scores = []
-            for k in range(100):
-                observation_init_vals, observation_init_info = env.reset()
-                score = 0
-                step = 0
-                while 1:
-                    step += 1
-                    # Use the total reward estimates from all five networks to
-                    # determine the best action given the current state.
-                    votes = np.zeros((4,))
-                    for n in best_networks:
-                        output = n.activate(observation_init_vals)
-                        votes[np.argmax(output)] += 1
-
-                    best_action = np.argmax(votes)
-                    # Note: done has been deprecated.
-                    observation, reward, terminated, done, info = env.step(best_action)
-                    score += reward
-                    env.render()
-                    if terminated:
-                        break
-
-                ec.episode_score.append(score)
-                ec.episode_length.append(step)
-
-                best_scores.append(score)
-                avg_score = sum(best_scores) / len(best_scores)
-                print(k, score, avg_score)
-                if avg_score < 200:
-                    solved = False
-                    break
-
-            if solved:
-                print("Solved.")
+            avg_score = 0
+            ColorPrint.print_warn("### Checking current best model over 20x rounds ###")
+            for k in range(20):
+                best_scores.append(compute_fitness(best_gen, best_net, ec.min_reward, ec.max_reward, ec.num_episodes, ec.penalize_inactivity))
+            avg_score = sum(best_scores) / len(best_scores)
+            ColorPrint.print_warn(avg_score)
+
+            if avg_score > 200:
+                ColorPrint.print_pass("Solved.")
 
                 # Save the winners.
                 for n, g in enumerate(best_genomes):
-                    name = 'winner-{0}'.format(n)
+                    name = f'winner-{n}'
                     with open(name + '.pickle', 'wb') as f:
                         pickle.dump(g, f)
 
-                    visualize.draw_net(config, g, view=False, filename=name + "-net-solved.gv")
-                    visualize.draw_net(config, g, view=False, filename=name + "-net-pruned-solved.gv", prune_unused=True)
+                    visualize.draw_net(config, g, view=False, filename=f"output/{name}-_gen{ec.generation}_key{g.key}_fitness{int(g.fitness)}_net_solved-{timestr}")
 
                 break
         except KeyboardInterrupt:
@@ -258,4 +212,4 @@ def run():
 
 
 if __name__ == '__main__':
-    run()
+    run(penalize_inactivity=True)
diff --git a/examples/openai-lander/visualize.py b/examples/openai-lander/visualize.py
index 1af74aa3..5615ffd8 100644
--- a/examples/openai-lander/visualize.py
+++ b/examples/openai-lander/visualize.py
@@ -126,6 +126,6 @@ def draw_net(config, genome, view=False, filename=None, node_names=None, show_di
             width = str(0.1 + abs(cg.weight / 5.0))
             dot.edge(a, b, _attributes={'style': style, 'color': color, 'penwidth': width})
 
-    dot.render(filename, view=view)
+    dot.render(filename, view=view, cleanup=True)
 
     return dot

From 26fd10437296adcf33459d965dc100785ead99e6 Mon Sep 17 00:00:00 2001
From: markste-in <mail@markste.in>
Date: Wed, 5 Jun 2024 08:58:32 +0200
Subject: [PATCH 08/11] make sure the output dir is existing

---
 examples/openai-lander/evolve.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py
index dafe136a..82dd6756 100644
--- a/examples/openai-lander/evolve.py
+++ b/examples/openai-lander/evolve.py
@@ -152,7 +152,7 @@ def run(penalize_inactivity=False):
     pop.add_reporter(neat.StdOutReporter(True))
     # Checkpoint every 25 generations or 900 seconds.
     pop.add_reporter(neat.Checkpointer(1000, 3600))
-
+    os.makedirs("output", exist_ok=True)
     # Run until the winner from a generation is able to solve the environment
     # or the user interrupts the process.
     ec = PooledErrorCompute(NUM_CORES, penalize_inactivity=penalize_inactivity)
@@ -198,7 +198,7 @@ def run(penalize_inactivity=False):
                 # Save the winners.
                 for n, g in enumerate(best_genomes):
                     name = f'winner-{n}'
-                    with open(name + '.pickle', 'wb') as f:
+                    with open("output/" + name + '.pickle', 'wb') as f:
                         pickle.dump(g, f)
 
                     visualize.draw_net(config, g, view=False, filename=f"output/{name}-_gen{ec.generation}_key{g.key}_fitness{int(g.fitness)}_net_solved-{timestr}")

From 957e43aee7a914d8b1a962e17d65362e9cb7a79f Mon Sep 17 00:00:00 2001
From: markste-in <mail@markste.in>
Date: Sat, 8 Jun 2024 22:39:34 -0400
Subject: [PATCH 09/11] some refactoring and cont. config file

---
 examples/openai-lander/config                 |  4 +-
 .../openai-lander/config-lander-continuous    | 74 +++++++++++++++++++
 examples/openai-lander/evolve.py              | 20 ++---
 neat/config.py                                |  2 +-
 neat/reproduction.py                          |  8 +-
 5 files changed, 93 insertions(+), 15 deletions(-)
 create mode 100644 examples/openai-lander/config-lander-continuous

diff --git a/examples/openai-lander/config b/examples/openai-lander/config
index 5906ded7..089b5d32 100644
--- a/examples/openai-lander/config
+++ b/examples/openai-lander/config
@@ -5,9 +5,8 @@ pop_size              = 150
 # Note: the fitness threshold will never be reached because
 # we are controlling the termination ourselves based on simulation performance.
 fitness_criterion     = max
-fitness_threshold     = 99999
+fitness_threshold     = 1950
 reset_on_extinction   = 0
-trim_dangling_after_n_generations_wo_improvment = 30
 
 [DefaultGenome]
 num_inputs              = 8
@@ -72,4 +71,5 @@ species_elitism      = 2
 [DefaultReproduction]
 elitism            = 4
 survival_threshold = 0.2
+trim_dangling_after_n_generations_wo_improvment = 30
 
diff --git a/examples/openai-lander/config-lander-continuous b/examples/openai-lander/config-lander-continuous
new file mode 100644
index 00000000..d856d0f6
--- /dev/null
+++ b/examples/openai-lander/config-lander-continuous
@@ -0,0 +1,74 @@
+# neat-python configuration for the LunarLanderContinuous-v2 environment on OpenAI Gym
+
+[NEAT]
+pop_size              = 150
+# Note: the fitness threshold will never be reached because
+# we are controlling the termination ourselves based on simulation performance.
+fitness_criterion     = max
+fitness_threshold     = 1950
+reset_on_extinction   = 0
+
+[DefaultGenome]
+num_inputs              = 8
+num_hidden              = 0
+num_outputs             = 2
+initial_connection      = partial_direct 0.5
+feed_forward            = True
+
+compatibility_disjoint_coefficient = 1.0
+compatibility_weight_coefficient   = 0.6
+
+conn_add_prob           = 0.2
+conn_delete_prob        = 0.2
+
+node_add_prob           = 0.2
+node_delete_prob        = 0.2
+single_structural_mutation = False
+structural_mutation_surer = True
+
+activation_default      = tanh
+activation_options      = tanh
+activation_mutate_rate  = 0.0
+
+aggregation_default     = sum
+aggregation_options     = sum
+aggregation_mutate_rate = 0.0
+
+bias_init_mean          = 0.0
+bias_init_stdev         = 1.0
+bias_replace_rate       = 0.02
+bias_mutate_rate        = 0.8
+bias_mutate_power       = 0.4
+bias_max_value          = 30.0
+bias_min_value          = -30.0
+
+response_init_mean      = 1.0
+response_init_stdev     = 0.0
+response_replace_rate   = 0.0
+response_mutate_rate    = 0.0
+response_mutate_power   = 0.0
+response_max_value      = 30.0
+response_min_value      = -30.0
+
+weight_max_value        = 30
+weight_min_value        = -30
+weight_init_mean        = 0.0
+weight_init_stdev       = 1.0
+weight_mutate_rate      = 0.8
+weight_replace_rate     = 0.1
+weight_mutate_power     = 0.5
+enabled_default         = True
+enabled_mutate_rate     = 0.0
+
+[DefaultSpeciesSet]
+compatibility_threshold = 3.0
+
+[DefaultStagnation]
+species_fitness_func = mean
+max_stagnation       = 50
+species_elitism      = 2
+
+[DefaultReproduction]
+elitism            = 2
+survival_threshold = 0.2
+trim_dangling_after_n_generations_wo_improvment = 40
diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py
index 82dd6756..4ad53802 100644
--- a/examples/openai-lander/evolve.py
+++ b/examples/openai-lander/evolve.py
@@ -25,6 +25,7 @@
 
 env = gym.make('LunarLander-v2')
 
+# env = gym.make('LunarLanderContinuous-v2')
 # print("action space: {0!r}".format(env.action_space))
 # print("observation space: {0!r}".format(env.observation_space))
 
@@ -56,7 +57,7 @@ def print_bold(message, end = '\n'):
 
 def compute_action_discrete(net, observation):
     activation = net.activate(observation)
-    # Gym expects discrete actions (0, 1, 2, 3), so we need to convert the output
+    # Gym expects discrete actions (0, 1, 2, 3), so we need to convert the network output
     action = np.argmax(activation)
     return action, activation[action]
 
@@ -137,11 +138,11 @@ def evaluate_genomes(self, genomes, config):
         print("final fitness compute time {0}\n".format(time.time() - t0))
 
 
-def run(penalize_inactivity=False):
+def run(config_file = "config", penalize_inactivity=False):
     # Load the config file, which is assumed to live in
     # the same directory as this script.
     local_dir = os.path.dirname(__file__)
-    config_path = os.path.join(local_dir, 'config')
+    config_path = os.path.join(local_dir, config_file)
     config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                          neat.DefaultSpeciesSet, neat.DefaultStagnation,
                          config_path)
@@ -152,7 +153,7 @@ def run(penalize_inactivity=False):
     pop.add_reporter(neat.StdOutReporter(True))
     # Checkpoint every 25 generations or 900 seconds.
     pop.add_reporter(neat.Checkpointer(1000, 3600))
-    os.makedirs("output", exist_ok=True)
+    os.makedirs("results", exist_ok=True)
     # Run until the winner from a generation is able to solve the environment
     # or the user interrupts the process.
     ec = PooledErrorCompute(NUM_CORES, penalize_inactivity=penalize_inactivity)
@@ -163,7 +164,7 @@ def run(penalize_inactivity=False):
             timestr = time.strftime("%Y%m%d-%H%M%S")
             # print(best_gen)
 
-            visualize.plot_stats(stats, ylog=False, view=False, filename=f"output/fitness.svg")
+            visualize.plot_stats(stats, ylog=False, view=False, filename=f"results/fitness.svg")
 
             mfs = sum(stats.get_fitness_stat(max)[-5:]) / 5.0
             print("Average max fitness over last 5 generations: {0}".format(mfs))
@@ -181,7 +182,7 @@ def run(penalize_inactivity=False):
                 best_networks.append(neat.nn.FeedForwardNetwork.create(g, config))
 
             for n, g in enumerate(best_genomes):
-                visualize.draw_net(config, g, view=False, filename=f"output/network_gen{ec.generation}_n_{n}_key{g.key}_fitness{int(g.fitness)}-{timestr}")
+                visualize.draw_net(config, g, view=False, filename=f"results/network_gen{ec.generation}_n_{n}_key{g.key}_fitness{int(g.fitness)}-{timestr}")
 
             solved = False
             best_scores = []
@@ -198,10 +199,10 @@ def run(penalize_inactivity=False):
                 # Save the winners.
                 for n, g in enumerate(best_genomes):
                     name = f'winner-{n}'
-                    with open("output/" + name + '.pickle', 'wb') as f:
+                    with open("results/" + name + '.pickle', 'wb') as f:
                         pickle.dump(g, f)
 
-                    visualize.draw_net(config, g, view=False, filename=f"output/{name}-_gen{ec.generation}_key{g.key}_fitness{int(g.fitness)}_net_solved-{timestr}")
+                    visualize.draw_net(config, g, view=False, filename=f"results/{name}-_gen{ec.generation}_key{g.key}_fitness{int(g.fitness)}_net_solved-{timestr}")
 
                 break
         except KeyboardInterrupt:
@@ -212,4 +213,5 @@ def run(penalize_inactivity=False):
 
 
 if __name__ == '__main__':
-    run(penalize_inactivity=True)
+#    run(config_file="config-lander-continuous",penalize_inactivity=False)
+    run(config_file="config",penalize_inactivity=False)
diff --git a/neat/config.py b/neat/config.py
index b589b99e..054d6b41 100644
--- a/neat/config.py
+++ b/neat/config.py
@@ -126,7 +126,7 @@ class Config(object):
                 ConfigParameter('fitness_threshold', float),
                 ConfigParameter('reset_on_extinction', bool),
                 ConfigParameter('no_fitness_termination', bool, False),
-                ConfigParameter('trim_dangling_after_n_generations_wo_improvment', int, -1)]
+                ]
 
     def __init__(self, genome_type, reproduction_type, species_set_type, stagnation_type, filename, config_information=None):
         # Check that the provided types have the required methods.
diff --git a/neat/reproduction.py b/neat/reproduction.py
index 1c7c48d7..dd02fc2f 100644
--- a/neat/reproduction.py
+++ b/neat/reproduction.py
@@ -28,7 +28,9 @@ def parse_config(cls, param_dict):
         return DefaultClassConfig(param_dict,
                                   [ConfigParameter('elitism', int, 0),
                                    ConfigParameter('survival_threshold', float, 0.2),
-                                   ConfigParameter('min_species_size', int, 1)])
+                                   ConfigParameter('min_species_size', int, 1),
+                                   ConfigParameter('trim_dangling_after_n_generations_wo_improvment', int, -1)
+                                   ])
 
     def __init__(self, config, reporters, stagnation):
         # pylint: disable=super-init-not-called
@@ -97,9 +99,9 @@ def reproduce(self, config, species, pop_size, generation):
         all_fitnesses = []
         remaining_species = []
 
-        if config.trim_dangling_after_n_generations_wo_improvment >= 0:
+        if self.reproduction_config.trim_dangling_after_n_generations_wo_improvment >= 0:
             for specie in species.species.values():
-                if  generation - specie.last_improved > config.trim_dangling_after_n_generations_wo_improvment: 
+                if  generation - specie.last_improved > self.reproduction_config.trim_dangling_after_n_generations_wo_improvment: 
                     for member in specie.members.values():
                         member.trim_network(config.genome_config)
 

From 72f04f216b65af1ce849a1d1f5a1591a830f1491 Mon Sep 17 00:00:00 2001
From: markste-in <mail@markste.in>
Date: Sun, 9 Jun 2024 10:10:23 -0400
Subject: [PATCH 10/11] removed obsolete line

---
 examples/openai-lander/evolve.py | 42 +++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py
index 4ad53802..411a0778 100644
--- a/examples/openai-lander/evolve.py
+++ b/examples/openai-lander/evolve.py
@@ -23,8 +23,6 @@
 
 NUM_CORES = multiprocessing.cpu_count()
 
-env = gym.make('LunarLander-v2')
-
 # env = gym.make('LunarLanderContinuous-v2')
 # print("action space: {0!r}".format(env.action_space))
 # print("observation space: {0!r}".format(env.observation_space))
@@ -69,8 +67,8 @@ def compute_action_box(net, observation):
     return action, norm
 
 
-def compute_fitness(genome, net, min_reward, max_reward, num_episodes = 3,penalize_inactivity = False ):
-
+def compute_fitness(genome, net, min_reward, max_reward, env_name, num_episodes = 3,penalize_inactivity = False ):
+    env = gym.make(env_name)
     if isinstance(env.action_space, gym.spaces.Discrete):
         compute_action = compute_action_discrete
     else:
@@ -90,19 +88,19 @@ def compute_fitness(genome, net, min_reward, max_reward, num_episodes = 3,penali
             total_reward += reward
             if terminated or done:
                 break
-
+    env.close()
     return total_reward / num_episodes
 
 
 class PooledErrorCompute(object):
-    def __init__(self, num_workers, penalize_inactivity = False):
+    def __init__(self, num_workers, env_name, penalize_inactivity = False):
         self.num_workers = num_workers
         self.test_episodes = []
         self.generation = 0
 
         self.min_reward = -200
         self.max_reward = 200
-
+        self.env_name = env_name
         self.episode_score = []
         self.episode_length = []
         self.penalize_inactivity = penalize_inactivity
@@ -121,15 +119,27 @@ def evaluate_genomes(self, genomes, config):
 
         if self.num_workers < 2:
             for genome, net in nets:
-                reward = compute_fitness(genome, net, self.min_reward, self.max_reward, self.num_episodes, self.penalize_inactivity)
+                reward = compute_fitness(genome, 
+                                        net, 
+                                        self.min_reward, 
+                                        self.max_reward, 
+                                        env_name=self.env_name, 
+                                        num_episodes=self.num_episodes, 
+                                        penalize_inactivity=self.penalize_inactivity)
                 genome.fitness = reward
         else:
             with multiprocessing.Pool(self.num_workers) as pool:
                 jobs = []
                 for genome, net in nets:
-                    jobs.append(pool.apply_async(compute_fitness,
-                                                 (genome, net,
-                                                  self.min_reward, self.max_reward, self.num_episodes, self.penalize_inactivity)))
+                    jobs.append(pool.apply_async(compute_fitness, (genome, 
+                                                                net, 
+                                                                self.min_reward, 
+                                                                self.max_reward, 
+                                                                self.env_name, 
+                                                                self.num_episodes, 
+                                                                self.penalize_inactivity)
+                                                )
+                                )
 
                 for job, (genome_id, genome) in zip(jobs, genomes):
                     reward = job.get(timeout=None)
@@ -138,7 +148,7 @@ def evaluate_genomes(self, genomes, config):
         print("final fitness compute time {0}\n".format(time.time() - t0))
 
 
-def run(config_file = "config", penalize_inactivity=False):
+def run(env_name, config_file = "config", penalize_inactivity=False):
     # Load the config file, which is assumed to live in
     # the same directory as this script.
     local_dir = os.path.dirname(__file__)
@@ -156,7 +166,7 @@ def run(config_file = "config", penalize_inactivity=False):
     os.makedirs("results", exist_ok=True)
     # Run until the winner from a generation is able to solve the environment
     # or the user interrupts the process.
-    ec = PooledErrorCompute(NUM_CORES, penalize_inactivity=penalize_inactivity)
+    ec = PooledErrorCompute(NUM_CORES, env_name=env_name, penalize_inactivity=penalize_inactivity)
     while 1:
         try:
             best_gen = pop.run(ec.evaluate_genomes, 10)
@@ -189,7 +199,7 @@ def run(config_file = "config", penalize_inactivity=False):
             avg_score = 0
             ColorPrint.print_warn("### Checking current best model over 20x rounds ###")
             for k in range(20):
-                best_scores.append(compute_fitness(best_gen, best_net, ec.min_reward, ec.max_reward, ec.num_episodes, ec.penalize_inactivity))
+                best_scores.append(compute_fitness(best_gen, best_net, ec.min_reward, ec.max_reward, env_name, ec.num_episodes, ec.penalize_inactivity))
             avg_score = sum(best_scores) / len(best_scores)
             ColorPrint.print_warn(avg_score)
 
@@ -209,9 +219,7 @@ def run(config_file = "config", penalize_inactivity=False):
             print("User break.")
             break
 
-    env.close()
-
 
 if __name__ == '__main__':
 #    run(config_file="config-lander-continuous",penalize_inactivity=False)
-    run(config_file="config",penalize_inactivity=False)
+    run(env_name = 'LunarLander-v2', config_file="config",penalize_inactivity=False)

From 0175ab1deeb8173865bf5727ec12a79d6dbafd8d Mon Sep 17 00:00:00 2001
From: markste-in <mail@markste.in>
Date: Sun, 9 Jun 2024 10:19:33 -0400
Subject: [PATCH 11/11] little fix for continuous lunarlander

---
 examples/openai-lander/config    | 2 +-
 examples/openai-lander/evolve.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/openai-lander/config b/examples/openai-lander/config
index 089b5d32..c0fdb68d 100644
--- a/examples/openai-lander/config
+++ b/examples/openai-lander/config
@@ -64,7 +64,7 @@ enabled_mutate_rate     = 0.01
 compatibility_threshold = 3.0
 
 [DefaultStagnation]
-species_fitness_func = max
+species_fitness_func = mean
 max_stagnation       = 50
 species_elitism      = 2
 
diff --git a/examples/openai-lander/evolve.py b/examples/openai-lander/evolve.py
index 411a0778..cad4934d 100644
--- a/examples/openai-lander/evolve.py
+++ b/examples/openai-lander/evolve.py
@@ -221,5 +221,5 @@ def run(env_name, config_file = "config", penalize_inactivity=False):
 
 
 if __name__ == '__main__':
-#    run(config_file="config-lander-continuous",penalize_inactivity=False)
-    run(env_name = 'LunarLander-v2', config_file="config",penalize_inactivity=False)
+    run(env_name = 'LunarLanderContinuous-v2', config_file="config-lander-continuous",penalize_inactivity=False)
+#    run(env_name = 'LunarLander-v2', config_file="config",penalize_inactivity=False)