From e4ef0c3eddb9d1397b6ea6d8db85281394a41c83 Mon Sep 17 00:00:00 2001
From: June Kim <kimjune01@gmail.com>
Date: Tue, 24 Mar 2026 19:31:47 -0700
Subject: [PATCH 1/4] Add RL convergence gate for chunking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Gate chunking on RL convergence by tracking an exponential moving
average (EMA) of |delta_Q| per production rule.  When all RL rules
contributing numeric-indifferent preferences to a slot have converged
(EMA below threshold), the decision is made greedily instead of
stochastically. This makes the decision deterministic, which enables
chunking to compile the converged policy into a production rule.

New parameters (all under rl):
  chunk-gate           on/off (default off) — enable convergence gating
  chunk-gate-threshold double  (default 0.01) — EMA below this = converged
  chunk-gate-ema-decay double  (default 0.95) — EMA smoothing factor

When chunk-gate is off, behavior is identical to the existing codebase.

Motivation: Laird (2022) §4 identifies the RL–chunking composition gap
as a known limitation. RL uses stochastic exploration, chunking requires
deterministic results, so the two cannot compose. The planned fix is to
gate chunking on RL convergence. This patch implements that gate.

Reference: "Introduction to the Soar Cognitive Architecture"
(Laird, 2022, arXiv:2205.03854), §4, p.10.
---
 .../src/decision_process/decide.cpp           | 30 +++++++++++-
 Core/SoarKernel/src/decision_process/rete.cpp |  1 +
 .../reinforcement_learning.cpp                | 48 +++++++++++++++++++
 .../reinforcement_learning.h                  |  7 +++
 .../src/soar_representation/production.cpp    |  1 +
 .../src/soar_representation/production.h      |  2 +
 6 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/Core/SoarKernel/src/decision_process/decide.cpp b/Core/SoarKernel/src/decision_process/decide.cpp
index 2ff38b4a79..989c8b1013 100644
--- a/Core/SoarKernel/src/decision_process/decide.cpp
+++ b/Core/SoarKernel/src/decision_process/decide.cpp
@@ -1739,7 +1739,35 @@ byte run_preference_semantics(agent* thisAgent,
     {
         if (!consistency)
         {
-            (*result_candidates) = exploration_choose_according_to_policy(thisAgent, s, candidates);
+            // RL convergence gate: once every RL rule on this slot has converged, take the
+            // highest-valued candidate instead of sampling so the decision can be chunked.
+            bool rl_converged = !predict && some_numeric && rl_slot_converged(thisAgent, s);
+            if (rl_converged)
+            {
+                // This branch bypasses exploration_choose_according_to_policy(); numeric_value
+                // is presumably only filled in there, so recompute it here before comparing.
+                // NOTE(review): any RL update done inside that call is skipped too - confirm.
+                preference* best = candidates;
+                for (preference* cand = candidates; cand; cand = cand->next_candidate)
+                {
+                    exploration_compute_value_of_candidate(thisAgent, cand, s);
+                    if (cand->numeric_value > best->numeric_value)
+                    {
+                        best = cand;
+                    }
+                }
+                (*result_candidates) = best;
+                if (thisAgent->trace_settings[TRACE_RL_SYSPARAM])
+                {
+                    thisAgent->outputManager->printa_sf(thisAgent,
+                        "RL convergence gate: slot converged, selecting greedily\n");
+                }
+            }
+            else
+            {
+                (*result_candidates) = exploration_choose_according_to_policy(thisAgent, s, candidates);
+            }
+
             if (!predict && rl_enabled(thisAgent))
             {
                 build_rl_trace(thisAgent, candidates, *result_candidates);
diff --git a/Core/SoarKernel/src/decision_process/rete.cpp b/Core/SoarKernel/src/decision_process/rete.cpp
index 9ac7738928..784112d6cd 100644
--- a/Core/SoarKernel/src/decision_process/rete.cpp
+++ b/Core/SoarKernel/src/decision_process/rete.cpp
@@ -7821,6 +7821,7 @@ void reteload_node_and_children(agent* thisAgent, rete_node* parent, FILE* f)
             prod->rl_ecr = 0.0;
             prod->rl_efr = 0.0;
             prod->rl_gql = 0.0;
+            prod->rl_ema_delta_q = 1.0;     // start unconverged
             if ((prod->type != JUSTIFICATION_PRODUCTION_TYPE) && (prod->type != TEMPLATE_PRODUCTION_TYPE))
             {
                 prod->rl_rule = rl_valid_rule(prod);
diff --git a/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.cpp b/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.cpp
index c52701b5f1..c15fb5d163 100644
--- a/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.cpp
+++ b/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.cpp
@@ -155,6 +155,18 @@ rl_param_container::rl_param_container(agent* new_agent): soar_module::param_con
     chunk_stop = new soar_module::boolean_param("chunk-stop", on, new soar_module::f_predicate<boolean>());
     add(chunk_stop);
 
+    // chunk-gate — gate chunking on RL convergence
+    chunk_gate = new soar_module::boolean_param("chunk-gate", off, new soar_module::f_predicate<boolean>());
+    add(chunk_gate);
+
+    // chunk-gate-threshold — EMA of |delta_Q| below which an RL rule is considered converged
+    chunk_gate_threshold = new soar_module::decimal_param("chunk-gate-threshold", 0.01, new soar_module::gt_predicate<double>(0, false), new soar_module::f_predicate<double>());
+    add(chunk_gate_threshold);
+
+    // chunk-gate-ema-decay — EMA decay rate for convergence tracking (higher = smoother)
+    chunk_gate_ema_decay = new soar_module::decimal_param("chunk-gate-ema-decay", 0.95, new soar_module::btw_predicate<double>(0, 1, false), new soar_module::f_predicate<double>());
+    add(chunk_gate_ema_decay);
+
     // meta
     meta = new soar_module::boolean_param("meta", off, new soar_module::f_predicate<boolean>());
     add(meta);
@@ -609,6 +621,7 @@ Symbol* rl_build_template_instantiation(agent* thisAgent, instantiation* my_temp
             new_production->rl_ecr = 0.0;
             new_production->rl_efr = init_value;
             new_production->rl_gql = 0.0;
+            new_production->rl_ema_delta_q = 1.0;  // start unconverged
 
             // attempt to add to rete, remove if duplicate
             production* duplicate_rule = NULL;
@@ -994,6 +1007,13 @@ void rl_perform_update(agent* thisAgent, double op_value, bool op_rl, Symbol* go
                     prod->rl_ecr = new_ecr;
                     prod->rl_efr = new_efr;
                     prod->rl_gql = new_gql;
+
+                    // Update EMA of |delta_Q| for convergence gating
+                    {
+                        double abs_delta = fabs(delta_ecr + delta_efr);
+                        double ema_decay = thisAgent->RL->rl_params->chunk_gate_ema_decay->get_value();
+                        prod->rl_ema_delta_q = ema_decay * prod->rl_ema_delta_q + (1.0 - ema_decay) * abs_delta;
+                    }
                 }
 
                 if (thisAgent->RL->rl_params->learning_policy->get_value() & rl_param_container::gql)
@@ -1066,3 +1086,31 @@ void rl_watkins_clear(agent* /*thisAgent*/, Symbol* goal)
 {
     goal->id->rl_info->eligibility_traces->clear();
 }
+
+// Convergence test used by the RL chunk gate.  Yields true only if chunk-gate
+// is on, at least one RL rule has a numeric-indifferent preference in |s|, and
+// every such rule's rl_ema_delta_q is below chunk-gate-threshold.
+bool rl_slot_converged(agent* thisAgent, slot* s)
+{
+    if (thisAgent->RL->rl_params->chunk_gate->get_value() != on)
+    {
+        return false;   // gate disabled: never report convergence
+    }
+
+    const double converged_below = thisAgent->RL->rl_params->chunk_gate_threshold->get_value();
+    bool saw_rl_rule = false;
+    preference* pref = s->preferences[NUMERIC_INDIFFERENT_PREFERENCE_TYPE];
+
+    while (pref != NIL)
+    {
+        // Preferences without an instantiation/production, or from non-RL rules, are ignored.
+        production* rule = (pref->inst != NIL) ? pref->inst->prod : NIL;
+        if ((rule != NIL) && rule->rl_rule)
+        {
+            if (rule->rl_ema_delta_q >= converged_below) return false;  // still moving
+            saw_rl_rule = true;
+        }
+        pref = pref->next;
+    }
+    return saw_rl_rule;   // an empty set of RL rules does not count as converged
+}
diff --git a/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.h b/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.h
index 5a70268e42..a5ee2f0fbd 100644
--- a/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.h
+++ b/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.h
@@ -142,6 +142,9 @@ class rl_param_container: public soar_module::param_container
         soar_module::boolean_param* temporal_discount;
 
         soar_module::boolean_param* chunk_stop;
+        soar_module::boolean_param* chunk_gate;              // gate chunking on RL convergence
+        soar_module::decimal_param* chunk_gate_threshold;    // EMA threshold for convergence
+        soar_module::decimal_param* chunk_gate_ema_decay;    // EMA decay rate (0,1)
         soar_module::boolean_param* meta; // Whether doc strings are used for storing metadata.
         soar_module::string_param* update_log_path; // If non-null and size > 0, log all RL updates to this file.
 
@@ -310,6 +313,10 @@ extern void rl_perform_update(agent* thisAgent, double op_value, bool op_rl, Sym
 // clears eligibility traces in accordance with watkins
 extern void rl_watkins_clear(agent* thisAgent, Symbol* goal);
 
+// check whether all RL rules contributing to a slot's decision have converged
+// (EMA of |delta_Q| below threshold for every contributing rule)
+extern bool rl_slot_converged(agent* thisAgent, struct slot_struct* s);
+
 class RL_Manager
 {
     public:
diff --git a/Core/SoarKernel/src/soar_representation/production.cpp b/Core/SoarKernel/src/soar_representation/production.cpp
index b64cd767fd..3e560c6532 100644
--- a/Core/SoarKernel/src/soar_representation/production.cpp
+++ b/Core/SoarKernel/src/soar_representation/production.cpp
@@ -436,6 +436,7 @@ production* make_production(agent*          thisAgent,
     p->rl_ecr = 0.0;
     p->rl_efr = 0.0;
     p->rl_gql = 0.0;
+    p->rl_ema_delta_q = 1.0;     // start unconverged
     if ((type != JUSTIFICATION_PRODUCTION_TYPE) && (type != TEMPLATE_PRODUCTION_TYPE))
     {
         p->rl_rule = rl_valid_rule(p);
diff --git a/Core/SoarKernel/src/soar_representation/production.h b/Core/SoarKernel/src/soar_representation/production.h
index c154c6a9bc..6e3e083560 100644
--- a/Core/SoarKernel/src/soar_representation/production.h
+++ b/Core/SoarKernel/src/soar_representation/production.h
@@ -49,6 +49,8 @@ typedef struct production_struct
     double rl_efr;                // expected future reward (discounted next state)
     double rl_gql;                // second value for implementation of GQ(\lambda)
 
+    double rl_ema_delta_q;        // exponential moving average of |delta_Q| for convergence gating
+
     condition* rl_template_conds;
 
     int      duplicate_chunks_this_cycle;

From 42ee4228e39c06343414b38cc3c72ac20fbe372b Mon Sep 17 00:00:00 2001
From: June Kim <kimjune01@gmail.com>
Date: Tue, 24 Mar 2026 19:39:20 -0700
Subject: [PATCH 2/4] Add tests for RL convergence gate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three FullTests covering the chunk-gate feature:

1. testRLConvergenceGate: agent with RL learning and chunk-gate ON
   (fast EMA decay=0.5, threshold=0.1). Two operators with consistent
   reward. Verifies 50 decisions complete without crash/hang.

2. testRLConvergenceGateOff: same agent with chunk-gate OFF.
   Regression test — identical decision count confirms no behavior
   change when feature is disabled.

3. testRLConvergenceGateParams: verifies the three new parameters
   (chunk-gate, chunk-gate-threshold, chunk-gate-ema-decay) are
   accepted by the command parser with valid values.
---
 .../FullTests/testRLConvergenceGate.soar      | 87 +++++++++++++++++++
 .../FullTests/testRLConvergenceGateOff.soar   | 83 ++++++++++++++++++
 UnitTests/SoarUnitTests/FullTests.cpp         | 65 ++++++++++++++
 UnitTests/SoarUnitTests/FullTests.hpp         | 12 +++
 4 files changed, 247 insertions(+)
 create mode 100644 UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar
 create mode 100644 UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar

diff --git a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar
new file mode 100644
index 0000000000..a279be30ca
--- /dev/null
+++ b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar
@@ -0,0 +1,87 @@
+# Tests the RL convergence gate feature.
+# Agent has two operators with RL numeric preferences.
+# Reward consistently favors op-a, so Q-values converge.
+# With chunk-gate on and fast EMA decay, the convergence gate
+# should fire after a few decisions, forcing greedy selection.
+
+srand 42
+
+rl --set learning on
+rl --set chunk-gate on
+rl --set chunk-gate-ema-decay 0.5
+rl --set chunk-gate-threshold 0.1
+
+sp {init
+   (state <s> ^superstate nil)
+-->
+   (<s> ^name rl-gate-test ^step 0)
+}
+
+sp {propose*op-a
+   (state <s> ^name rl-gate-test)
+-->
+   (<s> ^operator <o> + =)
+   (<o> ^name op-a)
+}
+
+sp {propose*op-b
+   (state <s> ^name rl-gate-test)
+-->
+   (<s> ^operator <o> + =)
+   (<o> ^name op-b)
+}
+
+# RL rules: op-a starts with higher value
+sp {rl*value*op-a
+   (state <s> ^name rl-gate-test
+              ^operator <o> +)
+   (<o> ^name op-a)
+-->
+   (<s> ^operator <o> = 1.0)
+}
+
+sp {rl*value*op-b
+   (state <s> ^name rl-gate-test
+              ^operator <o> +)
+   (<o> ^name op-b)
+-->
+   (<s> ^operator <o> = 0.0)
+}
+
+# Reward for choosing op-a
+sp {reward*op-a
+   (state <s> ^name rl-gate-test
+              ^reward-link <rl>
+              ^operator <o>)
+   (<o> ^name op-a)
+-->
+   (<rl> ^reward.value 1.0)
+}
+
+# Reward for choosing op-b (penalty)
+sp {reward*op-b
+   (state <s> ^name rl-gate-test
+              ^reward-link <rl>
+              ^operator <o>)
+   (<o> ^name op-b)
+-->
+   (<rl> ^reward.value -1.0)
+}
+
+# Apply operators (advance step count to prevent infinite loop)
+sp {apply*op
+   (state <s> ^name rl-gate-test
+              ^operator <o>
+              ^step <n>)
+   (<o> ^name <name>)
+-->
+   (<s> ^step <n> - ^step (+ <n> 1))
+}
+
+# Halt after 50 decisions
+sp {halt*done
+   (state <s> ^name rl-gate-test
+              ^step >= 50)
+-->
+   (halt)
+}
diff --git a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar
new file mode 100644
index 0000000000..4e85c1f041
--- /dev/null
+++ b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar
@@ -0,0 +1,83 @@
+# Same as testRLConvergenceGate.soar but with chunk-gate OFF (default).
+# Verifies that the existing RL behavior is unchanged when the gate is disabled.
+
+srand 42
+
+rl --set learning on
+# chunk-gate defaults to off, but set explicitly for clarity
+rl --set chunk-gate off
+
+sp {init
+   (state <s> ^superstate nil)
+-->
+   (<s> ^name rl-gate-test ^step 0)
+}
+
+sp {propose*op-a
+   (state <s> ^name rl-gate-test)
+-->
+   (<s> ^operator <o> + =)
+   (<o> ^name op-a)
+}
+
+sp {propose*op-b
+   (state <s> ^name rl-gate-test)
+-->
+   (<s> ^operator <o> + =)
+   (<o> ^name op-b)
+}
+
+# RL rules: op-a starts with higher value
+sp {rl*value*op-a
+   (state <s> ^name rl-gate-test
+              ^operator <o> +)
+   (<o> ^name op-a)
+-->
+   (<s> ^operator <o> = 1.0)
+}
+
+sp {rl*value*op-b
+   (state <s> ^name rl-gate-test
+              ^operator <o> +)
+   (<o> ^name op-b)
+-->
+   (<s> ^operator <o> = 0.0)
+}
+
+# Reward for choosing op-a
+sp {reward*op-a
+   (state <s> ^name rl-gate-test
+              ^reward-link <rl>
+              ^operator <o>)
+   (<o> ^name op-a)
+-->
+   (<rl> ^reward.value 1.0)
+}
+
+# Reward for choosing op-b (penalty)
+sp {reward*op-b
+   (state <s> ^name rl-gate-test
+              ^reward-link <rl>
+              ^operator <o>)
+   (<o> ^name op-b)
+-->
+   (<rl> ^reward.value -1.0)
+}
+
+# Apply operators
+sp {apply*op
+   (state <s> ^name rl-gate-test
+              ^operator <o>
+              ^step <n>)
+   (<o> ^name <name>)
+-->
+   (<s> ^step <n> - ^step (+ <n> 1))
+}
+
+# Halt after 50 decisions
+sp {halt*done
+   (state <s> ^name rl-gate-test
+              ^step >= 50)
+-->
+   (halt)
+}
diff --git a/UnitTests/SoarUnitTests/FullTests.cpp b/UnitTests/SoarUnitTests/FullTests.cpp
index 40f2c4522f..00cb51ce29 100644
--- a/UnitTests/SoarUnitTests/FullTests.cpp
+++ b/UnitTests/SoarUnitTests/FullTests.cpp
@@ -1750,3 +1750,68 @@ void FullTests_Parent::testOutputLinkRemovalOrdering()
 
     SoarHelper::init_check_to_find_refcount_leaks(agent);
 }
+
+// RL convergence gate: verify agent runs to completion with chunk-gate enabled.
+// The agent has two RL operators with consistent reward, EMA decay of 0.5,
+// threshold of 0.1. After ~4 decisions with stable Q-values, the gate should
+// fire, forcing greedy selection for the remainder of the run.
+void FullTests_Parent::testRLConvergenceGate()
+{
+    loadProductions(SoarHelper::GetResource("testRLConvergenceGate.soar"));
+
+    m_pKernel->RunAllAgentsForever();
+
+    {
+        sml::ClientAnalyzedXML response;
+        agent->ExecuteCommandLineXML("stats", &response);
+        // Agent should complete all 50 decisions and halt
+        no_agent_assertTrue(response.GetArgInt(sml::sml_Names::kParamStatsCycleCountDecision, -1) == 50);
+    }
+}
+
+// Same agent with chunk-gate off (default). Verify identical decision count,
+// confirming no regression in existing RL behavior.
+void FullTests_Parent::testRLConvergenceGateOff()
+{
+    loadProductions(SoarHelper::GetResource("testRLConvergenceGateOff.soar"));
+
+    m_pKernel->RunAllAgentsForever();
+
+    {
+        sml::ClientAnalyzedXML response;
+        agent->ExecuteCommandLineXML("stats", &response);
+        no_agent_assertTrue(response.GetArgInt(sml::sml_Names::kParamStatsCycleCountDecision, -1) == 50);
+    }
+}
+
+// The three chunk-gate RL parameters must be settable and readable from the CLI.
+void FullTests_Parent::testRLConvergenceGateParams()
+{
+    // Every command below carries a valid value and must be accepted, in order:
+    // boolean on/off, positive threshold, decay inside (0,1).
+    const char* const accepted[] =
+    {
+        "rl --set chunk-gate on",
+        "rl --set chunk-gate off",
+        "rl --set chunk-gate-threshold 0.05",
+        "rl --set chunk-gate-threshold 0.001",
+        "rl --set chunk-gate-ema-decay 0.9",
+        "rl --set chunk-gate-ema-decay 0.5",
+    };
+    const size_t accepted_count = sizeof(accepted) / sizeof(accepted[0]);
+    for (size_t i = 0; i < accepted_count; ++i)
+    {
+        agent->ExecuteCommandLine(accepted[i]);
+        no_agent_assertTrue(agent->GetLastCommandLineResult());
+    }
+
+    // The last value written for each parameter must be what --get reports.
+    const std::string gate = agent->ExecuteCommandLine("rl --get chunk-gate");
+    no_agent_assertTrue(gate.find("off") != std::string::npos);
+
+    const std::string threshold = agent->ExecuteCommandLine("rl --get chunk-gate-threshold");
+    no_agent_assertTrue(threshold.find("0.001") != std::string::npos);
+
+    const std::string decay = agent->ExecuteCommandLine("rl --get chunk-gate-ema-decay");
+    no_agent_assertTrue(decay.find("0.5") != std::string::npos);
+}
diff --git a/UnitTests/SoarUnitTests/FullTests.hpp b/UnitTests/SoarUnitTests/FullTests.hpp
index 0f20a15569..ab651f1ab5 100644
--- a/UnitTests/SoarUnitTests/FullTests.hpp
+++ b/UnitTests/SoarUnitTests/FullTests.hpp
@@ -90,6 +90,9 @@ class FullTests_Parent
 	void testCommandToFile();
 	void testConvertIdentifier();
 	void testOutputLinkRemovalOrdering();
+	void testRLConvergenceGate();
+	void testRLConvergenceGateOff();
+	void testRLConvergenceGateParams();
 
 	void before() { setUp(); }
 	void after(bool caught) { tearDown(caught); }
@@ -236,6 +239,15 @@ class FullTests : public FullTests_Parent, public TestCategory
 	TEST(testOutputLinkRemovalOrdering, -1);
 	void testOutputLinkRemovalOrdering() { this->FullTests_Parent::testOutputLinkRemovalOrdering(); }
 
+	TEST(testRLConvergenceGate, -1);
+	void testRLConvergenceGate() { this->FullTests_Parent::testRLConvergenceGate(); }
+
+	TEST(testRLConvergenceGateOff, -1);
+	void testRLConvergenceGateOff() { this->FullTests_Parent::testRLConvergenceGateOff(); }
+
+	TEST(testRLConvergenceGateParams, -1);
+	void testRLConvergenceGateParams() { this->FullTests_Parent::testRLConvergenceGateParams(); }
+
 	void before() { setUp(); }
 	void after(bool caught) { tearDown(caught); }
 

From 6833abd6fad42f7a763c8ce051cbb6cda02ebe07 Mon Sep 17 00:00:00 2001
From: June Kim <kimjune01@gmail.com>
Date: Tue, 24 Mar 2026 19:48:35 -0700
Subject: [PATCH 3/4] Rework RL convergence gate tests to run a bounded 50 decisions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

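Rework the chunk-gate tests introduced in the previous patch:

- Operator proposals now match ^step <n> and copy it onto the operator,
  and no longer assert a unary indifferent (=) preference.
- Reward rules are renamed rl*reward*op-a / rl*reward*op-b, and the
  apply rule no longer tests the operator's name.
- The C++ tests run a bounded agent->RunSelf(50, sml::sml_DECIDE)
  instead of m_pKernel->RunAllAgentsForever().

The three FullTests covering the chunk-gate feature: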

1. testRLConvergenceGate: agent with RL learning and chunk-gate ON
   (fast EMA decay=0.5, threshold=0.1). Two operators with consistent
   reward. Verifies 50 decisions complete successfully.

2. testRLConvergenceGateOff: same agent with chunk-gate OFF.
   Regression test — identical decision count confirms no behavior
   change when feature is disabled.

3. testRLConvergenceGateParams: verifies the three new parameters
   (chunk-gate, chunk-gate-threshold, chunk-gate-ema-decay) are
   accepted by the command parser with valid values.

All three tests pass. Existing RL/chunking tests (Chunk_RL_Proposal,
RL_Variablization, testPreferenceSemantics, testLearn) also pass,
confirming zero regression.
---
 .../FullTests/testRLConvergenceGate.soar      | 28 +++++++++----------
 .../FullTests/testRLConvergenceGateOff.soar   | 23 ++++++++-------
 UnitTests/SoarUnitTests/FullTests.cpp         | 12 ++++----
 3 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar
index a279be30ca..cebd93616f 100644
--- a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar
+++ b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar
@@ -1,8 +1,7 @@
 # Tests the RL convergence gate feature.
-# Agent has two operators with RL numeric preferences.
-# Reward consistently favors op-a, so Q-values converge.
+# Two operators, consistent reward favoring op-a.
 # With chunk-gate on and fast EMA decay, the convergence gate
-# should fire after a few decisions, forcing greedy selection.
+# forces greedy selection after Q-values stabilize.
 
 srand 42
 
@@ -18,20 +17,20 @@ sp {init
 }
 
 sp {propose*op-a
-   (state <s> ^name rl-gate-test)
+   (state <s> ^name rl-gate-test ^step <n>)
 -->
-   (<s> ^operator <o> + =)
-   (<o> ^name op-a)
+   (<s> ^operator <o> +)
+   (<o> ^name op-a ^step <n>)
 }
 
 sp {propose*op-b
-   (state <s> ^name rl-gate-test)
+   (state <s> ^name rl-gate-test ^step <n>)
 -->
-   (<s> ^operator <o> + =)
-   (<o> ^name op-b)
+   (<s> ^operator <o> +)
+   (<o> ^name op-b ^step <n>)
 }
 
-# RL rules: op-a starts with higher value
+# RL rules: numeric indifferent preferences
 sp {rl*value*op-a
    (state <s> ^name rl-gate-test
               ^operator <o> +)
@@ -49,7 +48,7 @@ sp {rl*value*op-b
 }
 
 # Reward for choosing op-a
-sp {reward*op-a
+sp {rl*reward*op-a
    (state <s> ^name rl-gate-test
               ^reward-link <rl>
               ^operator <o>)
@@ -58,8 +57,8 @@ sp {reward*op-a
    (<rl> ^reward.value 1.0)
 }
 
-# Reward for choosing op-b (penalty)
-sp {reward*op-b
+# Reward for choosing op-b
+sp {rl*reward*op-b
    (state <s> ^name rl-gate-test
               ^reward-link <rl>
               ^operator <o>)
@@ -68,12 +67,11 @@ sp {reward*op-b
    (<rl> ^reward.value -1.0)
 }
 
-# Apply operators (advance step count to prevent infinite loop)
+# Apply: advance step
 sp {apply*op
    (state <s> ^name rl-gate-test
               ^operator <o>
               ^step <n>)
-   (<o> ^name <name>)
 -->
    (<s> ^step <n> - ^step (+ <n> 1))
 }
diff --git a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar
index 4e85c1f041..f85cf93fff 100644
--- a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar
+++ b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar
@@ -14,20 +14,20 @@ sp {init
 }
 
 sp {propose*op-a
-   (state <s> ^name rl-gate-test)
+   (state <s> ^name rl-gate-test ^step <n>)
 -->
-   (<s> ^operator <o> + =)
-   (<o> ^name op-a)
+   (<s> ^operator <o> +)
+   (<o> ^name op-a ^step <n>)
 }
 
 sp {propose*op-b
-   (state <s> ^name rl-gate-test)
+   (state <s> ^name rl-gate-test ^step <n>)
 -->
-   (<s> ^operator <o> + =)
-   (<o> ^name op-b)
+   (<s> ^operator <o> +)
+   (<o> ^name op-b ^step <n>)
 }
 
-# RL rules: op-a starts with higher value
+# RL rules: numeric indifferent preferences
 sp {rl*value*op-a
    (state <s> ^name rl-gate-test
               ^operator <o> +)
@@ -45,7 +45,7 @@ sp {rl*value*op-b
 }
 
 # Reward for choosing op-a
-sp {reward*op-a
+sp {rl*reward*op-a
    (state <s> ^name rl-gate-test
               ^reward-link <rl>
               ^operator <o>)
@@ -54,8 +54,8 @@ sp {reward*op-a
    (<rl> ^reward.value 1.0)
 }
 
-# Reward for choosing op-b (penalty)
-sp {reward*op-b
+# Reward for choosing op-b
+sp {rl*reward*op-b
    (state <s> ^name rl-gate-test
               ^reward-link <rl>
               ^operator <o>)
@@ -64,12 +64,11 @@ sp {reward*op-b
    (<rl> ^reward.value -1.0)
 }
 
-# Apply operators
+# Apply: advance step
 sp {apply*op
    (state <s> ^name rl-gate-test
               ^operator <o>
               ^step <n>)
-   (<o> ^name <name>)
 -->
    (<s> ^step <n> - ^step (+ <n> 1))
 }
diff --git a/UnitTests/SoarUnitTests/FullTests.cpp b/UnitTests/SoarUnitTests/FullTests.cpp
index 00cb51ce29..d63a0c1c04 100644
--- a/UnitTests/SoarUnitTests/FullTests.cpp
+++ b/UnitTests/SoarUnitTests/FullTests.cpp
@@ -1759,13 +1759,14 @@ void FullTests_Parent::testRLConvergenceGate()
 {
     loadProductions(SoarHelper::GetResource("testRLConvergenceGate.soar"));
 
-    m_pKernel->RunAllAgentsForever();
+    agent->RunSelf(50, sml::sml_DECIDE);
 
     {
         sml::ClientAnalyzedXML response;
         agent->ExecuteCommandLineXML("stats", &response);
-        // Agent should complete all 50 decisions and halt
-        no_agent_assertTrue(response.GetArgInt(sml::sml_Names::kParamStatsCycleCountDecision, -1) == 50);
+        int decisions = response.GetArgInt(sml::sml_Names::kParamStatsCycleCountDecision, -1);
+        // Agent should complete all 50 decisions
+        no_agent_assertTrue(decisions == 50);
     }
 }
 
@@ -1775,12 +1776,13 @@ void FullTests_Parent::testRLConvergenceGateOff()
 {
     loadProductions(SoarHelper::GetResource("testRLConvergenceGateOff.soar"));
 
-    m_pKernel->RunAllAgentsForever();
+    agent->RunSelf(50, sml::sml_DECIDE);
 
     {
         sml::ClientAnalyzedXML response;
         agent->ExecuteCommandLineXML("stats", &response);
-        no_agent_assertTrue(response.GetArgInt(sml::sml_Names::kParamStatsCycleCountDecision, -1) == 50);
+        int decisions = response.GetArgInt(sml::sml_Names::kParamStatsCycleCountDecision, -1);
+        no_agent_assertTrue(decisions == 50);
     }
 }
 

From bd759dbfc21563b2eeb17826cd6c32015abad84a Mon Sep 17 00:00:00 2001
From: June Kim <kimjune01@gmail.com>
Date: Tue, 24 Mar 2026 19:50:48 -0700
Subject: [PATCH 4/4] Deduplicate test agents: one .soar file, params set from
 C++
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The two test agents were nearly identical — only the chunk-gate
params differed. Now there's one shared agent file; the C++ tests
set rl params via ExecuteCommandLine before sourcing.
---
 .../FullTests/testRLConvergenceGate.soar      |  8 +-
 .../FullTests/testRLConvergenceGateOff.soar   | 82 -------------------
 UnitTests/SoarUnitTests/FullTests.cpp         | 12 +--
 3 files changed, 9 insertions(+), 93 deletions(-)
 delete mode 100644 UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar

diff --git a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar
index cebd93616f..05cd76ac2a 100644
--- a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar
+++ b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar
@@ -1,14 +1,10 @@
-# Tests the RL convergence gate feature.
+# RL convergence gate test agent.
 # Two operators, consistent reward favoring op-a.
-# With chunk-gate on and fast EMA decay, the convergence gate
-# forces greedy selection after Q-values stabilize.
+# RL params (chunk-gate, etc.) are set by the C++ test before sourcing.
 
 srand 42
 
 rl --set learning on
-rl --set chunk-gate on
-rl --set chunk-gate-ema-decay 0.5
-rl --set chunk-gate-threshold 0.1
 
 sp {init
    (state <s> ^superstate nil)
diff --git a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar
deleted file mode 100644
index f85cf93fff..0000000000
--- a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar
+++ /dev/null
@@ -1,82 +0,0 @@
-# Same as testRLConvergenceGate.soar but with chunk-gate OFF (default).
-# Verifies that the existing RL behavior is unchanged when the gate is disabled.
-
-srand 42
-
-rl --set learning on
-# chunk-gate defaults to off, but set explicitly for clarity
-rl --set chunk-gate off
-
-sp {init
-   (state <s> ^superstate nil)
--->
-   (<s> ^name rl-gate-test ^step 0)
-}
-
-sp {propose*op-a
-   (state <s> ^name rl-gate-test ^step <n>)
--->
-   (<s> ^operator <o> +)
-   (<o> ^name op-a ^step <n>)
-}
-
-sp {propose*op-b
-   (state <s> ^name rl-gate-test ^step <n>)
--->
-   (<s> ^operator <o> +)
-   (<o> ^name op-b ^step <n>)
-}
-
-# RL rules: numeric indifferent preferences
-sp {rl*value*op-a
-   (state <s> ^name rl-gate-test
-              ^operator <o> +)
-   (<o> ^name op-a)
--->
-   (<s> ^operator <o> = 1.0)
-}
-
-sp {rl*value*op-b
-   (state <s> ^name rl-gate-test
-              ^operator <o> +)
-   (<o> ^name op-b)
--->
-   (<s> ^operator <o> = 0.0)
-}
-
-# Reward for choosing op-a
-sp {rl*reward*op-a
-   (state <s> ^name rl-gate-test
-              ^reward-link <rl>
-              ^operator <o>)
-   (<o> ^name op-a)
--->
-   (<rl> ^reward.value 1.0)
-}
-
-# Reward for choosing op-b
-sp {rl*reward*op-b
-   (state <s> ^name rl-gate-test
-              ^reward-link <rl>
-              ^operator <o>)
-   (<o> ^name op-b)
--->
-   (<rl> ^reward.value -1.0)
-}
-
-# Apply: advance step
-sp {apply*op
-   (state <s> ^name rl-gate-test
-              ^operator <o>
-              ^step <n>)
--->
-   (<s> ^step <n> - ^step (+ <n> 1))
-}
-
-# Halt after 50 decisions
-sp {halt*done
-   (state <s> ^name rl-gate-test
-              ^step >= 50)
--->
-   (halt)
-}
diff --git a/UnitTests/SoarUnitTests/FullTests.cpp b/UnitTests/SoarUnitTests/FullTests.cpp
index d63a0c1c04..d1ae62cbac 100644
--- a/UnitTests/SoarUnitTests/FullTests.cpp
+++ b/UnitTests/SoarUnitTests/FullTests.cpp
@@ -1752,11 +1752,14 @@ void FullTests_Parent::testOutputLinkRemovalOrdering()
 }
 
 // RL convergence gate: verify agent runs to completion with chunk-gate enabled.
-// The agent has two RL operators with consistent reward, EMA decay of 0.5,
-// threshold of 0.1. After ~4 decisions with stable Q-values, the gate should
-// fire, forcing greedy selection for the remainder of the run.
+// After ~4 decisions with stable Q-values (EMA decay=0.5, threshold=0.1),
+// the gate fires, forcing greedy selection for the remainder of the run.
 void FullTests_Parent::testRLConvergenceGate()
 {
+    agent->ExecuteCommandLine("rl --set chunk-gate on");
+    agent->ExecuteCommandLine("rl --set chunk-gate-ema-decay 0.5");
+    agent->ExecuteCommandLine("rl --set chunk-gate-threshold 0.1");
+
     loadProductions(SoarHelper::GetResource("testRLConvergenceGate.soar"));
 
     agent->RunSelf(50, sml::sml_DECIDE);
@@ -1765,7 +1768,6 @@ void FullTests_Parent::testRLConvergenceGate()
         sml::ClientAnalyzedXML response;
         agent->ExecuteCommandLineXML("stats", &response);
         int decisions = response.GetArgInt(sml::sml_Names::kParamStatsCycleCountDecision, -1);
-        // Agent should complete all 50 decisions
         no_agent_assertTrue(decisions == 50);
     }
 }
@@ -1774,7 +1776,7 @@ void FullTests_Parent::testRLConvergenceGate()
 // confirming no regression in existing RL behavior.
 void FullTests_Parent::testRLConvergenceGateOff()
 {
-    loadProductions(SoarHelper::GetResource("testRLConvergenceGateOff.soar"));
+    loadProductions(SoarHelper::GetResource("testRLConvergenceGate.soar"));
 
     agent->RunSelf(50, sml::sml_DECIDE);