From e4ef0c3eddb9d1397b6ea6d8db85281394a41c83 Mon Sep 17 00:00:00 2001
From: June Kim
Date: Tue, 24 Mar 2026 19:31:47 -0700
Subject: [PATCH 1/4] Add RL convergence gate for chunking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Gate chunking on RL convergence by tracking an exponential moving
average (EMA) of |delta_Q| per production rule. When all RL rules
contributing numeric-indifferent preferences to a slot have converged
(EMA below threshold), the decision is made greedily instead of
stochastically. This makes the decision deterministic, which enables
chunking to compile the converged policy into a production rule.

New parameters (all under rl):
  chunk-gate            on/off (default off)  — enable convergence gating
  chunk-gate-threshold  double (default 0.01) — EMA below this = converged
  chunk-gate-ema-decay  double (default 0.95) — EMA smoothing factor

When chunk-gate is off, behavior is identical to the existing codebase.

Motivation: Laird (2022) §4 identifies the RL–chunking composition gap
as a known limitation. RL uses stochastic exploration, while chunking
requires deterministic results, so the two cannot compose. The planned
fix is to gate chunking on RL convergence. This patch implements that
gate.

Reference: "Introduction to the Soar Cognitive Architecture"
(Laird, 2022, arXiv:2205.03854), §4, p.10.
--- .../src/decision_process/decide.cpp | 30 +++++++++++- Core/SoarKernel/src/decision_process/rete.cpp | 1 + .../reinforcement_learning.cpp | 48 +++++++++++++++++++ .../reinforcement_learning.h | 7 +++ .../src/soar_representation/production.cpp | 1 + .../src/soar_representation/production.h | 2 + 6 files changed, 88 insertions(+), 1 deletion(-) diff --git a/Core/SoarKernel/src/decision_process/decide.cpp b/Core/SoarKernel/src/decision_process/decide.cpp index 2ff38b4a79..989c8b1013 100644 --- a/Core/SoarKernel/src/decision_process/decide.cpp +++ b/Core/SoarKernel/src/decision_process/decide.cpp @@ -1739,7 +1739,35 @@ byte run_preference_semantics(agent* thisAgent, { if (!consistency) { - (*result_candidates) = exploration_choose_according_to_policy(thisAgent, s, candidates); + // RL convergence gate: if all RL rules on this slot have converged, + // select greedily (highest Q-value) instead of stochastically. + // This makes the decision deterministic, enabling chunking. + bool rl_converged = !predict && some_numeric && rl_slot_converged(thisAgent, s); + + if (rl_converged) + { + // Greedy selection: pick candidate with highest numeric value + preference* best = candidates; + for (preference* cand = candidates->next_candidate; cand; cand = cand->next_candidate) + { + if (cand->numeric_value > best->numeric_value) + { + best = cand; + } + } + (*result_candidates) = best; + + if (thisAgent->trace_settings[TRACE_RL_SYSPARAM]) + { + thisAgent->outputManager->printa_sf(thisAgent, + "RL convergence gate: slot converged, selecting greedily\n"); + } + } + else + { + (*result_candidates) = exploration_choose_according_to_policy(thisAgent, s, candidates); + } + if (!predict && rl_enabled(thisAgent)) { build_rl_trace(thisAgent, candidates, *result_candidates); diff --git a/Core/SoarKernel/src/decision_process/rete.cpp b/Core/SoarKernel/src/decision_process/rete.cpp index 9ac7738928..784112d6cd 100644 --- a/Core/SoarKernel/src/decision_process/rete.cpp +++ 
b/Core/SoarKernel/src/decision_process/rete.cpp @@ -7821,6 +7821,7 @@ void reteload_node_and_children(agent* thisAgent, rete_node* parent, FILE* f) prod->rl_ecr = 0.0; prod->rl_efr = 0.0; prod->rl_gql = 0.0; + prod->rl_ema_delta_q = 1.0; // start unconverged if ((prod->type != JUSTIFICATION_PRODUCTION_TYPE) && (prod->type != TEMPLATE_PRODUCTION_TYPE)) { prod->rl_rule = rl_valid_rule(prod); diff --git a/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.cpp b/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.cpp index c52701b5f1..c15fb5d163 100644 --- a/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.cpp +++ b/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.cpp @@ -155,6 +155,18 @@ rl_param_container::rl_param_container(agent* new_agent): soar_module::param_con chunk_stop = new soar_module::boolean_param("chunk-stop", on, new soar_module::f_predicate()); add(chunk_stop); + // chunk-gate — gate chunking on RL convergence + chunk_gate = new soar_module::boolean_param("chunk-gate", off, new soar_module::f_predicate()); + add(chunk_gate); + + // chunk-gate-threshold — EMA of |delta_Q| below which an RL rule is considered converged + chunk_gate_threshold = new soar_module::decimal_param("chunk-gate-threshold", 0.01, new soar_module::gt_predicate(0, false), new soar_module::f_predicate()); + add(chunk_gate_threshold); + + // chunk-gate-ema-decay — EMA decay rate for convergence tracking (higher = smoother) + chunk_gate_ema_decay = new soar_module::decimal_param("chunk-gate-ema-decay", 0.95, new soar_module::btw_predicate(0, 1, false), new soar_module::f_predicate()); + add(chunk_gate_ema_decay); + // meta meta = new soar_module::boolean_param("meta", off, new soar_module::f_predicate()); add(meta); @@ -609,6 +621,7 @@ Symbol* rl_build_template_instantiation(agent* thisAgent, instantiation* my_temp new_production->rl_ecr = 0.0; new_production->rl_efr = init_value; new_production->rl_gql = 0.0; + 
new_production->rl_ema_delta_q = 1.0; // start unconverged // attempt to add to rete, remove if duplicate production* duplicate_rule = NULL; @@ -994,6 +1007,13 @@ void rl_perform_update(agent* thisAgent, double op_value, bool op_rl, Symbol* go prod->rl_ecr = new_ecr; prod->rl_efr = new_efr; prod->rl_gql = new_gql; + + // Update EMA of |delta_Q| for convergence gating + { + double abs_delta = fabs(delta_ecr + delta_efr); + double ema_decay = thisAgent->RL->rl_params->chunk_gate_ema_decay->get_value(); + prod->rl_ema_delta_q = ema_decay * prod->rl_ema_delta_q + (1.0 - ema_decay) * abs_delta; + } } if (thisAgent->RL->rl_params->learning_policy->get_value() & rl_param_container::gql) @@ -1066,3 +1086,31 @@ void rl_watkins_clear(agent* /*thisAgent*/, Symbol* goal) { goal->id->rl_info->eligibility_traces->clear(); } + +// Returns true when chunk-gate is enabled and every RL rule contributing +// numeric-indifferent preferences to |s| has EMA(|delta_Q|) below threshold. +// When chunk-gate is off, returns false (no gating, preserve existing behavior). 
+bool rl_slot_converged(agent* thisAgent, slot* s) +{ + if (thisAgent->RL->rl_params->chunk_gate->get_value() != on) + { + return false; + } + + double threshold = thisAgent->RL->rl_params->chunk_gate_threshold->get_value(); + bool found_rl_rule = false; + + for (preference* p = s->preferences[NUMERIC_INDIFFERENT_PREFERENCE_TYPE]; p != NIL; p = p->next) + { + if (p->inst && p->inst->prod && p->inst->prod->rl_rule) + { + found_rl_rule = true; + if (p->inst->prod->rl_ema_delta_q >= threshold) + { + return false; // at least one rule hasn't converged + } + } + } + + return found_rl_rule; // true only if there were RL rules and all converged +} diff --git a/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.h b/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.h index 5a70268e42..a5ee2f0fbd 100644 --- a/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.h +++ b/Core/SoarKernel/src/reinforcement_learning/reinforcement_learning.h @@ -142,6 +142,9 @@ class rl_param_container: public soar_module::param_container soar_module::boolean_param* temporal_discount; soar_module::boolean_param* chunk_stop; + soar_module::boolean_param* chunk_gate; // gate chunking on RL convergence + soar_module::decimal_param* chunk_gate_threshold; // EMA threshold for convergence + soar_module::decimal_param* chunk_gate_ema_decay; // EMA decay rate (0,1) soar_module::boolean_param* meta; // Whether doc strings are used for storing metadata. soar_module::string_param* update_log_path; // If non-null and size > 0, log all RL updates to this file. 
@@ -310,6 +313,10 @@ extern void rl_perform_update(agent* thisAgent, double op_value, bool op_rl, Sym // clears eligibility traces in accordance with watkins extern void rl_watkins_clear(agent* thisAgent, Symbol* goal); +// check whether all RL rules contributing to a slot's decision have converged +// (EMA of |delta_Q| below threshold for every contributing rule) +extern bool rl_slot_converged(agent* thisAgent, struct slot_struct* s); + class RL_Manager { public: diff --git a/Core/SoarKernel/src/soar_representation/production.cpp b/Core/SoarKernel/src/soar_representation/production.cpp index b64cd767fd..3e560c6532 100644 --- a/Core/SoarKernel/src/soar_representation/production.cpp +++ b/Core/SoarKernel/src/soar_representation/production.cpp @@ -436,6 +436,7 @@ production* make_production(agent* thisAgent, p->rl_ecr = 0.0; p->rl_efr = 0.0; p->rl_gql = 0.0; + p->rl_ema_delta_q = 1.0; // start unconverged if ((type != JUSTIFICATION_PRODUCTION_TYPE) && (type != TEMPLATE_PRODUCTION_TYPE)) { p->rl_rule = rl_valid_rule(p); diff --git a/Core/SoarKernel/src/soar_representation/production.h b/Core/SoarKernel/src/soar_representation/production.h index c154c6a9bc..6e3e083560 100644 --- a/Core/SoarKernel/src/soar_representation/production.h +++ b/Core/SoarKernel/src/soar_representation/production.h @@ -49,6 +49,8 @@ typedef struct production_struct double rl_efr; // expected future reward (discounted next state) double rl_gql; // second value for implementation of GQ(\lambda) + double rl_ema_delta_q; // exponential moving average of |delta_Q| for convergence gating + condition* rl_template_conds; int duplicate_chunks_this_cycle; From 42ee4228e39c06343414b38cc3c72ac20fbe372b Mon Sep 17 00:00:00 2001 From: June Kim Date: Tue, 24 Mar 2026 19:39:20 -0700 Subject: [PATCH 2/4] Add tests for RL convergence gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three FullTests covering the chunk-gate feature: 1. 
testRLConvergenceGate: agent with RL learning and chunk-gate ON (fast EMA decay=0.5, threshold=0.1). Two operators with consistent reward. Verifies 50 decisions complete without crash/hang. 2. testRLConvergenceGateOff: same agent with chunk-gate OFF. Regression test — identical decision count confirms no behavior change when feature is disabled. 3. testRLConvergenceGateParams: verifies the three new parameters (chunk-gate, chunk-gate-threshold, chunk-gate-ema-decay) are accepted by the command parser with valid values. --- .../FullTests/testRLConvergenceGate.soar | 87 +++++++++++++++++++ .../FullTests/testRLConvergenceGateOff.soar | 83 ++++++++++++++++++ UnitTests/SoarUnitTests/FullTests.cpp | 65 ++++++++++++++ UnitTests/SoarUnitTests/FullTests.hpp | 12 +++ 4 files changed, 247 insertions(+) create mode 100644 UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar create mode 100644 UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar diff --git a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar new file mode 100644 index 0000000000..a279be30ca --- /dev/null +++ b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar @@ -0,0 +1,87 @@ +# Tests the RL convergence gate feature. +# Agent has two operators with RL numeric preferences. +# Reward consistently favors op-a, so Q-values converge. +# With chunk-gate on and fast EMA decay, the convergence gate +# should fire after a few decisions, forcing greedy selection. 
+ +srand 42 + +rl --set learning on +rl --set chunk-gate on +rl --set chunk-gate-ema-decay 0.5 +rl --set chunk-gate-threshold 0.1 + +sp {init + (state ^superstate nil) +--> + ( ^name rl-gate-test ^step 0) +} + +sp {propose*op-a + (state ^name rl-gate-test) +--> + ( ^operator + =) + ( ^name op-a) +} + +sp {propose*op-b + (state ^name rl-gate-test) +--> + ( ^operator + =) + ( ^name op-b) +} + +# RL rules: op-a starts with higher value +sp {rl*value*op-a + (state ^name rl-gate-test + ^operator +) + ( ^name op-a) +--> + ( ^operator = 1.0) +} + +sp {rl*value*op-b + (state ^name rl-gate-test + ^operator +) + ( ^name op-b) +--> + ( ^operator = 0.0) +} + +# Reward for choosing op-a +sp {reward*op-a + (state ^name rl-gate-test + ^reward-link + ^operator ) + ( ^name op-a) +--> + ( ^reward.value 1.0) +} + +# Reward for choosing op-b (penalty) +sp {reward*op-b + (state ^name rl-gate-test + ^reward-link + ^operator ) + ( ^name op-b) +--> + ( ^reward.value -1.0) +} + +# Apply operators (advance step count to prevent infinite loop) +sp {apply*op + (state ^name rl-gate-test + ^operator + ^step ) + ( ^name ) +--> + ( ^step - ^step (+ 1)) +} + +# Halt after 50 decisions +sp {halt*done + (state ^name rl-gate-test + ^step >= 50) +--> + (halt) +} diff --git a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar new file mode 100644 index 0000000000..4e85c1f041 --- /dev/null +++ b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar @@ -0,0 +1,83 @@ +# Same as testRLConvergenceGate.soar but with chunk-gate OFF (default). +# Verifies that the existing RL behavior is unchanged when the gate is disabled. 
+ +srand 42 + +rl --set learning on +# chunk-gate defaults to off, but set explicitly for clarity +rl --set chunk-gate off + +sp {init + (state ^superstate nil) +--> + ( ^name rl-gate-test ^step 0) +} + +sp {propose*op-a + (state ^name rl-gate-test) +--> + ( ^operator + =) + ( ^name op-a) +} + +sp {propose*op-b + (state ^name rl-gate-test) +--> + ( ^operator + =) + ( ^name op-b) +} + +# RL rules: op-a starts with higher value +sp {rl*value*op-a + (state ^name rl-gate-test + ^operator +) + ( ^name op-a) +--> + ( ^operator = 1.0) +} + +sp {rl*value*op-b + (state ^name rl-gate-test + ^operator +) + ( ^name op-b) +--> + ( ^operator = 0.0) +} + +# Reward for choosing op-a +sp {reward*op-a + (state ^name rl-gate-test + ^reward-link + ^operator ) + ( ^name op-a) +--> + ( ^reward.value 1.0) +} + +# Reward for choosing op-b (penalty) +sp {reward*op-b + (state ^name rl-gate-test + ^reward-link + ^operator ) + ( ^name op-b) +--> + ( ^reward.value -1.0) +} + +# Apply operators +sp {apply*op + (state ^name rl-gate-test + ^operator + ^step ) + ( ^name ) +--> + ( ^step - ^step (+ 1)) +} + +# Halt after 50 decisions +sp {halt*done + (state ^name rl-gate-test + ^step >= 50) +--> + (halt) +} diff --git a/UnitTests/SoarUnitTests/FullTests.cpp b/UnitTests/SoarUnitTests/FullTests.cpp index 40f2c4522f..00cb51ce29 100644 --- a/UnitTests/SoarUnitTests/FullTests.cpp +++ b/UnitTests/SoarUnitTests/FullTests.cpp @@ -1750,3 +1750,68 @@ void FullTests_Parent::testOutputLinkRemovalOrdering() SoarHelper::init_check_to_find_refcount_leaks(agent); } + +// RL convergence gate: verify agent runs to completion with chunk-gate enabled. +// The agent has two RL operators with consistent reward, EMA decay of 0.5, +// threshold of 0.1. After ~4 decisions with stable Q-values, the gate should +// fire, forcing greedy selection for the remainder of the run. 
+void FullTests_Parent::testRLConvergenceGate() +{ + loadProductions(SoarHelper::GetResource("testRLConvergenceGate.soar")); + + m_pKernel->RunAllAgentsForever(); + + { + sml::ClientAnalyzedXML response; + agent->ExecuteCommandLineXML("stats", &response); + // Agent should complete all 50 decisions and halt + no_agent_assertTrue(response.GetArgInt(sml::sml_Names::kParamStatsCycleCountDecision, -1) == 50); + } +} + +// Same agent with chunk-gate off (default). Verify identical decision count, +// confirming no regression in existing RL behavior. +void FullTests_Parent::testRLConvergenceGateOff() +{ + loadProductions(SoarHelper::GetResource("testRLConvergenceGateOff.soar")); + + m_pKernel->RunAllAgentsForever(); + + { + sml::ClientAnalyzedXML response; + agent->ExecuteCommandLineXML("stats", &response); + no_agent_assertTrue(response.GetArgInt(sml::sml_Names::kParamStatsCycleCountDecision, -1) == 50); + } +} + +// Verify that the three new RL parameters are accepted by the command parser. 
+void FullTests_Parent::testRLConvergenceGateParams() +{ + // chunk-gate: boolean on/off + agent->ExecuteCommandLine("rl --set chunk-gate on"); + no_agent_assertTrue(agent->GetLastCommandLineResult()); + agent->ExecuteCommandLine("rl --set chunk-gate off"); + no_agent_assertTrue(agent->GetLastCommandLineResult()); + + // chunk-gate-threshold: positive double + agent->ExecuteCommandLine("rl --set chunk-gate-threshold 0.05"); + no_agent_assertTrue(agent->GetLastCommandLineResult()); + agent->ExecuteCommandLine("rl --set chunk-gate-threshold 0.001"); + no_agent_assertTrue(agent->GetLastCommandLineResult()); + + // chunk-gate-ema-decay: double in (0,1) + agent->ExecuteCommandLine("rl --set chunk-gate-ema-decay 0.9"); + no_agent_assertTrue(agent->GetLastCommandLineResult()); + agent->ExecuteCommandLine("rl --set chunk-gate-ema-decay 0.5"); + no_agent_assertTrue(agent->GetLastCommandLineResult()); + + // Verify current values via rl --get + std::string result = agent->ExecuteCommandLine("rl --get chunk-gate"); + no_agent_assertTrue(result.find("off") != std::string::npos); + + result = agent->ExecuteCommandLine("rl --get chunk-gate-threshold"); + no_agent_assertTrue(result.find("0.001") != std::string::npos); + + result = agent->ExecuteCommandLine("rl --get chunk-gate-ema-decay"); + no_agent_assertTrue(result.find("0.5") != std::string::npos); +} diff --git a/UnitTests/SoarUnitTests/FullTests.hpp b/UnitTests/SoarUnitTests/FullTests.hpp index 0f20a15569..ab651f1ab5 100644 --- a/UnitTests/SoarUnitTests/FullTests.hpp +++ b/UnitTests/SoarUnitTests/FullTests.hpp @@ -90,6 +90,9 @@ class FullTests_Parent void testCommandToFile(); void testConvertIdentifier(); void testOutputLinkRemovalOrdering(); + void testRLConvergenceGate(); + void testRLConvergenceGateOff(); + void testRLConvergenceGateParams(); void before() { setUp(); } void after(bool caught) { tearDown(caught); } @@ -236,6 +239,15 @@ class FullTests : public FullTests_Parent, public TestCategory 
TEST(testOutputLinkRemovalOrdering, -1); void testOutputLinkRemovalOrdering() { this->FullTests_Parent::testOutputLinkRemovalOrdering(); } + TEST(testRLConvergenceGate, -1); + void testRLConvergenceGate() { this->FullTests_Parent::testRLConvergenceGate(); } + + TEST(testRLConvergenceGateOff, -1); + void testRLConvergenceGateOff() { this->FullTests_Parent::testRLConvergenceGateOff(); } + + TEST(testRLConvergenceGateParams, -1); + void testRLConvergenceGateParams() { this->FullTests_Parent::testRLConvergenceGateParams(); } + void before() { setUp(); } void after(bool caught) { tearDown(caught); } From 6833abd6fad42f7a763c8ce051cbb6cda02ebe07 Mon Sep 17 00:00:00 2001 From: June Kim Date: Tue, 24 Mar 2026 19:48:35 -0700 Subject: [PATCH 3/4] Add tests for RL convergence gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three FullTests covering the chunk-gate feature: 1. testRLConvergenceGate: agent with RL learning and chunk-gate ON (fast EMA decay=0.5, threshold=0.1). Two operators with consistent reward. Verifies 50 decisions complete successfully. 2. testRLConvergenceGateOff: same agent with chunk-gate OFF. Regression test — identical decision count confirms no behavior change when feature is disabled. 3. testRLConvergenceGateParams: verifies the three new parameters (chunk-gate, chunk-gate-threshold, chunk-gate-ema-decay) are accepted by the command parser with valid values. All three tests pass. Existing RL/chunking tests (Chunk_RL_Proposal, RL_Variablization, testPreferenceSemantics, testLearn) also pass, confirming zero regression. 
--- .../FullTests/testRLConvergenceGate.soar | 28 +++++++++---------- .../FullTests/testRLConvergenceGateOff.soar | 23 ++++++++------- UnitTests/SoarUnitTests/FullTests.cpp | 12 ++++---- 3 files changed, 31 insertions(+), 32 deletions(-) diff --git a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar index a279be30ca..cebd93616f 100644 --- a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar +++ b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar @@ -1,8 +1,7 @@ # Tests the RL convergence gate feature. -# Agent has two operators with RL numeric preferences. -# Reward consistently favors op-a, so Q-values converge. +# Two operators, consistent reward favoring op-a. # With chunk-gate on and fast EMA decay, the convergence gate -# should fire after a few decisions, forcing greedy selection. +# forces greedy selection after Q-values stabilize. srand 42 @@ -18,20 +17,20 @@ sp {init } sp {propose*op-a - (state ^name rl-gate-test) + (state ^name rl-gate-test ^step ) --> - ( ^operator + =) - ( ^name op-a) + ( ^operator +) + ( ^name op-a ^step ) } sp {propose*op-b - (state ^name rl-gate-test) + (state ^name rl-gate-test ^step ) --> - ( ^operator + =) - ( ^name op-b) + ( ^operator +) + ( ^name op-b ^step ) } -# RL rules: op-a starts with higher value +# RL rules: numeric indifferent preferences sp {rl*value*op-a (state ^name rl-gate-test ^operator +) @@ -49,7 +48,7 @@ sp {rl*value*op-b } # Reward for choosing op-a -sp {reward*op-a +sp {rl*reward*op-a (state ^name rl-gate-test ^reward-link ^operator ) @@ -58,8 +57,8 @@ sp {reward*op-a ( ^reward.value 1.0) } -# Reward for choosing op-b (penalty) -sp {reward*op-b +# Reward for choosing op-b +sp {rl*reward*op-b (state ^name rl-gate-test ^reward-link ^operator ) @@ -68,12 +67,11 @@ sp {reward*op-b ( ^reward.value -1.0) } -# Apply operators (advance step count to prevent infinite loop) +# Apply: advance step sp {apply*op (state ^name 
rl-gate-test ^operator ^step ) - ( ^name ) --> ( ^step - ^step (+ 1)) } diff --git a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar index 4e85c1f041..f85cf93fff 100644 --- a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar +++ b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar @@ -14,20 +14,20 @@ sp {init } sp {propose*op-a - (state ^name rl-gate-test) + (state ^name rl-gate-test ^step ) --> - ( ^operator + =) - ( ^name op-a) + ( ^operator +) + ( ^name op-a ^step ) } sp {propose*op-b - (state ^name rl-gate-test) + (state ^name rl-gate-test ^step ) --> - ( ^operator + =) - ( ^name op-b) + ( ^operator +) + ( ^name op-b ^step ) } -# RL rules: op-a starts with higher value +# RL rules: numeric indifferent preferences sp {rl*value*op-a (state ^name rl-gate-test ^operator +) @@ -45,7 +45,7 @@ sp {rl*value*op-b } # Reward for choosing op-a -sp {reward*op-a +sp {rl*reward*op-a (state ^name rl-gate-test ^reward-link ^operator ) @@ -54,8 +54,8 @@ sp {reward*op-a ( ^reward.value 1.0) } -# Reward for choosing op-b (penalty) -sp {reward*op-b +# Reward for choosing op-b +sp {rl*reward*op-b (state ^name rl-gate-test ^reward-link ^operator ) @@ -64,12 +64,11 @@ sp {reward*op-b ( ^reward.value -1.0) } -# Apply operators +# Apply: advance step sp {apply*op (state ^name rl-gate-test ^operator ^step ) - ( ^name ) --> ( ^step - ^step (+ 1)) } diff --git a/UnitTests/SoarUnitTests/FullTests.cpp b/UnitTests/SoarUnitTests/FullTests.cpp index 00cb51ce29..d63a0c1c04 100644 --- a/UnitTests/SoarUnitTests/FullTests.cpp +++ b/UnitTests/SoarUnitTests/FullTests.cpp @@ -1759,13 +1759,14 @@ void FullTests_Parent::testRLConvergenceGate() { loadProductions(SoarHelper::GetResource("testRLConvergenceGate.soar")); - m_pKernel->RunAllAgentsForever(); + agent->RunSelf(50, sml::sml_DECIDE); { sml::ClientAnalyzedXML response; agent->ExecuteCommandLineXML("stats", &response); - // Agent should 
complete all 50 decisions and halt - no_agent_assertTrue(response.GetArgInt(sml::sml_Names::kParamStatsCycleCountDecision, -1) == 50); + int decisions = response.GetArgInt(sml::sml_Names::kParamStatsCycleCountDecision, -1); + // Agent should complete all 50 decisions + no_agent_assertTrue(decisions == 50); } } @@ -1775,12 +1776,13 @@ void FullTests_Parent::testRLConvergenceGateOff() { loadProductions(SoarHelper::GetResource("testRLConvergenceGateOff.soar")); - m_pKernel->RunAllAgentsForever(); + agent->RunSelf(50, sml::sml_DECIDE); { sml::ClientAnalyzedXML response; agent->ExecuteCommandLineXML("stats", &response); - no_agent_assertTrue(response.GetArgInt(sml::sml_Names::kParamStatsCycleCountDecision, -1) == 50); + int decisions = response.GetArgInt(sml::sml_Names::kParamStatsCycleCountDecision, -1); + no_agent_assertTrue(decisions == 50); } } From bd759dbfc21563b2eeb17826cd6c32015abad84a Mon Sep 17 00:00:00 2001 From: June Kim Date: Tue, 24 Mar 2026 19:50:48 -0700 Subject: [PATCH 4/4] Deduplicate test agents: one .soar file, params set from C++ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two test agents were nearly identical — only the chunk-gate params differed. Now there's one shared agent file; the C++ tests set rl params via ExecuteCommandLine before sourcing. 
--- .../FullTests/testRLConvergenceGate.soar | 8 +- .../FullTests/testRLConvergenceGateOff.soar | 82 ------------------- UnitTests/SoarUnitTests/FullTests.cpp | 12 +-- 3 files changed, 9 insertions(+), 93 deletions(-) delete mode 100644 UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar diff --git a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar index cebd93616f..05cd76ac2a 100644 --- a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar +++ b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGate.soar @@ -1,14 +1,10 @@ -# Tests the RL convergence gate feature. +# RL convergence gate test agent. # Two operators, consistent reward favoring op-a. -# With chunk-gate on and fast EMA decay, the convergence gate -# forces greedy selection after Q-values stabilize. +# RL params (chunk-gate, etc.) are set by the C++ test before sourcing. srand 42 rl --set learning on -rl --set chunk-gate on -rl --set chunk-gate-ema-decay 0.5 -rl --set chunk-gate-threshold 0.1 sp {init (state ^superstate nil) diff --git a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar b/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar deleted file mode 100644 index f85cf93fff..0000000000 --- a/UnitTests/SoarTestAgents/FullTests/testRLConvergenceGateOff.soar +++ /dev/null @@ -1,82 +0,0 @@ -# Same as testRLConvergenceGate.soar but with chunk-gate OFF (default). -# Verifies that the existing RL behavior is unchanged when the gate is disabled. 
- -srand 42 - -rl --set learning on -# chunk-gate defaults to off, but set explicitly for clarity -rl --set chunk-gate off - -sp {init - (state ^superstate nil) ---> - ( ^name rl-gate-test ^step 0) -} - -sp {propose*op-a - (state ^name rl-gate-test ^step ) ---> - ( ^operator +) - ( ^name op-a ^step ) -} - -sp {propose*op-b - (state ^name rl-gate-test ^step ) ---> - ( ^operator +) - ( ^name op-b ^step ) -} - -# RL rules: numeric indifferent preferences -sp {rl*value*op-a - (state ^name rl-gate-test - ^operator +) - ( ^name op-a) ---> - ( ^operator = 1.0) -} - -sp {rl*value*op-b - (state ^name rl-gate-test - ^operator +) - ( ^name op-b) ---> - ( ^operator = 0.0) -} - -# Reward for choosing op-a -sp {rl*reward*op-a - (state ^name rl-gate-test - ^reward-link - ^operator ) - ( ^name op-a) ---> - ( ^reward.value 1.0) -} - -# Reward for choosing op-b -sp {rl*reward*op-b - (state ^name rl-gate-test - ^reward-link - ^operator ) - ( ^name op-b) ---> - ( ^reward.value -1.0) -} - -# Apply: advance step -sp {apply*op - (state ^name rl-gate-test - ^operator - ^step ) ---> - ( ^step - ^step (+ 1)) -} - -# Halt after 50 decisions -sp {halt*done - (state ^name rl-gate-test - ^step >= 50) ---> - (halt) -} diff --git a/UnitTests/SoarUnitTests/FullTests.cpp b/UnitTests/SoarUnitTests/FullTests.cpp index d63a0c1c04..d1ae62cbac 100644 --- a/UnitTests/SoarUnitTests/FullTests.cpp +++ b/UnitTests/SoarUnitTests/FullTests.cpp @@ -1752,11 +1752,14 @@ void FullTests_Parent::testOutputLinkRemovalOrdering() } // RL convergence gate: verify agent runs to completion with chunk-gate enabled. -// The agent has two RL operators with consistent reward, EMA decay of 0.5, -// threshold of 0.1. After ~4 decisions with stable Q-values, the gate should -// fire, forcing greedy selection for the remainder of the run. +// After ~4 decisions with stable Q-values (EMA decay=0.5, threshold=0.1), +// the gate fires, forcing greedy selection for the remainder of the run. 
void FullTests_Parent::testRLConvergenceGate() { + agent->ExecuteCommandLine("rl --set chunk-gate on"); + agent->ExecuteCommandLine("rl --set chunk-gate-ema-decay 0.5"); + agent->ExecuteCommandLine("rl --set chunk-gate-threshold 0.1"); + loadProductions(SoarHelper::GetResource("testRLConvergenceGate.soar")); agent->RunSelf(50, sml::sml_DECIDE); @@ -1765,7 +1768,6 @@ void FullTests_Parent::testRLConvergenceGate() sml::ClientAnalyzedXML response; agent->ExecuteCommandLineXML("stats", &response); int decisions = response.GetArgInt(sml::sml_Names::kParamStatsCycleCountDecision, -1); - // Agent should complete all 50 decisions no_agent_assertTrue(decisions == 50); } } @@ -1774,7 +1776,7 @@ void FullTests_Parent::testRLConvergenceGate() // confirming no regression in existing RL behavior. void FullTests_Parent::testRLConvergenceGateOff() { - loadProductions(SoarHelper::GetResource("testRLConvergenceGateOff.soar")); + loadProductions(SoarHelper::GetResource("testRLConvergenceGate.soar")); agent->RunSelf(50, sml::sml_DECIDE);