edbeeching · Ivan-267 · Apr 3, 2024 · Apr 5, 2024 · edbeeching · Apr 29, 2024
diff --git a/examples/JumperHard/AIController3D.gd b/examples/JumperHard/AIController3D.gd
@@ -48,7 +48,6 @@ func set_action(action):
 	_player.turn_action = action["turn"][0]
 	_player.jump_action = action["jump"][0] > 0
 
-
 func get_action_space():
 	return {
 		"jump": {"size": 1, "action_type": "continuous"},

diff --git a/examples/JumperHard/BatchEnvs.tscn b/examples/JumperHard/BatchEnvs.tscn
@@ -56,6 +56,8 @@ transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 1243.91, 0, -1237.95)
 
 [node name="Sync" type="Node" parent="."]
 script = ExtResource("2")
+speed_up = 5.0
+onnx_model_path = "JumperHard.onnx"
 
 [node name="DirectionalLight2" type="DirectionalLight3D" parent="."]
 transform = Transform3D(0.5, 0.866025, 0, -0.433013, 0.25, 0.866025, 0.75, -0.433013, 0.5, 0, 25, 0)

diff --git a/examples/JumperHard/JumperHard.csproj b/examples/JumperHard/JumperHard.csproj
@@ -1,4 +1,4 @@
-<Project Sdk="Godot.NET.Sdk/4.0.3">
+<Project Sdk="Godot.NET.Sdk/4.3.0-dev.5">
   <PropertyGroup>
     <TargetFramework>net6.0</TargetFramework>
     <EnableDynamicLoading>true</EnableDynamicLoading>

diff --git a/examples/JumperHard/JumperHard.onnx b/examples/JumperHard/JumperHard.onnx
diff --git a/examples/JumperHard/JumperHard.sln b/examples/JumperHard/JumperHard.sln
@@ -1,25 +1,19 @@
-
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 17
-VisualStudioVersion = 17.5.33530.505
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Godot RL Agents", "Godot RL Agents.csproj", "{055E8CBC-A3EC-41A8-BC53-EC3010682AE4}"
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 2012
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "JumperHard", "JumperHard.csproj", "{84F6AC7C-59EF-499B-9509-07E463D410E6}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Any CPU = Debug|Any CPU
-		ExportDebug|Any CPU = ExportDebug|Any CPU
-		ExportRelease|Any CPU = ExportRelease|Any CPU
+	Debug|Any CPU = Debug|Any CPU
+	ExportDebug|Any CPU = ExportDebug|Any CPU
+	ExportRelease|Any CPU = ExportRelease|Any CPU
 	EndGlobalSection
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{055E8CBC-A3EC-41A8-BC53-EC3010682AE4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{055E8CBC-A3EC-41A8-BC53-EC3010682AE4}.Debug|Any CPU.Build.0 = Debug|Any CPU
-		{055E8CBC-A3EC-41A8-BC53-EC3010682AE4}.ExportDebug|Any CPU.ActiveCfg = ExportDebug|Any CPU
-		{055E8CBC-A3EC-41A8-BC53-EC3010682AE4}.ExportDebug|Any CPU.Build.0 = ExportDebug|Any CPU
-		{055E8CBC-A3EC-41A8-BC53-EC3010682AE4}.ExportRelease|Any CPU.ActiveCfg = ExportRelease|Any CPU
-		{055E8CBC-A3EC-41A8-BC53-EC3010682AE4}.ExportRelease|Any CPU.Build.0 = ExportRelease|Any CPU
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
+		{84F6AC7C-59EF-499B-9509-07E463D410E6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{84F6AC7C-59EF-499B-9509-07E463D410E6}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{84F6AC7C-59EF-499B-9509-07E463D410E6}.ExportDebug|Any CPU.ActiveCfg = ExportDebug|Any CPU
+		{84F6AC7C-59EF-499B-9509-07E463D410E6}.ExportDebug|Any CPU.Build.0 = ExportDebug|Any CPU
+		{84F6AC7C-59EF-499B-9509-07E463D410E6}.ExportRelease|Any CPU.ActiveCfg = ExportRelease|Any CPU
+		{84F6AC7C-59EF-499B-9509-07E463D410E6}.ExportRelease|Any CPU.Build.0 = ExportRelease|Any CPU
 	EndGlobalSection
 EndGlobal
diff --git a/examples/JumperHard/JumperHarder.tscn b/examples/JumperHard/JumperHarder.tscn
@@ -3,7 +3,7 @@
 [ext_resource type="PackedScene" uid="uid://btjelqxpc6evr" path="res://Player.tscn" id="1"]
 
 [sub_resource type="BoxShape3D" id="1"]
-size = Vector3(400, 2, 400)
+size = Vector3(800, 2, 800)
 
 [sub_resource type="BoxMesh" id="2"]
 size = Vector3(8, 2, 8)
@@ -12,7 +12,7 @@ size = Vector3(8, 2, 8)
 size = Vector3(8, 2, 8)
 
 [sub_resource type="BoxShape3D" id="5"]
-size = Vector3(6, 4, 6)
+size = Vector3(8, 16, 8)
 
 [node name="JumperHard" type="Node3D"]
 
@@ -37,10 +37,11 @@ mesh = SubResource("2")
 shape = SubResource("4")
 
 [node name="Trigger" type="Area3D" parent="Pads/FirstPad"]
-transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 2, 0)
+transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 3, 0)
+monitorable = false
 
 [node name="CollisionShape3D" type="CollisionShape3D" parent="Pads/FirstPad/Trigger"]
-transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0)
+transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 4, 0)
 shape = SubResource("5")
 
 [node name="SecondPad" type="Node3D" parent="Pads"]
@@ -55,9 +56,11 @@ mesh = SubResource("2")
 shape = SubResource("4")
 
 [node name="Trigger" type="Area3D" parent="Pads/SecondPad"]
-transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 2, 0)
+transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 3, 0)
+monitorable = false
 
 [node name="CollisionShape3D" type="CollisionShape3D" parent="Pads/SecondPad/Trigger"]
+transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 4, 0)
 shape = SubResource("5")
 
 [node name="EndPosition" type="Node3D" parent="."]

diff --git a/examples/JumperHard/Player.gd b/examples/JumperHard/Player.gd
@@ -136,7 +136,6 @@ func game_over():
 
 
 func update_reward():
-	ai_controller.reward -= 0.01  # step penalty
 	ai_controller.reward += shaping_reward()
 
 
@@ -176,7 +175,7 @@ func calculate_translation(other_pad_translation: Vector3) -> Vector3:
 func _on_First_Pad_Trigger_body_entered(_body):
 	if next != 0:
 		return
-	ai_controller.reward += 100.0
+	ai_controller.reward += 20.0
 	next = 1
 	reset_best_goal_distance()
 	second_jump_pad.position = calculate_translation(first_jump_pad.position)
@@ -185,12 +184,13 @@ func _on_First_Pad_Trigger_body_entered(_body):
 func _on_Second_Trigger_body_entered(_body):
 	if next != 1:
 		return
-	ai_controller.reward += 100.0
+	ai_controller.reward += 20.0
 	next = 0
 	reset_best_goal_distance()
 	first_jump_pad.position = calculate_translation(second_jump_pad.position)
 
 
 func _on_ResetTriggerBox_body_entered(_body):
 	ai_controller.done = true
+	ai_controller.reward -= 5.0
 	game_over()
diff --git a/examples/JumperHard/Player.tscn b/examples/JumperHard/Player.tscn
@@ -1,4 +1,4 @@
-[gd_scene load_steps=9 format=3 uid="uid://btjelqxpc6evr"]
+[gd_scene load_steps=8 format=3 uid="uid://btjelqxpc6evr"]
 
 [ext_resource type="Script" path="res://Player.gd" id="1"]
 [ext_resource type="Script" path="res://addons/godot_rl_agents/sensors/sensors_3d/RaycastSensor3D.gd" id="3"]
@@ -7,8 +7,6 @@
 
 [sub_resource type="CapsuleShape3D" id="CapsuleShape3D_k5nui"]
 
-[sub_resource type="CapsuleMesh" id="2"]
-
 [sub_resource type="Sky" id="4"]
 
 [sub_resource type="Environment" id="5"]
@@ -28,11 +26,6 @@ script = ExtResource("1")
 [node name="CollisionShape3D" type="CollisionShape3D" parent="."]
 shape = SubResource("CapsuleShape3D_k5nui")
 
-[node name="MeshInstance3D" type="MeshInstance3D" parent="."]
-transform = Transform3D(1, 0, 0, 0, -4.37114e-08, 1, 0, -1, -4.37114e-08, 0, 0, 0)
-visible = false
-mesh = SubResource("2")
-
 [node name="Camera3D" type="Camera3D" parent="."]
 transform = Transform3D(1, -1.54268e-08, 5.75736e-08, -1.54268e-08, 0.866025, 0.5, -5.75736e-08, -0.5, 0.866025, 0, 10, 13)
 environment = SubResource("5")

diff --git a/examples/JumperHard/addons/godot_rl_agents/controller/ai_controller_2d.gd b/examples/JumperHard/addons/godot_rl_agents/controller/ai_controller_2d.gd
@@ -16,6 +16,12 @@ enum ControlModes { INHERIT_FROM_SYNC, HUMAN, TRAINING, ONNX_INFERENCE, RECORD_E
 ## the recorded demonstrations.
 @export var action_repeat: int = 1
 
+@export_group("Multi-policy mode options")
+## Allows you to set certain agents to use different policies.
+## Changing this has no effect with the default SB3 training; it works with the RLlib example.
+## Tutorial: https://github.com/edbeeching/godot_rl_agents/blob/main/docs/TRAINING_MULTIPLE_POLICIES.md
+@export var policy_name: String = "shared_policy"
+
 var onnx_model: ONNXModel
 
 var heuristic := "human"
@@ -58,7 +64,7 @@ func get_action_space() -> Dictionary:
 
 
 func set_action(action) -> void:
-	assert(false, "the get set_action method is not implemented when extending from ai_controller")
+	assert(false, "the set_action method is not implemented when extending from ai_controller")
 
 
 #-----------------------------------------------------------------------------#
@@ -67,7 +73,7 @@ func set_action(action) -> void:
 #-- Methods that sometimes need implementing using the "extend script" option in Godot --#
 # Only needed if you are recording expert demos with this AIController
 func get_action() -> Array:
-	assert(false, "the get set_action method is not implemented in extended AIController but demo_recorder is used")
+	assert(false, "the get_action method is not implemented in extended AIController but demo_recorder is used")
 	return []
 
 # -----------------------------------------------------------------------------#

diff --git a/examples/JumperHard/addons/godot_rl_agents/controller/ai_controller_3d.gd b/examples/JumperHard/addons/godot_rl_agents/controller/ai_controller_3d.gd
@@ -16,6 +16,12 @@ enum ControlModes { INHERIT_FROM_SYNC, HUMAN, TRAINING, ONNX_INFERENCE, RECORD_E
 ## the recorded demonstrations.
 @export var action_repeat: int = 1
 
+@export_group("Multi-policy mode options")
+## Allows you to set certain agents to use different policies.
+## Changing this has no effect with the default SB3 training; it works with the RLlib example.
+## Tutorial: https://github.com/edbeeching/godot_rl_agents/blob/main/docs/TRAINING_MULTIPLE_POLICIES.md
+@export var policy_name: String = "shared_policy"
+
 var onnx_model: ONNXModel
 
 var heuristic := "human"
@@ -49,7 +55,7 @@ func get_reward() -> float:
 func get_action_space() -> Dictionary:
 	assert(
 		false,
-		"the get get_action_space method is not implemented when extending from ai_controller"
+		"the get_action_space method is not implemented when extending from ai_controller"
 	)
 	return {
 		"example_actions_continous": {"size": 2, "action_type": "continuous"},
@@ -58,7 +64,7 @@ func get_action_space() -> Dictionary:
 
 
 func set_action(action) -> void:
-	assert(false, "the get set_action method is not implemented when extending from ai_controller")
+	assert(false, "the set_action method is not implemented when extending from ai_controller")
 
 
 #-----------------------------------------------------------------------------#
@@ -67,7 +73,7 @@ func set_action(action) -> void:
 #-- Methods that sometimes need implementing using the "extend script" option in Godot --#
 # Only needed if you are recording expert demos with this AIController
 func get_action() -> Array:
-	assert(false, "the get set_action method is not implemented in extended AIController but demo_recorder is used")
+	assert(false, "the get_action method is not implemented in extended AIController but demo_recorder is used")
 	return []
 
 # -----------------------------------------------------------------------------#

diff --git a/examples/JumperHard/addons/godot_rl_agents/onnx/csharp/ONNXInference.cs b/examples/JumperHard/addons/godot_rl_agents/onnx/csharp/ONNXInference.cs
@@ -19,16 +19,22 @@ public partial class ONNXInference : GodotObject
 
 		private SessionOptions SessionOpt;
 
-		//init function
-		/// <include file='docs/ONNXInference.xml' path='docs/members[@name="ONNXInference"]/Initialize/*'/>
-		public void Initialize(string Path, int BatchSize)
+        /// <summary>
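+        /// Stores the model path and batch size, configures the session options and loads the ONNX model.
+        /// </summary>
+        /// <param name="Path">Path of the ONNX model to load</param>
+        /// <param name="BatchSize">Batch size stored for this inferencer</param>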
+        /// <returns>Returns the output size of the model</returns>
+        public int Initialize(string Path, int BatchSize)
 		{
 			modelPath = Path;
 			batchSize = BatchSize;
             SessionOpt = SessionConfigurator.MakeConfiguredSessionOptions();
             session = LoadModel(modelPath);
+            return session.OutputMetadata["output"].Dimensions[1];
+        }
+
 
-		}
 		/// <include file='docs/ONNXInference.xml' path='docs/members[@name="ONNXInference"]/Run/*'/>
 		public Godot.Collections.Dictionary<string, Godot.Collections.Array<float>> RunInference(Godot.Collections.Array<float> obs, int state_ins)
 		{

diff --git a/examples/JumperHard/addons/godot_rl_agents/onnx/wrapper/ONNX_wrapper.gd b/examples/JumperHard/addons/godot_rl_agents/onnx/wrapper/ONNX_wrapper.gd
@@ -4,12 +4,21 @@ var inferencer_script = load("res://addons/godot_rl_agents/onnx/csharp/ONNXInfer
 
 var inferencer = null
 
+## How many action values the model outputs
+var action_output_size: int
+
+## Used to differentiate models
+## that only output continuous action mean (e.g. sb3, cleanrl export)
+## versus models that output mean and logstd (e.g. rllib export)
+var action_means_only: bool
+
+## Whether action_means_only has been set already for this model
+var action_means_only_set: bool
 
 # Must provide the path to the model and the batch size
 func _init(model_path, batch_size):
 	inferencer = inferencer_script.new()
-	inferencer.Initialize(model_path, batch_size)
-
+	action_output_size = inferencer.Initialize(model_path, batch_size)
 
 # This function is the one that will be called from the game,
 # requires the observation as an array and the state_ins as an int
@@ -25,3 +34,18 @@ func _notification(what):
 	if what == NOTIFICATION_PREDELETE:
 		inferencer.FreeDisposables()
 		inferencer.free()
+
+# Marks the check as done, then sets action_means_only if all actions are continuous and their sizes sum to action_output_size
+func set_action_means_only(agent_action_space):
+	action_means_only_set = true
+	var continuous_only: bool = true
+	var continuous_actions: int
+	for action in agent_action_space:
+		if not agent_action_space[action]["action_type"] == "continuous":
+			continuous_only = false
+			break
+		else:
+			continuous_actions += agent_action_space[action]["size"]
+	if continuous_only:
+		if continuous_actions == action_output_size:
+			action_means_only = true