-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.txt
More file actions
34 lines (26 loc) · 1.28 KB
/
config.txt
File metadata and controls
34 lines (26 loc) · 1.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Reward-shaping snippet: sums performance, stability, smoothness and slip
# terms into a single scalar `total_reward` and returns it.
# NOTE(review): this is a fragment of a method body (it reads `self.*` and ends
# in `return`); the enclosing `def` and the definitions of vx, ax, yaw_rate,
# slip, current_torque and last_torque are not part of this file.
# 1. Performance reward
# Speed term has weight 0, so for finite vx it contributes nothing.
r_speed = 0 * vx
# Acceleration term, weight 1.0.
r_accel = 1.0 * ax
# Effort term: mean torque normalized by self.max_torque, weight -2.
# NOTE(review): the original note said the effort weight was lowered slightly
# (4.0 -> 3.0) to reduce over-aggressive behavior, but the coefficient here is
# -2 -- confirm which value is intended.
r_effort = -2 * (np.mean(current_torque) / self.max_torque)
# 2. Stability penalties
# Left/right consistency: absolute torque difference between elements 0 and 1
# and between elements 2 and 3 (presumably the front and rear wheel pairs --
# confirm the index layout against the caller).
diff_front = abs(current_torque[0] - current_torque[1])
diff_rear = abs(current_torque[2] - current_torque[3])
# Sum of both differences, normalized by self.max_torque, weight -50.0.
r_consistency = -50.0 * ((diff_front + diff_rear) / self.max_torque)
# Yaw rate: penalize its magnitude, weight -2.0.
r_yaw = -2.0 * abs(yaw_rate)
# 3. [New] Action smoothness penalty
# Absolute difference between the current torque and the previous step's
# torque, averaged and normalized by self.max_torque.
# The original note assumes a maximum jump of 500 Nm, i.e. 1.0 after
# normalization (implies max_torque is 500 Nm -- TODO confirm).
# NOTE(review): the original note says a 50 Nm (10%) jump per step gives
# delta=0.1 and a penalty of -5.0, but with the -20.0 weight below delta=0.1
# yields -2.0 -- the note looks stale.
delta_torque = np.abs(current_torque - last_torque)
mean_delta = np.mean(delta_torque) / self.max_torque
r_smooth = -20.0 * mean_delta
# 4. Slip ratio constraint
# Mean absolute deviation of slip from self.target_slip_ratio, weight -10.0.
avg_slip_err = np.mean(np.abs(slip - self.target_slip_ratio))
r_slip = -10.0 * avg_slip_err
# Additional flat penalty of 20.0 when any |slip| value exceeds 0.8.
# NOTE(review): indentation was lost in this copy of the snippet; the
# `r_slip -= 20.0` line is presumably the body of this `if` and must be
# indented before the code can run.
if np.any(np.abs(slip) > 0.8):
r_slip -= 20.0
# Total reward is the plain sum of all seven terms.
total_reward = r_speed + r_accel + r_effort + r_consistency + r_yaw + r_slip + r_smooth
return total_reward