-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreward.py
More file actions
115 lines (101 loc) · 4.1 KB
/
reward.py
File metadata and controls
115 lines (101 loc) · 4.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# Env state
# info = {
# "x_pos", # (int) The player's horizontal position in the level.
# "y_pos", # (int) The player's vertical position in the level.
# "score", # (int) The current score accumulated by the player.
# "coins", # (int) The number of coins the player has collected.
# "time", # (int) The remaining time for the level.
# "flag_get", # (bool) True if the player has reached the end flag (level completion).
# "life" # (int) The number of lives the player has left.
# }
# # simple actions_dim = 7
# SIMPLE_MOVEMENT = [
# ["NOOP"], # Do nothing.
# ["right"], # Move right.
# ["right", "A"], # Move right and jump.
# ["right", "B"], # Move right and run.
# ["right", "A", "B"], # Move right, run, and jump.
# ["A"], # Jump straight up.
# ["left"], # Move left.
# ]
#-----------------------------------------------------------------------------
#獎勵函數
'''
get_coin_reward : 根據硬幣數量變化提供額外獎勵
'''
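'''
Environment info (info)
1. "x_pos": horizontal position, used to judge how far the character has advanced
2. "y_pos": vertical position, used to analyse jumping or falling behaviour
3. "score": the player's current game score
4. "coins": number of coins collected
5. "time": remaining time
6. "flag_get": whether the end flag has been reached (level completed)
7. "life": number of lives the player has left
'''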
#===============to do===============================請自定義獎勵函數 至少7個(包含提供的)
#例子:用來獎勵玩家蒐集硬幣的行為
def get_coin_reward(info, reward, prev_info):
    """Add a bonus for coins collected since the previous step.

    Args:
        info: Current step's info dict; reads ``info['coins']``.
        reward: Reward accumulated so far for this step.
        prev_info: Previous step's info dict; reads ``prev_info['coins']``.

    Returns:
        ``reward`` plus 50 per coin gained. If the coin count went down,
        the same weight is subtracted instead.
    """
    coins_gained = info['coins'] - prev_info['coins']
    # Each coin is weighted at 50; adjust this factor to retune the reward.
    return reward + coins_gained * 50
#用來鼓勵玩家進行跳躍或高度變化(因為有時前方有障礙物 會被卡住)
def distance_y_offset_reward(info, reward, prev_info):
    """Reward any change in vertical position between two steps.

    Args:
        info: Current step's info dict; reads ``info['y_pos']``.
        reward: Reward accumulated so far for this step.
        prev_info: Previous step's info dict; reads ``prev_info['y_pos']``.

    Returns:
        ``reward`` + 3 when ``y_pos`` increased, ``reward`` + 1 when it
        decreased, and ``reward`` unchanged when it stayed the same.
    """
    # NOTE(review): presumably a larger y_pos means the player is higher on
    # screen, so the bigger bonus goes to jumping -- confirm with the env.
    delta_y = info['y_pos'] - prev_info['y_pos']
    if delta_y > 0:
        return reward + 3
    if delta_y < 0:
        return reward + 1
    return reward
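# Encourages forward progress: +3 for moving right by more than 2, -5 for barely moving (|dx| <= 2); note that moving left by more than 2 still earns +1 rather than a penalty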
def distance_x_offset_reward(info, reward, prev_info):
    """Shape the reward by horizontal displacement since the previous step.

    Args:
        info: Current step's info dict; reads ``info['x_pos']``.
        reward: Reward accumulated so far for this step.
        prev_info: Previous step's info dict; reads ``prev_info['x_pos']``.

    Returns:
        ``reward`` + 3 when ``x_pos`` grew by more than 2,
        ``reward`` + 1 when it shrank by more than 2,
        ``reward`` - 5 otherwise (displacement within [-2, 2]).
    """
    delta_x = info['x_pos'] - prev_info['x_pos']
    if delta_x > 2:
        # Clear forward progress.
        return reward + 3
    if delta_x < -2:
        # Clear backward movement still earns a small bonus.
        return reward + 1
    # Little or no horizontal movement is penalised.
    return reward - 5
def speed_reward(info, reward, distance):
    """Reward reaching a new furthest x position, scaled by remaining time.

    Args:
        info: Current step's info dict; reads ``info['x_pos']`` and
            ``info['time']``.
        reward: Reward accumulated so far for this step.
        distance: Furthest ``x_pos`` recorded before this step.

    Returns:
        A ``(reward, distance)`` tuple. When ``x_pos`` exceeds ``distance``
        the reward grows by ``10 * (x_pos - distance) / (1 + time / 100)``
        and the returned distance becomes ``x_pos``; otherwise both values
        are returned unchanged.
    """
    current_x = info['x_pos']
    if current_x > distance:
        # A larger remaining time gives a larger divisor, i.e. a smaller bonus.
        time_divisor = 1 + info['time'] / 100
        progress_bonus = 10 * (current_x - distance) / time_divisor
        return reward + progress_bonus, current_x
    return reward, distance
#用來鼓勵玩家完成關卡(到達終點旗幟)
def final_flag_reward(info, reward):
    """Boost the reward by 20% of its magnitude when the flag is reached.

    Multiplying a negative reward by 1.2 would make it more negative and so
    punish finishing the level. The adjustment is therefore applied so that
    it always moves the reward upward.

    Args:
        info: Current step's info dict; reads ``info['flag_get']``.
        reward: Reward accumulated so far for this step.

    Returns:
        ``reward`` unchanged when the flag was not reached. Otherwise
        ``reward * 1.2`` for a non-negative reward, or ``reward * 0.8`` for
        a negative one (the penalty shrinks by 20%). A reward of exactly 0
        stays 0.
    """
    if not info['flag_get']:
        return reward
    if reward >= 0:
        return reward * 1.2
    # Negative reward: scale toward zero so the flag never lowers the total.
    return reward * 0.8
def score_reward(info, reward, prev_info):
    """Add the in-game score gained since the previous step to the reward.

    The earlier expression subtracted ``info['score']`` a second time, which
    reduced it to ``-prev_info['score']``; that is never positive for a
    non-negative score, so the bonus never applied. The real score delta is
    used here.

    Args:
        info: Current step's info dict; reads ``info['score']``.
        reward: Reward accumulated so far for this step.
        prev_info: Previous step's info dict; reads ``prev_info['score']``.

    Returns:
        ``reward`` plus the score increase when the score went up, otherwise
        ``reward`` unchanged.
    """
    score_gain = info['score'] - prev_info['score']
    if score_gain > 0:
        return reward + score_gain
    return reward
def death_penalty(info, reward, prev_info):
    """Subtract a large penalty when the life count dropped this step.

    Args:
        info: Current step's info dict; reads ``info['life']``.
        reward: Reward accumulated so far for this step.
        prev_info: Previous step's info dict; reads ``prev_info['life']``.

    Returns:
        ``reward`` - 2000 when ``life`` is lower than in ``prev_info``,
        otherwise ``reward`` unchanged.
    """
    life_lost = prev_info['life'] > info['life']
    return reward - 2000 if life_lost else reward
def altitude_reward(info, reward, max_y=10):
    """Add a capped bonus proportional to the current vertical position.

    Args:
        info: Current step's info dict; reads ``info['y_pos']``.
        reward: Reward accumulated so far for this step.
        max_y: Upper bound on the ``y_pos`` value that counts toward the bonus.

    Returns:
        ``reward`` plus ``2 * min(y_pos, max_y)`` when ``y_pos`` is greater
        than 2, otherwise ``reward`` unchanged.
    """
    height = info['y_pos']
    if height > 2:
        # Cap the height so the bonus cannot grow without bound.
        capped_height = min(height, max_y)
        return reward + capped_height * 2
    return reward
def alive_time_reward(info, reward, prev_info):
    """Reward the amount of game time consumed since the previous step.

    Args:
        info: Current step's info dict; reads ``info['time']``.
        reward: Reward accumulated so far for this step.
        prev_info: Previous step's info dict; reads ``prev_info['time']``.

    Returns:
        ``reward`` plus 3 per unit by which ``time`` decreased, or ``reward``
        unchanged when ``time`` did not decrease.
    """
    elapsed = prev_info['time'] - info['time']
    return reward + elapsed * 3 if elapsed > 0 else reward
def stagnation_penalty(reward):
    """Apply a fixed penalty of 1500 to the reward.

    Args:
        reward: Reward accumulated so far for this step.

    Returns:
        ``reward`` - 1500. Deciding when the agent counts as stagnating is
        left to the caller.
    """
    penalty = 1500
    return reward - penalty
#===============to do==========================================