# reward.py - custom reward-shaping functions for the game environment described below.
# Env state
# info = {
#     "x_pos",  # (int) The player's horizontal position in the level.
#     "y_pos",  # (int) The player's vertical position in the level.
#     "score",  # (int) The current score accumulated by the player.
#     "coins",  # (int) The number of coins the player has collected.
#     "time",   # (int) The remaining time for the level.
#     "flag_get",  # (bool) True if the player has reached the end flag (level completion).
#     "life"   # (int) The number of lives the player has left.
# }


# # simple actions_dim = 7
# SIMPLE_MOVEMENT = [
#     ["NOOP"],       # Do nothing.
#     ["right"],      # Move right.
#     ["right", "A"], # Move right and jump.
#     ["right", "B"], # Move right and run.
#     ["right", "A", "B"], # Move right, run, and jump.
#     ["A"],          # Jump straight up.
#     ["left"],       # Move left.
# ]
#-----------------------------------------------------------------------------
#獎勵函數
'''
get_coin_reward         : 根據硬幣數量變化提供額外獎勵

'''
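'''
Environment info (info)
1. "x_pos": horizontal position, used to judge how far the character has advanced
2. "y_pos": vertical position, used to analyse jumping or falling behaviour
3. "score": the player's current game score
4. "coins": number of coins collected
5. "time": remaining time
6. "flag_get": whether the end flag has been reached (level completed)
7. "life": number of lives the player has left
'''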

#===============to do===============================請自定義獎勵函數 至少7個(包含提供的)
# Example: rewards the player for collecting coins.
def get_coin_reward(info, reward, prev_info):
    """Add a bonus proportional to the change in collected coins.

    Args:
        info: Current step info; its ``'coins'`` entry is read.
        reward: Reward accumulated so far.
        prev_info: Previous step info; its ``'coins'`` entry is read.

    Returns:
        ``reward`` plus 50 for every coin gained since ``prev_info``. The
        difference is not clamped, so a lower coin count reduces the reward.
    """
    # The per-coin value (50) is a tunable choice.
    coins_gained = info['coins'] - prev_info['coins']
    return reward + coins_gained * 50

# Encourages jumping / height changes (an obstacle ahead can otherwise leave
# the player stuck).
def distance_y_offset_reward(info, reward, prev_info):
    """Add a small bonus whenever the vertical position changes.

    Args:
        info: Current step info; its ``'y_pos'`` entry is read.
        reward: Reward accumulated so far.
        prev_info: Previous step info; its ``'y_pos'`` entry is read.

    Returns:
        ``reward + 3`` when ``y_pos`` increased, ``reward + 1`` when it
        decreased, and ``reward`` unchanged when it stayed the same.
    """
    dy = info['y_pos'] - prev_info['y_pos']
    if dy > 0:
        return reward + 3
    if dy < 0:
        return reward + 1
    return reward

# Encourages horizontal movement and penalizes (near-)standing still.
def distance_x_offset_reward(info, reward, prev_info):
    """Adjust the reward according to the horizontal displacement this step.

    Args:
        info: Current step info; its ``'x_pos'`` entry is read.
        reward: Reward accumulated so far.
        prev_info: Previous step info; its ``'x_pos'`` entry is read.

    Returns:
        ``reward + 3`` when ``x_pos`` grew by more than 2, ``reward + 1`` when
        it shrank by more than 2, and ``reward - 5`` for any displacement in
        the closed range [-2, 2].
    """
    # NOTE(review): a large backward move earns +1 rather than a penalty;
    # confirm this is the intended shaping.
    dx = info['x_pos'] - prev_info['x_pos']
    if dx > 2:
        return reward + 3
    if dx < -2:
        return reward + 1
    return reward - 5

def speed_reward(info, reward, distance):
    """Reward progress beyond the farthest horizontal position seen so far.

    Args:
        info: Current step info; its ``'x_pos'`` and ``'time'`` entries are read.
        reward: Reward accumulated so far.
        distance: Farthest ``x_pos`` recorded before this step.

    Returns:
        A ``(reward, distance)`` tuple. When ``x_pos`` exceeds ``distance`` the
        reward grows by ``10 * (x_pos - distance) / (1 + time / 100)`` and the
        returned distance becomes ``x_pos``; otherwise both values are
        returned unchanged.
    """
    current_x = info['x_pos']
    if current_x > distance:
        # A larger 'time' value gives a larger divisor, hence a smaller bonus.
        divisor = 1 + info['time'] / 100
        bonus = 10 * (current_x - distance) / divisor
        return reward + bonus, current_x
    return reward, distance

# Encourages finishing the level (reaching the end flag).
def final_flag_reward(info, reward):
    """Scale the reward by 1.2 when the end flag has been reached.

    Args:
        info: Current step info; its ``'flag_get'`` entry is read.
        reward: Reward accumulated so far.

    Returns:
        ``reward * 1.2`` when ``info['flag_get']`` is truthy, else ``reward``
        unchanged. Because the bonus is multiplicative, a negative reward
        becomes more negative.
    """
    return reward * 1.2 if info['flag_get'] else reward

def score_reward(info, reward, prev_info):
    """Add the in-game score gained during this step to the reward.

    The previous expression, ``info['score'] - prev_info['score'] -
    info['score']``, reduced to ``-prev_info['score']``. That is never
    positive for non-negative scores, so the bonus never applied. The delta
    is now computed directly.

    Args:
        info: Current step info; its ``'score'`` entry is read.
        reward: Reward accumulated so far.
        prev_info: Previous step info; its ``'score'`` entry is read.

    Returns:
        ``reward`` plus the score increase since ``prev_info`` when that
        increase is positive; otherwise ``reward`` unchanged.
    """
    score_gained = info['score'] - prev_info['score']
    # Only positive deltas count, so a lower score never subtracts reward.
    if score_gained > 0:
        return reward + score_gained
    return reward
def death_penalty(info, reward, prev_info):
    """Subtract a large penalty when the life count has dropped.

    Args:
        info: Current step info; its ``'life'`` entry is read.
        reward: Reward accumulated so far.
        prev_info: Previous step info; its ``'life'`` entry is read.

    Returns:
        ``reward - 2000`` when ``info['life']`` is lower than
        ``prev_info['life']``; otherwise ``reward`` unchanged.
    """
    lost_life = info['life'] < prev_info['life']
    return reward - 2000 if lost_life else reward

def altitude_reward(info, reward, max_y=10):
    """Add a capped bonus based on the current vertical position.

    Args:
        info: Current step info; its ``'y_pos'`` entry is read.
        reward: Reward accumulated so far.
        max_y: Upper bound applied to ``y_pos`` before the bonus is computed.

    Returns:
        ``reward + 2 * min(y_pos, max_y)`` when ``y_pos`` is greater than 2;
        otherwise ``reward`` unchanged.
    """
    height = info['y_pos']
    if height > 2:
        # Cap the height so the bonus cannot grow without bound.
        return reward + min(height, max_y) * 2
    return reward

def alive_time_reward(info, reward, prev_info):
    """Add a bonus for the time that elapsed between two steps.

    Args:
        info: Current step info; its ``'time'`` entry is read.
        reward: Reward accumulated so far.
        prev_info: Previous step info; its ``'time'`` entry is read.

    Returns:
        ``reward`` plus 3 for every unit by which ``'time'`` decreased since
        ``prev_info``; ``reward`` unchanged when it did not decrease.
    """
    elapsed = prev_info['time'] - info['time']
    if elapsed > 0:
        return reward + elapsed * 3
    return reward

def stagnation_penalty(reward):
    """Return ``reward`` reduced by a fixed penalty of 1500.

    The function applies the penalty unconditionally; deciding when to call
    it is left to the caller.

    Args:
        reward: Reward accumulated so far.

    Returns:
        ``reward - 1500``.
    """
    penalty = 1500
    return reward - penalty
#===============to do==========================================