-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgame_environment.py
More file actions
177 lines (144 loc) · 7.07 KB
/
game_environment.py
File metadata and controls
177 lines (144 loc) · 7.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import numpy, gymnasium
from ball import Ball
from player_ball import PlayerBall
from typing import Optional
# RGB colour triples used when constructing the balls.
RED = (255, 0, 0)
GREEN = (0, 102, 0)
BLUE = (0, 128, 255)

# Maps the colour names used as ball-inventory keys to their RGB triple.
COLORS = dict(red=RED, green=GREEN)
def generate_random_speed_vector(minspeed, maxspeed):
    """Return a random ``[vx, vy]`` velocity drawn from NumPy's global RNG.

    The magnitude of each component is an integer drawn from
    ``numpy.random.randint(minspeed, maxspeed + 1)``, i.e. ``maxspeed`` is
    inclusive. Each component is then negated independently with
    probability 0.5.
    """
    upper_bound = maxspeed + 1

    # Both magnitudes are drawn before either sign so that the sequence of
    # RNG calls (and therefore seeded output) stays the same.
    speed_x = numpy.random.randint(minspeed, upper_bound)
    speed_y = numpy.random.randint(minspeed, upper_bound)
    negate_x = numpy.random.random_sample() < 0.5
    negate_y = numpy.random.random_sample() < 0.5

    return [
        -speed_x if negate_x else speed_x,
        -speed_y if negate_y else speed_y,
    ]
def generate_random_starting_position(maxdistance, display_height, display_width):
    """Return a random ``[x, y]`` start position close to a display corner.

    An integer offset between 8 and ``maxdistance`` (inclusive) is drawn for
    each axis from NumPy's global RNG. Each offset is then, independently
    and with probability 0.5, measured from the far edge instead
    (``display_width - x`` / ``display_height - y``).
    """
    upper_bound = maxdistance + 1

    # Draw both offsets first, then both mirror decisions, keeping the
    # sequence of RNG calls fixed for seeded runs.
    offset_x = numpy.random.randint(8, upper_bound)
    offset_y = numpy.random.randint(8, upper_bound)
    mirror_x = numpy.random.random_sample() < 0.5
    mirror_y = numpy.random.random_sample() < 0.5

    return [
        display_width - offset_x if mirror_x else offset_x,
        display_height - offset_y if mirror_y else offset_y,
    ]
class GameEnvironment( gymnasium.Env ):
# --- ADDED METADATA FOR RENDER MODE SUPPORT ---
metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 60}
def __init__(self, display_shape, dt):
self.display_width, self.display_height = display_shape[0], display_shape[1]
self.dt = dt
self.motion_step = 5
self.max_speed = 150
self.action_space = gymnasium.spaces.Discrete(9) # 0: No move, 1: Up, 2: Up&Right, 3: Right, 4: Down&Right, 5: Down, 6: Down&Left, 7: Left, 8: Up&Left
self.observation_space = gymnasium.spaces.Dict({
'hero': gymnasium.spaces.Box(low=0.0, high=1.0, shape=(2,), dtype=numpy.float32),
'balls_position': gymnasium.spaces.Box(low=0.0, high=1.0, shape=(10,2), dtype=numpy.float32),
'balls_speed': gymnasium.spaces.Box(low=-1.0, high=1.0, shape=(10,2), dtype=numpy.float32),
'balls_color': gymnasium.spaces.Box(low=0, high=1, shape=(10,), dtype=numpy.int32),
'balls_status': gymnasium.spaces.Box(low=0, high=1, shape=(10,), dtype=numpy.int32),
})
self.balls = []
self.hero_ball = None
self.ball_inventory = {'red': 4, 'green': 6}
def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
super().reset(seed = seed)
self.balls = []
self.step_counter = 0
# NOTE: Using a fixed seed for demonstration, but typically let super().reset() handle it
if seed is not None:
numpy.random.seed(seed)
#Balls
self.ball_inventory = {'red': 4, 'green': 6}
for key, count in self.ball_inventory.items():
for i in range( count ):
init_pos = generate_random_starting_position(20, self.display_height, self.display_width)
init_speed = generate_random_speed_vector(self.max_speed / 2, self.max_speed)
ball = Ball(init_pos, init_speed, COLORS[key], (self.display_width, self.display_height))
self.balls.append( ball )
#Players balls
init_hero_pos = [self.display_width/2,self.display_height/4]
self.hero_ball = PlayerBall(init_hero_pos,BLUE,(self.display_width,self.display_height))
observation = self._get_obs()
info = self._get_info()
return observation, info
def _get_obs(self):
dim_array = numpy.array([self.display_width, self.display_height], dtype=numpy.float32)
hero_position_obs = self.hero_ball.pos / dim_array
balls_position_obs = numpy.zeros( (len(self.balls),2), dtype=numpy.float32 )
balls_speed_obs = numpy.zeros( (len(self.balls),2), dtype=numpy.float32 )
balls_color_obs = numpy.zeros( len(self.balls), dtype=numpy.int32 )
balls_status_obs = numpy.zeros( len(self.balls), dtype=numpy.int32 )
for i, ball in enumerate(self.balls):
balls_position_obs[i,:] = ball.pos / dim_array
balls_speed_obs[i,:] = ball.v / float( self.max_speed )
balls_color_obs[i] = int(ball.color == GREEN)
balls_status_obs[i] = int(ball.live)
return {
'hero': hero_position_obs,
'balls_position': balls_position_obs,
'balls_speed': balls_speed_obs,
'balls_color': balls_color_obs,
'balls_status': balls_status_obs
}
def _get_info(self):
return self.ball_inventory
def step(self, action):
terminated, truncated = False, False
x_change, y_change, reward = 0.0, 0.0, 0.0
self.step_counter += 1
if self.step_counter >= 1000:
truncated = True
# 0: No move, 1: Up, 2: Up&Right, 3: Right, 4: Down&Right, 5: Down, 6: Down&Left, 7: Left, 8: Up&Left
match action:
case 1: # Up
x_change, y_change = 0, -self.motion_step
case 2: # Up&Right
x_change, y_change = self.motion_step, -self.motion_step
case 3: # Right
x_change, y_change = self.motion_step, 0
case 4: # Down&Right
x_change, y_change = self.motion_step, self.motion_step
case 5: # Down
x_change, y_change = 0.0, self.motion_step
case 6: # Down&Left
x_change, y_change = -self.motion_step, self.motion_step
case 7: # Left
x_change, y_change = -self.motion_step, 0.0
case 8: # Up&Left
x_change, y_change = -self.motion_step, -self.motion_step
case _:
pass
max_distance = numpy.linalg.norm([self.display_width, self.display_height])
# Update hero ball position (only once)
self.hero_ball.update_position(x_change, y_change)
# Move balls to next position
for ball in self.balls:
ball.update_position( self.dt )
# Calculate reward
for ball in self.balls:
if ball.live:
# Check if Hero ball is hit
if Ball.check_hit(self.hero_ball.pos, self.hero_ball.r, ball.pos, ball.r):
if ball.color == GREEN:
reward += 0.25 # Reward for hitting Green Ball
ball.live = False
self.ball_inventory['green'] -= 1
else:
reward = -1.0 # Penalty for hitting Red and Exit
ball.live = False
terminated = True
break
# Check if any green or blue balls left
if self.ball_inventory['green'] == 0: # Exit if no green balls left reached
reward += 1.0
terminated = True
break
observation = self._get_obs()
info = self._get_info()
return observation, reward, terminated, truncated, info
def render(self, gamedisplay):
if gamedisplay:
self.hero_ball.draw(gamedisplay)
for ball in self.balls:
if ball.live:
# Draw the ball
ball.draw(gamedisplay)