# bouncingballs/game_environment.py (vegapit/bouncingballs, master branch)
import numpy, gymnasium
from ball import Ball
from player_ball import PlayerBall
from typing import Optional

# Colour triples handed to the Ball / PlayerBall constructors below.
RED = (255, 0, 0)
GREEN = (0, 102, 0)
BLUE = (0, 128, 255)

# Maps a ball-inventory key to the colour of the balls created for it.
COLORS = dict(red=RED, green=GREEN)

def generate_random_speed_vector(minspeed, maxspeed):
    """Draw a random 2-D velocity whose components are signed independently.

    Each component magnitude is sampled with ``numpy.random.randint`` from
    ``minspeed`` up to and including ``maxspeed``; each component is then
    negated with probability 0.5.

    Args:
        minspeed: Smallest magnitude a component may take.
        maxspeed: Largest magnitude a component may take (inclusive).

    Returns:
        A two-element list ``[vx, vy]``.
    """
    upper = maxspeed + 1  # randint excludes its upper bound
    # Draw order (both magnitudes first, then both coin flips) is kept so that
    # results under a seeded global RNG stay reproducible.
    components = [
        numpy.random.randint(minspeed, upper),
        numpy.random.randint(minspeed, upper),
    ]
    for axis in (0, 1):
        if numpy.random.random_sample() < 0.5:
            components[axis] = -components[axis]
    return components

def generate_random_starting_position(maxdistance, display_height, display_width):
    """Pick a random start point close to one of the four display corners.

    An offset between 8 and ``maxdistance`` (inclusive) is drawn for each axis
    with ``numpy.random.randint``. With probability 0.5 per axis, the offset is
    measured from the far edge (``display_width`` / ``display_height``)
    instead of from zero.

    Args:
        maxdistance: Largest offset from an edge (inclusive); must be >= 8.
        display_height: Display extent along y.
        display_width: Display extent along x.

    Returns:
        A two-element list ``[x, y]``.
    """
    upper = maxdistance + 1  # randint excludes its upper bound
    # Both offsets are drawn before either coin flip so seeded runs reproduce.
    offset_x = numpy.random.randint(8, upper)
    offset_y = numpy.random.randint(8, upper)

    from_far_x = numpy.random.random_sample() < 0.5
    x = display_width - offset_x if from_far_x else offset_x

    from_far_y = numpy.random.random_sample() < 0.5
    y = display_height - offset_y if from_far_y else offset_y

    return [x, y]

class GameEnvironment( gymnasium.Env ):

    # --- ADDED METADATA FOR RENDER MODE SUPPORT ---
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 60}

    def __init__(self, display_shape, dt):
        self.display_width, self.display_height = display_shape[0], display_shape[1]
        self.dt = dt
        self.motion_step = 5
        self.max_speed = 150

        self.action_space = gymnasium.spaces.Discrete(9) # 0: No move, 1: Up, 2: Up&Right, 3: Right, 4: Down&Right, 5: Down, 6: Down&Left, 7: Left, 8: Up&Left

        self.observation_space = gymnasium.spaces.Dict({
            'hero': gymnasium.spaces.Box(low=0.0, high=1.0, shape=(2,), dtype=numpy.float32),
            'balls_position': gymnasium.spaces.Box(low=0.0, high=1.0, shape=(10,2), dtype=numpy.float32),
            'balls_speed': gymnasium.spaces.Box(low=-1.0, high=1.0, shape=(10,2), dtype=numpy.float32),
            'balls_color': gymnasium.spaces.Box(low=0, high=1, shape=(10,), dtype=numpy.int32),
            'balls_status': gymnasium.spaces.Box(low=0, high=1, shape=(10,), dtype=numpy.int32),
        })

        self.balls = []
        self.hero_ball = None
        self.ball_inventory = {'red': 4, 'green': 6}

    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
        super().reset(seed = seed)

        self.balls = []
        self.step_counter = 0

        # NOTE: Using a fixed seed for demonstration, but typically let super().reset() handle it
        if seed is not None:
            numpy.random.seed(seed)

        #Balls
        self.ball_inventory = {'red': 4, 'green': 6}
        for key, count in self.ball_inventory.items():
            for i in range( count ):
                init_pos = generate_random_starting_position(20, self.display_height, self.display_width)
                init_speed = generate_random_speed_vector(self.max_speed / 2, self.max_speed)
                ball = Ball(init_pos, init_speed, COLORS[key], (self.display_width, self.display_height))
                self.balls.append( ball )

        #Players balls
        init_hero_pos = [self.display_width/2,self.display_height/4]
        self.hero_ball = PlayerBall(init_hero_pos,BLUE,(self.display_width,self.display_height))

        observation = self._get_obs()
        info = self._get_info()

        return observation, info

    def _get_obs(self):
        dim_array = numpy.array([self.display_width, self.display_height], dtype=numpy.float32)

        hero_position_obs = self.hero_ball.pos / dim_array

        balls_position_obs = numpy.zeros( (len(self.balls),2), dtype=numpy.float32 )
        balls_speed_obs = numpy.zeros( (len(self.balls),2), dtype=numpy.float32 )
        balls_color_obs = numpy.zeros( len(self.balls), dtype=numpy.int32 )
        balls_status_obs = numpy.zeros( len(self.balls), dtype=numpy.int32 )

        for i, ball in enumerate(self.balls):
            balls_position_obs[i,:] = ball.pos / dim_array
            balls_speed_obs[i,:] = ball.v / float( self.max_speed )
            balls_color_obs[i] = int(ball.color == GREEN)
            balls_status_obs[i] = int(ball.live)

        return {
            'hero': hero_position_obs,
            'balls_position': balls_position_obs,
            'balls_speed': balls_speed_obs,
            'balls_color': balls_color_obs,
            'balls_status': balls_status_obs
        }

    def _get_info(self):
        return self.ball_inventory

    def step(self, action):
        terminated, truncated = False, False
        x_change, y_change, reward = 0.0, 0.0, 0.0

        self.step_counter += 1
        if self.step_counter >= 1000:
            truncated = True

        # 0: No move, 1: Up, 2: Up&Right, 3: Right, 4: Down&Right, 5: Down, 6: Down&Left, 7: Left, 8: Up&Left
        match action:
            case 1:  # Up
                x_change, y_change = 0, -self.motion_step
            case 2:  # Up&Right
                x_change, y_change = self.motion_step, -self.motion_step
            case 3:  # Right
                x_change, y_change = self.motion_step, 0
            case 4:  # Down&Right
                x_change, y_change = self.motion_step, self.motion_step
            case 5:  # Down
                x_change, y_change = 0.0, self.motion_step
            case 6:  # Down&Left
                x_change, y_change = -self.motion_step, self.motion_step
            case 7:  # Left
                x_change, y_change = -self.motion_step, 0.0
            case 8:  # Up&Left
                x_change, y_change = -self.motion_step, -self.motion_step
            case _:
                pass

        max_distance = numpy.linalg.norm([self.display_width, self.display_height])

        # Update hero ball position (only once)
        self.hero_ball.update_position(x_change, y_change)

        # Move balls to next position
        for ball in self.balls:
            ball.update_position( self.dt )

        # Calculate reward
        for ball in self.balls:
            if ball.live:
                # Check if Hero ball is hit
                if Ball.check_hit(self.hero_ball.pos, self.hero_ball.r, ball.pos, ball.r):
                    if ball.color == GREEN:
                        reward += 0.25 # Reward for hitting Green Ball
                        ball.live = False
                        self.ball_inventory['green'] -= 1
                    else:
                        reward = -1.0 # Penalty for hitting Red and Exit
                        ball.live = False
                        terminated = True
                        break

                # Check if any green or blue balls left
                if self.ball_inventory['green'] == 0: # Exit if no green balls left reached
                    reward += 1.0
                    terminated = True
                    break

        observation = self._get_obs()
        info = self._get_info()

        return observation, reward, terminated, truncated, info

    def render(self, gamedisplay):
        if gamedisplay:
            self.hero_ball.draw(gamedisplay)
            for ball in self.balls:
                if ball.live:
                    # Draw the ball
                    ball.draw(gamedisplay)