|
|
| import gymnasium as gym |
| from gymnasium import spaces |
| import numpy as np |
|
|
| |
# Indices into the 5-element observation/state vector used by MiRobotEnv.
CMD_AVANCER = 0   # "move forward" master command flag (0.0 / 1.0)
CMD_TOURNER = 1   # "turn" master command flag (0.0 / 1.0)
ETAT_FAIM = 2     # hunger level, clipped to [0, 1]
ETAT_SOMMEIL = 3  # sleepiness level, clipped to [0, 1]
ETAT_HUMEUR = 4   # mood, clipped to [-1, 1]


# Discrete action identifiers (action_space is Discrete(4)).
ACTION_ARRETER = 0   # stop
ACTION_AVANCER = 1   # move forward
ACTION_TOURNE_G = 2  # turn left
ACTION_TOURNE_D = 3  # turn right
|
|
|
|
class MiRobotEnv(gym.Env):
    """Simulation environment for MiRobot, a robot puppy learning to react
    to its master's commands and to its own internal state.

    Observation (Box, float32, shape (5,)):
        [CMD_AVANCER]  "move forward" command flag (0.0 or 1.0)
        [CMD_TOURNER]  "turn" command flag (0.0 or 1.0)
        [ETAT_FAIM]    hunger level in [0, 1]
        [ETAT_SOMMEIL] sleepiness level in [0, 1]
        [ETAT_HUMEUR]  mood in [-1, 1]

    Actions (Discrete(4)): stop, move forward, turn left, turn right.
    """

    metadata = {"render_modes": ["human"], "render_fps": 30}

    def __init__(self):
        super().__init__()  # zero-arg super: consistent with modern gymnasium style

        # Four discrete actions: stop / forward / turn left / turn right.
        self.action_space = spaces.Discrete(4)

        # Bounds follow the observation layout documented on the class;
        # only mood may go negative.
        low = np.array([0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32)
        high = np.array([1.0, 1.0, 1.0, 1.0, 1.0], dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

        # Internal state vector; populated on the first reset().
        self.state = None

    def reset(self, seed=None, options=None):
        """Reset the environment.

        Returns:
            (observation, info): a fresh copy of the state vector and an
            empty info dict, per the gymnasium API.
        """
        super().reset(seed=seed)  # seeds self.np_random

        self.state = np.array([
            0.0,  # no pending "forward" command
            0.0,  # no pending "turn" command
            self.np_random.uniform(low=0.0, high=0.2),   # mild initial hunger
            self.np_random.uniform(low=0.0, high=0.2),   # mild initial sleepiness
            self.np_random.uniform(low=-0.1, high=0.1),  # near-neutral mood
        ], dtype=np.float32)

        # Return a copy: self.state is mutated in place on every step, and
        # returning the array itself would silently change observations the
        # caller has already stored (aliasing bug).
        return self.state.copy(), {}

    def _update_internal_states(self):
        """Advance hunger and sleepiness; decay mood when needs are unmet."""
        self.state[ETAT_FAIM] = np.clip(self.state[ETAT_FAIM] + 0.005, 0.0, 1.0)
        self.state[ETAT_SOMMEIL] = np.clip(self.state[ETAT_SOMMEIL] + 0.003, 0.0, 1.0)

        # Mood drops while hungry or sleepy, otherwise slowly recovers.
        humeur_decay = -0.005 if (self.state[ETAT_FAIM] > 0.5 or self.state[ETAT_SOMMEIL] > 0.5) else 0.001
        self.state[ETAT_HUMEUR] = np.clip(self.state[ETAT_HUMEUR] + humeur_decay, -1.0, 1.0)

    def _calculate_reward(self, action):
        """Return the scalar reward for taking `action` in the current state."""
        reward = 0.0

        # Obeying the "forward" command pays off; stopping instead is penalized.
        if self.state[CMD_AVANCER] > 0.5:
            if action == ACTION_AVANCER:
                reward += 1.0
            elif action == ACTION_ARRETER:
                reward -= 0.5

        # Either turn direction satisfies the "turn" command.
        if self.state[CMD_TOURNER] > 0.5:
            if action in (ACTION_TOURNE_G, ACTION_TOURNE_D):
                reward += 1.0
            elif action == ACTION_AVANCER:
                reward -= 0.5

        # Unmet needs cost reward; good mood grants a small bonus.
        reward -= self.state[ETAT_FAIM] * 0.1
        reward -= self.state[ETAT_SOMMEIL] * 0.1
        reward += self.state[ETAT_HUMEUR] * 0.1

        # Cast so callers always receive a plain Python float rather than
        # an np.float32 that leaked in via the state arithmetic.
        return float(reward)

    def _simulate_user_command(self):
        """With probability 0.2, replace the current master command.

        The new command is drawn uniformly from {none, forward, turn}.
        """
        if self.np_random.random() < 0.2:
            self.state[CMD_AVANCER] = 0.0
            self.state[CMD_TOURNER] = 0.0

            # integers(0, 3) draws from {0, 1, 2}: 0 = no command.
            choice = self.np_random.integers(0, 3)
            if choice == 1:
                self.state[CMD_AVANCER] = 1.0
            elif choice == 2:
                self.state[CMD_TOURNER] = 1.0

    def step(self, action):
        """Advance the simulation by one step.

        Returns:
            (observation, reward, terminated, truncated, info) per the
            gymnasium API. The episode never terminates on its own; wrap
            the env in a TimeLimit to bound episode length.
        """
        self._update_internal_states()
        reward = self._calculate_reward(action)
        self._simulate_user_command()

        # Copy for the same aliasing reason as in reset().
        return self.state.copy(), reward, False, False, {}

    def render(self, mode='human'):
        """Print a human-readable snapshot of the robot's state."""
        faim = self.state[ETAT_FAIM] * 100
        sommeil = self.state[ETAT_SOMMEIL] * 100
        humeur = self.state[ETAT_HUMEUR]
        cmd_a = "OUI" if self.state[CMD_AVANCER] > 0.5 else "NON"
        cmd_t = "OUI" if self.state[CMD_TOURNER] > 0.5 else "NON"

        # User-facing text intentionally kept in French.
        print("--- État MiRobot ---")
        print(f" > Commande Avancer: {cmd_a}, Commande Tourner: {cmd_t}")
        print(f" > Faim: {faim:.0f}%, Sommeil: {sommeil:.0f}%")
        print(f" > Humeur: {humeur:.2f} (entre -1.0 et 1.0)")

    def close(self):
        """No external resources to release."""
        pass
|
|