Upload 4 files
- MiRobotEnv.py (+118 -0)
- mirobot_final_model.zip (+3 -0)
- mirobot_ppo_40000_steps-1.zip (+3 -0)
- mirobot_ppo_80000_steps.zip (+3 -0)
MiRobotEnv.py
ADDED
@@ -0,0 +1,118 @@

import gymnasium as gym
from gymnasium import spaces
import numpy as np

# Constants for the states and rewards (observation indices)
CMD_AVANCER = 0
CMD_TOURNER = 1
ETAT_FAIM = 2
ETAT_SOMMEIL = 3
ETAT_HUMEUR = 4

# Actions
ACTION_ARRETER = 0
ACTION_AVANCER = 1
ACTION_TOURNE_G = 2
ACTION_TOURNE_D = 3


class MiRobotEnv(gym.Env):
    """Simulation environment for MiRobot, a robot puppy learning
    to react to its master's commands and to its own internal state.
    """
    metadata = {"render_modes": ["human"], "render_fps": 30}

    def __init__(self):
        super().__init__()

        self.action_space = spaces.Discrete(4)

        # Observation: [forward command, turn command, hunger, sleepiness, mood]
        low = np.array([0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32)
        high = np.array([1.0, 1.0, 1.0, 1.0, 1.0], dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

        self.state = None

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)

        initial_state = np.array([
            0.0,
            0.0,
            self.np_random.uniform(low=0.0, high=0.2),
            self.np_random.uniform(low=0.0, high=0.2),
            self.np_random.uniform(low=-0.1, high=0.1)
        ], dtype=np.float32)

        self.state = initial_state

        info = {}
        return self.state, info

    def _update_internal_states(self):
        # Hunger and sleepiness grow at each step
        self.state[ETAT_FAIM] = np.clip(self.state[ETAT_FAIM] + 0.005, 0.0, 1.0)
        self.state[ETAT_SOMMEIL] = np.clip(self.state[ETAT_SOMMEIL] + 0.003, 0.0, 1.0)

        # Mood decays when the robot is hungry or sleepy, and recovers slowly otherwise
        humeur_decay = -0.005 if (self.state[ETAT_FAIM] > 0.5 or self.state[ETAT_SOMMEIL] > 0.5) else 0.001
        self.state[ETAT_HUMEUR] = np.clip(self.state[ETAT_HUMEUR] + humeur_decay, -1.0, 1.0)

    def _calculate_reward(self, action):
        reward = 0.0

        # Reward obeying the active command, penalize contradicting it
        if self.state[CMD_AVANCER] > 0.5:
            if action == ACTION_AVANCER:
                reward += 1.0
            elif action == ACTION_ARRETER:
                reward -= 0.5

        if self.state[CMD_TOURNER] > 0.5:
            if action == ACTION_TOURNE_G or action == ACTION_TOURNE_D:
                reward += 1.0
            elif action == ACTION_AVANCER:
                reward -= 0.5

        # Penalize hunger and sleepiness, reward good mood
        reward -= self.state[ETAT_FAIM] * 0.1
        reward -= self.state[ETAT_SOMMEIL] * 0.1

        reward += self.state[ETAT_HUMEUR] * 0.1

        return reward

    def _simulate_user_command(self):
        # With 20% probability, clear the current command and draw a new one
        if self.np_random.random() < 0.2:
            self.state[CMD_AVANCER] = 0.0
            self.state[CMD_TOURNER] = 0.0

            choice = self.np_random.integers(0, 3)  # 0 = no command, 1 = forward, 2 = turn

            if choice == 1:
                self.state[CMD_AVANCER] = 1.0
            elif choice == 2:
                self.state[CMD_TOURNER] = 1.0

    def step(self, action):
        self._update_internal_states()
        reward = self._calculate_reward(action)
        self._simulate_user_command()

        # Episodes never end on their own
        terminated = False
        truncated = False
        info = {}

        return self.state, reward, terminated, truncated, info

    def render(self, mode='human'):
        faim = self.state[ETAT_FAIM] * 100
        sommeil = self.state[ETAT_SOMMEIL] * 100
        humeur = self.state[ETAT_HUMEUR]
        cmd_a = "YES" if self.state[CMD_AVANCER] > 0.5 else "NO"
        cmd_t = "YES" if self.state[CMD_TOURNER] > 0.5 else "NO"

        print("--- MiRobot state ---")
        print(f" > Forward command: {cmd_a}, Turn command: {cmd_t}")
        print(f" > Hunger: {faim:.0f}%, Sleepiness: {sommeil:.0f}%")
        print(f" > Mood: {humeur:.2f} (between -1.0 and 1.0)")

    def close(self):
        pass
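The *_steps.zip checkpoint names in this commit match the file-naming convention of stable-baselines3's CheckpointCallback, which suggests the models were trained with SB3 PPO. The sketch below is an assumption, not the actual training script (which is not part of this commit): the TimeLimit horizon, save_freq, and total_timesteps are illustrative values chosen to match the 40000- and 80000-step checkpoint names.

from gymnasium.wrappers import TimeLimit
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback

from MiRobotEnv import MiRobotEnv

# MiRobotEnv never terminates on its own, so cap episodes (hypothetical horizon)
env = TimeLimit(MiRobotEnv(), max_episode_steps=500)

# Save mirobot_ppo_<n>_steps.zip every 40 000 steps, matching the uploaded names
checkpoint_callback = CheckpointCallback(
    save_freq=40_000,
    save_path=".",
    name_prefix="mirobot_ppo",
)

model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=80_000, callback=checkpoint_callback)
model.save("mirobot_final_model")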
mirobot_final_model.zip
ADDED
@@ -0,0 +1,3 @@

version https://git-lfs.github.com/spec/v1
oid sha256:f1789b7ac0a570953fb47d04022f270acf47f0327a74a5e8780e4b902886a73e
size 139650
mirobot_ppo_40000_steps-1.zip
ADDED
@@ -0,0 +1,3 @@

version https://git-lfs.github.com/spec/v1
oid sha256:b169f19d5c619d17838b0ac426328dde3e0f76f7d69d75131739345eade74509
size 139636
mirobot_ppo_80000_steps.zip
ADDED
@@ -0,0 +1,3 @@

version https://git-lfs.github.com/spec/v1
oid sha256:339d8e80cd99da3149848e484a8f00a9d0238686e967403c227d51969e3f2e1f
size 139648
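The three zip entries above are Git LFS pointer files, not the model weights themselves; running "git lfs pull" fetches the actual archives. Assuming they are stable-baselines3 PPO checkpoints as the names suggest, loading the final model and watching it act for a few steps could look like this sketch:

from stable_baselines3 import PPO

from MiRobotEnv import MiRobotEnv

env = MiRobotEnv()
model = PPO.load("mirobot_final_model", env=env)

obs, info = env.reset()
for _ in range(20):
    # Greedy action from the trained policy
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    env.render()
env.close()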