Clemylia committed on
Commit
5026d89
·
verified ·
1 Parent(s): 3a9639d

Upload 4 files

Browse files
MiRobotEnv.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gymnasium as gym
3
+ from gymnasium import spaces
4
+ import numpy as np
5
+
6
# Indices of the components of the observation vector:
#   CMD_AVANCER  - "move forward" command flag
#   CMD_TOURNER  - "turn" command flag
#   ETAT_FAIM    - hunger level
#   ETAT_SOMMEIL - sleepiness level
#   ETAT_HUMEUR  - mood level
CMD_AVANCER, CMD_TOURNER, ETAT_FAIM, ETAT_SOMMEIL, ETAT_HUMEUR = range(5)

# Discrete action identifiers: stop, move forward, turn left, turn right.
ACTION_ARRETER, ACTION_AVANCER, ACTION_TOURNE_G, ACTION_TOURNE_D = range(4)
18
+
19
+
20
class MiRobotEnv(gym.Env):
    """Simulation environment for MiRobot, a robot puppy that learns to react
    to its owner's commands and to its own internal state.

    Observation: ``Box`` of 5 ``float32`` values, indexed by the module
    constants:

    * ``CMD_AVANCER``  -- "move forward" command flag (0.0 or 1.0)
    * ``CMD_TOURNER``  -- "turn" command flag (0.0 or 1.0)
    * ``ETAT_FAIM``    -- hunger, in [0, 1]
    * ``ETAT_SOMMEIL`` -- sleepiness, in [0, 1]
    * ``ETAT_HUMEUR``  -- mood, in [-1, 1]

    Actions: ``Discrete(4)`` -- stop, move forward, turn left, turn right
    (``ACTION_*`` constants).

    Episodes never terminate or truncate on their own; callers that need
    finite episodes must bound them externally (e.g. with a time-limit
    wrapper).
    """

    metadata = {"render_modes": ["human"], "render_fps": 30}

    def __init__(self, render_mode=None):
        """Create the environment.

        Args:
            render_mode: ``None`` or one of ``metadata["render_modes"]``.

        Raises:
            ValueError: if ``render_mode`` is not supported.
        """
        super().__init__()

        if render_mode is not None and render_mode not in self.metadata["render_modes"]:
            raise ValueError(
                f"Unsupported render_mode {render_mode!r}; "
                f"expected one of {self.metadata['render_modes']} or None"
            )
        self.render_mode = render_mode

        self.action_space = spaces.Discrete(4)

        # Bounds follow the observation layout: two command flags, hunger,
        # sleepiness (all in [0, 1]) and mood (in [-1, 1]).
        low = np.array([0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32)
        high = np.array([1.0, 1.0, 1.0, 1.0, 1.0], dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

        # Internal state buffer; populated by reset().
        self.state = None

    def reset(self, seed=None, options=None):
        """Start a new episode.

        No command is active; hunger and sleepiness start in [0, 0.2) and
        mood in [-0.1, 0.1), all drawn from the environment's seeded RNG.

        Returns:
            ``(observation, info)`` where ``observation`` is a copy of the
            internal state and ``info`` is an empty dict.
        """
        super().reset(seed=seed)

        self.state = np.array(
            [
                0.0,
                0.0,
                self.np_random.uniform(low=0.0, high=0.2),
                self.np_random.uniform(low=0.0, high=0.2),
                self.np_random.uniform(low=-0.1, high=0.1),
            ],
            dtype=np.float32,
        )

        # Return a copy: the internal buffer is mutated in place by step(),
        # which would otherwise overwrite observations the caller has stored.
        return self.state.copy(), {}

    def _update_internal_states(self):
        """Advance hunger, sleepiness and mood by one time step."""
        self.state[ETAT_FAIM] = np.clip(self.state[ETAT_FAIM] + 0.005, 0.0, 1.0)
        self.state[ETAT_SOMMEIL] = np.clip(self.state[ETAT_SOMMEIL] + 0.003, 0.0, 1.0)

        # Mood drops while the robot is hungry or sleepy, and slowly
        # recovers otherwise.
        needs_unmet = self.state[ETAT_FAIM] > 0.5 or self.state[ETAT_SOMMEIL] > 0.5
        humeur_delta = -0.005 if needs_unmet else 0.001
        self.state[ETAT_HUMEUR] = np.clip(self.state[ETAT_HUMEUR] + humeur_delta, -1.0, 1.0)

    def _calculate_reward(self, action):
        """Return the reward for ``action`` given the current state.

        Obeying the active command earns +1.0; stopping when told to move
        forward, or moving forward when told to turn, costs 0.5. Hunger and
        sleepiness each subtract 10% of their level, and mood adds 10% of
        its level.

        Returns:
            The reward as a built-in ``float``.
        """
        reward = 0.0

        if self.state[CMD_AVANCER] > 0.5:
            if action == ACTION_AVANCER:
                reward += 1.0
            elif action == ACTION_ARRETER:
                reward -= 0.5

        if self.state[CMD_TOURNER] > 0.5:
            if action == ACTION_TOURNE_G or action == ACTION_TOURNE_D:
                reward += 1.0
            elif action == ACTION_AVANCER:
                reward -= 0.5

        reward -= self.state[ETAT_FAIM] * 0.1
        reward -= self.state[ETAT_SOMMEIL] * 0.1
        reward += self.state[ETAT_HUMEUR] * 0.1

        # The state is float32, so the running sum becomes a NumPy scalar;
        # hand back a plain Python float.
        return float(reward)

    def _simulate_user_command(self):
        """With probability 0.2, replace the owner's current command.

        When a change happens, both command flags are cleared and then one
        of three outcomes is drawn uniformly: no command, "move forward",
        or "turn".
        """
        if self.np_random.random() >= 0.2:
            return

        self.state[CMD_AVANCER] = 0.0
        self.state[CMD_TOURNER] = 0.0

        choice = self.np_random.integers(0, 3)
        if choice == 1:
            self.state[CMD_AVANCER] = 1.0
        elif choice == 2:
            self.state[CMD_TOURNER] = 1.0

    def step(self, action):
        """Apply ``action`` and advance the simulation by one step.

        The reward is computed against the command that was active when the
        action was chosen; the next command is drawn afterwards.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` and ``truncated`` are always ``False`` and
            ``info`` is an empty dict.

        Raises:
            RuntimeError: if called before ``reset()``.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")

        self._update_internal_states()
        reward = self._calculate_reward(action)
        self._simulate_user_command()

        terminated = False
        truncated = False
        info = {}

        # Copy so later in-place updates do not alter observations already
        # handed out.
        return self.state.copy(), reward, terminated, truncated, info

    def render(self, mode='human'):
        """Print a human-readable summary of the current state to stdout.

        Args:
            mode: kept for backward compatibility; not used.

        Raises:
            RuntimeError: if called before ``reset()``.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before render()")

        faim = self.state[ETAT_FAIM] * 100
        sommeil = self.state[ETAT_SOMMEIL] * 100
        humeur = self.state[ETAT_HUMEUR]
        cmd_a = "OUI" if self.state[CMD_AVANCER] > 0.5 else "NON"
        cmd_t = "OUI" if self.state[CMD_TOURNER] > 0.5 else "NON"

        print("--- État MiRobot ---")
        print(f" > Commande Avancer: {cmd_a}, Commande Tourner: {cmd_t}")
        print(f" > Faim: {faim:.0f}%, Sommeil: {sommeil:.0f}%")
        print(f" > Humeur: {humeur:.2f} (entre -1.0 et 1.0)")

    def close(self):
        """Release resources; this environment holds none."""
        pass
mirobot_final_model.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1789b7ac0a570953fb47d04022f270acf47f0327a74a5e8780e4b902886a73e
3
+ size 139650
mirobot_ppo_40000_steps-1.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b169f19d5c619d17838b0ac426328dde3e0f76f7d69d75131739345eade74509
3
+ size 139636
mirobot_ppo_80000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:339d8e80cd99da3149848e484a8f00a9d0238686e967403c227d51969e3f2e1f
3
+ size 139648