YvesP commited on
Commit
a162e39
·
1 Parent(s): 8012bbf

initial load

Browse files
Files changed (30) hide show
  1. Makefile +12 -0
  2. README.md +38 -13
  3. app.py +175 -0
  4. app.yaml +9 -0
  5. bluetraj.py +81 -0
  6. distribution_wrap.py +92 -0
  7. drone.py +326 -0
  8. dronemodel.py +103 -0
  9. filter_wrap.py +95 -0
  10. monitor_wrap.py +119 -0
  11. param_.py +122 -0
  12. playground.py +92 -0
  13. policies/_last/_b1r1/blues_last.zip +3 -0
  14. policies/_last/_b1r1/reds_last.zip +3 -0
  15. procfile.txt +1 -0
  16. redux_wrap.py +80 -0
  17. requirements.txt +3 -0
  18. reward_wrap.py +86 -0
  19. rotate_wrap.py +93 -0
  20. runner.py +16 -0
  21. settings.py +82 -0
  22. setup.sh +12 -0
  23. sort_wrap.py +98 -0
  24. swarm_policy.py +342 -0
  25. swarmenv.py +100 -0
  26. symetry_wrap.py +95 -0
  27. team.py +131 -0
  28. team_wrap.py +107 -0
  29. train.py +174 -0
  30. utils.py +65 -0
Makefile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ .PHONY: run run-container gcloud-deploy
3
+
4
+ run:
5
+ @streamlit run app.py --server.port=8080 --server.address=0.0.0.0
6
+
7
+ run-container:
8
+ @docker build . -t hexamind-swarms
9
+ @docker run -p 8080:8080 hexamind-swarms
10
+
11
+ gcloud-deploy:
12
+ @gcloud app deploy app.yaml
README.md CHANGED
@@ -1,13 +1,38 @@
1
- ---
2
- title: Swarms
3
- emoji: 👁
4
- colorFrom: indigo
5
- colorTo: purple
6
- sdk: streamlit
7
- sdk_version: 1.21.0
8
- app_file: app.py
9
- pinned: false
10
- license: bsd-3-clause-clear
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # swarms2
2
+
3
+ **red swarms against blue swarms**
4
+ reds attack a target defended by blues
5
+ reds and blues may kill their opponent
6
+ the more drones reach the goal, the better for the reds and the worst for the blues
7
+
8
+ please install the requirements in requirement.txt
9
+ code has been tested on MAC OS with Python3.9
10
+ Pillow may have some difficulties in its installation. A possibility is to try falling back to Python3.7
11
+
12
+ All files are flat.
13
+ two files contain the starting points :
14
+ train.py allows for training
15
+ app.py launches the streamlit app
16
+
17
+ **train.py**
18
+ the training is done iteratively by starting with one blue drone and one red drone, increasing distance progressively and adding drones.
19
+ Learned policies are stored in /policies/
20
+ For each configuration depending on the number of blue and red drones, there is a folder containing the red and blue policies
21
+ ex /policies/b3r4 contains the red and blue policies for configurations with 3 blue and 4 red drones
22
+ There are two policies depending on the color : one for blues and one for reds. Several intermediate savings are done. the last one being __last.
23
+ The function to launch for the training is :
24
+ super_meta_train(max_blues=8, max_reds=8, iteration=10, max_dispersion=3, total_timesteps=10000)
25
+ which programs a training from 1,1 to 8,8 drones, with a distance mutltiplier of 3 and 10 iterations, and a total timestep at each learning of 10000 steps
26
+ (1 step = 1 second)
27
+
28
+
29
+ **show.py**
30
+ once the agents are trained, the drones can be simulated and visible through a streamlit interface.
31
+ the command to launch the visualisation is :
32
+ streamlit run show.py
33
+
34
+
35
+ **tuning the rewards**
36
+ The rewards may be tuned in the _param.py file and logic is in the team_wrap.py file in the 'evaluate_situation' function.
37
+ When is_double is true, means that there is no learning: simulation is carried out with already defined policies. Only the final outcome is to be considered.
38
+ Otherwise, two cases have to be taken into account, whether blue or red is learning# streamlit-to-heroku-tutorial
app.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ import pydeck as pdk
4
+ import streamlit as st
5
+
6
+ from filter_wrap import FilterWrapper
7
+ from distribution_wrap import DistriWrapper
8
+ from redux_wrap import ReduxWrapper
9
+ from symetry_wrap import SymetryWrapper
10
+ from rotate_wrap import RotateWrapper
11
+ from sort_wrap import SortWrapper
12
+ from team_wrap import TeamWrapper
13
+ from reward_wrap import RewardWrapper
14
+ from monitor_wrap import MonitorWrapper
15
+
16
+ from runner import run_episode
17
+ from settings import Settings, define_
18
+ import param_
19
+ from swarmenv import SwarmEnv
20
+
21
+
22
+
23
+ def run(with_streamlit=True, blues: int = 4, reds: int = 6, policy_folder: str = 'reds_last'):
24
+
25
+ # define the policy folder is: where the trained policies are to be found
26
+ Settings.policy_folder = policy_folder
27
+
28
+ # define settings with Streamlit (or use default parameters)
29
+ blues, reds = define_(with_streamlit=with_streamlit, blues=blues, reds=reds)
30
+
31
+ # put in place the map
32
+ deck_map, initial_view_state = pre_show(with_streamlit=with_streamlit)
33
+
34
+ # launch the episode to get the data
35
+ steps = int(param_.DURATION / param_.STEP)
36
+ monitor_env = MonitorWrapper(SwarmEnv(blues=blues, reds=reds), steps)
37
+ env = FilterWrapper(monitor_env)
38
+ env = DistriWrapper(env)
39
+ env = ReduxWrapper(env)
40
+ env = SortWrapper(
41
+ SymetryWrapper(
42
+ RotateWrapper(env)))
43
+
44
+ env = RewardWrapper(TeamWrapper(env, is_double=True), is_double=True)
45
+
46
+ obs = env.reset()
47
+ run_episode(env, obs, blues=blues, reds=reds)
48
+
49
+ print('done')
50
+
51
+ # display the data with Streamlit
52
+ if with_streamlit:
53
+ show(monitor_env, deck_map, initial_view_state)
54
+
55
+
56
+ def pre_show(with_streamlit=True):
57
+ if with_streamlit:
58
+ deck_map = st.empty()
59
+ pitch = st.slider('pitch', 0, 100, 50)
60
+ lat, lon = Settings.latlon
61
+ initial_view_state = pdk.ViewState(
62
+ latitude=lat,
63
+ longitude=lon,
64
+ zoom=13,
65
+ pitch=pitch
66
+ )
67
+ return deck_map, initial_view_state
68
+ else:
69
+ return 0, 0
70
+
71
+
72
+ def show(monitor_env, deck_map, initial_view_state):
73
+
74
+ blue_df, red_df, fire_df, blue_path_df, red_path_df = monitor_env.get_df()
75
+ step_max = monitor_env.step_
76
+
77
+ for step in range(step_max):
78
+ deck_map.pydeck_chart(pdk.Deck(
79
+ map_provider="mapbox",
80
+ map_style='mapbox://styles/mapbox/light-v9',
81
+ initial_view_state=initial_view_state,
82
+ layers=get_layers(blue_df,
83
+ red_df,
84
+ blue_path_df,
85
+ red_path_df,
86
+ step)
87
+ ))
88
+
89
+ time.sleep(param_.STEP*param_.SIMU_SPEED)
90
+
91
+
92
+ def get_layers(df_blue: pd.DataFrame, df_red: pd.DataFrame,
93
+ df_blue_path: [pd.DataFrame], df_red_path: [pd.DataFrame],
94
+ step) -> [pdk.Layer]:
95
+ lat, lon = Settings.latlon
96
+ df_target = pd.DataFrame({'lat': [lat], 'lon': [lon]})
97
+ layers_ = get_target_layers(df_target)
98
+
99
+ for (df, dfp, b) in [(df_blue, df_blue_path, True), (df_red, df_red_path, False)]:
100
+ layers_.append(get_current_drone_layers(df, step))
101
+ nb_drones = df['d_index'].max() + 1
102
+ for drone_index in range(nb_drones):
103
+ layers_.append(get_path_layers(dfp[drone_index], step))
104
+
105
+ return layers_
106
+
107
+
108
+ def get_target_layers(df_target) -> [pdk.Layer]:
109
+ return [
110
+ # this is the GROUNDZONE
111
+ pdk.Layer(
112
+ 'ScatterplotLayer',
113
+ data=df_target,
114
+ get_position='[lon, lat]',
115
+ get_color='[0, 120, 0]',
116
+ get_radius=Settings.groundzone,
117
+ get_line_width=50,
118
+ lineWidthMinPixels=2,
119
+ stroked=True,
120
+ filled=False,
121
+
122
+ ),
123
+
124
+ pdk.Layer(
125
+ 'ScatterplotLayer',
126
+ data=df_target,
127
+ get_position='[lon, lat]',
128
+ get_color='[0, 0, 200]',
129
+ get_radius=30,
130
+ ),
131
+ ]
132
+
133
+
134
+ def get_current_drone_layers(df_drone: pd.DataFrame, step: int) -> [pdk.Layer]:
135
+ df_current = df_drone[df_drone.step == step]
136
+
137
+ return [
138
+ pdk.Layer(
139
+ 'ScatterplotLayer',
140
+ data=df_current,
141
+ get_position='[lon, lat, zed]',
142
+ get_color='color',
143
+ get_radius=50,
144
+
145
+ ),
146
+ pdk.Layer(
147
+ 'ScatterplotLayer',
148
+ data=df_current,
149
+ get_position='[lon, lat]',
150
+ get_color=[50, 50, 50, 50],
151
+ get_radius=50,
152
+
153
+ ),
154
+ ]
155
+
156
+
157
+ def get_path_layers(df_path: pd.DataFrame, step: int) -> [pdk.Layer]:
158
+ df_current = df_path[df_path.step == step]
159
+ return [
160
+ pdk.Layer(
161
+ type="PathLayer",
162
+ data=df_current,
163
+ pickable=True,
164
+ get_color="color",
165
+ width_scale=10,
166
+ width_min_pixels=1,
167
+ get_path="path",
168
+ get_width=1,
169
+ )
170
+ ]
171
+
172
+
173
+ # and ... do not forget
174
+ run(with_streamlit=True, policy_folder='last')
175
+ # run(blues=1, reds=3, with_streamlit=False, policy_folder='0527_14_test')
app.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+
2
+ runtime: custom
3
+ env: flex
4
+ manual_scaling:
5
+ instances: 1
6
+ resources:
7
+ cpu: 1
8
+ memory_gb: 3
9
+ disk_size_gb: 20
bluetraj.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import param_
3
+ from drone import Drone
4
+
5
+
6
+ def calculate_target(blue_drone: Drone, red_drone: Drone) -> np.ndarray(3, ):
7
+ '''
8
+
9
+ :param blue_drone:
10
+ :param red_drone:
11
+ :return:
12
+ '''
13
+
14
+ def transform(pos, delta, theta):
15
+ pos[0] -= delta
16
+ pos[1] -= theta
17
+ return pos[0] * np.exp(1j * pos[1])
18
+
19
+ def untransform_to_array(pos, delta, theta):
20
+ pos[0] += delta
21
+ pos[1] += theta
22
+ return pos
23
+
24
+ theta = red_drone.position[1]
25
+ delta = param_.GROUNDZONE
26
+
27
+ z_blue = transform(blue_drone.position, delta, theta)
28
+ z_red = np.real(transform(red_drone.position, delta, theta))
29
+
30
+ v_blue = blue_drone.drone_model.max_speed
31
+ v_red = red_drone.drone_model.max_speed
32
+
33
+ blue_shooting_distance = blue_drone.drone_model.distance_to_neutralisation
34
+
35
+ blue_time_to_zero = (np.abs(z_blue) - blue_shooting_distance) / v_blue
36
+ red_time_to_zero = z_red / v_red
37
+
38
+ if red_time_to_zero <= param_.STEP or red_time_to_zero < blue_time_to_zero + param_.STEP:
39
+ return np.zeros(3), red_time_to_zero
40
+ else:
41
+ max_target = z_red
42
+ min_target = 0
43
+ while True:
44
+ target = (max_target + min_target) / 2
45
+ blue_time_to_target = max(0, (np.abs(z_blue - target) - blue_shooting_distance) / v_blue)
46
+ red_time_to_target = np.abs(z_red - target) / v_red
47
+ if red_time_to_target - param_.STEP < blue_time_to_target <= red_time_to_target:
48
+ target = untransform_to_array((target / z_red) * red_drone.position, delta, theta)
49
+ return target, blue_time_to_target
50
+ if red_time_to_target < blue_time_to_target:
51
+ max_target = target
52
+ min_target = min_target
53
+ else: # blue_ time_to_target <= red_time_to_target -1:
54
+ max_target = max_target
55
+ min_target = target
56
+
57
+
58
+
59
+
60
+ def unitary_test(rho_blue: float, theta_blue: float, rho_red: float, theta_red: float):
61
+ blue_drone = Drone()
62
+ blue_drone.position = np.array([rho_blue, theta_blue, 100])
63
+ red_drone = Drone(is_blue=False)
64
+ red_drone.position = np.array([rho_red, theta_red, 100])
65
+ tg, time = calculate_target(blue_drone, red_drone)
66
+ print('rho_blue : ', rho_blue, ' theta_blue : ', theta_blue, ' rho_red : ', rho_red, ' theta_red : ', theta_red,
67
+ ' tg : ', tg, ' time : ', time)
68
+ return tg, time
69
+
70
+
71
+
72
+
73
+
74
+ def test():
75
+ for rho_blue in [1000]:
76
+ for theta_blue in np.pi * np.array([-1, 0.75, 0.5, 0.25, 0]):
77
+ for rho_red in [1000]:
78
+ for theta_red in np.pi * np.array([0, 1/4]):
79
+ unitary_test(rho_blue=rho_blue, theta_blue=theta_blue, rho_red=rho_red, theta_red=theta_red)
80
+ print('done')
81
+
distribution_wrap.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gym
2
+
3
+ from gym import spaces
4
+ import numpy as np
5
+
6
+ from runner import run_episode
7
+ from redux_wrap import ReduxWrapper
8
+ from rotate_wrap import RotateWrapper
9
+ from symetry_wrap import SymetryWrapper
10
+ from sort_wrap import SortWrapper
11
+ from team_wrap import TeamWrapper
12
+ from reward_wrap import RewardWrapper
13
+
14
+
15
+ class DistriWrapper(gym.Wrapper):
16
+ """
17
+ :param env: (gym.Env) Gym environment that will be wrapped
18
+ """
19
+
20
+ def __init__(self, env):
21
+
22
+ self.blue_deads = self.red_deads = 0
23
+ self.nb_blues, self.nb_reds = env.nb_blues, env.nb_reds
24
+
25
+ env.observation_space = spaces.Tuple((
26
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, 6), dtype=np.float32),
27
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, 6), dtype=np.float32),
28
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, self.nb_reds), dtype=np.float32),
29
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, self.nb_blues), dtype=np.float32)))
30
+
31
+ env.action_space = spaces.Tuple((
32
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, 3), dtype=np.float32),
33
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, 3), dtype=np.float32)))
34
+
35
+ # Call the parent constructor, so we can access self.env later
36
+ super(DistriWrapper, self).__init__(env)
37
+
38
+ def reset(self):
39
+ """
40
+ Reset the environment
41
+ """
42
+ obs = self.env.reset()
43
+ blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads = obs
44
+ self.blue_deads, self.blue_deads = blue_deads, red_deads
45
+ return blue_obs, red_obs, blues_fire, reds_fire
46
+
47
+ def step(self, action):
48
+ """
49
+ :param action: ([float] or int) Action taken by the agent
50
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional informations
51
+ """
52
+ obs, reward, done, info = self.env.step(action)
53
+
54
+ blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads = obs
55
+ obs = blue_obs, red_obs, blues_fire, reds_fire
56
+
57
+ if done: # environment decision (eg drones oob)
58
+ return obs, reward, True, info
59
+
60
+ if red_deads == len(red_obs): # no more reds to fight (it could mean that they have all reached the target)
61
+ return obs, reward, True, info
62
+
63
+ if blue_deads == len(blue_obs): # reds have won
64
+ return obs, reward, True, info
65
+
66
+ # do we have new deaths?
67
+ new_blue_deads = blue_deads - self.blue_deads
68
+ new_red_deads = red_deads - self.red_deads
69
+ self.blue_deads, self.red_deads = blue_deads, red_deads
70
+
71
+ if 0 < new_red_deads + new_blue_deads: # we have someone killed but we still have some fight
72
+
73
+ blues, reds = self.nb_blues - blue_deads, self.nb_reds - red_deads
74
+
75
+ env = ReduxWrapper(self, minus_blue=blue_deads, minus_red=red_deads)
76
+ obs_ = env.post_obs(obs)
77
+
78
+ env = RotateWrapper(env)
79
+ obs_ = env.post_obs(obs_)
80
+
81
+ env = SymetryWrapper(env)
82
+ obs_ = env.post_obs(obs_)
83
+
84
+ env = SortWrapper(env)
85
+ obs_ = env.post_obs(obs_)
86
+
87
+ env = RewardWrapper(TeamWrapper(env, is_double=True), is_double=True)
88
+ obs_ = env.post_obs(obs_)
89
+
90
+ _, reward, done, info = run_episode(env, obs_, blues=blues, reds=reds)
91
+
92
+ return obs, reward, done, info
drone.py ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ import numpy as np
3
+ from dronemodel import DroneModel
4
+
5
+ import param_ as param_
6
+ from settings import Settings
7
+
8
+
9
+ @dataclass
10
+ class Drone:
11
+ """
12
+ Creates a drone (it is either red or blue / foe or friend
13
+ """
14
+ is_blue: bool = True
15
+ position: np.ndarray((3,)) = np.zeros((3,))
16
+ position_noise: np.ndarray((3,)) = np.zeros((3,))
17
+ drone_model: DroneModel = None
18
+ max_speeds: np.ndarray((3,)) = None
19
+ min_speeds: np.ndarray((3,)) = None
20
+ init_position: np.ndarray((3,)) = None
21
+ init_speed: np.ndarray((3,)) = None
22
+ color = np.ndarray((3,))
23
+ is_alive: bool = True
24
+ is_fired: int = 0
25
+ fires = 0
26
+ step_ = 0
27
+ id_: int = -1
28
+ ttl: float = param_.DURATION # ttl = Time To Live expressed in seconds
29
+ speed: np.ndarray((3,)) = np.zeros((3,))
30
+ min_positions = np.zeros((3,))
31
+ max_positions = np.array([Settings.perimeter, 2*np.pi, Settings.perimeter_z])
32
+
33
+ def __post_init__(self):
34
+ self.drone_model = DroneModel(self.is_blue)
35
+ self.max_speeds = np.array([self.drone_model.max_speed,
36
+ 2*np.pi,
37
+ self.drone_model.max_up_speed])
38
+ self.min_speeds = np.array([0,
39
+ 0,
40
+ -self.drone_model.max_down_speed])
41
+
42
+ self.init_position = np.copy(self.position)
43
+ self.init_position_noise = np.copy(self.position_noise)
44
+ self.init_speed = np.copy(self.speed)
45
+ self.color = param_.BLUE_COLOR if self.is_blue else param_.RED_COLOR
46
+ self.ttl = (self.position[0] / self.max_speeds[0]) * param_.TTL_RATIO + param_.TTL_MIN
47
+
48
+ def reset(self):
49
+ self.is_alive = True
50
+ self.speed = self.init_speed
51
+ self.color = param_.BLUE_COLOR if self.is_blue else param_.RED_COLOR
52
+ distance_factor = Settings.blue_distance_factor if self.is_blue else Settings.red_distance_factor
53
+ self.position[0] = self.init_position[0]*distance_factor
54
+ self.position[2] = self.init_position[2]*distance_factor
55
+ self.position_noise *= distance_factor
56
+
57
+ self.position[0] = np.clip(self.position[0] + np.random.rand() * self.position_noise[0],
58
+ param_.GROUNDZONE * 1.1,
59
+ param_.PERIMETER * 0.9)
60
+ self.position[1] = self.position[1] + np.random.rand() * self.position_noise[1]
61
+ self.position[2] = np.clip(self.position[2] + np.random.rand() * self.position_noise[2],
62
+ param_.GROUNDZONE * 1.1,
63
+ param_.PERIMETER_Z * 0.9)
64
+
65
+ self.ttl = (self.position[0] / self.max_speeds[0]) * param_.TTL_RATIO + param_.TTL_MIN
66
+
67
+
68
+ def step(self, action):
69
+ self.step_ = self.step_ + 1 # for debug purposes
70
+ reward = 0
71
+ info = {'ttl': param_.DURATION}
72
+ if self.is_alive: # if the drone is dead, it no longer moves :)
73
+ pos_xyz, speed_xyz = self.to_xyz(self.position), self.to_xyz(self.speed)
74
+ pos_s, speed_s = \
75
+ self.drone_model.get_trajectory(pos_xyz, speed_xyz, action, np.linspace(0, param_.STEP, 10))
76
+ pos, speed = pos_s.T[-1], speed_s.T[-1]
77
+ self.position, self.speed = self.from_xyz(pos), self.from_xyz(speed)
78
+ self.ttl -= param_.STEP
79
+ info['ttl'] = self.ttl
80
+
81
+ # evaluate the distance compared to the greedy action
82
+
83
+ if self.is_blue:
84
+ '''
85
+ for further usage
86
+ straight_action, time_to_catch = self.simple_blue()
87
+ tolerance = 0.05 if 4 < time_to_catch else 1 if 2 < time_to_catch else 3
88
+ distance = 1 if tolerance < np.linalg.norm(straight_action - action) else 0
89
+ '''
90
+ distance = 1
91
+ else:
92
+ straight_action = self.simple_red()
93
+ distance = 1 if 0.1 < np.linalg.norm(straight_action - action) else 0
94
+ info['distance_to_straight_action'] = distance
95
+
96
+
97
+ if self._hits_target():
98
+ info['hits_target'] = 1
99
+ reward = -param_.TARGET_HIT_COST
100
+ self.color = param_.RED_SUCCESS_COLOR
101
+ self.is_alive = False # the red has done its job ...
102
+
103
+ if self._out_of_bounds():
104
+ coef = -1 if self.is_blue else 1
105
+ reward = coef * param_.OOB_COST
106
+ self.is_alive = False
107
+ info['oob'] = 1
108
+
109
+ obs = self.get_observation()
110
+ done = not self.is_alive
111
+
112
+ return obs, reward, done, info
113
+
114
+ def _out_of_bounds(self):
115
+ return not (0 < self.position[2] < Settings.perimeter_z and self.position[0] < Settings.perimeter)
116
+
117
+ def _hits_target(self):
118
+ if self.is_blue:
119
+ return False
120
+ else:
121
+ distance_to_zero = np.sqrt(self.position[0]**2 + self.position[2]**2)
122
+ return distance_to_zero < Settings.groundzone
123
+
124
+ def fires_(self, foe) -> bool:
125
+ """
126
+ checks if the foe drone is hit by self
127
+ :param foe: a foe drone
128
+ :return: True= yes, got you
129
+ """
130
+ # deads don't kill nor die
131
+ if not (self.is_alive and foe.is_alive):
132
+ return False
133
+
134
+ # lets see if foe is in the "fire cone"
135
+ pos_xyz = - self.to_xyz(self.position) + self.to_xyz(foe.position)
136
+ distance = np.linalg.norm(pos_xyz)
137
+ pos_xyz /= distance
138
+
139
+ if distance < self.drone_model.distance_to_neutralisation:
140
+ return self.is_in_the_cone(foe)
141
+
142
+ return False
143
+
144
+
145
+ def is_in_the_cone(self, foe) -> bool:
146
+ '''
147
+ verifies if foe is in the cone (without any regard to distance)
148
+ :param foe:
149
+ :return:
150
+ '''
151
+ pos_xyz = - self.to_xyz(self.position) + self.to_xyz(foe.position)
152
+ pos_xyz /= np.linalg.norm(pos_xyz)
153
+ vit_xyz = self.to_xyz(self.speed)
154
+ vit_xyz /= np.linalg.norm(vit_xyz)
155
+ cos_theta = np.dot(pos_xyz, vit_xyz)
156
+ in_the_cone = False
157
+ if 0 < cos_theta:
158
+ theta = np.arccos(cos_theta)
159
+ in_the_cone = theta < self.drone_model.angle_to_neutralisation
160
+ return in_the_cone
161
+
162
+
163
+ # tell the drones that they are dead
164
+ def is_killed(self, is_blue=True):
165
+ self.is_alive = False
166
+ self.position[2] = 0
167
+ self.color = param_.BLUE_DEAD_COLOR if is_blue else param_.RED_DEAD_COLOR
168
+
169
+ def to_xyz(self, rho_theta_z: np.ndarray(shape=(3,))) -> np.ndarray(shape=(3,)):
170
+ """
171
+ allows to get the 3D xyz coordinates from a polar representation
172
+ :param rho_theta_z: array (3,) with rho in meter, theta in rad, zed in meter for positions, /s for speeds, etc.
173
+ :return: float array (3,) with x, y, z in meter, /s for speeds, etc.
174
+ """
175
+ xy_ = rho_theta_z[0] * np.exp(1j * rho_theta_z[1])
176
+ return np.array([np.real(xy_), np.imag(xy_), rho_theta_z[2]])
177
+
178
+ def from_xyz(self, xyz: np.ndarray(shape=(3,))) -> np.ndarray(shape=(3,)):
179
+ """
180
+ """
181
+ z_complex = xyz[0] + 1j*xyz[1]
182
+ rho = np.abs(z_complex)
183
+ theta = np.angle(z_complex)
184
+ return np.array([rho, theta, xyz[2]], dtype='float32')
185
+
186
+ def to_norm(self,
187
+ rho_theta_z: np.ndarray(shape=(3,)),
188
+ max_vector: np.ndarray(shape=(3,)),
189
+ min_vector: np.ndarray(shape=(3,)) = np.array([0, 0, 0]))\
190
+ -> np.ndarray(shape=(3,), dtype='float32'):
191
+ """
192
+ normalises the position/speed in order to have all space in a [0;1]**3 space
193
+ :return: rho, theta, zed in a [0;1]**3 space
194
+ """
195
+ rho = rho_theta_z[0] / max_vector[0]
196
+ theta = (rho_theta_z[1] / (2 * np.pi)) % 1
197
+ zed = (rho_theta_z[2] - min_vector[2]) / (max_vector[2] - min_vector[2])
198
+ return np.array([rho, theta, zed], dtype='float32')
199
+
200
+ def from_norm(self,
201
+ norm: np.ndarray(shape=(3,)),
202
+ max_vector: np.ndarray(shape=(3,)),
203
+ min_vector: np.ndarray(shape=(3,)) = np.array([0, 0, 0]))\
204
+ -> np.ndarray(shape=(3,), dtype='float32'):
205
+ """
206
+ denormalises and renders into cylindric coordinates
207
+ :param norm:
208
+ :param max_vector:
209
+ :param min_vector:
210
+ :return:
211
+ """
212
+ rho = norm[0] * max_vector[0]
213
+ theta = norm[1] * 2*np.pi
214
+ zed = norm[2] * (max_vector[2] - min_vector[2]) + min_vector[2]
215
+ return np.array([rho, theta, zed], dtype='float32')
216
+
217
+ def to_lat_lon_zed(self, lat, lon):
218
+ z = self.position[0] * np.exp(1j * self.position[1])
219
+ lat = np.imag(z) * 360 / (40075 * 1000) + lat
220
+ lon = np.real(z) * 360 / (40075 * 1000 * np.cos(np.pi / 180 * lat)) + lon
221
+ return lat, lon, self.position[2]
222
+
223
+ def distance(self, other_drone=None):
224
+
225
+ if other_drone:
226
+ distance = np.sqrt(np.abs(self.position[0] * np.exp(1j * self.position[1]) -
227
+ other_drone.position[0] * np.exp(1j * other_drone.position[1])) ** 2 +
228
+ (self.position[2] - other_drone.position[2]) ** 2)
229
+ else:
230
+ distance = np.sqrt((self.position[0] ** 2) + self.position[2] ** 2)
231
+ return distance
232
+
233
+ def get_observation(self): # -> np.array(shape=(6,), dtype='float32'):
234
+ """
235
+ get normalised and transformed position and speed
236
+ :return:
237
+ """
238
+ # calculates transformed normalised position
239
+ normalised_position = self.to_norm(self.position, self.max_positions, self.min_positions)
240
+
241
+ # calculates transformed normalised speed
242
+ normalised_speed = self.to_norm(self.speed, self.max_speeds, self.min_speeds)
243
+
244
+ return np.append(normalised_position, normalised_speed)
245
+
246
+
247
+ def simple_red(self, target: np.ndarray(3,)=np.zeros(3), z_margin: float=50) -> np.ndarray(shape=(3,)):
248
+ '''
249
+ defines the actions for a trajectory targeting zero
250
+ :return:
251
+ '''
252
+
253
+ self_z = self.position[0] * np.exp(1j * self.position[1])
254
+ target_z = target[0] * np.exp(1j * target[1])
255
+
256
+ direction = np.zeros(3)
257
+ direction[0] = np.abs(self_z - target_z)
258
+ direction[1] = np.angle(self_z - target_z)
259
+ direction[2] = self.position[2] - target[2] - z_margin
260
+
261
+ theta = (direction[1] + np.pi) / (2*np.pi) % 1
262
+
263
+ # slope of drone given its position
264
+ tan_phi = np.sign(direction[2]) * np.inf if direction[0] == 0 else direction[2]/direction[0]
265
+ # slope of drone speed
266
+ tan_phi_point = np.sign(self.speed[2]) * np.inf if self.speed[0] == 0 else self.speed[2]/self.speed[0]
267
+ # slope of forces
268
+ f_ratio = self.drone_model.Fxy / self.drone_model.Fz_minus
269
+ # go up if speed slope is too steep and vertical speed < 0 else take the position angle for forces angle
270
+ psy = -np.arctan(tan_phi * f_ratio) / np.pi + 0.5
271
+
272
+ action = np.array([1, theta, psy])
273
+
274
+ return action
275
+
276
+ def simple_target(self, target: (np.ndarray(shape=(3,)))) -> np.ndarray(shape=(3,)):
277
+ '''
278
+ defines the actions for a trajectory targeting ... the given target
279
+ :return:
280
+ '''
281
+
282
+ self_z = self.position[0] * np.exp(1j * self.position[1])
283
+ target_z = target[0] * np.exp(1j * target[1])
284
+
285
+ direction = np.zeros(3)
286
+ direction[0] = np.abs(self_z - target_z)
287
+ direction[1] = np.angle(self_z - target_z)
288
+ direction[2] = self.position[2] - target[2]
289
+
290
+ theta = (direction[1] + np.pi) / (2*np.pi) % 1
291
+
292
+ # slope of drone given its position
293
+ tan_phi = np.sign(direction[2]) * np.inf if direction[0] == 0 else direction[2]/direction[0]
294
+ # slope of drone speed
295
+ tan_phi_point = np.sign(self.speed[2]) * np.inf if self.speed[0] == 0 else self.speed[2]/self.speed[0]
296
+ # slope of forces
297
+ f_ratio = self.drone_model.Fxy / self.drone_model.Fz_minus
298
+ # go up if speed slope is too steep and vertical speed < 0 else take the position angle for forces angle
299
+ psy = 0.5 if tan_phi_point < -tan_phi else -np.arctan(tan_phi * f_ratio) / np.pi + 0.5
300
+
301
+ if Settings.perimeter_z / 2 < direction[2]:
302
+ psy = min(0.2, psy)
303
+
304
+ if self.position[0] < 1.5 * Settings.groundzone:
305
+ psy = min(0.2, psy)
306
+
307
+
308
+ action = np.array([1, theta, psy])
309
+
310
+
311
+
312
+ return action
313
+
314
+ def next_simple_pos(self):
315
+ next_pos = np.zeros(3)
316
+ simple_next = self.pos[0] * np.exp(1j * self.pos[1]) + self.speed[0] * np.exp(1j * self.speed[1]) * param_.step
317
+ next_pos[0] = np.abs(simple_next)
318
+ next_pos[1] = np.arg(simple_next)
319
+ next_pos[2] = self.pos[2] + self.speed[2] * param_.step
320
+ return next_pos
321
+
322
+ def copy_pos_speed(self, drone_to_copy):
323
+ self.position = drone_to_copy.position
324
+ self.speed = drone_to_copy.speed
325
+
326
+
dronemodel.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from scipy.integrate import odeint
3
+ import numpy as np
4
+
5
+ import param_
6
+
7
+
8
+ @dataclass
9
+ class DroneModel:
10
+ """
11
+ Creates a drone_model of a drone
12
+ """
13
+
14
+ def __init__(self, is_blue):
15
+ self.drone_model = param_.DRONE_MODELS[param_.DRONE_MODEL[is_blue]]
16
+
17
+ self.angle_to_neutralisation = self.drone_model['angle_to_neutralisation']
18
+ self.distance_to_neutralisation = self.drone_model['distance_to_neutralisation']
19
+ self.duration_to_neutralisation = self.drone_model['duration_to_neutralisation']
20
+
21
+ self.Cxy = self.drone_model['Cxy']
22
+ self.Cz = self.drone_model['Cz']
23
+ self.mass = self.drone_model['mass']
24
+
25
+ self.Fxy_ratio = self.drone_model['Fxy_ratio']
26
+ self.Fz_min_ratio = self.drone_model['Fz_min_ratio']
27
+ self.Fz_max_ratio = self.drone_model['Fz_max_ratio']
28
+
29
+ self.weight_eq = self.mass * param_.g * (1 - self.Fz_min_ratio)
30
+ self.Fz_plus = (self.Fz_max_ratio - 1) * self.mass * param_.g
31
+ self.Fz_minus = (1 - self.Fz_min_ratio) * self.mass * param_.g
32
+ self.Fxy = self.mass * param_.g * self.Fxy_ratio
33
+
34
+ self.max_speed = np.sqrt(self.Fxy / self.Cxy)
35
+ self.max_up_speed = np.sqrt(self.Fz_plus / self.Cz)
36
+ self.max_down_speed = np.sqrt(self.Fz_minus / self.Cz)
37
+ self.max_rot_speed = 2 * np.pi
38
+
39
+ def get_trajectory(self, pos_xyz, speed_xyz, action: np.ndarray(3,), time_: np.ndarray(1,)) -> np.ndarray(3,):
40
+ '''
41
+ returns next position given the current position, speed and applied forces
42
+ :param pos_xyz:
43
+ :param speed_xyz:
44
+ :param action:
45
+ :param time_:
46
+ :return:
47
+ '''
48
+
49
+ rho = action[0] # in 0, 1
50
+ theta = 2*np.pi * action[1] # in 0, 2pi
51
+ psy = np.pi * (action[2] - 0.5) # in -pi/2, pi/2
52
+
53
+ fx = rho * np.cos(theta) * np.cos(psy) * self.Fxy
54
+ fy = rho * np.sin(theta) * np.cos(psy) * self.Fxy
55
+ fz = rho * np.sin(psy) * (self.Fz_plus if 0 < psy else self.Fz_minus)
56
+
57
+ pos_speed = np.hstack((pos_xyz, speed_xyz))
58
+
59
+ result_ = odeint(
60
+ lambda u, v: self.drone_dynamics(u, v, fx, fy, fz, self.Cxy, self.Cz, self.mass),
61
+ pos_speed,
62
+ time_,
63
+ Dfun=lambda u, v: self.fulljac(u, v, self.Cxy, self.Cz, self.mass)
64
+ )
65
+ x, y, z, dx, dy, dz = result_.T
66
+
67
+ return np.array([x, y, z], dtype='float32'), np.array([dx, dy, dz], dtype='float32')
68
+
69
+ def drone_dynamics(self, pos_speed, time_, f_x, f_y, f_z, Cxy, Cz, m):
70
+ x, y, z, dx, dy, dz = pos_speed
71
+ return [dx,
72
+ dy,
73
+ dz,
74
+ 1/m * (f_x - Cxy * dx * np.sqrt(dx**2 + dy**2 + dz**2)),
75
+ 1/m * (f_y - Cxy * dy * np.sqrt(dx**2 + dy**2 + dz**2)),
76
+ 1/m * (f_z - Cz * dz * np.sqrt(dx**2 + dy**2 + dz**2))]
77
+
78
+ def fulljac(self, pos_speed, time_, Cxy, Cz, m) -> np.ndarray((6, 6), ):
79
+ '''
80
+ returns the Jacobian of the differential equation of the trajectory
81
+ :param pos_speed:
82
+ :param time_:
83
+ :param Cxy:
84
+ :param Cz:
85
+ :param m:
86
+ :return:
87
+ '''
88
+
89
+ x, y, z, dx, dy, dz = pos_speed
90
+ J = np.zeros((6, 6))
91
+ J[0, 3] = 1
92
+ J[1, 4] = 1
93
+ J[2, 5] = 1
94
+ J[3, 3] = -Cxy/m * ((np.sqrt(dx**2 + dy**2 + dz**2)) + dx**2 / np.sqrt(dx**2 + dy**2 + dz**2))
95
+ J[3, 4] = -Cxy/m * (dx * dy / np.sqrt(dx**2 + dy**2 + dz**2))
96
+ J[3, 5] = -Cxy/m * (dx * dz / np.sqrt(dx**2 + dy**2 + dz**2))
97
+ J[4, 4] = -Cxy/m * ((np.sqrt(dx**2 + dy**2 + dz**2)) + dy**2 / np.sqrt(dx**2 + dy**2 + dz**2))
98
+ J[4, 3] = -Cxy/m * (dy * dx / np.sqrt(dx**2 + dy**2 + dz**2))
99
+ J[4, 5] = -Cxy/m * (dy * dz / np.sqrt(dx**2 + dy**2 + dz**2))
100
+ J[5, 5] = -Cz/m * ((np.sqrt(dx**2 + dy**2 + dz**2)) + dz**2 / np.sqrt(dx**2 + dy**2 + dz**2))
101
+ J[5, 3] = -Cz/m * (dz * dx / np.sqrt(dx**2 + dy**2 + dz**2))
102
+ J[5, 4] = -Cz/m * (dz * dy / np.sqrt(dx**2 + dy**2 + dz**2))
103
+ return J
filter_wrap.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import numpy as np
3
+ from gym import spaces, Wrapper
4
+
5
+
6
+ class FilterWrapper(Wrapper):
7
+ """
8
+ :param env: (gym.Env) Gym environment that will be wrapped
9
+ """
10
+
11
+ def __init__(self, env):
12
+
13
+ self.nb_blues, self.nb_reds = env.nb_blues, env.nb_reds
14
+
15
+ self.blue_deads = np.full((self.nb_blues,), False)
16
+ self.red_deads = np.full((self.nb_reds,), False)
17
+
18
+ env.observation_space = spaces.Tuple((
19
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, 6), dtype=np.float32),
20
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, 6), dtype=np.float32),
21
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, self.nb_reds), dtype=np.float32),
22
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, self.nb_blues), dtype=np.float32),
23
+ spaces.Discrete(1),
24
+ spaces.Discrete(1)))
25
+
26
+ super(FilterWrapper, self).__init__(env)
27
+
28
+ def reset(self):
29
+ """
30
+ Reset the environment
31
+ """
32
+ obs = self.env.reset()
33
+
34
+ return self._sort_obs(obs)
35
+
36
+ def step(self, action):
37
+ """
38
+ :param action: ([float] or int) Action taken by the agent
39
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional informations
40
+ """
41
+
42
+ blue_action, red_action = action
43
+
44
+ new_ba = []
45
+ index = 0
46
+ for count, alive in enumerate(~self.blue_deads):
47
+ if alive:
48
+ new_ba.append(blue_action[index])
49
+ index += 1
50
+ else:
51
+ new_ba.append(np.array([0, 0, 0]))
52
+ blue_action = new_ba
53
+
54
+ new_ra = []
55
+ index = 0
56
+ for count, alive in enumerate(~self.red_deads):
57
+ if alive:
58
+ new_ra.append(red_action[index])
59
+ index += 1
60
+ else:
61
+ new_ra.append(np.array([0, 0, 0]))
62
+ red_action = new_ra
63
+
64
+ action = blue_action, red_action
65
+
66
+ obs, reward, done, info = self.env.step(action)
67
+
68
+ obs = self._sort_obs(obs)
69
+
70
+ return obs, reward, done, info
71
+
72
+ def _sort_obs(self, obs):
73
+
74
+ blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads = obs
75
+
76
+ self.blue_deads = blue_deads
77
+ self.red_deads = red_deads
78
+
79
+ blue_obs = np.vstack((blue_obs[~self.blue_deads], blue_obs[self.blue_deads]))
80
+ red_obs = np.vstack((red_obs[~self.red_deads], red_obs[self.red_deads]))
81
+
82
+ blues_fire = self.fire_sort(self.blue_deads, self.red_deads, blues_fire)
83
+ reds_fire = self.fire_sort(self.red_deads, self.blue_deads, reds_fire)
84
+
85
+ sort_obs = blue_obs, red_obs, blues_fire, reds_fire, sum(blue_deads), sum(red_deads)
86
+
87
+ return sort_obs
88
+
89
+ def fire_sort(self, dead_friends, dead_foes, friends_fire):
90
+
91
+ friends_fire_big = np.zeros_like(friends_fire)
92
+ friends_fire = np.compress(~dead_friends, friends_fire, axis=0)
93
+ friends_fire = np.compress(~dead_foes, friends_fire, axis=1)
94
+ friends_fire_big[:friends_fire.shape[0], :friends_fire.shape[1]] = friends_fire
95
+ return friends_fire_big
monitor_wrap.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import make_dataclass
2
+
3
+ import numpy as np
4
+ from gym import Wrapper
5
+ import pandas as pd
6
+
7
+ import param_
8
+ from settings import Settings
9
+
10
+
11
+ Path = make_dataclass("Path", [('path', list), ('step', int), ('d_index', int), ('color', list)])
12
+
13
+
14
+ class MonitorWrapper(Wrapper):
15
+ """
16
+ :param env: (gym.Env) Gym environment that will be wrapped
17
+ """
18
+
19
+ def __init__(self, env, steps, verbose=True):
20
+ # Call the parent constructor, so we can access self.env later
21
+ super(MonitorWrapper, self).__init__(env)
22
+ self.verbose = verbose
23
+ self.blue_data = []
24
+ self.red_data = []
25
+ self.fire_paths = []
26
+ lat, lon = Settings.latlon
27
+ self.lat_tg = lat
28
+ self.lon_tg = lon
29
+ self.steps = steps
30
+ self.step_ = 0
31
+ self.step_max = 0
32
+
33
+ def reset(self):
34
+ """
35
+ Reset the environment
36
+ """
37
+ obs = self.env.reset()
38
+ self.blue_data = []
39
+ self.red_data = []
40
+ self.fire_paths = []
41
+ self.step_ = 0
42
+ self.step_max = 0
43
+
44
+ return obs
45
+
46
+ def step(self, action):
47
+ """
48
+ :param action: ([float] or int) Action taken by the agent
49
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional informations
50
+ """
51
+
52
+ obs, reward, done, info = self.env.step(action)
53
+
54
+ if self.verbose:
55
+ self.monitor_state()
56
+
57
+ self.step_ += 1
58
+ if self.step_ == self.steps:
59
+ done = True
60
+
61
+ return obs, reward, done, info
62
+
63
+ def monitor_state(self):
64
+
65
+ env = self.env
66
+ lat_tg, lon_tg = self.lat_tg, self.lon_tg
67
+
68
+ for d_index, drone in enumerate(env.blue_team.drones):
69
+ lat, lon, zed = drone.to_lat_lon_zed(lat_tg, lon_tg)
70
+ self.blue_data.append([self.step_, True, drone.id_, lat, lon, zed, drone.color])
71
+
72
+ for d_index, drone in enumerate(env.red_team.drones):
73
+ lat, lon, zed = drone.to_lat_lon_zed(lat_tg, lon_tg)
74
+ self.red_data.append([self.step_, False, drone.id_, lat, lon, zed, drone.color])
75
+
76
+ for blue_id, red_id in np.argwhere(0 < env.playground.blues_have_fired_reds):
77
+ b_lat, b_lon, b_zed = env.blue_team.drones[blue_id].to_lat_lon_zed(lat_tg, lon_tg)
78
+ r_lat, r_lon, r_zed = env.red_team.drones[red_id].to_lat_lon_zed(lat_tg, lon_tg)
79
+ self.fire_paths.append(Path(step=self.step_,
80
+ path=[[b_lat, b_lon, b_zed], [r_lat, r_lon, r_zed]],
81
+ color=param_.GREEN_COLOR,
82
+ d_index=blue_id))
83
+
84
+ for red_id, blue_id in np.argwhere(0 < env.playground.reds_have_fired_blues):
85
+ b_lat, b_lon, b_zed = env.blue_team.drones[blue_id].to_lat_lon_zed(lat_tg, lon_tg)
86
+ r_lat, r_lon, r_zed = env.red_team.drones[red_id].to_lat_lon_zed(lat_tg, lon_tg)
87
+ self.fire_paths.append(Path(step=self.step_,
88
+ path=[[b_lat, b_lon, b_zed], [r_lat, r_lon, r_zed]],
89
+ color=param_.BLACK_COLOR,
90
+ d_index=red_id))
91
+
92
+ def get_df(self):
93
+
94
+ fire_df = pd.DataFrame(self.fire_paths)
95
+
96
+ df_columns = ['step', 'isBlue', 'd_index', 'lat', 'lon', 'zed', 'color']
97
+ blue_df = pd.DataFrame(self.blue_data, columns=df_columns)
98
+ red_df = pd.DataFrame(self.red_data, columns=df_columns)
99
+
100
+ blue_path_df = []
101
+ red_path_df = []
102
+
103
+ for d_index in range(self.env.nb_blues):
104
+ blue_path_df.append(self._get_path_df(blue_df, d_index, color=param_.BLUE_COLOR))
105
+ for d_index in range(self.env.nb_reds):
106
+ red_path_df.append(self._get_path_df(red_df, d_index, color=param_.RED_COLOR))
107
+
108
+ return blue_df, red_df, fire_df, blue_path_df, red_path_df
109
+
110
+ def _get_path_df(self, drone_df: pd.DataFrame, d_index: int, color: int = param_.BLUE_COLOR) -> pd.DataFrame:
111
+
112
+ traj_length = param_.TRAJ_LENGTH
113
+
114
+ path_total = drone_df[['lon', 'lat', 'zed', 'step']][drone_df.d_index == d_index].values.tolist()
115
+ path = ([Path(path_total[:step+1], step, d_index, color) if step < traj_length
116
+ else Path(path_total[step - traj_length:step+1], step, d_index, color)
117
+ for step in range(len(path_total))])
118
+ path_df = pd.DataFrame(path, columns=['path', 'step', 'd_index', 'color'])
119
+ return path_df
param_.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import numpy as np
3
+
4
+ STEP = 1 # seconds per time step
5
+ DURATION = 200 # seconds
6
+
7
+ POLICY_FOLDER = 'default_policies'
8
+
9
+ STEP_COST = 0.01
10
+ OOB_COST = 0.8 # Out Of Bound : when the drone is below 0 or above a PERIMETER_Z
11
+ RED_SHOT_REWARD = 10 # when a red drone is shot
12
+ TARGET_HIT_COST = 10 # when a red drone hits the target
13
+ THREAT_WEIGHT = 0 # when reds are close to the target (* function of the red distance)
14
+ STRAIGHT_ACTION_COST = 0.04 # when reds do not follow the shortest path
15
+ TTL_COST = 0.7 # when a red is still alive after its TTL: it is a failure for both blues and reds
16
+
17
+ TTL_RATIO = 2 # margin for red drones to get to the target if they went full speed
18
+ TTL_MIN = 4 # at least to succeed the mission : ttl = TTL_MIN + vmax * TTL_RATIO
19
+
20
+ ELEVATION_SCALE = 1
21
+ TRAJ_LENGTH = 6
22
+ SIMU_SPEED = 0.2
23
+
24
+
25
+ """
26
+ the playground parameters
27
+ """
28
+
29
+ PERIMETER = 5000
30
+ PERIMETER_Z = 600
31
+
32
+ # PERIMETER of the ground zone to defend
33
+ GROUNDZONE = 100
34
+
35
+ # position in LATLON
36
+ LATLON = {'Paris':
37
+ {'lat': 48.865879, 'lon': 2.319827},
38
+ 'Fonsorbes':
39
+ {'lat': 43.54, 'lon': 1.25},
40
+ 'San Francisco':
41
+ {'lat': 37.7737283, 'lon': -122.4342383},
42
+ 'Puilaurens':
43
+ {'lat': 42.803943093860894, 'lon': 2.299540897567384},
44
+ }
45
+
46
+ """
47
+ the Team Parameters
48
+ """
49
+
50
+ # blue team init
51
+
52
+ BLUES = 12
53
+
54
+ BLUES_PER_CIRCLE = [3, 3, 4, 4, 4, 4]
55
+ BLUE_CIRCLES_RHO = [500, 900, 1400, 1600, 2000, 2500]
56
+ BLUE_CIRCLES_THETA = [0, -np.pi/3, -np.pi, -np.pi/2, 0, np.pi/3]
57
+ BLUE_CIRCLES_ZED = [200, 250, 250, 100, 250, 100]
58
+ BLUE_DISTANCE_FACTOR = 1
59
+
60
+ BLUE_IS_UNKILLABLE = True
61
+
62
+
63
+ BLUE_SPEED_INIT = 1 # in ratio to max_speed
64
+
65
+ BLUE_COLOR = [0, 0, 150, 120]
66
+ BLUE_DEAD_COLOR = [20, 20, 60]
67
+
68
+ # red team init
69
+
70
+ REDS = 12
71
+
72
+ RED_SQUADS = [1, 1, 1, 1, 1, 15]
73
+ RED_SQUADS_RHO = [1000, 700, 1000, 1200, 1500, 2000]
74
+ RED_SQUADS_THETA = np.pi * np.array([0, 1/4, -1/4, -1/2, 1/2, 0])
75
+ RED_SQUADS_ZED = [200, 200, 100, 250, 200, 100]
76
+ RED_DISTANCE_FACTOR = 1
77
+
78
+
79
+ RED_RHO_NOISE = [60, 60, 100, 200, 200, 300]
80
+ RED_THETA_NOISE = np.pi * np.array([1/5, 1/2, 1, 1, 1, 1])
81
+ RED_ZED_NOISE = [60, 50, 10, 10, 50, 60]
82
+
83
+ RED_SPEED_INIT = 0.2 # in ratio to max_speed
84
+
85
+ RED_COLOR = [150, 0, 0, 120]
86
+ RED_DEAD_COLOR = [120, 50, 30]
87
+ RED_SUCCESS_COLOR = [200, 200, 0]
88
+ BLACK_COLOR = [0, 0, 0]
89
+ GREEN_COLOR = [0, 255, 255]
90
+
91
+ """
92
+ the Drone Parameters
93
+ """
94
+
95
+ g = 9.81
96
+
97
+ DRONE_MODEL = ['beta', 'alpha'] # blue = DRONE_MODEl[1]
98
+
99
+ DRONE_MODELS = {
100
+ 'alpha': {
101
+ 'angle_to_neutralisation': np.pi / 4, # in rad
102
+ 'distance_to_neutralisation': 250, # in m
103
+ 'duration_to_neutralisation': 2, # in s
104
+ 'Cxy': 0.2, # horizontal air resistance = Cxy * v^2
105
+ 'Cz': 0.7, # vertical air resistance
106
+ 'mass': 50, # kg
107
+ 'Fz_min_ratio': 0.6, # how much weight is compensated (below 1 => drone goes down)
108
+ 'Fz_max_ratio': 1.4, # how much weight is compensated (>1 => drone goes up)
109
+ 'Fxy_ratio': 1, # Force xy relative to weight
110
+ },
111
+ 'beta': {
112
+ 'angle_to_neutralisation': np.pi / 4,
113
+ 'distance_to_neutralisation': 250,
114
+ 'duration_to_neutralisation': np.inf,
115
+ 'Cxy': 0.3, # horizontal air resistance : link to speed max by the relation Fxy_max = Cxy * Speedxy_max
116
+ 'Cz': 0.8, # vertical air resistance : link to speed max by the relation Fz_max = Cz * Speedz_max
117
+ 'mass': 40, # kg
118
+ 'Fz_min_ratio': 0.5, # how much weight is compensated (below 1 => drone goes down)
119
+ 'Fz_max_ratio': 1.8, # how much weight is compensated (>1 => drone goes up)
120
+ 'Fxy_ratio': 0.6, # Force xy relative to weight
121
+ },
122
+ }
playground.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from dataclasses import dataclass
3
+
4
+ import param_
5
+ from settings import Settings
6
+ from drone import Drone
7
+
8
+
9
+ @dataclass
10
+ class Playground:
11
+ """
12
+ This is a cylindrical 3D-env where blue drones defend a central zone from the attack of red drones
13
+ the playground manages also the interactions between foe-drones such as the firing
14
+ """
15
+
16
+ perimeter = Settings.perimeter
17
+ perimeter_z = Settings.perimeter_z
18
+ groundzone = Settings.groundzone
19
+
20
+ env: object
21
+ blue_drones: [Drone]
22
+ red_drones: [Drone]
23
+
24
+ def __post_init__(self):
25
+ # creates the fire matrices
26
+ self.blues_have_fired_reds = np.zeros(shape=(len(self.blue_drones),
27
+ len(self.red_drones)), dtype=int)
28
+ self.reds_have_fired_blues = np.zeros(shape=(len(self.red_drones),
29
+ len(self.blue_drones)), dtype=int)
30
+
31
+ # how long the drone needs to have the other in target
32
+ self.blue_shots_to_kill = param_.DRONE_MODELS[param_.DRONE_MODEL[True]]['duration_to_neutralisation']
33
+ self.red_shots_to_kill = param_.DRONE_MODELS[param_.DRONE_MODEL[False]]['duration_to_neutralisation']
34
+ self.blue_shots_to_kill //= param_.STEP
35
+ self.red_shots_to_kill //= param_.STEP
36
+
37
+ # how far can a drone shoot
38
+ self.distance_blue_shot = param_.DRONE_MODELS[param_.DRONE_MODEL[True]]['distance_to_neutralisation']
39
+ self.distance_red_shot = param_.DRONE_MODELS[param_.DRONE_MODEL[False]]['distance_to_neutralisation']
40
+
41
+ def reset(self):
42
+ self.blues_have_fired_reds[...] = 0
43
+ self.reds_have_fired_blues[...] = 0
44
+
45
+ def get_observation(self):
46
+ return self.blues_have_fired_reds / self.blue_shots_to_kill, \
47
+ self.reds_have_fired_blues / self.red_shots_to_kill
48
+
49
+ def step(self):
50
+ """
51
+ determines who has fired who, and who is dead in the end
52
+ :return: Tuple with list of Blue and Reds dead. (if a blue or a red is dead, the sequence is over)
53
+ """
54
+ # gets who has fired who in this step
55
+ blues_fire_reds = np.array([[blue.fires_(red) for red in self.red_drones] for blue in self.blue_drones])
56
+ reds_fire_blues = np.array([[red.fires_(blue) for blue in self.blue_drones] for red in self.red_drones])
57
+
58
+ # if the foe is no longer seen, the count restarts from 0
59
+ self.blues_have_fired_reds *= blues_fire_reds
60
+ self.reds_have_fired_blues *= reds_fire_blues
61
+
62
+ # and the count is incremented for the others
63
+ self.blues_have_fired_reds += blues_fire_reds
64
+ self.reds_have_fired_blues += reds_fire_blues
65
+
66
+ # np magic : first find the list of duos shooter/shot, keep the shots (only once)
67
+ red_deads = np.unique(np.argwhere(self.blues_have_fired_reds >= self.blue_shots_to_kill).T[1])
68
+ blue_deads = np.unique(np.argwhere(self.reds_have_fired_blues >= self.red_shots_to_kill).T[1])
69
+
70
+
71
+ # tell the drones that they are dead
72
+ for drone_id in blue_deads:
73
+ self.blue_drones[drone_id].is_killed(is_blue=True)
74
+ for drone_id in red_deads:
75
+ self.red_drones[drone_id].is_killed(is_blue=False)
76
+
77
+ # consider only living drones
78
+ blue_drones = [drone for drone in self.blue_drones if drone.is_alive]
79
+ red_drones = [drone for drone in self.red_drones if drone.is_alive]
80
+
81
+ bf_obs, rf_obs = self.get_observation()
82
+ bf_reward = rf_reward = 0
83
+ remaining_blues, remaining_reds = len(blue_drones), len(red_drones),
84
+ blue_shots, red_shots = len(blue_deads), len(red_deads)
85
+
86
+ if blue_shots + red_shots > 0:
87
+ print('someone is killed: {0} blues and {1} reds'.format(blue_shots, red_shots))
88
+
89
+ return bf_obs, bf_reward, remaining_blues, blue_shots, rf_obs, rf_reward, remaining_reds, red_shots
90
+
91
+
92
+
policies/_last/_b1r1/blues_last.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d64d434b3ea20f669d130b3871196920c9efe6263625b050dfc1841e141e89c
3
+ size 3146628
policies/_last/_b1r1/reds_last.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03cc75c12317da0fada0de5c5e161855c87ec5fab05e435f6bbf4c08bc07aa13
3
+ size 3148185
procfile.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ web: sh setup.sh && streamlit run app.py
redux_wrap.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gym
2
+ from gym import spaces
3
+ import numpy as np
4
+
5
+ from settings import Settings
6
+
7
+
8
+ class ReduxWrapper(gym.Wrapper):
9
+ """
10
+ :param env: (gym.Env) Gym environment that will be wrapped
11
+ """
12
+
13
+ def __init__(self, env, minus_blue=0, minus_red=0):
14
+
15
+ # action space is reduced
16
+ nb_blues, nb_reds = Settings.blues, Settings.reds
17
+
18
+ self.nb_blues = nb_blues - minus_blue
19
+ self.nb_reds = nb_reds - minus_red
20
+
21
+ self.blue_deads = minus_blue
22
+ self.red_deads = minus_red
23
+
24
+ env.observation_space = spaces.Tuple((
25
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, 6), dtype=np.float32),
26
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, 6), dtype=np.float32),
27
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, self.nb_reds), dtype=np.float32),
28
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, self.nb_blues), dtype=np.float32)))
29
+
30
+ env.action_space = spaces.Tuple((
31
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, 3), dtype=np.float32),
32
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, 3), dtype=np.float32)))
33
+
34
+ super(ReduxWrapper, self).__init__(env)
35
+
36
+ def reset(self):
37
+ """
38
+ Reset the environment
39
+ """
40
+ obs = self.env.reset()
41
+ obs = self.post_obs(obs)
42
+
43
+ return obs
44
+
45
+ def step(self, action):
46
+
47
+ # action needs expansion
48
+ blue_action, red_action = action
49
+ if self.blue_deads:
50
+ blue_action = np.vstack((blue_action, np.zeros((self.blue_deads, 3))))
51
+ if self.red_deads:
52
+ red_action = np.vstack((red_action, np.zeros((self.red_deads, 3))))
53
+ action = blue_action, red_action
54
+
55
+ obs, reward, done, info = self.env.step(action)
56
+
57
+ obs = self.post_obs(obs)
58
+
59
+ return obs, reward, done, info
60
+
61
+ def post_obs(self, obs):
62
+
63
+ # obs needs reduction
64
+ blue_obs, red_obs, blues_fire, reds_fire = obs
65
+
66
+ if not self.blue_deads:
67
+ pass
68
+ else:
69
+ blue_obs = blue_obs[:-self.blue_deads]
70
+ blues_fire = blues_fire[:-self.blue_deads]
71
+ reds_fire = reds_fire[:, :-self.blue_deads]
72
+
73
+ if not self.red_deads:
74
+ pass
75
+ else:
76
+ red_obs = red_obs[:-self.red_deads]
77
+ reds_fire = reds_fire[:-self.red_deads]
78
+ blues_fire = blues_fire[:, :-self.red_deads]
79
+
80
+ return blue_obs, red_obs, blues_fire, reds_fire
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ pandas~=2.0.2
2
+ numpy~=1.24.3
3
+ gym~=0.26.2
reward_wrap.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gym
3
+
4
+ import param_
5
+ from settings import Settings
6
+
7
+
8
+ class RewardWrapper(gym.Wrapper):
9
+ """
10
+ :param env: (gym.Env) Gym environment that will be wrapped
11
+ """
12
+
13
+ def __init__(self, env, is_blue: bool = True, is_double: bool = False):
14
+
15
+ self.is_blue = is_blue
16
+ self.is_double = is_double
17
+
18
+ super(RewardWrapper, self).__init__(env)
19
+
20
+ def reset(self):
21
+ """
22
+ Reset the environment
23
+ """
24
+ obs = self.env.reset()
25
+ return obs
26
+
27
+ def step(self, action):
28
+ """
29
+ :param action: ([float] or int) Action taken by the agent
30
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional informations
31
+ """
32
+
33
+ obs, reward, done, info = self.env.step(action)
34
+
35
+ reward, done, info = self.situation_evaluation(info)
36
+
37
+ return obs, reward, done, info
38
+
39
+ def situation_evaluation(self, info):
40
+
41
+ if self.is_double:
42
+ if info['remaining blues'] * info['remaining reds'] == 0:
43
+ return 0, True, info
44
+ else:
45
+ return 0, False, info
46
+
47
+ else:
48
+ if self.is_blue:
49
+ if info['remaining reds'] == 0:
50
+ return param_.WIN_REWARD, True, info
51
+ if info['remaining blues'] == 0:
52
+ return -param_.WIN_REWARD, True, info
53
+ if 0 < info['blue_oob']:
54
+ return -param_.OOB_COST, True, info
55
+ if info['ttl'] < 0:
56
+ return -param_.TTL_COST, True, info # blues have been too long to shoot the red drone
57
+ # else continues
58
+ reward = -param_.STEP_COST
59
+ reward -= info['weighted_red_distance'] * param_.THREAT_WEIGHT
60
+ reward -= info['hits_target'] * param_.TARGET_HIT_COST
61
+ reward += info['red_shots'] * param_.RED_SHOT_REWARD
62
+ reward += info['distance_to_straight_action'] * param_.STRAIGHT_ACTION_COST
63
+ return reward, False, info
64
+ else: # red is learning
65
+ done = False
66
+ reward = -param_.STEP_COST
67
+ reward += info['weighted_red_distance'] * param_.THREAT_WEIGHT
68
+ reward += info['hits_target'] * param_.TARGET_HIT_COST
69
+ reward -= info['red_shots'] * param_.RED_SHOT_REWARD
70
+ reward -= info['distance_to_straight_action'] * param_.STRAIGHT_ACTION_COST
71
+ if info['remaining reds'] == 0:
72
+ done = True
73
+ return reward, done, info
74
+ if info['remaining blues'] == 0:
75
+ done = True
76
+ reward += info['remaining reds'] * param_.TARGET_HIT_COST
77
+ return reward, done, info
78
+ if 0 < info['red_oob']:
79
+ done = True
80
+ reward -= param_.OOB_COST
81
+ if info['ttl'] < 0:
82
+ done = True
83
+ reward -= param_.TTL_COST * info['remaining reds'] # reds have been too long to hit the target
84
+ # else continues
85
+
86
+ return reward, done, info
rotate_wrap.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import gym
3
+
4
+ from drone import Drone
5
+
6
+
7
+ class RotateWrapper(gym.Wrapper):
8
+ """
9
+ :param env: (gym.Env) Gym environment that will be wrapped
10
+ """
11
+
12
+ def __init__(self, env):
13
+ # Call the parent constructor, so we can access self.env later
14
+ super(RotateWrapper, self).__init__(env)
15
+ self.angle = 0
16
+
17
+ def reset(self):
18
+ """
19
+ Reset the environment
20
+ """
21
+ obs = self.env.reset()
22
+
23
+ obs = self.post_obs(obs)
24
+
25
+ return obs
26
+
27
+ def step(self, action):
28
+ """
29
+ :param action: ([float] or int) Action taken by the agent
30
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional informations
31
+ """
32
+
33
+ action = self.rotate_action(action)
34
+
35
+ obs, reward, done, info = self.env.step(action)
36
+
37
+ obs = self.post_obs(obs)
38
+
39
+ return obs, reward, done, info
40
+
41
+ def post_obs(self, obs):
42
+ self.angle = self.get_angle(obs)
43
+ return self.rotate_obs(obs)
44
+
45
+ def get_angle(self, obs: np.ndarray) -> float:
46
+ blue_obs, red_obs, blue_fire, red_fire = obs
47
+ sigma = 0
48
+
49
+ for this_obs in (blue_obs, red_obs):
50
+ for d in this_obs:
51
+ sigma += d[0] * np.exp(1j * d[1])
52
+ angle = np.angle(sigma)
53
+ return angle
54
+
55
+ def rotate_obs(self, obs):
56
+ blue_obs, red_obs, blue_fire, red_fire = obs
57
+
58
+ rotated_blue_obs = []
59
+ rotated_red_obs = []
60
+
61
+ for this_obs, is_blue, rotated_obs in zip((blue_obs, red_obs),
62
+ (True, False),
63
+ (rotated_blue_obs, rotated_red_obs)):
64
+ drone = Drone(is_blue=is_blue)
65
+ for d in this_obs:
66
+
67
+ d_meter = np.zeros(6,)
68
+ # get the pos and speed in cylindrical coordinated in meters
69
+ d_meter[:3] = drone.from_norm(d[:3], drone.max_positions, drone.min_positions)
70
+ d_meter[3:6] = drone.from_norm(d[3:6], drone.max_speeds, drone.min_speeds)
71
+
72
+ # rotate
73
+ d_meter[1] -= self.angle
74
+ d_meter[4] -= self.angle
75
+
76
+ # back to norm
77
+ d[:3] = drone.to_norm(d_meter[:3], drone.max_positions, drone.min_positions)
78
+ d[3:6] = drone.to_norm(d_meter[3:6], drone.max_speeds, drone.min_speeds)
79
+
80
+ rotated_obs.append(d)
81
+
82
+ del drone
83
+
84
+ return np.array(rotated_blue_obs), np.array(rotated_red_obs), blue_fire, red_fire
85
+
86
+ def rotate_action(self, action):
87
+
88
+ blue_action, red_action = action
89
+ blue_action = np.array(list(map(lambda x: [x[0], (x[1]+self.angle/2/np.pi) % 1, x[2]], blue_action)))
90
+ red_action = np.array(list(map(lambda x: [x[0], (x[1]+self.angle/2/np.pi) % 1, x[2]], red_action)))
91
+ action = blue_action, red_action
92
+
93
+ return action
runner.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ from swarm_policy import SwarmPolicy
4
+
5
+
6
+ def run_episode(env, obs, blues: int, reds: int):
7
+ blue_policy = SwarmPolicy(blues=blues, reds=reds, is_blue=True)
8
+ red_policy = SwarmPolicy(blues=blues, reds=reds, is_blue=False)
9
+ sum_reward = 0
10
+ done = False
11
+ while not done:
12
+ action = blue_policy.predict(obs), red_policy.predict(obs)
13
+ obs, reward, done, info = env.step(action)
14
+ sum_reward += reward
15
+ return obs, sum_reward, done, info
16
+
settings.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+
3
+ import param_
4
+ import streamlit as st
5
+ import numpy as np
6
+
7
+
8
+ @dataclass
9
+ class Settings:
10
+
11
+ perimeter: int = param_.PERIMETER
12
+ perimeter_z: int = param_.PERIMETER_Z
13
+ groundzone: int = param_.GROUNDZONE
14
+
15
+ latlon = param_.LATLON['Paris']['lat'], param_.LATLON['Paris']['lon']
16
+
17
+ blues: int = param_.BLUES
18
+
19
+ blues_per_circle = np.array(param_.BLUES_PER_CIRCLE)
20
+ blue_circles_rho = param_.BLUE_CIRCLES_RHO
21
+ blue_circles_theta = param_.BLUE_CIRCLES_THETA
22
+ blue_circles_zed = param_.BLUE_CIRCLES_ZED
23
+ blue_distance_factor: float = param_.BLUE_DISTANCE_FACTOR
24
+
25
+ is_unkillable: bool = param_.BLUE_IS_UNKILLABLE
26
+
27
+ blue_speed_init: int = param_.BLUE_SPEED_INIT
28
+
29
+ reds: int = param_.REDS
30
+
31
+ red_squads = param_.RED_SQUADS
32
+ red_squads_rho = np.array(param_.RED_SQUADS_RHO)
33
+ red_squads_theta = param_.RED_SQUADS_THETA
34
+ red_squads_zed = param_.RED_SQUADS_ZED
35
+ red_distance_factor: float = param_.RED_DISTANCE_FACTOR
36
+
37
+ red_rho_noise = np.array(param_.RED_RHO_NOISE)
38
+ red_theta_noise = np.array(param_.RED_THETA_NOISE)
39
+ red_zed_noise = np.array(param_.RED_ZED_NOISE)
40
+
41
+ red_speed_init: int = param_.RED_SPEED_INIT
42
+
43
+ policy_folder: str = param_.POLICY_FOLDER
44
+
45
+
46
+ def define_(with_streamlit: bool = True, blues: int = Settings.blues, reds: int = Settings.reds):
47
+ """"
48
+ shows the blue and red swarms in Streamlit
49
+ :return:
50
+ """
51
+ blues = blues
52
+ reds = reds
53
+
54
+ if with_streamlit:
55
+ st.title('Blues against Reds by hexamind.ai')
56
+ st.write('controlled by Reinforcement Learning.')
57
+ st.text('<- Set parameters')
58
+
59
+ st.sidebar.subheader("Define the battlefield")
60
+ blues = st.sidebar.slider("how many blues on defense?", 1, 20, 6)
61
+ Settings.blues = blues
62
+ blue_dispersion = st.sidebar.slider("set the average blue dispersion", 0.3, 1.0, 0.8)
63
+ Settings.reds = reds
64
+ reds = st.sidebar.slider("how many reds are on the attack?", 1, 20, 6)
65
+ Settings.reds = reds
66
+ red_dispersion = st.sidebar.slider("set the average red dispersion", 0.3, 1.0, 0.7)
67
+
68
+ Settings.blue_distance_factor = 3 * blue_dispersion
69
+ Settings.red_distance_factor = 3 * red_dispersion
70
+
71
+ location = st.sidebar.radio("Location", ['Paris', 'Puilaurens', 'San Francisco'])
72
+
73
+ lat_tg = param_.LATLON[location]['lat']
74
+ lon_tg = param_.LATLON[location]['lon']
75
+
76
+ Settings.latlon = lat_tg, lon_tg
77
+
78
+ st.sidebar.write(
79
+ 'you probably need more drones '
80
+ 'No worries, we have plenty at www.hexamind.ai ')
81
+
82
+ return blues, reds
setup.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mkdir -p ~/.streamlit/
2
+ echo "\
3
+ [server]\n\git init
4
+ git add README.md
5
+ git commit -m "first commit"
6
+ git branch -M master
7
+ git remote add origin
8
+ headless = true\n\
9
+ port = $PORT\n\
10
+ enableCORS = false\n\
11
+ \n\
12
+ " > ~/.streamlit/config.toml
sort_wrap.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import gym
3
+
4
+
5
+ class SortWrapper(gym.Wrapper):
6
+ """
7
+ :param env: (gym.Env) Gym environment that will be wrapped
8
+ """
9
+
10
+ def __init__(self, env):
11
+ # Call the parent constructor, so we can access self.env later
12
+ super(SortWrapper, self).__init__(env)
13
+ self.blue_signature = None
14
+ self.red_signature = None
15
+
16
+ def reset(self):
17
+ """
18
+ Reset the environment
19
+ """
20
+ obs = self.env.reset()
21
+ obs = self.sort_obs(obs)
22
+
23
+ return obs
24
+
25
+ def step(self, action):
26
+ """
27
+ :param action: ([float] or int) Action taken by the agent
28
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional informations
29
+ """
30
+
31
+ action = self.unsort_action(action)
32
+
33
+ obs, reward, done, info = self.env.step(action)
34
+
35
+ obs = self.post_obs(obs)
36
+
37
+ return obs, reward, done, info
38
+
39
+ def post_obs(self, obs):
40
+ return self.sort_obs(obs)
41
+
42
+ def sort_obs(self, obs):
43
+
44
+ blue_obs, red_obs, blue_fire, red_fire = obs
45
+
46
+ blue_obs, self.blue_signature = self.sort_and_sign(blue_obs)
47
+ red_obs, self.red_signature = self.sort_and_sign(red_obs)
48
+
49
+ blue_fire = self.unsort_matrix_with_signatures(blue_fire, self.blue_signature, self.red_signature)
50
+ red_fire = self.unsort_matrix_with_signatures(red_fire, self.red_signature, self.blue_signature)
51
+
52
+ obs = blue_obs, red_obs, blue_fire, red_fire
53
+
54
+ return obs
55
+
56
+ def unsort_action(self, action):
57
+
58
+ blue_action, red_action = action
59
+
60
+ unsorted_blue_action = self.unsort_with_signature(blue_action, self.blue_signature)
61
+ unsorted_red_action = self.unsort_with_signature(red_action, self.red_signature)
62
+
63
+ action = unsorted_blue_action, unsorted_red_action
64
+
65
+ return action
66
+
67
+ def sort_and_sign(self, an_array: np.ndarray) -> [np.ndarray, []]:
68
+ """
69
+ allows to sort an ndarray of 6 columns of floats and to keep the "signature": the positions of the items
70
+ before being sorted in order to retrieve the initial order after modifications of the arrays.
71
+ the order is retrieved with the unsort_with_signature function
72
+ :param an_array:
73
+ :return:
74
+ """
75
+ zip_list = zip(an_array, range(len(an_array)))
76
+ zip_sorted = sorted(zip_list, key=lambda x: (x[0][0], x[0][1], x[0][2], x[0][3], x[0][4], x[0][5]))
77
+ sorted_array, signature = zip(*zip_sorted)
78
+ return np.array(sorted_array), signature
79
+
80
+ def unsort_with_signature(self, an_array: np.ndarray, signature: []) -> np.ndarray:
81
+ """
82
+ see above
83
+ :param an_array:
84
+ :param signature:
85
+ :return:
86
+ """
87
+ zip_list = zip(signature, an_array)
88
+ zip_unsorted = sorted(zip_list)
89
+ _, unsorted = zip(*zip_unsorted)
90
+ return np.array(unsorted)
91
+
92
+ def unsort_matrix_with_signatures(self, matrix: np.ndarray, sign_line: np.ndarray, sign_col: np.ndarray) \
93
+ -> np.ndarray:
94
+
95
+ matrix = self.unsort_with_signature(matrix, sign_line)
96
+ matrix = self.unsort_with_signature(matrix.T, sign_col).T
97
+
98
+ return matrix
swarm_policy.py ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ import numpy as np
3
+ from stable_baselines3 import SAC
4
+ from os import path
5
+
6
+ import param_
7
+ from drone import Drone
8
+
9
+
10
+ @dataclass
11
+ class SwarmPolicy:
12
+ blues: int
13
+ reds: int
14
+ is_blue: bool
15
+ model: object = None
16
+ count: int = 0
17
+
18
+ def __post_init__(self):
19
+
20
+ dir_path = "policies/last" + f"/b{self.blues}r{self.reds}/"
21
+ model_path = dir_path + ("blues_last.zip" if self.is_blue else "reds_last.zip")
22
+ if path.exists(model_path):
23
+ print("model loaded:" + model_path)
24
+ self.model = SAC.load(model_path, verbose=1)
25
+
26
+ # predicts from the model or from a simple centripete model
27
+ def predict(self, obs):
28
+
29
+ self.count += 1
30
+
31
+ if self.model:
32
+ action, _ = self.model.predict(obs)
33
+ # verbose = 'prediction from ' + (' blue model' if self.is_blue else ' red model') + ' at ' + str(self.count)
34
+ # print(verbose)
35
+ return action
36
+ else:
37
+ if self.is_blue:
38
+ return self._improved_attack_predict(obs)
39
+ else:
40
+ return self._simple_predict(obs)
41
+
42
+ # the default policy
43
+ def _simple_predict(self, obs):
44
+ simple_obs = _decentralise(obs[0:self.blues*6] if self.is_blue else obs[self.blues*6:(self.blues+self.reds)*6])
45
+ drone = Drone(is_blue=self.is_blue)
46
+ action = np.array([])
47
+ nb_drones = self.blues if self.is_blue else self.reds
48
+ for d in range(nb_drones):
49
+ assign_pos_speed(drone, d, simple_obs)
50
+ '''
51
+ pos_n, speed_n = simple_obs[d*6:d*6+3], simple_obs[d*6+3:d*6+6]
52
+ pos = drone.from_norm(pos_n, drone.max_positions, drone.min_positions)
53
+ drone.position = pos
54
+ speed = drone.from_norm(speed_n, drone.max_speeds, drone.min_speeds)
55
+ drone.speed = speed
56
+ '''
57
+ action_d = drone.simple_red()
58
+ action = np.hstack((action, action_d))
59
+
60
+ action = _centralise(action)
61
+ return action
62
+
63
+
64
+ # the default attack policy
65
+ def _attack_predict(self, obs):
66
+
67
+ def assign_targets(friends_obs, foes_obs):
68
+ '''
69
+ this current version is simplistic: all friends target the first foe :)
70
+ :param obs:
71
+ :return:
72
+ '''
73
+ friends_nb = len(friends_obs) // 6
74
+ foes_nb = len(foes_obs) // 6
75
+
76
+ friends_targets = -np.ones(friends_nb, dtype=int)
77
+ while -1 in friends_targets:
78
+ for foe in range(foes_nb):
79
+ foe_pos = _denorm(foes_obs[foe*6:foe*6+3])
80
+ foe_pos_z = foe_pos[0] * np.exp(1j * foe_pos[1])
81
+ min_distance = np.inf
82
+ closest_friend = -1
83
+ for friend in range(friends_nb):
84
+ if friends_targets[friend] == -1:
85
+ friend_pos = _denorm(friends_obs[friend*6:friend*6+3])
86
+ friend_pos_z = friend_pos[0] * np.exp(1j * friend_pos[1])
87
+ distance = np.abs(foe_pos_z - friend_pos_z) ** 2 + (friend_pos[2] - foe_pos[2]) ** 2
88
+ if distance < min_distance:
89
+ min_distance = distance
90
+ closest_friend = friend
91
+ friends_targets[closest_friend] = foe
92
+
93
+ return friends_targets
94
+
95
+
96
+ # gets the friends and foes obs
97
+ blue_obs = _decentralise(obs[0:self.blues * 6])
98
+ red_obs = _decentralise(obs[self.blues * 6:(self.blues + self.reds) * 6])
99
+ friends_obs = blue_obs if self.is_blue else red_obs
100
+ foes_obs = red_obs if self.is_blue else blue_obs
101
+
102
+ # assign red targets to blues
103
+ friends_targets = assign_targets(friends_obs, foes_obs)
104
+
105
+ friend_drone = Drone(is_blue=self.is_blue)
106
+ foe_drone = Drone(is_blue=not self.is_blue)
107
+
108
+ action = np.array([])
109
+ nb_drones = self.blues if self.is_blue else self.reds
110
+ for d in range(nb_drones):
111
+
112
+ # assign denormalised position and speed (in m and m/s) to foe drone
113
+ friend_drone = assign_pos_speed(friend_drone, d, friends_obs)
114
+ foe_drone_id = friends_targets[d]
115
+
116
+ foe_drone = assign_pos_speed(foe_drone, foe_drone_id, foes_obs)
117
+ target, time_to_target = calculate_target(friend_drone, foe_drone)
118
+ action_d = friend_drone.simple_red(target=target, z_margin=0)
119
+ action = np.hstack((action, action_d))
120
+
121
+ action = _centralise(action)
122
+ return action
123
+
124
+ # the improved manual attack policy
125
+ def _improved_attack_predict(self, obs):
126
+ # TODO: revamp the algo as follows
127
+ # start from closest reds, find all blues that are compatible with some margin
128
+ # among those blues, choose the blue whose first target is the latest
129
+ # until there is no red left
130
+ # in case there are blues left overs, restart the process, or converge to zero, or..
131
+ # or we decide in advance how many blues we want on the closest and populate several blues againts reds
132
+ # at the beginning
133
+ # TODO: check that reds are correctly ordered
134
+ # TODO : add margin in the params
135
+ # TODO : case of the foe is not reachable
136
+
137
+ # gets the friends and foes obs
138
+ blue_obs = _decentralise(obs[0:self.blues * 6])
139
+ red_obs = _decentralise(obs[self.blues * 6:(self.blues + self.reds) * 6])
140
+ friends_obs = blue_obs if self.is_blue else red_obs
141
+ foes_obs = red_obs if self.is_blue else blue_obs
142
+
143
+ friends_nb = self.blues if self.is_blue else self.reds
144
+ foes_nb = self.reds if self.is_blue else self.blues
145
+
146
+ friend_drones = []
147
+ for friend_id in range(friends_nb):
148
+ # assign denormalised position and speed (in m and m/s) to foe drone
149
+ friend_drone = Drone(is_blue=self.is_blue)
150
+ friend_drone = assign_pos_speed(friend_drone, friend_id, friends_obs)
151
+ friend_drones.append(friend_drone)
152
+
153
+ foe_drones = []
154
+ for foe_id in range(foes_nb):
155
+ # assign denormalised position and speed (in m and m/s) to foe drone
156
+ foe_drone = Drone(is_blue=not self.is_blue)
157
+ foe_drone = assign_pos_speed(foe_drone, foe_id, foes_obs)
158
+ foe_drones.append(foe_drone)
159
+
160
+ targets = np.zeros((friends_nb, foes_nb, 3))
161
+ best_targets = -np.ones((friends_nb, 3))
162
+ times_to_target = -np.ones((friends_nb, foes_nb))
163
+ calculation_done = -np.ones(friends_nb)
164
+ friend_chosen = -np.ones(friends_nb)
165
+
166
+ foe_id = 0
167
+ friends_chosen = 0
168
+ while foe_id < foes_nb-1 and friends_chosen < friends_nb:
169
+ best_friend = -1
170
+ best_target = np.zeros(3)
171
+ longest_time = -np.inf
172
+ foe_drone = foe_drones[foe_id]
173
+ for friend_id in range(friends_nb):
174
+ if friend_chosen[friend_id] == -1: # the friend has no foe target assigned
175
+
176
+ friend_drone = friend_drones[friend_id]
177
+
178
+ if calculation_done[friend_id] == -1: # it has not already been calculated
179
+ target_, time_to_target, is_a_catch = calculate_target(friend_drone, foe_drone)
180
+ times_to_target[friend_id][foe_id] = time_to_target if is_a_catch else np.inf
181
+ targets[friend_id][foe_id] = target_
182
+ if times_to_target[friend_id][foe_id] < np.inf: # it is a catch
183
+
184
+ if calculation_done[friend_id] == -1: # calculation of time with other drones has not been done
185
+ for foe_idx in range(foe_id + 1, foes_nb):
186
+ foex_drone = foe_drones[foe_idx]
187
+ target_, time_to_target, is_a_catch = calculate_target(friend_drone, foex_drone)
188
+ times_to_target[friend_id][foe_idx] = time_to_target if is_a_catch else np.inf
189
+ targets[friend_id][foe_idx] = target_
190
+ calculation_done[friend_id] = 1
191
+ closest_target = np.min(times_to_target[friend_id, foe_id+1:])
192
+ if longest_time < closest_target:
193
+ longest_time = closest_target
194
+ best_friend = friend_id
195
+ best_target = targets[friend_id][foe_id]
196
+
197
+ best_targets[best_friend] = best_target
198
+ friend_chosen[best_friend] = foe_id
199
+ friends_chosen += 1
200
+ foe_id += 1
201
+
202
+ if friends_chosen < friends_nb:
203
+ last_foe = foes_nb - 1
204
+ for friend_id in range(friends_nb):
205
+ if friend_chosen[friend_id] == -1:
206
+ if times_to_target[friend_id, last_foe] == -1:
207
+ friend_drone, foe_drone = friend_drones[friend_id], foe_drones[last_foe]
208
+ target_, time_to_target, is_a_catch = calculate_target(friend_drone, foe_drone)
209
+ targets[friend_id][last_foe] = target_
210
+ closest_target_id = np.argmin(times_to_target[friend_id, :])
211
+ best_targets[friend_id] = targets[friend_id][closest_target_id]
212
+
213
+
214
+
215
+
216
+ action = np.array([])
217
+ for friend_id in range(friends_nb):
218
+ action_d = friend_drones[friend_id].simple_red(target=best_targets[friend_id], z_margin=0)
219
+ action = np.hstack((action, action_d))
220
+
221
+ action = _centralise(action)
222
+ return action
223
+
224
+
225
+ def assign_pos_speed(drone: Drone, d: int, obs: np.ndarray) -> Drone:
226
+ # assign denormalised position and speed (in m and m/s) to friend drone
227
+ d = int(d)
228
+ pos_n, speed_n = obs[d*6:d*6+3], obs[d*6+3:d*6+6]
229
+ pos = drone.from_norm(pos_n, drone.max_positions, drone.min_positions)
230
+ drone.position = pos
231
+ speed = drone.from_norm(speed_n, drone.max_speeds, drone.min_speeds)
232
+ drone.speed = speed
233
+ return drone
234
+
235
+
236
+ def _denorm(pos): # from norm (i.e. already decentralised) to meter
237
+ drone = Drone()
238
+ pos_meter = drone.from_norm(pos, drone.max_positions, drone.min_positions)
239
+ return pos_meter
240
+
241
+
242
+ def _decentralise(obs): # [-1,1] to [0,1]
243
+ obs = (obs+1)/2
244
+ return obs
245
+
246
+
247
+ def _centralise(act): # [0,1] to [-1,1]
248
+ act = (act - 1/2) * 2
249
+ return act
250
+
251
+
252
+ def calculate_target(blue_drone: Drone, red_drone: Drone) -> (np.ndarray(3, ), float, bool):
253
+ '''
254
+
255
+ :param blue_drone:
256
+ :param red_drone:
257
+ :return:
258
+ '''
259
+ # TODO : be more precise at the end of the discovery process
260
+
261
+ def transform(pos, delta_, theta_):
262
+ pos[0] -= delta_
263
+ pos[1] -= theta_
264
+ return pos[0] * np.exp(1j * pos[1])
265
+
266
+ def untransform_to_array(pos, delta_, theta_):
267
+ pos[0] += delta_
268
+ pos[1] += theta_
269
+ return pos
270
+
271
+ theta = red_drone.position[1]
272
+ delta = param_.GROUNDZONE
273
+
274
+ attack_pos = np.copy(blue_drone.position)
275
+ target_pos = np.copy(red_drone.position)
276
+
277
+ z_blue = transform(attack_pos, delta, theta)
278
+ z_red = np.real(transform(target_pos, delta, theta))
279
+
280
+ v_blue = blue_drone.drone_model.max_speed
281
+ v_red = red_drone.drone_model.max_speed
282
+
283
+ blue_shooting_distance = blue_drone.drone_model.distance_to_neutralisation
284
+
285
+ blue_time_to_zero = (np.abs(z_blue) - blue_shooting_distance) / v_blue
286
+ red_time_to_zero = z_red / v_red
287
+
288
+ if red_time_to_zero <= param_.STEP or red_time_to_zero < blue_time_to_zero + param_.STEP:
289
+ return np.zeros(3), red_time_to_zero, False
290
+ else:
291
+ max_target = z_red
292
+ min_target = 0
293
+ while True:
294
+ target = (max_target + min_target) / 2
295
+ blue_time_to_target = max(0, (np.abs(z_blue - target) - blue_shooting_distance) / v_blue)
296
+ red_time_to_target = np.abs(z_red - target) / v_red
297
+ if red_time_to_target - param_.STEP < blue_time_to_target <= red_time_to_target:
298
+ target = untransform_to_array((target / z_red) * target_pos, delta, theta)
299
+ return target, blue_time_to_target, True
300
+ if red_time_to_target < blue_time_to_target:
301
+ max_target = target
302
+ min_target = min_target
303
+ else: # blue_ time_to_target <= red_time_to_target -1:
304
+ max_target = max_target
305
+ min_target = target
306
+
307
+
308
+
309
+
310
+ def unitary_test(rho_blue: float, theta_blue: float, rho_red: float, theta_red: float):
311
+ '''
312
+ tests for the calculate target function
313
+ :param rho_blue:
314
+ :param theta_blue:
315
+ :param rho_red:
316
+ :param theta_red:
317
+ :return:
318
+ '''
319
+ blue_drone = Drone()
320
+ blue_drone.position = np.array([rho_blue, theta_blue, 100])
321
+ red_drone = Drone(is_blue=False)
322
+ red_drone.position = np.array([rho_red, theta_red, 100])
323
+ tg, time = calculate_target(blue_drone, red_drone)
324
+ print('rho_blue : ', rho_blue, ' theta_blue : ', theta_blue, ' rho_red : ', rho_red, ' theta_red : ', theta_red,
325
+ ' tg : ', tg, ' time : ', time)
326
+ return tg, time
327
+
328
+
329
+ def test():
330
+ '''
331
+ test for the calculate trajectory function
332
+ :return:
333
+ '''
334
+ for rho_blue in [1000]:
335
+ for theta_blue in np.pi * np.array([-1, 0.75, 0.5, 0.25, 0]):
336
+ for rho_red in [1000]:
337
+ for theta_red in np.pi * np.array([0, 1/4]):
338
+ unitary_test(rho_blue=rho_blue, theta_blue=theta_blue, rho_red=rho_red, theta_red=theta_red)
339
+ print('done')
340
+
341
+
342
+
swarmenv.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gym
2
+ from gym import spaces
3
+ import numpy as np
4
+
5
+ import param_
6
+ from settings import Settings
7
+ from playground import Playground
8
+ from team import BlueTeam, RedTeam
9
+
10
+
11
+ class SwarmEnv(gym.Env):
12
+ """
13
+ Custom 3D-Environment that follows gym interface.
14
+ This is a 3D-env where the blue drones defend a circular GROUNDZONE from a red drones attack
15
+ """
16
+
17
+ def __init__(self, blues=Settings.blues, reds=Settings.reds):
18
+ """
19
+ :param distance: the distance to the other rim
20
+ """
21
+ super(SwarmEnv, self).__init__()
22
+
23
+ self.nb_blues = blues
24
+ self.nb_reds = reds
25
+
26
+ self.blue_team = BlueTeam(number_of_drones=self.nb_blues)
27
+ self.red_team = RedTeam(number_of_drones=self.nb_reds)
28
+
29
+ self.playground = Playground(env=self, blue_drones=self.blue_team.drones, red_drones=self.red_team.drones)
30
+
31
+ self.steps = 0
32
+
33
+ self.observation_space = spaces.Tuple((
34
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, 6), dtype=np.float32),
35
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, 6), dtype=np.float32),
36
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, self.nb_reds), dtype=np.float32),
37
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, self.nb_blues), dtype=np.float32),
38
+ spaces.MultiBinary(self.nb_blues),
39
+ spaces.MultiBinary(self.nb_reds),
40
+ ))
41
+
42
+ self.action_space = spaces.Tuple((
43
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, 3), dtype=np.float32),
44
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, 3), dtype=np.float32)))
45
+
46
+ def reset(self, obs=None):
47
+ """
48
+ resets the environment as part of Gym interface
49
+ """
50
+ if obs:
51
+ blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads = obs
52
+ else:
53
+ blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads = None, None, None, None, None, None
54
+
55
+ self.blue_team.reset(obs=blue_obs)
56
+ self.red_team.reset(obs=red_obs)
57
+ self.playground.reset()
58
+ self.steps = 0
59
+
60
+ # get observations from blue and red teams
61
+ blue_obs, blue_deads = self.blue_team.get_observation()
62
+ red_obs, red_deads = self.red_team.get_observation()
63
+ blues_fire, reds_fire = self.playground.get_observation()
64
+
65
+ return blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads
66
+
67
+ def render(self, mode='human'):
68
+ pass
69
+
70
+ def step(self, action):
71
+
72
+ self.steps += 1
73
+
74
+ blue_action, red_action = action
75
+ blue_obs, blue_reward, blue_done, blue_info = self.blue_team.step(blue_action)
76
+ red_obs, red_reward, red_done, red_info = self.red_team.step(red_action)
77
+ bf_obs, bf_reward, remaining_blues, blue_shots, rf_obs, rf_reward, remaining_reds, red_shots = \
78
+ self.playground.step()
79
+ _, blue_deads = self.blue_team.get_observation()
80
+ _, red_deads = self.red_team.get_observation()
81
+ obs = blue_obs, red_obs, bf_obs, rf_obs, blue_deads, red_deads
82
+ reward = blue_reward + red_reward + bf_reward + rf_reward
83
+ done = False
84
+
85
+ info = {}
86
+ info['red_oob'] = red_info['oob']
87
+ info['blue_oob'] = blue_info['oob']
88
+ info['hits_target'] = red_info['hits_target']
89
+ info['blue_shots'] = blue_shots
90
+ info['red_shots'] = red_shots
91
+ info['weighted_red_distance'] = red_info['delta_distance']
92
+ info['remaining blues'] = len(blue_deads)-sum(blue_deads)
93
+ info['remaining reds'] = len(red_deads)-sum(red_deads)
94
+ info['ttl'] = red_info['ttl']
95
+ info['distance_to_straight_action'] = red_info['distance_to_straight_action']
96
+
97
+ if red_info['oob'] + blue_info['oob'] + red_info['hits_target'] + blue_shots + red_shots > 0:
98
+ print('something happened')
99
+
100
+ return obs, reward, done, info
symetry_wrap.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import gym
3
+
4
+
5
+ class SymetryWrapper(gym.Wrapper):
6
+ """
7
+ :param env: (gym.Env) Gym environment that will be wrapped
8
+ """
9
+
10
+ def __init__(self, env):
11
+ # Call the parent constructor, so we can access self.env later
12
+
13
+ self.symetry = False # no need to perform a symetry
14
+ super(SymetryWrapper, self).__init__(env)
15
+
16
+ def reset(self):
17
+ """
18
+ Reset the environment
19
+ """
20
+ obs = self.env.reset()
21
+
22
+ obs = self.post_obs(obs)
23
+
24
+ return obs
25
+
26
+ def step(self, action):
27
+ """
28
+ :param action: ([float] or int) Action taken by the agent
29
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional informations
30
+ """
31
+ if self.symetry:
32
+ action = symetrise_action(action)
33
+
34
+ obs, reward, done, info = self.env.step(action)
35
+
36
+ obs = self.post_obs(obs)
37
+
38
+ return obs, reward, done, info
39
+
40
+ def post_obs(self, obs):
41
+ self.symetry = get_symetry(obs)
42
+ if self.symetry:
43
+ obs = symetrise_obs(obs)
44
+ return obs
45
+
46
+
47
+ def get_symetry(obs):
48
+ blue_obs, red_obs, blue_fire, red_fire = obs
49
+
50
+ # count the drones who are positioned above the 0 x-axis
51
+ count = 0
52
+ for this_obs in (blue_obs, red_obs):
53
+ for d in this_obs:
54
+ add = 1 if (d[1] < 0.5) else 0
55
+ count += add
56
+
57
+ # compare with the total
58
+ symetry = bool(2*count < (len(blue_obs) + len(red_obs)))
59
+
60
+ return symetry
61
+
62
+
63
+ def symetrise_obs(obs):
64
+
65
+ blue_obs, red_obs, blue_fire, red_fire = obs
66
+
67
+ for this_obs in (blue_obs, red_obs):
68
+ # symetrise positions and speeds
69
+ this_obs[:, 1] = 1 - this_obs[:, 1]
70
+ this_obs[:, 4] = 1 - this_obs[:, 4]
71
+
72
+ return blue_obs, red_obs, blue_fire, red_fire
73
+
74
+
75
+ def symetrise_action(action):
76
+
77
+ blue_action, red_action = action
78
+
79
+ for this_action in (blue_action, red_action):
80
+ for act in this_action:
81
+
82
+ # symetrise action
83
+ act[1] = - act[1]
84
+
85
+ action = blue_action, red_action
86
+
87
+ return action
88
+
89
+
90
+ def test_symetrise_obs():
91
+
92
+ obs = np.arange(12).reshape(2, 6), np.arange(12).reshape(2, 6), np.random.random((1, 1)), np.random.random((1, 1))
93
+ print(obs)
94
+ symetrise_obs(obs)
95
+ print(obs)
team.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from dataclasses import dataclass
3
+
4
+ import param_
5
+ from drone import Drone
6
+ from dronemodel import DroneModel
7
+ from settings import Settings
8
+
9
+
10
+ @dataclass
11
+ class Team:
12
+ """
13
+ Creates a team (it is either red or blue / foe or friend
14
+ """
15
+
16
+ is_blue: bool
17
+ drones: [Drone]
18
+ drone_model: DroneModel
19
+ weighted_distance: float = 0
20
+
21
+ def reset(self, obs=None):
22
+
23
+ self.delta_weighted_distance()
24
+ if obs:
25
+ for drone, obs in zip(self.drones, obs):
26
+ drone.reset(obs=obs)
27
+ else:
28
+ for drone in self.drones:
29
+ drone.reset()
30
+
31
+ def get_observation(self) -> np.ndarray:
32
+ """
33
+ get the observation for the RL agent
34
+ :return: observation in the form of flatten np.arrays of shape(squad_number, 6*squad_size)
35
+ """
36
+ obs = np.array([drone.get_observation() for drone in self.drones])
37
+ deads = ~np.array([drone.is_alive for drone in self.drones])
38
+
39
+ return obs, deads
40
+
41
+ def step(self, action: np.ndarray):
42
+ obs = np.zeros((len(self.drones), 6))
43
+ done = np.zeros((len(self.drones),))
44
+ reward = np.zeros((len(self.drones),))
45
+ infos = [{} for d in range(len(self.drones))]
46
+ for index, drone in enumerate(self.drones):
47
+ obs[index], reward[index], done[index], infos[index] = drone.step(action[index])
48
+ done = (sum(done) == len(self.drones))
49
+ info = {'oob': 0, 'hits_target': 0, 'ttl': param_.DURATION, 'distance_to_straight_action': 0}
50
+ for i in infos:
51
+ info['ttl'] = min(info['ttl'], i['ttl'])
52
+ info['oob'] += i['oob'] if 'oob' in i else 0
53
+ info['hits_target'] += i['hits_target'] if 'hits_target' in i else 0
54
+ info['delta_distance'] = 0 if self.is_blue else self.delta_weighted_distance()
55
+ info['distance_to_straight_action'] += i['distance_to_straight_action'] \
56
+ if 'distance_to_straight_action' in i else 0
57
+ return obs, sum(reward), done, info
58
+
59
+ def delta_weighted_distance(self):
60
+
61
+ # distance of drones to 0
62
+ team_distance = np.array([d.distance() for d in self.drones if d.is_alive])
63
+ weighted_distance = np.sum(np.exp(-0.5 * (team_distance / (Settings.perimeter/2)) ** 2))
64
+
65
+ delta = weighted_distance - self.weighted_distance if 0 < self.weighted_distance else 0
66
+
67
+ self.weighted_distance = weighted_distance
68
+
69
+ return delta
70
+
71
+
72
+ class BlueTeam(Team):
73
+ """
74
+ Creates the blue team
75
+ """
76
+
77
+ def __init__(self, number_of_drones: int = Settings.blues):
78
+ self.is_blue = True
79
+ self.drone_model = DroneModel(self.is_blue)
80
+
81
+ # initialise blue positions and speeds
82
+ positions = np.zeros((number_of_drones, 3))
83
+ speeds = np.zeros((number_of_drones, 3))
84
+ blue_speed = Settings.blue_speed_init * self.drone_model.max_speed
85
+ circle = index = 0
86
+ for d in range(number_of_drones):
87
+ positions[d] = np.array([Settings.blue_circles_rho[circle],
88
+ Settings.blue_circles_theta[circle] + index * 2 * np.pi / 3,
89
+ Settings.blue_circles_zed[circle]])
90
+ clockwise = 1 - 2 * (circle % 2)
91
+ speeds[d] = np.array([blue_speed, np.pi / 6 * clockwise, 0])
92
+ index += 1
93
+ if index == Settings.blues_per_circle[circle]:
94
+ index = 0
95
+ circle += 1
96
+
97
+ self.drones = [Drone(is_blue=True, position=position, speed=speed, id_=id_)
98
+ for (id_, position, speed) in zip(range(number_of_drones), positions, speeds)]
99
+
100
+
101
+ class RedTeam(Team):
102
+ """
103
+ Creates the red team
104
+ """
105
+
106
+ def __init__(self, number_of_drones: int = Settings.reds):
107
+ self.is_blue = False
108
+ self.drone_model = DroneModel(self.is_blue)
109
+
110
+ positions = np.zeros((number_of_drones, 3))
111
+ positions_noise = np.zeros((number_of_drones, 3))
112
+ speeds = np.zeros((number_of_drones, 3))
113
+ speed_rho = Settings.red_speed_init * self.drone_model.max_speed
114
+ squad = index = 0
115
+ for d in range(number_of_drones):
116
+ positions[d] = [Settings.red_squads_rho[squad],
117
+ Settings.red_squads_theta[squad],
118
+ Settings.red_squads_zed[squad]]
119
+ positions_noise[d] = [Settings.red_rho_noise[squad],
120
+ Settings.red_theta_noise[squad],
121
+ Settings.red_zed_noise[squad]]
122
+ speeds[d] = [speed_rho, np.pi + positions[d][1], 0]
123
+ speeds[d] = [speed_rho, np.pi + positions[d][1], 0]
124
+ index += 1
125
+ if index == Settings.red_squads[squad]:
126
+ index = 0
127
+ squad += 1
128
+
129
+ self.drones = [Drone(is_blue=False, position=position, position_noise=position_noise, speed=speed, id_=id_)
130
+ for (id_, position, position_noise, speed) in
131
+ zip(range(len(positions)), positions, positions_noise, speeds)]
team_wrap.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import gym
3
+ from gym import spaces
4
+
5
+ from swarm_policy import SwarmPolicy
6
+ from settings import Settings
7
+
8
+
9
+ class TeamWrapper(gym.Wrapper):
10
+ """
11
+ :param env: (gym.Env) Gym environment that will be wrapped
12
+ """
13
+
14
+ def __init__(self, env, is_blue: bool = True, is_double: bool = False, is_unkillable: bool = Settings.is_unkillable):
15
+
16
+ self.is_blue = is_blue
17
+ self.is_double = is_double
18
+ self.is_unkillabe = is_unkillable
19
+
20
+
21
+ nb_blues, nb_reds = env.nb_blues, env.nb_reds
22
+
23
+ self.foe_action = None
24
+ self.foe_policy = SwarmPolicy(is_blue=not is_blue, blues=nb_blues, reds=nb_reds)
25
+
26
+ if is_double:
27
+ env.action_space = spaces.Tuple((
28
+ spaces.Box(low=-1, high=1, shape=(nb_blues*3,), dtype=np.float32),
29
+ spaces.Box(low=-1, high=1, shape=(nb_reds*3,), dtype=np.float32)
30
+ ))
31
+ else:
32
+ nb_friends = nb_blues if is_blue else nb_reds
33
+ env.action_space = spaces.Box(low=-1, high=1, shape=(nb_friends*3,), dtype=np.float32)
34
+
35
+ flatten_dimension = 6 * nb_blues + 6 * nb_reds # the position and speeds for blue and red drones
36
+ flatten_dimension += (nb_blues * nb_reds) * (1 if is_unkillable else 2) # the fire matrices
37
+
38
+ env.observation_space = spaces.Box(low=-1, high=1, shape=(flatten_dimension,), dtype=np.float32)
39
+
40
+ super(TeamWrapper, self).__init__(env)
41
+
42
+ def reset(self):
43
+ """
44
+ Reset the environment
45
+ """
46
+ obs = self.env.reset()
47
+ obs = self.post_obs(obs)
48
+
49
+ return obs
50
+
51
+ def step(self, action):
52
+ """
53
+ :param action: ([float] or int) Action taken by the agent
54
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional informations
55
+ """
56
+
57
+ if self.is_double:
58
+ blue_action, red_action = action
59
+ blue_action = _decentralise(blue_action)
60
+ red_action = _decentralise(red_action)
61
+ action = _unflatten(blue_action), _unflatten(red_action)
62
+ else:
63
+ friend_action = _decentralise(action)
64
+ foe_action = _decentralise(self.foe_action)
65
+ if self.is_blue:
66
+ action = _unflatten(friend_action), _unflatten(foe_action)
67
+ else:
68
+ action = _unflatten(foe_action), _unflatten(friend_action)
69
+
70
+ obs, reward, done, info = self.env.step(action)
71
+
72
+ obs = self.post_obs(obs)
73
+
74
+ return obs, reward, done, info
75
+
76
+ def post_obs(self, obs):
77
+
78
+ if self.is_unkillabe:
79
+ o1, o2, o3, _ = obs
80
+ obs = o1, o2, o3
81
+ flatten_obs = _flatten(obs)
82
+ centralised_obs = _centralise(flatten_obs)
83
+
84
+ if not self.is_double:
85
+ self.foe_action = self.foe_policy.predict(centralised_obs)
86
+
87
+ return centralised_obs
88
+
89
+
90
+ def _unflatten(action):
91
+ return np.split(action, len(action)/3)
92
+
93
+
94
+ def _flatten(obs): # need normalisation too
95
+ fl_obs = [this_obs.flatten().astype('float32') for this_obs in obs]
96
+ fl_obs = np.hstack(fl_obs)
97
+ return fl_obs
98
+
99
+
100
+ def _centralise(obs): # [0,1] to [-1,1]
101
+ obs = 2 * obs - 1
102
+ return obs
103
+
104
+
105
+ def _decentralise(act): # [-1,1] to [0,1]
106
+ act = 0.5 * (act + 1)
107
+ return act
train.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from stable_baselines3 import SAC
3
+ from stable_baselines3.sac.policies import MlpPolicy
4
+ from stable_baselines3.common.evaluation import evaluate_policy
5
+ from stable_baselines3.common.env_checker import check_env
6
+ import os
7
+
8
+ from monitor_wrap import MonitorWrapper
9
+ from filter_wrap import FilterWrapper
10
+ from distribution_wrap import DistriWrapper
11
+ from redux_wrap import ReduxWrapper
12
+ from symetry_wrap import SymetryWrapper
13
+ from rotate_wrap import RotateWrapper
14
+ from sort_wrap import SortWrapper
15
+ from team_wrap import TeamWrapper
16
+ from reward_wrap import RewardWrapper
17
+
18
+ from settings import Settings
19
+ from swarmenv import SwarmEnv
20
+ import param_
21
+
22
+
23
+ def bi_train(blue_model, red_model, blues: int = 1, reds: int = 1,
24
+ blue_dispersion: np.float32 = 1, red_dispersion: np.float32 = 1, total_timesteps: int = 1000):
25
+ # If needed create save dir
26
+ save_dir = "policies/" + Settings.policy_folder + f"/b{blues}r{reds}/"
27
+ save_last_dir = "policies/last" + f"/b{blues}r{reds}/"
28
+ os.makedirs(save_dir, exist_ok=True)
29
+ os.makedirs(save_last_dir, exist_ok=True)
30
+
31
+ # set the dispersion to initial drone positions
32
+ Settings.blue_distance_factor = blue_dispersion * Settings.blue_distance_factor
33
+ Settings.red_distance_factor = red_dispersion * Settings.red_distance_factor
34
+ Settings.red_theta_noise = red_dispersion * Settings.red_theta_noise
35
+ Settings.red_rho_noise = red_dispersion * Settings.red_rho_noise
36
+
37
+ # launch learning for red drones and then blue drones
38
+ red_model.learn(total_timesteps=total_timesteps)
39
+ mean_reward, std_reward = evaluate_policy(red_model, red_model.env, n_eval_episodes=10)
40
+ print(f"REDS b{blues}r{reds} disp_b:{10*blue_dispersion:2.0f} disp_r{10*red_dispersion:2.0f}: "
41
+ f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
42
+ red_model.save(save_dir + f"reds_b{10 * blue_dispersion:2.0f}r{10 * red_dispersion:2.0f}")
43
+ red_model.save(save_last_dir + "reds_last")
44
+
45
+ blue_model.learn(total_timesteps=total_timesteps)
46
+ mean_reward, std_reward = evaluate_policy(blue_model, blue_model.env, n_eval_episodes=10)
47
+ print(f"BLUES b{blues}r{reds} disp_b:{10*blue_dispersion:2.0f} disp_r{10*red_dispersion:2.0f}: "
48
+ f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
49
+ blue_model.save(save_dir + f"blues_{10 * blue_dispersion:2.0f}r{10 * red_dispersion:2.0f}")
50
+ blue_model.save(save_last_dir + "blues_last")
51
+
52
+ return blue_model, red_model
53
+
54
+
55
+ def meta_train(blues: int = 1, reds: int = 1,
56
+ max_dispersion: np.float32 = 3, iteration: int = 10,
57
+ total_timesteps: int = 100):
58
+ Settings.blues, Settings.reds = blues, reds
59
+
60
+ # launch the episode to get the data
61
+ steps = int(param_.DURATION / param_.STEP)
62
+
63
+ env = SortWrapper(
64
+ SymetryWrapper(
65
+ RotateWrapper(
66
+ ReduxWrapper(
67
+ DistriWrapper(
68
+ FilterWrapper(
69
+ MonitorWrapper(
70
+ SwarmEnv(blues=blues, reds=reds), steps, verbose=False)))))))
71
+
72
+ blue_env = RewardWrapper(TeamWrapper(env, is_blue=True), is_blue=True)
73
+ red_env = RewardWrapper(TeamWrapper(env, is_blue=False), is_blue=False)
74
+
75
+ blue_model = SAC(MlpPolicy, blue_env, verbose=0)
76
+ red_model = SAC(MlpPolicy, red_env, verbose=0)
77
+
78
+ for red_dispersion in np.linspace(0.1, max_dispersion, num=iteration):
79
+ for blue_dispersion in np.linspace(max_dispersion, 0.3, num=iteration):
80
+ blue_model, red_model = bi_train(
81
+ blue_model, red_model, blues=blues, reds=reds,
82
+ blue_dispersion=blue_dispersion, red_dispersion=red_dispersion,
83
+ total_timesteps=total_timesteps)
84
+
85
+
86
+ def super_meta_train(max_blues: int = 3, max_reds: int = 3, max_dispersion: np.float32 = 3,
87
+ iteration: int = 10, total_timesteps: int = 100, policy_folder: str = "default"):
88
+ Settings.policy_folder = policy_folder
89
+ for drones_nb in range(2, max_blues + max_reds + 1):
90
+ for blues in range(1, max_blues + 1):
91
+ reds = drones_nb - blues
92
+ if 1 <= reds <= max_reds:
93
+ print(f"reds :{reds}, blues: {blues}")
94
+ meta_train(blues=blues, reds=reds,
95
+ max_dispersion=max_dispersion, iteration=iteration, total_timesteps=total_timesteps)
96
+
97
+
98
+ def print_spaces(env, name: str):
99
+ print("++++++++++++")
100
+ print(name)
101
+ print(env.action_space)
102
+ print(env.observation_space)
103
+ print("============")
104
+ check_env(env, warn=True)
105
+
106
+
107
+ # super_meta_train(max_blues=1, max_reds=1, iteration=5, max_dispersion=1, total_timesteps=50000, policy_folder="0528_14")
108
+ # super_meta_train(max_blues=2, max_reds=2, iteration=4, max_dispersion=3, total_timesteps=10, policy_folder="0528_test")
109
+
110
+
111
+ def simple_red_train(max_dispersion: np.float32 = 3,
112
+ blues: int = 1, reds: int = 1,
113
+ iteration: int = 25, total_timesteps: int = 100,
114
+ policy_folder: str = "simple_red"):
115
+ Settings.policy_folder = policy_folder
116
+ print(f"Simple_red: reds :{reds}, blues: {blues}")
117
+ # If needed create save dir
118
+ save_dir = "policies/" + Settings.policy_folder + f"/b{blues}r{reds}/"
119
+ save_last_dir = "policies/last" + f"/b{blues}r{reds}/"
120
+ os.makedirs(save_dir, exist_ok=True)
121
+ os.makedirs(save_last_dir, exist_ok=True)
122
+
123
+ # launch the episode to get the data
124
+ steps = int(param_.DURATION / param_.STEP)
125
+ Settings.blues, Settings.reds = blues, reds
126
+
127
+ env = SortWrapper(
128
+ SymetryWrapper(
129
+ RotateWrapper(
130
+ ReduxWrapper(
131
+ DistriWrapper(
132
+ FilterWrapper(
133
+ MonitorWrapper(
134
+ SwarmEnv(blues=blues, reds=reds), steps, verbose=False)))))))
135
+
136
+
137
+
138
+ red_env = RewardWrapper(TeamWrapper(env, is_blue=False), is_blue=False)
139
+ red_model = SAC(MlpPolicy, red_env, verbose=1)
140
+
141
+
142
+ # set the dispersion to initial drone positions
143
+ Settings.blue_distance_factor = 10 * Settings.blue_distance_factor
144
+
145
+ this_iteration = 0
146
+
147
+ for red_dispersion in np.linspace(0.33, max_dispersion, num=iteration):
148
+
149
+ Settings.red_distance_factor = red_dispersion
150
+
151
+ # launch learning for red drones and then blue drones
152
+ this_iteration += 1
153
+ batch = 1
154
+ mean_reward = 0
155
+ delta_reward = 0
156
+ stability = 0
157
+ count = 0
158
+ while mean_reward < 9 or stability < 3 or count < 30:
159
+ count += 1
160
+ red_model.learn(total_timesteps=total_timesteps//10)
161
+ last_reward = mean_reward
162
+ mean_reward, std_reward = evaluate_policy(red_model, red_model.env, n_eval_episodes=100)
163
+ delta_reward = mean_reward - last_reward
164
+ if -0.1 <= delta_reward <= 0.1:
165
+ stability += 1
166
+ else:
167
+ stability = 0
168
+ print(f"REDS b{blues}r{reds} iteration{this_iteration} batch{batch}: "
169
+ f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
170
+ red_model.save(save_dir + f"{this_iteration} batch{batch+1}")
171
+ red_model.save(save_last_dir + "reds_last")
172
+ batch += 1
173
+
174
+ simple_red_train(total_timesteps = 50000, policy_folder="simply_red")
utils.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ works with all the transformations and calculation associated to position, speed, acceleration
3
+ """
4
+
5
+ import numpy as np
6
+
7
+
8
+ def position_to_xyz(position: [float]) -> [float]:
9
+ """
10
+ allows to get the 3D xyz coordinates from a polar representation
11
+ :param position: array (3,) with rho in meter, theta in rad, zed in meter
12
+ :return: float array (3,) with x, y, z in meter
13
+ """
14
+ pos = position[0] * np.exp(1j * position[1])
15
+ return [np.real(pos), np.imag(pos), position[2]]
16
+
17
+
18
+ """
19
+ def _test_position_to_norm():
20
+ assert position_to_norm([param.PERIMETER, 0, 100]) == [1, 0, 1]
21
+ assert position_to_norm([0, -np.pi / 2, 0]) == [0, 0.75, 0]
22
+ assert position_to_norm([0, np.pi / 2, 0]) == [0, 0.25, 0]
23
+ """
24
+
25
+
26
+ def is_in_the_cone(position1: [float], position2: [float], vector2: [float], angle: float) -> bool:
27
+ """
28
+ checks if the point @ position 2 is in the cone from position 1 with an angle of angle
29
+ :param position1: in x, y, z
30
+ :param position2: in x, y, z
31
+ :param vector2: in x, y, z
32
+ :param angle: in rad
33
+ :return:
34
+ """
35
+ vector1 = np.array(position2, dtype=float) - np.array(position1)
36
+ vector1 /= np.linalg.norm(vector1)
37
+ vector2 = np.array(vector2, dtype=float)
38
+ vector2 /= np.linalg.norm(vector2)
39
+ cos_theta = np.dot(vector1, vector2)
40
+ if 0 < cos_theta:
41
+ theta = np.arcsin(np.sqrt(1 - cos_theta ** 2))
42
+ return theta < angle
43
+ return False
44
+
45
+
46
+ def _test_is_in_the_cone():
47
+ assert is_in_the_cone([0, 0, 0], [1, 0.1, 0], [1, 0, 0], np.pi / 5)
48
+ assert is_in_the_cone([0, 0, 0], [1, 0.1, 0], [0, 1, 0], np.pi / 5)
49
+ pass
50
+
51
+
52
+ def rhotheta_to_latlon(rho: float, theta: float, lat_tg: float, lon_tg: float) -> [float, float]:
53
+ """
54
+ transforms polar coordinates into lat, lon
55
+ :param rho:
56
+ :param theta:
57
+ :param lat_tg: latitude de la target (0,0)
58
+ :param lon_tg: longitude de la target (0,0)
59
+ :return:
60
+ """
61
+ z = rho * np.exp(1j * theta)
62
+ lat = np.imag(z) * 360 / (40075 * 1000) + lat_tg
63
+ lon = np.real(z) * 360 / (40075 * 1000 * np.cos(np.pi / 180 * lat)) + lon_tg
64
+ return lat, lon
65
+