initial load
- Makefile +12 -0
- README.md +38 -13
- app.py +175 -0
- app.yaml +9 -0
- bluetraj.py +81 -0
- distribution_wrap.py +92 -0
- drone.py +326 -0
- dronemodel.py +103 -0
- filter_wrap.py +95 -0
- monitor_wrap.py +119 -0
- param_.py +122 -0
- playground.py +92 -0
- policies/_last/_b1r1/blues_last.zip +3 -0
- policies/_last/_b1r1/reds_last.zip +3 -0
- procfile.txt +1 -0
- redux_wrap.py +80 -0
- requirements.txt +3 -0
- reward_wrap.py +86 -0
- rotate_wrap.py +93 -0
- runner.py +16 -0
- settings.py +82 -0
- setup.sh +12 -0
- sort_wrap.py +98 -0
- swarm_policy.py +342 -0
- swarmenv.py +100 -0
- symetry_wrap.py +95 -0
- team.py +131 -0
- team_wrap.py +107 -0
- train.py +174 -0
- utils.py +65 -0
Makefile
ADDED
@@ -0,0 +1,12 @@

.PHONY: run run-container gcloud-deploy

run:
	@streamlit run app.py --server.port=8080 --server.address=0.0.0.0

run-container:
	@docker build . -t hexamind-swarms
	@docker run -p 8080:8080 hexamind-swarms

gcloud-deploy:
	@gcloud app deploy app.yaml
README.md
CHANGED
@@ -1,13 +1,38 @@
# swarms2

**red swarms against blue swarms**
reds attack a target defended by blues
reds and blues may kill their opponents
the more drones reach the goal, the better for the reds and the worse for the blues

please install the requirements in requirements.txt
the code has been tested on macOS with Python 3.9
Pillow may be difficult to install; a possibility is to fall back to Python 3.7

All files are flat.
Two files contain the starting points:
train.py allows for training
app.py launches the Streamlit app

**train.py**
training is done iteratively, starting with one blue drone and one red drone, then progressively increasing the distance and adding drones.
Learned policies are stored in /policies/
For each configuration, depending on the number of blue and red drones, there is a folder containing the red and blue policies
e.g. /policies/b3r4 contains the red and blue policies for configurations with 3 blue and 4 red drones
There are two policies depending on the color: one for blues and one for reds. Several intermediate saves are made, the last one being __last.
The function to launch for the training is:
super_meta_train(max_blues=8, max_reds=8, iteration=10, max_dispersion=3, total_timesteps=10000)
which programs a training run from 1,1 up to 8,8 drones, with a distance multiplier of 3 and 10 iterations, and a total of 10000 timesteps per learning run (a minimal launch sketch follows at the end of this README)
(1 step = 1 second)


**show.py**
once the agents are trained, the drones can be simulated and visualised through a Streamlit interface.
the command to launch the visualisation is:
streamlit run show.py


**tuning the rewards**
The rewards may be tuned in the param_.py file; the logic is in the 'evaluate_situation' function of the team_wrap.py file.
When is_double is true, there is no learning: the simulation is carried out with already defined policies, and only the final outcome is to be considered.
Otherwise, two cases have to be taken into account, depending on whether blue or red is learning.
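A minimal launch sketch for the training entry point; it assumes super_meta_train is importable from train.py (only the function name is given above, its module is an assumption):

from train import super_meta_train  # assumed location of super_meta_train

super_meta_train(max_blues=8, max_reds=8, iteration=10, max_dispersion=3, total_timesteps=10000)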
app.py
ADDED
@@ -0,0 +1,175 @@
import time
import pandas as pd
import pydeck as pdk
import streamlit as st

from filter_wrap import FilterWrapper
from distribution_wrap import DistriWrapper
from redux_wrap import ReduxWrapper
from symetry_wrap import SymetryWrapper
from rotate_wrap import RotateWrapper
from sort_wrap import SortWrapper
from team_wrap import TeamWrapper
from reward_wrap import RewardWrapper
from monitor_wrap import MonitorWrapper

from runner import run_episode
from settings import Settings, define_
import param_
from swarmenv import SwarmEnv


def run(with_streamlit=True, blues: int = 4, reds: int = 6, policy_folder: str = 'reds_last'):

    # define the policy folder: where the trained policies are to be found
    Settings.policy_folder = policy_folder

    # define settings with Streamlit (or use default parameters)
    blues, reds = define_(with_streamlit=with_streamlit, blues=blues, reds=reds)

    # put the map in place
    deck_map, initial_view_state = pre_show(with_streamlit=with_streamlit)

    # launch the episode to get the data
    steps = int(param_.DURATION / param_.STEP)
    monitor_env = MonitorWrapper(SwarmEnv(blues=blues, reds=reds), steps)
    env = FilterWrapper(monitor_env)
    env = DistriWrapper(env)
    env = ReduxWrapper(env)
    env = SortWrapper(
        SymetryWrapper(
            RotateWrapper(env)))

    env = RewardWrapper(TeamWrapper(env, is_double=True), is_double=True)

    obs = env.reset()
    run_episode(env, obs, blues=blues, reds=reds)

    print('done')

    # display the data with Streamlit
    if with_streamlit:
        show(monitor_env, deck_map, initial_view_state)


def pre_show(with_streamlit=True):
    if with_streamlit:
        deck_map = st.empty()
        pitch = st.slider('pitch', 0, 100, 50)
        lat, lon = Settings.latlon
        initial_view_state = pdk.ViewState(
            latitude=lat,
            longitude=lon,
            zoom=13,
            pitch=pitch
        )
        return deck_map, initial_view_state
    else:
        return 0, 0


def show(monitor_env, deck_map, initial_view_state):

    blue_df, red_df, fire_df, blue_path_df, red_path_df = monitor_env.get_df()
    step_max = monitor_env.step_

    for step in range(step_max):
        deck_map.pydeck_chart(pdk.Deck(
            map_provider="mapbox",
            map_style='mapbox://styles/mapbox/light-v9',
            initial_view_state=initial_view_state,
            layers=get_layers(blue_df,
                              red_df,
                              blue_path_df,
                              red_path_df,
                              step)
        ))

        time.sleep(param_.STEP * param_.SIMU_SPEED)


def get_layers(df_blue: pd.DataFrame, df_red: pd.DataFrame,
               df_blue_path: [pd.DataFrame], df_red_path: [pd.DataFrame],
               step) -> [pdk.Layer]:
    lat, lon = Settings.latlon
    df_target = pd.DataFrame({'lat': [lat], 'lon': [lon]})
    layers_ = get_target_layers(df_target)

    for (df, dfp, b) in [(df_blue, df_blue_path, True), (df_red, df_red_path, False)]:
        layers_.append(get_current_drone_layers(df, step))
        nb_drones = df['d_index'].max() + 1
        for drone_index in range(nb_drones):
            layers_.append(get_path_layers(dfp[drone_index], step))

    return layers_


def get_target_layers(df_target) -> [pdk.Layer]:
    return [
        # this is the GROUNDZONE
        pdk.Layer(
            'ScatterplotLayer',
            data=df_target,
            get_position='[lon, lat]',
            get_color='[0, 120, 0]',
            get_radius=Settings.groundzone,
            get_line_width=50,
            lineWidthMinPixels=2,
            stroked=True,
            filled=False,

        ),

        pdk.Layer(
            'ScatterplotLayer',
            data=df_target,
            get_position='[lon, lat]',
            get_color='[0, 0, 200]',
            get_radius=30,
        ),
    ]


def get_current_drone_layers(df_drone: pd.DataFrame, step: int) -> [pdk.Layer]:
    df_current = df_drone[df_drone.step == step]

    return [
        pdk.Layer(
            'ScatterplotLayer',
            data=df_current,
            get_position='[lon, lat, zed]',
            get_color='color',
            get_radius=50,

        ),
        pdk.Layer(
            'ScatterplotLayer',
            data=df_current,
            get_position='[lon, lat]',
            get_color=[50, 50, 50, 50],
            get_radius=50,

        ),
    ]


def get_path_layers(df_path: pd.DataFrame, step: int) -> [pdk.Layer]:
    df_current = df_path[df_path.step == step]
    return [
        pdk.Layer(
            type="PathLayer",
            data=df_current,
            pickable=True,
            get_color="color",
            width_scale=10,
            width_min_pixels=1,
            get_path="path",
            get_width=1,
        )
    ]


# and ... do not forget
run(with_streamlit=True, policy_folder='last')
# run(blues=1, reds=3, with_streamlit=False, policy_folder='0527_14_test')
app.yaml
ADDED
@@ -0,0 +1,9 @@

runtime: custom
env: flex
manual_scaling:
  instances: 1
resources:
  cpu: 1
  memory_gb: 3
  disk_size_gb: 20
bluetraj.py
ADDED
@@ -0,0 +1,81 @@
import numpy as np
import param_
from drone import Drone


def calculate_target(blue_drone: Drone, red_drone: Drone) -> np.ndarray(3, ):
    '''
    :param blue_drone:
    :param red_drone:
    :return:
    '''

    def transform(pos, delta, theta):
        pos[0] -= delta
        pos[1] -= theta
        return pos[0] * np.exp(1j * pos[1])

    def untransform_to_array(pos, delta, theta):
        pos[0] += delta
        pos[1] += theta
        return pos

    theta = red_drone.position[1]
    delta = param_.GROUNDZONE

    z_blue = transform(blue_drone.position, delta, theta)
    z_red = np.real(transform(red_drone.position, delta, theta))

    v_blue = blue_drone.drone_model.max_speed
    v_red = red_drone.drone_model.max_speed

    blue_shooting_distance = blue_drone.drone_model.distance_to_neutralisation

    blue_time_to_zero = (np.abs(z_blue) - blue_shooting_distance) / v_blue
    red_time_to_zero = z_red / v_red

    if red_time_to_zero <= param_.STEP or red_time_to_zero < blue_time_to_zero + param_.STEP:
        return np.zeros(3), red_time_to_zero
    else:
        max_target = z_red
        min_target = 0
        while True:
            target = (max_target + min_target) / 2
            blue_time_to_target = max(0, (np.abs(z_blue - target) - blue_shooting_distance) / v_blue)
            red_time_to_target = np.abs(z_red - target) / v_red
            if red_time_to_target - param_.STEP < blue_time_to_target <= red_time_to_target:
                target = untransform_to_array((target / z_red) * red_drone.position, delta, theta)
                return target, blue_time_to_target
            if red_time_to_target < blue_time_to_target:
                max_target = target
                min_target = min_target
            else:  # blue_time_to_target <= red_time_to_target - 1
                max_target = max_target
                min_target = target


def unitary_test(rho_blue: float, theta_blue: float, rho_red: float, theta_red: float):
    blue_drone = Drone()
    blue_drone.position = np.array([rho_blue, theta_blue, 100])
    red_drone = Drone(is_blue=False)
    red_drone.position = np.array([rho_red, theta_red, 100])
    tg, time = calculate_target(blue_drone, red_drone)
    print('rho_blue : ', rho_blue, ' theta_blue : ', theta_blue, ' rho_red : ', rho_red, ' theta_red : ', theta_red,
          ' tg : ', tg, ' time : ', time)
    return tg, time


def test():
    for rho_blue in [1000]:
        for theta_blue in np.pi * np.array([-1, 0.75, 0.5, 0.25, 0]):
            for rho_red in [1000]:
                for theta_red in np.pi * np.array([0, 1/4]):
                    unitary_test(rho_blue=rho_blue, theta_blue=theta_blue, rho_red=rho_red, theta_red=theta_red)
    print('done')
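For a quick sanity check of the interception search, the module's own unitary_test can be called directly; a minimal sketch, assuming the repo modules are importable:

import numpy as np
from bluetraj import unitary_test

# blue a quarter-turn away from a red inbound along theta = 0, both at rho = 1000 m
tg, t = unitary_test(rho_blue=1000, theta_blue=np.pi / 2, rho_red=1000, theta_red=0)

The bundled test() sweeps a grid of such configurations and prints each result.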
distribution_wrap.py
ADDED
@@ -0,0 +1,92 @@
import gym

from gym import spaces
import numpy as np

from runner import run_episode
from redux_wrap import ReduxWrapper
from rotate_wrap import RotateWrapper
from symetry_wrap import SymetryWrapper
from sort_wrap import SortWrapper
from team_wrap import TeamWrapper
from reward_wrap import RewardWrapper


class DistriWrapper(gym.Wrapper):
    """
    :param env: (gym.Env) Gym environment that will be wrapped
    """

    def __init__(self, env):

        self.blue_deads = self.red_deads = 0
        self.nb_blues, self.nb_reds = env.nb_blues, env.nb_reds

        env.observation_space = spaces.Tuple((
            spaces.Box(low=0, high=1, shape=(self.nb_blues, 6), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_reds, 6), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_blues, self.nb_reds), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_reds, self.nb_blues), dtype=np.float32)))

        env.action_space = spaces.Tuple((
            spaces.Box(low=0, high=1, shape=(self.nb_blues, 3), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_reds, 3), dtype=np.float32)))

        # Call the parent constructor, so we can access self.env later
        super(DistriWrapper, self).__init__(env)

    def reset(self):
        """
        Reset the environment
        """
        obs = self.env.reset()
        blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads = obs
        self.blue_deads, self.red_deads = blue_deads, red_deads  # fixed: the second target was mistyped as blue_deads
        return blue_obs, red_obs, blues_fire, reds_fire

    def step(self, action):
        """
        :param action: ([float] or int) Action taken by the agent
        :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
        """
        obs, reward, done, info = self.env.step(action)

        blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads = obs
        obs = blue_obs, red_obs, blues_fire, reds_fire

        if done:  # environment decision (e.g. drones oob)
            return obs, reward, True, info

        if red_deads == len(red_obs):  # no more reds to fight (it could mean that they have all reached the target)
            return obs, reward, True, info

        if blue_deads == len(blue_obs):  # reds have won
            return obs, reward, True, info

        # do we have new deaths?
        new_blue_deads = blue_deads - self.blue_deads
        new_red_deads = red_deads - self.red_deads
        self.blue_deads, self.red_deads = blue_deads, red_deads

        if 0 < new_red_deads + new_blue_deads:  # someone was killed but the fight goes on

            blues, reds = self.nb_blues - blue_deads, self.nb_reds - red_deads

            env = ReduxWrapper(self, minus_blue=blue_deads, minus_red=red_deads)
            obs_ = env.post_obs(obs)

            env = RotateWrapper(env)
            obs_ = env.post_obs(obs_)

            env = SymetryWrapper(env)
            obs_ = env.post_obs(obs_)

            env = SortWrapper(env)
            obs_ = env.post_obs(obs_)

            env = RewardWrapper(TeamWrapper(env, is_double=True), is_double=True)
            obs_ = env.post_obs(obs_)

            _, reward, done, info = run_episode(env, obs_, blues=blues, reds=reds)

        return obs, reward, done, info
drone.py
ADDED
@@ -0,0 +1,326 @@
from dataclasses import dataclass
import numpy as np
from dronemodel import DroneModel

import param_ as param_
from settings import Settings


@dataclass
class Drone:
    """
    Creates a drone (it is either red or blue / foe or friend)
    """
    is_blue: bool = True
    position: np.ndarray((3,)) = np.zeros((3,))
    position_noise: np.ndarray((3,)) = np.zeros((3,))
    drone_model: DroneModel = None
    max_speeds: np.ndarray((3,)) = None
    min_speeds: np.ndarray((3,)) = None
    init_position: np.ndarray((3,)) = None
    init_speed: np.ndarray((3,)) = None
    color = np.ndarray((3,))
    is_alive: bool = True
    is_fired: int = 0
    fires = 0
    step_ = 0
    id_: int = -1
    ttl: float = param_.DURATION  # ttl = Time To Live expressed in seconds
    speed: np.ndarray((3,)) = np.zeros((3,))
    min_positions = np.zeros((3,))
    max_positions = np.array([Settings.perimeter, 2*np.pi, Settings.perimeter_z])

    def __post_init__(self):
        self.drone_model = DroneModel(self.is_blue)
        self.max_speeds = np.array([self.drone_model.max_speed,
                                    2*np.pi,
                                    self.drone_model.max_up_speed])
        self.min_speeds = np.array([0,
                                    0,
                                    -self.drone_model.max_down_speed])

        self.init_position = np.copy(self.position)
        self.init_position_noise = np.copy(self.position_noise)
        self.init_speed = np.copy(self.speed)
        self.color = param_.BLUE_COLOR if self.is_blue else param_.RED_COLOR
        self.ttl = (self.position[0] / self.max_speeds[0]) * param_.TTL_RATIO + param_.TTL_MIN

    def reset(self):
        self.is_alive = True
        self.speed = self.init_speed
        self.color = param_.BLUE_COLOR if self.is_blue else param_.RED_COLOR
        distance_factor = Settings.blue_distance_factor if self.is_blue else Settings.red_distance_factor
        self.position[0] = self.init_position[0]*distance_factor
        self.position[2] = self.init_position[2]*distance_factor
        self.position_noise *= distance_factor

        self.position[0] = np.clip(self.position[0] + np.random.rand() * self.position_noise[0],
                                   param_.GROUNDZONE * 1.1,
                                   param_.PERIMETER * 0.9)
        self.position[1] = self.position[1] + np.random.rand() * self.position_noise[1]
        self.position[2] = np.clip(self.position[2] + np.random.rand() * self.position_noise[2],
                                   param_.GROUNDZONE * 1.1,
                                   param_.PERIMETER_Z * 0.9)

        self.ttl = (self.position[0] / self.max_speeds[0]) * param_.TTL_RATIO + param_.TTL_MIN

    def step(self, action):
        self.step_ = self.step_ + 1  # for debug purposes
        reward = 0
        info = {'ttl': param_.DURATION}
        if self.is_alive:  # if the drone is dead, it no longer moves :)
            pos_xyz, speed_xyz = self.to_xyz(self.position), self.to_xyz(self.speed)
            pos_s, speed_s = \
                self.drone_model.get_trajectory(pos_xyz, speed_xyz, action, np.linspace(0, param_.STEP, 10))
            pos, speed = pos_s.T[-1], speed_s.T[-1]
            self.position, self.speed = self.from_xyz(pos), self.from_xyz(speed)
            self.ttl -= param_.STEP
            info['ttl'] = self.ttl

            # evaluate the distance compared to the greedy action

            if self.is_blue:
                '''
                for further usage
                straight_action, time_to_catch = self.simple_blue()
                tolerance = 0.05 if 4 < time_to_catch else 1 if 2 < time_to_catch else 3
                distance = 1 if tolerance < np.linalg.norm(straight_action - action) else 0
                '''
                distance = 1
            else:
                straight_action = self.simple_red()
                distance = 1 if 0.1 < np.linalg.norm(straight_action - action) else 0
            info['distance_to_straight_action'] = distance

            if self._hits_target():
                info['hits_target'] = 1
                reward = -param_.TARGET_HIT_COST
                self.color = param_.RED_SUCCESS_COLOR
                self.is_alive = False  # the red has done its job ...

            if self._out_of_bounds():
                coef = -1 if self.is_blue else 1
                reward = coef * param_.OOB_COST
                self.is_alive = False
                info['oob'] = 1

        obs = self.get_observation()
        done = not self.is_alive

        return obs, reward, done, info

    def _out_of_bounds(self):
        return not (0 < self.position[2] < Settings.perimeter_z and self.position[0] < Settings.perimeter)

    def _hits_target(self):
        if self.is_blue:
            return False
        else:
            distance_to_zero = np.sqrt(self.position[0]**2 + self.position[2]**2)
            return distance_to_zero < Settings.groundzone

    def fires_(self, foe) -> bool:
        """
        checks if the foe drone is hit by self
        :param foe: a foe drone
        :return: True = yes, got you
        """
        # deads don't kill nor die
        if not (self.is_alive and foe.is_alive):
            return False

        # let's see if the foe is in the "fire cone"
        pos_xyz = - self.to_xyz(self.position) + self.to_xyz(foe.position)
        distance = np.linalg.norm(pos_xyz)
        pos_xyz /= distance

        if distance < self.drone_model.distance_to_neutralisation:
            return self.is_in_the_cone(foe)

        return False

    def is_in_the_cone(self, foe) -> bool:
        '''
        verifies if the foe is in the cone (without any regard to distance)
        :param foe:
        :return:
        '''
        pos_xyz = - self.to_xyz(self.position) + self.to_xyz(foe.position)
        pos_xyz /= np.linalg.norm(pos_xyz)
        vit_xyz = self.to_xyz(self.speed)
        vit_xyz /= np.linalg.norm(vit_xyz)
        cos_theta = np.dot(pos_xyz, vit_xyz)
        in_the_cone = False
        if 0 < cos_theta:
            theta = np.arccos(cos_theta)
            in_the_cone = theta < self.drone_model.angle_to_neutralisation
        return in_the_cone

    # tell the drone that it is dead
    def is_killed(self, is_blue=True):
        self.is_alive = False
        self.position[2] = 0
        self.color = param_.BLUE_DEAD_COLOR if is_blue else param_.RED_DEAD_COLOR

    def to_xyz(self, rho_theta_z: np.ndarray(shape=(3,))) -> np.ndarray(shape=(3,)):
        """
        allows to get the 3D xyz coordinates from a polar representation
        :param rho_theta_z: array (3,) with rho in meters, theta in rad, zed in meters for positions, /s for speeds, etc.
        :return: float array (3,) with x, y, z in meters, /s for speeds, etc.
        """
        xy_ = rho_theta_z[0] * np.exp(1j * rho_theta_z[1])
        return np.array([np.real(xy_), np.imag(xy_), rho_theta_z[2]])

    def from_xyz(self, xyz: np.ndarray(shape=(3,))) -> np.ndarray(shape=(3,)):
        """
        converts the 3D xyz coordinates back to the polar representation
        """
        z_complex = xyz[0] + 1j*xyz[1]
        rho = np.abs(z_complex)
        theta = np.angle(z_complex)
        return np.array([rho, theta, xyz[2]], dtype='float32')

    def to_norm(self,
                rho_theta_z: np.ndarray(shape=(3,)),
                max_vector: np.ndarray(shape=(3,)),
                min_vector: np.ndarray(shape=(3,)) = np.array([0, 0, 0]))\
            -> np.ndarray(shape=(3,), dtype='float32'):
        """
        normalises the position/speed in order to have all space in a [0;1]**3 space
        :return: rho, theta, zed in a [0;1]**3 space
        """
        rho = rho_theta_z[0] / max_vector[0]
        theta = (rho_theta_z[1] / (2 * np.pi)) % 1
        zed = (rho_theta_z[2] - min_vector[2]) / (max_vector[2] - min_vector[2])
        return np.array([rho, theta, zed], dtype='float32')

    def from_norm(self,
                  norm: np.ndarray(shape=(3,)),
                  max_vector: np.ndarray(shape=(3,)),
                  min_vector: np.ndarray(shape=(3,)) = np.array([0, 0, 0]))\
            -> np.ndarray(shape=(3,), dtype='float32'):
        """
        denormalises and renders into cylindrical coordinates
        :param norm:
        :param max_vector:
        :param min_vector:
        :return:
        """
        rho = norm[0] * max_vector[0]
        theta = norm[1] * 2*np.pi
        zed = norm[2] * (max_vector[2] - min_vector[2]) + min_vector[2]
        return np.array([rho, theta, zed], dtype='float32')

    def to_lat_lon_zed(self, lat, lon):
        z = self.position[0] * np.exp(1j * self.position[1])
        lat = np.imag(z) * 360 / (40075 * 1000) + lat
        lon = np.real(z) * 360 / (40075 * 1000 * np.cos(np.pi / 180 * lat)) + lon
        return lat, lon, self.position[2]

    def distance(self, other_drone=None):

        if other_drone:
            distance = np.sqrt(np.abs(self.position[0] * np.exp(1j * self.position[1]) -
                                      other_drone.position[0] * np.exp(1j * other_drone.position[1])) ** 2 +
                               (self.position[2] - other_drone.position[2]) ** 2)
        else:
            distance = np.sqrt((self.position[0] ** 2) + self.position[2] ** 2)
        return distance

    def get_observation(self):  # -> np.array(shape=(6,), dtype='float32'):
        """
        get normalised and transformed position and speed
        :return:
        """
        # calculates the transformed normalised position
        normalised_position = self.to_norm(self.position, self.max_positions, self.min_positions)

        # calculates the transformed normalised speed
        normalised_speed = self.to_norm(self.speed, self.max_speeds, self.min_speeds)

        return np.append(normalised_position, normalised_speed)

    def simple_red(self, target: np.ndarray(3,) = np.zeros(3), z_margin: float = 50) -> np.ndarray(shape=(3,)):
        '''
        defines the actions for a trajectory targeting zero
        :return:
        '''

        self_z = self.position[0] * np.exp(1j * self.position[1])
        target_z = target[0] * np.exp(1j * target[1])

        direction = np.zeros(3)
        direction[0] = np.abs(self_z - target_z)
        direction[1] = np.angle(self_z - target_z)
        direction[2] = self.position[2] - target[2] - z_margin

        theta = (direction[1] + np.pi) / (2*np.pi) % 1

        # slope of the drone given its position
        tan_phi = np.sign(direction[2]) * np.inf if direction[0] == 0 else direction[2]/direction[0]
        # slope of the drone speed
        tan_phi_point = np.sign(self.speed[2]) * np.inf if self.speed[0] == 0 else self.speed[2]/self.speed[0]
        # slope of the forces
        f_ratio = self.drone_model.Fxy / self.drone_model.Fz_minus
        # go up if the speed slope is too steep and vertical speed < 0, else take the position angle for the forces angle
        psy = -np.arctan(tan_phi * f_ratio) / np.pi + 0.5

        action = np.array([1, theta, psy])

        return action

    def simple_target(self, target: (np.ndarray(shape=(3,)))) -> np.ndarray(shape=(3,)):
        '''
        defines the actions for a trajectory targeting ... the given target
        :return:
        '''

        self_z = self.position[0] * np.exp(1j * self.position[1])
        target_z = target[0] * np.exp(1j * target[1])

        direction = np.zeros(3)
        direction[0] = np.abs(self_z - target_z)
        direction[1] = np.angle(self_z - target_z)
        direction[2] = self.position[2] - target[2]

        theta = (direction[1] + np.pi) / (2*np.pi) % 1

        # slope of the drone given its position
        tan_phi = np.sign(direction[2]) * np.inf if direction[0] == 0 else direction[2]/direction[0]
        # slope of the drone speed
        tan_phi_point = np.sign(self.speed[2]) * np.inf if self.speed[0] == 0 else self.speed[2]/self.speed[0]
        # slope of the forces
        f_ratio = self.drone_model.Fxy / self.drone_model.Fz_minus
        # go up if the speed slope is too steep and vertical speed < 0, else take the position angle for the forces angle
        psy = 0.5 if tan_phi_point < -tan_phi else -np.arctan(tan_phi * f_ratio) / np.pi + 0.5

        if Settings.perimeter_z / 2 < direction[2]:
            psy = min(0.2, psy)

        if self.position[0] < 1.5 * Settings.groundzone:
            psy = min(0.2, psy)

        action = np.array([1, theta, psy])

        return action

    def next_simple_pos(self):
        next_pos = np.zeros(3)
        simple_next = self.position[0] * np.exp(1j * self.position[1]) + \
            self.speed[0] * np.exp(1j * self.speed[1]) * param_.STEP
        next_pos[0] = np.abs(simple_next)
        next_pos[1] = np.angle(simple_next)  # fixed: np.arg does not exist, np.angle gives the complex argument
        next_pos[2] = self.position[2] + self.speed[2] * param_.STEP
        return next_pos

    def copy_pos_speed(self, drone_to_copy):
        self.position = drone_to_copy.position
        self.speed = drone_to_copy.speed
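to_xyz and from_xyz carry every state between the cylindrical (rho, theta, zed) representation used for observations and the Cartesian frame used by the dynamics; a minimal round-trip check, assuming the repo modules are importable:

import numpy as np
from drone import Drone

d = Drone()
p = np.array([1000.0, np.pi / 3, 150.0])  # rho (m), theta (rad), zed (m)
xyz = d.to_xyz(p)                         # -> [x, y, z]
back = d.from_xyz(xyz)                    # -> [rho, theta, zed] again
assert np.allclose(p, back, atol=1e-3)    # float32 round-off aside

Note that from_xyz returns theta in (-pi, pi], so the round trip only matches up to that branch choice.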
dronemodel.py
ADDED
@@ -0,0 +1,103 @@
from dataclasses import dataclass
from scipy.integrate import odeint
import numpy as np

import param_


@dataclass
class DroneModel:
    """
    Creates a drone_model of a drone
    """

    def __init__(self, is_blue):
        self.drone_model = param_.DRONE_MODELS[param_.DRONE_MODEL[is_blue]]

        self.angle_to_neutralisation = self.drone_model['angle_to_neutralisation']
        self.distance_to_neutralisation = self.drone_model['distance_to_neutralisation']
        self.duration_to_neutralisation = self.drone_model['duration_to_neutralisation']

        self.Cxy = self.drone_model['Cxy']
        self.Cz = self.drone_model['Cz']
        self.mass = self.drone_model['mass']

        self.Fxy_ratio = self.drone_model['Fxy_ratio']
        self.Fz_min_ratio = self.drone_model['Fz_min_ratio']
        self.Fz_max_ratio = self.drone_model['Fz_max_ratio']

        self.weight_eq = self.mass * param_.g * (1 - self.Fz_min_ratio)
        self.Fz_plus = (self.Fz_max_ratio - 1) * self.mass * param_.g
        self.Fz_minus = (1 - self.Fz_min_ratio) * self.mass * param_.g
        self.Fxy = self.mass * param_.g * self.Fxy_ratio

        self.max_speed = np.sqrt(self.Fxy / self.Cxy)
        self.max_up_speed = np.sqrt(self.Fz_plus / self.Cz)
        self.max_down_speed = np.sqrt(self.Fz_minus / self.Cz)
        self.max_rot_speed = 2 * np.pi

    def get_trajectory(self, pos_xyz, speed_xyz, action: np.ndarray(3,), time_: np.ndarray(1,)) -> np.ndarray(3,):
        '''
        returns the next position given the current position, speed and applied forces
        :param pos_xyz:
        :param speed_xyz:
        :param action:
        :param time_:
        :return:
        '''

        rho = action[0]                  # in [0, 1]
        theta = 2*np.pi * action[1]      # in [0, 2pi]
        psy = np.pi * (action[2] - 0.5)  # in [-pi/2, pi/2]

        fx = rho * np.cos(theta) * np.cos(psy) * self.Fxy
        fy = rho * np.sin(theta) * np.cos(psy) * self.Fxy
        fz = rho * np.sin(psy) * (self.Fz_plus if 0 < psy else self.Fz_minus)

        pos_speed = np.hstack((pos_xyz, speed_xyz))

        result_ = odeint(
            lambda u, v: self.drone_dynamics(u, v, fx, fy, fz, self.Cxy, self.Cz, self.mass),
            pos_speed,
            time_,
            Dfun=lambda u, v: self.fulljac(u, v, self.Cxy, self.Cz, self.mass)
        )
        x, y, z, dx, dy, dz = result_.T

        return np.array([x, y, z], dtype='float32'), np.array([dx, dy, dz], dtype='float32')

    def drone_dynamics(self, pos_speed, time_, f_x, f_y, f_z, Cxy, Cz, m):
        x, y, z, dx, dy, dz = pos_speed
        return [dx,
                dy,
                dz,
                1/m * (f_x - Cxy * dx * np.sqrt(dx**2 + dy**2 + dz**2)),
                1/m * (f_y - Cxy * dy * np.sqrt(dx**2 + dy**2 + dz**2)),
                1/m * (f_z - Cz * dz * np.sqrt(dx**2 + dy**2 + dz**2))]

    def fulljac(self, pos_speed, time_, Cxy, Cz, m) -> np.ndarray((6, 6), ):
        '''
        returns the Jacobian of the differential equation of the trajectory
        :param pos_speed:
        :param time_:
        :param Cxy:
        :param Cz:
        :param m:
        :return:
        '''

        x, y, z, dx, dy, dz = pos_speed
        J = np.zeros((6, 6))
        J[0, 3] = 1
        J[1, 4] = 1
        J[2, 5] = 1
        J[3, 3] = -Cxy/m * ((np.sqrt(dx**2 + dy**2 + dz**2)) + dx**2 / np.sqrt(dx**2 + dy**2 + dz**2))
        J[3, 4] = -Cxy/m * (dx * dy / np.sqrt(dx**2 + dy**2 + dz**2))
        J[3, 5] = -Cxy/m * (dx * dz / np.sqrt(dx**2 + dy**2 + dz**2))
        J[4, 4] = -Cxy/m * ((np.sqrt(dx**2 + dy**2 + dz**2)) + dy**2 / np.sqrt(dx**2 + dy**2 + dz**2))
        J[4, 3] = -Cxy/m * (dy * dx / np.sqrt(dx**2 + dy**2 + dz**2))
        J[4, 5] = -Cxy/m * (dy * dz / np.sqrt(dx**2 + dy**2 + dz**2))
        J[5, 5] = -Cz/m * ((np.sqrt(dx**2 + dy**2 + dz**2)) + dz**2 / np.sqrt(dx**2 + dy**2 + dz**2))
        J[5, 3] = -Cz/m * (dz * dx / np.sqrt(dx**2 + dy**2 + dz**2))
        J[5, 4] = -Cz/m * (dz * dy / np.sqrt(dx**2 + dy**2 + dz**2))
        return J
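The top speeds fall out of the force/drag balance F = C·v²: for the 'alpha' model in param_.py (mass 50 kg, Fxy_ratio 1, Cxy 0.2), Fxy = 50 · 9.81 · 1 = 490.5 N, so max_speed = sqrt(490.5 / 0.2) ≈ 49.5 m/s. A minimal check, assuming the repo modules are importable:

import numpy as np
from dronemodel import DroneModel

model = DroneModel(is_blue=True)  # blues map to 'alpha' via param_.DRONE_MODEL[True]
print(model.max_speed)            # ~49.5 m/s, i.e. sqrt(490.5 / 0.2)
assert np.isclose(model.max_speed, np.sqrt(490.5 / 0.2))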
filter_wrap.py
ADDED
@@ -0,0 +1,95 @@

import numpy as np
from gym import spaces, Wrapper


class FilterWrapper(Wrapper):
    """
    :param env: (gym.Env) Gym environment that will be wrapped
    """

    def __init__(self, env):

        self.nb_blues, self.nb_reds = env.nb_blues, env.nb_reds

        self.blue_deads = np.full((self.nb_blues,), False)
        self.red_deads = np.full((self.nb_reds,), False)

        env.observation_space = spaces.Tuple((
            spaces.Box(low=0, high=1, shape=(self.nb_blues, 6), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_reds, 6), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_blues, self.nb_reds), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_reds, self.nb_blues), dtype=np.float32),
            spaces.Discrete(1),
            spaces.Discrete(1)))

        super(FilterWrapper, self).__init__(env)

    def reset(self):
        """
        Reset the environment
        """
        obs = self.env.reset()

        return self._sort_obs(obs)

    def step(self, action):
        """
        :param action: ([float] or int) Action taken by the agent
        :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
        """

        blue_action, red_action = action

        new_ba = []
        index = 0
        for count, alive in enumerate(~self.blue_deads):
            if alive:
                new_ba.append(blue_action[index])
                index += 1
            else:
                new_ba.append(np.array([0, 0, 0]))
        blue_action = new_ba

        new_ra = []
        index = 0
        for count, alive in enumerate(~self.red_deads):
            if alive:
                new_ra.append(red_action[index])
                index += 1
            else:
                new_ra.append(np.array([0, 0, 0]))
        red_action = new_ra

        action = blue_action, red_action

        obs, reward, done, info = self.env.step(action)

        obs = self._sort_obs(obs)

        return obs, reward, done, info

    def _sort_obs(self, obs):

        blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads = obs

        self.blue_deads = blue_deads
        self.red_deads = red_deads

        blue_obs = np.vstack((blue_obs[~self.blue_deads], blue_obs[self.blue_deads]))
        red_obs = np.vstack((red_obs[~self.red_deads], red_obs[self.red_deads]))

        blues_fire = self.fire_sort(self.blue_deads, self.red_deads, blues_fire)
        reds_fire = self.fire_sort(self.red_deads, self.blue_deads, reds_fire)

        sort_obs = blue_obs, red_obs, blues_fire, reds_fire, sum(blue_deads), sum(red_deads)

        return sort_obs

    def fire_sort(self, dead_friends, dead_foes, friends_fire):

        friends_fire_big = np.zeros_like(friends_fire)
        friends_fire = np.compress(~dead_friends, friends_fire, axis=0)
        friends_fire = np.compress(~dead_foes, friends_fire, axis=1)
        friends_fire_big[:friends_fire.shape[0], :friends_fire.shape[1]] = friends_fire
        return friends_fire_big
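fire_sort uses np.compress to drop the rows of dead friends and the columns of dead foes, then zero-pads the result back to the fixed shape the observation space expects; the same steps on a toy matrix:

import numpy as np

fire = np.array([[1, 2], [3, 4]])
dead_friends = np.array([False, True])  # friend 1 is dead
dead_foes = np.array([False, False])    # both foes alive

big = np.zeros_like(fire)
small = np.compress(~dead_friends, fire, axis=0)  # keep row 0 -> [[1, 2]]
small = np.compress(~dead_foes, small, axis=1)    # keep both columns
big[:small.shape[0], :small.shape[1]] = small
print(big)  # [[1 2] [0 0]]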
monitor_wrap.py
ADDED
@@ -0,0 +1,119 @@
from dataclasses import make_dataclass

import numpy as np
from gym import Wrapper
import pandas as pd

import param_
from settings import Settings


Path = make_dataclass("Path", [('path', list), ('step', int), ('d_index', int), ('color', list)])


class MonitorWrapper(Wrapper):
    """
    :param env: (gym.Env) Gym environment that will be wrapped
    """

    def __init__(self, env, steps, verbose=True):
        # Call the parent constructor, so we can access self.env later
        super(MonitorWrapper, self).__init__(env)
        self.verbose = verbose
        self.blue_data = []
        self.red_data = []
        self.fire_paths = []
        lat, lon = Settings.latlon
        self.lat_tg = lat
        self.lon_tg = lon
        self.steps = steps
        self.step_ = 0
        self.step_max = 0

    def reset(self):
        """
        Reset the environment
        """
        obs = self.env.reset()
        self.blue_data = []
        self.red_data = []
        self.fire_paths = []
        self.step_ = 0
        self.step_max = 0

        return obs

    def step(self, action):
        """
        :param action: ([float] or int) Action taken by the agent
        :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
        """

        obs, reward, done, info = self.env.step(action)

        if self.verbose:
            self.monitor_state()

        self.step_ += 1
        if self.step_ == self.steps:
            done = True

        return obs, reward, done, info

    def monitor_state(self):

        env = self.env
        lat_tg, lon_tg = self.lat_tg, self.lon_tg

        for d_index, drone in enumerate(env.blue_team.drones):
            lat, lon, zed = drone.to_lat_lon_zed(lat_tg, lon_tg)
            self.blue_data.append([self.step_, True, drone.id_, lat, lon, zed, drone.color])

        for d_index, drone in enumerate(env.red_team.drones):
            lat, lon, zed = drone.to_lat_lon_zed(lat_tg, lon_tg)
            self.red_data.append([self.step_, False, drone.id_, lat, lon, zed, drone.color])

        for blue_id, red_id in np.argwhere(0 < env.playground.blues_have_fired_reds):
            b_lat, b_lon, b_zed = env.blue_team.drones[blue_id].to_lat_lon_zed(lat_tg, lon_tg)
            r_lat, r_lon, r_zed = env.red_team.drones[red_id].to_lat_lon_zed(lat_tg, lon_tg)
            self.fire_paths.append(Path(step=self.step_,
                                        path=[[b_lat, b_lon, b_zed], [r_lat, r_lon, r_zed]],
                                        color=param_.GREEN_COLOR,
                                        d_index=blue_id))

        for red_id, blue_id in np.argwhere(0 < env.playground.reds_have_fired_blues):
            b_lat, b_lon, b_zed = env.blue_team.drones[blue_id].to_lat_lon_zed(lat_tg, lon_tg)
            r_lat, r_lon, r_zed = env.red_team.drones[red_id].to_lat_lon_zed(lat_tg, lon_tg)
            self.fire_paths.append(Path(step=self.step_,
                                        path=[[b_lat, b_lon, b_zed], [r_lat, r_lon, r_zed]],
                                        color=param_.BLACK_COLOR,
                                        d_index=red_id))

    def get_df(self):

        fire_df = pd.DataFrame(self.fire_paths)

        df_columns = ['step', 'isBlue', 'd_index', 'lat', 'lon', 'zed', 'color']
        blue_df = pd.DataFrame(self.blue_data, columns=df_columns)
        red_df = pd.DataFrame(self.red_data, columns=df_columns)

        blue_path_df = []
        red_path_df = []

        for d_index in range(self.env.nb_blues):
            blue_path_df.append(self._get_path_df(blue_df, d_index, color=param_.BLUE_COLOR))
        for d_index in range(self.env.nb_reds):
            red_path_df.append(self._get_path_df(red_df, d_index, color=param_.RED_COLOR))

        return blue_df, red_df, fire_df, blue_path_df, red_path_df

    def _get_path_df(self, drone_df: pd.DataFrame, d_index: int, color: int = param_.BLUE_COLOR) -> pd.DataFrame:

        traj_length = param_.TRAJ_LENGTH

        path_total = drone_df[['lon', 'lat', 'zed', 'step']][drone_df.d_index == d_index].values.tolist()
        path = ([Path(path_total[:step+1], step, d_index, color) if step < traj_length
                 else Path(path_total[step - traj_length:step+1], step, d_index, color)
                 for step in range(len(path_total))])
        path_df = pd.DataFrame(path, columns=['path', 'step', 'd_index', 'color'])
        return path_df
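_get_path_df keeps at most the last TRAJ_LENGTH points of each trajectory at every step, so the rendered tail stays short; the same windowing in isolation:

TRAJ_LENGTH = 6
points = list(range(10))  # stand-in for per-step [lon, lat, zed, step] records
tails = [points[:s + 1] if s < TRAJ_LENGTH else points[s - TRAJ_LENGTH:s + 1]
         for s in range(len(points))]
print(tails[2])  # [0, 1, 2]
print(tails[9])  # [3, 4, 5, 6, 7, 8, 9]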
param_.py
ADDED
@@ -0,0 +1,122 @@

import numpy as np

STEP = 1  # seconds per time step
DURATION = 200  # seconds

POLICY_FOLDER = 'default_policies'

STEP_COST = 0.01
OOB_COST = 0.8  # Out Of Bounds: when the drone is below 0 or above PERIMETER_Z
RED_SHOT_REWARD = 10  # when a red drone is shot
TARGET_HIT_COST = 10  # when a red drone hits the target
THREAT_WEIGHT = 0  # when reds are close to the target (* function of the red distance)
STRAIGHT_ACTION_COST = 0.04  # when reds do not follow the shortest path
TTL_COST = 0.7  # when a red is still alive after its TTL: it is a failure for both blues and reds

TTL_RATIO = 2  # margin for red drones to get to the target if they went full speed
TTL_MIN = 4  # minimum time to succeed in the mission: ttl = TTL_MIN + (distance / vmax) * TTL_RATIO

ELEVATION_SCALE = 1
TRAJ_LENGTH = 6
SIMU_SPEED = 0.2


"""
the playground parameters
"""

PERIMETER = 5000
PERIMETER_Z = 600

# PERIMETER of the ground zone to defend
GROUNDZONE = 100

# position in LATLON
LATLON = {'Paris':
              {'lat': 48.865879, 'lon': 2.319827},
          'Fonsorbes':
              {'lat': 43.54, 'lon': 1.25},
          'San Francisco':
              {'lat': 37.7737283, 'lon': -122.4342383},
          'Puilaurens':
              {'lat': 42.803943093860894, 'lon': 2.299540897567384},
          }

"""
the Team Parameters
"""

# blue team init

BLUES = 12

BLUES_PER_CIRCLE = [3, 3, 4, 4, 4, 4]
BLUE_CIRCLES_RHO = [500, 900, 1400, 1600, 2000, 2500]
BLUE_CIRCLES_THETA = [0, -np.pi/3, -np.pi, -np.pi/2, 0, np.pi/3]
BLUE_CIRCLES_ZED = [200, 250, 250, 100, 250, 100]
BLUE_DISTANCE_FACTOR = 1

BLUE_IS_UNKILLABLE = True


BLUE_SPEED_INIT = 1  # as a ratio to max_speed

BLUE_COLOR = [0, 0, 150, 120]
BLUE_DEAD_COLOR = [20, 20, 60]

# red team init

REDS = 12

RED_SQUADS = [1, 1, 1, 1, 1, 15]
RED_SQUADS_RHO = [1000, 700, 1000, 1200, 1500, 2000]
RED_SQUADS_THETA = np.pi * np.array([0, 1/4, -1/4, -1/2, 1/2, 0])
RED_SQUADS_ZED = [200, 200, 100, 250, 200, 100]
RED_DISTANCE_FACTOR = 1


RED_RHO_NOISE = [60, 60, 100, 200, 200, 300]
RED_THETA_NOISE = np.pi * np.array([1/5, 1/2, 1, 1, 1, 1])
RED_ZED_NOISE = [60, 50, 10, 10, 50, 60]

RED_SPEED_INIT = 0.2  # as a ratio to max_speed

RED_COLOR = [150, 0, 0, 120]
RED_DEAD_COLOR = [120, 50, 30]
RED_SUCCESS_COLOR = [200, 200, 0]
BLACK_COLOR = [0, 0, 0]
GREEN_COLOR = [0, 255, 255]

"""
the Drone Parameters
"""

g = 9.81

DRONE_MODEL = ['beta', 'alpha']  # blue = DRONE_MODEL[1]

DRONE_MODELS = {
    'alpha': {
        'angle_to_neutralisation': np.pi / 4,  # in rad
        'distance_to_neutralisation': 250,  # in m
        'duration_to_neutralisation': 2,  # in s
        'Cxy': 0.2,  # horizontal air resistance = Cxy * v^2
        'Cz': 0.7,  # vertical air resistance
        'mass': 50,  # kg
        'Fz_min_ratio': 0.6,  # how much weight is compensated (below 1 => drone goes down)
        'Fz_max_ratio': 1.4,  # how much weight is compensated (>1 => drone goes up)
        'Fxy_ratio': 1,  # Force xy relative to weight
    },
    'beta': {
        'angle_to_neutralisation': np.pi / 4,
        'distance_to_neutralisation': 250,
        'duration_to_neutralisation': np.inf,
        'Cxy': 0.3,  # horizontal air resistance: linked to the max speed by Fxy_max = Cxy * Speedxy_max^2
        'Cz': 0.8,  # vertical air resistance: linked to the max speed by Fz_max = Cz * Speedz_max^2
        'mass': 40,  # kg
        'Fz_min_ratio': 0.5,  # how much weight is compensated (below 1 => drone goes down)
        'Fz_max_ratio': 1.8,  # how much weight is compensated (>1 => drone goes up)
        'Fxy_ratio': 0.6,  # Force xy relative to weight
    },
}
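DRONE_MODEL doubles as a boolean-indexed lookup: is_blue=True selects index 1 ('alpha') and is_blue=False selects index 0 ('beta'). A minimal illustration:

import param_

blue_model = param_.DRONE_MODELS[param_.DRONE_MODEL[True]]   # 'alpha'
red_model = param_.DRONE_MODELS[param_.DRONE_MODEL[False]]   # 'beta'
print(blue_model['mass'], red_model['mass'])  # 50 40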
playground.py
ADDED
@@ -0,0 +1,92 @@
import numpy as np
from dataclasses import dataclass

import param_
from settings import Settings
from drone import Drone


@dataclass
class Playground:
    """
    This is a cylindrical 3D env where blue drones defend a central zone from the attack of red drones.
    The playground also manages the interactions between foe drones, such as the firing.
    """

    perimeter = Settings.perimeter
    perimeter_z = Settings.perimeter_z
    groundzone = Settings.groundzone

    env: object
    blue_drones: [Drone]
    red_drones: [Drone]

    def __post_init__(self):
        # creates the fire matrices
        self.blues_have_fired_reds = np.zeros(shape=(len(self.blue_drones),
                                                     len(self.red_drones)), dtype=int)
        self.reds_have_fired_blues = np.zeros(shape=(len(self.red_drones),
                                                     len(self.blue_drones)), dtype=int)

        # how long a drone needs to keep the other in its sights
        self.blue_shots_to_kill = param_.DRONE_MODELS[param_.DRONE_MODEL[True]]['duration_to_neutralisation']
        self.red_shots_to_kill = param_.DRONE_MODELS[param_.DRONE_MODEL[False]]['duration_to_neutralisation']
        self.blue_shots_to_kill //= param_.STEP
        self.red_shots_to_kill //= param_.STEP

        # how far a drone can shoot
        self.distance_blue_shot = param_.DRONE_MODELS[param_.DRONE_MODEL[True]]['distance_to_neutralisation']
        self.distance_red_shot = param_.DRONE_MODELS[param_.DRONE_MODEL[False]]['distance_to_neutralisation']

    def reset(self):
        self.blues_have_fired_reds[...] = 0
        self.reds_have_fired_blues[...] = 0

    def get_observation(self):
        return self.blues_have_fired_reds / self.blue_shots_to_kill, \
               self.reds_have_fired_blues / self.red_shots_to_kill

    def step(self):
        """
        determines who has fired at whom, and who is dead in the end
        :return: Tuple with the counts of dead blues and reds (if a blue or a red is dead, the sequence is over)
        """
        # gets who has fired at whom in this step
        blues_fire_reds = np.array([[blue.fires_(red) for red in self.red_drones] for blue in self.blue_drones])
        reds_fire_blues = np.array([[red.fires_(blue) for blue in self.blue_drones] for red in self.red_drones])

        # if the foe is no longer seen, the count restarts from 0
        self.blues_have_fired_reds *= blues_fire_reds
        self.reds_have_fired_blues *= reds_fire_blues

        # and the count is incremented for the others
        self.blues_have_fired_reds += blues_fire_reds
        self.reds_have_fired_blues += reds_fire_blues

        # np magic: first find the list of shooter/shot duos, then keep the shot drones (only once)
        red_deads = np.unique(np.argwhere(self.blues_have_fired_reds >= self.blue_shots_to_kill).T[1])
        blue_deads = np.unique(np.argwhere(self.reds_have_fired_blues >= self.red_shots_to_kill).T[1])

        # tell the drones that they are dead
        for drone_id in blue_deads:
            self.blue_drones[drone_id].is_killed(is_blue=True)
        for drone_id in red_deads:
            self.red_drones[drone_id].is_killed(is_blue=False)

        # consider only living drones
        blue_drones = [drone for drone in self.blue_drones if drone.is_alive]
        red_drones = [drone for drone in self.red_drones if drone.is_alive]

        bf_obs, rf_obs = self.get_observation()
        bf_reward = rf_reward = 0
        remaining_blues, remaining_reds = len(blue_drones), len(red_drones)
        blue_shots, red_shots = len(blue_deads), len(red_deads)

        if blue_shots + red_shots > 0:
            print('someone is killed: {0} blues and {1} reds'.format(blue_shots, red_shots))

        return bf_obs, bf_reward, remaining_blues, blue_shots, rf_obs, rf_reward, remaining_reds, red_shots
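The multiply-then-add update keeps a consecutive-steps counter per shooter/target pair: multiplying by this step's 0/1 fire matrix resets every streak whose target slipped out of the cone, and the addition advances the streaks that survived. A toy illustration of the idea on a 1-D slice:

import numpy as np

count = np.array([2, 2])      # both blues have tracked their red for 2 steps
fired_now = np.array([1, 0])  # blue 0 still sees its red, blue 1 lost it
count *= fired_now            # -> [2, 0]: blue 1's streak resets
count += fired_now            # -> [3, 0]: blue 0's streak advances
print(count)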
policies/_last/_b1r1/blues_last.zip
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3d64d434b3ea20f669d130b3871196920c9efe6263625b050dfc1841e141e89c
size 3146628
policies/_last/_b1r1/reds_last.zip
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:03cc75c12317da0fada0de5c5e161855c87ec5fab05e435f6bbf4c08bc07aa13
size 3148185
procfile.txt
ADDED
@@ -0,0 +1 @@
web: sh setup.sh && streamlit run app.py
redux_wrap.py
ADDED
@@ -0,0 +1,80 @@
import gym
from gym import spaces
import numpy as np

from settings import Settings


class ReduxWrapper(gym.Wrapper):
    """
    Reduces the observation and action spaces when some drones have been removed.
    :param env: (gym.Env) Gym environment that will be wrapped
    """

    def __init__(self, env, minus_blue=0, minus_red=0):

        # the observation and action spaces are reduced
        nb_blues, nb_reds = Settings.blues, Settings.reds

        self.nb_blues = nb_blues - minus_blue
        self.nb_reds = nb_reds - minus_red

        self.blue_deads = minus_blue
        self.red_deads = minus_red

        env.observation_space = spaces.Tuple((
            spaces.Box(low=0, high=1, shape=(self.nb_blues, 6), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_reds, 6), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_blues, self.nb_reds), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_reds, self.nb_blues), dtype=np.float32)))

        env.action_space = spaces.Tuple((
            spaces.Box(low=0, high=1, shape=(self.nb_blues, 3), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_reds, 3), dtype=np.float32)))

        super(ReduxWrapper, self).__init__(env)

    def reset(self):
        """
        Reset the environment
        """
        obs = self.env.reset()
        obs = self.post_obs(obs)

        return obs

    def step(self, action):

        # the action needs expansion: dead drones get a zero action appended
        blue_action, red_action = action
        if self.blue_deads:
            blue_action = np.vstack((blue_action, np.zeros((self.blue_deads, 3))))
        if self.red_deads:
            red_action = np.vstack((red_action, np.zeros((self.red_deads, 3))))
        action = blue_action, red_action

        obs, reward, done, info = self.env.step(action)

        obs = self.post_obs(obs)

        return obs, reward, done, info

    def post_obs(self, obs):

        # the obs needs reduction
        blue_obs, red_obs, blues_fire, reds_fire = obs

        if self.blue_deads:
            blue_obs = blue_obs[:-self.blue_deads]
            blues_fire = blues_fire[:-self.blue_deads]
            reds_fire = reds_fire[:, :-self.blue_deads]

        if self.red_deads:
            red_obs = red_obs[:-self.red_deads]
            reds_fire = reds_fire[:-self.red_deads]
            blues_fire = blues_fire[:, :-self.red_deads]

        return blue_obs, red_obs, blues_fire, reds_fire
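The reduction in post_obs slices the trailing rows and columns, which apparently assumes that the removed drones sit at the end of the arrays; a small sketch with made-up shapes shows the slicing at work:

import numpy as np

# hypothetical fire matrix for 3 blues x 4 reds, with 1 dead blue and 2 dead reds
blues_fire = np.arange(12).reshape(3, 4)
blue_deads, red_deads = 1, 2

reduced = blues_fire[:-blue_deads]   # drop the rows of the dead blue shooters
reduced = reduced[:, :-red_deads]    # drop the columns of the dead red targets
print(reduced.shape)  # (2, 2)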
requirements.txt
ADDED
@@ -0,0 +1,3 @@
pandas~=2.0.2
numpy~=1.24.3
gym~=0.26.2
reward_wrap.py
ADDED
@@ -0,0 +1,86 @@
import gym

import param_
from settings import Settings


class RewardWrapper(gym.Wrapper):
    """
    :param env: (gym.Env) Gym environment that will be wrapped
    """

    def __init__(self, env, is_blue: bool = True, is_double: bool = False):

        self.is_blue = is_blue
        self.is_double = is_double

        super(RewardWrapper, self).__init__(env)

    def reset(self):
        """
        Reset the environment
        """
        obs = self.env.reset()
        return obs

    def step(self, action):
        """
        :param action: ([float] or int) Action taken by the agent
        :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
        """

        obs, reward, done, info = self.env.step(action)

        reward, done, info = self.situation_evaluation(info)

        return obs, reward, done, info

    def situation_evaluation(self, info):

        if self.is_double:
            # both rewards are ignored; the episode simply ends when one side has no drone left
            if info['remaining blues'] * info['remaining reds'] == 0:
                return 0, True, info
            else:
                return 0, False, info

        else:
            if self.is_blue:  # blue is learning
                if info['remaining reds'] == 0:
                    return param_.WIN_REWARD, True, info
                if info['remaining blues'] == 0:
                    return -param_.WIN_REWARD, True, info
                if 0 < info['blue_oob']:
                    return -param_.OOB_COST, True, info
                if info['ttl'] < 0:
                    return -param_.TTL_COST, True, info  # blues have taken too long to shoot the red drone
                # else the episode continues
                reward = -param_.STEP_COST
                reward -= info['weighted_red_distance'] * param_.THREAT_WEIGHT
                reward -= info['hits_target'] * param_.TARGET_HIT_COST
                reward += info['red_shots'] * param_.RED_SHOT_REWARD
                reward += info['distance_to_straight_action'] * param_.STRAIGHT_ACTION_COST
                return reward, False, info
            else:  # red is learning
                done = False
                reward = -param_.STEP_COST
                reward += info['weighted_red_distance'] * param_.THREAT_WEIGHT
                reward += info['hits_target'] * param_.TARGET_HIT_COST
                reward -= info['red_shots'] * param_.RED_SHOT_REWARD
                reward -= info['distance_to_straight_action'] * param_.STRAIGHT_ACTION_COST
                if info['remaining reds'] == 0:
                    done = True
                    return reward, done, info
                if info['remaining blues'] == 0:
                    done = True
                    reward += info['remaining reds'] * param_.TARGET_HIT_COST
                    return reward, done, info
                if 0 < info['red_oob']:
                    done = True
                    reward -= param_.OOB_COST
                if info['ttl'] < 0:
                    done = True
                    reward -= param_.TTL_COST * info['remaining reds']  # reds have taken too long to hit the target
                # else the episode continues

                return reward, done, info
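A quick numeric check of the per-step blue shaping may help; the param_ values below are purely illustrative, not the ones defined in param_.py:

# illustrative per-step blue reward, with made-up parameter values
STEP_COST, THREAT_WEIGHT, TARGET_HIT_COST, RED_SHOT_REWARD, STRAIGHT_ACTION_COST = 0.1, 1.0, 5.0, 2.0, 0.5
info = {'weighted_red_distance': 0.2, 'hits_target': 0, 'red_shots': 1, 'distance_to_straight_action': 0.0}

reward = -STEP_COST
reward -= info['weighted_red_distance'] * THREAT_WEIGHT
reward -= info['hits_target'] * TARGET_HIT_COST
reward += info['red_shots'] * RED_SHOT_REWARD
reward += info['distance_to_straight_action'] * STRAIGHT_ACTION_COST
print(round(reward, 2))  # 1.7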
rotate_wrap.py
ADDED
@@ -0,0 +1,93 @@
import numpy as np
import gym

from drone import Drone


class RotateWrapper(gym.Wrapper):
    """
    :param env: (gym.Env) Gym environment that will be wrapped
    """

    def __init__(self, env):
        # Call the parent constructor, so we can access self.env later
        super(RotateWrapper, self).__init__(env)
        self.angle = 0

    def reset(self):
        """
        Reset the environment
        """
        obs = self.env.reset()

        obs = self.post_obs(obs)

        return obs

    def step(self, action):
        """
        :param action: ([float] or int) Action taken by the agent
        :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
        """

        action = self.rotate_action(action)

        obs, reward, done, info = self.env.step(action)

        obs = self.post_obs(obs)

        return obs, reward, done, info

    def post_obs(self, obs):
        self.angle = self.get_angle(obs)
        return self.rotate_obs(obs)

    def get_angle(self, obs: np.ndarray) -> float:
        blue_obs, red_obs, blue_fire, red_fire = obs
        sigma = 0

        # mean direction of the swarms: argument of the sum of the rho * exp(i * theta) terms
        for this_obs in (blue_obs, red_obs):
            for d in this_obs:
                sigma += d[0] * np.exp(1j * d[1])
        angle = np.angle(sigma)
        return angle

    def rotate_obs(self, obs):
        blue_obs, red_obs, blue_fire, red_fire = obs

        rotated_blue_obs = []
        rotated_red_obs = []

        for this_obs, is_blue, rotated_obs in zip((blue_obs, red_obs),
                                                  (True, False),
                                                  (rotated_blue_obs, rotated_red_obs)):
            drone = Drone(is_blue=is_blue)
            for d in this_obs:

                d_meter = np.zeros(6,)
                # get the pos and speed in cylindrical coordinates in meters
                d_meter[:3] = drone.from_norm(d[:3], drone.max_positions, drone.min_positions)
                d_meter[3:6] = drone.from_norm(d[3:6], drone.max_speeds, drone.min_speeds)

                # rotate
                d_meter[1] -= self.angle
                d_meter[4] -= self.angle

                # back to norm
                d[:3] = drone.to_norm(d_meter[:3], drone.max_positions, drone.min_positions)
                d[3:6] = drone.to_norm(d_meter[3:6], drone.max_speeds, drone.min_speeds)

                rotated_obs.append(d)

            del drone

        return np.array(rotated_blue_obs), np.array(rotated_red_obs), blue_fire, red_fire

    def rotate_action(self, action):

        blue_action, red_action = action
        blue_action = np.array(list(map(lambda x: [x[0], (x[1] + self.angle / 2 / np.pi) % 1, x[2]], blue_action)))
        red_action = np.array(list(map(lambda x: [x[0], (x[1] + self.angle / 2 / np.pi) % 1, x[2]], red_action)))
        action = blue_action, red_action

        return action
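get_angle computes the mean direction of the swarm as the argument of the sum of the rho * exp(i*theta) terms; a two-drone sketch with made-up values illustrates it:

import numpy as np

# two drones at the same radius, at +45 and -45 degrees: their complex sum points at 0 rad
rhos = np.array([1.0, 1.0])
thetas = np.array([np.pi / 4, -np.pi / 4])
sigma = np.sum(rhos * np.exp(1j * thetas))
print(np.angle(sigma))  # 0.0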
runner.py
ADDED
@@ -0,0 +1,16 @@
from swarm_policy import SwarmPolicy


def run_episode(env, obs, blues: int, reds: int):
    blue_policy = SwarmPolicy(blues=blues, reds=reds, is_blue=True)
    red_policy = SwarmPolicy(blues=blues, reds=reds, is_blue=False)
    sum_reward = 0
    done = False
    while not done:
        action = blue_policy.predict(obs), red_policy.predict(obs)
        obs, reward, done, info = env.step(action)
        sum_reward += reward
    return obs, sum_reward, done, info
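A possible way to call run_episode, assuming `env` has been built with the same wrapper stack as in train.py and reset beforehand (a sketch, not code from the repo):

obs = env.reset()
obs, sum_reward, done, info = run_episode(env, obs, blues=2, reds=2)
print(sum_reward)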
settings.py
ADDED
@@ -0,0 +1,82 @@
from dataclasses import dataclass

import param_
import streamlit as st
import numpy as np


@dataclass
class Settings:

    perimeter: int = param_.PERIMETER
    perimeter_z: int = param_.PERIMETER_Z
    groundzone: int = param_.GROUNDZONE

    latlon = param_.LATLON['Paris']['lat'], param_.LATLON['Paris']['lon']

    blues: int = param_.BLUES

    blues_per_circle = np.array(param_.BLUES_PER_CIRCLE)
    blue_circles_rho = param_.BLUE_CIRCLES_RHO
    blue_circles_theta = param_.BLUE_CIRCLES_THETA
    blue_circles_zed = param_.BLUE_CIRCLES_ZED
    blue_distance_factor: float = param_.BLUE_DISTANCE_FACTOR

    is_unkillable: bool = param_.BLUE_IS_UNKILLABLE

    blue_speed_init: int = param_.BLUE_SPEED_INIT

    reds: int = param_.REDS

    red_squads = param_.RED_SQUADS
    red_squads_rho = np.array(param_.RED_SQUADS_RHO)
    red_squads_theta = param_.RED_SQUADS_THETA
    red_squads_zed = param_.RED_SQUADS_ZED
    red_distance_factor: float = param_.RED_DISTANCE_FACTOR

    red_rho_noise = np.array(param_.RED_RHO_NOISE)
    red_theta_noise = np.array(param_.RED_THETA_NOISE)
    red_zed_noise = np.array(param_.RED_ZED_NOISE)

    red_speed_init: int = param_.RED_SPEED_INIT

    policy_folder: str = param_.POLICY_FOLDER


def define_(with_streamlit: bool = True, blues: int = Settings.blues, reds: int = Settings.reds):
    """
    shows the blue and red swarms in Streamlit
    :return: the numbers of blue and red drones
    """
    if with_streamlit:
        st.title('Blues against Reds by hexamind.ai')
        st.write('controlled by Reinforcement Learning.')
        st.text('<- Set parameters')

        st.sidebar.subheader("Define the battlefield")
        blues = st.sidebar.slider("how many blues on defense?", 1, 20, 6)
        Settings.blues = blues
        blue_dispersion = st.sidebar.slider("set the average blue dispersion", 0.3, 1.0, 0.8)
        reds = st.sidebar.slider("how many reds are on the attack?", 1, 20, 6)
        Settings.reds = reds
        red_dispersion = st.sidebar.slider("set the average red dispersion", 0.3, 1.0, 0.7)

        Settings.blue_distance_factor = 3 * blue_dispersion
        Settings.red_distance_factor = 3 * red_dispersion

        location = st.sidebar.radio("Location", ['Paris', 'Puilaurens', 'San Francisco'])

        lat_tg = param_.LATLON[location]['lat']
        lon_tg = param_.LATLON[location]['lon']

        Settings.latlon = lat_tg, lon_tg

        st.sidebar.write(
            'you probably need more drones. '
            'No worries, we have plenty at www.hexamind.ai')

    return blues, reds
setup.sh
ADDED
@@ -0,0 +1,12 @@
mkdir -p ~/.streamlit/
echo "\
[server]\n\
headless = true\n\
port = $PORT\n\
enableCORS = false\n\
\n\
" > ~/.streamlit/config.toml
sort_wrap.py
ADDED
@@ -0,0 +1,98 @@
import numpy as np
import gym


class SortWrapper(gym.Wrapper):
    """
    :param env: (gym.Env) Gym environment that will be wrapped
    """

    def __init__(self, env):
        # Call the parent constructor, so we can access self.env later
        super(SortWrapper, self).__init__(env)
        self.blue_signature = None
        self.red_signature = None

    def reset(self):
        """
        Reset the environment
        """
        obs = self.env.reset()
        obs = self.sort_obs(obs)

        return obs

    def step(self, action):
        """
        :param action: ([float] or int) Action taken by the agent
        :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
        """

        action = self.unsort_action(action)

        obs, reward, done, info = self.env.step(action)

        obs = self.post_obs(obs)

        return obs, reward, done, info

    def post_obs(self, obs):
        return self.sort_obs(obs)

    def sort_obs(self, obs):

        blue_obs, red_obs, blue_fire, red_fire = obs

        blue_obs, self.blue_signature = self.sort_and_sign(blue_obs)
        red_obs, self.red_signature = self.sort_and_sign(red_obs)

        blue_fire = self.unsort_matrix_with_signatures(blue_fire, self.blue_signature, self.red_signature)
        red_fire = self.unsort_matrix_with_signatures(red_fire, self.red_signature, self.blue_signature)

        obs = blue_obs, red_obs, blue_fire, red_fire

        return obs

    def unsort_action(self, action):

        blue_action, red_action = action

        unsorted_blue_action = self.unsort_with_signature(blue_action, self.blue_signature)
        unsorted_red_action = self.unsort_with_signature(red_action, self.red_signature)

        action = unsorted_blue_action, unsorted_red_action

        return action

    def sort_and_sign(self, an_array: np.ndarray) -> [np.ndarray, []]:
        """
        sorts an ndarray of 6 columns of floats and keeps the "signature": the positions of the items
        before sorting, so that the initial order can be retrieved after modifications of the arrays.
        The order is retrieved with the unsort_with_signature function.
        :param an_array: array of shape (n, 6)
        :return: the sorted array and its signature
        """
        zip_list = zip(an_array, range(len(an_array)))
        zip_sorted = sorted(zip_list, key=lambda x: (x[0][0], x[0][1], x[0][2], x[0][3], x[0][4], x[0][5]))
        sorted_array, signature = zip(*zip_sorted)
        return np.array(sorted_array), signature

    def unsort_with_signature(self, an_array: np.ndarray, signature: []) -> np.ndarray:
        """
        see sort_and_sign
        :param an_array: array to put back in the original order
        :param signature: signature returned by sort_and_sign
        :return: the unsorted array
        """
        zip_list = zip(signature, an_array)
        zip_unsorted = sorted(zip_list)
        _, unsorted = zip(*zip_unsorted)
        return np.array(unsorted)

    def unsort_matrix_with_signatures(self, matrix: np.ndarray, sign_line: np.ndarray, sign_col: np.ndarray) \
            -> np.ndarray:

        matrix = self.unsort_with_signature(matrix, sign_line)
        matrix = self.unsort_with_signature(matrix.T, sign_col).T

        return matrix
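The signature mechanism is easiest to see on a tiny array; this round-trip sketch (made-up rows) mirrors sort_and_sign and unsort_with_signature:

import numpy as np

rows = np.array([[3., 0, 0, 0, 0, 0],
                 [1., 0, 0, 0, 0, 0],
                 [2., 0, 0, 0, 0, 0]])

# sort and remember where each row came from
zip_sorted = sorted(zip(rows, range(len(rows))), key=lambda x: tuple(x[0]))
sorted_rows, signature = zip(*zip_sorted)
print(signature)  # (1, 2, 0): the original index of each sorted row

# putting the rows back: sort again, on the signature this time
_, unsorted = zip(*sorted(zip(signature, sorted_rows)))
print(np.array(unsorted)[:, 0])  # [3. 1. 2.]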
swarm_policy.py
ADDED
@@ -0,0 +1,342 @@
from dataclasses import dataclass
import numpy as np
from stable_baselines3 import SAC
from os import path

import param_
from drone import Drone


@dataclass
class SwarmPolicy:
    blues: int
    reds: int
    is_blue: bool
    model: object = None
    count: int = 0

    def __post_init__(self):

        dir_path = "policies/last" + f"/b{self.blues}r{self.reds}/"
        model_path = dir_path + ("blues_last.zip" if self.is_blue else "reds_last.zip")
        if path.exists(model_path):
            print("model loaded:" + model_path)
            self.model = SAC.load(model_path, verbose=1)

    # predicts from the model or from a simple centripetal model
    def predict(self, obs):

        self.count += 1

        if self.model:
            action, _ = self.model.predict(obs)
            # verbose = 'prediction from ' + (' blue model' if self.is_blue else ' red model') + ' at ' + str(self.count)
            # print(verbose)
            return action
        else:
            if self.is_blue:
                return self._improved_attack_predict(obs)
            else:
                return self._simple_predict(obs)

    # the default policy
    def _simple_predict(self, obs):
        simple_obs = _decentralise(obs[0:self.blues*6] if self.is_blue else obs[self.blues*6:(self.blues+self.reds)*6])
        drone = Drone(is_blue=self.is_blue)
        action = np.array([])
        nb_drones = self.blues if self.is_blue else self.reds
        for d in range(nb_drones):
            assign_pos_speed(drone, d, simple_obs)
            action_d = drone.simple_red()
            action = np.hstack((action, action_d))

        action = _centralise(action)
        return action

    # the default attack policy
    def _attack_predict(self, obs):

        def assign_targets(friends_obs, foes_obs):
            '''
            this current version is simplistic: each foe is assigned its closest unassigned friend,
            cycling over the foes until every friend has a target
            :param friends_obs: flat observation of the friendly drones
            :param foes_obs: flat observation of the foe drones
            :return: for each friend, the index of its assigned foe
            '''
            friends_nb = len(friends_obs) // 6
            foes_nb = len(foes_obs) // 6

            friends_targets = -np.ones(friends_nb, dtype=int)
            while -1 in friends_targets:
                for foe in range(foes_nb):
                    foe_pos = _denorm(foes_obs[foe*6:foe*6+3])
                    foe_pos_z = foe_pos[0] * np.exp(1j * foe_pos[1])
                    min_distance = np.inf
                    closest_friend = -1
                    for friend in range(friends_nb):
                        if friends_targets[friend] == -1:
                            friend_pos = _denorm(friends_obs[friend*6:friend*6+3])
                            friend_pos_z = friend_pos[0] * np.exp(1j * friend_pos[1])
                            distance = np.abs(foe_pos_z - friend_pos_z) ** 2 + (friend_pos[2] - foe_pos[2]) ** 2
                            if distance < min_distance:
                                min_distance = distance
                                closest_friend = friend
                    friends_targets[closest_friend] = foe

            return friends_targets

        # gets the friends and foes obs
        blue_obs = _decentralise(obs[0:self.blues * 6])
        red_obs = _decentralise(obs[self.blues * 6:(self.blues + self.reds) * 6])
        friends_obs = blue_obs if self.is_blue else red_obs
        foes_obs = red_obs if self.is_blue else blue_obs

        # assign foe targets to the friends
        friends_targets = assign_targets(friends_obs, foes_obs)

        friend_drone = Drone(is_blue=self.is_blue)
        foe_drone = Drone(is_blue=not self.is_blue)

        action = np.array([])
        nb_drones = self.blues if self.is_blue else self.reds
        for d in range(nb_drones):

            # assign denormalised position and speed (in m and m/s) to the drones
            friend_drone = assign_pos_speed(friend_drone, d, friends_obs)
            foe_drone_id = friends_targets[d]

            foe_drone = assign_pos_speed(foe_drone, foe_drone_id, foes_obs)
            target, time_to_target, _ = calculate_target(friend_drone, foe_drone)
            action_d = friend_drone.simple_red(target=target, z_margin=0)
            action = np.hstack((action, action_d))

        action = _centralise(action)
        return action

    # the improved manual attack policy
    def _improved_attack_predict(self, obs):
        # TODO: revamp the algo as follows
        #  start from the closest reds, find all blues that are compatible with some margin
        #  among those blues, choose the blue whose first target is the latest
        #  until there is no red left
        #  in case there are blues left over, restart the process, or converge to zero, or...
        #  or we decide in advance how many blues we want on the closest and populate several blues against reds
        #  at the beginning
        # TODO: check that the reds are correctly ordered
        # TODO: add the margin in the params
        # TODO: handle the case where the foe is not reachable

        # gets the friends and foes obs
        blue_obs = _decentralise(obs[0:self.blues * 6])
        red_obs = _decentralise(obs[self.blues * 6:(self.blues + self.reds) * 6])
        friends_obs = blue_obs if self.is_blue else red_obs
        foes_obs = red_obs if self.is_blue else blue_obs

        friends_nb = self.blues if self.is_blue else self.reds
        foes_nb = self.reds if self.is_blue else self.blues

        friend_drones = []
        for friend_id in range(friends_nb):
            # assign denormalised position and speed (in m and m/s) to each friend drone
            friend_drone = Drone(is_blue=self.is_blue)
            friend_drone = assign_pos_speed(friend_drone, friend_id, friends_obs)
            friend_drones.append(friend_drone)

        foe_drones = []
        for foe_id in range(foes_nb):
            # assign denormalised position and speed (in m and m/s) to each foe drone
            foe_drone = Drone(is_blue=not self.is_blue)
            foe_drone = assign_pos_speed(foe_drone, foe_id, foes_obs)
            foe_drones.append(foe_drone)

        targets = np.zeros((friends_nb, foes_nb, 3))
        best_targets = -np.ones((friends_nb, 3))
        times_to_target = -np.ones((friends_nb, foes_nb))
        calculation_done = -np.ones(friends_nb)
        friend_chosen = -np.ones(friends_nb)

        foe_id = 0
        friends_chosen = 0
        while foe_id < foes_nb - 1 and friends_chosen < friends_nb:
            best_friend = -1
            best_target = np.zeros(3)
            longest_time = -np.inf
            foe_drone = foe_drones[foe_id]
            for friend_id in range(friends_nb):
                if friend_chosen[friend_id] == -1:  # the friend has no foe target assigned

                    friend_drone = friend_drones[friend_id]

                    if calculation_done[friend_id] == -1:  # it has not already been calculated
                        target_, time_to_target, is_a_catch = calculate_target(friend_drone, foe_drone)
                        times_to_target[friend_id][foe_id] = time_to_target if is_a_catch else np.inf
                        targets[friend_id][foe_id] = target_
                    if times_to_target[friend_id][foe_id] < np.inf:  # it is a catch

                        if calculation_done[friend_id] == -1:  # times against the other foes have not been computed
                            for foe_idx in range(foe_id + 1, foes_nb):
                                foex_drone = foe_drones[foe_idx]
                                target_, time_to_target, is_a_catch = calculate_target(friend_drone, foex_drone)
                                times_to_target[friend_id][foe_idx] = time_to_target if is_a_catch else np.inf
                                targets[friend_id][foe_idx] = target_
                            calculation_done[friend_id] = 1
                        closest_target = np.min(times_to_target[friend_id, foe_id + 1:])
                        if longest_time < closest_target:
                            longest_time = closest_target
                            best_friend = friend_id
                            best_target = targets[friend_id][foe_id]

            best_targets[best_friend] = best_target
            friend_chosen[best_friend] = foe_id
            friends_chosen += 1
            foe_id += 1

        if friends_chosen < friends_nb:
            last_foe = foes_nb - 1
            for friend_id in range(friends_nb):
                if friend_chosen[friend_id] == -1:
                    if times_to_target[friend_id, last_foe] == -1:
                        friend_drone, foe_drone = friend_drones[friend_id], foe_drones[last_foe]
                        target_, time_to_target, is_a_catch = calculate_target(friend_drone, foe_drone)
                        targets[friend_id][last_foe] = target_
                        # store the time so that the argmin below can consider the last foe
                        times_to_target[friend_id][last_foe] = time_to_target if is_a_catch else np.inf
                    closest_target_id = np.argmin(times_to_target[friend_id, :])
                    best_targets[friend_id] = targets[friend_id][closest_target_id]

        action = np.array([])
        for friend_id in range(friends_nb):
            action_d = friend_drones[friend_id].simple_red(target=best_targets[friend_id], z_margin=0)
            action = np.hstack((action, action_d))

        action = _centralise(action)
        return action


def assign_pos_speed(drone: Drone, d: int, obs: np.ndarray) -> Drone:
    # assign denormalised position and speed (in m and m/s) to the drone
    d = int(d)
    pos_n, speed_n = obs[d*6:d*6+3], obs[d*6+3:d*6+6]
    pos = drone.from_norm(pos_n, drone.max_positions, drone.min_positions)
    drone.position = pos
    speed = drone.from_norm(speed_n, drone.max_speeds, drone.min_speeds)
    drone.speed = speed
    return drone


def _denorm(pos):  # from norm (i.e. already decentralised) to meters
    drone = Drone()
    pos_meter = drone.from_norm(pos, drone.max_positions, drone.min_positions)
    return pos_meter


def _decentralise(obs):  # [-1, 1] to [0, 1]
    obs = (obs + 1) / 2
    return obs


def _centralise(act):  # [0, 1] to [-1, 1]
    act = (act - 1/2) * 2
    return act


def calculate_target(blue_drone: Drone, red_drone: Drone) -> (np.ndarray, float, bool):
    '''
    finds, by bisection along the red trajectory, the interception point that the blue drone can reach
    in time, and states whether the catch is possible
    :param blue_drone: the pursuing drone
    :param red_drone: the target drone
    :return: the target point, the blue time to reach it, and whether it is a catch
    '''
    # TODO: be more precise at the end of the discovery process

    def transform(pos, delta_, theta_):
        pos[0] -= delta_
        pos[1] -= theta_
        return pos[0] * np.exp(1j * pos[1])

    def untransform_to_array(pos, delta_, theta_):
        pos[0] += delta_
        pos[1] += theta_
        return pos

    theta = red_drone.position[1]
    delta = param_.GROUNDZONE

    attack_pos = np.copy(blue_drone.position)
    target_pos = np.copy(red_drone.position)

    z_blue = transform(attack_pos, delta, theta)
    z_red = np.real(transform(target_pos, delta, theta))

    v_blue = blue_drone.drone_model.max_speed
    v_red = red_drone.drone_model.max_speed

    blue_shooting_distance = blue_drone.drone_model.distance_to_neutralisation

    blue_time_to_zero = (np.abs(z_blue) - blue_shooting_distance) / v_blue
    red_time_to_zero = z_red / v_red

    if red_time_to_zero <= param_.STEP or red_time_to_zero < blue_time_to_zero + param_.STEP:
        return np.zeros(3), red_time_to_zero, False
    else:
        max_target = z_red
        min_target = 0
        while True:
            target = (max_target + min_target) / 2
            blue_time_to_target = max(0, (np.abs(z_blue - target) - blue_shooting_distance) / v_blue)
            red_time_to_target = np.abs(z_red - target) / v_red
            if red_time_to_target - param_.STEP < blue_time_to_target <= red_time_to_target:
                target = untransform_to_array((target / z_red) * target_pos, delta, theta)
                return target, blue_time_to_target, True
            if red_time_to_target < blue_time_to_target:
                max_target = target
            else:  # blue_time_to_target <= red_time_to_target - param_.STEP
                min_target = target


def unitary_test(rho_blue: float, theta_blue: float, rho_red: float, theta_red: float):
    '''
    test for the calculate_target function
    :param rho_blue:
    :param theta_blue:
    :param rho_red:
    :param theta_red:
    :return:
    '''
    blue_drone = Drone()
    blue_drone.position = np.array([rho_blue, theta_blue, 100])
    red_drone = Drone(is_blue=False)
    red_drone.position = np.array([rho_red, theta_red, 100])
    tg, time, is_a_catch = calculate_target(blue_drone, red_drone)
    print('rho_blue : ', rho_blue, ' theta_blue : ', theta_blue, ' rho_red : ', rho_red, ' theta_red : ', theta_red,
          ' tg : ', tg, ' time : ', time)
    return tg, time


def test():
    '''
    test for the calculate_target function over a grid of initial positions
    :return:
    '''
    for rho_blue in [1000]:
        for theta_blue in np.pi * np.array([-1, 0.75, 0.5, 0.25, 0]):
            for rho_red in [1000]:
                for theta_red in np.pi * np.array([0, 1/4]):
                    unitary_test(rho_blue=rho_blue, theta_blue=theta_blue, rho_red=rho_red, theta_red=theta_red)
    print('done')
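The bisection in calculate_target can be followed more easily on a stripped-down version; the geometry, speeds and distances below are all made up:

import numpy as np

# the red flies from z_red towards 0 along the real axis; the blue starts at the complex point z_blue;
# the bisection finds the point on the red's path that the blue reaches just in time
z_blue, z_red = 300 + 400j, 1000.0
v_blue, v_red, shoot_dist, step = 15.0, 10.0, 50.0, 1.0

lo, hi = 0.0, z_red
for _ in range(60):
    target = (lo + hi) / 2
    t_blue = max(0.0, (abs(z_blue - target) - shoot_dist) / v_blue)
    t_red = abs(z_red - target) / v_red
    if t_red - step < t_blue <= t_red:
        break        # the blue arrives within one step before the red: a catch
    if t_red < t_blue:
        hi = target  # the red passes first: try a point closer to the ground zone
    else:
        lo = target  # the blue would be early: try a point further out
print(round(target, 1))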
swarmenv.py
ADDED
@@ -0,0 +1,100 @@
import gym
from gym import spaces
import numpy as np

import param_
from settings import Settings
from playground import Playground
from team import BlueTeam, RedTeam


class SwarmEnv(gym.Env):
    """
    Custom 3D environment that follows the gym interface.
    This is a 3D env where the blue drones defend a circular GROUNDZONE from a red drones attack
    """

    def __init__(self, blues=Settings.blues, reds=Settings.reds):
        """
        :param blues: the number of blue drones
        :param reds: the number of red drones
        """
        super(SwarmEnv, self).__init__()

        self.nb_blues = blues
        self.nb_reds = reds

        self.blue_team = BlueTeam(number_of_drones=self.nb_blues)
        self.red_team = RedTeam(number_of_drones=self.nb_reds)

        self.playground = Playground(env=self, blue_drones=self.blue_team.drones, red_drones=self.red_team.drones)

        self.steps = 0

        self.observation_space = spaces.Tuple((
            spaces.Box(low=0, high=1, shape=(self.nb_blues, 6), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_reds, 6), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_blues, self.nb_reds), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_reds, self.nb_blues), dtype=np.float32),
            spaces.MultiBinary(self.nb_blues),
            spaces.MultiBinary(self.nb_reds),
        ))

        self.action_space = spaces.Tuple((
            spaces.Box(low=0, high=1, shape=(self.nb_blues, 3), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_reds, 3), dtype=np.float32)))

    def reset(self, obs=None):
        """
        resets the environment as part of the gym interface
        """
        if obs:
            blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads = obs
        else:
            blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads = None, None, None, None, None, None

        self.blue_team.reset(obs=blue_obs)
        self.red_team.reset(obs=red_obs)
        self.playground.reset()
        self.steps = 0

        # get observations from the blue and red teams
        blue_obs, blue_deads = self.blue_team.get_observation()
        red_obs, red_deads = self.red_team.get_observation()
        blues_fire, reds_fire = self.playground.get_observation()

        return blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads

    def render(self, mode='human'):
        pass

    def step(self, action):

        self.steps += 1

        blue_action, red_action = action
        blue_obs, blue_reward, blue_done, blue_info = self.blue_team.step(blue_action)
        red_obs, red_reward, red_done, red_info = self.red_team.step(red_action)
        bf_obs, bf_reward, remaining_blues, blue_shots, rf_obs, rf_reward, remaining_reds, red_shots = \
            self.playground.step()
        _, blue_deads = self.blue_team.get_observation()
        _, red_deads = self.red_team.get_observation()
        obs = blue_obs, red_obs, bf_obs, rf_obs, blue_deads, red_deads
        reward = blue_reward + red_reward + bf_reward + rf_reward
        done = False

        info = {}
        info['red_oob'] = red_info['oob']
        info['blue_oob'] = blue_info['oob']
        info['hits_target'] = red_info['hits_target']
        info['blue_shots'] = blue_shots
        info['red_shots'] = red_shots
        info['weighted_red_distance'] = red_info['delta_distance']
        info['remaining blues'] = len(blue_deads) - sum(blue_deads)
        info['remaining reds'] = len(red_deads) - sum(red_deads)
        info['ttl'] = red_info['ttl']
        info['distance_to_straight_action'] = red_info['distance_to_straight_action']

        if red_info['oob'] + blue_info['oob'] + red_info['hits_target'] + blue_shots + red_shots > 0:
            print('something happened')

        return obs, reward, done, info
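A quick smoke-test sketch for the raw environment (the drone numbers are arbitrary, and this assumes param_ and the drone modules are importable):

from swarmenv import SwarmEnv

env = SwarmEnv(blues=2, reds=2)
obs = env.reset()
blue_act, red_act = env.action_space.sample()   # Tuple space: one Box per team
obs, reward, done, info = env.step((blue_act, red_act))
print(info['remaining blues'], info['remaining reds'])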
symetry_wrap.py
ADDED
@@ -0,0 +1,95 @@
import numpy as np
import gym


class SymetryWrapper(gym.Wrapper):
    """
    :param env: (gym.Env) Gym environment that will be wrapped
    """

    def __init__(self, env):
        # Call the parent constructor, so we can access self.env later

        self.symetry = False  # no need to perform a symmetry yet
        super(SymetryWrapper, self).__init__(env)

    def reset(self):
        """
        Reset the environment
        """
        obs = self.env.reset()

        obs = self.post_obs(obs)

        return obs

    def step(self, action):
        """
        :param action: ([float] or int) Action taken by the agent
        :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
        """
        if self.symetry:
            action = symetrise_action(action)

        obs, reward, done, info = self.env.step(action)

        obs = self.post_obs(obs)

        return obs, reward, done, info

    def post_obs(self, obs):
        self.symetry = get_symetry(obs)
        if self.symetry:
            obs = symetrise_obs(obs)
        return obs


def get_symetry(obs):
    blue_obs, red_obs, blue_fire, red_fire = obs

    # count the drones that are positioned above the 0 x-axis
    count = 0
    for this_obs in (blue_obs, red_obs):
        for d in this_obs:
            add = 1 if (d[1] < 0.5) else 0
            count += add

    # compare with the total
    symetry = bool(2*count < (len(blue_obs) + len(red_obs)))

    return symetry


def symetrise_obs(obs):

    blue_obs, red_obs, blue_fire, red_fire = obs

    for this_obs in (blue_obs, red_obs):
        # symmetrise the positions and speeds
        this_obs[:, 1] = 1 - this_obs[:, 1]
        this_obs[:, 4] = 1 - this_obs[:, 4]

    return blue_obs, red_obs, blue_fire, red_fire


def symetrise_action(action):

    blue_action, red_action = action

    for this_action in (blue_action, red_action):
        for act in this_action:

            # symmetrise the action angle
            act[1] = -act[1]

    action = blue_action, red_action

    return action


def test_symetrise_obs():

    obs = np.arange(12).reshape(2, 6), np.arange(12).reshape(2, 6), np.random.random((1, 1)), np.random.random((1, 1))
    print(obs)
    symetrise_obs(obs)
    print(obs)
team.py
ADDED
@@ -0,0 +1,131 @@
import numpy as np
from dataclasses import dataclass

import param_
from drone import Drone
from dronemodel import DroneModel
from settings import Settings


@dataclass
class Team:
    """
    Creates a team (it is either red or blue / foe or friend)
    """

    is_blue: bool
    drones: [Drone]
    drone_model: DroneModel
    weighted_distance: float = 0

    def reset(self, obs=None):

        self.delta_weighted_distance()
        if obs:
            for drone, drone_obs in zip(self.drones, obs):
                drone.reset(obs=drone_obs)
        else:
            for drone in self.drones:
                drone.reset()

    def get_observation(self) -> (np.ndarray, np.ndarray):
        """
        get the observation for the RL agent
        :return: observation in the form of flattened np.arrays of shape (squad_number, 6*squad_size),
                 and the array flagging the dead drones
        """
        obs = np.array([drone.get_observation() for drone in self.drones])
        deads = ~np.array([drone.is_alive for drone in self.drones])

        return obs, deads

    def step(self, action: np.ndarray):
        obs = np.zeros((len(self.drones), 6))
        done = np.zeros((len(self.drones),))
        reward = np.zeros((len(self.drones),))
        infos = [{} for d in range(len(self.drones))]
        for index, drone in enumerate(self.drones):
            obs[index], reward[index], done[index], infos[index] = drone.step(action[index])
        done = (sum(done) == len(self.drones))
        info = {'oob': 0, 'hits_target': 0, 'ttl': param_.DURATION, 'distance_to_straight_action': 0}
        for i in infos:
            info['ttl'] = min(info['ttl'], i['ttl'])
            info['oob'] += i['oob'] if 'oob' in i else 0
            info['hits_target'] += i['hits_target'] if 'hits_target' in i else 0
            info['distance_to_straight_action'] += i['distance_to_straight_action'] \
                if 'distance_to_straight_action' in i else 0
        info['delta_distance'] = 0 if self.is_blue else self.delta_weighted_distance()
        return obs, sum(reward), done, info

    def delta_weighted_distance(self):

        # distance of the living drones to 0
        team_distance = np.array([d.distance() for d in self.drones if d.is_alive])
        weighted_distance = np.sum(np.exp(-0.5 * (team_distance / (Settings.perimeter/2)) ** 2))

        delta = weighted_distance - self.weighted_distance if 0 < self.weighted_distance else 0

        self.weighted_distance = weighted_distance

        return delta


class BlueTeam(Team):
    """
    Creates the blue team
    """

    def __init__(self, number_of_drones: int = Settings.blues):
        self.is_blue = True
        self.drone_model = DroneModel(self.is_blue)

        # initialise the blue positions and speeds
        positions = np.zeros((number_of_drones, 3))
        speeds = np.zeros((number_of_drones, 3))
        blue_speed = Settings.blue_speed_init * self.drone_model.max_speed
        circle = index = 0
        for d in range(number_of_drones):
            positions[d] = np.array([Settings.blue_circles_rho[circle],
                                     Settings.blue_circles_theta[circle] + index * 2 * np.pi / 3,
                                     Settings.blue_circles_zed[circle]])
            clockwise = 1 - 2 * (circle % 2)
            speeds[d] = np.array([blue_speed, np.pi / 6 * clockwise, 0])
            index += 1
            if index == Settings.blues_per_circle[circle]:
                index = 0
                circle += 1

        self.drones = [Drone(is_blue=True, position=position, speed=speed, id_=id_)
                       for (id_, position, speed) in zip(range(number_of_drones), positions, speeds)]


class RedTeam(Team):
    """
    Creates the red team
    """

    def __init__(self, number_of_drones: int = Settings.reds):
        self.is_blue = False
        self.drone_model = DroneModel(self.is_blue)

        positions = np.zeros((number_of_drones, 3))
        positions_noise = np.zeros((number_of_drones, 3))
        speeds = np.zeros((number_of_drones, 3))
        speed_rho = Settings.red_speed_init * self.drone_model.max_speed
        squad = index = 0
        for d in range(number_of_drones):
            positions[d] = [Settings.red_squads_rho[squad],
                            Settings.red_squads_theta[squad],
                            Settings.red_squads_zed[squad]]
            positions_noise[d] = [Settings.red_rho_noise[squad],
                                  Settings.red_theta_noise[squad],
                                  Settings.red_zed_noise[squad]]
            # reds start heading towards the target (opposite to their position angle)
            speeds[d] = [speed_rho, np.pi + positions[d][1], 0]
            index += 1
            if index == Settings.red_squads[squad]:
                index = 0
                squad += 1

        self.drones = [Drone(is_blue=False, position=position, position_noise=position_noise, speed=speed, id_=id_)
                       for (id_, position, position_noise, speed) in
                       zip(range(len(positions)), positions, positions_noise, speeds)]
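The gaussian in delta_weighted_distance turns each living drone's distance to the target into a threat weight; a quick check with a made-up perimeter:

import numpy as np

# a drone on the ground zone weighs ~1, a drone at half the perimeter weighs exp(-0.5) ~ 0.61
perimeter = 2000.0
team_distance = np.array([0.0, 1000.0])
weights = np.exp(-0.5 * (team_distance / (perimeter / 2)) ** 2)
print(weights.round(2))  # [1.   0.61]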
team_wrap.py
ADDED
@@ -0,0 +1,107 @@
import numpy as np
import gym
from gym import spaces

from swarm_policy import SwarmPolicy
from settings import Settings


class TeamWrapper(gym.Wrapper):
    """
    :param env: (gym.Env) Gym environment that will be wrapped
    """

    def __init__(self, env, is_blue: bool = True, is_double: bool = False,
                 is_unkillable: bool = Settings.is_unkillable):

        self.is_blue = is_blue
        self.is_double = is_double
        self.is_unkillable = is_unkillable

        nb_blues, nb_reds = env.nb_blues, env.nb_reds

        self.foe_action = None
        self.foe_policy = SwarmPolicy(is_blue=not is_blue, blues=nb_blues, reds=nb_reds)

        if is_double:
            env.action_space = spaces.Tuple((
                spaces.Box(low=-1, high=1, shape=(nb_blues*3,), dtype=np.float32),
                spaces.Box(low=-1, high=1, shape=(nb_reds*3,), dtype=np.float32)
            ))
        else:
            nb_friends = nb_blues if is_blue else nb_reds
            env.action_space = spaces.Box(low=-1, high=1, shape=(nb_friends*3,), dtype=np.float32)

        flatten_dimension = 6 * nb_blues + 6 * nb_reds  # the positions and speeds for blue and red drones
        flatten_dimension += (nb_blues * nb_reds) * (1 if is_unkillable else 2)  # the fire matrices

        env.observation_space = spaces.Box(low=-1, high=1, shape=(flatten_dimension,), dtype=np.float32)

        super(TeamWrapper, self).__init__(env)

    def reset(self):
        """
        Reset the environment
        """
        obs = self.env.reset()
        obs = self.post_obs(obs)

        return obs

    def step(self, action):
        """
        :param action: ([float] or int) Action taken by the agent
        :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
        """

        if self.is_double:
            blue_action, red_action = action
            blue_action = _decentralise(blue_action)
            red_action = _decentralise(red_action)
            action = _unflatten(blue_action), _unflatten(red_action)
        else:
            friend_action = _decentralise(action)
            foe_action = _decentralise(self.foe_action)
            if self.is_blue:
                action = _unflatten(friend_action), _unflatten(foe_action)
            else:
                action = _unflatten(foe_action), _unflatten(friend_action)

        obs, reward, done, info = self.env.step(action)

        obs = self.post_obs(obs)

        return obs, reward, done, info

    def post_obs(self, obs):

        if self.is_unkillable:
            o1, o2, o3, _ = obs
            obs = o1, o2, o3
        flatten_obs = _flatten(obs)
        centralised_obs = _centralise(flatten_obs)

        if not self.is_double:
            self.foe_action = self.foe_policy.predict(centralised_obs)

        return centralised_obs


def _unflatten(action):
    return np.split(action, len(action) // 3)


def _flatten(obs):  # needs normalisation too
    fl_obs = [this_obs.flatten().astype('float32') for this_obs in obs]
    fl_obs = np.hstack(fl_obs)
    return fl_obs


def _centralise(obs):  # [0, 1] to [-1, 1]
    obs = 2 * obs - 1
    return obs


def _decentralise(act):  # [-1, 1] to [0, 1]
    act = 0.5 * (act + 1)
    return act
ADDED
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
from stable_baselines3 import SAC
|
3 |
+
from stable_baselines3.sac.policies import MlpPolicy
|
4 |
+
from stable_baselines3.common.evaluation import evaluate_policy
|
5 |
+
from stable_baselines3.common.env_checker import check_env
|
6 |
+
import os
|
7 |
+
|
8 |
+
from monitor_wrap import MonitorWrapper
|
9 |
+
from filter_wrap import FilterWrapper
|
10 |
+
from distribution_wrap import DistriWrapper
|
11 |
+
from redux_wrap import ReduxWrapper
|
12 |
+
from symetry_wrap import SymetryWrapper
|
13 |
+
from rotate_wrap import RotateWrapper
|
14 |
+
from sort_wrap import SortWrapper
|
15 |
+
from team_wrap import TeamWrapper
|
16 |
+
from reward_wrap import RewardWrapper
|
17 |
+
|
18 |
+
from settings import Settings
|
19 |
+
from swarmenv import SwarmEnv
|
20 |
+
import param_
|
21 |
+
|
22 |
+
|
23 |
+
def bi_train(blue_model, red_model, blues: int = 1, reds: int = 1,
|
24 |
+
blue_dispersion: np.float32 = 1, red_dispersion: np.float32 = 1, total_timesteps: int = 1000):
|
25 |
+
# If needed create save dir
|
26 |
+
save_dir = "policies/" + Settings.policy_folder + f"/b{blues}r{reds}/"
|
27 |
+
save_last_dir = "policies/last" + f"/b{blues}r{reds}/"
|
28 |
+
os.makedirs(save_dir, exist_ok=True)
|
29 |
+
os.makedirs(save_last_dir, exist_ok=True)
|
30 |
+
|
31 |
+
# set the dispersion to initial drone positions
|
32 |
+
Settings.blue_distance_factor = blue_dispersion * Settings.blue_distance_factor
|
33 |
+
Settings.red_distance_factor = red_dispersion * Settings.red_distance_factor
|
34 |
+
Settings.red_theta_noise = red_dispersion * Settings.red_theta_noise
|
35 |
+
Settings.red_rho_noise = red_dispersion * Settings.red_rho_noise
|
36 |
+
|
37 |
+
# launch learning for red drones and then blue drones
|
38 |
+
red_model.learn(total_timesteps=total_timesteps)
|
39 |
+
mean_reward, std_reward = evaluate_policy(red_model, red_model.env, n_eval_episodes=10)
|
40 |
+
print(f"REDS b{blues}r{reds} disp_b:{10*blue_dispersion:2.0f} disp_r{10*red_dispersion:2.0f}: "
|
41 |
+
f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
|
42 |
+
red_model.save(save_dir + f"reds_b{10 * blue_dispersion:2.0f}r{10 * red_dispersion:2.0f}")
|
43 |
+
red_model.save(save_last_dir + "reds_last")
|
44 |
+
|
45 |
+
blue_model.learn(total_timesteps=total_timesteps)
|
46 |
+
mean_reward, std_reward = evaluate_policy(blue_model, blue_model.env, n_eval_episodes=10)
|
47 |
+
print(f"BLUES b{blues}r{reds} disp_b:{10*blue_dispersion:2.0f} disp_r{10*red_dispersion:2.0f}: "
|
48 |
+
f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
|
49 |
+
blue_model.save(save_dir + f"blues_{10 * blue_dispersion:2.0f}r{10 * red_dispersion:2.0f}")
|
50 |
+
blue_model.save(save_last_dir + "blues_last")
|
51 |
+
|
52 |
+
return blue_model, red_model
|
53 |
+
|
54 |
+
|
55 |
+
def meta_train(blues: int = 1, reds: int = 1,
|
56 |
+
max_dispersion: np.float32 = 3, iteration: int = 10,
|
57 |
+
total_timesteps: int = 100):
|
58 |
+
Settings.blues, Settings.reds = blues, reds
|
59 |
+
|
60 |
+
# launch the episode to get the data
|
61 |
+
steps = int(param_.DURATION / param_.STEP)
|
62 |
+
|
63 |
+
env = SortWrapper(
|
64 |
+
SymetryWrapper(
|
65 |
+
RotateWrapper(
|
66 |
+
ReduxWrapper(
|
67 |
+
DistriWrapper(
|
68 |
+
FilterWrapper(
|
69 |
+
MonitorWrapper(
|
70 |
+
SwarmEnv(blues=blues, reds=reds), steps, verbose=False)))))))
|
71 |
+
|
72 |
+
blue_env = RewardWrapper(TeamWrapper(env, is_blue=True), is_blue=True)
|
73 |
+
red_env = RewardWrapper(TeamWrapper(env, is_blue=False), is_blue=False)
|
74 |
+
|
75 |
+
blue_model = SAC(MlpPolicy, blue_env, verbose=0)
|
76 |
+
red_model = SAC(MlpPolicy, red_env, verbose=0)
|
77 |
+
|
78 |
+
for red_dispersion in np.linspace(0.1, max_dispersion, num=iteration):
|
79 |
+
for blue_dispersion in np.linspace(max_dispersion, 0.3, num=iteration):
|
80 |
+
blue_model, red_model = bi_train(
|
81 |
+
blue_model, red_model, blues=blues, reds=reds,
|
82 |
+
blue_dispersion=blue_dispersion, red_dispersion=red_dispersion,
|
83 |
+
total_timesteps=total_timesteps)
|
84 |
+
|
85 |
+
|
86 |
+
def super_meta_train(max_blues: int = 3, max_reds: int = 3, max_dispersion: np.float32 = 3,
                     iteration: int = 10, total_timesteps: int = 100, policy_folder: str = "default"):
    Settings.policy_folder = policy_folder
    # sweep every (blues, reds) configuration, from few drones to many
    for drones_nb in range(2, max_blues + max_reds + 1):
        for blues in range(1, max_blues + 1):
            reds = drones_nb - blues
            if 1 <= reds <= max_reds:
                print(f"reds: {reds}, blues: {blues}")
                meta_train(blues=blues, reds=reds,
                           max_dispersion=max_dispersion, iteration=iteration,
                           total_timesteps=total_timesteps)


def print_spaces(env, name: str):
    print("++++++++++++")
    print(name)
    print(env.action_space)
    print(env.observation_space)
    print("============")
    check_env(env, warn=True)


# super_meta_train(max_blues=1, max_reds=1, iteration=5, max_dispersion=1, total_timesteps=50000, policy_folder="0528_14")
# super_meta_train(max_blues=2, max_reds=2, iteration=4, max_dispersion=3, total_timesteps=10, policy_folder="0528_test")


def simple_red_train(max_dispersion: np.float32 = 3,
                     blues: int = 1, reds: int = 1,
                     iteration: int = 25, total_timesteps: int = 100,
                     policy_folder: str = "simple_red"):
    Settings.policy_folder = policy_folder
    print(f"Simple_red: reds: {reds}, blues: {blues}")

    # create the save dirs if needed
    save_dir = "policies/" + Settings.policy_folder + f"/b{blues}r{reds}/"
    save_last_dir = "policies/last" + f"/b{blues}r{reds}/"
    os.makedirs(save_dir, exist_ok=True)
    os.makedirs(save_last_dir, exist_ok=True)

    # number of simulation steps in one episode
    steps = int(param_.DURATION / param_.STEP)
    Settings.blues, Settings.reds = blues, reds

    env = SortWrapper(
        SymetryWrapper(
            RotateWrapper(
                ReduxWrapper(
                    DistriWrapper(
                        FilterWrapper(
                            MonitorWrapper(
                                SwarmEnv(blues=blues, reds=reds), steps, verbose=False)))))))

    red_env = RewardWrapper(TeamWrapper(env, is_blue=False), is_blue=False)
    red_model = SAC(MlpPolicy, red_env, verbose=1)

    # widen the dispersion of the initial blue drone positions once, before the red curriculum
    Settings.blue_distance_factor = 10 * Settings.blue_distance_factor

    this_iteration = 0

    for red_dispersion in np.linspace(0.33, max_dispersion, num=iteration):

        Settings.red_distance_factor = red_dispersion

        # train the red drones at this dispersion until the reward is high and stable:
        # the loop only ends once mean_reward >= 9, the last 3 evaluations moved by
        # at most +/- 0.1, and at least 30 batches have run
        this_iteration += 1
        batch = 1
        mean_reward = 0
        stability = 0
        count = 0
        while mean_reward < 9 or stability < 3 or count < 30:
            count += 1
            red_model.learn(total_timesteps=total_timesteps // 10)
            last_reward = mean_reward
            mean_reward, std_reward = evaluate_policy(red_model, red_model.env, n_eval_episodes=100)
            delta_reward = mean_reward - last_reward
            if -0.1 <= delta_reward <= 0.1:
                stability += 1
            else:
                stability = 0
            print(f"REDS b{blues}r{reds} iteration{this_iteration} batch{batch}: "
                  f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
            red_model.save(save_dir + f"{this_iteration} batch{batch + 1}")
            red_model.save(save_last_dir + "reds_last")
            batch += 1


# runs when this module is executed: training starts immediately
simple_red_train(total_timesteps=50000, policy_folder="simply_red")
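One note on train.py before moving on: the inner while loop of simple_red_train stops only once all three criteria hold at the same time — the evaluated mean reward clears 9, the last three evaluations moved by at most +/- 0.1 (a plateau), and at least 30 batches have run. A minimal, self-contained sketch of that stopping rule (train_until_stable, eval_fn and the toy curve are illustrative names, not part of the repository):

from itertools import count

def train_until_stable(eval_fn, reward_target=9.0, streak=3, min_batches=30, tol=0.1):
    mean_reward, stability, batches = 0.0, 0, 0
    while mean_reward < reward_target or stability < streak or batches < min_batches:
        batches += 1
        last_reward = mean_reward
        mean_reward = eval_fn()  # stands in for model.learn(...) + evaluate_policy(...)
        # extend the plateau streak, or reset it when the reward still moves
        stability = stability + 1 if abs(mean_reward - last_reward) <= tol else 0
    return mean_reward, batches

# toy learning curve that saturates at 10: the loop ends once it is above 9 and flat
curve = (10 - 10 * 0.9 ** n for n in count())
print(train_until_stable(lambda: next(curve)))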
utils.py
ADDED
@@ -0,0 +1,65 @@
"""
works with all the transformations and calculations associated with position, speed and acceleration
"""

import numpy as np


def position_to_xyz(position: list[float]) -> list[float]:
    """
    gets the 3D x, y, z coordinates from a polar (cylindrical) representation
    :param position: array (3,) with rho in meters, theta in rad, zed in meters
    :return: float array (3,) with x, y, z in meters
    """
    # rho * e^(i*theta) encodes the horizontal plane as x + iy
    pos = position[0] * np.exp(1j * position[1])
    return [np.real(pos), np.imag(pos), position[2]]


"""
def _test_position_to_norm():
    assert position_to_norm([param.PERIMETER, 0, 100]) == [1, 0, 1]
    assert position_to_norm([0, -np.pi / 2, 0]) == [0, 0.75, 0]
    assert position_to_norm([0, np.pi / 2, 0]) == [0, 0.25, 0]
"""


def is_in_the_cone(position1: list[float], position2: list[float], vector2: list[float], angle: float) -> bool:
    """
    checks whether the point at position2 lies inside the cone that starts at
    position1, points along vector2 and has a half-angle of angle
    :param position1: in x, y, z
    :param position2: in x, y, z
    :param vector2: axis of the cone, in x, y, z
    :param angle: half-angle of the cone, in rad
    :return: True if the point is inside the cone
    """
    vector1 = np.array(position2, dtype=float) - np.array(position1)
    vector1 /= np.linalg.norm(vector1)
    vector2 = np.array(vector2, dtype=float)
    vector2 /= np.linalg.norm(vector2)
    cos_theta = np.dot(vector1, vector2)
    if 0 < cos_theta:
        # equivalent to arccos(cos_theta) for cos_theta in (0, 1]
        theta = np.arcsin(np.sqrt(1 - cos_theta ** 2))
        return theta < angle
    return False


def _test_is_in_the_cone():
    assert is_in_the_cone([0, 0, 0], [1, 0.1, 0], [1, 0, 0], np.pi / 5)
    # the point lies almost along x while the cone axis points along y,
    # so it falls outside a pi/5 half-angle cone
    assert not is_in_the_cone([0, 0, 0], [1, 0.1, 0], [0, 1, 0], np.pi / 5)


def rhotheta_to_latlon(rho: float, theta: float, lat_tg: float, lon_tg: float) -> tuple[float, float]:
    """
    transforms polar coordinates around the target into lat, lon
    :param rho: distance to the target, in meters
    :param theta: angle, in rad
    :param lat_tg: latitude of the target (0, 0)
    :param lon_tg: longitude of the target (0, 0)
    :return: lat, lon in degrees
    """
    z = rho * np.exp(1j * theta)
    # 40075 km is the Earth's circumference, so one meter is 360 / 40_075_000 degrees
    lat = np.imag(z) * 360 / (40075 * 1000) + lat_tg
    lon = np.real(z) * 360 / (40075 * 1000 * np.cos(np.pi / 180 * lat)) + lon_tg
    return lat, lon
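A quick sanity check for these helpers (a sketch, not part of the repository; the expected values follow directly from the formulas above):

import numpy as np
from utils import position_to_xyz, is_in_the_cone, rhotheta_to_latlon

# rho=2 at theta=pi/2 lies on the y axis: expect roughly [0, 2, 5]
print(position_to_xyz([2, np.pi / 2, 5]))

# point almost along x, cone axis along x, half-angle pi/5: inside the cone
print(is_in_the_cone([0, 0, 0], [1, 0.1, 0], [1, 0, 0], np.pi / 5))  # True

# 1 km due east of the target: latitude unchanged, longitude shifted by
# 360 * 1000 / (40_075_000 * cos(lat)) ~ 0.0137 degrees at latitude 48.86
print(rhotheta_to_latlon(1000, 0, 48.86, 2.35))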