YvesP committed on
Commit
a162e39
•
1 Parent(s): 8012bbf

initial load

Files changed (30)
  1. Makefile +12 -0
  2. README.md +38 -13
  3. app.py +175 -0
  4. app.yaml +9 -0
  5. bluetraj.py +81 -0
  6. distribution_wrap.py +92 -0
  7. drone.py +326 -0
  8. dronemodel.py +103 -0
  9. filter_wrap.py +95 -0
  10. monitor_wrap.py +119 -0
  11. param_.py +122 -0
  12. playground.py +92 -0
  13. policies/_last/_b1r1/blues_last.zip +3 -0
  14. policies/_last/_b1r1/reds_last.zip +3 -0
  15. procfile.txt +1 -0
  16. redux_wrap.py +80 -0
  17. requirements.txt +3 -0
  18. reward_wrap.py +86 -0
  19. rotate_wrap.py +93 -0
  20. runner.py +16 -0
  21. settings.py +82 -0
  22. setup.sh +12 -0
  23. sort_wrap.py +98 -0
  24. swarm_policy.py +342 -0
  25. swarmenv.py +100 -0
  26. symetry_wrap.py +95 -0
  27. team.py +131 -0
  28. team_wrap.py +107 -0
  29. train.py +174 -0
  30. utils.py +65 -0
Makefile ADDED
@@ -0,0 +1,12 @@
1
+
2
+ .PHONY: run run-container gcloud-deploy
3
+
4
+ run:
5
+ @streamlit run app.py --server.port=8080 --server.address=0.0.0.0
6
+
7
+ run-container:
8
+ @docker build . -t hexamind-swarms
9
+ @docker run -p 8080:8080 hexamind-swarms
10
+
11
+ gcloud-deploy:
12
+ @gcloud app deploy app.yaml
README.md CHANGED
@@ -1,13 +1,38 @@
1
- ---
2
- title: Swarms
3
- emoji: πŸ‘
4
- colorFrom: indigo
5
- colorTo: purple
6
- sdk: streamlit
7
- sdk_version: 1.21.0
8
- app_file: app.py
9
- pinned: false
10
- license: bsd-3-clause-clear
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
+ # swarms2
2
+
3
+ **red swarms against blue swarms**
4
+ reds attack a target defended by blues
5
+ reds and blues may kill their opponent
6
+ the more drones reach the goal, the better for the reds and the worse for the blues
7
+
8
+ please install the requirements in requirements.txt
9
+ the code has been tested on macOS with Python 3.9
10
+ Pillow may be difficult to install; falling back to Python 3.7 is a possible workaround
11
+
12
+ All files are flat.
13
+ Two files contain the entry points:
14
+ train.py allows for training
15
+ app.py launches the Streamlit app
16
+
17
+ **train.py**
18
+ the training is done iteratively, starting with one blue drone and one red drone, then progressively increasing the distance and adding drones.
19
+ Learned policies are stored in /policies/
20
+ For each configuration depending on the number of blue and red drones, there is a folder containing the red and blue policies
21
+ ex /policies/b3r4 contains the red and blue policies for configurations with 3 blue and 4 red drones
22
+ There are two policies depending on the colour: one for the blues and one for the reds. Several intermediate saves are made, the last one being __last.
23
+ The function to launch for the training is :
24
+ super_meta_train(max_blues=8, max_reds=8, iteration=10, max_dispersion=3, total_timesteps=10000)
25
+ which programs a training from 1,1 up to 8,8 drones, with a distance multiplier of 3, 10 iterations, and a total of 10000 timesteps per learning run (a complete invocation sketch is given at the end of this README)
26
+ (1 step = 1 second)
27
+
28
+
29
+ **show.py**
30
+ once the agents are trained, the drones can be simulated and visualised through a Streamlit interface.
31
+ the command to launch the visualisation is :
32
+ streamlit run show.py
33
+
34
+
35
+ **tuning the rewards**
36
+ The rewards may be tuned in the param_.py file; the logic lives in the 'evaluate_situation' function of team_wrap.py.
37
+ When is_double is true, there is no learning: the simulation is carried out with already trained policies and only the final outcome is to be considered.
38
+ Otherwise, two cases have to be taken into account, depending on whether blue or red is learning.
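
A minimal way to kick off the curriculum described above (a sketch, assuming train.py exposes super_meta_train with exactly the signature quoted in the train.py section):

```python
# minimal training driver sketch; assumes train.py exposes super_meta_train
# as quoted above and stores the learned policies under /policies/
from train import super_meta_train

if __name__ == '__main__':
    # curriculum from 1 blue / 1 red up to 8 blues / 8 reds,
    # 10 iterations, distance multiplier 3, 10000 timesteps per learning run
    super_meta_train(max_blues=8, max_reds=8, iteration=10,
                     max_dispersion=3, total_timesteps=10000)
```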
app.py ADDED
@@ -0,0 +1,175 @@
1
+ import time
2
+ import pandas as pd
3
+ import pydeck as pdk
4
+ import streamlit as st
5
+
6
+ from filter_wrap import FilterWrapper
7
+ from distribution_wrap import DistriWrapper
8
+ from redux_wrap import ReduxWrapper
9
+ from symetry_wrap import SymetryWrapper
10
+ from rotate_wrap import RotateWrapper
11
+ from sort_wrap import SortWrapper
12
+ from team_wrap import TeamWrapper
13
+ from reward_wrap import RewardWrapper
14
+ from monitor_wrap import MonitorWrapper
15
+
16
+ from runner import run_episode
17
+ from settings import Settings, define_
18
+ import param_
19
+ from swarmenv import SwarmEnv
20
+
21
+
22
+
23
+ def run(with_streamlit=True, blues: int = 4, reds: int = 6, policy_folder: str = 'reds_last'):
24
+
25
+ # define the policy folder: where the trained policies are to be found
26
+ Settings.policy_folder = policy_folder
27
+
28
+ # define settings with Streamlit (or use default parameters)
29
+ blues, reds = define_(with_streamlit=with_streamlit, blues=blues, reds=reds)
30
+
31
+ # put in place the map
32
+ deck_map, initial_view_state = pre_show(with_streamlit=with_streamlit)
33
+
34
+ # launch the episode to get the data
35
+ steps = int(param_.DURATION / param_.STEP)
36
+ monitor_env = MonitorWrapper(SwarmEnv(blues=blues, reds=reds), steps)
37
+ env = FilterWrapper(monitor_env)
38
+ env = DistriWrapper(env)
39
+ env = ReduxWrapper(env)
40
+ env = SortWrapper(
41
+ SymetryWrapper(
42
+ RotateWrapper(env)))
43
+
44
+ env = RewardWrapper(TeamWrapper(env, is_double=True), is_double=True)
45
+
46
+ obs = env.reset()
47
+ run_episode(env, obs, blues=blues, reds=reds)
48
+
49
+ print('done')
50
+
51
+ # display the data with Streamlit
52
+ if with_streamlit:
53
+ show(monitor_env, deck_map, initial_view_state)
54
+
55
+
56
+ def pre_show(with_streamlit=True):
57
+ if with_streamlit:
58
+ deck_map = st.empty()
59
+ pitch = st.slider('pitch', 0, 100, 50)
60
+ lat, lon = Settings.latlon
61
+ initial_view_state = pdk.ViewState(
62
+ latitude=lat,
63
+ longitude=lon,
64
+ zoom=13,
65
+ pitch=pitch
66
+ )
67
+ return deck_map, initial_view_state
68
+ else:
69
+ return 0, 0
70
+
71
+
72
+ def show(monitor_env, deck_map, initial_view_state):
73
+
74
+ blue_df, red_df, fire_df, blue_path_df, red_path_df = monitor_env.get_df()
75
+ step_max = monitor_env.step_
76
+
77
+ for step in range(step_max):
78
+ deck_map.pydeck_chart(pdk.Deck(
79
+ map_provider="mapbox",
80
+ map_style='mapbox://styles/mapbox/light-v9',
81
+ initial_view_state=initial_view_state,
82
+ layers=get_layers(blue_df,
83
+ red_df,
84
+ blue_path_df,
85
+ red_path_df,
86
+ step)
87
+ ))
88
+
89
+ time.sleep(param_.STEP*param_.SIMU_SPEED)
90
+
91
+
92
+ def get_layers(df_blue: pd.DataFrame, df_red: pd.DataFrame,
93
+ df_blue_path: [pd.DataFrame], df_red_path: [pd.DataFrame],
94
+ step) -> [pdk.Layer]:
95
+ lat, lon = Settings.latlon
96
+ df_target = pd.DataFrame({'lat': [lat], 'lon': [lon]})
97
+ layers_ = get_target_layers(df_target)
98
+
99
+ for (df, dfp, b) in [(df_blue, df_blue_path, True), (df_red, df_red_path, False)]:
100
+ layers_.append(get_current_drone_layers(df, step))
101
+ nb_drones = df['d_index'].max() + 1
102
+ for drone_index in range(nb_drones):
103
+ layers_.append(get_path_layers(dfp[drone_index], step))
104
+
105
+ return layers_
106
+
107
+
108
+ def get_target_layers(df_target) -> [pdk.Layer]:
109
+ return [
110
+ # this is the GROUNDZONE
111
+ pdk.Layer(
112
+ 'ScatterplotLayer',
113
+ data=df_target,
114
+ get_position='[lon, lat]',
115
+ get_color='[0, 120, 0]',
116
+ get_radius=Settings.groundzone,
117
+ get_line_width=50,
118
+ lineWidthMinPixels=2,
119
+ stroked=True,
120
+ filled=False,
121
+
122
+ ),
123
+
124
+ pdk.Layer(
125
+ 'ScatterplotLayer',
126
+ data=df_target,
127
+ get_position='[lon, lat]',
128
+ get_color='[0, 0, 200]',
129
+ get_radius=30,
130
+ ),
131
+ ]
132
+
133
+
134
+ def get_current_drone_layers(df_drone: pd.DataFrame, step: int) -> [pdk.Layer]:
135
+ df_current = df_drone[df_drone.step == step]
136
+
137
+ return [
138
+ pdk.Layer(
139
+ 'ScatterplotLayer',
140
+ data=df_current,
141
+ get_position='[lon, lat, zed]',
142
+ get_color='color',
143
+ get_radius=50,
144
+
145
+ ),
146
+ pdk.Layer(
147
+ 'ScatterplotLayer',
148
+ data=df_current,
149
+ get_position='[lon, lat]',
150
+ get_color=[50, 50, 50, 50],
151
+ get_radius=50,
152
+
153
+ ),
154
+ ]
155
+
156
+
157
+ def get_path_layers(df_path: pd.DataFrame, step: int) -> [pdk.Layer]:
158
+ df_current = df_path[df_path.step == step]
159
+ return [
160
+ pdk.Layer(
161
+ type="PathLayer",
162
+ data=df_current,
163
+ pickable=True,
164
+ get_color="color",
165
+ width_scale=10,
166
+ width_min_pixels=1,
167
+ get_path="path",
168
+ get_width=1,
169
+ )
170
+ ]
171
+
172
+
173
+ # and ... do not forget
174
+ run(with_streamlit=True, policy_folder='last')
175
+ # run(blues=1, reds=3, with_streamlit=False, policy_folder='0527_14_test')
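
For a headless simulation (no Streamlit front-end), the commented-out call above can serve as a template; a sketch, assuming the unconditional run(...) call at the bottom of app.py is commented out first so that importing the module has no side effects:

```python
# headless run sketch (assumption: the module-level run(...) call in app.py has
# been commented out beforehand, otherwise importing app starts a Streamlit run)
from app import run

# same wrapper stack and policies, but no pydeck/Streamlit rendering
run(blues=1, reds=3, with_streamlit=False, policy_folder='last')
```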
app.yaml ADDED
@@ -0,0 +1,9 @@
1
+
2
+ runtime: custom
3
+ env: flex
4
+ manual_scaling:
5
+ instances: 1
6
+ resources:
7
+ cpu: 1
8
+ memory_gb: 3
9
+ disk_size_gb: 20
bluetraj.py ADDED
@@ -0,0 +1,81 @@
1
+ import numpy as np
2
+ import param_
3
+ from drone import Drone
4
+
5
+
6
+ def calculate_target(blue_drone: Drone, red_drone: Drone) -> np.ndarray(3, ):
7
+ '''
8
+
9
+ :param blue_drone:
10
+ :param red_drone:
11
+ :return:
12
+ '''
13
+
14
+ def transform(pos, delta, theta):
15
+ pos[0] -= delta
16
+ pos[1] -= theta
17
+ return pos[0] * np.exp(1j * pos[1])
18
+
19
+ def untransform_to_array(pos, delta, theta):
20
+ pos[0] += delta
21
+ pos[1] += theta
22
+ return pos
23
+
24
+ theta = red_drone.position[1]
25
+ delta = param_.GROUNDZONE
26
+
27
+ z_blue = transform(blue_drone.position, delta, theta)
28
+ z_red = np.real(transform(red_drone.position, delta, theta))
29
+
30
+ v_blue = blue_drone.drone_model.max_speed
31
+ v_red = red_drone.drone_model.max_speed
32
+
33
+ blue_shooting_distance = blue_drone.drone_model.distance_to_neutralisation
34
+
35
+ blue_time_to_zero = (np.abs(z_blue) - blue_shooting_distance) / v_blue
36
+ red_time_to_zero = z_red / v_red
37
+
38
+ if red_time_to_zero <= param_.STEP or red_time_to_zero < blue_time_to_zero + param_.STEP:
39
+ return np.zeros(3), red_time_to_zero
40
+ else:
41
+ max_target = z_red
42
+ min_target = 0
43
+ while True:
44
+ target = (max_target + min_target) / 2
45
+ blue_time_to_target = max(0, (np.abs(z_blue - target) - blue_shooting_distance) / v_blue)
46
+ red_time_to_target = np.abs(z_red - target) / v_red
47
+ if red_time_to_target - param_.STEP < blue_time_to_target <= red_time_to_target:
48
+ target = untransform_to_array((target / z_red) * red_drone.position, delta, theta)
49
+ return target, blue_time_to_target
50
+ if red_time_to_target < blue_time_to_target:
51
+ max_target = target
52
+ min_target = min_target
53
+ else: # blue_ time_to_target <= red_time_to_target -1:
54
+ max_target = max_target
55
+ min_target = target
56
+
57
+
58
+
59
+
60
+ def unitary_test(rho_blue: float, theta_blue: float, rho_red: float, theta_red: float):
61
+ blue_drone = Drone()
62
+ blue_drone.position = np.array([rho_blue, theta_blue, 100])
63
+ red_drone = Drone(is_blue=False)
64
+ red_drone.position = np.array([rho_red, theta_red, 100])
65
+ tg, time = calculate_target(blue_drone, red_drone)
66
+ print('rho_blue : ', rho_blue, ' theta_blue : ', theta_blue, ' rho_red : ', rho_red, ' theta_red : ', theta_red,
67
+ ' tg : ', tg, ' time : ', time)
68
+ return tg, time
69
+
70
+
71
+
72
+
73
+
74
+ def test():
75
+ for rho_blue in [1000]:
76
+ for theta_blue in np.pi * np.array([-1, 0.75, 0.5, 0.25, 0]):
77
+ for rho_red in [1000]:
78
+ for theta_red in np.pi * np.array([0, 1/4]):
79
+ unitary_test(rho_blue=rho_blue, theta_blue=theta_blue, rho_red=rho_red, theta_red=theta_red)
80
+ print('done')
81
+
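
calculate_target above searches, by bisection, for the point on the red's radial approach line that the blue can just cover (minus its shooting range) no later than the red. A simplified 1-D sketch of that search with made-up numbers (the real function additionally rotates coordinates and converts the result back into a position):

```python
# simplified, self-contained sketch of the bisection idea in calculate_target;
# all numbers below are illustrative, not taken from the commit
import numpy as np

z_blue = 800 + 600j        # blue position in the complex plane (|z_blue| = 1000 m)
z_red = 1000.0             # red distance along its (real-axis) approach line
v_blue = v_red = 28.0      # max speeds, m/s
shooting, step = 250.0, 1.0

lo, hi = 0.0, z_red
while True:
    target = (lo + hi) / 2
    t_blue = max(0.0, (abs(z_blue - target) - shooting) / v_blue)
    t_red = abs(z_red - target) / v_red
    if t_red - step < t_blue <= t_red:   # blue gets there just in time
        break
    if t_red < t_blue:                   # too ambitious: move towards the origin
        hi = target
    else:                                # too conservative: move towards the red
        lo = target

print(round(target, 1), round(t_blue, 1))   # 625.0 13.4
```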
distribution_wrap.py ADDED
@@ -0,0 +1,92 @@
1
+ import gym
2
+
3
+ from gym import spaces
4
+ import numpy as np
5
+
6
+ from runner import run_episode
7
+ from redux_wrap import ReduxWrapper
8
+ from rotate_wrap import RotateWrapper
9
+ from symetry_wrap import SymetryWrapper
10
+ from sort_wrap import SortWrapper
11
+ from team_wrap import TeamWrapper
12
+ from reward_wrap import RewardWrapper
13
+
14
+
15
+ class DistriWrapper(gym.Wrapper):
16
+ """
17
+ :param env: (gym.Env) Gym environment that will be wrapped
18
+ """
19
+
20
+ def __init__(self, env):
21
+
22
+ self.blue_deads = self.red_deads = 0
23
+ self.nb_blues, self.nb_reds = env.nb_blues, env.nb_reds
24
+
25
+ env.observation_space = spaces.Tuple((
26
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, 6), dtype=np.float32),
27
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, 6), dtype=np.float32),
28
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, self.nb_reds), dtype=np.float32),
29
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, self.nb_blues), dtype=np.float32)))
30
+
31
+ env.action_space = spaces.Tuple((
32
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, 3), dtype=np.float32),
33
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, 3), dtype=np.float32)))
34
+
35
+ # Call the parent constructor, so we can access self.env later
36
+ super(DistriWrapper, self).__init__(env)
37
+
38
+ def reset(self):
39
+ """
40
+ Reset the environment
41
+ """
42
+ obs = self.env.reset()
43
+ blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads = obs
44
+ self.blue_deads, self.red_deads = blue_deads, red_deads
45
+ return blue_obs, red_obs, blues_fire, reds_fire
46
+
47
+ def step(self, action):
48
+ """
49
+ :param action: ([float] or int) Action taken by the agent
50
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
51
+ """
52
+ obs, reward, done, info = self.env.step(action)
53
+
54
+ blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads = obs
55
+ obs = blue_obs, red_obs, blues_fire, reds_fire
56
+
57
+ if done: # environment decision (eg drones oob)
58
+ return obs, reward, True, info
59
+
60
+ if red_deads == len(red_obs): # no more reds to fight (it could mean that they have all reached the target)
61
+ return obs, reward, True, info
62
+
63
+ if blue_deads == len(blue_obs): # reds have won
64
+ return obs, reward, True, info
65
+
66
+ # do we have new deaths?
67
+ new_blue_deads = blue_deads - self.blue_deads
68
+ new_red_deads = red_deads - self.red_deads
69
+ self.blue_deads, self.red_deads = blue_deads, red_deads
70
+
71
+ if 0 < new_red_deads + new_blue_deads: # we have someone killed but we still have some fight
72
+
73
+ blues, reds = self.nb_blues - blue_deads, self.nb_reds - red_deads
74
+
75
+ env = ReduxWrapper(self, minus_blue=blue_deads, minus_red=red_deads)
76
+ obs_ = env.post_obs(obs)
77
+
78
+ env = RotateWrapper(env)
79
+ obs_ = env.post_obs(obs_)
80
+
81
+ env = SymetryWrapper(env)
82
+ obs_ = env.post_obs(obs_)
83
+
84
+ env = SortWrapper(env)
85
+ obs_ = env.post_obs(obs_)
86
+
87
+ env = RewardWrapper(TeamWrapper(env, is_double=True), is_double=True)
88
+ obs_ = env.post_obs(obs_)
89
+
90
+ _, reward, done, info = run_episode(env, obs_, blues=blues, reds=reds)
91
+
92
+ return obs, reward, done, info
drone.py ADDED
@@ -0,0 +1,326 @@
1
+ from dataclasses import dataclass
2
+ import numpy as np
3
+ from dronemodel import DroneModel
4
+
5
+ import param_ as param_
6
+ from settings import Settings
7
+
8
+
9
+ @dataclass
10
+ class Drone:
11
+ """
12
+ Creates a drone (it is either red or blue / foe or friend)
13
+ """
14
+ is_blue: bool = True
15
+ position: np.ndarray((3,)) = np.zeros((3,))
16
+ position_noise: np.ndarray((3,)) = np.zeros((3,))
17
+ drone_model: DroneModel = None
18
+ max_speeds: np.ndarray((3,)) = None
19
+ min_speeds: np.ndarray((3,)) = None
20
+ init_position: np.ndarray((3,)) = None
21
+ init_speed: np.ndarray((3,)) = None
22
+ color = np.ndarray((3,))
23
+ is_alive: bool = True
24
+ is_fired: int = 0
25
+ fires = 0
26
+ step_ = 0
27
+ id_: int = -1
28
+ ttl: float = param_.DURATION # ttl = Time To Live expressed in seconds
29
+ speed: np.ndarray((3,)) = np.zeros((3,))
30
+ min_positions = np.zeros((3,))
31
+ max_positions = np.array([Settings.perimeter, 2*np.pi, Settings.perimeter_z])
32
+
33
+ def __post_init__(self):
34
+ self.drone_model = DroneModel(self.is_blue)
35
+ self.max_speeds = np.array([self.drone_model.max_speed,
36
+ 2*np.pi,
37
+ self.drone_model.max_up_speed])
38
+ self.min_speeds = np.array([0,
39
+ 0,
40
+ -self.drone_model.max_down_speed])
41
+
42
+ self.init_position = np.copy(self.position)
43
+ self.init_position_noise = np.copy(self.position_noise)
44
+ self.init_speed = np.copy(self.speed)
45
+ self.color = param_.BLUE_COLOR if self.is_blue else param_.RED_COLOR
46
+ self.ttl = (self.position[0] / self.max_speeds[0]) * param_.TTL_RATIO + param_.TTL_MIN
47
+
48
+ def reset(self):
49
+ self.is_alive = True
50
+ self.speed = self.init_speed
51
+ self.color = param_.BLUE_COLOR if self.is_blue else param_.RED_COLOR
52
+ distance_factor = Settings.blue_distance_factor if self.is_blue else Settings.red_distance_factor
53
+ self.position[0] = self.init_position[0]*distance_factor
54
+ self.position[2] = self.init_position[2]*distance_factor
55
+ self.position_noise *= distance_factor
56
+
57
+ self.position[0] = np.clip(self.position[0] + np.random.rand() * self.position_noise[0],
58
+ param_.GROUNDZONE * 1.1,
59
+ param_.PERIMETER * 0.9)
60
+ self.position[1] = self.position[1] + np.random.rand() * self.position_noise[1]
61
+ self.position[2] = np.clip(self.position[2] + np.random.rand() * self.position_noise[2],
62
+ param_.GROUNDZONE * 1.1,
63
+ param_.PERIMETER_Z * 0.9)
64
+
65
+ self.ttl = (self.position[0] / self.max_speeds[0]) * param_.TTL_RATIO + param_.TTL_MIN
66
+
67
+
68
+ def step(self, action):
69
+ self.step_ = self.step_ + 1 # for debug purposes
70
+ reward = 0
71
+ info = {'ttl': param_.DURATION}
72
+ if self.is_alive: # if the drone is dead, it no longer moves :)
73
+ pos_xyz, speed_xyz = self.to_xyz(self.position), self.to_xyz(self.speed)
74
+ pos_s, speed_s = \
75
+ self.drone_model.get_trajectory(pos_xyz, speed_xyz, action, np.linspace(0, param_.STEP, 10))
76
+ pos, speed = pos_s.T[-1], speed_s.T[-1]
77
+ self.position, self.speed = self.from_xyz(pos), self.from_xyz(speed)
78
+ self.ttl -= param_.STEP
79
+ info['ttl'] = self.ttl
80
+
81
+ # evaluate the distance compared to the greedy action
82
+
83
+ if self.is_blue:
84
+ '''
85
+ for further usage
86
+ straight_action, time_to_catch = self.simple_blue()
87
+ tolerance = 0.05 if 4 < time_to_catch else 1 if 2 < time_to_catch else 3
88
+ distance = 1 if tolerance < np.linalg.norm(straight_action - action) else 0
89
+ '''
90
+ distance = 1
91
+ else:
92
+ straight_action = self.simple_red()
93
+ distance = 1 if 0.1 < np.linalg.norm(straight_action - action) else 0
94
+ info['distance_to_straight_action'] = distance
95
+
96
+
97
+ if self._hits_target():
98
+ info['hits_target'] = 1
99
+ reward = -param_.TARGET_HIT_COST
100
+ self.color = param_.RED_SUCCESS_COLOR
101
+ self.is_alive = False # the red has done its job ...
102
+
103
+ if self._out_of_bounds():
104
+ coef = -1 if self.is_blue else 1
105
+ reward = coef * param_.OOB_COST
106
+ self.is_alive = False
107
+ info['oob'] = 1
108
+
109
+ obs = self.get_observation()
110
+ done = not self.is_alive
111
+
112
+ return obs, reward, done, info
113
+
114
+ def _out_of_bounds(self):
115
+ return not (0 < self.position[2] < Settings.perimeter_z and self.position[0] < Settings.perimeter)
116
+
117
+ def _hits_target(self):
118
+ if self.is_blue:
119
+ return False
120
+ else:
121
+ distance_to_zero = np.sqrt(self.position[0]**2 + self.position[2]**2)
122
+ return distance_to_zero < Settings.groundzone
123
+ computes the interception point for a blue drone chasing a red drone heading to the target
124
+ def fires_(self, foe) -> bool:
125
+ """
126
+ checks if the foe drone is hit by self
127
+ :param foe: a foe drone
128
+ :return: True= yes, got you
129
+ """
130
+ # deads don't kill nor die
131
+ if not (self.is_alive and foe.is_alive):
132
+ return False
133
+
134
+ # let's see if the foe is in the "fire cone"
135
+ pos_xyz = - self.to_xyz(self.position) + self.to_xyz(foe.position)
136
+ distance = np.linalg.norm(pos_xyz)
137
+ pos_xyz /= distance
138
+
139
+ if distance < self.drone_model.distance_to_neutralisation:
140
+ return self.is_in_the_cone(foe)
141
+
142
+ return False
143
+
144
+
145
+ def is_in_the_cone(self, foe) -> bool:
146
+ '''
147
+ verifies if foe is in the cone (without any regard to distance)
148
+ :param foe:
149
+ :return:
150
+ '''
151
+ pos_xyz = - self.to_xyz(self.position) + self.to_xyz(foe.position)
152
+ pos_xyz /= np.linalg.norm(pos_xyz)
153
+ vit_xyz = self.to_xyz(self.speed)
154
+ vit_xyz /= np.linalg.norm(vit_xyz)
155
+ cos_theta = np.dot(pos_xyz, vit_xyz)
156
+ in_the_cone = False
157
+ if 0 < cos_theta:
158
+ theta = np.arccos(cos_theta)
159
+ in_the_cone = theta < self.drone_model.angle_to_neutralisation
160
+ return in_the_cone
161
+
162
+
163
+ # tell the drones that they are dead
164
+ def is_killed(self, is_blue=True):
165
+ self.is_alive = False
166
+ self.position[2] = 0
167
+ self.color = param_.BLUE_DEAD_COLOR if is_blue else param_.RED_DEAD_COLOR
168
+
169
+ def to_xyz(self, rho_theta_z: np.ndarray(shape=(3,))) -> np.ndarray(shape=(3,)):
170
+ """
171
+ converts a cylindrical (rho, theta, z) representation into 3D xyz coordinates
172
+ :param rho_theta_z: array (3,) with rho in meter, theta in rad, zed in meter for positions, /s for speeds, etc.
173
+ :return: float array (3,) with x, y, z in meter, /s for speeds, etc.
174
+ """
175
+ xy_ = rho_theta_z[0] * np.exp(1j * rho_theta_z[1])
176
+ return np.array([np.real(xy_), np.imag(xy_), rho_theta_z[2]])
177
+
178
+ def from_xyz(self, xyz: np.ndarray(shape=(3,))) -> np.ndarray(shape=(3,)):
179
+ """
180
+ """
181
+ z_complex = xyz[0] + 1j*xyz[1]
182
+ rho = np.abs(z_complex)
183
+ theta = np.angle(z_complex)
184
+ return np.array([rho, theta, xyz[2]], dtype='float32')
185
+
186
+ def to_norm(self,
187
+ rho_theta_z: np.ndarray(shape=(3,)),
188
+ max_vector: np.ndarray(shape=(3,)),
189
+ min_vector: np.ndarray(shape=(3,)) = np.array([0, 0, 0]))\
190
+ -> np.ndarray(shape=(3,), dtype='float32'):
191
+ """
192
+ normalises the position/speed so that the whole space maps into [0;1]**3
193
+ :return: rho, theta, zed in a [0;1]**3 space
194
+ """
195
+ rho = rho_theta_z[0] / max_vector[0]
196
+ theta = (rho_theta_z[1] / (2 * np.pi)) % 1
197
+ zed = (rho_theta_z[2] - min_vector[2]) / (max_vector[2] - min_vector[2])
198
+ return np.array([rho, theta, zed], dtype='float32')
199
+
200
+ def from_norm(self,
201
+ norm: np.ndarray(shape=(3,)),
202
+ max_vector: np.ndarray(shape=(3,)),
203
+ min_vector: np.ndarray(shape=(3,)) = np.array([0, 0, 0]))\
204
+ -> np.ndarray(shape=(3,), dtype='float32'):
205
+ """
206
+ denormalises and converts back into cylindrical coordinates
207
+ :param norm:
208
+ :param max_vector:
209
+ :param min_vector:
210
+ :return:
211
+ """
212
+ rho = norm[0] * max_vector[0]
213
+ theta = norm[1] * 2*np.pi
214
+ zed = norm[2] * (max_vector[2] - min_vector[2]) + min_vector[2]
215
+ return np.array([rho, theta, zed], dtype='float32')
216
+
217
+ def to_lat_lon_zed(self, lat, lon):
218
+ z = self.position[0] * np.exp(1j * self.position[1])
219
+ lat = np.imag(z) * 360 / (40075 * 1000) + lat
220
+ lon = np.real(z) * 360 / (40075 * 1000 * np.cos(np.pi / 180 * lat)) + lon
221
+ return lat, lon, self.position[2]
222
+
223
+ def distance(self, other_drone=None):
224
+
225
+ if other_drone:
226
+ distance = np.sqrt(np.abs(self.position[0] * np.exp(1j * self.position[1]) -
227
+ other_drone.position[0] * np.exp(1j * other_drone.position[1])) ** 2 +
228
+ (self.position[2] - other_drone.position[2]) ** 2)
229
+ else:
230
+ distance = np.sqrt((self.position[0] ** 2) + self.position[2] ** 2)
231
+ return distance
232
+
233
+ def get_observation(self): # -> np.array(shape=(6,), dtype='float32'):
234
+ """
235
+ get normalised and transformed position and speed
236
+ :return:
237
+ """
238
+ # calculates transformed normalised position
239
+ normalised_position = self.to_norm(self.position, self.max_positions, self.min_positions)
240
+
241
+ # calculates transformed normalised speed
242
+ normalised_speed = self.to_norm(self.speed, self.max_speeds, self.min_speeds)
243
+
244
+ return np.append(normalised_position, normalised_speed)
245
+
246
+
247
+ def simple_red(self, target: np.ndarray(3,)=np.zeros(3), z_margin: float=50) -> np.ndarray(shape=(3,)):
248
+ '''
249
+ defines the actions for a trajectory targeting zero
250
+ :return:
251
+ '''
252
+
253
+ self_z = self.position[0] * np.exp(1j * self.position[1])
254
+ target_z = target[0] * np.exp(1j * target[1])
255
+
256
+ direction = np.zeros(3)
257
+ direction[0] = np.abs(self_z - target_z)
258
+ direction[1] = np.angle(self_z - target_z)
259
+ direction[2] = self.position[2] - target[2] - z_margin
260
+
261
+ theta = (direction[1] + np.pi) / (2*np.pi) % 1
262
+
263
+ # slope of drone given its position
264
+ tan_phi = np.sign(direction[2]) * np.inf if direction[0] == 0 else direction[2]/direction[0]
265
+ # slope of drone speed
266
+ tan_phi_point = np.sign(self.speed[2]) * np.inf if self.speed[0] == 0 else self.speed[2]/self.speed[0]
267
+ # slope of forces
268
+ f_ratio = self.drone_model.Fxy / self.drone_model.Fz_minus
269
+ # go up if speed slope is too steep and vertical speed < 0 else take the position angle for forces angle
270
+ psy = -np.arctan(tan_phi * f_ratio) / np.pi + 0.5
271
+
272
+ action = np.array([1, theta, psy])
273
+
274
+ return action
275
+
276
+ def simple_target(self, target: (np.ndarray(shape=(3,)))) -> np.ndarray(shape=(3,)):
277
+ '''
278
+ defines the actions for a trajectory targeting ... the given target
279
+ :return:
280
+ '''
281
+
282
+ self_z = self.position[0] * np.exp(1j * self.position[1])
283
+ target_z = target[0] * np.exp(1j * target[1])
284
+
285
+ direction = np.zeros(3)
286
+ direction[0] = np.abs(self_z - target_z)
287
+ direction[1] = np.angle(self_z - target_z)
288
+ direction[2] = self.position[2] - target[2]
289
+
290
+ theta = (direction[1] + np.pi) / (2*np.pi) % 1
291
+
292
+ # slope of drone given its position
293
+ tan_phi = np.sign(direction[2]) * np.inf if direction[0] == 0 else direction[2]/direction[0]
294
+ # slope of drone speed
295
+ tan_phi_point = np.sign(self.speed[2]) * np.inf if self.speed[0] == 0 else self.speed[2]/self.speed[0]
296
+ # slope of forces
297
+ f_ratio = self.drone_model.Fxy / self.drone_model.Fz_minus
298
+ # go up if speed slope is too steep and vertical speed < 0 else take the position angle for forces angle
299
+ psy = 0.5 if tan_phi_point < -tan_phi else -np.arctan(tan_phi * f_ratio) / np.pi + 0.5
300
+
301
+ if Settings.perimeter_z / 2 < direction[2]:
302
+ psy = min(0.2, psy)
303
+
304
+ if self.position[0] < 1.5 * Settings.groundzone:
305
+ psy = min(0.2, psy)
306
+
307
+
308
+ action = np.array([1, theta, psy])
309
+
310
+
311
+
312
+ return action
313
+
314
+ def next_simple_pos(self):
315
+ next_pos = np.zeros(3)
316
+ simple_next = self.position[0] * np.exp(1j * self.position[1]) + self.speed[0] * np.exp(1j * self.speed[1]) * param_.STEP
317
+ next_pos[0] = np.abs(simple_next)
318
+ next_pos[1] = np.angle(simple_next)
319
+ next_pos[2] = self.position[2] + self.speed[2] * param_.STEP
320
+ return next_pos
321
+
322
+ def copy_pos_speed(self, drone_to_copy):
323
+ self.position = drone_to_copy.position
324
+ self.speed = drone_to_copy.speed
325
+
326
+
dronemodel.py ADDED
@@ -0,0 +1,103 @@
1
+ from dataclasses import dataclass
2
+ from scipy.integrate import odeint
3
+ import numpy as np
4
+
5
+ import param_
6
+
7
+
8
+ @dataclass
9
+ class DroneModel:
10
+ """
11
+ Creates the physical model of a drone (forces, speeds and neutralisation capabilities)
12
+ """
13
+
14
+ def __init__(self, is_blue):
15
+ self.drone_model = param_.DRONE_MODELS[param_.DRONE_MODEL[is_blue]]
16
+
17
+ self.angle_to_neutralisation = self.drone_model['angle_to_neutralisation']
18
+ self.distance_to_neutralisation = self.drone_model['distance_to_neutralisation']
19
+ self.duration_to_neutralisation = self.drone_model['duration_to_neutralisation']
20
+
21
+ self.Cxy = self.drone_model['Cxy']
22
+ self.Cz = self.drone_model['Cz']
23
+ self.mass = self.drone_model['mass']
24
+
25
+ self.Fxy_ratio = self.drone_model['Fxy_ratio']
26
+ self.Fz_min_ratio = self.drone_model['Fz_min_ratio']
27
+ self.Fz_max_ratio = self.drone_model['Fz_max_ratio']
28
+
29
+ self.weight_eq = self.mass * param_.g * (1 - self.Fz_min_ratio)
30
+ self.Fz_plus = (self.Fz_max_ratio - 1) * self.mass * param_.g
31
+ self.Fz_minus = (1 - self.Fz_min_ratio) * self.mass * param_.g
32
+ self.Fxy = self.mass * param_.g * self.Fxy_ratio
33
+
34
+ self.max_speed = np.sqrt(self.Fxy / self.Cxy)
35
+ self.max_up_speed = np.sqrt(self.Fz_plus / self.Cz)
36
+ self.max_down_speed = np.sqrt(self.Fz_minus / self.Cz)
37
+ self.max_rot_speed = 2 * np.pi
38
+
39
+ def get_trajectory(self, pos_xyz, speed_xyz, action: np.ndarray(3,), time_: np.ndarray(1,)) -> np.ndarray(3,):
40
+ '''
41
+ returns the trajectory (positions and speeds over time_) given the current position, speed and applied forces
42
+ :param pos_xyz:
43
+ :param speed_xyz:
44
+ :param action:
45
+ :param time_:
46
+ :return:
47
+ '''
48
+
49
+ rho = action[0] # in 0, 1
50
+ theta = 2*np.pi * action[1] # in 0, 2pi
51
+ psy = np.pi * (action[2] - 0.5) # in -pi/2, pi/2
52
+
53
+ fx = rho * np.cos(theta) * np.cos(psy) * self.Fxy
54
+ fy = rho * np.sin(theta) * np.cos(psy) * self.Fxy
55
+ fz = rho * np.sin(psy) * (self.Fz_plus if 0 < psy else self.Fz_minus)
56
+
57
+ pos_speed = np.hstack((pos_xyz, speed_xyz))
58
+
59
+ result_ = odeint(
60
+ lambda u, v: self.drone_dynamics(u, v, fx, fy, fz, self.Cxy, self.Cz, self.mass),
61
+ pos_speed,
62
+ time_,
63
+ Dfun=lambda u, v: self.fulljac(u, v, self.Cxy, self.Cz, self.mass)
64
+ )
65
+ x, y, z, dx, dy, dz = result_.T
66
+
67
+ return np.array([x, y, z], dtype='float32'), np.array([dx, dy, dz], dtype='float32')
68
+
69
+ def drone_dynamics(self, pos_speed, time_, f_x, f_y, f_z, Cxy, Cz, m):
70
+ x, y, z, dx, dy, dz = pos_speed
71
+ return [dx,
72
+ dy,
73
+ dz,
74
+ 1/m * (f_x - Cxy * dx * np.sqrt(dx**2 + dy**2 + dz**2)),
75
+ 1/m * (f_y - Cxy * dy * np.sqrt(dx**2 + dy**2 + dz**2)),
76
+ 1/m * (f_z - Cz * dz * np.sqrt(dx**2 + dy**2 + dz**2))]
77
+
78
+ def fulljac(self, pos_speed, time_, Cxy, Cz, m) -> np.ndarray((6, 6), ):
79
+ '''
80
+ returns the Jacobian of the differential equation of the trajectory
81
+ :param pos_speed:
82
+ :param time_:
83
+ :param Cxy:
84
+ :param Cz:
85
+ :param m:
86
+ :return:
87
+ '''
88
+
89
+ x, y, z, dx, dy, dz = pos_speed
90
+ J = np.zeros((6, 6))
91
+ J[0, 3] = 1
92
+ J[1, 4] = 1
93
+ J[2, 5] = 1
94
+ J[3, 3] = -Cxy/m * ((np.sqrt(dx**2 + dy**2 + dz**2)) + dx**2 / np.sqrt(dx**2 + dy**2 + dz**2))
95
+ J[3, 4] = -Cxy/m * (dx * dy / np.sqrt(dx**2 + dy**2 + dz**2))
96
+ J[3, 5] = -Cxy/m * (dx * dz / np.sqrt(dx**2 + dy**2 + dz**2))
97
+ J[4, 4] = -Cxy/m * ((np.sqrt(dx**2 + dy**2 + dz**2)) + dy**2 / np.sqrt(dx**2 + dy**2 + dz**2))
98
+ J[4, 3] = -Cxy/m * (dy * dx / np.sqrt(dx**2 + dy**2 + dz**2))
99
+ J[4, 5] = -Cxy/m * (dy * dz / np.sqrt(dx**2 + dy**2 + dz**2))
100
+ J[5, 5] = -Cz/m * ((np.sqrt(dx**2 + dy**2 + dz**2)) + dz**2 / np.sqrt(dx**2 + dy**2 + dz**2))
101
+ J[5, 3] = -Cz/m * (dz * dx / np.sqrt(dx**2 + dy**2 + dz**2))
102
+ J[5, 4] = -Cz/m * (dz * dy / np.sqrt(dx**2 + dy**2 + dz**2))
103
+ return J
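
A quick sanity check on the dynamics above (a standalone sketch, not part of the commit): with a constant horizontal force Fxy and quadratic drag Cxy * v², the speed converges to sqrt(Fxy / Cxy), which is exactly how max_speed is defined in __init__:

```python
# standalone sanity check of the quadratic-drag dynamics used in DroneModel:
# the terminal speed of m * dv/dt = F - C * v * |v| is sqrt(F / C)
import numpy as np
from scipy.integrate import odeint

mass, g = 50, 9.81
F = mass * g * 1.0                 # 'alpha' model: Fxy_ratio = 1
C = 0.2                            # 'alpha' model: Cxy = 0.2

def dynamics(v, t):
    return (F - C * v * np.abs(v)) / mass

t = np.linspace(0, 60, 600)
v = odeint(dynamics, 0.0, t).ravel()
print(round(v[-1], 1), round(np.sqrt(F / C), 1))   # both ~49.5 m/s
```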
filter_wrap.py ADDED
@@ -0,0 +1,95 @@
1
+
2
+ import numpy as np
3
+ from gym import spaces, Wrapper
4
+
5
+
6
+ class FilterWrapper(Wrapper):
7
+ """
8
+ :param env: (gym.Env) Gym environment that will be wrapped
9
+ """
10
+
11
+ def __init__(self, env):
12
+
13
+ self.nb_blues, self.nb_reds = env.nb_blues, env.nb_reds
14
+
15
+ self.blue_deads = np.full((self.nb_blues,), False)
16
+ self.red_deads = np.full((self.nb_reds,), False)
17
+
18
+ env.observation_space = spaces.Tuple((
19
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, 6), dtype=np.float32),
20
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, 6), dtype=np.float32),
21
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, self.nb_reds), dtype=np.float32),
22
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, self.nb_blues), dtype=np.float32),
23
+ spaces.Discrete(1),
24
+ spaces.Discrete(1)))
25
+
26
+ super(FilterWrapper, self).__init__(env)
27
+
28
+ def reset(self):
29
+ """
30
+ Reset the environment
31
+ """
32
+ obs = self.env.reset()
33
+
34
+ return self._sort_obs(obs)
35
+
36
+ def step(self, action):
37
+ """
38
+ :param action: ([float] or int) Action taken by the agent
39
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
40
+ """
41
+
42
+ blue_action, red_action = action
43
+
44
+ new_ba = []
45
+ index = 0
46
+ for count, alive in enumerate(~self.blue_deads):
47
+ if alive:
48
+ new_ba.append(blue_action[index])
49
+ index += 1
50
+ else:
51
+ new_ba.append(np.array([0, 0, 0]))
52
+ blue_action = new_ba
53
+
54
+ new_ra = []
55
+ index = 0
56
+ for count, alive in enumerate(~self.red_deads):
57
+ if alive:
58
+ new_ra.append(red_action[index])
59
+ index += 1
60
+ else:
61
+ new_ra.append(np.array([0, 0, 0]))
62
+ red_action = new_ra
63
+
64
+ action = blue_action, red_action
65
+
66
+ obs, reward, done, info = self.env.step(action)
67
+
68
+ obs = self._sort_obs(obs)
69
+
70
+ return obs, reward, done, info
71
+
72
+ def _sort_obs(self, obs):
73
+
74
+ blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads = obs
75
+
76
+ self.blue_deads = blue_deads
77
+ self.red_deads = red_deads
78
+
79
+ blue_obs = np.vstack((blue_obs[~self.blue_deads], blue_obs[self.blue_deads]))
80
+ red_obs = np.vstack((red_obs[~self.red_deads], red_obs[self.red_deads]))
81
+
82
+ blues_fire = self.fire_sort(self.blue_deads, self.red_deads, blues_fire)
83
+ reds_fire = self.fire_sort(self.red_deads, self.blue_deads, reds_fire)
84
+
85
+ sort_obs = blue_obs, red_obs, blues_fire, reds_fire, sum(blue_deads), sum(red_deads)
86
+
87
+ return sort_obs
88
+
89
+ def fire_sort(self, dead_friends, dead_foes, friends_fire):
90
+
91
+ friends_fire_big = np.zeros_like(friends_fire)
92
+ friends_fire = np.compress(~dead_friends, friends_fire, axis=0)
93
+ friends_fire = np.compress(~dead_foes, friends_fire, axis=1)
94
+ friends_fire_big[:friends_fire.shape[0], :friends_fire.shape[1]] = friends_fire
95
+ return friends_fire_big
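
A small illustration of fire_sort above (not part of the commit): rows and columns belonging to dead drones are dropped, and the surviving counters are packed into the top-left corner of a matrix of the original shape:

```python
# illustrative values only: 2 friends x 3 foes, friend 1 and foe 2 are dead
import numpy as np

friends_fire = np.array([[1, 2, 0],
                         [0, 0, 3]])
dead_friends = np.array([False, True])
dead_foes = np.array([False, False, True])

big = np.zeros_like(friends_fire)
kept = np.compress(~dead_friends, friends_fire, axis=0)   # drop dead friends' rows
kept = np.compress(~dead_foes, kept, axis=1)              # drop dead foes' columns
big[:kept.shape[0], :kept.shape[1]] = kept
print(big)   # [[1 2 0]
             #  [0 0 0]]
```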
monitor_wrap.py ADDED
@@ -0,0 +1,119 @@
1
+ from dataclasses import make_dataclass
2
+
3
+ import numpy as np
4
+ from gym import Wrapper
5
+ import pandas as pd
6
+
7
+ import param_
8
+ from settings import Settings
9
+
10
+
11
+ Path = make_dataclass("Path", [('path', list), ('step', int), ('d_index', int), ('color', list)])
12
+
13
+
14
+ class MonitorWrapper(Wrapper):
15
+ """
16
+ :param env: (gym.Env) Gym environment that will be wrapped
17
+ """
18
+
19
+ def __init__(self, env, steps, verbose=True):
20
+ # Call the parent constructor, so we can access self.env later
21
+ super(MonitorWrapper, self).__init__(env)
22
+ self.verbose = verbose
23
+ self.blue_data = []
24
+ self.red_data = []
25
+ self.fire_paths = []
26
+ lat, lon = Settings.latlon
27
+ self.lat_tg = lat
28
+ self.lon_tg = lon
29
+ self.steps = steps
30
+ self.step_ = 0
31
+ self.step_max = 0
32
+
33
+ def reset(self):
34
+ """
35
+ Reset the environment
36
+ """
37
+ obs = self.env.reset()
38
+ self.blue_data = []
39
+ self.red_data = []
40
+ self.fire_paths = []
41
+ self.step_ = 0
42
+ self.step_max = 0
43
+
44
+ return obs
45
+
46
+ def step(self, action):
47
+ """
48
+ :param action: ([float] or int) Action taken by the agent
49
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
50
+ """
51
+
52
+ obs, reward, done, info = self.env.step(action)
53
+
54
+ if self.verbose:
55
+ self.monitor_state()
56
+
57
+ self.step_ += 1
58
+ if self.step_ == self.steps:
59
+ done = True
60
+
61
+ return obs, reward, done, info
62
+
63
+ def monitor_state(self):
64
+
65
+ env = self.env
66
+ lat_tg, lon_tg = self.lat_tg, self.lon_tg
67
+
68
+ for d_index, drone in enumerate(env.blue_team.drones):
69
+ lat, lon, zed = drone.to_lat_lon_zed(lat_tg, lon_tg)
70
+ self.blue_data.append([self.step_, True, drone.id_, lat, lon, zed, drone.color])
71
+
72
+ for d_index, drone in enumerate(env.red_team.drones):
73
+ lat, lon, zed = drone.to_lat_lon_zed(lat_tg, lon_tg)
74
+ self.red_data.append([self.step_, False, drone.id_, lat, lon, zed, drone.color])
75
+
76
+ for blue_id, red_id in np.argwhere(0 < env.playground.blues_have_fired_reds):
77
+ b_lat, b_lon, b_zed = env.blue_team.drones[blue_id].to_lat_lon_zed(lat_tg, lon_tg)
78
+ r_lat, r_lon, r_zed = env.red_team.drones[red_id].to_lat_lon_zed(lat_tg, lon_tg)
79
+ self.fire_paths.append(Path(step=self.step_,
80
+ path=[[b_lat, b_lon, b_zed], [r_lat, r_lon, r_zed]],
81
+ color=param_.GREEN_COLOR,
82
+ d_index=blue_id))
83
+
84
+ for red_id, blue_id in np.argwhere(0 < env.playground.reds_have_fired_blues):
85
+ b_lat, b_lon, b_zed = env.blue_team.drones[blue_id].to_lat_lon_zed(lat_tg, lon_tg)
86
+ r_lat, r_lon, r_zed = env.red_team.drones[red_id].to_lat_lon_zed(lat_tg, lon_tg)
87
+ self.fire_paths.append(Path(step=self.step_,
88
+ path=[[b_lat, b_lon, b_zed], [r_lat, r_lon, r_zed]],
89
+ color=param_.BLACK_COLOR,
90
+ d_index=red_id))
91
+
92
+ def get_df(self):
93
+
94
+ fire_df = pd.DataFrame(self.fire_paths)
95
+
96
+ df_columns = ['step', 'isBlue', 'd_index', 'lat', 'lon', 'zed', 'color']
97
+ blue_df = pd.DataFrame(self.blue_data, columns=df_columns)
98
+ red_df = pd.DataFrame(self.red_data, columns=df_columns)
99
+
100
+ blue_path_df = []
101
+ red_path_df = []
102
+
103
+ for d_index in range(self.env.nb_blues):
104
+ blue_path_df.append(self._get_path_df(blue_df, d_index, color=param_.BLUE_COLOR))
105
+ for d_index in range(self.env.nb_reds):
106
+ red_path_df.append(self._get_path_df(red_df, d_index, color=param_.RED_COLOR))
107
+
108
+ return blue_df, red_df, fire_df, blue_path_df, red_path_df
109
+
110
+ def _get_path_df(self, drone_df: pd.DataFrame, d_index: int, color: int = param_.BLUE_COLOR) -> pd.DataFrame:
111
+
112
+ traj_length = param_.TRAJ_LENGTH
113
+
114
+ path_total = drone_df[['lon', 'lat', 'zed', 'step']][drone_df.d_index == d_index].values.tolist()
115
+ path = ([Path(path_total[:step+1], step, d_index, color) if step < traj_length
116
+ else Path(path_total[step - traj_length:step+1], step, d_index, color)
117
+ for step in range(len(path_total))])
118
+ path_df = pd.DataFrame(path, columns=['path', 'step', 'd_index', 'color'])
119
+ return path_df
param_.py ADDED
@@ -0,0 +1,122 @@
1
+
2
+ import numpy as np
3
+
4
+ STEP = 1 # seconds per time step
5
+ DURATION = 200 # seconds
6
+
7
+ POLICY_FOLDER = 'default_policies'
8
+
9
+ STEP_COST = 0.01
10
+ OOB_COST = 0.8 # Out Of Bound : when the drone is below 0 or above a PERIMETER_Z
11
+ RED_SHOT_REWARD = 10 # when a red drone is shot
12
+ TARGET_HIT_COST = 10 # when a red drone hits the target
13
+ THREAT_WEIGHT = 0 # when reds are close to the target (* function of the red distance)
14
+ STRAIGHT_ACTION_COST = 0.04 # when reds do not follow the shortest path
15
+ TTL_COST = 0.7 # when a red is still alive after its TTL: it is a failure for both blues and reds
16
+
17
+ TTL_RATIO = 2 # margin for red drones to get to the target if they went full speed
18
+ TTL_MIN = 4 # at least to succeed the mission : ttl = TTL_MIN + vmax * TTL_RATIO
19
+
20
+ ELEVATION_SCALE = 1
21
+ TRAJ_LENGTH = 6
22
+ SIMU_SPEED = 0.2
23
+
24
+
25
+ """
26
+ the playground parameters
27
+ """
28
+
29
+ PERIMETER = 5000
30
+ PERIMETER_Z = 600
31
+
32
+ # PERIMETER of the ground zone to defend
33
+ GROUNDZONE = 100
34
+
35
+ # position in LATLON
36
+ LATLON = {'Paris':
37
+ {'lat': 48.865879, 'lon': 2.319827},
38
+ 'Fonsorbes':
39
+ {'lat': 43.54, 'lon': 1.25},
40
+ 'San Francisco':
41
+ {'lat': 37.7737283, 'lon': -122.4342383},
42
+ 'Puilaurens':
43
+ {'lat': 42.803943093860894, 'lon': 2.299540897567384},
44
+ }
45
+
46
+ """
47
+ the Team Parameters
48
+ """
49
+
50
+ # blue team init
51
+
52
+ BLUES = 12
53
+
54
+ BLUES_PER_CIRCLE = [3, 3, 4, 4, 4, 4]
55
+ BLUE_CIRCLES_RHO = [500, 900, 1400, 1600, 2000, 2500]
56
+ BLUE_CIRCLES_THETA = [0, -np.pi/3, -np.pi, -np.pi/2, 0, np.pi/3]
57
+ BLUE_CIRCLES_ZED = [200, 250, 250, 100, 250, 100]
58
+ BLUE_DISTANCE_FACTOR = 1
59
+
60
+ BLUE_IS_UNKILLABLE = True
61
+
62
+
63
+ BLUE_SPEED_INIT = 1 # in ratio to max_speed
64
+
65
+ BLUE_COLOR = [0, 0, 150, 120]
66
+ BLUE_DEAD_COLOR = [20, 20, 60]
67
+
68
+ # red team init
69
+
70
+ REDS = 12
71
+
72
+ RED_SQUADS = [1, 1, 1, 1, 1, 15]
73
+ RED_SQUADS_RHO = [1000, 700, 1000, 1200, 1500, 2000]
74
+ RED_SQUADS_THETA = np.pi * np.array([0, 1/4, -1/4, -1/2, 1/2, 0])
75
+ RED_SQUADS_ZED = [200, 200, 100, 250, 200, 100]
76
+ RED_DISTANCE_FACTOR = 1
77
+
78
+
79
+ RED_RHO_NOISE = [60, 60, 100, 200, 200, 300]
80
+ RED_THETA_NOISE = np.pi * np.array([1/5, 1/2, 1, 1, 1, 1])
81
+ RED_ZED_NOISE = [60, 50, 10, 10, 50, 60]
82
+
83
+ RED_SPEED_INIT = 0.2 # in ratio to max_speed
84
+
85
+ RED_COLOR = [150, 0, 0, 120]
86
+ RED_DEAD_COLOR = [120, 50, 30]
87
+ RED_SUCCESS_COLOR = [200, 200, 0]
88
+ BLACK_COLOR = [0, 0, 0]
89
+ GREEN_COLOR = [0, 255, 255]
90
+
91
+ """
92
+ the Drone Parameters
93
+ """
94
+
95
+ g = 9.81
96
+
97
+ DRONE_MODEL = ['beta', 'alpha'] # indexed by is_blue: blue -> DRONE_MODEL[1] = 'alpha', red -> DRONE_MODEL[0] = 'beta'
98
+
99
+ DRONE_MODELS = {
100
+ 'alpha': {
101
+ 'angle_to_neutralisation': np.pi / 4, # in rad
102
+ 'distance_to_neutralisation': 250, # in m
103
+ 'duration_to_neutralisation': 2, # in s
104
+ 'Cxy': 0.2, # horizontal air resistance = Cxy * v^2
105
+ 'Cz': 0.7, # vertical air resistance
106
+ 'mass': 50, # kg
107
+ 'Fz_min_ratio': 0.6, # how much weight is compensated (below 1 => drone goes down)
108
+ 'Fz_max_ratio': 1.4, # how much weight is compensated (>1 => drone goes up)
109
+ 'Fxy_ratio': 1, # Force xy relative to weight
110
+ },
111
+ 'beta': {
112
+ 'angle_to_neutralisation': np.pi / 4,
113
+ 'distance_to_neutralisation': 250,
114
+ 'duration_to_neutralisation': np.inf,
115
+ 'Cxy': 0.3, # horizontal air resistance : link to speed max by the relation Fxy_max = Cxy * Speedxy_max
116
+ 'Cz': 0.8, # vertical air resistance : link to speed max by the relation Fz_max = Cz * Speedz_max
117
+ 'mass': 40, # kg
118
+ 'Fz_min_ratio': 0.5, # how much weight is compensated (below 1 => drone goes down)
119
+ 'Fz_max_ratio': 1.8, # how much weight is compensated (>1 => drone goes up)
120
+ 'Fxy_ratio': 0.6, # Force xy relative to weight
121
+ },
122
+ }
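
To make the TTL parameters above concrete (a sketch, not part of the commit), here is the time-to-live a red drone starting 1000 m out would get, following the formula used in Drone.__post_init__ (ttl = rho / max_speed * TTL_RATIO + TTL_MIN) and the 'beta' model values:

```python
# illustrative computation only; constants copied from the 'beta' entry above
import numpy as np

mass, Fxy_ratio, Cxy = 40, 0.6, 0.3
g = 9.81
max_speed = np.sqrt(mass * g * Fxy_ratio / Cxy)   # Fxy = m*g*Fxy_ratio, max_speed = sqrt(Fxy/Cxy)
ttl = 1000 / max_speed * 2 + 4                    # TTL_RATIO = 2, TTL_MIN = 4
print(round(max_speed, 1), round(ttl, 1))         # 28.0 m/s, 75.4 s
```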
playground.py ADDED
@@ -0,0 +1,92 @@
1
+ import numpy as np
2
+ from dataclasses import dataclass
3
+
4
+ import param_
5
+ from settings import Settings
6
+ from drone import Drone
7
+
8
+
9
+ @dataclass
10
+ class Playground:
11
+ """
12
+ This is a cylindrical 3D-env where blue drones defend a central zone from the attack of red drones
13
+ the playground also manages the interactions between opposing drones, such as firing
14
+ """
15
+
16
+ perimeter = Settings.perimeter
17
+ perimeter_z = Settings.perimeter_z
18
+ groundzone = Settings.groundzone
19
+
20
+ env: object
21
+ blue_drones: [Drone]
22
+ red_drones: [Drone]
23
+
24
+ def __post_init__(self):
25
+ # creates the fire matrices
26
+ self.blues_have_fired_reds = np.zeros(shape=(len(self.blue_drones),
27
+ len(self.red_drones)), dtype=int)
28
+ self.reds_have_fired_blues = np.zeros(shape=(len(self.red_drones),
29
+ len(self.blue_drones)), dtype=int)
30
+
31
+ # how long the drone needs to have the other in target
32
+ self.blue_shots_to_kill = param_.DRONE_MODELS[param_.DRONE_MODEL[True]]['duration_to_neutralisation']
33
+ self.red_shots_to_kill = param_.DRONE_MODELS[param_.DRONE_MODEL[False]]['duration_to_neutralisation']
34
+ self.blue_shots_to_kill //= param_.STEP
35
+ self.red_shots_to_kill //= param_.STEP
36
+
37
+ # how far can a drone shoot
38
+ self.distance_blue_shot = param_.DRONE_MODELS[param_.DRONE_MODEL[True]]['distance_to_neutralisation']
39
+ self.distance_red_shot = param_.DRONE_MODELS[param_.DRONE_MODEL[False]]['distance_to_neutralisation']
40
+
41
+ def reset(self):
42
+ self.blues_have_fired_reds[...] = 0
43
+ self.reds_have_fired_blues[...] = 0
44
+
45
+ def get_observation(self):
46
+ return self.blues_have_fired_reds / self.blue_shots_to_kill, \
47
+ self.reds_have_fired_blues / self.red_shots_to_kill
48
+
49
+ def step(self):
50
+ """
51
+ determines who has fired who, and who is dead in the end
52
+ :return: Tuple with list of Blue and Reds dead. (if a blue or a red is dead, the sequence is over)
53
+ """
54
+ # gets who has fired who in this step
55
+ blues_fire_reds = np.array([[blue.fires_(red) for red in self.red_drones] for blue in self.blue_drones])
56
+ reds_fire_blues = np.array([[red.fires_(blue) for blue in self.blue_drones] for red in self.red_drones])
57
+
58
+ # if the foe is no longer seen, the count restarts from 0
59
+ self.blues_have_fired_reds *= blues_fire_reds
60
+ self.reds_have_fired_blues *= reds_fire_blues
61
+
62
+ # and the count is incremented for the others
63
+ self.blues_have_fired_reds += blues_fire_reds
64
+ self.reds_have_fired_blues += reds_fire_blues
65
+
66
+ # np magic : first find the list of duos shooter/shot, keep the shots (only once)
67
+ red_deads = np.unique(np.argwhere(self.blues_have_fired_reds >= self.blue_shots_to_kill).T[1])
68
+ blue_deads = np.unique(np.argwhere(self.reds_have_fired_blues >= self.red_shots_to_kill).T[1])
69
+
70
+
71
+ # tell the drones that they are dead
72
+ for drone_id in blue_deads:
73
+ self.blue_drones[drone_id].is_killed(is_blue=True)
74
+ for drone_id in red_deads:
75
+ self.red_drones[drone_id].is_killed(is_blue=False)
76
+
77
+ # consider only living drones
78
+ blue_drones = [drone for drone in self.blue_drones if drone.is_alive]
79
+ red_drones = [drone for drone in self.red_drones if drone.is_alive]
80
+
81
+ bf_obs, rf_obs = self.get_observation()
82
+ bf_reward = rf_reward = 0
83
+ remaining_blues, remaining_reds = len(blue_drones), len(red_drones),
84
+ blue_shots, red_shots = len(blue_deads), len(red_deads)
85
+
86
+ if blue_shots + red_shots > 0:
87
+ print('someone is killed: {0} blues and {1} reds'.format(blue_shots, red_shots))
88
+
89
+ return bf_obs, bf_reward, remaining_blues, blue_shots, rf_obs, rf_reward, remaining_reds, red_shots
90
+
91
+
92
+
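
The two in-place updates in step() above implement a "keep the foe in the cone for duration_to_neutralisation consecutive steps" rule; a tiny illustration (not part of the commit):

```python
# illustration of the fire-count update: multiply-then-add means the counter
# resets whenever contact is lost, so a kill needs consecutive hits
import numpy as np

shots_to_kill = 2                            # duration_to_neutralisation / STEP
have_fired = np.zeros((1, 1), dtype=int)

for in_cone in [1, 1, 0, 1, 1]:              # per-step output of Drone.fires_()
    fired = np.array([[in_cone]])
    have_fired *= fired                      # contact lost -> restart from 0
    have_fired += fired                      # contact kept -> one more step
    print(int(have_fired[0, 0]), bool(have_fired[0, 0] >= shots_to_kill))
```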
policies/_last/_b1r1/blues_last.zip ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d64d434b3ea20f669d130b3871196920c9efe6263625b050dfc1841e141e89c
3
+ size 3146628
policies/_last/_b1r1/reds_last.zip ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03cc75c12317da0fada0de5c5e161855c87ec5fab05e435f6bbf4c08bc07aa13
3
+ size 3148185
procfile.txt ADDED
@@ -0,0 +1 @@
1
+ web: sh setup.sh && streamlit run app.py
redux_wrap.py ADDED
@@ -0,0 +1,80 @@
1
+ import gym
2
+ from gym import spaces
3
+ import numpy as np
4
+
5
+ from settings import Settings
6
+
7
+
8
+ class ReduxWrapper(gym.Wrapper):
9
+ """
10
+ :param env: (gym.Env) Gym environment that will be wrapped
11
+ """
12
+
13
+ def __init__(self, env, minus_blue=0, minus_red=0):
14
+
15
+ # action space is reduced
16
+ nb_blues, nb_reds = Settings.blues, Settings.reds
17
+
18
+ self.nb_blues = nb_blues - minus_blue
19
+ self.nb_reds = nb_reds - minus_red
20
+
21
+ self.blue_deads = minus_blue
22
+ self.red_deads = minus_red
23
+
24
+ env.observation_space = spaces.Tuple((
25
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, 6), dtype=np.float32),
26
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, 6), dtype=np.float32),
27
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, self.nb_reds), dtype=np.float32),
28
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, self.nb_blues), dtype=np.float32)))
29
+
30
+ env.action_space = spaces.Tuple((
31
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, 3), dtype=np.float32),
32
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, 3), dtype=np.float32)))
33
+
34
+ super(ReduxWrapper, self).__init__(env)
35
+
36
+ def reset(self):
37
+ """
38
+ Reset the environment
39
+ """
40
+ obs = self.env.reset()
41
+ obs = self.post_obs(obs)
42
+
43
+ return obs
44
+
45
+ def step(self, action):
46
+
47
+ # action needs expansion
48
+ blue_action, red_action = action
49
+ if self.blue_deads:
50
+ blue_action = np.vstack((blue_action, np.zeros((self.blue_deads, 3))))
51
+ if self.red_deads:
52
+ red_action = np.vstack((red_action, np.zeros((self.red_deads, 3))))
53
+ action = blue_action, red_action
54
+
55
+ obs, reward, done, info = self.env.step(action)
56
+
57
+ obs = self.post_obs(obs)
58
+
59
+ return obs, reward, done, info
60
+
61
+ def post_obs(self, obs):
62
+
63
+ # obs needs reduction
64
+ blue_obs, red_obs, blues_fire, reds_fire = obs
65
+
66
+ if not self.blue_deads:
67
+ pass
68
+ else:
69
+ blue_obs = blue_obs[:-self.blue_deads]
70
+ blues_fire = blues_fire[:-self.blue_deads]
71
+ reds_fire = reds_fire[:, :-self.blue_deads]
72
+
73
+ if not self.red_deads:
74
+ pass
75
+ else:
76
+ red_obs = red_obs[:-self.red_deads]
77
+ reds_fire = reds_fire[:-self.red_deads]
78
+ blues_fire = blues_fire[:, :-self.red_deads]
79
+
80
+ return blue_obs, red_obs, blues_fire, reds_fire
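
A small illustration of the action expansion in step() above (not part of the commit): the surviving drones' actions are padded back with zero rows so that the inner environment always sees the full team size:

```python
# illustrative values only: one surviving blue, two dead blues
import numpy as np

blue_deads = 2
blue_action = np.array([[1.0, 0.2, 0.5]])                   # action of the survivor
padded = np.vstack((blue_action, np.zeros((blue_deads, 3))))
print(padded.shape)                                         # (3, 3)
```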
requirements.txt ADDED
@@ -0,0 +1,3 @@
1
+ pandas~=2.0.2
2
+ numpy~=1.24.3
3
+ gym~=0.26.2
reward_wrap.py ADDED
@@ -0,0 +1,86 @@
1
+
2
+ import gym
3
+
4
+ import param_
5
+ from settings import Settings
6
+
7
+
8
+ class RewardWrapper(gym.Wrapper):
9
+ """
10
+ :param env: (gym.Env) Gym environment that will be wrapped
11
+ """
12
+
13
+ def __init__(self, env, is_blue: bool = True, is_double: bool = False):
14
+
15
+ self.is_blue = is_blue
16
+ self.is_double = is_double
17
+
18
+ super(RewardWrapper, self).__init__(env)
19
+
20
+ def reset(self):
21
+ """
22
+ Reset the environment
23
+ """
24
+ obs = self.env.reset()
25
+ return obs
26
+
27
+ def step(self, action):
28
+ """
29
+ :param action: ([float] or int) Action taken by the agent
30
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
31
+ """
32
+
33
+ obs, reward, done, info = self.env.step(action)
34
+
35
+ reward, done, info = self.situation_evaluation(info)
36
+
37
+ return obs, reward, done, info
38
+
39
+ def situation_evaluation(self, info):
40
+
41
+ if self.is_double:
42
+ if info['remaining blues'] * info['remaining reds'] == 0:
43
+ return 0, True, info
44
+ else:
45
+ return 0, False, info
46
+
47
+ else:
48
+ if self.is_blue:
49
+ if info['remaining reds'] == 0:
50
+ return param_.WIN_REWARD, True, info
51
+ if info['remaining blues'] == 0:
52
+ return -param_.WIN_REWARD, True, info
53
+ if 0 < info['blue_oob']:
54
+ return -param_.OOB_COST, True, info
55
+ if info['ttl'] < 0:
56
+ return -param_.TTL_COST, True, info # blues have taken too long to shoot the red drone
57
+ # else continues
58
+ reward = -param_.STEP_COST
59
+ reward -= info['weighted_red_distance'] * param_.THREAT_WEIGHT
60
+ reward -= info['hits_target'] * param_.TARGET_HIT_COST
61
+ reward += info['red_shots'] * param_.RED_SHOT_REWARD
62
+ reward += info['distance_to_straight_action'] * param_.STRAIGHT_ACTION_COST
63
+ return reward, False, info
64
+ else: # red is learning
65
+ done = False
66
+ reward = -param_.STEP_COST
67
+ reward += info['weighted_red_distance'] * param_.THREAT_WEIGHT
68
+ reward += info['hits_target'] * param_.TARGET_HIT_COST
69
+ reward -= info['red_shots'] * param_.RED_SHOT_REWARD
70
+ reward -= info['distance_to_straight_action'] * param_.STRAIGHT_ACTION_COST
71
+ if info['remaining reds'] == 0:
72
+ done = True
73
+ return reward, done, info
74
+ if info['remaining blues'] == 0:
75
+ done = True
76
+ reward += info['remaining reds'] * param_.TARGET_HIT_COST
77
+ return reward, done, info
78
+ if 0 < info['red_oob']:
79
+ done = True
80
+ reward -= param_.OOB_COST
81
+ if info['ttl'] < 0:
82
+ done = True
83
+ reward -= param_.TTL_COST * info['remaining reds'] # reds have taken too long to hit the target
84
+ # else continues
85
+
86
+ return reward, done, info
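
A worked example of the red-learning branch above (not part of the commit), plugging in the constants from param_.py for a step in which one red hits the target, one red is shot, and the chosen action deviates from the straight path:

```python
# constants copied from param_.py; the info values are illustrative
STEP_COST, TARGET_HIT_COST = 0.01, 10
RED_SHOT_REWARD, STRAIGHT_ACTION_COST, THREAT_WEIGHT = 10, 0.04, 0

info = {'weighted_red_distance': 0.0, 'hits_target': 1,
        'red_shots': 1, 'distance_to_straight_action': 1}

reward = -STEP_COST
reward += info['weighted_red_distance'] * THREAT_WEIGHT
reward += info['hits_target'] * TARGET_HIT_COST
reward -= info['red_shots'] * RED_SHOT_REWARD
reward -= info['distance_to_straight_action'] * STRAIGHT_ACTION_COST
print(round(reward, 2))   # -0.05: the hit and the kill cancel out, small costs remain
```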
rotate_wrap.py ADDED
@@ -0,0 +1,93 @@
1
+ import numpy as np
2
+ import gym
3
+
4
+ from drone import Drone
5
+
6
+
7
+ class RotateWrapper(gym.Wrapper):
8
+ """
9
+ :param env: (gym.Env) Gym environment that will be wrapped
10
+ """
11
+
12
+ def __init__(self, env):
13
+ # Call the parent constructor, so we can access self.env later
14
+ super(RotateWrapper, self).__init__(env)
15
+ self.angle = 0
16
+
17
+ def reset(self):
18
+ """
19
+ Reset the environment
20
+ """
21
+ obs = self.env.reset()
22
+
23
+ obs = self.post_obs(obs)
24
+
25
+ return obs
26
+
27
+ def step(self, action):
28
+ """
29
+ :param action: ([float] or int) Action taken by the agent
30
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
31
+ """
32
+
33
+ action = self.rotate_action(action)
34
+
35
+ obs, reward, done, info = self.env.step(action)
36
+
37
+ obs = self.post_obs(obs)
38
+
39
+ return obs, reward, done, info
40
+
41
+ def post_obs(self, obs):
42
+ self.angle = self.get_angle(obs)
43
+ return self.rotate_obs(obs)
44
+
45
+ def get_angle(self, obs: np.ndarray) -> float:
46
+ blue_obs, red_obs, blue_fire, red_fire = obs
47
+ sigma = 0
48
+
49
+ for this_obs in (blue_obs, red_obs):
50
+ for d in this_obs:
51
+ sigma += d[0] * np.exp(1j * d[1])
52
+ angle = np.angle(sigma)
53
+ return angle
54
+
55
+ def rotate_obs(self, obs):
56
+ blue_obs, red_obs, blue_fire, red_fire = obs
57
+
58
+ rotated_blue_obs = []
59
+ rotated_red_obs = []
60
+
61
+ for this_obs, is_blue, rotated_obs in zip((blue_obs, red_obs),
62
+ (True, False),
63
+ (rotated_blue_obs, rotated_red_obs)):
64
+ drone = Drone(is_blue=is_blue)
65
+ for d in this_obs:
66
+
67
+ d_meter = np.zeros(6,)
68
+ # get the pos and speed in cylindrical coordinated in meters
69
+ d_meter[:3] = drone.from_norm(d[:3], drone.max_positions, drone.min_positions)
70
+ d_meter[3:6] = drone.from_norm(d[3:6], drone.max_speeds, drone.min_speeds)
71
+
72
+ # rotate
73
+ d_meter[1] -= self.angle
74
+ d_meter[4] -= self.angle
75
+
76
+ # back to norm
77
+ d[:3] = drone.to_norm(d_meter[:3], drone.max_positions, drone.min_positions)
78
+ d[3:6] = drone.to_norm(d_meter[3:6], drone.max_speeds, drone.min_speeds)
79
+
80
+ rotated_obs.append(d)
81
+
82
+ del drone
83
+
84
+ return np.array(rotated_blue_obs), np.array(rotated_red_obs), blue_fire, red_fire
85
+
86
+ def rotate_action(self, action):
87
+
88
+ blue_action, red_action = action
89
+ blue_action = np.array(list(map(lambda x: [x[0], (x[1]+self.angle/2/np.pi) % 1, x[2]], blue_action)))
90
+ red_action = np.array(list(map(lambda x: [x[0], (x[1]+self.angle/2/np.pi) % 1, x[2]], red_action)))
91
+ action = blue_action, red_action
92
+
93
+ return action
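
The angle used by RotateWrapper is the argument of the complex sum of rho * exp(i*theta) over all drones; a standalone sketch with raw (rho, theta) pairs rather than the normalised observations the wrapper actually receives:

    import numpy as np

    drones = np.array([[100.0, 0.0], [100.0, np.pi / 2]])     # two drones, 90 degrees apart
    sigma = np.sum(drones[:, 0] * np.exp(1j * drones[:, 1]))  # complex "barycentre" of the swarm
    angle = np.angle(sigma)
    print(angle)   # ~pi/4; this angle is subtracted from every position and heading
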
runner.py ADDED
@@ -0,0 +1,16 @@
1
+
2
+
3
+ from swarm_policy import SwarmPolicy
4
+
5
+
6
+ def run_episode(env, obs, blues: int, reds: int):
7
+ blue_policy = SwarmPolicy(blues=blues, reds=reds, is_blue=True)
8
+ red_policy = SwarmPolicy(blues=blues, reds=reds, is_blue=False)
9
+ sum_reward = 0
10
+ done = False
11
+ while not done:
12
+ action = blue_policy.predict(obs), red_policy.predict(obs)
13
+ obs, reward, done, info = env.step(action)
14
+ sum_reward += reward
15
+ return obs, sum_reward, done, info
16
+
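
run_episode keeps stepping until the wrapped environment reports done and accumulates the reward. The same pattern, shown with a stub environment so the snippet is self-contained (the real code drives the wrapped SwarmEnv built elsewhere in the repo):

    class StubEnv:
        """Stands in for the wrapped env: the episode ends after a fixed number of steps."""
        def __init__(self, steps: int = 5):
            self.steps, self.t = steps, 0

        def step(self, action):
            self.t += 1
            return None, 1.0, self.t >= self.steps, {}

    def run(env, policy=lambda obs: None):
        obs, total, done, info = None, 0.0, False, {}
        while not done:
            obs, reward, done, info = env.step(policy(obs))
            total += reward
        return obs, total, done, info

    print(run(StubEnv())[1])   # 5.0
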
settings.py ADDED
@@ -0,0 +1,82 @@
1
+ from dataclasses import dataclass
2
+
3
+ import param_
4
+ import streamlit as st
5
+ import numpy as np
6
+
7
+
8
+ @dataclass
9
+ class Settings:
10
+
11
+ perimeter: int = param_.PERIMETER
12
+ perimeter_z: int = param_.PERIMETER_Z
13
+ groundzone: int = param_.GROUNDZONE
14
+
15
+ latlon = param_.LATLON['Paris']['lat'], param_.LATLON['Paris']['lon']
16
+
17
+ blues: int = param_.BLUES
18
+
19
+ blues_per_circle = np.array(param_.BLUES_PER_CIRCLE)
20
+ blue_circles_rho = param_.BLUE_CIRCLES_RHO
21
+ blue_circles_theta = param_.BLUE_CIRCLES_THETA
22
+ blue_circles_zed = param_.BLUE_CIRCLES_ZED
23
+ blue_distance_factor: float = param_.BLUE_DISTANCE_FACTOR
24
+
25
+ is_unkillable: bool = param_.BLUE_IS_UNKILLABLE
26
+
27
+ blue_speed_init: int = param_.BLUE_SPEED_INIT
28
+
29
+ reds: int = param_.REDS
30
+
31
+ red_squads = param_.RED_SQUADS
32
+ red_squads_rho = np.array(param_.RED_SQUADS_RHO)
33
+ red_squads_theta = param_.RED_SQUADS_THETA
34
+ red_squads_zed = param_.RED_SQUADS_ZED
35
+ red_distance_factor: float = param_.RED_DISTANCE_FACTOR
36
+
37
+ red_rho_noise = np.array(param_.RED_RHO_NOISE)
38
+ red_theta_noise = np.array(param_.RED_THETA_NOISE)
39
+ red_zed_noise = np.array(param_.RED_ZED_NOISE)
40
+
41
+ red_speed_init: int = param_.RED_SPEED_INIT
42
+
43
+ policy_folder: str = param_.POLICY_FOLDER
44
+
45
+
46
+ def define_(with_streamlit: bool = True, blues: int = Settings.blues, reds: int = Settings.reds):
47
+ """"
48
+ defines the battlefield settings, optionally from the Streamlit sidebar
49
+ :return: the number of blue and red drones
50
+ """
51
+ blues = blues
52
+ reds = reds
53
+
54
+ if with_streamlit:
55
+ st.title('Blues against Reds by hexamind.ai')
56
+ st.write('controlled by Reinforcement Learning.')
57
+ st.text('<- Set parameters')
58
+
59
+ st.sidebar.subheader("Define the battlefield")
60
+ blues = st.sidebar.slider("how many blues on defense?", 1, 20, 6)
61
+ Settings.blues = blues
62
+ blue_dispersion = st.sidebar.slider("set the average blue dispersion", 0.3, 1.0, 0.8)
63
64
+ reds = st.sidebar.slider("how many reds are on the attack?", 1, 20, 6)
65
+ Settings.reds = reds
66
+ red_dispersion = st.sidebar.slider("set the average red dispersion", 0.3, 1.0, 0.7)
67
+
68
+ Settings.blue_distance_factor = 3 * blue_dispersion
69
+ Settings.red_distance_factor = 3 * red_dispersion
70
+
71
+ location = st.sidebar.radio("Location", ['Paris', 'Puilaurens', 'San Francisco'])
72
+
73
+ lat_tg = param_.LATLON[location]['lat']
74
+ lon_tg = param_.LATLON[location]['lon']
75
+
76
+ Settings.latlon = lat_tg, lon_tg
77
+
78
+ st.sidebar.write(
79
+ 'you probably need more drones. '
80
+ 'No worries, we have plenty at www.hexamind.ai ')
81
+
82
+ return blues, reds
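
Settings holds class-level defaults copied from param_, and define_ mutates those class attributes in place, so the env and teams read the updated values directly. A usage sketch, assuming param_.py and its constants are importable:

    from settings import Settings

    Settings.blues = 4
    Settings.reds = 6
    Settings.blue_distance_factor = 3 * 0.8   # same scaling the dispersion slider applies
    print(Settings.blues, Settings.reds, Settings.latlon)
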
setup.sh ADDED
@@ -0,0 +1,12 @@
1
+ mkdir -p ~/.streamlit/
2
+ echo "\
3
+ [server]\n\
8
+ headless = true\n\
9
+ port = $PORT\n\
10
+ enableCORS = false\n\
11
+ \n\
12
+ " > ~/.streamlit/config.toml
sort_wrap.py ADDED
@@ -0,0 +1,98 @@
1
+ import numpy as np
2
+ import gym
3
+
4
+
5
+ class SortWrapper(gym.Wrapper):
6
+ """
7
+ :param env: (gym.Env) Gym environment that will be wrapped
8
+ """
9
+
10
+ def __init__(self, env):
11
+ # Call the parent constructor, so we can access self.env later
12
+ super(SortWrapper, self).__init__(env)
13
+ self.blue_signature = None
14
+ self.red_signature = None
15
+
16
+ def reset(self):
17
+ """
18
+ Reset the environment
19
+ """
20
+ obs = self.env.reset()
21
+ obs = self.sort_obs(obs)
22
+
23
+ return obs
24
+
25
+ def step(self, action):
26
+ """
27
+ :param action: ([float] or int) Action taken by the agent
28
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
29
+ """
30
+
31
+ action = self.unsort_action(action)
32
+
33
+ obs, reward, done, info = self.env.step(action)
34
+
35
+ obs = self.post_obs(obs)
36
+
37
+ return obs, reward, done, info
38
+
39
+ def post_obs(self, obs):
40
+ return self.sort_obs(obs)
41
+
42
+ def sort_obs(self, obs):
43
+
44
+ blue_obs, red_obs, blue_fire, red_fire = obs
45
+
46
+ blue_obs, self.blue_signature = self.sort_and_sign(blue_obs)
47
+ red_obs, self.red_signature = self.sort_and_sign(red_obs)
48
+
49
+ blue_fire = self.unsort_matrix_with_signatures(blue_fire, self.blue_signature, self.red_signature)
50
+ red_fire = self.unsort_matrix_with_signatures(red_fire, self.red_signature, self.blue_signature)
51
+
52
+ obs = blue_obs, red_obs, blue_fire, red_fire
53
+
54
+ return obs
55
+
56
+ def unsort_action(self, action):
57
+
58
+ blue_action, red_action = action
59
+
60
+ unsorted_blue_action = self.unsort_with_signature(blue_action, self.blue_signature)
61
+ unsorted_red_action = self.unsort_with_signature(red_action, self.red_signature)
62
+
63
+ action = unsorted_blue_action, unsorted_red_action
64
+
65
+ return action
66
+
67
+ def sort_and_sign(self, an_array: np.ndarray) -> [np.ndarray, []]:
68
+ """
69
+ sorts an ndarray of 6 float columns and keeps the "signature": the positions of the items
70
+ before sorting, so that the initial order can be restored after the arrays have been modified.
71
+ the order is restored with the unsort_with_signature function
72
+ :param an_array:
73
+ :return:
74
+ """
75
+ zip_list = zip(an_array, range(len(an_array)))
76
+ zip_sorted = sorted(zip_list, key=lambda x: (x[0][0], x[0][1], x[0][2], x[0][3], x[0][4], x[0][5]))
77
+ sorted_array, signature = zip(*zip_sorted)
78
+ return np.array(sorted_array), signature
79
+
80
+ def unsort_with_signature(self, an_array: np.ndarray, signature: []) -> np.ndarray:
81
+ """
82
+ see above
83
+ :param an_array:
84
+ :param signature:
85
+ :return:
86
+ """
87
+ zip_list = zip(signature, an_array)
88
+ zip_unsorted = sorted(zip_list)
89
+ _, unsorted = zip(*zip_unsorted)
90
+ return np.array(unsorted)
91
+
92
+ def unsort_matrix_with_signatures(self, matrix: np.ndarray, sign_line: np.ndarray, sign_col: np.ndarray) \
93
+ -> np.ndarray:
94
+
95
+ matrix = self.unsort_with_signature(matrix, sign_line)
96
+ matrix = self.unsort_with_signature(matrix.T, sign_col).T
97
+
98
+ return matrix
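
A standalone round trip of the signature mechanism (the same logic as sort_and_sign and unsort_with_signature, written without the wrapper class):

    import numpy as np

    rows = np.array([[3., 0, 0, 0, 0, 0],
                     [1., 0, 0, 0, 0, 0],
                     [2., 0, 0, 0, 0, 0]])

    # sort the rows and remember where each sorted row came from
    zipped = sorted(zip(rows.tolist(), range(len(rows))), key=lambda x: tuple(x[0]))
    sorted_rows, signature = zip(*zipped)          # signature == (1, 2, 0)

    # undo the sort with the signature
    restored = np.array([row for _, row in sorted(zip(signature, sorted_rows))])
    print(np.allclose(restored, rows))             # True: the original order is recovered
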
swarm_policy.py ADDED
@@ -0,0 +1,342 @@
1
+ from dataclasses import dataclass
2
+ import numpy as np
3
+ from stable_baselines3 import SAC
4
+ from os import path
5
+
6
+ import param_
7
+ from drone import Drone
8
+
9
+
10
+ @dataclass
11
+ class SwarmPolicy:
12
+ blues: int
13
+ reds: int
14
+ is_blue: bool
15
+ model: object = None
16
+ count: int = 0
17
+
18
+ def __post_init__(self):
19
+
20
+ dir_path = "policies/last" + f"/b{self.blues}r{self.reds}/"
21
+ model_path = dir_path + ("blues_last.zip" if self.is_blue else "reds_last.zip")
22
+ if path.exists(model_path):
23
+ print("model loaded:" + model_path)
24
+ self.model = SAC.load(model_path, verbose=1)
25
+
26
+ # predicts from the model or from a simple centripetal model
27
+ def predict(self, obs):
28
+
29
+ self.count += 1
30
+
31
+ if self.model:
32
+ action, _ = self.model.predict(obs)
33
+ # verbose = 'prediction from ' + (' blue model' if self.is_blue else ' red model') + ' at ' + str(self.count)
34
+ # print(verbose)
35
+ return action
36
+ else:
37
+ if self.is_blue:
38
+ return self._improved_attack_predict(obs)
39
+ else:
40
+ return self._simple_predict(obs)
41
+
42
+ # the default policy
43
+ def _simple_predict(self, obs):
44
+ simple_obs = _decentralise(obs[0:self.blues*6] if self.is_blue else obs[self.blues*6:(self.blues+self.reds)*6])
45
+ drone = Drone(is_blue=self.is_blue)
46
+ action = np.array([])
47
+ nb_drones = self.blues if self.is_blue else self.reds
48
+ for d in range(nb_drones):
49
+ assign_pos_speed(drone, d, simple_obs)
50
+ '''
51
+ pos_n, speed_n = simple_obs[d*6:d*6+3], simple_obs[d*6+3:d*6+6]
52
+ pos = drone.from_norm(pos_n, drone.max_positions, drone.min_positions)
53
+ drone.position = pos
54
+ speed = drone.from_norm(speed_n, drone.max_speeds, drone.min_speeds)
55
+ drone.speed = speed
56
+ '''
57
+ action_d = drone.simple_red()
58
+ action = np.hstack((action, action_d))
59
+
60
+ action = _centralise(action)
61
+ return action
62
+
63
+
64
+ # the default attack policy
65
+ def _attack_predict(self, obs):
66
+
67
+ def assign_targets(friends_obs, foes_obs):
68
+ '''
69
+ this version is simplistic: each foe grabs its closest still-unassigned friend, cycling over the foes until every friend has a target :)
70
+ :param obs:
71
+ :return:
72
+ '''
73
+ friends_nb = len(friends_obs) // 6
74
+ foes_nb = len(foes_obs) // 6
75
+
76
+ friends_targets = -np.ones(friends_nb, dtype=int)
77
+ while -1 in friends_targets:
78
+ for foe in range(foes_nb):
79
+ foe_pos = _denorm(foes_obs[foe*6:foe*6+3])
80
+ foe_pos_z = foe_pos[0] * np.exp(1j * foe_pos[1])
81
+ min_distance = np.inf
82
+ closest_friend = -1
83
+ for friend in range(friends_nb):
84
+ if friends_targets[friend] == -1:
85
+ friend_pos = _denorm(friends_obs[friend*6:friend*6+3])
86
+ friend_pos_z = friend_pos[0] * np.exp(1j * friend_pos[1])
87
+ distance = np.abs(foe_pos_z - friend_pos_z) ** 2 + (friend_pos[2] - foe_pos[2]) ** 2
88
+ if distance < min_distance:
89
+ min_distance = distance
90
+ closest_friend = friend
91
+ friends_targets[closest_friend] = foe
92
+
93
+ return friends_targets
94
+
95
+
96
+ # gets the friends and foes obs
97
+ blue_obs = _decentralise(obs[0:self.blues * 6])
98
+ red_obs = _decentralise(obs[self.blues * 6:(self.blues + self.reds) * 6])
99
+ friends_obs = blue_obs if self.is_blue else red_obs
100
+ foes_obs = red_obs if self.is_blue else blue_obs
101
+
102
+ # assign red targets to blues
103
+ friends_targets = assign_targets(friends_obs, foes_obs)
104
+
105
+ friend_drone = Drone(is_blue=self.is_blue)
106
+ foe_drone = Drone(is_blue=not self.is_blue)
107
+
108
+ action = np.array([])
109
+ nb_drones = self.blues if self.is_blue else self.reds
110
+ for d in range(nb_drones):
111
+
112
+ # assign denormalised position and speed (in m and m/s) to the friend drone
113
+ friend_drone = assign_pos_speed(friend_drone, d, friends_obs)
114
+ foe_drone_id = friends_targets[d]
115
+
116
+ foe_drone = assign_pos_speed(foe_drone, foe_drone_id, foes_obs)
117
+ target, time_to_target, is_a_catch = calculate_target(friend_drone, foe_drone)
118
+ action_d = friend_drone.simple_red(target=target, z_margin=0)
119
+ action = np.hstack((action, action_d))
120
+
121
+ action = _centralise(action)
122
+ return action
123
+
124
+ # the improved manual attack policy
125
+ def _improved_attack_predict(self, obs):
126
+ # TODO: revamp the algo as follows
127
+ # start from closest reds, find all blues that are compatible with some margin
128
+ # among those blues, choose the blue whose first target is the latest
129
+ # until there is no red left
130
+ # in case there are blues left overs, restart the process, or converge to zero, or..
131
+ # or we decide in advance how many blues we want on the closest and populate several blues against reds
132
+ # at the beginning
133
+ # TODO: check that reds are correctly ordered
134
+ # TODO : add margin in the params
135
+ # TODO : case of the foe is not reachable
136
+
137
+ # gets the friends and foes obs
138
+ blue_obs = _decentralise(obs[0:self.blues * 6])
139
+ red_obs = _decentralise(obs[self.blues * 6:(self.blues + self.reds) * 6])
140
+ friends_obs = blue_obs if self.is_blue else red_obs
141
+ foes_obs = red_obs if self.is_blue else blue_obs
142
+
143
+ friends_nb = self.blues if self.is_blue else self.reds
144
+ foes_nb = self.reds if self.is_blue else self.blues
145
+
146
+ friend_drones = []
147
+ for friend_id in range(friends_nb):
148
+ # assign denormalised position and speed (in m and m/s) to the friend drone
149
+ friend_drone = Drone(is_blue=self.is_blue)
150
+ friend_drone = assign_pos_speed(friend_drone, friend_id, friends_obs)
151
+ friend_drones.append(friend_drone)
152
+
153
+ foe_drones = []
154
+ for foe_id in range(foes_nb):
155
+ # assign denormalised position and speed (in m and m/s) to foe drone
156
+ foe_drone = Drone(is_blue=not self.is_blue)
157
+ foe_drone = assign_pos_speed(foe_drone, foe_id, foes_obs)
158
+ foe_drones.append(foe_drone)
159
+
160
+ targets = np.zeros((friends_nb, foes_nb, 3))
161
+ best_targets = -np.ones((friends_nb, 3))
162
+ times_to_target = -np.ones((friends_nb, foes_nb))
163
+ calculation_done = -np.ones(friends_nb)
164
+ friend_chosen = -np.ones(friends_nb)
165
+
166
+ foe_id = 0
167
+ friends_chosen = 0
168
+ while foe_id < foes_nb-1 and friends_chosen < friends_nb:
169
+ best_friend = -1
170
+ best_target = np.zeros(3)
171
+ longest_time = -np.inf
172
+ foe_drone = foe_drones[foe_id]
173
+ for friend_id in range(friends_nb):
174
+ if friend_chosen[friend_id] == -1: # the friend has no foe target assigned
175
+
176
+ friend_drone = friend_drones[friend_id]
177
+
178
+ if calculation_done[friend_id] == -1: # it has not already been calculated
179
+ target_, time_to_target, is_a_catch = calculate_target(friend_drone, foe_drone)
180
+ times_to_target[friend_id][foe_id] = time_to_target if is_a_catch else np.inf
181
+ targets[friend_id][foe_id] = target_
182
+ if times_to_target[friend_id][foe_id] < np.inf: # it is a catch
183
+
184
+ if calculation_done[friend_id] == -1: # calculation of time with other drones has not been done
185
+ for foe_idx in range(foe_id + 1, foes_nb):
186
+ foex_drone = foe_drones[foe_idx]
187
+ target_, time_to_target, is_a_catch = calculate_target(friend_drone, foex_drone)
188
+ times_to_target[friend_id][foe_idx] = time_to_target if is_a_catch else np.inf
189
+ targets[friend_id][foe_idx] = target_
190
+ calculation_done[friend_id] = 1
191
+ closest_target = np.min(times_to_target[friend_id, foe_id+1:])
192
+ if longest_time < closest_target:
193
+ longest_time = closest_target
194
+ best_friend = friend_id
195
+ best_target = targets[friend_id][foe_id]
196
+
197
+ best_targets[best_friend] = best_target
198
+ friend_chosen[best_friend] = foe_id
199
+ friends_chosen += 1
200
+ foe_id += 1
201
+
202
+ if friends_chosen < friends_nb:
203
+ last_foe = foes_nb - 1
204
+ for friend_id in range(friends_nb):
205
+ if friend_chosen[friend_id] == -1:
206
+ if times_to_target[friend_id, last_foe] == -1:
207
+ friend_drone, foe_drone = friend_drones[friend_id], foe_drones[last_foe]
208
+ target_, time_to_target, is_a_catch = calculate_target(friend_drone, foe_drone)
209
+ targets[friend_id][last_foe] = target_
210
+ closest_target_id = np.argmin(times_to_target[friend_id, :])
211
+ best_targets[friend_id] = targets[friend_id][closest_target_id]
212
+
213
+
214
+
215
+
216
+ action = np.array([])
217
+ for friend_id in range(friends_nb):
218
+ action_d = friend_drones[friend_id].simple_red(target=best_targets[friend_id], z_margin=0)
219
+ action = np.hstack((action, action_d))
220
+
221
+ action = _centralise(action)
222
+ return action
223
+
224
+
225
+ def assign_pos_speed(drone: Drone, d: int, obs: np.ndarray) -> Drone:
226
+ # assign denormalised position and speed (in m and m/s) to friend drone
227
+ d = int(d)
228
+ pos_n, speed_n = obs[d*6:d*6+3], obs[d*6+3:d*6+6]
229
+ pos = drone.from_norm(pos_n, drone.max_positions, drone.min_positions)
230
+ drone.position = pos
231
+ speed = drone.from_norm(speed_n, drone.max_speeds, drone.min_speeds)
232
+ drone.speed = speed
233
+ return drone
234
+
235
+
236
+ def _denorm(pos): # from norm (i.e. already decentralised) to meter
237
+ drone = Drone()
238
+ pos_meter = drone.from_norm(pos, drone.max_positions, drone.min_positions)
239
+ return pos_meter
240
+
241
+
242
+ def _decentralise(obs): # [-1,1] to [0,1]
243
+ obs = (obs+1)/2
244
+ return obs
245
+
246
+
247
+ def _centralise(act): # [0,1] to [-1,1]
248
+ act = (act - 1/2) * 2
249
+ return act
250
+
251
+
252
+ def calculate_target(blue_drone: Drone, red_drone: Drone) -> (np.ndarray(3, ), float, bool):
253
+ '''
254
+
255
+ :param blue_drone:
256
+ :param red_drone:
257
+ :return:
258
+ '''
259
+ # TODO : be more precise at the end of the discovery process
260
+
261
+ def transform(pos, delta_, theta_):
262
+ pos[0] -= delta_
263
+ pos[1] -= theta_
264
+ return pos[0] * np.exp(1j * pos[1])
265
+
266
+ def untransform_to_array(pos, delta_, theta_):
267
+ pos[0] += delta_
268
+ pos[1] += theta_
269
+ return pos
270
+
271
+ theta = red_drone.position[1]
272
+ delta = param_.GROUNDZONE
273
+
274
+ attack_pos = np.copy(blue_drone.position)
275
+ target_pos = np.copy(red_drone.position)
276
+
277
+ z_blue = transform(attack_pos, delta, theta)
278
+ z_red = np.real(transform(target_pos, delta, theta))
279
+
280
+ v_blue = blue_drone.drone_model.max_speed
281
+ v_red = red_drone.drone_model.max_speed
282
+
283
+ blue_shooting_distance = blue_drone.drone_model.distance_to_neutralisation
284
+
285
+ blue_time_to_zero = (np.abs(z_blue) - blue_shooting_distance) / v_blue
286
+ red_time_to_zero = z_red / v_red
287
+
288
+ if red_time_to_zero <= param_.STEP or red_time_to_zero < blue_time_to_zero + param_.STEP:
289
+ return np.zeros(3), red_time_to_zero, False
290
+ else:
291
+ max_target = z_red
292
+ min_target = 0
293
+ while True:
294
+ target = (max_target + min_target) / 2
295
+ blue_time_to_target = max(0, (np.abs(z_blue - target) - blue_shooting_distance) / v_blue)
296
+ red_time_to_target = np.abs(z_red - target) / v_red
297
+ if red_time_to_target - param_.STEP < blue_time_to_target <= red_time_to_target:
298
+ target = untransform_to_array((target / z_red) * target_pos, delta, theta)
299
+ return target, blue_time_to_target, True
300
+ if red_time_to_target < blue_time_to_target:
301
+ max_target = target
302
+ min_target = min_target
303
+ else:  # blue_time_to_target <= red_time_to_target - param_.STEP
304
+ max_target = max_target
305
+ min_target = target
306
+
307
+
308
+
309
+
310
+ def unitary_test(rho_blue: float, theta_blue: float, rho_red: float, theta_red: float):
311
+ '''
312
+ tests for the calculate target function
313
+ :param rho_blue:
314
+ :param theta_blue:
315
+ :param rho_red:
316
+ :param theta_red:
317
+ :return:
318
+ '''
319
+ blue_drone = Drone()
320
+ blue_drone.position = np.array([rho_blue, theta_blue, 100])
321
+ red_drone = Drone(is_blue=False)
322
+ red_drone.position = np.array([rho_red, theta_red, 100])
323
+ tg, time, is_a_catch = calculate_target(blue_drone, red_drone)
324
+ print('rho_blue : ', rho_blue, ' theta_blue : ', theta_blue, ' rho_red : ', rho_red, ' theta_red : ', theta_red,
325
+ ' tg : ', tg, ' time : ', time)
326
+ return tg, time
327
+
328
+
329
+ def test():
330
+ '''
331
+ test for the calculate trajectory function
332
+ :return:
333
+ '''
334
+ for rho_blue in [1000]:
335
+ for theta_blue in np.pi * np.array([-1, 0.75, 0.5, 0.25, 0]):
336
+ for rho_red in [1000]:
337
+ for theta_red in np.pi * np.array([0, 1/4]):
338
+ unitary_test(rho_blue=rho_blue, theta_blue=theta_blue, rho_red=rho_red, theta_red=theta_red)
339
+ print('done')
340
+
341
+
342
+
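
The core of calculate_target is a bisection along the red's straight path to the target: find the point that the blue can reach (within shooting range) no later than the red. A 1-D sketch with plain numbers instead of Drone objects; all values are illustrative:

    import numpy as np

    z_blue = 900 + 400j          # blue position in the rotated complex plane
    z_red = 1200.0               # red distance to the ground zone along the real axis
    v_blue, v_red = 20.0, 15.0   # max speeds in m/s
    shoot_dist, step = 50.0, 1.0

    lo, hi = 0.0, z_red
    for _ in range(60):                        # bisection on the interception abscissa
        mid = (lo + hi) / 2
        t_blue = max(0.0, (abs(z_blue - mid) - shoot_dist) / v_blue)
        t_red = abs(z_red - mid) / v_red
        if t_red - step < t_blue <= t_red:     # blue arrives just in time: good target
            break
        if t_red < t_blue:                     # blue is too late: aim closer to the ground zone
            hi = mid
        else:                                  # blue is too early: aim closer to the red
            lo = mid
    print(round(mid, 1), round(t_blue, 1))     # ~928.1 and ~17.5 with these numbers
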
swarmenv.py ADDED
@@ -0,0 +1,100 @@
1
+ import gym
2
+ from gym import spaces
3
+ import numpy as np
4
+
5
+ import param_
6
+ from settings import Settings
7
+ from playground import Playground
8
+ from team import BlueTeam, RedTeam
9
+
10
+
11
+ class SwarmEnv(gym.Env):
12
+ """
13
+ Custom 3D-Environment that follows gym interface.
14
+ This is a 3D env where the blue drones defend a circular GROUNDZONE against an attack by red drones
15
+ """
16
+
17
+ def __init__(self, blues=Settings.blues, reds=Settings.reds):
18
+ """
19
+ :param blues: the number of blue defending drones
+ :param reds: the number of red attacking drones
20
+ """
21
+ super(SwarmEnv, self).__init__()
22
+
23
+ self.nb_blues = blues
24
+ self.nb_reds = reds
25
+
26
+ self.blue_team = BlueTeam(number_of_drones=self.nb_blues)
27
+ self.red_team = RedTeam(number_of_drones=self.nb_reds)
28
+
29
+ self.playground = Playground(env=self, blue_drones=self.blue_team.drones, red_drones=self.red_team.drones)
30
+
31
+ self.steps = 0
32
+
33
+ self.observation_space = spaces.Tuple((
34
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, 6), dtype=np.float32),
35
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, 6), dtype=np.float32),
36
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, self.nb_reds), dtype=np.float32),
37
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, self.nb_blues), dtype=np.float32),
38
+ spaces.MultiBinary(self.nb_blues),
39
+ spaces.MultiBinary(self.nb_reds),
40
+ ))
41
+
42
+ self.action_space = spaces.Tuple((
43
+ spaces.Box(low=0, high=1, shape=(self.nb_blues, 3), dtype=np.float32),
44
+ spaces.Box(low=0, high=1, shape=(self.nb_reds, 3), dtype=np.float32)))
45
+
46
+ def reset(self, obs=None):
47
+ """
48
+ resets the environment as part of Gym interface
49
+ """
50
+ if obs:
51
+ blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads = obs
52
+ else:
53
+ blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads = None, None, None, None, None, None
54
+
55
+ self.blue_team.reset(obs=blue_obs)
56
+ self.red_team.reset(obs=red_obs)
57
+ self.playground.reset()
58
+ self.steps = 0
59
+
60
+ # get observations from blue and red teams
61
+ blue_obs, blue_deads = self.blue_team.get_observation()
62
+ red_obs, red_deads = self.red_team.get_observation()
63
+ blues_fire, reds_fire = self.playground.get_observation()
64
+
65
+ return blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads
66
+
67
+ def render(self, mode='human'):
68
+ pass
69
+
70
+ def step(self, action):
71
+
72
+ self.steps += 1
73
+
74
+ blue_action, red_action = action
75
+ blue_obs, blue_reward, blue_done, blue_info = self.blue_team.step(blue_action)
76
+ red_obs, red_reward, red_done, red_info = self.red_team.step(red_action)
77
+ bf_obs, bf_reward, remaining_blues, blue_shots, rf_obs, rf_reward, remaining_reds, red_shots = \
78
+ self.playground.step()
79
+ _, blue_deads = self.blue_team.get_observation()
80
+ _, red_deads = self.red_team.get_observation()
81
+ obs = blue_obs, red_obs, bf_obs, rf_obs, blue_deads, red_deads
82
+ reward = blue_reward + red_reward + bf_reward + rf_reward
83
+ done = False
84
+
85
+ info = {}
86
+ info['red_oob'] = red_info['oob']
87
+ info['blue_oob'] = blue_info['oob']
88
+ info['hits_target'] = red_info['hits_target']
89
+ info['blue_shots'] = blue_shots
90
+ info['red_shots'] = red_shots
91
+ info['weighted_red_distance'] = red_info['delta_distance']
92
+ info['remaining blues'] = len(blue_deads)-sum(blue_deads)
93
+ info['remaining reds'] = len(red_deads)-sum(red_deads)
94
+ info['ttl'] = red_info['ttl']
95
+ info['distance_to_straight_action'] = red_info['distance_to_straight_action']
96
+
97
+ if red_info['oob'] + blue_info['oob'] + red_info['hits_target'] + blue_shots + red_shots > 0:
98
+ print('something happened')
99
+
100
+ return obs, reward, done, info
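
The observation is a tuple of normalised per-team states, the two fire matrices and the dead flags. A standalone illustration of that layout with gym spaces, for 2 blues and 3 reds:

    import numpy as np
    from gym import spaces

    nb_blues, nb_reds = 2, 3
    observation_space = spaces.Tuple((
        spaces.Box(low=0, high=1, shape=(nb_blues, 6), dtype=np.float32),         # blue pos + speed
        spaces.Box(low=0, high=1, shape=(nb_reds, 6), dtype=np.float32),          # red pos + speed
        spaces.Box(low=0, high=1, shape=(nb_blues, nb_reds), dtype=np.float32),   # blues firing at reds
        spaces.Box(low=0, high=1, shape=(nb_reds, nb_blues), dtype=np.float32),   # reds firing at blues
        spaces.MultiBinary(nb_blues),                                             # blue dead flags
        spaces.MultiBinary(nb_reds),                                              # red dead flags
    ))
    sample = observation_space.sample()
    print(sample[0].shape, sample[2].shape)   # (2, 6) (2, 3)
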
symetry_wrap.py ADDED
@@ -0,0 +1,95 @@
1
+ import numpy as np
2
+ import gym
3
+
4
+
5
+ class SymetryWrapper(gym.Wrapper):
6
+ """
7
+ :param env: (gym.Env) Gym environment that will be wrapped
8
+ """
9
+
10
+ def __init__(self, env):
11
+ # Call the parent constructor, so we can access self.env later
12
+
13
+ self.symetry = False # no need to perform a symetry
14
+ super(SymetryWrapper, self).__init__(env)
15
+
16
+ def reset(self):
17
+ """
18
+ Reset the environment
19
+ """
20
+ obs = self.env.reset()
21
+
22
+ obs = self.post_obs(obs)
23
+
24
+ return obs
25
+
26
+ def step(self, action):
27
+ """
28
+ :param action: ([float] or int) Action taken by the agent
29
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
30
+ """
31
+ if self.symetry:
32
+ action = symetrise_action(action)
33
+
34
+ obs, reward, done, info = self.env.step(action)
35
+
36
+ obs = self.post_obs(obs)
37
+
38
+ return obs, reward, done, info
39
+
40
+ def post_obs(self, obs):
41
+ self.symetry = get_symetry(obs)
42
+ if self.symetry:
43
+ obs = symetrise_obs(obs)
44
+ return obs
45
+
46
+
47
+ def get_symetry(obs):
48
+ blue_obs, red_obs, blue_fire, red_fire = obs
49
+
50
+ # count the drones who are positioned above the 0 x-axis
51
+ count = 0
52
+ for this_obs in (blue_obs, red_obs):
53
+ for d in this_obs:
54
+ add = 1 if (d[1] < 0.5) else 0
55
+ count += add
56
+
57
+ # compare with the total
58
+ symetry = bool(2*count < (len(blue_obs) + len(red_obs)))
59
+
60
+ return symetry
61
+
62
+
63
+ def symetrise_obs(obs):
64
+
65
+ blue_obs, red_obs, blue_fire, red_fire = obs
66
+
67
+ for this_obs in (blue_obs, red_obs):
68
+ # symetrise positions and speeds
69
+ this_obs[:, 1] = 1 - this_obs[:, 1]
70
+ this_obs[:, 4] = 1 - this_obs[:, 4]
71
+
72
+ return blue_obs, red_obs, blue_fire, red_fire
73
+
74
+
75
+ def symetrise_action(action):
76
+
77
+ blue_action, red_action = action
78
+
79
+ for this_action in (blue_action, red_action):
80
+ for act in this_action:
81
+
82
+ # symetrise action
83
+ act[1] = - act[1]
84
+
85
+ action = blue_action, red_action
86
+
87
+ return action
88
+
89
+
90
+ def test_symetrise_obs():
91
+
92
+ obs = np.arange(12).reshape(2, 6), np.arange(12).reshape(2, 6), np.random.random((1, 1)), np.random.random((1, 1))
93
+ print(obs)
94
+ symetrise_obs(obs)
95
+ print(obs)
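
The mirroring applied above, on a toy normalised observation; applying it twice restores the original, which is why the wrapper can safely flip the world whenever most drones sit below the x-axis:

    import numpy as np

    obs = np.array([[0.5, 0.2, 0.1, 0.3, 0.7, 0.0]])    # one drone, normalised values
    mirrored = obs.copy()
    mirrored[:, 1] = 1 - mirrored[:, 1]                 # theta   -> 1 - theta
    mirrored[:, 4] = 1 - mirrored[:, 4]                 # heading -> 1 - heading
    print(round(mirrored[0, 1], 1), round(mirrored[0, 4], 1))   # 0.8 0.3

    twice = mirrored.copy()
    twice[:, 1] = 1 - twice[:, 1]
    twice[:, 4] = 1 - twice[:, 4]
    print(np.allclose(twice, obs))                      # True: the reflection is an involution
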
team.py ADDED
@@ -0,0 +1,131 @@
1
+ import numpy as np
2
+ from dataclasses import dataclass
3
+
4
+ import param_
5
+ from drone import Drone
6
+ from dronemodel import DroneModel
7
+ from settings import Settings
8
+
9
+
10
+ @dataclass
11
+ class Team:
12
+ """
13
+ Creates a team (either red or blue / foe or friend)
14
+ """
15
+
16
+ is_blue: bool
17
+ drones: [Drone]
18
+ drone_model: DroneModel
19
+ weighted_distance: float = 0
20
+
21
+ def reset(self, obs=None):
22
+
23
+ self.delta_weighted_distance()
24
+ if obs:
25
+ for drone, obs in zip(self.drones, obs):
26
+ drone.reset(obs=obs)
27
+ else:
28
+ for drone in self.drones:
29
+ drone.reset()
30
+
31
+ def get_observation(self) -> np.ndarray:
32
+ """
33
+ get the observation for the RL agent
34
+ :return: observation in the form of flatten np.arrays of shape(squad_number, 6*squad_size)
35
+ """
36
+ obs = np.array([drone.get_observation() for drone in self.drones])
37
+ deads = ~np.array([drone.is_alive for drone in self.drones])
38
+
39
+ return obs, deads
40
+
41
+ def step(self, action: np.ndarray):
42
+ obs = np.zeros((len(self.drones), 6))
43
+ done = np.zeros((len(self.drones),))
44
+ reward = np.zeros((len(self.drones),))
45
+ infos = [{} for d in range(len(self.drones))]
46
+ for index, drone in enumerate(self.drones):
47
+ obs[index], reward[index], done[index], infos[index] = drone.step(action[index])
48
+ done = (sum(done) == len(self.drones))
49
+ info = {'oob': 0, 'hits_target': 0, 'ttl': param_.DURATION, 'distance_to_straight_action': 0}
50
+ for i in infos:
51
+ info['ttl'] = min(info['ttl'], i['ttl'])
52
+ info['oob'] += i['oob'] if 'oob' in i else 0
53
+ info['hits_target'] += i['hits_target'] if 'hits_target' in i else 0
54
+ info['delta_distance'] = 0 if self.is_blue else self.delta_weighted_distance()
55
+ info['distance_to_straight_action'] += i['distance_to_straight_action'] \
56
+ if 'distance_to_straight_action' in i else 0
57
+ return obs, sum(reward), done, info
58
+
59
+ def delta_weighted_distance(self):
60
+
61
+ # distance of drones to 0
62
+ team_distance = np.array([d.distance() for d in self.drones if d.is_alive])
63
+ weighted_distance = np.sum(np.exp(-0.5 * (team_distance / (Settings.perimeter/2)) ** 2))
64
+
65
+ delta = weighted_distance - self.weighted_distance if 0 < self.weighted_distance else 0
66
+
67
+ self.weighted_distance = weighted_distance
68
+
69
+ return delta
70
+
71
+
72
+ class BlueTeam(Team):
73
+ """
74
+ Creates the blue team
75
+ """
76
+
77
+ def __init__(self, number_of_drones: int = Settings.blues):
78
+ self.is_blue = True
79
+ self.drone_model = DroneModel(self.is_blue)
80
+
81
+ # initialise blue positions and speeds
82
+ positions = np.zeros((number_of_drones, 3))
83
+ speeds = np.zeros((number_of_drones, 3))
84
+ blue_speed = Settings.blue_speed_init * self.drone_model.max_speed
85
+ circle = index = 0
86
+ for d in range(number_of_drones):
87
+ positions[d] = np.array([Settings.blue_circles_rho[circle],
88
+ Settings.blue_circles_theta[circle] + index * 2 * np.pi / 3,
89
+ Settings.blue_circles_zed[circle]])
90
+ clockwise = 1 - 2 * (circle % 2)
91
+ speeds[d] = np.array([blue_speed, np.pi / 6 * clockwise, 0])
92
+ index += 1
93
+ if index == Settings.blues_per_circle[circle]:
94
+ index = 0
95
+ circle += 1
96
+
97
+ self.drones = [Drone(is_blue=True, position=position, speed=speed, id_=id_)
98
+ for (id_, position, speed) in zip(range(number_of_drones), positions, speeds)]
99
+
100
+
101
+ class RedTeam(Team):
102
+ """
103
+ Creates the red team
104
+ """
105
+
106
+ def __init__(self, number_of_drones: int = Settings.reds):
107
+ self.is_blue = False
108
+ self.drone_model = DroneModel(self.is_blue)
109
+
110
+ positions = np.zeros((number_of_drones, 3))
111
+ positions_noise = np.zeros((number_of_drones, 3))
112
+ speeds = np.zeros((number_of_drones, 3))
113
+ speed_rho = Settings.red_speed_init * self.drone_model.max_speed
114
+ squad = index = 0
115
+ for d in range(number_of_drones):
116
+ positions[d] = [Settings.red_squads_rho[squad],
117
+ Settings.red_squads_theta[squad],
118
+ Settings.red_squads_zed[squad]]
119
+ positions_noise[d] = [Settings.red_rho_noise[squad],
120
+ Settings.red_theta_noise[squad],
121
+ Settings.red_zed_noise[squad]]
122
+ speeds[d] = [speed_rho, np.pi + positions[d][1], 0]
124
+ index += 1
125
+ if index == Settings.red_squads[squad]:
126
+ index = 0
127
+ squad += 1
128
+
129
+ self.drones = [Drone(is_blue=False, position=position, position_noise=position_noise, speed=speed, id_=id_)
130
+ for (id_, position, position_noise, speed) in
131
+ zip(range(len(positions)), positions, positions_noise, speeds)]
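
The Gaussian weighting used in delta_weighted_distance: each living drone contributes exp(-0.5 * (d / (perimeter/2))**2), so drones close to the target dominate the threat measure. A quick numeric check with an illustrative perimeter value:

    import numpy as np

    perimeter = 2000.0                        # illustrative, not the value in param_.py
    distances = np.array([100.0, 500.0, 1800.0])
    weights = np.exp(-0.5 * (distances / (perimeter / 2)) ** 2)
    print(weights.round(3))                   # [0.995 0.882 0.198]
    print(round(float(weights.sum()), 3))     # 2.075: the team-level weighted "threat"
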
team_wrap.py ADDED
@@ -0,0 +1,107 @@
1
+ import numpy as np
2
+ import gym
3
+ from gym import spaces
4
+
5
+ from swarm_policy import SwarmPolicy
6
+ from settings import Settings
7
+
8
+
9
+ class TeamWrapper(gym.Wrapper):
10
+ """
11
+ :param env: (gym.Env) Gym environment that will be wrapped
12
+ """
13
+
14
+ def __init__(self, env, is_blue: bool = True, is_double: bool = False, is_unkillable: bool = Settings.is_unkillable):
15
+
16
+ self.is_blue = is_blue
17
+ self.is_double = is_double
18
+ self.is_unkillable = is_unkillable
19
+
20
+
21
+ nb_blues, nb_reds = env.nb_blues, env.nb_reds
22
+
23
+ self.foe_action = None
24
+ self.foe_policy = SwarmPolicy(is_blue=not is_blue, blues=nb_blues, reds=nb_reds)
25
+
26
+ if is_double:
27
+ env.action_space = spaces.Tuple((
28
+ spaces.Box(low=-1, high=1, shape=(nb_blues*3,), dtype=np.float32),
29
+ spaces.Box(low=-1, high=1, shape=(nb_reds*3,), dtype=np.float32)
30
+ ))
31
+ else:
32
+ nb_friends = nb_blues if is_blue else nb_reds
33
+ env.action_space = spaces.Box(low=-1, high=1, shape=(nb_friends*3,), dtype=np.float32)
34
+
35
+ flatten_dimension = 6 * nb_blues + 6 * nb_reds # the position and speeds for blue and red drones
36
+ flatten_dimension += (nb_blues * nb_reds) * (1 if is_unkillable else 2) # the fire matrices
37
+
38
+ env.observation_space = spaces.Box(low=-1, high=1, shape=(flatten_dimension,), dtype=np.float32)
39
+
40
+ super(TeamWrapper, self).__init__(env)
41
+
42
+ def reset(self):
43
+ """
44
+ Reset the environment
45
+ """
46
+ obs = self.env.reset()
47
+ obs = self.post_obs(obs)
48
+
49
+ return obs
50
+
51
+ def step(self, action):
52
+ """
53
+ :param action: ([float] or int) Action taken by the agent
54
+ :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information
55
+ """
56
+
57
+ if self.is_double:
58
+ blue_action, red_action = action
59
+ blue_action = _decentralise(blue_action)
60
+ red_action = _decentralise(red_action)
61
+ action = _unflatten(blue_action), _unflatten(red_action)
62
+ else:
63
+ friend_action = _decentralise(action)
64
+ foe_action = _decentralise(self.foe_action)
65
+ if self.is_blue:
66
+ action = _unflatten(friend_action), _unflatten(foe_action)
67
+ else:
68
+ action = _unflatten(foe_action), _unflatten(friend_action)
69
+
70
+ obs, reward, done, info = self.env.step(action)
71
+
72
+ obs = self.post_obs(obs)
73
+
74
+ return obs, reward, done, info
75
+
76
+ def post_obs(self, obs):
77
+
78
+ if self.is_unkillable:
79
+ o1, o2, o3, _ = obs
80
+ obs = o1, o2, o3
81
+ flatten_obs = _flatten(obs)
82
+ centralised_obs = _centralise(flatten_obs)
83
+
84
+ if not self.is_double:
85
+ self.foe_action = self.foe_policy.predict(centralised_obs)
86
+
87
+ return centralised_obs
88
+
89
+
90
+ def _unflatten(action):
91
+ return np.split(action, len(action) // 3)
92
+
93
+
94
+ def _flatten(obs): # need normalisation too
95
+ fl_obs = [this_obs.flatten().astype('float32') for this_obs in obs]
96
+ fl_obs = np.hstack(fl_obs)
97
+ return fl_obs
98
+
99
+
100
+ def _centralise(obs): # [0,1] to [-1,1]
101
+ obs = 2 * obs - 1
102
+ return obs
103
+
104
+
105
+ def _decentralise(act): # [-1,1] to [0,1]
106
+ act = 0.5 * (act + 1)
107
+ return act
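
The flattening and [0,1] <-> [-1,1] rescaling that TeamWrapper applies around the inner tuple observation, on toy data (unkillable case, so only one fire matrix is kept):

    import numpy as np

    nb_blues, nb_reds = 2, 3
    blue_obs = np.random.random((nb_blues, 6)).astype('float32')
    red_obs = np.random.random((nb_reds, 6)).astype('float32')
    blue_fire = np.random.random((nb_blues, nb_reds)).astype('float32')

    flat = np.hstack([o.flatten() for o in (blue_obs, red_obs, blue_fire)])
    centralised = 2 * flat - 1          # [0,1] -> [-1,1], what _centralise does
    restored = 0.5 * (centralised + 1)  # _decentralise inverts it
    print(flat.shape)                   # (36,) = 6*2 + 6*3 + 2*3
    print(np.allclose(restored, flat))  # True
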
train.py ADDED
@@ -0,0 +1,174 @@
1
+ import numpy as np
2
+ from stable_baselines3 import SAC
3
+ from stable_baselines3.sac.policies import MlpPolicy
4
+ from stable_baselines3.common.evaluation import evaluate_policy
5
+ from stable_baselines3.common.env_checker import check_env
6
+ import os
7
+
8
+ from monitor_wrap import MonitorWrapper
9
+ from filter_wrap import FilterWrapper
10
+ from distribution_wrap import DistriWrapper
11
+ from redux_wrap import ReduxWrapper
12
+ from symetry_wrap import SymetryWrapper
13
+ from rotate_wrap import RotateWrapper
14
+ from sort_wrap import SortWrapper
15
+ from team_wrap import TeamWrapper
16
+ from reward_wrap import RewardWrapper
17
+
18
+ from settings import Settings
19
+ from swarmenv import SwarmEnv
20
+ import param_
21
+
22
+
23
+ def bi_train(blue_model, red_model, blues: int = 1, reds: int = 1,
24
+ blue_dispersion: np.float32 = 1, red_dispersion: np.float32 = 1, total_timesteps: int = 1000):
25
+ # If needed create save dir
26
+ save_dir = "policies/" + Settings.policy_folder + f"/b{blues}r{reds}/"
27
+ save_last_dir = "policies/last" + f"/b{blues}r{reds}/"
28
+ os.makedirs(save_dir, exist_ok=True)
29
+ os.makedirs(save_last_dir, exist_ok=True)
30
+
31
+ # set the dispersion to initial drone positions
32
+ Settings.blue_distance_factor = blue_dispersion * Settings.blue_distance_factor
33
+ Settings.red_distance_factor = red_dispersion * Settings.red_distance_factor
34
+ Settings.red_theta_noise = red_dispersion * Settings.red_theta_noise
35
+ Settings.red_rho_noise = red_dispersion * Settings.red_rho_noise
36
+
37
+ # launch learning for red drones and then blue drones
38
+ red_model.learn(total_timesteps=total_timesteps)
39
+ mean_reward, std_reward = evaluate_policy(red_model, red_model.env, n_eval_episodes=10)
40
+ print(f"REDS b{blues}r{reds} disp_b:{10*blue_dispersion:2.0f} disp_r{10*red_dispersion:2.0f}: "
41
+ f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
42
+ red_model.save(save_dir + f"reds_b{10 * blue_dispersion:2.0f}r{10 * red_dispersion:2.0f}")
43
+ red_model.save(save_last_dir + "reds_last")
44
+
45
+ blue_model.learn(total_timesteps=total_timesteps)
46
+ mean_reward, std_reward = evaluate_policy(blue_model, blue_model.env, n_eval_episodes=10)
47
+ print(f"BLUES b{blues}r{reds} disp_b:{10*blue_dispersion:2.0f} disp_r{10*red_dispersion:2.0f}: "
48
+ f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
49
+ blue_model.save(save_dir + f"blues_{10 * blue_dispersion:2.0f}r{10 * red_dispersion:2.0f}")
50
+ blue_model.save(save_last_dir + "blues_last")
51
+
52
+ return blue_model, red_model
53
+
54
+
55
+ def meta_train(blues: int = 1, reds: int = 1,
56
+ max_dispersion: np.float32 = 3, iteration: int = 10,
57
+ total_timesteps: int = 100):
58
+ Settings.blues, Settings.reds = blues, reds
59
+
60
+ # launch the episode to get the data
61
+ steps = int(param_.DURATION / param_.STEP)
62
+
63
+ env = SortWrapper(
64
+ SymetryWrapper(
65
+ RotateWrapper(
66
+ ReduxWrapper(
67
+ DistriWrapper(
68
+ FilterWrapper(
69
+ MonitorWrapper(
70
+ SwarmEnv(blues=blues, reds=reds), steps, verbose=False)))))))
71
+
72
+ blue_env = RewardWrapper(TeamWrapper(env, is_blue=True), is_blue=True)
73
+ red_env = RewardWrapper(TeamWrapper(env, is_blue=False), is_blue=False)
74
+
75
+ blue_model = SAC(MlpPolicy, blue_env, verbose=0)
76
+ red_model = SAC(MlpPolicy, red_env, verbose=0)
77
+
78
+ for red_dispersion in np.linspace(0.1, max_dispersion, num=iteration):
79
+ for blue_dispersion in np.linspace(max_dispersion, 0.3, num=iteration):
80
+ blue_model, red_model = bi_train(
81
+ blue_model, red_model, blues=blues, reds=reds,
82
+ blue_dispersion=blue_dispersion, red_dispersion=red_dispersion,
83
+ total_timesteps=total_timesteps)
84
+
85
+
86
+ def super_meta_train(max_blues: int = 3, max_reds: int = 3, max_dispersion: np.float32 = 3,
87
+ iteration: int = 10, total_timesteps: int = 100, policy_folder: str = "default"):
88
+ Settings.policy_folder = policy_folder
89
+ for drones_nb in range(2, max_blues + max_reds + 1):
90
+ for blues in range(1, max_blues + 1):
91
+ reds = drones_nb - blues
92
+ if 1 <= reds <= max_reds:
93
+ print(f"reds :{reds}, blues: {blues}")
94
+ meta_train(blues=blues, reds=reds,
95
+ max_dispersion=max_dispersion, iteration=iteration, total_timesteps=total_timesteps)
96
+
97
+
98
+ def print_spaces(env, name: str):
99
+ print("++++++++++++")
100
+ print(name)
101
+ print(env.action_space)
102
+ print(env.observation_space)
103
+ print("============")
104
+ check_env(env, warn=True)
105
+
106
+
107
+ # super_meta_train(max_blues=1, max_reds=1, iteration=5, max_dispersion=1, total_timesteps=50000, policy_folder="0528_14")
108
+ # super_meta_train(max_blues=2, max_reds=2, iteration=4, max_dispersion=3, total_timesteps=10, policy_folder="0528_test")
109
+
110
+
111
+ def simple_red_train(max_dispersion: np.float32 = 3,
112
+ blues: int = 1, reds: int = 1,
113
+ iteration: int = 25, total_timesteps: int = 100,
114
+ policy_folder: str = "simple_red"):
115
+ Settings.policy_folder = policy_folder
116
+ print(f"Simple_red: reds :{reds}, blues: {blues}")
117
+ # If needed create save dir
118
+ save_dir = "policies/" + Settings.policy_folder + f"/b{blues}r{reds}/"
119
+ save_last_dir = "policies/last" + f"/b{blues}r{reds}/"
120
+ os.makedirs(save_dir, exist_ok=True)
121
+ os.makedirs(save_last_dir, exist_ok=True)
122
+
123
+ # launch the episode to get the data
124
+ steps = int(param_.DURATION / param_.STEP)
125
+ Settings.blues, Settings.reds = blues, reds
126
+
127
+ env = SortWrapper(
128
+ SymetryWrapper(
129
+ RotateWrapper(
130
+ ReduxWrapper(
131
+ DistriWrapper(
132
+ FilterWrapper(
133
+ MonitorWrapper(
134
+ SwarmEnv(blues=blues, reds=reds), steps, verbose=False)))))))
135
+
136
+
137
+
138
+ red_env = RewardWrapper(TeamWrapper(env, is_blue=False), is_blue=False)
139
+ red_model = SAC(MlpPolicy, red_env, verbose=1)
140
+
141
+
142
+ # set the dispersion to initial drone positions
143
+ Settings.blue_distance_factor = 10 * Settings.blue_distance_factor
144
+
145
+ this_iteration = 0
146
+
147
+ for red_dispersion in np.linspace(0.33, max_dispersion, num=iteration):
148
+
149
+ Settings.red_distance_factor = red_dispersion
150
+
151
+ # launch learning for red drones and then blue drones
152
+ this_iteration += 1
153
+ batch = 1
154
+ mean_reward = 0
155
+ delta_reward = 0
156
+ stability = 0
157
+ count = 0
158
+ while mean_reward < 9 or stability < 3 or count < 30:
159
+ count += 1
160
+ red_model.learn(total_timesteps=total_timesteps//10)
161
+ last_reward = mean_reward
162
+ mean_reward, std_reward = evaluate_policy(red_model, red_model.env, n_eval_episodes=100)
163
+ delta_reward = mean_reward - last_reward
164
+ if -0.1 <= delta_reward <= 0.1:
165
+ stability += 1
166
+ else:
167
+ stability = 0
168
+ print(f"REDS b{blues}r{reds} iteration{this_iteration} batch{batch}: "
169
+ f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
170
+ red_model.save(save_dir + f"{this_iteration} batch{batch+1}")
171
+ red_model.save(save_last_dir + "reds_last")
172
+ batch += 1
173
+
174
+ simple_red_train(total_timesteps=50000, policy_folder="simply_red")
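
The stopping rule inside simple_red_train keeps running small learning batches until the evaluated reward is high enough, has been stable for three evaluations in a row, and a minimum number of batches has passed. The same loop, simulated with a fake reward curve standing in for evaluate_policy:

    import numpy as np

    rewards = iter(np.concatenate([np.linspace(0, 10, 40), np.full(20, 10.0)]))
    mean_reward, stability, count = 0.0, 0, 0
    while mean_reward < 9 or stability < 3 or count < 30:
        count += 1
        last_reward = mean_reward
        mean_reward = float(next(rewards))      # stands in for evaluate_policy(...)
        stability = stability + 1 if abs(mean_reward - last_reward) <= 0.1 else 0
    print(count, mean_reward, stability)        # stops once all three conditions hold
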
utils.py ADDED
@@ -0,0 +1,65 @@
1
+ """
2
+ helpers for the transformations and calculations associated with position, speed and acceleration
3
+ """
4
+
5
+ import numpy as np
6
+
7
+
8
+ def position_to_xyz(position: [float]) -> [float]:
9
+ """
10
+ returns the 3D xyz coordinates from a cylindrical (rho, theta, zed) representation
11
+ :param position: array (3,) with rho in meter, theta in rad, zed in meter
12
+ :return: float array (3,) with x, y, z in meter
13
+ """
14
+ pos = position[0] * np.exp(1j * position[1])
15
+ return [np.real(pos), np.imag(pos), position[2]]
16
+
17
+
18
+ """
19
+ def _test_position_to_norm():
20
+ assert position_to_norm([param.PERIMETER, 0, 100]) == [1, 0, 1]
21
+ assert position_to_norm([0, -np.pi / 2, 0]) == [0, 0.75, 0]
22
+ assert position_to_norm([0, np.pi / 2, 0]) == [0, 0.25, 0]
23
+ """
24
+
25
+
26
+ def is_in_the_cone(position1: [float], position2: [float], vector2: [float], angle: float) -> bool:
27
+ """
28
+ checks whether the point at position2 lies inside the cone of half-angle `angle` whose apex is position1 and whose axis is vector2
29
+ :param position1: in x, y, z
30
+ :param position2: in x, y, z
31
+ :param vector2: in x, y, z
32
+ :param angle: in rad
33
+ :return:
34
+ """
35
+ vector1 = np.array(position2, dtype=float) - np.array(position1)
36
+ vector1 /= np.linalg.norm(vector1)
37
+ vector2 = np.array(vector2, dtype=float)
38
+ vector2 /= np.linalg.norm(vector2)
39
+ cos_theta = np.dot(vector1, vector2)
40
+ if 0 < cos_theta:
41
+ theta = np.arcsin(np.sqrt(1 - cos_theta ** 2))
42
+ return theta < angle
43
+ return False
44
+
45
+
46
+ def _test_is_in_the_cone():
47
+ assert is_in_the_cone([0, 0, 0], [1, 0.1, 0], [1, 0, 0], np.pi / 5)
48
+ assert not is_in_the_cone([0, 0, 0], [1, 0.1, 0], [0, 1, 0], np.pi / 5)
49
+ pass
50
+
51
+
52
+ def rhotheta_to_latlon(rho: float, theta: float, lat_tg: float, lon_tg: float) -> [float, float]:
53
+ """
54
+ transforms polar coordinates into lat, lon
55
+ :param rho:
56
+ :param theta:
57
+ :param lat_tg: latitude of the target (0,0)
58
+ :param lon_tg: longitude of the target (0,0)
59
+ :return:
60
+ """
61
+ z = rho * np.exp(1j * theta)
62
+ lat = np.imag(z) * 360 / (40075 * 1000) + lat_tg
63
+ lon = np.real(z) * 360 / (40075 * 1000 * np.cos(np.pi / 180 * lat)) + lon_tg
64
+ return lat, lon
65
+
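
A quick sanity check of rhotheta_to_latlon: 1 km due north of the target should shift the latitude by roughly 0.009 degrees and leave the longitude essentially unchanged (the target coordinates below are illustrative):

    import numpy as np
    from utils import rhotheta_to_latlon   # assumes utils.py is on the path

    lat_tg, lon_tg = 48.8566, 2.3522       # roughly Paris
    lat, lon = rhotheta_to_latlon(rho=1000.0, theta=np.pi / 2, lat_tg=lat_tg, lon_tg=lon_tg)
    print(round(lat - lat_tg, 5), round(lon - lon_tg, 7))   # ~0.00898 ~0.0
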