|
import numpy as np |
|
import gym |
|
|
|
from drone import Drone |
|
|
|
|
|
class RotateWrapper(gym.Wrapper): |
|
""" |
|
:param env: (gym.Env) Gym environment that will be wrapped |
|
""" |
|
|
|
def __init__(self, env): |
|
|
|
super(RotateWrapper, self).__init__(env) |
|
self.angle = 0 |
|
|
|
def reset(self): |
|
""" |
|
Reset the environment |
|
""" |
|
obs = self.env.reset() |
|
|
|
obs = self.post_obs(obs) |
|
|
|
return obs |
|
|
|
def step(self, action): |
|
""" |
|
:param action: ([float] or int) Action taken by the agent |
|
:return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional informations |
|
""" |
|
|
|
action = self.rotate_action(action) |
|
|
|
obs, reward, done, info = self.env.step(action) |
|
|
|
obs = self.post_obs(obs) |
|
|
|
return obs, reward, done, info |
|
|
|
def post_obs(self, obs): |
|
self.angle = self.get_angle(obs) |
|
return self.rotate_obs(obs) |
|
|
|
def get_angle(self, obs: np.ndarray) -> float: |
|
blue_obs, red_obs, blue_fire, red_fire = obs |
|
sigma = 0 |
|
|
|
for this_obs in (blue_obs, red_obs): |
|
for d in this_obs: |
|
sigma += d[0] * np.exp(1j * d[1]) |
|
angle = np.angle(sigma) |
|
return angle |
|
|
|
def rotate_obs(self, obs): |
|
blue_obs, red_obs, blue_fire, red_fire = obs |
|
|
|
rotated_blue_obs = [] |
|
rotated_red_obs = [] |
|
|
|
for this_obs, is_blue, rotated_obs in zip((blue_obs, red_obs), |
|
(True, False), |
|
(rotated_blue_obs, rotated_red_obs)): |
|
drone = Drone(is_blue=is_blue) |
|
for d in this_obs: |
|
|
|
d_meter = np.zeros(6,) |
|
|
|
d_meter[:3] = drone.from_norm(d[:3], drone.max_positions, drone.min_positions) |
|
d_meter[3:6] = drone.from_norm(d[3:6], drone.max_speeds, drone.min_speeds) |
|
|
|
|
|
d_meter[1] -= self.angle |
|
d_meter[4] -= self.angle |
|
|
|
|
|
d[:3] = drone.to_norm(d_meter[:3], drone.max_positions, drone.min_positions) |
|
d[3:6] = drone.to_norm(d_meter[3:6], drone.max_speeds, drone.min_speeds) |
|
|
|
rotated_obs.append(d) |
|
|
|
del drone |
|
|
|
return np.array(rotated_blue_obs), np.array(rotated_red_obs), blue_fire, red_fire |
|
|
|
def rotate_action(self, action): |
|
|
|
blue_action, red_action = action |
|
blue_action = np.array(list(map(lambda x: [x[0], (x[1]+self.angle/2/np.pi) % 1, x[2]], blue_action))) |
|
red_action = np.array(list(map(lambda x: [x[0], (x[1]+self.angle/2/np.pi) % 1, x[2]], red_action))) |
|
action = blue_action, red_action |
|
|
|
return action |
|
|