iPADS / TwoDimEnv.py
YvesP's picture
Add application file
3c74b8c
import gymnasium as gym
from params import *
class TwoDimEnv(gym.Env):
"""
Custom 2D-Environment that follows gym interface.
This is a simple 2D-env where the agent must go to the 0,0 point,
it can go right or straight in a continuous action space, but cannot change speed
"""
def __init__(self):
super(TwoDimEnv, self).__init__()
# agent cannot be further away from target than space_limits
self.space_limits = SPACE_LIMITS
# Initialize the agent position
rho_init = np.random.random() * SPACE_LIMITS
theta_init = np.random.random() * 2 * PI - PI
self.rho_init, self.theta_init, self.z_init = rho_init, theta_init, Z_INIT
self.agent_pos = rho_init * np.exp(1j * theta_init)
self.agent_z = Z_INIT
# agent abs speed is constant, may change of direction on the plane, but doesnt change in the z-axis
self.agent_speed = SPEED_RHO * np.exp(1j*SPEED_ANGLE)
self.agent_speed_z = SPEED_Z
self.agent_max_angle = MAX_ANGLE
#
self.agent_previous_pos = self.agent_pos
def reset(self, training=True, rho_init=RHO_INIT, theta_init=THETA_INIT, z_init=Z_INIT):
"""
:input: TwoDimEnv
:return: TwoDimEnv
"""
# Initialize the agent position and speed
self.agent_pos = rho_init * np.exp(1j * theta_init)
self.agent_speed = SPEED_RHO * np.exp(1j * SPEED_ANGLE)
self.agent_z = z_init
# init step index
self.step_index = 0
return self.get_obs()
def step(self, action):
'''
the Gym step
:param action:
:return:
'''
self.step_index += 1
# Agent changes of direction according to its action: action[0] in [-1; 1]
self.agent_speed *= np.exp(1j * self.agent_max_angle * action[0], dtype=complex)
self.agent_previous_pos = self.agent_pos
self.agent_pos += self.agent_speed
self.agent_z -= self.agent_speed_z
# Are we done?
done = bool(self.agent_z <= 0)
# get normalized obs
obs = self.get_obs()
return obs, 0., done, {}
def render(self, **kwargs):
pass
def close(self):
pass
# calculates normalized obs
def get_obs(self):
'''
normalises the observation
:return: normalised observation
'''
agent_dist = np.abs(self.agent_pos) / self.space_limits
agent_angle = np.angle(self.agent_pos) / (2 * PI)
if agent_angle < 0:
agent_angle += 1
agent_speed = np.angle(self.agent_speed) / (2 * PI)
if agent_speed < 0:
agent_speed += 1
agent_z = (Z_INIT-self.agent_z) / Z_INIT
obs = np.array([agent_dist, agent_angle, agent_speed, agent_z]).astype(np.float32)
return obs