Spaces:
Configuration error
Configuration error
| import numpy as np | |
| import gymnasium as gym | |
| from opensimplex import OpenSimplex | |
| import time | |
# PyFlyt is an optional dependency; fall back to a minimal stub when absent.
try:
    from PyFlyt.gym_envs.quadx_envs.quadx_waypoints_env import QuadXWaypointsEnv
except ImportError:
    PYFLYT_AVAILABLE = False

    class QuadXWaypointsEnv(gym.Env):
        """Minimal stand-in matching the PyFlyt env's constructor signature."""

        metadata = {"render_modes": ["human", "rgb_array"]}

        def __init__(self, render_mode=None):
            self.render_mode = render_mode
else:
    PYFLYT_AVAILABLE = True
class WindField:
    """Smoothly time-varying 3D wind field sampled from 4D simplex noise."""

    def __init__(self, seed=42, scale=0.1, speed=1.0):
        """
        Args:
            seed: RNG seed for the underlying simplex noise.
            scale: spatial frequency of the noise (smaller = smoother field).
            speed: rate at which the field evolves over time.
        """
        self.noise = OpenSimplex(seed=seed)
        self.scale = scale
        self.speed = speed
        self.time_offset = 0.0

    def get_wind(self, x, y, z, dt):
        """Advance the field's clock by ``dt`` and return wind ``[u, v, w]``.

        Each component samples the same noise function at spatially offset
        coordinates (+100 / +200) so the three axes are decorrelated.
        """
        self.time_offset += dt * self.speed
        sx, sy, sz = x * self.scale, y * self.scale, z * self.scale
        t = self.time_offset
        components = [
            self.noise.noise4(sx, sy, sz, t),
            self.noise.noise4(sx + 100, sy + 100, sz, t),
            self.noise.noise4(sx + 200, sy + 200, sz, t),
        ]
        return np.array(components)
class Drone3DEnv(gym.Env):
    """Simplified 3D drone hover environment with procedural wind.

    Dynamics are a double integrator: the first three action components
    command linear acceleration, gravity pulls the drone down, and a
    :class:`WindField` adds a smoothly varying disturbance. The agent is
    rewarded for staying close to a randomized 3D target.

    Observation (12,): ``[x, y, z, roll, pitch, yaw, u, v, w, p, q, r]``.
        Only position and linear velocity are simulated in this mock; the
        attitude/rate slots stay zero.
    Action (4,): values in ``[-1, 1]``; the first three map to acceleration
        commands in ``[-15, 15]`` m/s^2, the fourth is unused by the mock.
    """

    def __init__(self, render_mode=None, wind_scale=10.0, wind_speed=1.0):
        """
        Args:
            render_mode: gymnasium render mode (rendering is a no-op here).
            wind_scale: multiplier applied to the unit-range noise wind.
            wind_speed: temporal evolution rate of the wind field.
        """
        super().__init__()
        self.render_mode = render_mode
        self.wind_field = WindField(scale=0.05, speed=wind_speed)
        self.wind_strength = wind_scale
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(12,), dtype=np.float32
        )
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float32)
        self.state = np.zeros(12)
        # Default target so step() does not raise AttributeError if called
        # before the first reset() (original left self.target undefined).
        self.target = np.array([0.0, 0.0, 10.0])
        self.dt = 0.05
        self.step_count = 0
        self.max_steps = 1000

    def reset(self, seed=None, options=None):
        """Reset to 10 m altitude with a fresh random target and wind field.

        Uses ``self.np_random`` (seeded by gymnasium via ``seed``) so resets
        are reproducible. The original sampled from the global ``np.random``,
        which silently ignored the ``seed`` argument.
        """
        super().reset(seed=seed)
        self.state = np.zeros(12)
        self.state[2] = 10.0  # start at 10 m height
        self.step_count = 0
        # Randomize target: x, y in [-5, 5], z in [5, 15].
        self.target = self.np_random.uniform(low=[-5, -5, 5], high=[5, 5, 15])
        # Re-seed the noise so each episode sees a different wind pattern.
        new_seed = int(self.np_random.integers(0, 10000))
        self.wind_field = WindField(seed=new_seed, scale=0.05, speed=self.wind_field.speed)
        return self.state.astype(np.float32), {}

    def step(self, action):
        """Advance one ``dt`` of double-integrator dynamics.

        Args:
            action: array-like of shape (4,); clipped to the declared
                action space ``[-1, 1]`` before use.

        Returns:
            ``(obs, reward, terminated, truncated, info)`` where ``info``
            carries the applied wind force and the current target.
        """
        self.step_count += 1
        # Enforce the declared action space bounds.
        action = np.clip(np.asarray(action, dtype=np.float64), -1.0, 1.0)
        pos = self.state[0:3]  # views into self.state; updated in place below
        vel = self.state[6:9]
        # Wind disturbance (advances the wind field's internal clock by dt).
        raw_wind = self.wind_field.get_wind(pos[0], pos[1], pos[2], self.dt)
        wind_force = raw_wind * self.wind_strength
        # Map action [-1, 1] to acceleration [-15, 15] m/s^2 — enough control
        # authority to fight gravity (9.81) plus wind. There is deliberately
        # no anti-gravity offset: the agent must command positive z
        # acceleration to hover, otherwise the drone falls.
        accel = action[:3] * 15.0
        gravity = np.array([0.0, 0.0, -9.81])
        total_accel = accel + wind_force + gravity
        # Semi-implicit Euler integration.
        vel += total_accel * self.dt
        pos += vel * self.dt
        # Floor collision: kill vertical motion on impact.
        if pos[2] < 0:
            pos[2] = 0
            vel[2] = 0
        # Simple linear drag / damping.
        vel *= 0.95
        self.state[0:3] = pos
        self.state[6:9] = vel
        # Reward: negative distance to target, small velocity penalty, and a
        # constant per-step survival bonus.
        dist = np.linalg.norm(pos - self.target)
        vel_mag = np.linalg.norm(vel)
        reward = -dist - 0.01 * vel_mag + 0.1
        terminated = False  # let it crash and rest on the floor
        truncated = self.step_count >= self.max_steps
        info = {"wind": wind_force, "target": self.target}
        return self.state.astype(np.float32), reward, terminated, truncated, info

    def render(self):
        """No-op; visualization is handled externally (e.g. matplotlib)."""
        pass