| import os |
| import time |
| import math |
| import numpy as np |
| import gymnasium as gym |
| from gymnasium import spaces |
| import rclpy |
| from rclpy.node import Node |
| from rclpy.qos import QoSProfile, ReliabilityPolicy |
| from geometry_msgs.msg import Twist |
| from sensor_msgs.msg import LaserScan |
| from stable_baselines3 import PPO |
| from stable_baselines3.common.callbacks import CheckpointCallback |
| from stable_baselines3.common.vec_env import SubprocVecEnv |
| import torch |
| import subprocess |
|
|
| |
| class RcCarEnv(gym.Env): |
| def __init__(self, rank=0): |
| super(RcCarEnv, self).__init__() |
| |
| self.rank = rank |
| self.robot_name = f"bot{rank + 1}" |
| |
| if not rclpy.ok(): |
| rclpy.init() |
| |
| self.node = rclpy.create_node(f'rl_agent_{self.robot_name}') |
| |
| qos_best_effort = QoSProfile(reliability=ReliabilityPolicy.BEST_EFFORT, depth=10) |
| |
| |
| |
| |
| |
| cmd_topic = f'/model/{self.robot_name}/cmd_vel' |
| scan_topic = f'/model/{self.robot_name}/scan' |
|
|
| self.pub_cmd = self.node.create_publisher(Twist, cmd_topic, 10) |
| self.sub_scan = self.node.create_subscription(LaserScan, scan_topic, self.scan_cb, qos_best_effort) |
| |
| |
| self.max_speed = 1.0 |
| self.max_steering = 0.5 |
| self.steering_smooth_factor = 0.25 |
| self.current_steering = 0.0 |
| |
| self.scan_data = None |
| self.raw_scan_data = None |
| self.scan_received = False |
| self.n_obs = 16 |
| self.max_range = 10.0 |
| self.step_count = 0 |
| self.max_steps = 2500 |
| self.stuck_counter = 0 |
| self.episode_count = 0 |
|
|
| self.action_space = spaces.Box(low=np.array([-1.0, -1.0]), high=np.array([1.0, 1.0]), dtype=np.float32) |
| self.observation_space = spaces.Box(low=0.0, high=1.0, shape=(self.n_obs + 4,), dtype=np.float32) |
| |
| print(f"✅ {self.robot_name} BAĞLANDI! Cmd: {cmd_topic} | Scan: {scan_topic}") |
|
|
| def scan_cb(self, msg): |
| raw = np.array(msg.ranges) |
| raw = np.where(np.isinf(raw), self.max_range, raw) |
| raw = np.where(np.isnan(raw), self.max_range, raw) |
| self.raw_scan_data = np.clip(raw, 0.0, self.max_range) |
| chunk = len(self.raw_scan_data) // self.n_obs |
| if chunk > 0: |
| self.scan_data = np.array([np.min(self.raw_scan_data[i*chunk:(i+1)*chunk]) for i in range(self.n_obs)], dtype=np.float32) |
| else: |
| self.scan_data = np.full(self.n_obs, self.max_range, dtype=np.float32) |
| self.scan_received = True |
|
|
| def _get_observation(self, twist): |
| if self.scan_data is not None: |
| lidar_norm = self.scan_data / self.max_range |
| min_dist = np.min(lidar_norm) |
| front_sensors = lidar_norm[self.n_obs//2-2 : self.n_obs//2+2] |
| avg_front = np.mean(front_sensors) |
| else: |
| lidar_norm = np.ones(self.n_obs, dtype=np.float32) |
| min_dist = 1.0 |
| avg_front = 1.0 |
| speed_norm = twist.linear.x / self.max_speed |
| steering_norm = (self.current_steering / self.max_steering + 1.0) / 2.0 |
| obs = np.concatenate([lidar_norm, [speed_norm, steering_norm, min_dist, avg_front]]) |
| return obs.astype(np.float32), min_dist, avg_front |
|
|
| def step(self, action): |
| twist = Twist() |
| |
| throttle_input = (float(action[0]) + 1.0) / 2.0 |
| twist.linear.x = throttle_input * self.max_speed |
| target_steering = float(action[1]) * self.max_steering |
| self.current_steering = (1.0 - self.steering_smooth_factor) * self.current_steering + \ |
| (self.steering_smooth_factor * target_steering) |
| twist.angular.z = self.current_steering |
| self.pub_cmd.publish(twist) |
| |
| |
| self.scan_received = False |
| start_wait = time.time() |
| while not self.scan_received: |
| rclpy.spin_once(self.node, timeout_sec=0.002) |
| if time.time() - start_wait > 0.1: break |
| |
| observation, min_dist, avg_front = self._get_observation(twist) |
| real_min_dist = min_dist * self.max_range |
| self.step_count += 1 |
| |
| |
| if self.raw_scan_data is not None: |
| mid_idx = len(self.raw_scan_data) // 2 |
| raw_front_sector = self.raw_scan_data[mid_idx-20 : mid_idx+20] |
| true_front_dist = np.min(raw_front_sector) if len(raw_front_sector) > 0 else self.max_range |
| else: |
| true_front_dist = real_min_dist |
| |
| ttc = true_front_dist / (twist.linear.x + 0.1) |
|
|
| |
| reward = 0.0 |
| terminated = False |
| truncated = False |
| |
| |
| if twist.linear.x > 0.1: |
| reward += twist.linear.x * (0.4 + 0.6 * avg_front) * 1.5 |
| else: |
| reward -= 0.1 |
|
|
| |
| if ttc < 1.2 and twist.linear.x > 0.3: |
| reward -= (1.2 - ttc) * 5.0 |
| |
| |
| if real_min_dist < 0.7: |
| reward -= (0.7 - real_min_dist) ** 2 * 15.0 |
|
|
| reward -= abs(self.current_steering) * 0.5 |
|
|
| |
| |
| |
| COLLISION_THRESHOLD = 0.45 |
| |
| if real_min_dist < COLLISION_THRESHOLD or true_front_dist < COLLISION_THRESHOLD: |
| reward = -100.0 |
| terminated = True |
| |
| print(f"💥 {self.robot_name} KAZA! Mesafe: {real_min_dist:.2f}m") |
| self.pub_cmd.publish(Twist()) |
| return observation, reward, terminated, truncated, {} |
|
|
| |
| if twist.linear.x < 0.1 and self.step_count > 50: |
| self.stuck_counter += 1 |
| else: |
| self.stuck_counter = 0 |
| |
| if self.stuck_counter > 50: |
| reward = -20.0 |
| terminated = True |
| print(f"💤 {self.robot_name} takıldı.") |
| return observation, reward, terminated, truncated, {} |
|
|
| if self.step_count >= self.max_steps: |
| truncated = True |
| reward += 10.0 |
| return observation, reward, terminated, truncated, {} |
|
|
| return observation, reward, terminated, truncated, {} |
|
|
| def reset(self, seed=None, options=None): |
| super().reset(seed=seed) |
| self.episode_count += 1 |
| |
| |
| stop_cmd = Twist() |
| for _ in range(3): |
| self.pub_cmd.publish(stop_cmd) |
| time.sleep(0.01) |
|
|
| |
| if self.episode_count < 20: |
| |
| if self.rank == 0: |
| x, y, yaw = 0.0, 0.0, 0.0 |
| else: |
| x, y, yaw = 0.0, -2.0, 3.14 |
| else: |
| |
| safe_spots = [(0,0), (0,-2), (0,2), (-1.5,0), (1.5,0)] |
| idx = np.random.randint(len(safe_spots)) |
| x, y = safe_spots[idx] |
| x += np.random.uniform(-0.2, 0.2) |
| y += np.random.uniform(-0.2, 0.2) |
| yaw = np.random.uniform(-3.14, 3.14) |
| |
| qw = np.cos(yaw / 2) |
| qz = np.sin(yaw / 2) |
|
|
| |
| |
| |
| |
| cmd = [ |
| 'gz', 'service', |
| '-s', '/world/arena/set_pose', |
| '--reqtype', 'gz.msgs.Pose', |
| '--reptype', 'gz.msgs.Boolean', |
| '--timeout', '2000', |
| '--req', f'name: "{self.robot_name}", position: {{x: {x}, y: {y}, z: 0.2}}, orientation: {{w: {qw}, z: {qz}}}' |
| ] |
| |
| success = False |
| for _ in range(5): |
| try: |
| |
| result = subprocess.run(cmd, capture_output=True, timeout=1.0) |
| if result.returncode == 0 and "data: true" in str(result.stdout): |
| success = True |
| break |
| except: |
| time.sleep(0.1) |
| |
| if not success: |
| |
| print(f"⚠️ KRİTİK: {self.robot_name} resetlenemedi! İsim hatası olabilir.") |
| |
| |
|
|
| time.sleep(0.1) |
| self.scan_data = None |
| self.scan_received = False |
| |
| |
| start_wait = time.time() |
| while not self.scan_received and (time.time() - start_wait) < 0.5: |
| rclpy.spin_once(self.node, timeout_sec=0.01) |
| |
| dummy_twist = Twist() |
| obs, _, _ = self._get_observation(dummy_twist) |
| |
| self.step_count = 0 |
| self.stuck_counter = 0 |
| return obs, {} |
|
|
| |
| def make_env(rank): |
| def _init(): |
| env = RcCarEnv(rank=rank) |
| return env |
| return _init |
|
|
| if __name__ == '__main__': |
| log_dir = "./logs_multi_bot/" |
| os.makedirs(log_dir, exist_ok=True) |
| |
| |
| num_cpu = 2 |
| |
| print(f"🚀 {num_cpu} ROBOT İLE PARALEL EĞİTİM BAŞLIYOR...") |
| |
| |
| |
| env = SubprocVecEnv([make_env(i) for i in range(num_cpu)]) |
| |
| model = PPO( |
| "MlpPolicy", |
| env, |
| verbose=1, |
| tensorboard_log=log_dir, |
| use_sde=True, |
| learning_rate=3e-4, |
| n_steps=2048 // num_cpu, |
| batch_size=64, |
| gamma=0.99, |
| ent_coef=0.03 |
| ) |
| |
| |
| model.learn(total_timesteps=1000000, progress_bar=True) |
| model.save("rc_car_multi_final") |
| print("✅ ÇOKLU EĞİTİM TAMAMLANDI!") |