# energy-optimization-ppo / train_agent.py
# Sushruth21's picture
# Upload folder using huggingface_hub
# e00c2a1 verified
#!/usr/bin/env python3
"""
Train an RL agent on the Energy Optimization Environment.
"""
import sys
import os
# Make sibling modules (models, gym_wrapper) importable when run as a script.
sys.path.insert(0, os.path.dirname(__file__))
# Mock the he_demo package for direct testing
import types
he_demo = types.ModuleType('he_demo')
from models import EnergyOptimizationAction, EnergyOptimizationObservation, Task, TaskSummary
# Re-export the local model classes on the stub module so downstream code that
# does `from he_demo import ...` sees the same objects as `from models import ...`.
he_demo.EnergyOptimizationAction = EnergyOptimizationAction
he_demo.EnergyOptimizationObservation = EnergyOptimizationObservation
he_demo.Task = Task
he_demo.TaskSummary = TaskSummary
# Register the stub under both the package name and its 'models' submodule so
# `import he_demo` and `from he_demo.models import ...` both resolve to it.
sys.modules['he_demo'] = he_demo
sys.modules['he_demo.models'] = he_demo
from gym_wrapper import EnergyOptimizationGymEnv
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
def train_agent(total_timesteps=10_000, test_steps=50):
    """Train a PPO agent on the energy optimization environment.

    Args:
        total_timesteps: Number of environment steps to train for
            (default 10,000, matching the original script).
        test_steps: Maximum number of steps in the post-training rollout.

    Returns:
        The trained PPO model (also saved to 'energy_optimization_ppo.zip').
    """
    print("🚀 Training PPO Agent on Energy Optimization Environment")
    print("=" * 60)

    # Vectorized environment: 4 parallel copies for faster rollout collection.
    env = make_vec_env(EnergyOptimizationGymEnv, n_envs=4)

    # PPO hyperparameters made explicit (these match the SB3 defaults).
    model = PPO(
        "MlpPolicy",
        env,
        verbose=1,
        learning_rate=3e-4,
        n_steps=2048,
        batch_size=64,
        n_epochs=10,
        gamma=0.99,
        gae_lambda=0.95,
        clip_range=0.2,
        ent_coef=0.0,
        vf_coef=0.5,
        max_grad_norm=0.5,
    )

    print(f"Training for {total_timesteps:,} timesteps...")
    model.learn(total_timesteps=total_timesteps)

    # Save the trained model
    model.save("energy_optimization_ppo")
    print("✅ Model saved as 'energy_optimization_ppo.zip'")
    env.close()  # release the vectorized training envs

    # Quick deterministic rollout to sanity-check the trained policy.
    print("\n🧪 Testing trained agent...")
    test_env = EnergyOptimizationGymEnv()
    obs, _ = test_env.reset()
    total_reward = 0.0
    # Index 0 of the action vector selects one of these action types;
    # index 1 is a continuous intensity value.
    action_types = ["reduce_ram", "optimize_energy", "balance_resources", "monitor_system"]
    steps = 0
    for steps in range(1, test_steps + 1):
        action, _ = model.predict(obs, deterministic=True)
        # Gymnasium step API returns (obs, reward, terminated, truncated, info).
        # Fix: the original discarded `truncated`, so a time-limit-truncated
        # episode would not end the rollout.
        obs, reward, terminated, truncated, _ = test_env.step(action)
        total_reward += reward
        # Convert action back to readable format
        action_type = action_types[int(action[0])]
        intensity = float(action[1])
        print(f"Step {steps}: {action_type}({intensity:.1f}) -> RAM={obs[0]:.1f}%, Energy={obs[1]:.1f}kWh, Reward={reward:.2f}")
        if terminated or truncated:
            break
    # Fix: total_reward was accumulated but never reported in the original.
    print(f"Total reward over {steps} steps: {total_reward:.2f}")
    test_env.close()
    return model
# Script entry point: train and evaluate only when run directly, not on import.
if __name__ == "__main__":
    train_agent()