"""Training script for independent policy-gradient (PG) agents in the SolarSys
peer-to-peer energy trading environment."""

import os
import re
import sys
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

# Make the project root importable so the sibling packages below resolve.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from solar_sys_environment import SolarSys
from PG.trainer.pg import PGAgent
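
# NOTE: The code below assumes the following PGAgent interface, inferred from how
# the agent is used in this script (not re-checked against PG/trainer/pg.py):
#   - select_action(obs) -> action array for one agent (caching the log-prob internally)
#   - .rewards / .dones  -> lists that the rollout loop appends to each step
#   - update()           -> end-of-episode policy-gradient step that consumes those buffers
#   - save(path)         -> checkpoint the networks; .device reports the torch device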


def main():
    STATE_TO_RUN = "pennsylvania"

    DATA_FILE_PATH = "/path/to/project/training/5houses_152days_TRAIN.csv"
    num_episodes = 10000
    batch_size = 256
    # NOTE: checkpoint_interval exceeds num_episodes, so periodic checkpoints are
    # effectively disabled; only best and final models are written.
    checkpoint_interval = 100000
    window_size = 32

    env = SolarSys(
        data_path=DATA_FILE_PATH,
        state=STATE_TO_RUN,
        time_freq="15T"
    )
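
    # The environment is assumed to follow a multi-agent, Gym-style API: reset()
    # returns one observation per house, and step(actions) returns per-agent
    # observations and rewards, a shared `done` flag, and an `info` dict with the
    # P2P trade, grid-import, cost, and battery charge/discharge quantities logged below.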

    print("Observation space:", env.observation_space)
    print("Action space :", env.action_space)

    # Quick sanity check: one reset and one random step before training starts.
    obs = env.reset()
    print(f"Reset returned {len(obs)} agent observations; stacked obs shape: {np.array(obs).shape}")

    dummy_actions = np.random.rand(env.num_agents, env.action_space.shape[1]).astype(np.float32)
    next_obs, rewards, done, info = env.step(dummy_actions)
    print(f"Step outputs → next_obs: {len(next_obs)}×{np.array(next_obs).shape[1]}, "
          f"rewards: {len(rewards)}, done: {done}")
    print("Info keys:", list(info.keys()))

    # Count how many houses belong to each of the two agent groups.
    env.group_counts = {
        0: env.agent_groups.count(0),
        1: env.agent_groups.count(1)
    }
    print(f"Number of houses in each group: {env.group_counts}")

    max_steps = env.num_steps

    num_agents = env.num_agents
    local_state_dim = env.observation_space.shape[1]
    action_dim = env.action_space.shape[1]

    # Timestamped output directory for this run.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"pg_{STATE_TO_RUN}_{num_agents}agents_{num_episodes}eps_{timestamp}"
    root_dir = os.path.join("FINALE_FINALE_FINALE", run_name)
    os.makedirs(root_dir, exist_ok=True)
    print(f"Saving training outputs to: {root_dir}")

    logs_dir = os.path.join(root_dir, "logs")
    plots_dir = os.path.join(root_dir, "plots")
    os.makedirs(logs_dir, exist_ok=True)
    os.makedirs(plots_dir, exist_ok=True)

    # One policy-gradient agent per house.
    pg_agents = [
        PGAgent(
            state_dim=local_state_dim,
            action_dim=action_dim,
            lr=2e-4,
            gamma=0.95,
            critic_loss_coef=0.5
        )
        for _ in range(num_agents)
    ]
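
    # The agents are independent learners: each one trains only on its own local
    # observation and reward, with no parameter sharing or centralized critic.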

    # Reward bookkeeping across episodes.
    episode_rewards = []        # mean reward over agents, per episode
    episode_total_rewards = []  # summed reward over agents, per episode
    block_mean_rewards = []     # per-block (window_size episodes) mean rewards
    block_total_rewards = []    # per-block total rewards

    agent_rewards_log = [[] for _ in range(num_agents)]
    best_mean_reward = -1e9
    best_model_path = os.path.join(logs_dir, "best_model.pth")

    daily_rewards = []
    monthly_rewards = []

    training_start_time = time.time()
    episode_durations = []
    total_steps_global = 0
    episode_log_data = []
    performance_metrics_log = []

    agent_charge_log = [[] for _ in range(num_agents)]
    agent_discharge_log = [[] for _ in range(num_agents)]

    for episode in range(1, num_episodes + 1):
        episode_start_time = time.time()

        obs = np.array(env.reset(), dtype=np.float32)

        # Log the previous episode's metrics (this assumes env.reset() keeps the
        # metrics of the episode that just finished available).
        if episode > 1:
            last_episode_metrics = env.get_episode_metrics()
            last_episode_metrics['Episode'] = episode - 1
            performance_metrics_log.append(last_episode_metrics)

        total_reward = np.zeros(num_agents, dtype=np.float32)
        done = False
        step_count = 0
        day_logs = []
        episode_charges = [[] for _ in range(num_agents)]
        episode_discharges = [[] for _ in range(num_agents)]

        while not done:
            # Each agent acts on its own local observation.
            actions = []
            for i, agent in enumerate(pg_agents):
                agent_action = agent.select_action(obs[i])
                actions.append(agent_action)
            actions = np.array(actions, dtype=np.float32)

            next_obs_list, rewards, done, info = env.step(actions)
            next_obs = np.array(next_obs_list, dtype=np.float32)

            # Store per-step rewards and terminal flags for the end-of-episode update.
            for i, agent in enumerate(pg_agents):
                agent.rewards.append(rewards[i])
                agent.dones.append(done)

            total_reward += rewards
            obs = next_obs
            step_count += 1
            total_steps_global += 1

            # Use the same defaults everywhere so a missing key cannot raise KeyError.
            charge_amount = info.get("charge_amount", np.zeros(num_agents))
            discharge_amount = info.get("discharge_amount", np.zeros(num_agents))

            day_logs.append({
                "step": step_count - 1,
                "grid_import_no_p2p": info["grid_import_no_p2p"],
                "grid_import_with_p2p": info["grid_import_with_p2p"],
                "p2p_buy": info["p2p_buy"],
                "p2p_sell": info["p2p_sell"],
                "costs": info["costs"],
                "charge_amount": charge_amount,
                "discharge_amount": discharge_amount
            })

            for i in range(num_agents):
                episode_charges[i].append(charge_amount[i])
                episode_discharges[i].append(discharge_amount[i])

            # Guard against environments that never set done.
            if step_count >= max_steps:
                break

        sum_ep_reward = float(np.sum(total_reward))
        mean_ep_reward = float(np.mean(total_reward))

        episode_total_rewards.append(sum_ep_reward)
        episode_rewards.append(mean_ep_reward)
        daily_rewards.append(mean_ep_reward)

        # Every window_size episodes, report block-level statistics.
        if len(daily_rewards) % window_size == 0:
            last_totals = episode_total_rewards[-window_size:]
            block_sum = sum(last_totals)
            block_total_rewards.append(block_sum)

            last_means = daily_rewards[-window_size:]
            block_mean = sum(last_means) / window_size
            block_mean_rewards.append(block_mean)

            block_idx = len(block_mean_rewards)
            print(
                f"→ Completed Block {block_idx} "
                f"| Episodes {(block_idx - 1) * window_size + 1}–{block_idx * window_size} "
                f"| Block Total Reward: {block_sum:.3f} "
                f"| Block Mean Reward: {block_mean:.3f}"
            )

        for i in range(num_agents):
            agent_rewards_log[i].append(total_reward[i])
            agent_charge_log[i].append(np.mean(episode_charges[i]))
            agent_discharge_log[i].append(np.mean(episode_discharges[i]))

        steps_data = []
        for entry in day_logs:
            steps_data.append({
                "step": entry["step"],
                "p2p_buy_sum": float(np.sum(entry["p2p_buy"])),
                "p2p_sell_sum": float(np.sum(entry["p2p_sell"])),
                "grid_import_no_p2p_sum": float(np.sum(entry["grid_import_no_p2p"])),
                "grid_import_with_p2p_sum": float(np.sum(entry["grid_import_with_p2p"]))
            })

        # Cost reduction relative to the no-P2P baseline, where every household
        # buys all of its imported energy from the grid at the grid price.
        baseline_cost = np.sum([np.sum(entry["grid_import_no_p2p"]) * env.get_grid_price(entry["step"])
                                for entry in day_logs])
        actual_cost = np.sum([np.sum(entry["costs"]) for entry in day_logs])
        cost_reduction = (baseline_cost - actual_cost) / (baseline_cost + 1e-8)
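
        # A minimal sketch of what each agent.update() below is assumed to do
        # (REINFORCE with a learned baseline, consistent with the gamma and
        # critic_loss_coef hyperparameters; the real logic lives in PG/trainer/pg.py):
        #     G_t  = r_t + gamma * G_{t+1}              (discounted return)
        #     A_t  = G_t - V(s_t)                       (advantage over the critic baseline)
        #     loss = -sum_t log pi(a_t | s_t) * A_t
        #            + critic_loss_coef * sum_t (V(s_t) - G_t) ** 2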

        for agent in pg_agents:
            agent.update()

        # Save per-agent weights whenever the mean episode reward improves.
        if mean_ep_reward > best_mean_reward:
            best_mean_reward = mean_ep_reward
            for i, agent in enumerate(pg_agents):
                agent_path = os.path.join(logs_dir, f"best_model_agent_{i}.pth")
                agent.save(agent_path)

        # Periodic checkpoints (disabled in practice; see checkpoint_interval above).
        if episode % checkpoint_interval == 0:
            for i, agent in enumerate(pg_agents):
                ckpt_path = os.path.join(logs_dir, f"checkpoint_{episode}_agent_{i}.pth")
                agent.save(ckpt_path)

        episode_end_time = time.time()
        episode_duration = episode_end_time - episode_start_time

        print(
            f"Episode {episode}/{num_episodes} "
            f"| Time per Episode: {episode_duration:.2f}s "
            f"| Steps: {step_count} "
            f"| Mean Reward: {mean_ep_reward:.3f} "
            f"| Cost Reduction: {cost_reduction:.2%}"
        )

        episode_log_data.append({
            "Episode": episode,
            "Steps": step_count,
            "Mean_Reward": mean_ep_reward,
            "Total_Reward": sum_ep_reward,
            "Cost_Reduction_Pct": cost_reduction * 100,
            "Baseline_Cost": baseline_cost,
            "Actual_Cost": actual_cost,
            "Episode_Duration": episode_duration,
            "Total_Charge": np.sum([np.sum(entry["charge_amount"]) for entry in day_logs]),
            "Total_Discharge": np.sum([np.sum(entry["discharge_amount"]) for entry in day_logs])
        })

        if episode % 100 == 0:
            avg_reward_last_100 = np.mean(daily_rewards[-100:]) if len(daily_rewards) >= 100 else np.mean(daily_rewards)
            print(f" → Average reward (last 100 episodes): {avg_reward_last_100:.3f}")

    # The loop above only logs metrics for episodes 1..num_episodes-1, so collect
    # the final episode's metrics here, after training finishes.
    final_episode_metrics = env.get_episode_metrics()
    final_episode_metrics['Episode'] = num_episodes
    performance_metrics_log.append(final_episode_metrics)

    training_end_time = time.time()
    total_training_time = training_end_time - training_start_time

    print("\nSaving final models...")
    for i, agent in enumerate(pg_agents):
        final_path = os.path.join(logs_dir, f"final_model_agent_{i}.pth")
        agent.save(final_path)

    np.save(os.path.join(logs_dir, "agent_rewards.npy"), np.array(agent_rewards_log))
    np.save(os.path.join(logs_dir, "mean_rewards.npy"), np.array(episode_rewards))
    np.save(os.path.join(logs_dir, "total_rewards.npy"), np.array(episode_total_rewards))

    # Merge the per-episode reward log with the environment's performance metrics.
    df_rewards_log = pd.DataFrame(episode_log_data)
    df_perf_log = pd.DataFrame(performance_metrics_log)
    df_final_log = pd.merge(df_rewards_log, df_perf_log.drop(columns=[
        'degradation_cost_over_time',
        'cost_savings_over_time',
        'grid_reduction_over_time'
    ]), on="Episode")

    def moving_avg(series, window):
        # Centered rolling mean; min_periods=1 keeps the first and last episodes defined.
        return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()

    ma_window = 300
    episodes = np.arange(1, num_episodes + 1)

    reward_ma = moving_avg(df_final_log["Mean_Reward"], ma_window)
    plt.figure(figsize=(8, 5))
    plt.plot(episodes, reward_ma, linewidth=2, label=f"Mean Reward MA (win={ma_window})")
    plt.xlabel("Episode")
    plt.ylabel("Mean Reward")
    plt.title("PG: Mean Reward Moving Average")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(plots_dir, "mean_reward_ma.png"), dpi=200)
    plt.close()

    total_ma = moving_avg(df_final_log["Total_Reward"], ma_window)
    plt.figure(figsize=(8, 5))
    plt.plot(episodes, total_ma, linewidth=2, label=f"Total Reward MA (win={ma_window})")
    plt.xlabel("Episode")
    plt.ylabel("Total Reward")
    plt.title("PG: Total Reward Moving Average")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(plots_dir, "total_reward_ma.png"), dpi=200)
    plt.close()

    cost_ma = moving_avg(df_final_log["Cost_Reduction_Pct"], ma_window)
    plt.figure(figsize=(8, 5))
    plt.plot(episodes, cost_ma, linewidth=2, label="Cost Reduction MA (%)")
    plt.xlabel("Episode")
    plt.ylabel("Cost Reduction (%)")
    plt.title("PG: Cost Reduction Moving Average")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(plots_dir, "cost_reduction_ma.png"), dpi=200)
    plt.close()

    degradation_ma = moving_avg(df_final_log["battery_degradation_cost_total"], ma_window)
    plt.figure(figsize=(8, 5))
    plt.plot(episodes, degradation_ma, linewidth=2, label=f"Degradation Cost MA (win={ma_window})", color='purple')
    plt.xlabel("Episode")
    plt.ylabel("Total Degradation Cost ($)")
    plt.title("PG: Battery Degradation Cost Moving Average")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(plots_dir, "degradation_cost_ma.png"), dpi=200)
    plt.close()

    print(f"\nAll moving-average plots saved to: {plots_dir}")

    # Append a summary row carrying the total wall-clock training time; its other
    # columns are left empty (NaN) in the saved CSV.
    total_time_row = pd.DataFrame([{
        "Episode": "Total_Training_Time",
        "Episode_Duration": total_training_time
    }])
    df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)

    log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")

    columns_to_save = [
        "Episode",
        "Mean_Reward",
        "Total_Reward",
        "Cost_Reduction_Pct",
        "Episode_Duration",
        "battery_degradation_cost_total",
    ]
    df_to_save = df_to_save[columns_to_save]

    df_to_save.to_csv(log_csv_path, index=False)

    print(f"Saved comprehensive training performance log to: {log_csv_path}")

    print("\n" + "=" * 50)
    print("TRAINING COMPLETE".center(50))
    print(f"Total training time: {total_training_time:.2f} seconds")
    print(f"Device used: {pg_agents[0].device}")
    print("=" * 50)


if __name__ == "__main__":
    main()