SolarSys2025 committed
Commit 3275d5c · verified · 1 Parent(s): e4c4571

Delete Other_algorithms

Files changed (30)
  1. Other_algorithms/.DS_Store +0 -0
  2. Other_algorithms/Flat_System/PG/_init_.py +0 -0
  3. Other_algorithms/Flat_System/PG/pg_evaluation.py +0 -520
  4. Other_algorithms/Flat_System/PG/pg_train.py +0 -373
  5. Other_algorithms/Flat_System/PG/trainer/__init__.py +0 -0
  6. Other_algorithms/Flat_System/PG/trainer/pg.py +0 -96
  7. Other_algorithms/Flat_System/maddpg/__init__.py +0 -0
  8. Other_algorithms/Flat_System/maddpg/maddpg_evaluation.py +0 -495
  9. Other_algorithms/Flat_System/maddpg/maddpg_train.py +0 -382
  10. Other_algorithms/Flat_System/maddpg/trainer/__init__.py +0 -0
  11. Other_algorithms/Flat_System/maddpg/trainer/maddpg.py +0 -196
  12. Other_algorithms/Flat_System/mappo/_init_.py +0 -0
  13. Other_algorithms/Flat_System/mappo/mappo_evaluation.py +0 -500
  14. Other_algorithms/Flat_System/mappo/mappo_train.py +0 -439
  15. Other_algorithms/Flat_System/mappo/trainer/__init__.py +0 -0
  16. Other_algorithms/Flat_System/mappo/trainer/mappo.py +0 -243
  17. Other_algorithms/Flat_System/meanfield/_init_.py +0 -0
  18. Other_algorithms/Flat_System/meanfield/meanfield_evaluation.py +0 -492
  19. Other_algorithms/Flat_System/meanfield/meanfield_train.py +0 -386
  20. Other_algorithms/Flat_System/meanfield/trainer/__init__.py +0 -0
  21. Other_algorithms/Flat_System/meanfield/trainer/mfac.py +0 -219
  22. Other_algorithms/Flat_System/solar_sys_environment.py +0 -635
  23. Other_algorithms/HC_MAPPO/Environment/cluster_env_wrapper.py +0 -164
  24. Other_algorithms/HC_MAPPO/Environment/solar_sys_environment.py +0 -673
  25. Other_algorithms/HC_MAPPO/HC_MAPPO_evaluation.py +0 -618
  26. Other_algorithms/HC_MAPPO/HC_MAPPO_train.py +0 -579
  27. Other_algorithms/HC_MAPPO/cluster.py +0 -140
  28. Other_algorithms/HC_MAPPO/mappo/_init_.py +0 -0
  29. Other_algorithms/HC_MAPPO/mappo/trainer/__init__.py +0 -0
  30. Other_algorithms/HC_MAPPO/mappo/trainer/mappo.py +0 -199
Other_algorithms/.DS_Store DELETED
Binary file (6.15 kB)
 
Other_algorithms/Flat_System/PG/_init_.py DELETED
File without changes
Other_algorithms/Flat_System/PG/pg_evaluation.py DELETED
@@ -1,520 +0,0 @@
# pg_evaluate.py
import os
import sys
import time
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from datetime import datetime

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from solar_sys_environment import SolarSys
from PG.trainer.pg import PGAgent

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def compute_jains_fairness(values: np.ndarray) -> float:
    if len(values) == 0:
        return 0.0
    if np.all(values == 0):
        return 1.0
    num = (values.sum())**2
    den = len(values) * (values**2).sum()
    return num / den

def main():
    # User parameters
    MODEL_PATH = "/path/to/project/pg_pennsylvania_10agents_10000eps/logs"
    DATA_PATH = "/path/to/project/testing/10houses_30days_TEST.csv"
    DAYS_TO_EVALUATE = 30

    model_path = MODEL_PATH
    data_path = DATA_PATH
    days_to_evaluate = DAYS_TO_EVALUATE
    SOLAR_THRESHOLD = 0.5

    state_match = re.search(r"pg_(oklahoma|colorado|pennsylvania)_", model_path)
    if not state_match:
        raise ValueError(
            "Could not automatically detect the state (oklahoma, colorado, or pennsylvania) "
            "from the model path. Please ensure your model's parent folder is named correctly, "
            "e.g., 'pg_oklahoma_...'"
        )
    detected_state = state_match.group(1)
    print(f"--- Detected state: {detected_state.upper()} ---")

    # Env setup
    env = SolarSys(
        data_path=data_path,
        state=detected_state,
        time_freq="15T"
    )
    eval_steps = env.num_steps
    house_ids = env.house_ids
    num_agents = env.num_agents

    # Generate a unique eval run folder
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"eval_pg_{num_agents}agents_{days_to_evaluate}days_{timestamp}"
    output_folder = os.path.join("runs_with_battery", run_name)
    logs_dir = os.path.join(output_folder, "logs")
    plots_dir = os.path.join(output_folder, "plots")
    for d in (logs_dir, plots_dir):
        os.makedirs(d, exist_ok=True)
    print(f"Saving evaluation outputs to: {output_folder}")

    local_dim = env.observation_space.shape[1]
    act_dim = env.action_space.shape[1]

    # Initialize PG agents
    pg_agents = []
    for i in range(num_agents):
        agent = PGAgent(
            state_dim=local_dim,
            action_dim=act_dim,
            lr=2e-4,
            gamma=0.95,
        )

        # Load individual agent model
        agent_model_path = os.path.join(model_path, f"best_model_agent_{i}.pth")
        if os.path.exists(agent_model_path):
            agent.load(agent_model_path)
            print(f"Loaded model for agent {i}")
        else:
            print(f"WARNING: Model file not found for agent {i}: {agent_model_path}")
            # Alternative: try loading a single model for all agents
            single_model_path = os.path.join(model_path, "best_model.pth")
            if os.path.exists(single_model_path):
                agent.load(single_model_path)
                print(f"Loaded single model for agent {i}")

        agent.model.to(device).eval()
        pg_agents.append(agent)

    # Prepare logs
    all_logs = []
    daily_summaries = []
    step_timing_list = []

    evaluation_start = time.time()

    for day_idx in range(days_to_evaluate):
        obs = env.reset()
        done = False
        step_count = 0
        day_logs = []

        while not done:
            step_start_time = time.time()

            # Select actions with PG
            actions = []
            with torch.no_grad():
                for i in range(num_agents):
                    # Convert observation to tensor and move to device
                    state = torch.FloatTensor(obs[i]).unsqueeze(0).to(device)

                    # Get action from actor network
                    mean, log_std, _ = pg_agents[i].model(state)

                    # For evaluation, use mean action (deterministic)
                    action = mean.squeeze(0).cpu().numpy()

                    # Clip to [0, 1] range
                    action = np.clip(action, 0.0, 1.0)
                    actions.append(action)

            actions = np.array(actions, dtype=np.float32)

            next_obs, rewards, done, info = env.step(actions)

            # Consolidated Logging
            step_end_time = time.time()
            step_duration = step_end_time - step_start_time

            print(f"[Day {day_idx+1}, Step {step_count}] Step time: {step_duration:.6f} seconds")

            step_timing_list.append({
                "day": day_idx + 1,
                "step": step_count,
                "step_time_s": step_duration
            })

            grid_price_now = env.get_grid_price(step_count)
            peer_price_now = info.get("peer_price", env.get_peer_price(step_count,
                                                                       float(info["p2p_sell"].sum()),
                                                                       float(info["p2p_buy"].sum())))

            for i, hid in enumerate(house_ids):
                is_battery_house = hid in env.batteries
                p2p_buy = float(info["p2p_buy"][i])
                p2p_sell = float(info["p2p_sell"][i])
                charge_amount = float(info.get("charge_amount", [0]*num_agents)[i])
                discharge_amount = float(info.get("discharge_amount", [0]*num_agents)[i])

                day_logs.append({
                    "day": day_idx + 1,
                    "step": step_count,
                    "house": hid,
                    "grid_import_no_p2p": float(info["grid_import_no_p2p"][i]),
                    "grid_import_with_p2p": float(info["grid_import_with_p2p"][i]),
                    "grid_export": float(info.get("grid_export", [0]*num_agents)[i]),
                    "p2p_buy": p2p_buy,
                    "p2p_sell": p2p_sell,
                    "actual_cost": float(info["costs"][i]),
                    "baseline_cost": float(info["grid_import_no_p2p"][i]) * grid_price_now,
                    "total_demand": float(env.demands[hid][step_count]),
                    "total_solar": float(env.solars[hid][step_count]),
                    "grid_price": grid_price_now,
                    "peer_price": peer_price_now,
                    "soc": (env.batteries[hid]["soc"] / env.batteries[hid]["max_capacity"]) if is_battery_house else np.nan,
                    "degradation_cost": ((charge_amount + discharge_amount) * env.batteries[hid]["degradation_cost_per_kwh"]) if is_battery_house else 0.0,
                    "reward": float(rewards[i]),
                })

            obs = next_obs
            step_count += 1
            if step_count >= eval_steps:
                break

        day_df = pd.DataFrame(day_logs)
        all_logs.extend(day_logs)

        # Consolidated daily summary calculation
        grouped_house = day_df.groupby("house").sum(numeric_only=True)
        grouped_step = day_df.groupby("step").sum(numeric_only=True)

        total_demand = grouped_step["total_demand"].sum()
        total_solar = grouped_step["total_solar"].sum()
        total_p2p_buy = grouped_house["p2p_buy"].sum()
        total_p2p_sell = grouped_house["p2p_sell"].sum()

        baseline_cost_per_house = grouped_house["baseline_cost"]
        actual_cost_per_house = grouped_house["actual_cost"]
        cost_savings_per_house = baseline_cost_per_house - actual_cost_per_house
        day_total_cost_savings = cost_savings_per_house.sum()

        if baseline_cost_per_house.sum() > 0:
            overall_cost_savings_pct = day_total_cost_savings / baseline_cost_per_house.sum()
        else:
            overall_cost_savings_pct = 0.0

        baseline_import_per_house = grouped_house["grid_import_no_p2p"]
        actual_import_per_house = grouped_house["grid_import_with_p2p"]
        import_reduction_per_house = baseline_import_per_house - actual_import_per_house
        day_total_import_reduction = import_reduction_per_house.sum()

        if baseline_import_per_house.sum() > 0:
            overall_import_reduction_pct = day_total_import_reduction / baseline_import_per_house.sum()
        else:
            overall_import_reduction_pct = 0.0

        fairness_cost_savings = compute_jains_fairness(cost_savings_per_house.values)
        fairness_import_reduction = compute_jains_fairness(import_reduction_per_house.values)
        fairness_rewards = compute_jains_fairness(grouped_house["reward"].values)
        fairness_p2p_buy = compute_jains_fairness(grouped_house["p2p_buy"].values)
        fairness_p2p_sell = compute_jains_fairness(grouped_house["p2p_sell"].values)
        fairness_p2p_total = compute_jains_fairness((grouped_house["p2p_buy"] + grouped_house["p2p_sell"]).values)
        day_total_degradation_cost = grouped_house["degradation_cost"].sum()

        daily_summaries.append({
            "day": day_idx + 1,
            "day_total_demand": total_demand,
            "day_total_solar": total_solar,
            "day_p2p_buy": total_p2p_buy,
            "day_p2p_sell": total_p2p_sell,
            "cost_savings_abs": day_total_cost_savings,
            "cost_savings_pct": overall_cost_savings_pct,
            "fairness_cost_savings": fairness_cost_savings,
            "grid_reduction_abs": day_total_import_reduction,
            "grid_reduction_pct": overall_import_reduction_pct,
            "fairness_grid_reduction": fairness_import_reduction,
            "fairness_reward": fairness_rewards,
            "fairness_p2p_buy": fairness_p2p_buy,
            "fairness_p2p_sell": fairness_p2p_sell,
            "fairness_p2p_total": fairness_p2p_total,
            "total_degradation_cost": day_total_degradation_cost
        })

    # Final processing and saving
    evaluation_end = time.time()
    total_eval_time = evaluation_end - evaluation_start
    print(f"\nEvaluation loop finished. Total time: {total_eval_time:.2f} seconds.")
    print(f"Device used: {device}")

    all_days_df = pd.DataFrame(all_logs)
    combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
    all_days_df.to_csv(combined_csv_path, index=False)
    print(f"Saved combined step-level logs to: {combined_csv_path}")

    step_timing_df = pd.DataFrame(step_timing_list)
    timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
    step_timing_df.to_csv(timing_csv_path, index=False)
    print(f"Saved step timing logs to: {timing_csv_path}")

    house_level_df = all_days_df.groupby("house").agg({
        "baseline_cost": "sum",
        "actual_cost": "sum",
        "grid_import_no_p2p": "sum",
        "grid_import_with_p2p": "sum",
        "degradation_cost": "sum"
    })
    house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
    house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]

    house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
    house_level_df.to_csv(house_summary_csv)
    print(f"Saved final summary per house to: {house_summary_csv}")

    fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
    fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)

    daily_summary_df = pd.DataFrame(daily_summaries)

    total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
    total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
    pct_cost_savings_all = total_cost_savings_all / total_baseline_cost_all if total_baseline_cost_all > 0 else 0.0

    total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
    total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
    pct_grid_reduction_all = total_grid_reduction_all / total_baseline_import_all if total_baseline_import_all > 0 else 0.0

    total_degradation_cost_all = daily_summary_df["total_degradation_cost"].sum()

    # Calculate alternative performance metrics

    # Grid Reduction During Solar Hours
    agg_solar_per_step = all_days_df.groupby(['day', 'step'])['total_solar'].sum()
    sunny_steps_mask = agg_solar_per_step > (SOLAR_THRESHOLD * num_agents)
    sunny_df = all_days_df.set_index(['day', 'step'])[sunny_steps_mask].reset_index()
    baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
    actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
    grid_reduction_sunny_pct = 0.0
    if baseline_import_sunny > 0:
        grid_reduction_sunny_pct = (baseline_import_sunny - actual_import_sunny) / baseline_import_sunny

    # Community Sourcing Rate
    total_p2p_buy = all_days_df['p2p_buy'].sum()
    total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
    total_procured_energy = total_p2p_buy + total_actual_grid_import
    community_sourcing_rate_pct = 0.0
    if total_procured_energy > 0:
        community_sourcing_rate_pct = total_p2p_buy / total_procured_energy

    # Solar Sharing Efficiency
    total_p2p_sell = all_days_df['p2p_sell'].sum()
    total_grid_export = all_days_df['grid_export'].sum()
    total_excess_solar = total_p2p_sell + total_grid_export
    solar_sharing_efficiency_pct = 0.0
    if total_excess_solar > 0:
        solar_sharing_efficiency_pct = total_p2p_sell / total_excess_solar

    baseline_cost_sunny = sunny_df['baseline_cost'].sum()
    actual_cost_sunny = sunny_df['actual_cost'].sum()
    cost_savings_sunny_pct = (baseline_cost_sunny - actual_cost_sunny) / baseline_cost_sunny if baseline_cost_sunny > 0 else 0.0

    total_p2p_buy = all_days_df['p2p_buy'].sum()
    total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
    community_sourcing_rate_pct = total_p2p_buy / (total_p2p_buy + total_actual_grid_import) if (total_p2p_buy + total_actual_grid_import) > 0 else 0.0

    total_p2p_sell = all_days_df['p2p_sell'].sum()
    total_grid_export = all_days_df['grid_export'].sum()
    solar_sharing_efficiency_pct = total_p2p_sell / (total_p2p_sell + total_grid_export) if (total_p2p_sell + total_grid_export) > 0 else 0.0

    final_row = {
        "day": "ALL_DAYS_SUMMARY",
        "cost_savings_abs": total_cost_savings_all,
        "cost_savings_pct": pct_cost_savings_all,
        "grid_reduction_abs": total_grid_reduction_all,
        "grid_reduction_pct": pct_grid_reduction_all,
        "fairness_cost_savings": fairness_cost_all,
        "fairness_grid_reduction": fairness_grid_all,
        "total_degradation_cost": total_degradation_cost_all,
        "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct,
        "community_sourcing_rate_pct": community_sourcing_rate_pct,
        "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct,
    }

    for col in daily_summary_df.columns:
        if col not in final_row:
            final_row[col] = np.nan
    final_row_df = pd.DataFrame([final_row])

    daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
    summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
    daily_summary_df.to_csv(summary_csv, index=False)
    print(f"Saved day-level summary with final multi-day row to: {summary_csv}")

    # Final summary printout
    print("\n================== EVALUATION SUMMARY ==================")
    print(f"Evaluation finished for {days_to_evaluate} days.\n")

    print("--- Standard Metrics (24-Hour Average) ---")
    print(f"Total grid reduction: {total_grid_reduction_all:.2f} kWh ({pct_grid_reduction_all:.2%})")
    print(f"Total cost savings: ${total_cost_savings_all:.2f} ({pct_cost_savings_all:.2%})")
    print(f"Jain's fairness on grid reduction: {fairness_grid_all:.3f}")
    print(f"Jain's fairness on cost savings: {fairness_cost_all:.3f}\n")

    print("--- Alternative Metrics (Highlighting Peak Performance) ---")
    print(f"Grid reduction during solar hours: {grid_reduction_sunny_pct:.2%}")
    print(f"Cost savings during solar hours: {cost_savings_sunny_pct:.2%}")
    print(f"Community sourcing rate: {community_sourcing_rate_pct:.2%}")
    print(f"Solar sharing efficiency: {solar_sharing_efficiency_pct:.2%}")

    print("=========================================================")

    # Plots
    plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
    plot_daily_df["day"] = plot_daily_df["day"].astype(int)

    # Daily Cost Savings Percentage
    plt.figure(figsize=(12, 6))
    plt.bar(plot_daily_df["day"], plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
    plt.xlabel("Day")
    plt.ylabel("Cost Savings (%)")
    plt.title("Daily Community Cost Savings Percentage")
    plt.xticks(plot_daily_df["day"])
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
    plt.close()

    # Daily Total Demand vs. Solar
    plt.figure(figsize=(12, 6))
    bar_width = 0.4
    days = plot_daily_df["day"]
    plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width, label="Total Demand", color='coral')
    plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width, label="Total Solar Generation", color='gold')
    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Total Community Demand vs. Solar Generation Per Day")
    plt.xticks(days)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
    plt.close()

    # Combined Time Series of Energy Flows
    step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
    step_group["global_step"] = (step_group["day"] - 1) * env.num_steps + step_group["step"]

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)

    # Subplot 1: Grid Import vs P2P Buy
    ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"], label="Grid Import (with P2P)", color='r')
    ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
    ax1.set_ylabel("Energy (kWh)")
    ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
    ax1.legend()
    ax1.grid(True, linestyle='--', alpha=0.6)

    # Subplot 2: Grid Export vs P2P Sell
    ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
    ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
    ax2.set_xlabel("Global Timestep")
    ax2.set_ylabel("Energy (kWh)")
    ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
    ax2.legend()
    ax2.grid(True, linestyle='--', alpha=0.6)

    plt.tight_layout()
    plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
    plt.close()

    # Stacked Bar of Daily Energy Sources
    daily_agg = all_days_df.groupby("day").sum(numeric_only=True)

    plt.figure(figsize=(12, 7))
    plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"], label="Grid Import (with P2P)", color='crimson')
    plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"], label="P2P Buy", color='limegreen')
    plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"], label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')

    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
    plt.xticks(daily_agg.index)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
    plt.close()

    # Fairness Metrics Over Time
    plt.figure(figsize=(12, 6))
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
    plt.xlabel("Day")
    plt.ylabel("Jain's Fairness Index")
    plt.title("Daily Fairness Metrics")
    plt.xticks(plot_daily_df["day"])
    plt.ylim(0, 1.05)
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
    plt.close()

    # Per-House Savings and Reductions
    fig, ax1 = plt.subplots(figsize=(15, 7))

    house_ids_str = house_level_df.index.astype(str)
    bar_width = 0.4
    index = np.arange(len(house_ids_str))

    color1 = 'tab:green'
    ax1.set_xlabel('House ID')
    ax1.set_ylabel('Total Cost Savings ($)', color=color1)
    ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width, label='Cost Savings', color=color1)
    ax1.tick_params(axis='y', labelcolor=color1)
    ax1.set_xticks(index)
    ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")

    ax2 = ax1.twinx()
    color2 = 'tab:blue'
    ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
    ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width, label='Import Reduction', color=color2)
    ax2.tick_params(axis='y', labelcolor=color2)

    plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {days_to_evaluate} days)')
    fig.tight_layout()
    plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
    plt.close()

    # Price Dynamics for a Single Day
    day1_prices = all_days_df[all_days_df['day'] == 1][['step', 'grid_price', 'peer_price']].drop_duplicates()
    plt.figure(figsize=(12, 6))
    plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
    plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
    plt.xlabel("Timestep of Day")
    plt.ylabel("Price ($/kWh)")
    plt.title("Price Dynamics on Day 1")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
    plt.close()

    # Battery State of Charge for Sample Houses
    day1_df = all_days_df[all_days_df['day'] == 1]
    battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()

    if len(battery_houses) > 0:
        sample_houses = battery_houses[:min(4, len(battery_houses))]
        plt.figure(figsize=(12, 6))
        for house in sample_houses:
            house_df = day1_df[day1_df['house'] == house]
            plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')

        plt.xlabel("Timestep of Day")
        plt.ylabel("State of Charge (%)")
        plt.title("Battery SoC on Day 1 for Sample Houses")
        plt.legend()
        plt.grid(True, linestyle='--', alpha=0.6)
        plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
        plt.close()

    print("All plots have been generated and saved. Evaluation complete.")

if __name__ == "__main__":
    main()
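For reference, the `compute_jains_fairness` helper deleted above implements Jain's fairness index, J(x) = (sum_i x_i)^2 / (n * sum_i x_i^2), which equals 1.0 when all n houses benefit equally and falls to its lower bound 1/n when a single house captures the entire benefit. A minimal self-contained check (the savings arrays are illustrative, not from any run):

import numpy as np

def compute_jains_fairness(values: np.ndarray) -> float:
    if len(values) == 0:
        return 0.0
    if np.all(values == 0):
        return 1.0
    return (values.sum()) ** 2 / (len(values) * (values ** 2).sum())

# Four houses with equal savings -> perfectly fair
print(compute_jains_fairness(np.array([2.0, 2.0, 2.0, 2.0])))  # 1.0
# One house captures all savings -> lower bound 1/n
print(compute_jains_fairness(np.array([8.0, 0.0, 0.0, 0.0])))  # 0.25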
Other_algorithms/Flat_System/PG/pg_train.py DELETED
@@ -1,373 +0,0 @@
import os
import sys
import re
import numpy as np
import torch
import matplotlib.pyplot as plt
import pandas as pd
import time
from datetime import datetime

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from solar_sys_environment import SolarSys
from PG.trainer.pg import PGAgent

def main():
    STATE_TO_RUN = "pennsylvania"  # "pennsylvania" or "colorado" or "oklahoma"

    # Set the path to your training data
    DATA_FILE_PATH = "/path/to/project/training/5houses_152days_TRAIN.csv"
    num_episodes = 10000
    batch_size = 256
    checkpoint_interval = 100000
    window_size = 32

    env = SolarSys(
        data_path=DATA_FILE_PATH,
        state=STATE_TO_RUN,
        time_freq="15T"
    )

    # Sanity check: env I/O shapes
    print("Observation space:", env.observation_space)
    print("Action space :", env.action_space)

    # Reset and inspect obs
    obs = env.reset()
    print(f"Reset returned {len(obs)} agent observations; each obs shape: {np.array(obs).shape}")

    # Sample random actions and do one step
    dummy_actions = np.random.rand(env.num_agents, env.action_space.shape[1]).astype(np.float32)
    next_obs, rewards, done, info = env.step(dummy_actions)
    print(f"Step outputs → next_obs: {len(next_obs)}×{np.array(next_obs).shape[1]}, "
          f"rewards: {len(rewards)}, done: {done}")
    print("Info keys:", list(info.keys()))

    # Count the number of houses in each group
    env.group_counts = {
        0: env.agent_groups.count(0),
        1: env.agent_groups.count(1)
    }
    print(f"Number of houses in each group: {env.group_counts}")

    max_steps = env.num_steps

    # Dims from the env
    num_agents = env.num_agents
    local_state_dim = env.observation_space.shape[1]
    action_dim = env.action_space.shape[1]

    # Build a unique run directory
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"pg_{STATE_TO_RUN}_{num_agents}agents_{num_episodes}eps_{timestamp}"
    root_dir = os.path.join("FINALE_FINALE_FINALE", run_name)
    os.makedirs(root_dir, exist_ok=True)
    print(f"Saving training outputs to: {root_dir}")

    logs_dir = os.path.join(root_dir, "logs")
    plots_dir = os.path.join(root_dir, "plots")
    os.makedirs(logs_dir, exist_ok=True)
    os.makedirs(plots_dir, exist_ok=True)

    # Create independent PG agents (shared actor-critic model with a value baseline)
    pg_agents = [
        PGAgent(
            state_dim=local_state_dim,
            action_dim=action_dim,
            lr=2e-4,
            gamma=0.95,
            critic_loss_coef=0.5
        )
        for _ in range(num_agents)
    ]

    # Tracking / Logging Variables
    episode_rewards = []
    episode_total_rewards = []
    block_mean_rewards = []
    block_total_rewards = []

    agent_rewards_log = [[] for _ in range(num_agents)]
    best_mean_reward = -1e9
    best_model_path = os.path.join(logs_dir, "best_model.pth")

    daily_rewards = []
    monthly_rewards = []

    training_start_time = time.time()
    episode_durations = []
    total_steps_global = 0
    episode_log_data = []
    performance_metrics_log = []

    agent_charge_log = [[] for _ in range(num_agents)]
    agent_discharge_log = [[] for _ in range(num_agents)]

    # Training Loop
    for episode in range(1, num_episodes + 1):
        episode_start_time = time.time()

        obs = np.array(env.reset(), dtype=np.float32)

        if episode > 1:
            last_episode_metrics = env.get_episode_metrics()
            last_episode_metrics['Episode'] = episode - 1
            performance_metrics_log.append(last_episode_metrics)

        total_reward = np.zeros(num_agents, dtype=np.float32)
        done = False
        step_count = 0
        day_logs = []
        episode_charges = [[] for _ in range(num_agents)]
        episode_discharges = [[] for _ in range(num_agents)]

        # Main training loop for a single episode
        while not done:
            # Action Selection: Each PG agent acts independently
            actions = []
            for i, agent in enumerate(pg_agents):
                agent_action = agent.select_action(obs[i])
                actions.append(agent_action)
            actions = np.array(actions, dtype=np.float32)

            # Step the environment
            next_obs_list, rewards, done, info = env.step(actions)
            next_obs = np.array(next_obs_list, dtype=np.float32)

            # Store Rewards: Each agent stores its own reward
            for i, agent in enumerate(pg_agents):
                agent.rewards.append(rewards[i])
                agent.dones.append(done)

            total_reward += rewards
            obs = next_obs
            step_count += 1
            total_steps_global += 1

            day_logs.append({
                "step": step_count - 1,
                "grid_import_no_p2p": info["grid_import_no_p2p"],
                "grid_import_with_p2p": info["grid_import_with_p2p"],
                "p2p_buy": info["p2p_buy"],
                "p2p_sell": info["p2p_sell"],
                "costs": info["costs"],
                "charge_amount": info.get("charge_amount", np.zeros(num_agents)),
                "discharge_amount": info.get("discharge_amount", np.zeros(num_agents))
            })

            # Track actual charge/discharge actions from the environment
            for i in range(num_agents):
                episode_charges[i].append(info["charge_amount"][i])
                episode_discharges[i].append(info["discharge_amount"][i])

            if step_count >= max_steps:
                break

        # After each episode
        sum_ep_reward = float(np.sum(total_reward))
        mean_ep_reward = float(np.mean(total_reward))

        episode_total_rewards.append(sum_ep_reward)
        episode_rewards.append(mean_ep_reward)
        daily_rewards.append(mean_ep_reward)

        if len(daily_rewards) % window_size == 0:
            last_totals = episode_total_rewards[-window_size:]
            block_sum = sum(last_totals)
            block_total_rewards.append(block_sum)

            last_means = daily_rewards[-window_size:]
            block_mean = sum(last_means) / window_size
            block_mean_rewards.append(block_mean)

            block_idx = len(block_mean_rewards)
            print(
                f"→ Completed Block {block_idx} "
                f"| Episodes {(block_idx - 1) * window_size + 1}–{block_idx * window_size} "
                f"| Block Total Reward: {block_sum:.3f} "
                f"| Block Mean Reward: {block_mean:.3f}"
            )

        for i in range(num_agents):
            agent_rewards_log[i].append(total_reward[i])
            agent_charge_log[i].append(np.mean(episode_charges[i]))
            agent_discharge_log[i].append(np.mean(episode_discharges[i]))

        steps_data = []
        for entry in day_logs:
            steps_data.append({
                "step": entry["step"],
                "p2p_buy_sum": float(np.sum(entry["p2p_buy"])),
                "p2p_sell_sum": float(np.sum(entry["p2p_sell"])),
                "grid_import_no_p2p_sum": float(np.sum(entry["grid_import_no_p2p"])),
                "grid_import_with_p2p_sum": float(np.sum(entry["grid_import_with_p2p"]))
            })

        baseline_cost = np.sum([np.sum(entry["grid_import_no_p2p"]) * env.get_grid_price(entry["step"])
                                for entry in day_logs])
        actual_cost = np.sum([np.sum(entry["costs"]) for entry in day_logs])
        cost_reduction = (baseline_cost - actual_cost) / (baseline_cost + 1e-8)

        # UPDATE STEP: Update each PG agent independently
        for agent in pg_agents:
            agent.update()

        # Save best models
        if mean_ep_reward > best_mean_reward:
            best_mean_reward = mean_ep_reward
            for i, agent in enumerate(pg_agents):
                agent_path = os.path.join(logs_dir, f"best_model_agent_{i}.pth")
                agent.save(agent_path)

        if episode % checkpoint_interval == 0:
            for i, agent in enumerate(pg_agents):
                ckpt_path = os.path.join(logs_dir, f"checkpoint_{episode}_agent_{i}.pth")
                agent.save(ckpt_path)

        episode_end_time = time.time()
        episode_duration = episode_end_time - episode_start_time

        print(
            f"Episode {episode}/{num_episodes} "
            f"| Time per Episode: {episode_duration:.2f}s "
            f"| Steps: {step_count} "
            f"| Mean Reward: {mean_ep_reward:.3f} "
            f"| Cost Reduction: {cost_reduction:.2%}"
        )

        episode_log_data.append({
            "Episode": episode,
            "Steps": step_count,
            "Mean_Reward": mean_ep_reward,
            "Total_Reward": sum_ep_reward,
            "Cost_Reduction_Pct": cost_reduction * 100,
            "Baseline_Cost": baseline_cost,
            "Actual_Cost": actual_cost,
            "Episode_Duration": episode_duration,
            "Total_Charge": np.sum([np.sum(entry["charge_amount"]) for entry in day_logs]),
            "Total_Discharge": np.sum([np.sum(entry["discharge_amount"]) for entry in day_logs])
        })

        # Periodic performance logging
        if episode % 100 == 0:
            avg_reward_last_100 = np.mean(daily_rewards[-100:]) if len(daily_rewards) >= 100 else np.mean(daily_rewards)
            print(f" → Average reward (last 100 episodes): {avg_reward_last_100:.3f}")

    # Final episode metrics
    final_episode_metrics = env.get_episode_metrics()
    final_episode_metrics['Episode'] = num_episodes
    performance_metrics_log.append(final_episode_metrics)

    training_end_time = time.time()
    total_training_time = training_end_time - training_start_time

    # Save final models
    print("\nSaving final models...")
    for i, agent in enumerate(pg_agents):
        final_path = os.path.join(logs_dir, f"final_model_agent_{i}.pth")
        agent.save(final_path)

    np.save(os.path.join(logs_dir, "agent_rewards.npy"), np.array(agent_rewards_log))
    np.save(os.path.join(logs_dir, "mean_rewards.npy"), np.array(episode_rewards))
    np.save(os.path.join(logs_dir, "total_rewards.npy"), np.array(episode_total_rewards))

    # Create DataFrames
    df_rewards_log = pd.DataFrame(episode_log_data)
    df_perf_log = pd.DataFrame(performance_metrics_log)
    df_final_log = pd.merge(df_rewards_log, df_perf_log.drop(columns=[
        'degradation_cost_over_time',
        'cost_savings_over_time',
        'grid_reduction_over_time'
    ]), on="Episode")

    # Helper: centered moving average
    def moving_avg(series, window):
        return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()

    ma_window = 300
    episodes = np.arange(1, num_episodes + 1)

    # Mean Reward moving average
    reward_ma = moving_avg(df_final_log["Mean_Reward"], ma_window)
    plt.figure(figsize=(8, 5))
    plt.plot(episodes, reward_ma, linewidth=2, label=f"Mean Reward MA (win={ma_window})")
    plt.xlabel("Episode")
    plt.ylabel("Mean Reward")
    plt.title("PG: Mean Reward Moving Average")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(plots_dir, "mean_reward_ma.png"), dpi=200)
    plt.close()

    # Total Reward moving average
    total_ma = moving_avg(df_final_log["Total_Reward"], ma_window)
    plt.figure(figsize=(8, 5))
    plt.plot(episodes, total_ma, linewidth=2, label=f"Total Reward MA (win={ma_window})")
    plt.xlabel("Episode")
    plt.ylabel("Total Reward")
    plt.title("PG: Total Reward Moving Average")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(plots_dir, "total_reward_ma.png"), dpi=200)
    plt.close()

    # Cost Reduction (%) moving average
    cost_ma = moving_avg(df_final_log["Cost_Reduction_Pct"], ma_window)
    plt.figure(figsize=(8, 5))
    plt.plot(episodes, cost_ma, linewidth=2, label="Cost Reduction MA (%)")
    plt.xlabel("Episode")
    plt.ylabel("Cost Reduction (%)")
    plt.title("PG: Cost Reduction Moving Average")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(plots_dir, "cost_reduction_ma.png"), dpi=200)
    plt.close()

    # Battery Degradation Cost moving average
    degradation_ma = moving_avg(df_final_log["battery_degradation_cost_total"], ma_window)
    plt.figure(figsize=(8, 5))
    plt.plot(episodes, degradation_ma, linewidth=2, label=f"Degradation Cost MA (win={ma_window})", color='purple')
    plt.xlabel("Episode")
    plt.ylabel("Total Degradation Cost ($)")
    plt.title("PG: Battery Degradation Cost Moving Average")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(plots_dir, "degradation_cost_ma.png"), dpi=200)
    plt.close()

    print(f"\nAll moving-average plots saved to: {plots_dir}")

    # Save Final Logs to CSV
    total_time_row = pd.DataFrame([{
        "Episode": "Total_Training_Time",
        "Episode_Duration": total_training_time
    }])
    df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)

    log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")

    columns_to_save = [
        "Episode",
        "Mean_Reward",
        "Total_Reward",
        "Cost_Reduction_Pct",
        "Episode_Duration",
        "battery_degradation_cost_total",
    ]
    df_to_save = df_to_save[columns_to_save]

    df_to_save.to_csv(log_csv_path, index=False)

    print(f"Saved comprehensive training performance log to: {log_csv_path}")

    # Final Timings Printout
    print("\n" + "="*50)
    print("TRAINING COMPLETE".center(50))
    print(f"Total training time: {total_training_time:.2f} seconds")
    print(f"Device used: {pg_agents[0].device}")
    print("="*50)


if __name__ == "__main__":
    main()
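The plotting section above smooths each training curve with `moving_avg`, a centered rolling mean where `min_periods=1` lets the window shrink at both edges instead of producing NaNs. A self-contained sketch of the same helper on an illustrative series:

import numpy as np
import pandas as pd

def moving_avg(series, window):
    # Centered window; min_periods=1 shrinks the window at both edges
    return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()

print(moving_avg([1, 2, 3, 4, 5], window=3))  # [1.5 2.  3.  4.  4.5]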
Other_algorithms/Flat_System/PG/trainer/__init__.py DELETED
File without changes
Other_algorithms/Flat_System/PG/trainer/pg.py DELETED
@@ -1,96 +0,0 @@
import torch
import torch.nn as nn
from torch.distributions import Normal
import numpy as np

class SharedActorCritic(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(SharedActorCritic, self).__init__()
        self.feature_extractor = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU()
        )
        self.actor_head = nn.Linear(128, action_dim * 2)
        self.critic_head = nn.Linear(128, 1)

    def forward(self, state):
        features = self.feature_extractor(state)
        action_params = self.actor_head(features)
        mean, log_std = torch.chunk(action_params, 2, dim=-1)
        value = self.critic_head(features)
        return mean, log_std, value

class PGAgent:
    def __init__(self, state_dim, action_dim, lr=3e-4, gamma=0.95, gae_lambda=0.95, critic_loss_coef=0.5):
        self.gamma = gamma
        self.gae_lambda = gae_lambda
        self.critic_loss_coef = critic_loss_coef
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = SharedActorCritic(state_dim, action_dim).to(self.device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        self.log_probs = []
        self.rewards = []
        self.values = []
        self.dones = []
        self.log_std_min = -20
        self.log_std_max = 2

    def select_action(self, state):
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        mean, log_std, value = self.model(state_tensor)
        log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max)
        std = torch.exp(log_std)
        dist = Normal(mean, std)
        action = dist.sample()
        log_prob = dist.log_prob(action).sum(dim=-1)
        self.log_probs.append(log_prob)
        self.values.append(value)
        return np.clip(action.squeeze(0).cpu().detach().numpy(), 0.0, 1.0)

    def update(self):
        if not self.rewards:
            return
        next_value = 0
        values = torch.cat(self.values).squeeze().detach().cpu().numpy()
        advantages, returns = self._calculate_gae_advantages(self.rewards, values, self.dones, next_value)
        log_probs = torch.cat(self.log_probs)
        advantages = torch.tensor(advantages, dtype=torch.float32, device=self.device)
        returns = torch.tensor(returns, dtype=torch.float32, device=self.device)
        advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
        actor_loss = -(log_probs * advantages).mean()
        critic_values = torch.cat(self.values).squeeze()
        critic_loss = nn.MSELoss()(critic_values, returns)
        total_loss = actor_loss + self.critic_loss_coef * critic_loss
        self.optimizer.zero_grad()
        total_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
        self.optimizer.step()
        self.rewards = []
        self.log_probs = []
        self.values = []
        self.dones = []

    def _calculate_gae_advantages(self, rewards, values, dones, next_value):
        advantages = np.zeros_like(rewards, dtype=np.float32)
        last_advantage = 0
        for t in reversed(range(len(rewards))):
            mask = 1.0 - dones[t]
            v_next = values[t + 1] if t < len(rewards) - 1 else next_value
            delta = rewards[t] + self.gamma * v_next * mask - values[t]
            last_advantage = delta + self.gamma * self.gae_lambda * last_advantage * mask
            advantages[t] = last_advantage
        returns = advantages + values
        return advantages, returns

    def save(self, path):
        torch.save({
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
        }, path)

    def load(self, path):
        checkpoint = torch.load(path, map_location=self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
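`_calculate_gae_advantages` above is the standard generalized advantage estimation (GAE) recursion: delta_t = r_t + gamma * V(s_{t+1}) * (1 - d_t) - V(s_t), A_t = delta_t + gamma * lambda * (1 - d_t) * A_{t+1}, with returns recovered as A_t + V(s_t). A minimal numeric sketch of the same loop (the rewards, values, and done flags below are illustrative, not from any run):

import numpy as np

gamma, lam = 0.95, 0.95
rewards = [1.0, 0.0, 1.0]
values = np.array([0.5, 0.4, 0.3], dtype=np.float32)
dones = [0.0, 0.0, 1.0]  # episode terminates at the last step
next_value = 0.0          # bootstrap value past the end of the rollout

advantages = np.zeros(len(rewards), dtype=np.float32)
last_advantage = 0.0
for t in reversed(range(len(rewards))):
    mask = 1.0 - dones[t]
    v_next = values[t + 1] if t < len(rewards) - 1 else next_value
    delta = rewards[t] + gamma * v_next * mask - values[t]
    last_advantage = delta + gamma * lam * last_advantage * mask
    advantages[t] = last_advantage
returns = advantages + values
print(advantages, returns)  # advantages[2] == 1.0 - 0.3 == 0.7, since done masks the bootstrap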
Other_algorithms/Flat_System/maddpg/__init__.py DELETED
File without changes
Other_algorithms/Flat_System/maddpg/maddpg_evaluation.py DELETED
@@ -1,495 +0,0 @@
1
- # maddpg_evaluate.py
2
- import os
3
- import sys
4
- import time
5
- import re
6
- import numpy as np
7
- import pandas as pd
8
- import matplotlib.pyplot as plt
9
- import torch
10
- from datetime import datetime
11
-
12
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
13
-
14
- from solar_sys_environment import SolarSys
15
- from maddpg.trainer.maddpg import MADDPG
16
-
17
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
-
19
- def compute_jains_fairness(values: np.ndarray) -> float:
20
- if len(values) == 0:
21
- return 0.0
22
- if np.all(values == 0):
23
- return 1.0
24
- num = (values.sum())**2
25
- den = len(values) * (values**2).sum()
26
- return num / den
27
-
28
- def main():
29
- # User parameters
30
- MODEL_PATH = "/path/to/project/maddpg_para_sharing_oklahoma_5agents_10000eps/logs/best_model.pth"
31
- DATA_PATH = "/path/to/project/testing/5houses_30days_TEST.csv"
32
- DAYS_TO_EVALUATE = 30
33
-
34
- model_path = MODEL_PATH
35
- data_path = DATA_PATH
36
- days_to_evaluate = DAYS_TO_EVALUATE
37
- SOLAR_THRESHOLD = 0.4
38
-
39
- state_match = re.search(r"maddpg_para_sharing_(oklahoma|colorado|pennsylvania)_", model_path)
40
- if not state_match:
41
- raise ValueError(
42
- "Could not automatically detect the state (oklahoma, colorado, or pennsylvania) "
43
- "from the model path. Please ensure your model's parent folder is named correctly, "
44
- "e.g., 'maddpg_para_sharing_oklahoma_...'"
45
- )
46
- detected_state = state_match.group(1)
47
- print(f"--- Detected state: {detected_state.upper()} ---")
48
-
49
- # Env setup
50
- env = SolarSys(
51
- data_path=data_path,
52
- state=detected_state,
53
- time_freq="15T"
54
- )
55
- eval_steps = env.num_steps
56
- house_ids = env.house_ids
57
- num_agents = env.num_agents
58
-
59
- # Generate a unique eval run folder
60
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
61
- run_name = f"eval_maddpg_para_sharing_{num_agents}agents_{days_to_evaluate}days_{timestamp}"
62
- output_folder = os.path.join("runs_with_battery", run_name)
63
- logs_dir = os.path.join(output_folder, "logs")
64
- plots_dir = os.path.join(output_folder, "plots")
65
- for d in (logs_dir, plots_dir):
66
- os.makedirs(d, exist_ok=True)
67
- print(f"Saving evaluation outputs to: {output_folder}")
68
-
69
- local_state_dim = env.observation_space.shape[1]
70
- action_dim = env.action_space.shape[1]
71
-
72
- # Instantiate MADDPG agent
73
- maddpg = MADDPG(
74
- num_agents=num_agents,
75
- state_dim=local_state_dim,
76
- action_dim=action_dim
77
- )
78
-
79
- # Load MADDPG checkpoint
80
- maddpg.load(model_path)
81
-
82
- maddpg.actor.eval()
83
- maddpg.critic.eval()
84
- maddpg.target_actor.eval()
85
- maddpg.target_critic.eval()
86
-
87
- # Prepare logs
88
- all_logs = []
89
- daily_summaries = []
90
- step_timing_list = []
91
-
92
- evaluation_start = time.time()
93
-
94
- for day_idx in range(days_to_evaluate):
95
- obs = env.reset()
96
- done = False
97
- step_count = 0
98
- day_logs = []
99
-
100
- while not done:
101
- step_start_time = time.time()
102
-
103
- # Select actions with MADDPG
104
- actions = maddpg.select_actions(obs, evaluate=True)
105
-
106
- next_obs, rewards, done, info = env.step(actions)
107
-
108
- # Consolidated Logging
109
- step_end_time = time.time()
110
- step_duration = step_end_time - step_start_time
111
-
112
- print(f"[Day {day_idx+1}, Step {step_count}] Step time: {step_duration:.6f} seconds")
113
-
114
- step_timing_list.append({
115
- "day": day_idx + 1,
116
- "step": step_count,
117
- "step_time_s": step_duration
118
- })
119
-
120
- grid_price_now = env.get_grid_price(step_count)
121
- peer_price_now = info.get("peer_price", env.get_peer_price(step_count,
122
- float(info["p2p_sell"].sum()),
123
- float(info["p2p_buy"].sum())))
124
-
125
- for i, hid in enumerate(house_ids):
126
- is_battery_house = hid in env.batteries
127
- p2p_buy = float(info["p2p_buy"][i])
128
- p2p_sell = float(info["p2p_sell"][i])
129
- charge_amount = float(info.get("charge_amount", [0]*num_agents)[i])
130
- discharge_amount = float(info.get("discharge_amount", [0]*num_agents)[i])
131
-
132
- day_logs.append({
133
- "day": day_idx + 1,
134
- "step": step_count,
135
- "house": hid,
136
- "grid_import_no_p2p": float(info["grid_import_no_p2p"][i]),
137
- "grid_import_with_p2p": float(info["grid_import_with_p2p"][i]),
138
- "grid_export": float(info.get("grid_export", [0]*num_agents)[i]),
139
- "p2p_buy": p2p_buy,
140
- "p2p_sell": p2p_sell,
141
- "actual_cost": float(info["costs"][i]),
142
- "baseline_cost": float(info["grid_import_no_p2p"][i]) * grid_price_now,
143
- "total_demand": float(env.demands[hid][step_count]),
144
- "total_solar": float(env.solars[hid][step_count]),
145
- "grid_price": grid_price_now,
146
- "peer_price": peer_price_now,
147
- "soc": (env.batteries[hid]["soc"] / env.batteries[hid]["max_capacity"]) if is_battery_house else np.nan,
148
- "degradation_cost": ((charge_amount + discharge_amount) * env.batteries[hid]["degradation_cost_per_kwh"]) if is_battery_house else 0.0,
149
- "reward": float(rewards[i]),
150
- })
151
-
152
- obs = next_obs
153
- step_count += 1
154
- if step_count >= eval_steps:
155
- break
156
-
157
- day_df = pd.DataFrame(day_logs)
158
- all_logs.extend(day_logs)
159
-
160
- # Consolidated daily summary calculation
161
- grouped_house = day_df.groupby("house").sum(numeric_only=True)
162
- grouped_step = day_df.groupby("step").sum(numeric_only=True)
163
-
164
- total_demand = grouped_step["total_demand"].sum()
165
- total_solar = grouped_step["total_solar"].sum()
166
- total_p2p_buy = grouped_house["p2p_buy"].sum()
167
- total_p2p_sell = grouped_house["p2p_sell"].sum()
168
-
169
- baseline_cost_per_house = grouped_house["baseline_cost"]
170
- actual_cost_per_house = grouped_house["actual_cost"]
171
- cost_savings_per_house = baseline_cost_per_house - actual_cost_per_house
172
- day_total_cost_savings = cost_savings_per_house.sum()
173
-
174
- if baseline_cost_per_house.sum() > 0:
175
- overall_cost_savings_pct = day_total_cost_savings / baseline_cost_per_house.sum()
176
- else:
177
- overall_cost_savings_pct = 0.0
178
-
179
- baseline_import_per_house = grouped_house["grid_import_no_p2p"]
180
- actual_import_per_house = grouped_house["grid_import_with_p2p"]
181
- import_reduction_per_house = baseline_import_per_house - actual_import_per_house
182
- day_total_import_reduction = import_reduction_per_house.sum()
183
-
184
- if baseline_import_per_house.sum() > 0:
185
- overall_import_reduction_pct = day_total_import_reduction / baseline_import_per_house.sum()
186
- else:
187
- overall_import_reduction_pct = 0.0
188
-
189
- fairness_cost_savings = compute_jains_fairness(cost_savings_per_house.values)
190
- fairness_import_reduction = compute_jains_fairness(import_reduction_per_house.values)
191
- fairness_rewards = compute_jains_fairness(grouped_house["reward"].values)
192
- fairness_p2p_buy = compute_jains_fairness(grouped_house["p2p_buy"].values)
193
- fairness_p2p_sell = compute_jains_fairness(grouped_house["p2p_sell"].values)
194
- fairness_p2p_total = compute_jains_fairness((grouped_house["p2p_buy"] + grouped_house["p2p_sell"]).values)
195
- day_total_degradation_cost = grouped_house["degradation_cost"].sum()
196
-
197
- daily_summaries.append({
198
- "day": day_idx + 1,
199
- "day_total_demand": total_demand,
200
- "day_total_solar": total_solar,
201
- "day_p2p_buy": total_p2p_buy,
202
- "day_p2p_sell": total_p2p_sell,
203
- "cost_savings_abs": day_total_cost_savings,
204
- "cost_savings_pct": overall_cost_savings_pct,
205
- "fairness_cost_savings": fairness_cost_savings,
206
- "grid_reduction_abs": day_total_import_reduction,
207
- "grid_reduction_pct": overall_import_reduction_pct,
208
- "fairness_grid_reduction": fairness_import_reduction,
209
- "fairness_reward": fairness_rewards,
210
- "fairness_p2p_buy": fairness_p2p_buy,
211
- "fairness_p2p_sell": fairness_p2p_sell,
212
- "fairness_p2p_total": fairness_p2p_total,
213
- "total_degradation_cost": day_total_degradation_cost
214
- })
215
-
216
- # Final processing and saving
217
- evaluation_end = time.time()
218
- total_eval_time = evaluation_end - evaluation_start
219
- print(f"\nEvaluation loop finished. Total time: {total_eval_time:.2f} seconds.")
220
-
221
- all_days_df = pd.DataFrame(all_logs)
222
- combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
223
- all_days_df.to_csv(combined_csv_path, index=False)
224
- print(f"Saved combined step-level logs to: {combined_csv_path}")
225
-
226
-     step_timing_df = pd.DataFrame(step_timing_list)
-     timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
-     step_timing_df.to_csv(timing_csv_path, index=False)
-     print(f"Saved step timing logs to: {timing_csv_path}")
-
-     house_level_df = all_days_df.groupby("house").agg({
-         "baseline_cost": "sum",
-         "actual_cost": "sum",
-         "grid_import_no_p2p": "sum",
-         "grid_import_with_p2p": "sum",
-         "degradation_cost": "sum"
-     })
-     house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
-     house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]
-
-     house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
-     house_level_df.to_csv(house_summary_csv)
-     print(f"Saved final summary per house to: {house_summary_csv}")
-
-     fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
-     fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)
-
-     daily_summary_df = pd.DataFrame(daily_summaries)
-
-     total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
-     total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
-     pct_cost_savings_all = total_cost_savings_all / total_baseline_cost_all if total_baseline_cost_all > 0 else 0.0
-
-     total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
-     total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
-     pct_grid_reduction_all = total_grid_reduction_all / total_baseline_import_all if total_baseline_import_all > 0 else 0.0
-
-     total_degradation_cost_all = daily_summary_df["total_degradation_cost"].sum()
-
-     # Calculate alternative performance metrics
-
-     # Grid reduction during solar hours (steps where aggregate solar exceeds the threshold)
-     agg_solar_per_step = all_days_df.groupby(['day', 'step'])['total_solar'].sum()
-     sunny_steps_mask = agg_solar_per_step > (SOLAR_THRESHOLD * num_agents)
-     sunny_df = all_days_df.set_index(['day', 'step'])[sunny_steps_mask].reset_index()
-     baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
-     actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
-     grid_reduction_sunny_pct = 0.0
-     if baseline_import_sunny > 0:
-         grid_reduction_sunny_pct = (baseline_import_sunny - actual_import_sunny) / baseline_import_sunny
-
-     # Community Sourcing Rate
-     total_p2p_buy = all_days_df['p2p_buy'].sum()
-     total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
-     total_procured_energy = total_p2p_buy + total_actual_grid_import
-     community_sourcing_rate_pct = 0.0
-     if total_procured_energy > 0:
-         community_sourcing_rate_pct = total_p2p_buy / total_procured_energy
-
-     # Solar Sharing Efficiency
-     total_p2p_sell = all_days_df['p2p_sell'].sum()
-     total_grid_export = all_days_df['grid_export'].sum()
-     total_excess_solar = total_p2p_sell + total_grid_export
-     solar_sharing_efficiency_pct = 0.0
-     if total_excess_solar > 0:
-         solar_sharing_efficiency_pct = total_p2p_sell / total_excess_solar
-
-     # Cost savings in sunny hours
-     baseline_cost_sunny = sunny_df['baseline_cost'].sum()
-     actual_cost_sunny = sunny_df['actual_cost'].sum()
-     cost_savings_sunny_pct = (baseline_cost_sunny - actual_cost_sunny) / baseline_cost_sunny if baseline_cost_sunny > 0 else 0.0
-
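-     # Annotation: written out, the two ratios above are
-     #   community_sourcing_rate  = p2p_buy  / (p2p_buy  + grid_import_with_p2p)
-     #   solar_sharing_efficiency = p2p_sell / (p2p_sell + grid_export)
-     # i.e. the share of procured energy bought from peers rather than the grid,
-     # and the share of excess solar sold to peers rather than exported.
-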
-     final_row = {
-         "day": "ALL_DAYS_SUMMARY",
-         "cost_savings_abs": total_cost_savings_all,
-         "cost_savings_pct": pct_cost_savings_all,
-         "grid_reduction_abs": total_grid_reduction_all,
-         "grid_reduction_pct": pct_grid_reduction_all,
-         "fairness_cost_savings": fairness_cost_all,
-         "fairness_grid_reduction": fairness_grid_all,
-         "total_degradation_cost": total_degradation_cost_all,
-         "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct,
-         "community_sourcing_rate_pct": community_sourcing_rate_pct,
-         "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct,
-     }
-
-     for col in daily_summary_df.columns:
-         if col not in final_row:
-             final_row[col] = np.nan
-     final_row_df = pd.DataFrame([final_row])
-
-     daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
-     summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
-     daily_summary_df.to_csv(summary_csv, index=False)
-     print(f"Saved day-level summary with final multi-day row to: {summary_csv}")
-
-     # Final summary printout
-     print("\n================== EVALUATION SUMMARY ==================")
-     print(f"Evaluation finished for {days_to_evaluate} days.\n")
-
-     print("--- Standard Metrics (24-Hour Average) ---")
-     print(f"Total grid reduction: {total_grid_reduction_all:.2f} kWh ({pct_grid_reduction_all:.2%})")
-     print(f"Total cost savings: ${total_cost_savings_all:.2f} ({pct_cost_savings_all:.2%})")
-     print(f"Jain's fairness on grid reduction: {fairness_grid_all:.3f}")
-     print(f"Jain's fairness on cost savings: {fairness_cost_all:.3f}\n")
-
-     print("--- Alternative Metrics (Highlighting Peak Performance) ---")
-     print(f"Grid reduction during solar hours: {grid_reduction_sunny_pct:.2%}")
-     print(f"Cost savings during solar hours: {cost_savings_sunny_pct:.2%}")
-     print(f"Community sourcing rate: {community_sourcing_rate_pct:.2%}")
-     print(f"Solar sharing efficiency: {solar_sharing_efficiency_pct:.2%}")
-
-     print("=========================================================")
-
-     # Plots
-     plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
-     plot_daily_df["day"] = plot_daily_df["day"].astype(int)
-
-     # Daily Cost Savings Percentage
-     plt.figure(figsize=(12, 6))
-     plt.bar(plot_daily_df["day"], plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
-     plt.xlabel("Day")
-     plt.ylabel("Cost Savings (%)")
-     plt.title("Daily Community Cost Savings Percentage")
-     plt.xticks(plot_daily_df["day"])
-     plt.grid(axis='y', linestyle='--', alpha=0.7)
-     plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
-     plt.close()
-
-     # Daily Total Demand vs. Solar
-     plt.figure(figsize=(12, 6))
-     bar_width = 0.4
-     days = plot_daily_df["day"]
-     plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width, label="Total Demand", color='coral')
-     plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width, label="Total Solar Generation", color='gold')
-     plt.xlabel("Day")
-     plt.ylabel("Energy (kWh)")
-     plt.title("Total Community Demand vs. Solar Generation Per Day")
-     plt.xticks(days)
-     plt.legend()
-     plt.grid(axis='y', linestyle='--', alpha=0.7)
-     plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
-     plt.close()
-
-     # Combined Time Series of Energy Flows
-     step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
-     step_group["global_step"] = (step_group["day"] - 1) * env.num_steps + step_group["step"]
-
-     fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)
-
-     # Subplot 1: Grid Import vs P2P Buy
-     ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"], label="Grid Import (with P2P)", color='r')
-     ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
-     ax1.set_ylabel("Energy (kWh)")
-     ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
-     ax1.legend()
-     ax1.grid(True, linestyle='--', alpha=0.6)
-
-     # Subplot 2: Grid Export vs P2P Sell
-     ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
-     ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
-     ax2.set_xlabel("Global Timestep")
-     ax2.set_ylabel("Energy (kWh)")
-     ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
-     ax2.legend()
-     ax2.grid(True, linestyle='--', alpha=0.6)
-
-     plt.tight_layout()
-     plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
-     plt.close()
-
-     # Stacked Bar of Daily Energy Sources
-     daily_agg = all_days_df.groupby("day").sum(numeric_only=True)
-
-     plt.figure(figsize=(12, 7))
-     plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"], label="Grid Import (with P2P)", color='crimson')
-     plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"], label="P2P Buy", color='limegreen')
-     plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"], label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')
-
-     plt.xlabel("Day")
-     plt.ylabel("Energy (kWh)")
-     plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
-     plt.xticks(daily_agg.index)
-     plt.legend()
-     plt.grid(axis='y', linestyle='--', alpha=0.7)
-     plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
-     plt.close()
-
-     # Fairness Metrics Over Time
-     plt.figure(figsize=(12, 6))
-     plt.plot(plot_daily_df["day"], plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
-     plt.plot(plot_daily_df["day"], plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
-     plt.plot(plot_daily_df["day"], plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
-     plt.xlabel("Day")
-     plt.ylabel("Jain's Fairness Index")
-     plt.title("Daily Fairness Metrics")
-     plt.xticks(plot_daily_df["day"])
-     plt.ylim(0, 1.05)
-     plt.legend()
-     plt.grid(True, linestyle='--', alpha=0.7)
-     plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
-     plt.close()
-
-     # Per-House Savings and Reductions
-     fig, ax1 = plt.subplots(figsize=(15, 7))
-
-     house_ids_str = house_level_df.index.astype(str)
-     bar_width = 0.4
-     index = np.arange(len(house_ids_str))
-
-     # Bar chart for cost savings
-     color1 = 'tab:green'
-     ax1.set_xlabel('House ID')
-     ax1.set_ylabel('Total Cost Savings ($)', color=color1)
-     ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width, label='Cost Savings', color=color1)
-     ax1.tick_params(axis='y', labelcolor=color1)
-     ax1.set_xticks(index)
-     ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")
-
-     # Second y-axis for grid import reduction
-     ax2 = ax1.twinx()
-     color2 = 'tab:blue'
-     ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
-     ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width, label='Import Reduction', color=color2)
-     ax2.tick_params(axis='y', labelcolor=color2)
-
-     plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {days_to_evaluate} days)')
-     fig.tight_layout()
-     plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
-     plt.close()
-
-     # Price Dynamics for a Single Day
-     day1_prices = all_days_df[all_days_df['day'] == 1][['step', 'grid_price', 'peer_price']].drop_duplicates()
-     plt.figure(figsize=(12, 6))
-     plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
-     plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
-     plt.xlabel("Timestep of Day")
-     plt.ylabel("Price ($/kWh)")
-     plt.title("Price Dynamics on Day 1")
-     plt.legend()
-     plt.grid(True, linestyle='--', alpha=0.6)
-     plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
-     plt.close()
-
-     # Battery State of Charge for Sample Houses
-     day1_df = all_days_df[all_days_df['day'] == 1]
-     battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()
-
-     if len(battery_houses) > 0:
-         sample_houses = battery_houses[:min(4, len(battery_houses))]
-         plt.figure(figsize=(12, 6))
-         for house in sample_houses:
-             house_df = day1_df[day1_df['house'] == house]
-             plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')
-
-         plt.xlabel("Timestep of Day")
-         plt.ylabel("State of Charge (%)")
-         plt.title("Battery SoC on Day 1 for Sample Houses")
-         plt.legend()
-         plt.grid(True, linestyle='--', alpha=0.6)
-         plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
-         plt.close()
-
-     print("All plots have been generated and saved. Evaluation complete.")
-
- if __name__ == "__main__":
-     main()
Other_algorithms/Flat_System/maddpg/maddpg_train.py DELETED
@@ -1,382 +0,0 @@
- import os
- import sys
- import re
- import numpy as np
- import torch
- import matplotlib.pyplot as plt
- import pandas as pd
- import time
- from datetime import datetime
-
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
- from solar_sys_environment import SolarSys
- from maddpg.trainer.maddpg import MADDPG
-
- def main():
-
-     STATE_TO_RUN = "oklahoma"  # "pennsylvania" or "colorado" or "oklahoma"
-
-     # Set the path to your training data
-     DATA_FILE_PATH = "/path/to/project/training/5houses_152days_TRAIN.csv"
-     num_episodes = 10000
-     batch_size = 256
-     checkpoint_interval = 100000
-     window_size = 32
-
-     env = SolarSys(
-         data_path=DATA_FILE_PATH,
-         state=STATE_TO_RUN,
-         time_freq="15T"
-     )
-
-     # Sanity check: env I/O shapes
-     print("Observation space:", env.observation_space)
-     print("Action space :", env.action_space)
-
-     # Reset and inspect obs
-     obs = env.reset()
-     print(f"Reset returned {len(obs)} agent observations; each obs shape: {np.array(obs).shape}")
-
-     # Sample random actions and do one step
-     dummy_actions = np.random.rand(env.num_agents, env.action_space.shape[1]).astype(np.float32)
-     next_obs, rewards, done, info = env.step(dummy_actions)
-     print(f"Step outputs → next_obs: {len(next_obs)}×{np.array(next_obs).shape[1]}, "
-           f"rewards: {len(rewards)}, done: {done}")
-     print("Info keys:", list(info.keys()))
-
-     # Count the number of houses in each group
-     env.group_counts = {
-         0: env.agent_groups.count(0),
-         1: env.agent_groups.count(1)
-     }
-     print(f"Number of houses in each group: {env.group_counts}")
-
-     max_steps = env.num_steps
-
-     # Dims from the env
-     num_agents = env.num_agents
-     local_state_dim = env.observation_space.shape[1]
-     action_dim = env.action_space.shape[1]
-
-     # Build a unique run directory
-     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-     run_name = f"maddpg_para_sharing_{STATE_TO_RUN}_{num_agents}agents_{num_episodes}eps_{timestamp}"
-     root_dir = os.path.join("FINALE_FINALE_FINALE", run_name)
-     os.makedirs(root_dir, exist_ok=True)
-     print(f"Saving training outputs to: {root_dir}")
-
-     logs_dir = os.path.join(root_dir, "logs")
-     plots_dir = os.path.join(root_dir, "plots")
-     os.makedirs(logs_dir, exist_ok=True)
-     os.makedirs(plots_dir, exist_ok=True)
-
-     # Create the MADDPG agent
-     maddpg = MADDPG(
-         num_agents=num_agents,
-         local_state_dim=local_state_dim,
-         action_dim=action_dim,
-         gamma=0.95,
-         tau=0.01,
-         lr_actor=1e-4,
-         lr_critic=1e-3,
-         buffer_size=1000000,
-         noise_episodes=5000,
-         init_sigma=0.3,
-         final_sigma=0.01,
-         batch_size=batch_size
-     )
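-     # Annotation: one actor network is shared by all agents (hence "para_sharing" in the
-     # run name), paired with a single centralized critic over the joint state/action;
-     # see maddpg/trainer/maddpg.py below.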
-
-     # Tracking / Logging Variables
-     episode_rewards = []
-     episode_total_rewards = []
-     block_mean_rewards = []
-     block_total_rewards = []
-
-     agent_rewards_log = [[] for _ in range(num_agents)]
-     best_mean_reward = -1e9
-     best_model_path = os.path.join(logs_dir, "best_model.pth")
-
-     daily_rewards = []
-     monthly_rewards = []
-
-     training_start_time = time.time()
-     episode_durations = []
-     total_steps_global = 0
-     episode_log_data = []
-     performance_metrics_log = []
-
-     agent_charge_log = [[] for _ in range(num_agents)]
-     agent_discharge_log = [[] for _ in range(num_agents)]
-
-     # Training Loop
-     for episode in range(1, num_episodes + 1):
-         episode_start_time = time.time()
-
-         obs = np.array(env.reset(), dtype=np.float32)
-
-         # Collect metrics from the previous episode
-         if episode > 1:
-             last_episode_metrics = env.get_episode_metrics()
-             last_episode_metrics['Episode'] = episode - 1
-             performance_metrics_log.append(last_episode_metrics)
-
-         total_reward = np.zeros(num_agents, dtype=np.float32)
-         done = False
-         step_count = 0
-         day_logs = []
-         episode_charges = [[] for _ in range(num_agents)]
-         episode_discharges = [[] for _ in range(num_agents)]
-
-         while not done:
-             # Select actions using the MADDPG agent
-             actions = maddpg.select_actions(obs)
-
-             # Step environment
-             next_obs_list, rewards, done, info = env.step(actions)
-             next_obs = np.array(next_obs_list, dtype=np.float32)
-
-             # Store the transition in the replay buffer
-             maddpg.store_transition(obs, actions, rewards, next_obs, done)
-
-             # Train the agent at every step
-             maddpg.train()
-
-             total_reward += rewards
-             obs = next_obs
-             step_count += 1
-             total_steps_global += 1
-
-             for i in range(num_agents):
-                 episode_charges[i].append(info["charge_amount"][i])
-                 episode_discharges[i].append(info["discharge_amount"][i])
-
-             day_logs.append({
-                 "step": step_count - 1,
-                 "grid_import_no_p2p": info["grid_import_no_p2p"],
-                 "grid_import_with_p2p": info["grid_import_with_p2p"],
-                 "p2p_buy": info["p2p_buy"],
-                 "p2p_sell": info["p2p_sell"],
-                 "costs": info["costs"],
-                 "charge_amount": info.get("charge_amount", np.zeros(num_agents)),
-                 "discharge_amount": info.get("discharge_amount", np.zeros(num_agents))
-             })
-
-             if step_count >= max_steps:
-                 break
-
-         # After each episode
-         # Compute per-episode metrics
-         sum_ep_reward = float(np.sum(total_reward))
-         mean_ep_reward = float(np.mean(total_reward))
-
-         episode_total_rewards.append(sum_ep_reward)
-         episode_rewards.append(mean_ep_reward)
-         daily_rewards.append(mean_ep_reward)
-
-         # If we just finished a block of window_size episodes, aggregate
-         if len(daily_rewards) % window_size == 0:
-             last_totals = episode_total_rewards[-window_size:]
-             block_sum = sum(last_totals)
-             block_total_rewards.append(block_sum)
-
-             last_means = daily_rewards[-window_size:]
-             block_mean = sum(last_means) / window_size
-             block_mean_rewards.append(block_mean)
-
-             block_idx = len(block_mean_rewards)
-             print(
-                 f"→ Completed Block {block_idx} "
-                 f"| Episodes {(block_idx-1)*window_size + 1}–{block_idx*window_size} "
-                 f"| Block Total Reward: {block_sum:.3f} "
-                 f"| Block Mean Reward: {block_mean:.3f}"
-             )
-
-         # Log agent-level rewards and battery activity (once per episode)
-         for i in range(num_agents):
-             agent_rewards_log[i].append(total_reward[i])
-             agent_charge_log[i].append(np.mean(episode_charges[i]))
-             agent_discharge_log[i].append(np.mean(episode_discharges[i]))
-
-         # Summarize P2P steps
-         steps_data = []
-         for entry in day_logs:
-             step_idx = entry["step"]
-             p2p_buy_array = entry["p2p_buy"]
-             p2p_sell_array = entry["p2p_sell"]
-             grid_no_p2p_array = entry["grid_import_no_p2p"]
-             grid_with_p2p_array = entry["grid_import_with_p2p"]
-
-             steps_data.append({
-                 "step": step_idx,
-                 "p2p_buy_sum": float(np.sum(p2p_buy_array)),
-                 "p2p_sell_sum": float(np.sum(p2p_sell_array)),
-                 "grid_import_no_p2p_sum": float(np.sum(grid_no_p2p_array)),
-                 "grid_import_with_p2p_sum": float(np.sum(grid_with_p2p_array))
-             })
-
-         baseline_cost = np.sum([np.sum(entry["grid_import_no_p2p"]) * env.get_grid_price(entry["step"])
-                                 for entry in day_logs])
-         actual_cost = np.sum([np.sum(entry["costs"]) for entry in day_logs])
-         cost_reduction = (baseline_cost - actual_cost) / baseline_cost if baseline_cost > 0 else 0.0
-
-         # Call on_episode_end() for the noise decay schedule
-         maddpg.on_episode_end()
-
-         # Save if best
-         if mean_ep_reward > best_mean_reward:
-             best_mean_reward = mean_ep_reward
-             maddpg.save(best_model_path)
-
-         if episode % checkpoint_interval == 0:
-             ckpt_path = os.path.join(logs_dir, f"checkpoint_{episode}.pth")
-             maddpg.save(ckpt_path)
-
-         episode_end_time = time.time()
-         episode_duration = episode_end_time - episode_start_time
-
-         print(
-             f"Episode {episode}/{num_episodes} "
-             f"| Time per Episode: {episode_duration:.2f}s "
-             f"| Steps: {step_count} "
-             f"| Mean Reward: {mean_ep_reward:.3f} "
-             f"| Cost Reduction: {cost_reduction:.2%}"
-         )
-
-         # Record data in per-episode log
-         episode_log_data.append({
-             "Episode": episode,
-             "Steps": step_count,
-             "Mean_Reward": mean_ep_reward,
-             "Total_Reward": sum_ep_reward,
-             "Cost_Reduction_Pct": cost_reduction * 100,
-             "Baseline_Cost": baseline_cost,
-             "Actual_Cost": actual_cost,
-             "Episode_Duration": episode_duration,
-             "Total_Charge": np.sum([np.sum(entry["charge_amount"]) for entry in day_logs]),
-             "Total_Discharge": np.sum([np.sum(entry["discharge_amount"]) for entry in day_logs])
-         })
-
-     # Capture the final episode's metrics
-     final_episode_metrics = env.get_episode_metrics()
-     final_episode_metrics['Episode'] = num_episodes
-     performance_metrics_log.append(final_episode_metrics)
-
-     # End of all training
-     training_end_time = time.time()
-     total_training_time = training_end_time - training_start_time
-
-     # Save out per-episode agent rewards + mean rewards
-     np.save(os.path.join(logs_dir, "agent_rewards.npy"), np.array(agent_rewards_log))
-     np.save(os.path.join(logs_dir, "mean_rewards.npy"), np.array(episode_rewards))
-     np.save(os.path.join(logs_dir, "total_rewards.npy"), np.array(episode_total_rewards))
-
-     # Create Final DataFrame for Logging and Plotting
-     df_rewards_log = pd.DataFrame(episode_log_data)
-     df_perf_log = pd.DataFrame(performance_metrics_log)
-
-     # Merge the two DataFrames on the 'Episode' column
-     df_final_log = pd.merge(df_rewards_log, df_perf_log.drop(columns=[
-         'degradation_cost_over_time',
-         'cost_savings_over_time',
-         'grid_reduction_over_time'
-     ]), on="Episode")
-
-     # PLOTTING
-     os.makedirs(plots_dir, exist_ok=True)
-
-     # Helper: centered moving average
-     def moving_avg(series, window):
-         return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()
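-     # Annotation: center=True with min_periods=1 gives a centered rolling mean; near the
-     # ends of the series the window shrinks, so the first/last points average fewer episodes.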
-
-     # Smoothing window (in episodes)
-     ma_window = 300
-     episodes = np.arange(1, num_episodes + 1)
-
-     # Mean Reward moving average
-     reward_ma = moving_avg(df_final_log["Mean_Reward"], ma_window)
-     plt.figure(figsize=(8, 5))
-     plt.plot(episodes, reward_ma, linewidth=2, label=f"Mean Reward MA (win={ma_window})")
-     plt.xlabel("Episode")
-     plt.ylabel("Mean Reward")
-     plt.title("MADDPG: Mean Reward Moving Average")
-     plt.legend()
-     plt.grid(True)
-     plt.savefig(os.path.join(plots_dir, "mean_reward_ma.png"), dpi=200)
-     plt.close()
-
-     # Total Reward moving average
-     total_ma = moving_avg(df_final_log["Total_Reward"], ma_window)
-     plt.figure(figsize=(8, 5))
-     plt.plot(episodes, total_ma, linewidth=2, label=f"Total Reward MA (win={ma_window})")
-     plt.xlabel("Episode")
-     plt.ylabel("Total Reward")
-     plt.title("MADDPG: Total Reward Moving Average")
-     plt.legend()
-     plt.grid(True)
-     plt.savefig(os.path.join(plots_dir, "total_reward_ma.png"), dpi=200)
-     plt.close()
-
-     # Cost Reduction (%) moving average
-     cost_ma = moving_avg(df_final_log["Cost_Reduction_Pct"], ma_window)
-     plt.figure(figsize=(8, 5))
-     plt.plot(episodes, cost_ma, linewidth=2, label="Cost Reduction MA (%)")
-     plt.xlabel("Episode")
-     plt.ylabel("Cost Reduction (%)")
-     plt.title("MADDPG: Cost Reduction Moving Average")
-     plt.legend()
-     plt.grid(True)
-     plt.savefig(os.path.join(plots_dir, "cost_reduction_ma.png"), dpi=200)
-     plt.close()
-
-     # Battery Degradation Cost moving average
-     degradation_ma = moving_avg(df_final_log["battery_degradation_cost_total"], ma_window)
-     plt.figure(figsize=(8, 5))
-     plt.plot(episodes, degradation_ma, linewidth=2, label=f"Degradation Cost MA (win={ma_window})", color='purple')
-     plt.xlabel("Episode")
-     plt.ylabel("Total Degradation Cost ($)")
-     plt.title("MADDPG: Battery Degradation Cost Moving Average")
-     plt.legend()
-     plt.grid(True)
-     plt.savefig(os.path.join(plots_dir, "degradation_cost_ma.png"), dpi=200)
-     plt.close()
-
-     print(f"\nAll moving-average plots saved to: {plots_dir}")
-
-     # Save Final Logs to CSV
-     total_time_row = pd.DataFrame([{
-         "Episode": "Total_Training_Time",
-         "Episode_Duration": total_training_time
-     }])
-     df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)
-
-     log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")
-
-     # Select and reorder columns for the final CSV
-     columns_to_save = [
-         "Episode",
-         "Mean_Reward",
-         "Total_Reward",
-         "Cost_Reduction_Pct",
-         "Episode_Duration",
-         "battery_degradation_cost_total",
-     ]
-     df_to_save = df_to_save[columns_to_save]
-
-     df_to_save.to_csv(log_csv_path, index=False)
-
-     print(f"Saved comprehensive training performance log to: {log_csv_path}")
-
-     # Final Timings Printout
-     print("\n" + "="*50)
-     print("TRAINING COMPLETE".center(50))
-     print(f"Total training time: {total_training_time:.2f} seconds")
-     print("="*50)
-
-
- if __name__ == "__main__":
-     main()
Other_algorithms/Flat_System/maddpg/trainer/__init__.py DELETED
File without changes
Other_algorithms/Flat_System/maddpg/trainer/maddpg.py DELETED
@@ -1,196 +0,0 @@
- import torch
- import torch.nn as nn
- import torch.optim as optim
- import numpy as np
- import random
- from collections import deque
- from torch.utils.data import Dataset, DataLoader
-
- class ReplayBufferDataset(Dataset):
-     def __init__(self, max_size=100000):
-         self.buffer = deque(maxlen=max_size)
-
-     def add(self, states, actions, rewards, next_states, done):
-         data = (
-             states,
-             actions,
-             np.array(rewards, dtype=np.float32),
-             next_states,
-             np.float32(done)
-         )
-         self.buffer.append(data)
-
-     def __len__(self):
-         return len(self.buffer)
-
-     def __getitem__(self, idx):
-         states, actions, rewards, next_states, done = self.buffer[idx]
-         return (
-             torch.from_numpy(states),
-             torch.from_numpy(actions),
-             torch.from_numpy(rewards),
-             torch.from_numpy(next_states),
-             torch.tensor(done, dtype=torch.float32)
-         )
-
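- # Annotation: the replay buffer is exposed as a torch Dataset, presumably so that sampling
- # can reuse DataLoader's shuffling and pinned-memory machinery; MADDPG.train() below rebuilds
- # the DataLoader iterator whenever it is exhausted, since the deque keeps growing during training.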
- class Actor(nn.Module):
-     def __init__(self, state_dim, action_dim, hidden_dim=64):
-         super(Actor, self).__init__()
-         self.net = nn.Sequential(
-             nn.Linear(state_dim, hidden_dim),
-             nn.ReLU(),
-             nn.Linear(hidden_dim, hidden_dim),
-             nn.ReLU(),
-             nn.Linear(hidden_dim, action_dim),
-             nn.Sigmoid()
-         )
-
-     def forward(self, state):
-         return self.net(state)
-
- class SharedCritic(nn.Module):
-     def __init__(self, global_state_dim, global_action_dim, hidden_dim=128, num_agents=1):
-         super().__init__()
-         self.net = nn.Sequential(
-             nn.Linear(global_state_dim + global_action_dim, hidden_dim),
-             nn.ReLU(),
-             nn.Linear(hidden_dim, hidden_dim),
-             nn.ReLU(),
-             nn.Linear(hidden_dim, num_agents)
-         )
-
-     def forward(self, global_state, global_action):
-         x = torch.cat([global_state, global_action], dim=1)
-         return self.net(x)
-
- class Agent:
-     def __init__(self, local_state_dim, action_dim, lr_actor=1e-3, device=torch.device('cpu')):
-         self.device = device
-         self.actor = Actor(local_state_dim, action_dim).to(device)
-         self.target_actor = Actor(local_state_dim, action_dim).to(device)
-         self.actor_optim = optim.Adam(self.actor.parameters(), lr=lr_actor)
-         self.target_actor.load_state_dict(self.actor.state_dict())
-
-     def sync_target(self, tau):
-         for tp, p in zip(self.target_actor.parameters(), self.actor.parameters()):
-             tp.data.copy_(tau * p.data + (1.0 - tau) * tp.data)
-
- class MADDPG:
-     def __init__(self, num_agents, local_state_dim, action_dim,
-                  gamma=0.95, tau=0.01, lr_actor=1e-4, lr_critic=1e-3,
-                  buffer_size=100000, noise_episodes=100, init_sigma=0.2, final_sigma=0.01,
-                  batch_size=128, num_workers=0):
-
-         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-         self.num_agents = num_agents
-         self.gamma = gamma
-         self.tau = tau
-         self.init_sigma = init_sigma
-         self.final_sigma = final_sigma
-         self.noise_episodes = noise_episodes
-         self.current_episode = 0
-
-         self.actor = Actor(local_state_dim, action_dim).to(self.device)
-         self.target_actor = Actor(local_state_dim, action_dim).to(self.device)
-         self.target_actor.load_state_dict(self.actor.state_dict())
-         self.actor_optim = optim.Adam(self.actor.parameters(), lr=lr_actor)
-
-         global_state_dim = num_agents * local_state_dim
-         global_action_dim = num_agents * action_dim
-         self.critic = SharedCritic(global_state_dim, global_action_dim, num_agents=num_agents).to(self.device)
-         self.target_critic = SharedCritic(global_state_dim, global_action_dim, num_agents=num_agents).to(self.device)
-         self.target_critic.load_state_dict(self.critic.state_dict())
-         self.critic_optim = optim.Adam(self.critic.parameters(), lr=lr_critic)
-
-         self.batch_size = batch_size
-         self.num_workers = num_workers
-         self.memory = ReplayBufferDataset(max_size=buffer_size)
-         self.dataloader = None
-         self.loader_iter = None
-
-     def select_actions(self, states, evaluate=False):
-         states_t = torch.as_tensor(states, dtype=torch.float32, device=self.device)
-         with torch.no_grad():
-             actions_t = torch.stack([
-                 self.actor(states_t[i]) for i in range(self.num_agents)
-             ], dim=0)
-         actions = actions_t.cpu().numpy()
-
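-         # Exploration noise: Gaussian with sigma annealed linearly from init_sigma
-         # down to final_sigma over the first `noise_episodes` episodes.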
-         if not evaluate:
-             frac = min(float(self.current_episode) / self.noise_episodes, 1.0)
-             current_sigma = self.init_sigma - frac * (self.init_sigma - self.final_sigma)
-             noise = np.random.normal(0, current_sigma, size=actions.shape)
-             actions += noise
-         return np.clip(actions, 0.0, 1.0)
-
-     def store_transition(self, states, actions, rewards, next_states, done):
-         self.memory.add(states, actions, rewards, next_states, done)
-
-     def train(self):
-         if len(self.memory) < self.batch_size:
-             return
-
-         should_pin_memory = self.device.type == 'cuda'
-         if self.dataloader is None:
-             self.dataloader = DataLoader(self.memory, batch_size=self.batch_size, shuffle=True,
-                                          num_workers=self.num_workers, pin_memory=should_pin_memory, drop_last=True)
-             self.loader_iter = iter(self.dataloader)
-         try:
-             s, a, r, s2, d = next(self.loader_iter)
-         except StopIteration:
-             self.dataloader = DataLoader(self.memory, batch_size=self.batch_size, shuffle=True,
-                                          num_workers=self.num_workers, pin_memory=should_pin_memory, drop_last=True)
-             self.loader_iter = iter(self.dataloader)
-             s, a, r, s2, d = next(self.loader_iter)
-
-         s_t, a_t, r_t, s2_t, d_t = s.to(self.device), a.to(self.device), r.to(self.device), s2.to(self.device), d.to(self.device).unsqueeze(-1)
-         r_t = (r_t - r_t.mean()) / (r_t.std() + 1e-7)  # per-batch reward normalization
-         batch_len = s_t.shape[0]
-         gs, ga, ns = s_t.reshape(batch_len, -1), a_t.reshape(batch_len, -1), s2_t.reshape(batch_len, -1)
-
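-         # TD target for the centralized critic: y = r + gamma * (1 - done) * Q'(s', a'),
-         # where a' comes from the target actor and Q' returns one value per agent.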
-         with torch.no_grad():
-             targ_actions = torch.cat([self.target_actor(s2_t[:, i, :]) for i in range(self.num_agents)], dim=1)
-             Q_prime = self.target_critic(ns, targ_actions)
-             targets = r_t + self.gamma * (1 - d_t) * Q_prime
-         Q = self.critic(gs, ga)
-         critic_loss = nn.MSELoss()(Q, targets)
-         self.critic_optim.zero_grad()
-         critic_loss.backward()
-         torch.nn.utils.clip_grad_norm_(self.critic.parameters(), 1.0)
-         self.critic_optim.step()
-
-         all_actions = torch.cat([self.actor(s_t[:, i, :]) for i in range(self.num_agents)], dim=1)
-         actor_loss = -self.critic(gs, all_actions).mean()
-
-         self.actor_optim.zero_grad()
-         actor_loss.backward()
-         torch.nn.utils.clip_grad_norm_(self.actor.parameters(), 1.0)
-         self.actor_optim.step()
-
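-         # Polyak (soft) target update: theta_target <- tau * theta + (1 - tau) * theta_target.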
-         for tp, p in zip(self.target_actor.parameters(), self.actor.parameters()):
-             tp.data.copy_(self.tau * p.data + (1.0 - self.tau) * tp.data)
-         for tp, p in zip(self.target_critic.parameters(), self.critic.parameters()):
-             tp.data.copy_(self.tau * p.data + (1.0 - self.tau) * tp.data)
-
-     def on_episode_end(self):
-         self.current_episode += 1
-
-     def save(self, path: str):
-         payload = {
-             "critic": self.critic.state_dict(),
-             "target_critic": self.target_critic.state_dict(),
-             "critic_optim": self.critic_optim.state_dict(),
-             "actor": self.actor.state_dict(),
-             "target_actor": self.target_actor.state_dict(),
-             "actor_optim": self.actor_optim.state_dict(),
-             "current_episode": self.current_episode,
-         }
-         torch.save(payload, path)
-
-     def load(self, path: str):
-         checkpoint = torch.load(path, map_location=self.device)
-         self.critic.load_state_dict(checkpoint["critic"])
-         self.target_critic.load_state_dict(checkpoint["target_critic"])
-         self.critic_optim.load_state_dict(checkpoint["critic_optim"])
-         self.actor.load_state_dict(checkpoint["actor"])
-         self.target_actor.load_state_dict(checkpoint["target_actor"])
-         self.actor_optim.load_state_dict(checkpoint["actor_optim"])
-         self.current_episode = checkpoint.get("current_episode", 0)
Other_algorithms/Flat_System/mappo/_init_.py DELETED
File without changes
Other_algorithms/Flat_System/mappo/mappo_evaluation.py DELETED
@@ -1,500 +0,0 @@
- # mappo_evaluate.py
- import os
- import sys
- import time
- import re
- import numpy as np
- import pandas as pd
- import matplotlib.pyplot as plt
- import torch
- from datetime import datetime
-
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
- from solar_sys_environment import SolarSys
- from mappo.trainer.mappo import MAPPO
-
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
- def compute_jains_fairness(values: np.ndarray) -> float:
-     if len(values) == 0:
-         return 0.0
-     if np.all(values == 0):
-         return 1.0
-     num = (values.sum())**2
-     den = len(values) * (values**2).sum()
-     return num / den
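- # Jain's fairness index: J(x) = (sum_i x_i)^2 / (n * sum_i x_i^2).
- # J = 1.0 when all houses benefit equally; J = 1/n when a single house captures everything.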
-
- def main():
-     # User parameters
-     MODEL_PATH = "/path/to/project/mappo_pennsylvania_100agents_10000eps/logs/best_model.pth"
-     DATA_PATH = "/path/to/project/testing/100houses_30days_TEST.csv"
-     DAYS_TO_EVALUATE = 30
-
-     model_path = MODEL_PATH
-     data_path = DATA_PATH
-     days_to_evaluate = DAYS_TO_EVALUATE
-     SOLAR_THRESHOLD = 0.1
-
-     state_match = re.search(r"mappo_(oklahoma|colorado|pennsylvania)_", model_path)
-     if not state_match:
-         raise ValueError(
-             "Could not automatically detect the state (oklahoma, colorado, or pennsylvania) "
-             "from the model path. Please ensure your model's parent folder is named correctly, "
-             "e.g., 'mappo_oklahoma_...'"
-         )
-     detected_state = state_match.group(1)
-     print(f"--- Detected state: {detected_state.upper()} ---")
-
-     # Env setup
-     env = SolarSys(
-         data_path=data_path,
-         state=detected_state,
-         time_freq="3H"
-     )
-     eval_steps = env.num_steps
-     house_ids = env.house_ids
-     num_agents = env.num_agents
-
-     # Generate a unique eval run folder
-     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-     run_name = f"eval_mappo_{num_agents}agents_{days_to_evaluate}days_{timestamp}"
-     output_folder = os.path.join("runs_with_battery", run_name)
-     logs_dir = os.path.join(output_folder, "logs")
-     plots_dir = os.path.join(output_folder, "plots")
-     for d in (logs_dir, plots_dir):
-         os.makedirs(d, exist_ok=True)
-     print(f"Saving evaluation outputs to: {output_folder}")
-
-     local_dim = env.observation_space.shape[1]
-     global_dim = num_agents * local_dim
-     act_dim = env.action_space.shape[1]
-
-     mappo = MAPPO(
-         n_agents=num_agents,
-         local_dim=local_dim,
-         global_dim=global_dim,
-         act_dim=act_dim,
-         lr=2e-4,
-         gamma=0.95,
-         lam=0.95,
-         clip_eps=0.2,
-         k_epochs=10,
-         batch_size=1024
-     )
-
-     # Load MAPPO checkpoint
-     mappo.load(model_path)
-     mappo.actor.to(device).eval()
-     mappo.critic.to(device).eval()
-
-     # Prepare logs
-     all_logs = []
-     daily_summaries = []
-     step_timing_list = []
-
-     evaluation_start = time.time()
-
-     for day_idx in range(days_to_evaluate):
-         obs = env.reset()
-         done = False
-         step_count = 0
-         day_logs = []
-
-         while not done:
-             step_start_time = time.time()
-             global_obs = np.array(obs).flatten()
-
-             # Select actions with MAPPO
-             actions, _ = mappo.select_action(obs, global_obs)
-
-             next_obs, rewards, done, info = env.step(actions)
-
-             # Consolidated logging
-             step_end_time = time.time()
-             step_duration = step_end_time - step_start_time
-
-             print(f"[Day {day_idx+1}, Step {step_count}] Step time: {step_duration:.6f} seconds")
-
-             step_timing_list.append({
-                 "day": day_idx + 1,
-                 "step": step_count,
-                 "step_time_s": step_duration
-             })
-
-             grid_price_now = env.get_grid_price(step_count)
-             peer_price_now = info.get("peer_price", env.get_peer_price(step_count,
-                                                                        float(info["p2p_sell"].sum()),
-                                                                        float(info["p2p_buy"].sum())))
-
-             for i, hid in enumerate(house_ids):
-                 is_battery_house = hid in env.batteries
-                 p2p_buy = float(info["p2p_buy"][i])
-                 p2p_sell = float(info["p2p_sell"][i])
-                 charge_amount = float(info.get("charge_amount", [0]*num_agents)[i])
-                 discharge_amount = float(info.get("discharge_amount", [0]*num_agents)[i])
-
-                 day_logs.append({
-                     "day": day_idx + 1,
-                     "step": step_count,
-                     "house": hid,
-                     "grid_import_no_p2p": float(info["grid_import_no_p2p"][i]),
-                     "grid_import_with_p2p": float(info["grid_import_with_p2p"][i]),
-                     "grid_export": float(info.get("grid_export", [0]*num_agents)[i]),
-                     "p2p_buy": p2p_buy,
-                     "p2p_sell": p2p_sell,
-                     "actual_cost": float(info["costs"][i]),
-                     "baseline_cost": float(info["grid_import_no_p2p"][i]) * grid_price_now,
-                     "total_demand": float(env.demands[hid][step_count]),
-                     "total_solar": float(env.solars[hid][step_count]),
-                     "grid_price": grid_price_now,
-                     "peer_price": peer_price_now,
-                     "soc": (env.batteries[hid]["soc"] / env.batteries[hid]["max_capacity"]) if is_battery_house else np.nan,
-                     "degradation_cost": ((charge_amount + discharge_amount) * env.batteries[hid]["degradation_cost_per_kwh"]) if is_battery_house else 0.0,
-                     "reward": float(rewards[i]),
-                 })
-
-             obs = next_obs
-             step_count += 1
-             if step_count >= eval_steps:
-                 break
-
-         day_df = pd.DataFrame(day_logs)
-         all_logs.extend(day_logs)
-
-         # Consolidated daily summary calculation
-         grouped_house = day_df.groupby("house").sum(numeric_only=True)
-         grouped_step = day_df.groupby("step").sum(numeric_only=True)
-
-         total_demand = grouped_step["total_demand"].sum()
-         total_solar = grouped_step["total_solar"].sum()
-         total_p2p_buy = grouped_house["p2p_buy"].sum()
-         total_p2p_sell = grouped_house["p2p_sell"].sum()
-
-         baseline_cost_per_house = grouped_house["baseline_cost"]
-         actual_cost_per_house = grouped_house["actual_cost"]
-         cost_savings_per_house = baseline_cost_per_house - actual_cost_per_house
-         day_total_cost_savings = cost_savings_per_house.sum()
-
-         if baseline_cost_per_house.sum() > 0:
-             overall_cost_savings_pct = day_total_cost_savings / baseline_cost_per_house.sum()
-         else:
-             overall_cost_savings_pct = 0.0
-
-         baseline_import_per_house = grouped_house["grid_import_no_p2p"]
-         actual_import_per_house = grouped_house["grid_import_with_p2p"]
-         import_reduction_per_house = baseline_import_per_house - actual_import_per_house
-         day_total_import_reduction = import_reduction_per_house.sum()
-
-         if baseline_import_per_house.sum() > 0:
-             overall_import_reduction_pct = day_total_import_reduction / baseline_import_per_house.sum()
-         else:
-             overall_import_reduction_pct = 0.0
-
-         fairness_cost_savings = compute_jains_fairness(cost_savings_per_house.values)
-         fairness_import_reduction = compute_jains_fairness(import_reduction_per_house.values)
-         fairness_rewards = compute_jains_fairness(grouped_house["reward"].values)
-         fairness_p2p_buy = compute_jains_fairness(grouped_house["p2p_buy"].values)
-         fairness_p2p_sell = compute_jains_fairness(grouped_house["p2p_sell"].values)
-         fairness_p2p_total = compute_jains_fairness((grouped_house["p2p_buy"] + grouped_house["p2p_sell"]).values)
-         day_total_degradation_cost = grouped_house["degradation_cost"].sum()
-
-         daily_summaries.append({
-             "day": day_idx + 1,
-             "day_total_demand": total_demand,
-             "day_total_solar": total_solar,
-             "day_p2p_buy": total_p2p_buy,
-             "day_p2p_sell": total_p2p_sell,
-             "cost_savings_abs": day_total_cost_savings,
-             "cost_savings_pct": overall_cost_savings_pct,
-             "fairness_cost_savings": fairness_cost_savings,
-             "grid_reduction_abs": day_total_import_reduction,
-             "grid_reduction_pct": overall_import_reduction_pct,
-             "fairness_grid_reduction": fairness_import_reduction,
-             "fairness_reward": fairness_rewards,
-             "fairness_p2p_buy": fairness_p2p_buy,
-             "fairness_p2p_sell": fairness_p2p_sell,
-             "fairness_p2p_total": fairness_p2p_total,
-             "total_degradation_cost": day_total_degradation_cost
-         })
-
-     # Final processing and saving
-     evaluation_end = time.time()
-     total_eval_time = evaluation_end - evaluation_start
-     print(f"\nEvaluation loop finished. Total time: {total_eval_time:.2f} seconds.")
-
-     all_days_df = pd.DataFrame(all_logs)
-     combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
-     all_days_df.to_csv(combined_csv_path, index=False)
-     print(f"Saved combined step-level logs to: {combined_csv_path}")
-
-     step_timing_df = pd.DataFrame(step_timing_list)
-     timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
-     step_timing_df.to_csv(timing_csv_path, index=False)
-     print(f"Saved step timing logs to: {timing_csv_path}")
-
-     house_level_df = all_days_df.groupby("house").agg({
-         "baseline_cost": "sum",
-         "actual_cost": "sum",
-         "grid_import_no_p2p": "sum",
-         "grid_import_with_p2p": "sum",
-         "degradation_cost": "sum"
-     })
-     house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
-     house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]
-
-     house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
-     house_level_df.to_csv(house_summary_csv)
-     print(f"Saved final summary per house to: {house_summary_csv}")
-
-     fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
-     fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)
-
-     daily_summary_df = pd.DataFrame(daily_summaries)
-
-     total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
-     total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
-     pct_cost_savings_all = total_cost_savings_all / total_baseline_cost_all if total_baseline_cost_all > 0 else 0.0
-
-     total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
-     total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
-     pct_grid_reduction_all = total_grid_reduction_all / total_baseline_import_all if total_baseline_import_all > 0 else 0.0
-
-     total_degradation_cost_all = daily_summary_df["total_degradation_cost"].sum()
-
-     # Calculate alternative performance metrics
-
-     # Grid reduction during solar hours (steps where aggregate solar exceeds the threshold)
-     agg_solar_per_step = all_days_df.groupby(['day', 'step'])['total_solar'].sum()
-     sunny_steps_mask = agg_solar_per_step > (SOLAR_THRESHOLD * num_agents)
-     sunny_df = all_days_df.set_index(['day', 'step'])[sunny_steps_mask].reset_index()
-     baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
-     actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
-     grid_reduction_sunny_pct = 0.0
-     if baseline_import_sunny > 0:
-         grid_reduction_sunny_pct = (baseline_import_sunny - actual_import_sunny) / baseline_import_sunny
-
-     # Community Sourcing Rate: p2p_buy / (p2p_buy + grid_import_with_p2p)
-     total_p2p_buy = all_days_df['p2p_buy'].sum()
-     total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
-     total_procured_energy = total_p2p_buy + total_actual_grid_import
-     community_sourcing_rate_pct = 0.0
-     if total_procured_energy > 0:
-         community_sourcing_rate_pct = total_p2p_buy / total_procured_energy
-
-     # Solar Sharing Efficiency: p2p_sell / (p2p_sell + grid_export)
-     total_p2p_sell = all_days_df['p2p_sell'].sum()
-     total_grid_export = all_days_df['grid_export'].sum()
-     total_excess_solar = total_p2p_sell + total_grid_export
-     solar_sharing_efficiency_pct = 0.0
-     if total_excess_solar > 0:
-         solar_sharing_efficiency_pct = total_p2p_sell / total_excess_solar
-
-     # Cost savings in sunny hours
-     baseline_cost_sunny = sunny_df['baseline_cost'].sum()
-     actual_cost_sunny = sunny_df['actual_cost'].sum()
-     cost_savings_sunny_pct = (baseline_cost_sunny - actual_cost_sunny) / baseline_cost_sunny if baseline_cost_sunny > 0 else 0.0
-
-     final_row = {
-         "day": "ALL_DAYS_SUMMARY",
-         "cost_savings_abs": total_cost_savings_all,
-         "cost_savings_pct": pct_cost_savings_all,
-         "grid_reduction_abs": total_grid_reduction_all,
-         "grid_reduction_pct": pct_grid_reduction_all,
-         "fairness_cost_savings": fairness_cost_all,
-         "fairness_grid_reduction": fairness_grid_all,
-         "total_degradation_cost": total_degradation_cost_all,
-         "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct,
-         "community_sourcing_rate_pct": community_sourcing_rate_pct,
-         "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct,
-     }
-
-     for col in daily_summary_df.columns:
-         if col not in final_row:
-             final_row[col] = np.nan
-     final_row_df = pd.DataFrame([final_row])
-
-     daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
-     summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
-     daily_summary_df.to_csv(summary_csv, index=False)
-     print(f"Saved day-level summary with final multi-day row to: {summary_csv}")
-
-     # Final summary printout
-     print("\n================== EVALUATION SUMMARY ==================")
-     print(f"Evaluation finished for {days_to_evaluate} days.\n")
-
-     print("--- Standard Metrics (24-Hour Average) ---")
-     print(f"Total grid reduction: {total_grid_reduction_all:.2f} kWh ({pct_grid_reduction_all:.2%})")
-     print(f"Total cost savings: ${total_cost_savings_all:.2f} ({pct_cost_savings_all:.2%})")
-     print(f"Jain's fairness on grid reduction: {fairness_grid_all:.3f}")
-     print(f"Jain's fairness on cost savings: {fairness_cost_all:.3f}\n")
-
-     print("--- Alternative Metrics (Highlighting Peak Performance) ---")
-     print(f"Grid reduction during solar hours: {grid_reduction_sunny_pct:.2%}")
-     print(f"Cost savings during solar hours: {cost_savings_sunny_pct:.2%}")
-     print(f"Community sourcing rate: {community_sourcing_rate_pct:.2%}")
-     print(f"Solar sharing efficiency: {solar_sharing_efficiency_pct:.2%}")
-
-     print("=========================================================")
-
-     # Plots
-     plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
-     plot_daily_df["day"] = plot_daily_df["day"].astype(int)
-
-     # Daily Cost Savings Percentage
-     plt.figure(figsize=(12, 6))
-     plt.bar(plot_daily_df["day"], plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
-     plt.xlabel("Day")
-     plt.ylabel("Cost Savings (%)")
-     plt.title("Daily Community Cost Savings Percentage")
-     plt.xticks(plot_daily_df["day"])
-     plt.grid(axis='y', linestyle='--', alpha=0.7)
-     plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
-     plt.close()
-
-     # Daily Total Demand vs. Solar
-     plt.figure(figsize=(12, 6))
-     bar_width = 0.4
-     days = plot_daily_df["day"]
-     plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width, label="Total Demand", color='coral')
-     plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width, label="Total Solar Generation", color='gold')
-     plt.xlabel("Day")
-     plt.ylabel("Energy (kWh)")
-     plt.title("Total Community Demand vs. Solar Generation Per Day")
-     plt.xticks(days)
-     plt.legend()
-     plt.grid(axis='y', linestyle='--', alpha=0.7)
-     plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
-     plt.close()
-
-     # Combined Time Series of Energy Flows
-     step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
-     step_group["global_step"] = (step_group["day"] - 1) * env.num_steps + step_group["step"]
-
-     fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)
-
-     # Subplot 1: Grid Import vs P2P Buy
-     ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"], label="Grid Import (with P2P)", color='r')
-     ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
-     ax1.set_ylabel("Energy (kWh)")
-     ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
-     ax1.legend()
-     ax1.grid(True, linestyle='--', alpha=0.6)
-
-     # Subplot 2: Grid Export vs P2P Sell
-     ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
-     ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
-     ax2.set_xlabel("Global Timestep")
-     ax2.set_ylabel("Energy (kWh)")
-     ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
-     ax2.legend()
-     ax2.grid(True, linestyle='--', alpha=0.6)
-
-     plt.tight_layout()
-     plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
-     plt.close()
-
-     # Stacked Bar of Daily Energy Sources
-     daily_agg = all_days_df.groupby("day").sum(numeric_only=True)
-
-     plt.figure(figsize=(12, 7))
-     plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"], label="Grid Import (with P2P)", color='crimson')
-     plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"], label="P2P Buy", color='limegreen')
-     plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"], label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')
-
-     plt.xlabel("Day")
-     plt.ylabel("Energy (kWh)")
-     plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
-     plt.xticks(daily_agg.index)
-     plt.legend()
-     plt.grid(axis='y', linestyle='--', alpha=0.7)
-     plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
-     plt.close()
-
-     # Fairness Metrics Over Time
-     plt.figure(figsize=(12, 6))
-     plt.plot(plot_daily_df["day"], plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
-     plt.plot(plot_daily_df["day"], plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
-     plt.plot(plot_daily_df["day"], plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
-     plt.xlabel("Day")
-     plt.ylabel("Jain's Fairness Index")
-     plt.title("Daily Fairness Metrics")
-     plt.xticks(plot_daily_df["day"])
-     plt.ylim(0, 1.05)
-     plt.legend()
-     plt.grid(True, linestyle='--', alpha=0.7)
-     plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
-     plt.close()
-
-     # Per-House Savings and Reductions
-     fig, ax1 = plt.subplots(figsize=(15, 7))
-
-     house_ids_str = house_level_df.index.astype(str)
-     bar_width = 0.4
-     index = np.arange(len(house_ids_str))
-
-     # Bar chart for cost savings
-     color1 = 'tab:green'
-     ax1.set_xlabel('House ID')
-     ax1.set_ylabel('Total Cost Savings ($)', color=color1)
-     ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width, label='Cost Savings', color=color1)
-     ax1.tick_params(axis='y', labelcolor=color1)
-     ax1.set_xticks(index)
-     ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")
-
-     # Second y-axis for grid import reduction
-     ax2 = ax1.twinx()
-     color2 = 'tab:blue'
-     ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
-     ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width, label='Import Reduction', color=color2)
-     ax2.tick_params(axis='y', labelcolor=color2)
-
-     plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {days_to_evaluate} days)')
-     fig.tight_layout()
-     plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
-     plt.close()
-
-     # Price Dynamics for a Single Day
-     day1_prices = all_days_df[all_days_df['day'] == 1][['step', 'grid_price', 'peer_price']].drop_duplicates()
-     plt.figure(figsize=(12, 6))
-     plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
-     plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
-     plt.xlabel("Timestep of Day")
-     plt.ylabel("Price ($/kWh)")
-     plt.title("Price Dynamics on Day 1")
-     plt.legend()
-     plt.grid(True, linestyle='--', alpha=0.6)
-     plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
-     plt.close()
-
-     # Battery State of Charge for Sample Houses
-     day1_df = all_days_df[all_days_df['day'] == 1]
-     battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()
-
-     if len(battery_houses) > 0:
-         sample_houses = battery_houses[:min(4, len(battery_houses))]
-         plt.figure(figsize=(12, 6))
-         for house in sample_houses:
-             house_df = day1_df[day1_df['house'] == house]
-             plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')
-
-         plt.xlabel("Timestep of Day")
-         plt.ylabel("State of Charge (%)")
-         plt.title("Battery SoC on Day 1 for Sample Houses")
-         plt.legend()
-         plt.grid(True, linestyle='--', alpha=0.6)
-         plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
-         plt.close()
-
-     print("All plots have been generated and saved. Evaluation complete.")
-
- if __name__ == "__main__":
-     main()
Other_algorithms/Flat_System/mappo/mappo_train.py DELETED
@@ -1,439 +0,0 @@
1
- import os
2
- import sys
3
- import re
4
- import numpy as np
5
- import torch
6
- import matplotlib.pyplot as plt
7
- import pandas as pd
8
- import time
9
- from datetime import datetime
10
-
11
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
12
-
13
- from solar_sys_environment import SolarSys
14
- from mappo.trainer.mappo import MAPPO
15
-
16
- def main():
17
-
18
- STATE_TO_RUN = "pennsylvania" # "pennsylvania" or "colorado" or "oklahoma"
19
-
20
- # --- Set the path to your training data ---
21
- DATA_FILE_PATH = "/path/to/project/Australia_data/processed_data_ausgrid_100_houses.csv"
22
- num_episodes = 10000
23
- # total number of episodes to run
24
- batch_size = 256 # e.g. 512, 1024, 2048
25
- checkpoint_interval = 100000
26
- window_size = 32 # group episodes in blocks of 32
27
-
28
-
29
- env = SolarSys(
30
- data_path=DATA_FILE_PATH,
31
- state=STATE_TO_RUN,
32
- time_freq="30T"
33
- )
34
- ############################################################################################
35
- # ─── Sanity check: env I/O shapes ─────────────────────────────────────
36
- print("Observation space:", env.observation_space)
37
- print("Action space :", env.action_space)
38
-
39
- # Reset and inspect obs
40
- obs = env.reset()
41
- print(f"Reset returned {len(obs)} agent observations; each obs shape: {np.array(obs).shape}")
42
-
43
- # Sample random actions and do one step
44
- dummy_actions = np.random.rand(env.num_agents, env.action_space.shape[1]).astype(np.float32)
45
- next_obs, rewards, done, info = env.step(dummy_actions)
46
- print(f"Step outputs → next_obs: {len(next_obs)}×{np.array(next_obs).shape[1]}, "
47
- f"rewards: {len(rewards)}, done: {done}")
48
- print("Info keys:", list(info.keys()))
49
- # ────────────────────────────────────────────────────────────────
50
-
51
- # Count the number of houses in each group
52
- env.group_counts = {
53
- 0: env.agent_groups.count(0),
54
- 1: env.agent_groups.count(1)
55
- }
56
- print(f"Number of houses in each group: {env.group_counts}")
57
-
58
- max_steps = env.num_steps
59
-
60
- # dims from the env
61
- num_agents = env.num_agents
62
- local_state_dim = env.observation_space.shape[1]
63
- action_dim = env.action_space.shape[1]
64
-
65
- # ─── Build a unique run directory ───────────────────────────
66
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
67
- run_name = f"mappo_{STATE_TO_RUN}_{num_agents}agents_{num_episodes}eps_{timestamp}"
68
- root_dir = os.path.join("Testing_with_australia_data", run_name)
69
- os.makedirs(root_dir, exist_ok=True)
70
- print(f"Saving training outputs to: {root_dir}")
71
-
72
- logs_dir = os.path.join(root_dir, "logs")
73
- plots_dir = os.path.join(root_dir, "plots")
74
- os.makedirs(logs_dir, exist_ok=True)
75
- os.makedirs(plots_dir, exist_ok=True)
76
-
77
-
78
- # Create the MAPPO agent
79
- mappo = MAPPO(
80
- n_agents=num_agents,
81
- local_dim=local_state_dim,
82
- global_dim=num_agents * local_state_dim,
83
- act_dim=action_dim,
84
- lr=2e-4,
85
- gamma=0.95,
86
- lam=0.95,
87
- clip_eps=0.2,
88
- k_epochs=4,
89
- batch_size=batch_size
90
- )
91
-
92
-
93
- # ─────────────── Tracking / Logging Variables ───────────────
94
- episode_rewards = [] # mean reward per episode (averaged across agents)
95
- episode_total_rewards = [] # total reward per episode (sum across agents)
96
- block_mean_rewards = [] # mean of mean-episode-rewards for each block of window_size
97
- block_total_rewards = [] # sum of total-episode-rewards for each block of window_size
98
-
99
- agent_rewards_log = [[] for _ in range(num_agents)]
100
- best_mean_reward = -1e9
101
- best_model_path = os.path.join(logs_dir, "best_model.pth")
102
-
103
-
104
- daily_rewards = [] # alias for episode_rewards
105
- monthly_rewards = [] # retained from the older monthly-aggregation logic; currently unused
106
-
107
- training_start_time = time.time()
108
- episode_durations = []
109
- total_steps_global = 0
110
- episode_log_data = []
111
- # ADD THIS LINE to store the new metrics from the environment
112
- performance_metrics_log = [] # This will hold the detailed performance data for each episode.
113
-
114
-
115
- agent_charge_log = [[] for _ in range(num_agents)] # Track charge actions
116
- agent_discharge_log = [[] for _ in range(num_agents)] # Track discharge actions
117
-
118
-
119
- # ──────────── Training Loop ────────────
120
- for episode in range(1, num_episodes + 1):
121
- episode_start_time = time.time()
122
-
123
- obs = np.array(env.reset(), dtype=np.float32)
124
-
125
-
126
- # ADD THIS BLOCK to collect metrics from the *previous* episode
127
- # =================================================================
128
- # The env.reset() call above finalized the metrics for the episode that just finished.
129
- # We retrieve them here. We check `if episode > 1` because there are no
130
- # metrics to collect before the first episode has run.
131
- if episode > 1:
132
- # Call the getter method you added to the environment
133
- last_episode_metrics = env.get_episode_metrics()
134
-
135
- # Add the corresponding episode number for merging later
136
- last_episode_metrics['Episode'] = episode - 1
137
-
138
- # Append the dictionary of metrics to our new log
139
- performance_metrics_log.append(last_episode_metrics)
140
- # =================================================================
141
-
142
- total_reward = np.zeros(num_agents, dtype=np.float32)
143
- done = False
144
- step_count = 0
145
- day_logs = []
146
- episode_charges = [[] for _ in range(num_agents)]
147
- episode_discharges = [[] for _ in range(num_agents)]
148
-
149
- while not done:
150
-
151
- # flatten the joint state once per step
152
- # build global state and pick actions
153
- # obs is already a NumPy array of shape (num_agents, local_dim)
154
- global_obs = obs.flatten()
155
- actions, logps = mappo.select_action(obs, global_obs)
156
-
157
- # step environment
158
- next_obs_list, rewards, done, info = env.step(actions)
159
-
160
- # convert next observations to NumPy array too
161
- next_obs = np.array(next_obs_list, dtype=np.float32)
162
- next_global_obs = next_obs.flatten()
163
-
164
-
165
- # store transition
166
- # ensure fast conversion to torch.Tensor
167
- local_obs_arr = np.array(obs, dtype=np.float32)
168
-
169
- mappo.store(
170
- local_obs_arr,
171
- global_obs,
172
- actions,
173
- logps,
174
- rewards,
175
- done,
176
- next_global_obs
177
- )
178
- total_reward += rewards
179
- obs = next_obs
180
- step_count += 1
181
- total_steps_global += 1
182
-
183
- day_logs.append({
184
- "step": step_count - 1,
185
- "grid_import_no_p2p": info["grid_import_no_p2p"],
186
- "grid_import_with_p2p": info["grid_import_with_p2p"],
187
- "p2p_buy": info["p2p_buy"],
188
- "p2p_sell": info["p2p_sell"],
189
- "costs": info["costs"], # Capture costs for analysis
190
- "charge_amount": info.get("charge_amount", np.zeros(num_agents)), # New
191
- "discharge_amount": info.get("discharge_amount", np.zeros(num_agents)) # New
192
- })
193
-
194
- if step_count >= max_steps:
195
- break
196
-
197
- # ─── After each episode ───
198
- # 1) Compute per-episode metrics
199
- sum_ep_reward = float(np.sum(total_reward)) # total reward across all agents for this episode
200
- mean_ep_reward = float(np.mean(total_reward)) # mean reward across agents for this episode
201
-
202
- episode_total_rewards.append(sum_ep_reward)
203
- episode_rewards.append(mean_ep_reward)
204
- daily_rewards.append(mean_ep_reward)
205
-
206
- # 2) If we just finished a block of window_size episodes, aggregate
207
- if len(daily_rewards) % window_size == 0:
208
- # Sum of total rewards over the last window_size episodes
209
- last_totals = episode_total_rewards[-window_size:]
210
- block_sum = sum(last_totals)
211
- block_total_rewards.append(block_sum)
212
-
213
- # Mean of mean-episode-rewards over the last window_size episodes
214
- last_means = daily_rewards[-window_size:]
215
- block_mean = sum(last_means) / window_size
216
- block_mean_rewards.append(block_mean)
217
-
218
- block_idx = len(block_mean_rewards)
219
- print(
220
- f"→ Completed Block {block_idx} "
221
- f"| Episodes { (block_idx-1)*window_size + 1 }–{ block_idx*window_size } "
222
- f"| Block Total Reward: {block_sum:.3f} "
223
- f"| Block Mean Reward: {block_mean:.3f}"
224
- )
225
-
226
- # 3) Log agent-level rewards
227
- for i in range(num_agents):
228
- agent_rewards_log[i].append(total_reward[i])
229
- episode_charges[i].append(actions[i][4])
230
- episode_discharges[i].append(actions[i][5])
231
-
232
- # 4) Summarize P2P steps (unchanged from your original code)
233
- steps_data = []
234
- for entry in day_logs:
235
- step_idx = entry["step"]
236
- p2p_buy_array = entry["p2p_buy"]
237
- p2p_sell_array = entry["p2p_sell"]
238
- grid_no_p2p_array = entry["grid_import_no_p2p"]
239
- grid_with_p2p_array = entry["grid_import_with_p2p"]
240
-
241
- steps_data.append({
242
- "step": step_idx,
243
- "p2p_buy_sum": float(np.sum(p2p_buy_array)),
244
- "p2p_sell_sum": float(np.sum(p2p_sell_array)),
245
- "grid_import_no_p2p_sum": float(np.sum(grid_no_p2p_array)),
246
- "grid_import_with_p2p_sum": float(np.sum(grid_with_p2p_array))
247
- })
248
-
249
-
250
- baseline_cost = np.sum([np.sum(entry["grid_import_no_p2p"]) * env.get_grid_price(entry["step"])
251
- for entry in day_logs])
252
- actual_cost = np.sum([np.sum(entry["costs"]) for entry in day_logs])
253
- cost_reduction = (baseline_cost - actual_cost) / baseline_cost if baseline_cost > 0 else 0.0
254
-
255
- # at end of episode
256
- mappo.update() # Update the MAPPO agent
257
-
258
-
259
- # save if best
260
- if mean_ep_reward > best_mean_reward:
261
- best_mean_reward = mean_ep_reward
262
- mappo.save(best_model_path)
263
-
264
- if episode % checkpoint_interval == 0:
265
- ckpt_path = os.path.join(logs_dir, f"checkpoint_{episode}.pth")
266
- mappo.save(ckpt_path)
267
- # CORRECTED TIMING AND LOGGING
268
- episode_end_time = time.time()
269
- episode_duration = episode_end_time - episode_start_time
270
-
271
- # Move the print statement here
272
- print(
273
- f"Episode {episode}/{num_episodes} "
274
- f"| Time per Episode: {episode_duration:.2f}s "
275
- f"| Steps: {step_count} "
276
- f"| Mean Reward: {mean_ep_reward:.3f} "
277
- f"| Cost Reduction: {cost_reduction:.2%}"
278
- )
279
-
280
- # Record data in our per-episode log
281
- episode_log_data.append({
282
- "Episode": episode,
283
- "Steps": step_count,
284
- "Mean_Reward": mean_ep_reward,
285
- "Total_Reward": sum_ep_reward,
286
- "Cost_Reduction_Pct": cost_reduction * 100, # New
287
- "Baseline_Cost": baseline_cost, # New
288
- "Actual_Cost": actual_cost, # New
289
- "Episode_Duration": episode_duration,
290
- "Total_Charge": np.sum([np.sum(entry["charge_amount"]) for entry in day_logs]), # New
291
- "Total_Discharge": np.sum([np.sum(entry["discharge_amount"]) for entry in day_logs]) # New
292
- })
293
- for i in range(num_agents):
294
- agent_charge_log[i].append(np.mean(episode_charges[i]))
295
- agent_discharge_log[i].append(np.mean(episode_discharges[i]))
296
-
297
- # ADD THIS BLOCK TO CAPTURE THE FINAL EPISODE'S METRICS
298
- # =================================================================
299
- # After the loop, the metrics for the final episode (num_episodes) are ready.
300
- # We collect them here to ensure the log is complete.
301
- final_episode_metrics = env.get_episode_metrics()
302
- final_episode_metrics['Episode'] = num_episodes
303
- performance_metrics_log.append(final_episode_metrics)
304
- # =================================================================
305
-
306
-
307
-
308
- # ─── End of all training ───
309
- training_end_time = time.time()
310
- total_training_time = training_end_time - training_start_time
311
-
312
- # Save out per-episode agent rewards + mean rewards
313
- np.save(os.path.join(logs_dir, "agent_rewards.npy"), np.array(agent_rewards_log))
314
- np.save(os.path.join(logs_dir, "mean_rewards.npy"), np.array(episode_rewards))
315
- np.save(os.path.join(logs_dir, "total_rewards.npy"), np.array(episode_total_rewards))
316
-
317
- ################################# PLOTTING & LOGGING ##################################################################
318
- # ─────────── Create Final DataFrame for Logging and Plotting ───────────
319
-
320
- # 1. Create a DataFrame from the original log data (rewards, costs, etc.)
321
- df_rewards_log = pd.DataFrame(episode_log_data)
322
-
323
- # 2. Create a DataFrame from the new performance metrics log
324
- df_perf_log = pd.DataFrame(performance_metrics_log)
325
-
326
- # 3. Merge the two DataFrames on the 'Episode' column.
327
- # This combines all metrics into a single table.
328
- df_final_log = pd.merge(df_rewards_log, df_perf_log.drop(columns=[
329
- 'degradation_cost_over_time',
330
- 'cost_savings_over_time',
331
- 'grid_reduction_over_time'
332
- ]), on="Episode")
333
-
334
-
335
- # ─────────── PLOTTING ───────────
336
-
337
- # Ensure plot directory exists
338
- os.makedirs(plots_dir, exist_ok=True)
339
-
340
- # Helper: centered moving average
341
- def moving_avg(series, window):
342
- return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()
343
-
344
- # Smoothing window (in episodes)
345
- ma_window = 300
346
- episodes = np.arange(1, num_episodes + 1)
347
-
348
- # 1. Mean Reward moving average
349
- reward_ma = moving_avg(df_final_log["Mean_Reward"], ma_window)
350
- plt.figure(figsize=(8,5))
351
- plt.plot(episodes, reward_ma, linewidth=2, label=f"Mean Reward MA (win={ma_window})")
352
- plt.xlabel("Episode")
353
- plt.ylabel("Mean Reward")
354
- plt.title("MAPPO: Mean Reward Moving Average")
355
- plt.legend()
356
- plt.grid(True)
357
- plt.savefig(os.path.join(plots_dir, "mean_reward_ma.png"), dpi=200)
358
- plt.close()
359
-
360
- # 2. Total Reward moving average
361
- total_ma = moving_avg(df_final_log["Total_Reward"], ma_window)
362
- plt.figure(figsize=(8,5))
363
- plt.plot(episodes, total_ma, linewidth=2, label=f"Total Reward MA (win={ma_window})")
364
- plt.xlabel("Episode")
365
- plt.ylabel("Total Reward")
366
- plt.title("MAPPO: Total Reward Moving Average")
367
- plt.legend()
368
- plt.grid(True)
369
- plt.savefig(os.path.join(plots_dir, "total_reward_ma.png"), dpi=200)
370
- plt.close()
371
-
372
- # 3. Cost Reduction (%) moving average
373
- cost_ma = moving_avg(df_final_log["Cost_Reduction_Pct"], ma_window)
374
- plt.figure(figsize=(8,5))
375
- plt.plot(episodes, cost_ma, linewidth=2, label="Cost Reduction MA (%)")
376
- plt.xlabel("Episode")
377
- plt.ylabel("Cost Reduction (%)")
378
- plt.title("MAPPO: Cost Reduction Moving Average")
379
- plt.legend()
380
- plt.grid(True)
381
- plt.savefig(os.path.join(plots_dir, "cost_reduction_ma.png"), dpi=200)
382
- plt.close()
383
-
384
- # 4. Battery Degradation Cost moving average
385
- degradation_ma = moving_avg(df_final_log["battery_degradation_cost_total"], ma_window)
386
- plt.figure(figsize=(8,5))
387
- plt.plot(episodes, degradation_ma, linewidth=2, label=f"Degradation Cost MA (win={ma_window})", color='purple')
388
- plt.xlabel("Episode")
389
- plt.ylabel("Total Degradation Cost ($)")
390
- plt.title("MAPPO: Battery Degradation Cost Moving Average")
391
- plt.legend()
392
- plt.grid(True)
393
- plt.savefig(os.path.join(plots_dir, "degradation_cost_ma.png"), dpi=200)
394
- plt.close()
395
-
396
-
397
- # Final confirmation message
398
- print(f"\nAll moving-average plots saved to: {plots_dir}")
399
-
400
-
401
- # ─── Save Final Logs to CSV ───
402
-
403
- # 1. Add the total training time as a new row to the DataFrame
404
- total_time_row = pd.DataFrame([{
405
- "Episode": "Total_Training_Time",
406
- "Episode_Duration": total_training_time
407
- }])
408
- df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)
409
-
410
-
411
- # 2. Define the path for the final CSV file.
412
- log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")
413
-
414
- # 3. Select and reorder columns for the final CSV
415
- columns_to_save = [
416
- "Episode",
417
- "Mean_Reward",
418
- "Total_Reward",
419
- "Cost_Reduction_Pct",
420
- "Episode_Duration",
421
- "battery_degradation_cost_total",
422
- ]
423
- df_to_save = df_to_save[columns_to_save]
424
-
425
-
426
- # 4. Save the comprehensive DataFrame to CSV.
427
- df_to_save.to_csv(log_csv_path, index=False)
428
-
429
- print(f"Saved comprehensive training performance log to: {log_csv_path}")
430
-
431
- # ─── Final Timings Printout ───
432
- print("\n" + "="*50)
433
- print("TRAINING COMPLETE".center(50))
434
- print(f"Total training time: {total_training_time:.2f} seconds")
435
- print("="*50)
436
-
437
-
438
- if __name__ == "__main__":
439
- main()
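
The moving_avg helper above smooths the learning curves with a centered rolling mean and min_periods=1, so the smoothed series keeps the same length as the input and has no NaN gaps at either end. A minimal sketch of that behavior on toy data:

import pandas as pd

def moving_avg(series, window):
    # Centered rolling mean; endpoints average only the values available in the window.
    return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()

print(moving_avg([0, 10, 20, 30, 40], window=3))
# [ 5. 10. 20. 30. 35.]  (first and last entries average the 2 available values)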
Other_algorithms/Flat_System/mappo/trainer/__init__.py DELETED
File without changes
Other_algorithms/Flat_System/mappo/trainer/mappo.py DELETED
@@ -1,243 +0,0 @@
1
- # mappo.py
2
- import torch
3
- import torch.nn as nn
4
- import random
5
- import numpy as np
6
- from torch.distributions import Normal
7
-
8
-
9
- def set_global_seed(seed: int):
10
- random.seed(seed) # Python
11
- np.random.seed(seed) # NumPy
12
- torch.manual_seed(seed) # PyTorch CPU
13
- if torch.cuda.is_available():
14
- torch.cuda.manual_seed_all(seed) # PyTorch GPU
15
- # make CuDNN deterministic (may slow you down a bit):
16
- torch.backends.cudnn.deterministic = True
17
- torch.backends.cudnn.benchmark = False
18
-
19
-
20
- # Universal device selection
21
- if torch.cuda.is_available():
22
- device = torch.device("cuda")
23
- print("Using CUDA (NVIDIA GPU)")
24
- # elif torch.backends.mps.is_available():
25
- # device = torch.device("mps")
26
- # print("Using MPS (Apple Silicon GPU)")
27
- else:
28
- device = torch.device("cpu")
29
- print("Using CPU")
30
-
31
- # Fix all RNG seeds for reproducibility
32
- SEED = 42
33
- set_global_seed(SEED)
34
-
35
-
36
- class MLP(nn.Module):
37
- def __init__(self, input_dim, hidden_dims, output_dim):
38
- super().__init__()
39
- layers = []
40
- last_dim = input_dim
41
- for h in hidden_dims:
42
- layers += [nn.Linear(last_dim, h), nn.ReLU()]
43
- last_dim = h
44
- layers.append(nn.Linear(last_dim, output_dim))
45
- self.net = nn.Sequential(*layers)
46
-
47
- def forward(self, x):
48
- return self.net(x)
49
-
50
- class Actor(nn.Module):
51
- def __init__(self, obs_dim, act_dim, hidden=(64,64)):
52
- super().__init__()
53
- self.net = MLP(obs_dim, hidden, act_dim)
54
- self.log_std = nn.Parameter(torch.zeros(act_dim))
55
-
56
- def forward(self, x):
57
- mean = self.net(x)
58
- std = torch.exp(self.log_std)
59
- return mean, std
60
-
61
- class Critic(nn.Module):
62
- def __init__(self, state_dim, hidden=(128,128)):
63
- super().__init__()
64
- self.net = MLP(state_dim, hidden, 1)
65
-
66
- def forward(self, x):
67
- return self.net(x).squeeze(-1)
68
-
69
- class MAPPO:
70
- def __init__(
71
- self,
72
- n_agents,
73
- local_dim,
74
- global_dim,
75
- act_dim,
76
- lr=3e-4,
77
- gamma=0.99,
78
- lam=0.95,
79
- clip_eps=0.2,
80
- k_epochs=10,
81
- batch_size=1024
82
- ):
83
- self.n_agents = n_agents
84
- self.gamma = gamma
85
- self.lam = lam
86
- self.clip_eps = clip_eps
87
- self.k_epochs = k_epochs
88
- self.batch_size = batch_size
89
-
90
- self.actor = Actor(local_dim, act_dim).to(device)
91
- self.critic = Critic(global_dim).to(device)
92
-
93
- self.opt_a = torch.optim.Adam(self.actor.parameters(), lr=lr)
94
- self.opt_c = torch.optim.Adam(self.critic.parameters(), lr=lr)
95
-
96
- self.local_dim = local_dim
97
- self.global_dim = global_dim
98
- self.act_dim = act_dim
99
-
100
- self.clear_buffer()
101
-
102
- def clear_buffer(self):
103
- self.ls = [] # local observations
104
- self.gs = [] # global observations
105
- self.ac = [] # actions
106
- self.lp = [] # log-probs
107
- self.rw = [] # rewards
108
- self.done = [] # done flags
109
- self.next_gs = [] # next global observations
110
-
111
- @torch.no_grad()
112
- def select_action(self, local_obs, global_obs):
113
- l = torch.FloatTensor(local_obs).to(device)
114
- mean, std = self.actor(l)
115
- dist = Normal(mean, std)
116
- a = dist.sample()
117
- return a.cpu().numpy(), dist.log_prob(a).sum(-1).cpu().numpy()
118
-
119
- def store(self, local_obs, global_obs, action, logp, reward, done, next_global_obs):
120
- self.ls.append(local_obs)
121
- self.gs.append(global_obs)
122
- self.ac.append(action)
123
- self.lp.append(logp)
124
- self.rw.append(reward)
125
- self.done.append(done)
126
- self.next_gs.append(next_global_obs)
127
-
128
- def compute_gae(self, values):
129
- """
130
- values: torch.Tensor shape [T] (one central V(s) per timestep)
131
- returns:
132
- adv_flat: torch.Tensor shape [T * n_agents]
133
- ret_flat: torch.Tensor shape [T * n_agents]
134
- """
135
- # 1) get raw arrays
136
- vals_1d = values.cpu().numpy() # [T]
137
- T = len(vals_1d)
138
- N = self.n_agents
139
-
140
- # 2) broadcast to per-agent
141
- # vals_agent[t,i] = V(state_t)
142
- vals_agent = np.tile(vals_1d[:,None], (1, N)) # [T,N]
143
-
144
- # 3) build next_vals likewise
145
- next_vals = np.zeros_like(vals_agent) # [T,N]
146
- next_vals[:-1] = vals_agent[1:]
147
- # if episode didn’t end at final step, bootstrap last:
148
- if not self.done[-1]:
149
- with torch.no_grad():
150
- v_last = self.critic(
151
- torch.FloatTensor(self.next_gs[-1]).to(device)
152
- ).cpu().item()
153
- next_vals[-1, :] = v_last
154
-
155
- # 4) GAE loop over (T,N)
156
- adv = np.zeros_like(vals_agent, dtype=np.float32)
157
- prev_adv = np.zeros(N, dtype=np.float32)
158
- for t in reversed(range(T)):
159
- mask = 1.0 - float(self.done[t]) # scalar 0/1
160
- rew_t = np.array(self.rw[t], dtype=np.float32) # [N]
161
- delta = rew_t + self.gamma * next_vals[t] * mask - vals_agent[t]
162
- prev_adv = delta + self.gamma * self.lam * mask * prev_adv
163
- adv[t] = prev_adv
164
-
165
- # 5) compute returns & flatten
166
- ret = adv + vals_agent # [T,N]
167
- adv_flat = torch.from_numpy(adv.flatten()).to(device)
168
- ret_flat = torch.from_numpy(ret.flatten()).to(device)
169
- return adv_flat, ret_flat
170
-
171
-
172
- def update(self):
173
- # 1) Raw global states tensor [T, G]
174
- raw_gs = torch.FloatTensor(self.gs).to(device) # [T, G]
175
-
176
- # 2) Compute one value V(s_t) per timestep
177
- with torch.no_grad():
178
- vals = self.critic(raw_gs).cpu() # [T]
179
-
180
- # 3) Compute advantages and returns using GAE (returns flattened [T*N])
181
- adv_flat, ret_flat = self.compute_gae(vals) # both shape [T * N]
182
-
183
- # 4) Prepare per-agent flattened training tensors
184
- # Local states [T*N, local_dim]
185
- ls = torch.FloatTensor(self.ls).view(-1, self.local_dim).to(device)
186
- # Actions [T*N, act_dim]
187
- ac = torch.FloatTensor(self.ac).view(-1, self.act_dim).to(device)
188
- # Old log-probs [T*N]
189
- old_lp = torch.FloatTensor(self.lp).view(-1).to(device)
190
-
191
- # Broadcast global states to per-agent: [T, G] -> [T, N, G] -> [T*N, G]
192
- gs = raw_gs.unsqueeze(1).expand(-1, self.n_agents, -1) # [T, N, G]
193
- gs = gs.reshape(-1, self.global_dim).to(device) # [T*N, G]
194
-
195
- # Create dataset and loader
196
- dataset = torch.utils.data.TensorDataset(
197
- ls, gs, ac, old_lp, adv_flat, ret_flat
198
- )
199
- gen = torch.Generator()
200
- gen.manual_seed(SEED)
201
- loader = torch.utils.data.DataLoader(
202
- dataset,
203
- batch_size=self.batch_size,
204
- shuffle=True,
205
- num_workers=0,
206
- generator=gen
207
- )
208
- # 5) PPO update loop
209
- for _ in range(self.k_epochs):
210
- for b_ls, b_gs, b_ac, b_lp, b_adv, b_ret in loader:
211
- # Actor update
212
- mean, std = self.actor(b_ls)
213
- dist = Normal(mean, std)
214
- lp_new = dist.log_prob(b_ac).sum(-1)
215
- ratio = torch.exp(lp_new - b_lp)
216
- surr1 = ratio * b_adv
217
- surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * b_adv
218
- actor_loss = -torch.min(surr1, surr2).mean()
219
-
220
- self.opt_a.zero_grad()
221
- actor_loss.backward()
222
- self.opt_a.step()
223
-
224
- # Critic update
225
- val_pred = self.critic(b_gs)
226
- critic_loss = nn.MSELoss()(val_pred, b_ret)
227
-
228
- self.opt_c.zero_grad()
229
- critic_loss.backward()
230
- self.opt_c.step()
231
-
232
- # 6) Clear buffers for next rollout
233
- self.clear_buffer()
234
-
235
-
236
- def save(self, path):
237
- torch.save({'actor': self.actor.state_dict(),
238
- 'critic': self.critic.state_dict()}, path)
239
-
240
- def load(self, path):
241
- data = torch.load(path, map_location=device)
242
- self.actor.load_state_dict(data['actor'])
243
- self.critic.load_state_dict(data['critic'])
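
For reference, compute_gae above implements the standard GAE(lambda) recursion: delta_t = r_t + gamma * V(s_{t+1}) * mask_t - V(s_t), then A_t = delta_t + gamma * lam * mask_t * A_{t+1}, with the single central value broadcast to every agent. A self-contained numeric sketch (toy single-agent values, not taken from the code above) that mirrors that loop:

import numpy as np

gamma, lam = 0.95, 0.95
rewards = np.array([1.0, 0.5, 2.0], dtype=np.float32)  # toy r_t
values  = np.array([0.8, 0.9, 1.1], dtype=np.float32)  # toy V(s_t)
dones   = np.array([0.0, 0.0, 1.0], dtype=np.float32)  # episode terminates at t=2

next_values = np.append(values[1:], 0.0)  # V(s_{t+1}); no bootstrap after the terminal step
adv = np.zeros(3, dtype=np.float32)
prev_adv = 0.0
for t in reversed(range(3)):
    mask = 1.0 - dones[t]  # zeroes out value and carry-over beyond a terminal step
    delta = rewards[t] + gamma * next_values[t] * mask - values[t]
    prev_adv = delta + gamma * lam * mask * prev_adv
    adv[t] = prev_adv
returns = adv + values  # regression targets for the critic
print(adv, returns)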
Other_algorithms/Flat_System/meanfield/_init_.py DELETED
File without changes
Other_algorithms/Flat_System/meanfield/meanfield_evaluation.py DELETED
@@ -1,492 +0,0 @@
1
- # mfac_evaluate.py
2
- import os
3
- import sys
4
- import time
5
- import re
6
- import numpy as np
7
- import pandas as pd
8
- import matplotlib.pyplot as plt
9
- import torch
10
- from datetime import datetime
11
-
12
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
13
-
14
- from solar_sys_environment import SolarSys
15
- from meanfield.trainer.mfac import MeanField
16
-
17
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
-
19
- def compute_jains_fairness(values: np.ndarray) -> float:
20
- if len(values) == 0:
21
- return 0.0
22
- if np.all(values == 0):
23
- return 1.0
24
- num = (values.sum())**2
25
- den = len(values) * (values**2).sum()
26
- return num / den
27
-
28
- def main():
29
- # User parameters
30
- MODEL_PATH = "/path/to/project/meanfield_pennsylvania_100agents_10000eps/logs/best_model.pth"
31
- DATA_PATH = "/path/to/project/testing/100houses_30days_TEST.csv"
32
- DAYS_TO_EVALUATE = 30
33
-
34
- model_path = MODEL_PATH
35
- data_path = DATA_PATH
36
- days_to_evaluate = DAYS_TO_EVALUATE
37
- SOLAR_THRESHOLD = 0.1
38
-
39
- # Env setup
40
- env = SolarSys(
41
- data_path=data_path,
42
- state="pennsylvania",
43
- time_freq="3H"
44
- )
45
- eval_steps = env.num_steps
46
- house_ids = env.house_ids
47
- num_agents = env.num_agents
48
-
49
- # Generate a unique eval run folder
50
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
51
- run_name = f"eval_meanfield_{num_agents}agents_{days_to_evaluate}days_{timestamp}"
52
- output_folder = os.path.join("runs_with_battery", run_name)
53
- logs_dir = os.path.join(output_folder, "logs")
54
- plots_dir = os.path.join(output_folder, "plots")
55
- for d in (logs_dir, plots_dir):
56
- os.makedirs(d, exist_ok=True)
57
- print(f"Saving evaluation outputs to: {output_folder}")
58
-
59
- local_dim = env.observation_space.shape[1]
60
- global_dim = num_agents * local_dim
61
- act_dim = env.action_space.shape[1]
62
-
63
- mfac = MeanField(
64
- n_agents=num_agents,
65
- local_dim=local_dim,
66
- global_dim=global_dim,
67
- act_dim=act_dim,
68
- lr=2e-4,
69
- gamma=0.95,
70
- lam=0.95,
71
- clip_eps=0.2,
72
- k_epochs=10,
73
- batch_size=1024
74
- )
75
-
76
- # Load mfac checkpoint
77
- mfac.load(model_path)
78
- mfac.actor.to(device).eval()
79
- mfac.critic.to(device).eval()
80
-
81
- # Prepare logs
82
- all_logs = []
83
- daily_summaries = []
84
- step_timing_list = []
85
-
86
- evaluation_start = time.time()
87
-
88
- for day_idx in range(days_to_evaluate):
89
- obs = env.reset()
90
- obs = np.array(obs, dtype=np.float32)
91
- done = False
92
- step_count = 0
93
- day_logs = []
94
-
95
- while not done:
96
- step_start_time = time.time()
97
- global_obs = np.array(obs).flatten()
98
-
99
- # Select actions with mfac
100
- actions, _ = mfac.select_action(obs, global_obs)
101
-
102
- next_obs, rewards, done, info = env.step(actions)
103
- next_obs = np.array(next_obs, dtype=np.float32)
104
-
105
- # Consolidated Logging
106
- step_end_time = time.time()
107
- step_duration = step_end_time - step_start_time
108
-
109
- print(f"[Day {day_idx+1}, Step {step_count}] Step time: {step_duration:.6f} seconds")
110
-
111
- step_timing_list.append({
112
- "day": day_idx + 1,
113
- "step": step_count,
114
- "step_time_s": step_duration
115
- })
116
-
117
- grid_price_now = env.get_grid_price(step_count)
118
- peer_price_now = info.get("peer_price", env.get_peer_price(step_count,
119
- float(info["p2p_sell"].sum()),
120
- float(info["p2p_buy"].sum())))
121
-
122
- for i, hid in enumerate(house_ids):
123
- is_battery_house = hid in env.batteries
124
- p2p_buy = float(info["p2p_buy"][i])
125
- p2p_sell = float(info["p2p_sell"][i])
126
- charge_amount = float(info.get("charge_amount", [0]*num_agents)[i])
127
- discharge_amount = float(info.get("discharge_amount", [0]*num_agents)[i])
128
-
129
- day_logs.append({
130
- "day": day_idx + 1,
131
- "step": step_count,
132
- "house": hid,
133
- "grid_import_no_p2p": float(info["grid_import_no_p2p"][i]),
134
- "grid_import_with_p2p": float(info["grid_import_with_p2p"][i]),
135
- "grid_export": float(info.get("grid_export", [0]*num_agents)[i]),
136
- "p2p_buy": p2p_buy,
137
- "p2p_sell": p2p_sell,
138
- "actual_cost": float(info["costs"][i]),
139
- "baseline_cost": float(info["grid_import_no_p2p"][i]) * grid_price_now,
140
- "total_demand": float(env.demands[hid][step_count]),
141
- "total_solar": float(env.solars[hid][step_count]),
142
- "grid_price": grid_price_now,
143
- "peer_price": peer_price_now,
144
- "soc": (env.batteries[hid]["soc"] / env.batteries[hid]["max_capacity"]) if is_battery_house else np.nan,
145
- "degradation_cost": ((charge_amount + discharge_amount) * env.batteries[hid]["degradation_cost_per_kwh"]) if is_battery_house else 0.0,
146
- "reward": float(rewards[i]),
147
- })
148
-
149
- obs = next_obs
150
- step_count += 1
151
- if step_count >= eval_steps:
152
- break
153
-
154
- day_df = pd.DataFrame(day_logs)
155
- all_logs.extend(day_logs)
156
-
157
- # Consolidated daily summary calculation
158
- grouped_house = day_df.groupby("house").sum(numeric_only=True)
159
- grouped_step = day_df.groupby("step").sum(numeric_only=True)
160
-
161
- total_demand = grouped_step["total_demand"].sum()
162
- total_solar = grouped_step["total_solar"].sum()
163
- total_p2p_buy = grouped_house["p2p_buy"].sum()
164
- total_p2p_sell = grouped_house["p2p_sell"].sum()
165
-
166
- baseline_cost_per_house = grouped_house["baseline_cost"]
167
- actual_cost_per_house = grouped_house["actual_cost"]
168
- cost_savings_per_house = baseline_cost_per_house - actual_cost_per_house
169
- day_total_cost_savings = cost_savings_per_house.sum()
170
-
171
- if baseline_cost_per_house.sum() > 0:
172
- overall_cost_savings_pct = day_total_cost_savings / baseline_cost_per_house.sum()
173
- else:
174
- overall_cost_savings_pct = 0.0
175
-
176
- baseline_import_per_house = grouped_house["grid_import_no_p2p"]
177
- actual_import_per_house = grouped_house["grid_import_with_p2p"]
178
- import_reduction_per_house = baseline_import_per_house - actual_import_per_house
179
- day_total_import_reduction = import_reduction_per_house.sum()
180
-
181
- if baseline_import_per_house.sum() > 0:
182
- overall_import_reduction_pct = day_total_import_reduction / baseline_import_per_house.sum()
183
- else:
184
- overall_import_reduction_pct = 0.0
185
-
186
- fairness_cost_savings = compute_jains_fairness(cost_savings_per_house.values)
187
- fairness_import_reduction = compute_jains_fairness(import_reduction_per_house.values)
188
- fairness_rewards = compute_jains_fairness(grouped_house["reward"].values)
189
- fairness_p2p_buy = compute_jains_fairness(grouped_house["p2p_buy"].values)
190
- fairness_p2p_sell = compute_jains_fairness(grouped_house["p2p_sell"].values)
191
- fairness_p2p_total = compute_jains_fairness((grouped_house["p2p_buy"] + grouped_house["p2p_sell"]).values)
192
- day_total_degradation_cost = grouped_house["degradation_cost"].sum()
193
-
194
- daily_summaries.append({
195
- "day": day_idx + 1,
196
- "day_total_demand": total_demand,
197
- "day_total_solar": total_solar,
198
- "day_p2p_buy": total_p2p_buy,
199
- "day_p2p_sell": total_p2p_sell,
200
- "cost_savings_abs": day_total_cost_savings,
201
- "cost_savings_pct": overall_cost_savings_pct,
202
- "fairness_cost_savings": fairness_cost_savings,
203
- "grid_reduction_abs": day_total_import_reduction,
204
- "grid_reduction_pct": overall_import_reduction_pct,
205
- "fairness_grid_reduction": fairness_import_reduction,
206
- "fairness_reward": fairness_rewards,
207
- "fairness_p2p_buy": fairness_p2p_buy,
208
- "fairness_p2p_sell": fairness_p2p_sell,
209
- "fairness_p2p_total": fairness_p2p_total,
210
- "total_degradation_cost": day_total_degradation_cost
211
- })
212
-
213
- # Final processing and saving
214
- evaluation_end = time.time()
215
- total_eval_time = evaluation_end - evaluation_start
216
- print(f"\nEvaluation loop finished. Total time: {total_eval_time:.2f} seconds.")
217
-
218
- all_days_df = pd.DataFrame(all_logs)
219
- combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
220
- all_days_df.to_csv(combined_csv_path, index=False)
221
- print(f"Saved combined step-level logs to: {combined_csv_path}")
222
-
223
- step_timing_df = pd.DataFrame(step_timing_list)
224
- timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
225
- step_timing_df.to_csv(timing_csv_path, index=False)
226
- print(f"Saved step timing logs to: {timing_csv_path}")
227
-
228
- house_level_df = all_days_df.groupby("house").agg({
229
- "baseline_cost": "sum",
230
- "actual_cost": "sum",
231
- "grid_import_no_p2p": "sum",
232
- "grid_import_with_p2p": "sum",
233
- "degradation_cost": "sum"
234
- })
235
- house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
236
- house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]
237
-
238
- house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
239
- house_level_df.to_csv(house_summary_csv)
240
- print(f"Saved final summary per house to: {house_summary_csv}")
241
-
242
- fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
243
- fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)
244
-
245
- daily_summary_df = pd.DataFrame(daily_summaries)
246
-
247
- total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
248
- total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
249
- pct_cost_savings_all = total_cost_savings_all / total_baseline_cost_all if total_baseline_cost_all > 0 else 0.0
250
-
251
- total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
252
- total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
253
- pct_grid_reduction_all = total_grid_reduction_all / total_baseline_import_all if total_baseline_import_all > 0 else 0.0
254
-
255
- total_degradation_cost_all = daily_summary_df["total_degradation_cost"].sum()
256
-
257
- # Calculate alternative performance metrics
258
-
259
- # Grid Reduction During Solar Hours
260
- agg_solar_per_step = all_days_df.groupby(['day', 'step'])['total_solar'].sum()
261
- sunny_steps_mask = agg_solar_per_step > (SOLAR_THRESHOLD * num_agents)
262
- sunny_df = all_days_df.set_index(['day', 'step'])[sunny_steps_mask].reset_index()
263
- baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
264
- actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
265
- grid_reduction_sunny_pct = 0.0
266
- if baseline_import_sunny > 0:
267
- grid_reduction_sunny_pct = (baseline_import_sunny - actual_import_sunny) / baseline_import_sunny
268
-
269
- # Community Sourcing Rate
270
- total_p2p_buy = all_days_df['p2p_buy'].sum()
271
- total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
272
- total_procured_energy = total_p2p_buy + total_actual_grid_import
273
- community_sourcing_rate_pct = 0.0
274
- if total_procured_energy > 0:
275
- community_sourcing_rate_pct = total_p2p_buy / total_procured_energy
276
-
277
- # Solar Sharing Efficiency
278
- total_p2p_sell = all_days_df['p2p_sell'].sum()
279
- total_grid_export = all_days_df['grid_export'].sum()
280
- total_excess_solar = total_p2p_sell + total_grid_export
281
- solar_sharing_efficiency_pct = 0.0
282
- if total_excess_solar > 0:
283
- solar_sharing_efficiency_pct = total_p2p_sell / total_excess_solar
284
-
285
- # Cost savings in sunny hours
286
- baseline_cost_sunny = sunny_df['baseline_cost'].sum()
287
- actual_cost_sunny = sunny_df['actual_cost'].sum()
288
- cost_savings_sunny_pct = (baseline_cost_sunny - actual_cost_sunny) / baseline_cost_sunny if baseline_cost_sunny > 0 else 0.0
289
-
298
- final_row = {
299
- "day": "ALL_DAYS_SUMMARY",
300
- "cost_savings_abs": total_cost_savings_all,
301
- "cost_savings_pct": pct_cost_savings_all,
302
- "grid_reduction_abs": total_grid_reduction_all,
303
- "grid_reduction_pct": pct_grid_reduction_all,
304
- "fairness_cost_savings": fairness_cost_all,
305
- "fairness_grid_reduction": fairness_grid_all,
306
- "total_degradation_cost": total_degradation_cost_all,
307
- "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct,
308
- "community_sourcing_rate_pct": community_sourcing_rate_pct,
309
- "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct,
310
- }
311
-
312
- for col in daily_summary_df.columns:
313
- if col not in final_row:
314
- final_row[col] = np.nan
315
- final_row_df = pd.DataFrame([final_row])
316
-
317
- daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
318
- summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
319
- daily_summary_df.to_csv(summary_csv, index=False)
320
- print(f"Saved day-level summary with final multi-day row to: {summary_csv}")
321
-
322
- # Final summary printout
323
- print("\n================== EVALUATION SUMMARY ==================")
324
- print(f"Evaluation finished for {days_to_evaluate} days.\n")
325
-
326
- print("--- Standard Metrics (24-Hour Average) ---")
327
- print(f"Total grid reduction: {total_grid_reduction_all:.2f} kWh ({pct_grid_reduction_all:.2%})")
328
- print(f"Total cost savings: ${total_cost_savings_all:.2f} ({pct_cost_savings_all:.2%})")
329
- print(f"Jain's fairness on grid reduction: {fairness_grid_all:.3f}")
330
- print(f"Jain's fairness on cost savings: {fairness_cost_all:.3f}\n")
331
-
332
- print("--- Alternative Metrics (Highlighting Peak Performance) ---")
333
- print(f"Grid reduction during solar hours: {grid_reduction_sunny_pct:.2%}")
334
- print(f"Cost savings during solar hours: {cost_savings_sunny_pct:.2%}")
335
- print(f"Community sourcing rate: {community_sourcing_rate_pct:.2%}")
336
- print(f"Solar sharing efficiency: {solar_sharing_efficiency_pct:.2%}")
337
-
338
- print("=========================================================")
339
-
340
- # Plots
341
- plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
342
- plot_daily_df["day"] = plot_daily_df["day"].astype(int)
343
-
344
- # Daily Cost Savings Percentage
345
- plt.figure(figsize=(12, 6))
346
- plt.bar(plot_daily_df["day"], plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
347
- plt.xlabel("Day")
348
- plt.ylabel("Cost Savings (%)")
349
- plt.title("Daily Community Cost Savings Percentage")
350
- plt.xticks(plot_daily_df["day"])
351
- plt.grid(axis='y', linestyle='--', alpha=0.7)
352
- plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
353
- plt.close()
354
-
355
- # Daily Total Demand vs. Solar
356
- plt.figure(figsize=(12, 6))
357
- bar_width = 0.4
358
- days = plot_daily_df["day"]
359
- plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width, label="Total Demand", color='coral')
360
- plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width, label="Total Solar Generation", color='gold')
361
- plt.xlabel("Day")
362
- plt.ylabel("Energy (kWh)")
363
- plt.title("Total Community Demand vs. Solar Generation Per Day")
364
- plt.xticks(days)
365
- plt.legend()
366
- plt.grid(axis='y', linestyle='--', alpha=0.7)
367
- plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
368
- plt.close()
369
-
370
- # Combined Time Series of Energy Flows
371
- step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
372
- step_group["global_step"] = (step_group["day"] - 1) * env.num_steps + step_group["step"]
373
-
374
- fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)
375
-
376
- # Subplot 1: Grid Import vs P2P Buy
377
- ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"], label="Grid Import (with P2P)", color='r')
378
- ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
379
- ax1.set_ylabel("Energy (kWh)")
380
- ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
381
- ax1.legend()
382
- ax1.grid(True, linestyle='--', alpha=0.6)
383
-
384
- # Subplot 2: Grid Export vs P2P Sell
385
- ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
386
- ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
387
- ax2.set_xlabel("Global Timestep")
388
- ax2.set_ylabel("Energy (kWh)")
389
- ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
390
- ax2.legend()
391
- ax2.grid(True, linestyle='--', alpha=0.6)
392
-
393
- plt.tight_layout()
394
- plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
395
- plt.close()
396
-
397
- # Stacked Bar of Daily Energy Sources
398
- daily_agg = all_days_df.groupby("day").sum(numeric_only=True)
399
-
400
- plt.figure(figsize=(12, 7))
401
- plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"], label="Grid Import (with P2P)", color='crimson')
402
- plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"], label="P2P Buy", color='limegreen')
403
- plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"], label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')
404
-
405
- plt.xlabel("Day")
406
- plt.ylabel("Energy (kWh)")
407
- plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
408
- plt.xticks(daily_agg.index)
409
- plt.legend()
410
- plt.grid(axis='y', linestyle='--', alpha=0.7)
411
- plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
412
- plt.close()
413
-
414
- # Fairness Metrics Over Time
415
- plt.figure(figsize=(12, 6))
416
- plt.plot(plot_daily_df["day"], plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
417
- plt.plot(plot_daily_df["day"], plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
418
- plt.plot(plot_daily_df["day"], plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
419
- plt.xlabel("Day")
420
- plt.ylabel("Jain's Fairness Index")
421
- plt.title("Daily Fairness Metrics")
422
- plt.xticks(plot_daily_df["day"])
423
- plt.ylim(0, 1.05)
424
- plt.legend()
425
- plt.grid(True, linestyle='--', alpha=0.7)
426
- plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
427
- plt.close()
428
-
429
- # Per-House Savings and Reductions
430
- fig, ax1 = plt.subplots(figsize=(15, 7))
431
-
432
- house_ids_str = house_level_df.index.astype(str)
433
- bar_width = 0.4
434
- index = np.arange(len(house_ids_str))
435
-
436
- # Bar chart for cost savings
437
- color1 = 'tab:green'
438
- ax1.set_xlabel('House ID')
439
- ax1.set_ylabel('Total Cost Savings ($)', color=color1)
440
- ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width, label='Cost Savings', color=color1)
441
- ax1.tick_params(axis='y', labelcolor=color1)
442
- ax1.set_xticks(index)
443
- ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")
444
-
445
- # Second y-axis for grid import reduction
446
- ax2 = ax1.twinx()
447
- color2 = 'tab:blue'
448
- ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
449
- ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width, label='Import Reduction', color=color2)
450
- ax2.tick_params(axis='y', labelcolor=color2)
451
-
452
- plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {days_to_evaluate} days)')
453
- fig.tight_layout()
454
- plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
455
- plt.close()
456
-
457
- # Price Dynamics for a Single Day
458
- day1_prices = all_days_df[all_days_df['day'] == 1][['step', 'grid_price', 'peer_price']].drop_duplicates()
459
- plt.figure(figsize=(12, 6))
460
- plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
461
- plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
462
- plt.xlabel("Timestep of Day")
463
- plt.ylabel("Price ($/kWh)")
464
- plt.title("Price Dynamics on Day 1")
465
- plt.legend()
466
- plt.grid(True, linestyle='--', alpha=0.6)
467
- plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
468
- plt.close()
469
-
470
- # Battery State of Charge for Sample Houses
471
- day1_df = all_days_df[all_days_df['day'] == 1]
472
- battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()
473
-
474
- if len(battery_houses) > 0:
475
- sample_houses = battery_houses[:min(4, len(battery_houses))]
476
- plt.figure(figsize=(12, 6))
477
- for house in sample_houses:
478
- house_df = day1_df[day1_df['house'] == house]
479
- plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')
480
-
481
- plt.xlabel("Timestep of Day")
482
- plt.ylabel("State of Charge (%)")
483
- plt.title("Battery SoC on Day 1 for Sample Houses")
484
- plt.legend()
485
- plt.grid(True, linestyle='--', alpha=0.6)
486
- plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
487
- plt.close()
488
-
489
- print("All plots have been generated and saved. Evaluation complete.")
490
-
491
- if __name__ == "__main__":
492
- main()
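
As a sanity check on the fairness numbers reported above, note that Jain's index equals 1.0 for a perfectly even split and 1/n when a single house captures everything. A quick numeric illustration of the compute_jains_fairness formula:

import numpy as np

def jain(v):
    # Jain's fairness index: (sum v)^2 / (n * sum v^2), for non-degenerate inputs.
    v = np.asarray(v, dtype=float)
    return v.sum() ** 2 / (len(v) * (v ** 2).sum())

print(jain([1, 1, 1, 1]))  # 1.0: savings shared evenly across 4 houses
print(jain([4, 0, 0, 0]))  # 0.25 = 1/4: one house captures all the savings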
Other_algorithms/Flat_System/meanfield/meanfield_train.py DELETED
@@ -1,386 +0,0 @@
1
- import os
2
- import sys
3
- import re
4
- import numpy as np
5
- import torch
6
- import matplotlib.pyplot as plt
7
- import pandas as pd
8
- import time
9
- from datetime import datetime
10
-
11
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
12
-
13
- from solar_sys_environment import SolarSys
14
- from meanfield.trainer.mfac import MeanField
15
-
16
- def main():
17
-
18
- STATE_TO_RUN = "pennsylvania" # "pennsylvania" or "colorado" or "oklahoma"
19
-
20
- # Set the path to your training data
21
- DATA_FILE_PATH = "/path/to/project/training/100houses_152days_TRAIN.csv"
22
- num_episodes = 10000
23
- batch_size = 256
24
- checkpoint_interval = 100000
25
- window_size = 32
26
-
27
- env = SolarSys(
28
- data_path=DATA_FILE_PATH,
29
- state=STATE_TO_RUN,
30
- time_freq="3H"
31
- )
32
-
33
- # Sanity check: env I/O shapes
34
- print("Observation space:", env.observation_space)
35
- print("Action space :", env.action_space)
36
-
37
- # Reset and inspect obs
38
- obs = env.reset()
39
- print(f"Reset returned {len(obs)} agent observations; each obs shape: {np.array(obs).shape}")
40
-
41
- # Sample random actions and do one step
42
- dummy_actions = np.random.rand(env.num_agents, env.action_space.shape[1]).astype(np.float32)
43
- next_obs, rewards, done, info = env.step(dummy_actions)
44
- print(f"Step outputs → next_obs: {len(next_obs)}×{np.array(next_obs).shape[1]}, "
45
- f"rewards: {len(rewards)}, done: {done}")
46
- print("Info keys:", list(info.keys()))
47
-
48
- # Count the number of houses in each group
49
- env.group_counts = {
50
- 0: env.agent_groups.count(0),
51
- 1: env.agent_groups.count(1)
52
- }
53
- print(f"Number of houses in each group: {env.group_counts}")
54
-
55
- max_steps = env.num_steps
56
-
57
- # Dims from the env
58
- num_agents = env.num_agents
59
- local_state_dim = env.observation_space.shape[1]
60
- action_dim = env.action_space.shape[1]
61
-
62
- # Build a unique run directory
63
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
64
- run_name = f"meanfield_{STATE_TO_RUN}_{num_agents}agents_{num_episodes}eps_{timestamp}"
65
- root_dir = os.path.join("Training_for_granularity", run_name)
66
- os.makedirs(root_dir, exist_ok=True)
67
- print(f"Saving training outputs to: {root_dir}")
68
-
69
- logs_dir = os.path.join(root_dir, "logs")
70
- plots_dir = os.path.join(root_dir, "plots")
71
- os.makedirs(logs_dir, exist_ok=True)
72
- os.makedirs(plots_dir, exist_ok=True)
73
-
74
- # Create the MeanField agent
75
- meanfield = MeanField(
76
- n_agents=num_agents,
77
- local_dim=local_state_dim,
78
- global_dim=num_agents * local_state_dim,
79
- act_dim=action_dim,
80
- lr=2e-4,
81
- gamma=0.95,
82
- lam=0.95,
83
- clip_eps=0.2,
84
- k_epochs=4,
85
- batch_size=batch_size
86
- )
87
-
88
- # Tracking / Logging Variables
89
- episode_rewards = []
90
- episode_total_rewards = []
91
- block_mean_rewards = []
92
- block_total_rewards = []
93
-
94
- agent_rewards_log = [[] for _ in range(num_agents)]
95
- best_mean_reward = -1e9
96
- best_model_path = os.path.join(logs_dir, "best_model.pth")
97
-
98
- daily_rewards = []
99
- monthly_rewards = []
100
-
101
- training_start_time = time.time()
102
- episode_durations = []
103
- total_steps_global = 0
104
- episode_log_data = []
105
- performance_metrics_log = []
106
-
107
- agent_charge_log = [[] for _ in range(num_agents)]
108
- agent_discharge_log = [[] for _ in range(num_agents)]
109
-
110
- # Training Loop
111
- for episode in range(1, num_episodes + 1):
112
- episode_start_time = time.time()
113
-
114
- obs = np.array(env.reset(), dtype=np.float32)
115
-
116
- # Collect metrics from the previous episode
117
- if episode > 1:
118
- last_episode_metrics = env.get_episode_metrics()
119
- last_episode_metrics['Episode'] = episode - 1
120
- performance_metrics_log.append(last_episode_metrics)
121
-
122
- total_reward = np.zeros(num_agents, dtype=np.float32)
123
- done = False
124
- step_count = 0
125
- day_logs = []
126
- episode_charges = [[] for _ in range(num_agents)]
127
- episode_discharges = [[] for _ in range(num_agents)]
128
-
129
- while not done:
130
- # Build global state and pick actions
131
- global_obs = obs.flatten()
132
- actions, logps = meanfield.select_action(obs, global_obs)
133
-
134
- # Step environment
135
- next_obs_list, rewards, done, info = env.step(actions)
136
-
137
- # Convert next observations to NumPy array
138
- next_obs = np.array(next_obs_list, dtype=np.float32)
139
- next_global_obs = next_obs.flatten()
140
-
141
- # Store transition
142
- local_obs_arr = np.array(obs, dtype=np.float32)
143
-
144
- meanfield.store(
145
- local_obs_arr,
146
- global_obs,
147
- actions,
148
- logps,
149
- rewards,
150
- done,
151
- next_global_obs
152
- )
153
- total_reward += rewards
154
- obs = next_obs
155
- step_count += 1
156
- total_steps_global += 1
157
-
158
- day_logs.append({
159
- "step": step_count - 1,
160
- "grid_import_no_p2p": info["grid_import_no_p2p"],
161
- "grid_import_with_p2p": info["grid_import_with_p2p"],
162
- "p2p_buy": info["p2p_buy"],
163
- "p2p_sell": info["p2p_sell"],
164
- "costs": info["costs"],
165
- "charge_amount": info.get("charge_amount", np.zeros(num_agents)),
166
- "discharge_amount": info.get("discharge_amount", np.zeros(num_agents))
167
- })
168
-
169
- if step_count >= max_steps:
170
- break
171
-
172
- # After each episode
173
- # Compute per-episode metrics
174
- sum_ep_reward = float(np.sum(total_reward))
175
- mean_ep_reward = float(np.mean(total_reward))
176
-
177
- episode_total_rewards.append(sum_ep_reward)
178
- episode_rewards.append(mean_ep_reward)
179
- daily_rewards.append(mean_ep_reward)
180
-
181
- # If we just finished a block of window_size episodes, aggregate
182
- if len(daily_rewards) % window_size == 0:
183
- last_totals = episode_total_rewards[-window_size:]
184
- block_sum = sum(last_totals)
185
- block_total_rewards.append(block_sum)
186
-
187
- last_means = daily_rewards[-window_size:]
188
- block_mean = sum(last_means) / window_size
189
- block_mean_rewards.append(block_mean)
190
-
191
- block_idx = len(block_mean_rewards)
192
- print(
193
- f"→ Completed Block {block_idx} "
194
- f"| Episodes {(block_idx-1)*window_size + 1}–{block_idx*window_size} "
195
- f"| Block Total Reward: {block_sum:.3f} "
196
- f"| Block Mean Reward: {block_mean:.3f}"
197
- )
198
-
199
- # Log agent-level rewards
200
- for i in range(num_agents):
201
- agent_rewards_log[i].append(total_reward[i])
202
- episode_charges[i].append(actions[i][4])
203
- episode_discharges[i].append(actions[i][5])
204
-
205
- # Summarize P2P steps
206
- steps_data = []
207
- for entry in day_logs:
208
- step_idx = entry["step"]
209
- p2p_buy_array = entry["p2p_buy"]
210
- p2p_sell_array = entry["p2p_sell"]
211
- grid_no_p2p_array = entry["grid_import_no_p2p"]
212
- grid_with_p2p_array = entry["grid_import_with_p2p"]
213
-
214
- steps_data.append({
215
- "step": step_idx,
216
- "p2p_buy_sum": float(np.sum(p2p_buy_array)),
217
- "p2p_sell_sum": float(np.sum(p2p_sell_array)),
218
- "grid_import_no_p2p_sum": float(np.sum(grid_no_p2p_array)),
219
- "grid_import_with_p2p_sum": float(np.sum(grid_with_p2p_array))
220
- })
221
-
222
- baseline_cost = np.sum([np.sum(entry["grid_import_no_p2p"]) * env.get_grid_price(entry["step"])
223
- for entry in day_logs])
224
- actual_cost = np.sum([np.sum(entry["costs"]) for entry in day_logs])
225
- cost_reduction = (baseline_cost - actual_cost) / baseline_cost if baseline_cost > 0 else 0.0
226
-
227
- # Update the meanfield agent
228
- meanfield.update()
229
-
230
- # Save if best
231
- if mean_ep_reward > best_mean_reward:
232
- best_mean_reward = mean_ep_reward
233
- meanfield.save(best_model_path)
234
-
235
- if episode % checkpoint_interval == 0:
236
- ckpt_path = os.path.join(logs_dir, f"checkpoint_{episode}.pth")
237
- meanfield.save(ckpt_path)
238
-
239
- episode_end_time = time.time()
240
- episode_duration = episode_end_time - episode_start_time
241
-
242
- print(
243
- f"Episode {episode}/{num_episodes} "
244
- f"| Time per Episode: {episode_duration:.2f}s "
245
- f"| Steps: {step_count} "
246
- f"| Mean Reward: {mean_ep_reward:.3f} "
247
- f"| Cost Reduction: {cost_reduction:.2%}"
248
- )
249
-
250
- # Record data in per-episode log
251
- episode_log_data.append({
252
- "Episode": episode,
253
- "Steps": step_count,
254
- "Mean_Reward": mean_ep_reward,
255
- "Total_Reward": sum_ep_reward,
256
- "Cost_Reduction_Pct": cost_reduction * 100,
257
- "Baseline_Cost": baseline_cost,
258
- "Actual_Cost": actual_cost,
259
- "Episode_Duration": episode_duration,
260
- "Total_Charge": np.sum([np.sum(entry["charge_amount"]) for entry in day_logs]),
261
- "Total_Discharge": np.sum([np.sum(entry["discharge_amount"]) for entry in day_logs])
262
- })
263
-
264
- for i in range(num_agents):
265
- agent_charge_log[i].append(np.mean(episode_charges[i]))
266
- agent_discharge_log[i].append(np.mean(episode_discharges[i]))
267
-
268
- # Capture the final episode's metrics
269
- final_episode_metrics = env.get_episode_metrics()
270
- final_episode_metrics['Episode'] = num_episodes
271
- performance_metrics_log.append(final_episode_metrics)
272
-
273
- # End of all training
274
- training_end_time = time.time()
275
- total_training_time = training_end_time - training_start_time
276
-
277
- # Save out per-episode agent rewards + mean rewards
278
- np.save(os.path.join(logs_dir, "agent_rewards.npy"), np.array(agent_rewards_log))
279
- np.save(os.path.join(logs_dir, "mean_rewards.npy"), np.array(episode_rewards))
280
- np.save(os.path.join(logs_dir, "total_rewards.npy"), np.array(episode_total_rewards))
281
-
282
- # Create Final DataFrame for Logging and Plotting
283
- df_rewards_log = pd.DataFrame(episode_log_data)
284
- df_perf_log = pd.DataFrame(performance_metrics_log)
285
-
286
- # Merge the two DataFrames on the 'Episode' column
287
- df_final_log = pd.merge(df_rewards_log, df_perf_log.drop(columns=[
288
- 'degradation_cost_over_time',
289
- 'cost_savings_over_time',
290
- 'grid_reduction_over_time'
291
- ]), on="Episode")
292
-
293
- # PLOTTING
294
- os.makedirs(plots_dir, exist_ok=True)
295
-
296
- # Helper: centered moving average
297
- def moving_avg(series, window):
298
- return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()
299
-
300
- # Smoothing window (in episodes)
301
- ma_window = 300
302
- episodes = np.arange(1, num_episodes + 1)
303
-
304
- # Mean Reward moving average
305
- reward_ma = moving_avg(df_final_log["Mean_Reward"], ma_window)
306
- plt.figure(figsize=(8, 5))
307
- plt.plot(episodes, reward_ma, linewidth=2, label=f"Mean Reward MA (win={ma_window})")
308
- plt.xlabel("Episode")
309
- plt.ylabel("Mean Reward")
310
- plt.title("meanfield: Mean Reward Moving Average")
311
- plt.legend()
312
- plt.grid(True)
313
- plt.savefig(os.path.join(plots_dir, "mean_reward_ma.png"), dpi=200)
314
- plt.close()
315
-
316
- # Total Reward moving average
317
- total_ma = moving_avg(df_final_log["Total_Reward"], ma_window)
318
- plt.figure(figsize=(8, 5))
319
- plt.plot(episodes, total_ma, linewidth=2, label=f"Total Reward MA (win={ma_window})")
320
- plt.xlabel("Episode")
321
- plt.ylabel("Total Reward")
322
- plt.title("meanfield: Total Reward Moving Average")
323
- plt.legend()
324
- plt.grid(True)
325
- plt.savefig(os.path.join(plots_dir, "total_reward_ma.png"), dpi=200)
326
- plt.close()
327
-
328
- # Cost Reduction (%) moving average
329
- cost_ma = moving_avg(df_final_log["Cost_Reduction_Pct"], ma_window)
330
- plt.figure(figsize=(8, 5))
331
- plt.plot(episodes, cost_ma, linewidth=2, label="Cost Reduction MA (%)")
332
- plt.xlabel("Episode")
333
- plt.ylabel("Cost Reduction (%)")
334
- plt.title("meanfield: Cost Reduction Moving Average")
335
- plt.legend()
336
- plt.grid(True)
337
- plt.savefig(os.path.join(plots_dir, "cost_reduction_ma.png"), dpi=200)
338
- plt.close()
339
-
340
- # Battery Degradation Cost moving average
341
- degradation_ma = moving_avg(df_final_log["battery_degradation_cost_total"], ma_window)
342
- plt.figure(figsize=(8, 5))
343
- plt.plot(episodes, degradation_ma, linewidth=2, label=f"Degradation Cost MA (win={ma_window})", color='purple')
344
- plt.xlabel("Episode")
345
- plt.ylabel("Total Degradation Cost ($)")
346
- plt.title("meanfield: Battery Degradation Cost Moving Average")
347
- plt.legend()
348
- plt.grid(True)
349
- plt.savefig(os.path.join(plots_dir, "degradation_cost_ma.png"), dpi=200)
350
- plt.close()
351
-
352
- print(f"\nAll moving-average plots saved to: {plots_dir}")
353
-
354
- # Save Final Logs to CSV
355
- total_time_row = pd.DataFrame([{
356
- "Episode": "Total_Training_Time",
357
- "Episode_Duration": total_training_time
358
- }])
359
- df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)
360
-
361
- log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")
362
-
363
- # Select and reorder columns for the final CSV
364
- columns_to_save = [
365
- "Episode",
366
- "Mean_Reward",
367
- "Total_Reward",
368
- "Cost_Reduction_Pct",
369
- "Episode_Duration",
370
- "battery_degradation_cost_total",
371
- ]
372
- df_to_save = df_to_save[columns_to_save]
373
-
374
- df_to_save.to_csv(log_csv_path, index=False)
375
-
376
- print(f"Saved comprehensive training performance log to: {log_csv_path}")
377
-
378
- # Final Timings Printout
379
- print("\n" + "="*50)
380
- print("TRAINING COMPLETE".center(50))
381
- print(f"Total training time: {total_training_time:.2f} seconds")
382
- print("="*50)
383
-
384
-
385
- if __name__ == "__main__":
386
- main()
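A side note on the smoothing used in the plotting section of the deleted trainer above: moving_avg is a centered rolling mean with min_periods=1, so the smoothed curve starts and ends without NaN gaps. A minimal standalone sketch of the same pandas call, on a made-up five-episode reward series (window kept at 3 so the edge behaviour is visible):

import numpy as np
import pandas as pd

rewards = np.array([1.0, 3.0, 2.0, 5.0, 4.0])
# Centered window of 3; min_periods=1 lets the edges average whatever is available.
smoothed = pd.Series(rewards).rolling(window=3, center=True, min_periods=1).mean().to_numpy()
print(smoothed)  # [2.0, 2.0, 3.33..., 3.67..., 4.5]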
 
Other_algorithms/Flat_System/meanfield/trainer/__init__.py DELETED
File without changes
Other_algorithms/Flat_System/meanfield/trainer/mfac.py DELETED
@@ -1,219 +0,0 @@
1
- # mfac.py
2
- import torch
3
- import torch.nn as nn
4
- import random
5
- import numpy as np
6
- from torch.distributions import Normal
7
- from torch.amp import autocast  # unused: AMP is disabled in this trainer
8
- from torch.cuda.amp import GradScaler  # unused here; note torch.cuda.amp.GradScaler is the deprecated import path
9
-
10
- # Device selection
11
- if torch.cuda.is_available():
12
- device = torch.device("cuda")
13
- print("Using CUDA (NVIDIA GPU)")
14
- else:
15
- device = torch.device("cpu")
16
- print("Using CPU")
17
-
18
- def set_global_seed(seed: int):
19
- random.seed(seed)
20
- np.random.seed(seed)
21
- torch.manual_seed(seed)
22
-
23
- if torch.cuda.is_available():
24
- torch.cuda.manual_seed_all(seed)
25
- torch.backends.cudnn.deterministic = False  # favors speed; full cuDNN determinism is deliberately not enforced
26
- torch.backends.cudnn.benchmark = True
27
-
28
- SEED = 42
29
- set_global_seed(SEED)
30
-
31
- class MLP(nn.Module):
32
- def __init__(self, input_dim, hidden_dims, output_dim):
33
- super().__init__()
34
- layers = []
35
- last_dim = input_dim
36
- for h in hidden_dims:
37
- layers += [nn.Linear(last_dim, h), nn.ReLU()]
38
- last_dim = h
39
- layers.append(nn.Linear(last_dim, output_dim))
40
- self.net = nn.Sequential(*layers)
41
-
42
- def forward(self, x):
43
- return self.net(x)
44
-
45
- class Actor(nn.Module):
46
- def __init__(self, obs_dim, act_dim, hidden=(64,64)):
47
- super().__init__()
48
- self.net = MLP(obs_dim, hidden, act_dim)
49
- self.log_std = nn.Parameter(torch.zeros(act_dim))
50
-
51
- def forward(self, x):
52
- mean = self.net(x)
53
- std = torch.exp(self.log_std)
54
- return mean, std
55
-
56
- class Critic(nn.Module):
57
- def __init__(self, state_dim, hidden=(128,128)):
58
- super().__init__()
59
- self.net = MLP(state_dim, hidden, 1)
60
-
61
- def forward(self, x):
62
- return self.net(x).squeeze(-1)
63
-
64
- class MeanField:
65
- def __init__(
66
- self,
67
- n_agents,
68
- local_dim,
69
- global_dim,
70
- act_dim,
71
- lr=3e-4,
72
- gamma=0.99,
73
- lam=0.95,
74
- clip_eps=0.2,
75
- k_epochs=10,
76
- batch_size=1024,
77
- episode_len=96
78
- ):
79
- self.n_agents = n_agents
80
- self.local_dim = local_dim
81
- self.global_dim = global_dim
82
- self.act_dim = act_dim
83
- self.gamma = gamma
84
- self.lam = lam
85
- self.clip_eps = clip_eps
86
- self.k_epochs = k_epochs
87
- self.batch_size = batch_size
88
- self.episode_len = episode_len
89
-
90
- self.actor = Actor(local_dim + global_dim, act_dim).to(device)
91
- self.critic = Critic(global_dim).to(device)
92
-
93
- self.opt_a = torch.optim.Adam(self.actor.parameters(), lr=lr)
94
- self.opt_c = torch.optim.Adam(self.critic.parameters(), lr=lr)
95
-
96
- print("MeanField CUDA AMP is disabled for stability.")
97
-
98
- self.init_buffer()
99
-
100
- def init_buffer(self):
101
- self.ls_buf = np.zeros((self.episode_len, self.n_agents, self.local_dim), dtype=np.float32)
102
- self.gs_buf = np.zeros((self.episode_len, self.global_dim), dtype=np.float32)
103
- self.ac_buf = np.zeros((self.episode_len, self.n_agents, self.act_dim), dtype=np.float32)
104
- self.lp_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float32)
105
- self.rw_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float32)
106
- self.done_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float32)
107
- self.next_gs_buf = np.zeros((self.episode_len, self.global_dim), dtype=np.float32)
108
- self.step_idx = 0
109
-
110
- @torch.no_grad()
111
- def select_action(self, local_obs, global_obs):
112
- l = torch.from_numpy(local_obs).float().to(device)
113
- g = torch.from_numpy(global_obs).float().to(device).unsqueeze(0).expand(self.n_agents, -1)
114
- input_x = torch.cat([l, g], dim=-1)
115
- mean, std = self.actor(input_x)
116
- dist = Normal(mean, std)
117
- a = dist.sample()
118
- return a.cpu().numpy(), dist.log_prob(a).sum(-1).cpu().numpy()
119
-
120
- def store(self, local_obs, global_obs, action, logp, reward, done, next_global_obs):
121
- if self.step_idx < self.episode_len:
122
- self.ls_buf[self.step_idx] = local_obs
123
- self.gs_buf[self.step_idx] = global_obs
124
- self.ac_buf[self.step_idx] = action
125
- self.lp_buf[self.step_idx] = logp
126
- self.rw_buf[self.step_idx] = reward
127
- self.done_buf[self.step_idx] = done
128
- self.next_gs_buf[self.step_idx] = next_global_obs
129
- self.step_idx += 1
130
-
131
- def compute_gae(self, T, vals):
132
- """
133
- Computes Generalized Advantage Estimation (GAE).
134
- """
135
- N = self.n_agents
136
- adv_buf = np.zeros_like(self.rw_buf[:T])
137
-
138
-
139
- if not self.done_buf[T-1].all():
140
- with torch.no_grad():
141
- v_last = self.critic(
142
- torch.from_numpy(self.next_gs_buf[T-1]).float().to(device)
143
- ).cpu().numpy()
144
- else:
145
- v_last = 0.0
146
- vals_agent = vals.unsqueeze(1).expand(-1, N).cpu().numpy()
147
- rewards = self.rw_buf[:T]
148
- masks = 1.0 - self.done_buf[:T]
149
- gae = 0
150
- for t in reversed(range(T)):
151
- v_next = vals_agent[t+1] if t < T - 1 else v_last
152
- delta = rewards[t] + self.gamma * v_next * masks[t] - vals_agent[t]
153
- adv_buf[t] = gae = delta + self.gamma * self.lam * masks[t] * gae
154
- ret_buf = adv_buf + vals_agent
155
- adv_flat = torch.from_numpy(adv_buf.flatten()).float().to(device)
156
- ret_flat = torch.from_numpy(ret_buf.flatten()).float().to(device)
157
- return adv_flat, ret_flat
158
-
159
- def update(self):
160
- T = self.step_idx
161
- if T == 0: return
162
-
163
- gs_tensor = torch.from_numpy(self.gs_buf[:T]).float().to(device)
164
- ls_tensor = torch.from_numpy(self.ls_buf[:T]).float().to(device).view(T * self.n_agents, -1)
165
- ac_tensor = torch.from_numpy(self.ac_buf[:T]).float().to(device).view(T * self.n_agents, -1)
166
- lp_tensor = torch.from_numpy(self.lp_buf[:T]).float().to(device).view(-1)
167
-
168
- with torch.no_grad():
169
- vals = self.critic(gs_tensor)
170
-
171
- adv_flat, ret_flat = self.compute_gae(T, vals)
172
- adv_flat = (adv_flat - adv_flat.mean()) / (adv_flat.std() + 1e-8)
173
-
174
- gs_for_batch = gs_tensor.unsqueeze(1).expand(-1, self.n_agents, -1).reshape(T * self.n_agents, self.global_dim)
175
-
176
- dataset = torch.utils.data.TensorDataset(ls_tensor, gs_for_batch, ac_tensor, lp_tensor, adv_flat, ret_flat)
177
- gen = torch.Generator()
178
- gen.manual_seed(SEED)
179
- loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True, generator=gen)
180
-
181
- for _ in range(self.k_epochs):
182
- for b_ls, b_gs, b_ac, b_lp, b_adv, b_ret in loader:
183
- input_a = torch.cat([b_ls, b_gs], dim=-1)
184
- mean, std = self.actor(input_a)
185
- dist = Normal(mean, std)
186
-
187
- entropy = dist.entropy().mean()
188
-
189
- lp_new = dist.log_prob(b_ac).sum(-1)
190
- ratio = torch.exp(lp_new - b_lp)
191
- surr1 = ratio * b_adv
192
- surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * b_adv
193
-
194
- actor_loss = -torch.min(surr1, surr2).mean() - 0.01 * entropy
195
-
196
- self.opt_a.zero_grad()
197
- actor_loss.backward()
198
- nn.utils.clip_grad_norm_(self.actor.parameters(), max_norm=0.5)
199
- self.opt_a.step()
200
-
201
-
202
- val_pred = self.critic(b_gs)
203
- critic_loss = nn.MSELoss()(val_pred, b_ret)
204
-
205
- self.opt_c.zero_grad()
206
- critic_loss.backward()
207
- nn.utils.clip_grad_norm_(self.critic.parameters(), max_norm=0.5)
208
- self.opt_c.step()
209
-
210
- self.step_idx = 0
211
-
212
- def save(self, path):
213
- torch.save({'actor': self.actor.state_dict(),
214
- 'critic': self.critic.state_dict()}, path)
215
-
216
- def load(self, path):
217
- data = torch.load(path, map_location=device)
218
- self.actor.load_state_dict(data['actor'])
219
- self.critic.load_state_dict(data['critic'])
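For readers reconstructing the trainer above: compute_gae implements the standard GAE(gamma, lambda) recursion, adv_t = delta_t + gamma*lam*mask_t*adv_{t+1} with delta_t = r_t + gamma*V(s_{t+1})*mask_t - V(s_t). The numpy sketch below replays that recursion on made-up numbers (the rewards, critic values and bootstrap value are invented; this is an illustration, not part of the deleted file):

import numpy as np

gamma, lam = 0.99, 0.95
rewards = np.array([1.0, 0.5, 2.0])   # made-up per-step rewards
values  = np.array([0.8, 0.9, 1.1])   # made-up critic estimates V(s_t)
v_last  = 0.7                         # bootstrap V(s_T) for a non-terminal cut, as in compute_gae
masks   = np.array([1.0, 1.0, 1.0])   # 1.0 while the episode continues

adv = np.zeros_like(rewards)
gae = 0.0
for t in reversed(range(len(rewards))):
    v_next = values[t + 1] if t < len(rewards) - 1 else v_last
    delta = rewards[t] + gamma * v_next * masks[t] - values[t]  # TD residual
    gae = delta + gamma * lam * masks[t] * gae                  # discounted sum of residuals
    adv[t] = gae
returns = adv + values  # critic regression targets, matching ret_buf = adv_buf + vals_agent
print(adv, returns)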
 
Other_algorithms/Flat_System/solar_sys_environment.py DELETED
@@ -1,635 +0,0 @@
1
- import gym
2
- import pandas as pd
3
- import numpy as np
4
- from collections import deque
5
- import random
6
- random.seed(42)
7
- np.random.seed(42)
8
-
9
- class SolarSys(gym.Env):
10
-
11
- def __init__(
12
- self,
13
- data_path="/path/to/project/training/200houses_152days_TRAIN.csv",
14
- state="oklahoma", # for Oklahoma (example)
15
- time_freq="15T", # "15T", "30T", "1H", "3H", "6H"
16
- ):
17
-
18
- super().__init__()
19
- # Store config
20
- self.data_path = data_path
21
- self.time_freq = time_freq
22
- self.state = state.lower()
23
-
24
- # Centralized Pricing Configuration
25
- self._pricing_info = {
26
- "oklahoma": {
27
- "max_grid_price": 0.2112,
28
- "feed_in_tariff": 0.04,
29
- "price_function": self._get_oklahoma_price
30
- },
31
- "colorado": {
32
- "max_grid_price": 0.32,
33
- "feed_in_tariff": 0.055,
34
- "price_function": self._get_colorado_price
35
- },
36
- "pennsylvania": {
37
- "max_grid_price": 0.12505,
38
- "feed_in_tariff": 0.06,
39
- "price_function": self._get_pennsylvania_price
40
- }
41
- }
42
-
43
- if self.state not in self._pricing_info:
44
- raise ValueError(f"State '{self.state}' is not supported. Available states: {list(self._pricing_info.keys())}")
45
-
46
- state_config = self._pricing_info[self.state]
47
- self.max_grid_price = state_config["max_grid_price"]
48
- self.feed_in_tariff = state_config["feed_in_tariff"]
49
- self._get_price_function = state_config["price_function"]
50
-
51
- try:
52
- all_data = pd.read_csv(data_path)
53
- all_data["local_15min"] = pd.to_datetime(all_data["local_15min"], utc=True)
54
- all_data.set_index("local_15min", inplace=True)
55
- all_data = all_data.resample(time_freq).mean()
56
-
57
- except FileNotFoundError:
58
- raise FileNotFoundError(f"Data file {data_path} not found.")
59
- except pd.errors.EmptyDataError:
60
- raise ValueError(f"Data file {data_path} is empty.")
61
- except Exception as e:
62
- raise ValueError(f"Error loading data: {e}")
63
-
64
- # Compute global maxima for normalization
65
- grid_cols = [c for c in all_data.columns if c.startswith("grid_")]
66
- solar_cols = [c for c in all_data.columns if c.startswith("total_solar_")]
67
- all_grid = all_data[grid_cols].values
68
- all_solar = all_data[solar_cols].values
69
-
70
- # max total demand = max(grid + solar) over all time & agents
71
- self.global_max_demand = float((all_grid + all_solar).max()) + 1e-8
72
-
73
- # max solar generation alone
74
- self.global_max_solar = float(all_solar.max()) + 1e-8
75
-
76
- # Store the resampled dataset
77
- self.all_data = all_data
78
-
79
- self.time_freq = time_freq
80
- freq_offset = pd.tseries.frequencies.to_offset(time_freq)
81
- minutes_per_step = freq_offset.nanos / 1e9 / 60.0
82
- self.steps_per_day = int(24 * 60 // minutes_per_step)
83
-
84
- total_rows = len(self.all_data)
85
- self.total_days = total_rows // self.steps_per_day
86
- if self.total_days < 1:
87
- raise ValueError(
88
- f"After resampling, dataset has {total_rows} rows, which is "
89
- f"less than a single day of {self.steps_per_day} steps."
90
- )
91
-
92
- self.house_ids = [
93
- col.split("_")[1] for col in self.all_data.columns
94
- if col.startswith("grid_")
95
- ]
96
- self.num_agents = len(self.house_ids)
97
- self.original_no_p2p_import = {}
98
- for hid in self.house_ids:
99
- col_grid = f"grid_{hid}"
100
- self.original_no_p2p_import[hid] = self.all_data[col_grid].clip(lower=0.0).values
101
-
102
- # Determine population groups
103
- # group 1 = has any solar; group 0 = never solar
104
- solar_cols = [f"total_solar_{hid}" for hid in self.house_ids]
105
- solar_sums = self.all_data[solar_cols].sum(axis=0).to_dict()
106
- self.agent_groups = [
107
- 1 if solar_sums[f"total_solar_{hid}"] > 0 else 0
108
- for hid in self.house_ids
109
- ]
110
-
111
- # Count the number of houses in each group
112
- self.group_counts = {
113
- 0: self.agent_groups.count(0),
114
- 1: self.agent_groups.count(1)
115
- }
116
- print(f"Number of houses in each group: {self.group_counts}")
117
-
118
- # Battery logic
119
- self.battery_options = {
120
- "teslapowerwall": {"max_capacity": 13.5, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 5.0, "max_discharge_rate": 5.0, "degradation_cost_per_kwh": 0.005},
121
- "enphase": {"max_capacity": 5.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 2.0, "max_discharge_rate": 2.0, "degradation_cost_per_kwh": 0.005},
122
- "franklin": {"max_capacity": 15.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 6.0, "max_discharge_rate": 6.0, "degradation_cost_per_kwh": 0.005},
123
- }
124
-
125
- # Identify which houses actually have solar
126
- self.solar_houses = [
127
- hid for hid in self.house_ids
128
- if (self.all_data[f"total_solar_{hid}"] > 0).any()
129
- ]
130
-
131
- # Assign a random battery type to each solar-equipped house
132
- self.batteries = {}
133
- for hid in self.solar_houses:
134
- choice = random.choice(list(self.battery_options))
135
- specs = self.battery_options[choice]
136
- self.batteries[hid] = {"soc": 0.0, **specs}
137
-
138
- # Observation & Action Spaces
139
- # [own_demand, own_solar, SOC, grid_price,
140
- # peer_price, total_demand_others, total_solar_others, time_of_day]
141
- self.observation_space = gym.spaces.Box(
142
- low=-np.inf, high=np.inf,
143
- shape=(self.num_agents, 8),
144
- dtype=np.float32
145
- )
146
-
147
- # [sell_to_grid, buy_from_grid, sell_to_peers, buy_from_peers, charge_battery, discharge_battery]
148
- self.action_space = gym.spaces.Box(
149
- low=0.0,
150
- high=1.0,
151
- shape=(self.num_agents, 6),
152
- dtype=np.float32
153
- )
154
-
155
- self.episode_metrics = {}
156
- self._initialize_episode_metrics()
157
-
158
- # Initialize episode variables
159
- self.data = None
160
- self.env_log = []
161
- self.day_index = -1
162
- self.current_step = 0
163
- self.num_steps = self.steps_per_day
164
- self.demands = {}
165
- self.solars = {}
166
- self.previous_actions = {
167
- hid: np.zeros(6) for hid in self.house_ids
168
- }
169
-
170
-
171
- def _initialize_episode_metrics(self):
172
- """Initialize or reset all metrics tracked over a single episode."""
173
- self.cumulative_grid_reduction = 0.0
174
- self.cumulative_grid_reduction_peak = 0.0
175
- self.cumulative_degradation_cost = 0.0
176
- self.agent_cost_savings = np.zeros(self.num_agents)
177
- self.degradation_cost_timeseries = []
178
- self.cost_savings_timeseries = []
179
- self.grid_reduction_timeseries = []
180
-
181
-
182
- # Price Functions
183
- def get_grid_price(self, step_idx):
184
- """Return grid price for the current step based on selected state."""
185
- return self._get_price_function(step_idx)
186
-
187
-
188
- def _get_oklahoma_price(self, step_idx):
189
- # Oklahoma Gas & Electric (OG&E) TOU
190
- minutes_per_step = 24 * 60 / self.steps_per_day
191
- hour = int((step_idx * minutes_per_step) // 60) % 24
192
- # Peak: 2 pm to 7 pm
193
- if 14 <= hour < 19:
194
- return 0.2112
195
- # Off-peak: All other times
196
- else:
197
- return 0.0434
198
-
199
-
200
- def _get_colorado_price(self, step_idx):
201
- # Xcel Energy Colorado TOU
202
- minutes_per_step = 24 * 60 / self.steps_per_day
203
- hour = int((step_idx * minutes_per_step) // 60) % 24
204
- # On-peak: 3 pm to 7 pm
205
- if 15 <= hour < 19:
206
- return 0.32
207
- # Mid-peak: 1 pm to 3 pm
208
- elif 13 <= hour < 15:
209
- return 0.22
210
- # Off-peak: Before 1 pm and after 7 pm
211
- else:
212
- return 0.12
213
-
214
-
215
- def _get_pennsylvania_price(self, step_idx):
216
- # Duquesne Light (Pennsylvania) EV TOU
217
- minutes_per_step = 24 * 60 / self.steps_per_day
218
- hour = int((step_idx * minutes_per_step) // 60) % 24
219
- # Peak: 1 pm to 9 pm
220
- if 13 <= hour < 21:
221
- return 0.125048
222
- # Super Off-Peak: 11 pm to 6 am
223
- elif hour >= 23 or hour < 6:
224
- return 0.057014
225
- # Off-Peak: 6 am to 1 pm and 9 pm to 11 pm
226
- else:
227
- return 0.079085
228
-
229
-
230
- def get_peer_price(self, step_idx, total_surplus, total_shortfall):
231
- grid_price = self.get_grid_price(step_idx)
232
- feed_in_tariff = self.feed_in_tariff
233
-
234
- base_price = grid_price * 0.90
235
- net_demand = total_shortfall - total_surplus
236
- total_potential_trade = total_shortfall + total_surplus + 1e-6
237
- elasticity_factor = 0.3
238
- price_multiplier = np.exp(elasticity_factor * (net_demand / total_potential_trade))
239
- peer_price = base_price * price_multiplier
240
- final_price = float(np.clip(peer_price, feed_in_tariff, grid_price))
241
-
242
- return final_price
243
-
244
-
245
- def reset(self):
246
- # Finalize and store metrics from completed episode before resetting
247
- if self.current_step > 0:
248
- positive_savings = self.agent_cost_savings[self.agent_cost_savings > 0]
249
- if len(positive_savings) > 1:
250
- fairness_on_savings = self._compute_jains_index(positive_savings)
251
- else:
252
- fairness_on_savings = 0.0
253
-
254
- # Store all final metrics
255
- self.episode_metrics = {
256
- "grid_reduction_entire_day": self.cumulative_grid_reduction,
257
- "grid_reduction_peak_hours": self.cumulative_grid_reduction_peak,
258
- "total_cost_savings": np.sum(self.agent_cost_savings),
259
- "fairness_on_cost_savings": fairness_on_savings,
260
- "battery_degradation_cost_total": self.cumulative_degradation_cost,
261
- "degradation_cost_over_time": self.degradation_cost_timeseries,
262
- "cost_savings_over_time": self.cost_savings_timeseries,
263
- "grid_reduction_over_time": self.grid_reduction_timeseries,
264
- }
265
-
266
- self.day_index = np.random.randint(0, self.total_days)
267
-
268
- start_row = self.day_index * self.steps_per_day
269
- end_row = start_row + self.steps_per_day
270
- day_data = self.all_data.iloc[start_row:end_row].copy()
271
- self.data = day_data
272
-
273
- self.no_p2p_import_day = {}
274
- for hid in self.house_ids:
275
- self.no_p2p_import_day[hid] = self.original_no_p2p_import[hid][start_row:end_row]
276
-
277
- self.demands = {}
278
- self.solars = {}
279
-
280
- for hid in self.house_ids:
281
- col_grid = f"grid_{hid}"
282
- col_solar = f"total_solar_{hid}"
283
-
284
- grid_series = day_data[col_grid].fillna(0.0)
285
- solar_series = day_data[col_solar].fillna(0.0).clip(lower=0.0)
286
-
287
- demand_array = grid_series.values + solar_series.values
288
- demand_array = np.clip(demand_array, 0.0, None)
289
-
290
- self.demands[hid] = demand_array
291
- self.solars[hid] = solar_series.values
292
-
293
- self.current_step = 0
294
- self.env_log = []
295
-
296
- # Reset previous_actions to 6 zeros
297
- for hid in self.house_ids:
298
- self.previous_actions[hid] = np.zeros(6)
299
-
300
- self._initialize_episode_metrics()
301
-
302
- # Randomize battery SOC between 30%–70% of capacity
303
- for hid, batt in self.batteries.items():
304
- low = 0.30 * batt["max_capacity"]
305
- high = 0.70 * batt["max_capacity"]
306
- batt["soc"] = random.uniform(low, high)
307
-
308
- obs = self._get_obs()
309
- obs_list = [obs[i] for i in range(self.num_agents)]
310
- return obs_list
311
-
312
-
313
- def step(self, actions):
314
- # Validate & clamp actions
315
- actions = np.array(actions, dtype=np.float32)
316
- if actions.shape != (self.num_agents, 6):
317
- raise ValueError(f"Actions shape mismatch: got {actions.shape}, expected {(self.num_agents, 6)}")
318
- actions = np.clip(actions, 0.0, 1.0)
319
-
320
- a_sellGrid = actions[:, 0]
321
- a_buyGrid = actions[:, 1]
322
- a_sellPeers = actions[:, 2]
323
- a_buyPeers = actions[:, 3]
324
- a_chargeBatt = actions[:, 4]
325
- a_dischargeBatt = actions[:, 5]
326
-
327
- # Gather current demand & solar
328
- demands = []
329
- solars = []
330
- for i, hid in enumerate(self.house_ids):
331
- demands.append(self.demands[hid][self.current_step])
332
- solars.append(self.solars[hid][self.current_step])
333
-
334
- demands = np.array(demands, dtype=np.float32)
335
- solars = np.array(solars, dtype=np.float32)
336
-
337
- # Calculations for peer_price and grid_price
338
- total_surplus = np.maximum(solars - demands, 0.0).sum()
339
- total_shortfall = np.maximum(demands - solars, 0.0).sum()
340
- peer_price = self.get_peer_price(self.current_step, total_surplus, total_shortfall)
341
- grid_price = self.get_grid_price(self.current_step)
342
-
343
- # Enforce "self-use first"
344
- shortfall = np.maximum(demands - solars, 0.0)
345
- surplus = np.maximum(solars - demands, 0.0)
346
-
347
- final_shortfall = shortfall.copy()
348
- final_surplus = surplus.copy()
349
- grid_import = np.zeros(self.num_agents, dtype=np.float32)
350
- grid_export = np.zeros(self.num_agents, dtype=np.float32)
351
-
352
- # Battery discharge
353
- discharge_amount = np.zeros(self.num_agents, dtype=np.float32)
354
- for i, hid in enumerate(self.house_ids):
355
- if hid in self.batteries:
356
- batt = self.batteries[hid]
357
- max_dis = batt["max_discharge_rate"]
358
- available = batt["soc"] * batt["discharge_efficiency"]
359
- desired = a_dischargeBatt[i] * max_dis
360
- actual = min(desired, available, final_shortfall[i])
361
- batt["soc"] -= actual / batt["discharge_efficiency"]
362
- final_shortfall[i] -= actual
363
- discharge_amount[i] = actual
364
-
365
- # Battery charge
366
- charge_amount = np.zeros(self.num_agents, dtype=np.float32)
367
- for i, hid in enumerate(self.house_ids):
368
- if hid in self.batteries:
369
- batt = self.batteries[hid]
370
- max_ch = batt["max_charge_rate"]
371
- cap_left = batt["max_capacity"] - batt["soc"]
372
- desired = a_chargeBatt[i] * max_ch
373
- actual = min(desired, cap_left / batt["charge_efficiency"], final_surplus[i])
374
- batt["soc"] += actual * batt["charge_efficiency"]
375
- final_surplus[i] -= actual
376
- charge_amount[i] = actual
377
-
378
- # P2P matching
379
- battery_offer = np.zeros(self.num_agents, dtype=np.float32)
380
- for i, hid in enumerate(self.house_ids):
381
- if hid in self.batteries:
382
- battery_offer[i] = self.batteries[hid]["soc"] * self.batteries[hid]["discharge_efficiency"]
383
- effective_surplus = final_surplus + battery_offer
384
-
385
- netPeer = a_buyPeers - a_sellPeers
386
- p2p_buy_request = np.zeros(self.num_agents, dtype=np.float32)
387
- p2p_sell_offer = np.zeros(self.num_agents, dtype=np.float32)
388
- for i in range(self.num_agents):
389
- if netPeer[i] > 0:
390
- p2p_buy_request[i] = netPeer[i] * final_shortfall[i]
391
- elif netPeer[i] < 0:
392
- p2p_sell_offer[i] = -netPeer[i] * effective_surplus[i]
393
-
394
- total_sell = np.sum(p2p_sell_offer)
395
- total_buy = np.sum(p2p_buy_request)
396
- matched = min(total_sell, total_buy)
397
-
398
- if matched > 1e-9:
399
- sell_fraction = p2p_sell_offer / (total_sell + 1e-12)
400
- buy_fraction = p2p_buy_request / (total_buy + 1e-12)
401
- actual_sold = matched * sell_fraction
402
- actual_bought = matched * buy_fraction
403
- else:
404
- actual_sold = np.zeros(self.num_agents, dtype=np.float32)
405
- actual_bought = np.zeros(self.num_agents, dtype=np.float32)
406
-
407
- from_batt_p2p = np.minimum(actual_sold, battery_offer)
408
- from_solar_p2p = actual_sold - from_batt_p2p
409
-
410
- # Update balances
411
- final_surplus -= from_solar_p2p
412
- final_shortfall -= actual_bought
413
-
414
- # Deduct peer battery sales from SOC
415
- for i, hid in enumerate(self.house_ids):
416
- if hid in self.batteries:
417
- from_batt = min(actual_sold[i], battery_offer[i])
418
- self.batteries[hid]["soc"] -= from_batt / self.batteries[hid]["discharge_efficiency"]
419
- self.batteries[hid]["soc"] = max(0.0, self.batteries[hid]["soc"])
420
-
421
- # Grid trades
422
- netGrid = a_buyGrid - a_sellGrid
423
- for i in range(self.num_agents):
424
- if netGrid[i] > 0:
425
- grid_import[i] = netGrid[i] * final_shortfall[i]
426
- elif netGrid[i] < 0:
427
- grid_export[i] = -netGrid[i] * final_surplus[i]
428
- forced = np.maximum(final_shortfall - grid_import, 0.0)
429
- grid_import += forced
430
-
431
- # Calculate costs
432
- costs = (grid_import * grid_price) - (grid_export * self.feed_in_tariff) + \
433
- (actual_bought * peer_price) - (actual_sold * peer_price)
434
-
435
- # Calculate rewards
436
- final_rewards = self._compute_rewards(
437
- grid_import=grid_import, grid_export=grid_export,
438
- actual_sold=actual_sold, actual_bought=actual_bought,
439
- charge_amount=charge_amount, discharge_amount=discharge_amount,
440
- costs=costs, grid_price=grid_price, peer_price=peer_price
441
- )
442
-
443
- # Metric calculations for the current step
444
- no_p2p_import_this_step = np.array([
445
- self.no_p2p_import_day[hid][self.current_step] for hid in self.house_ids
446
- ], dtype=np.float32)
447
-
448
- # Grid Reduction metrics
449
- step_grid_reduction = np.sum(no_p2p_import_this_step - grid_import)
450
- self.cumulative_grid_reduction += step_grid_reduction
451
- self.grid_reduction_timeseries.append(step_grid_reduction)
452
-
453
- # Check if current grid price corresponds to peak hour
454
- if grid_price >= self.max_grid_price * 0.99:
455
- self.cumulative_grid_reduction_peak += step_grid_reduction
456
-
457
- # Cost Savings
458
- cost_no_p2p = no_p2p_import_this_step * grid_price
459
- step_cost_savings_per_agent = cost_no_p2p - costs
460
- self.agent_cost_savings += step_cost_savings_per_agent
461
- self.cost_savings_timeseries.append(np.sum(step_cost_savings_per_agent))
462
-
463
- # Battery Degradation Cost
464
- step_degradation_cost = 0.0
465
- for i, hid in enumerate(self.house_ids):
466
- if hid in self.batteries:
467
- batt = self.batteries[hid]
468
- degradation_cost_agent = (charge_amount[i] + discharge_amount[i]) * batt["degradation_cost_per_kwh"]
469
- step_degradation_cost += degradation_cost_agent
470
-
471
- self.cumulative_degradation_cost += step_degradation_cost
472
- self.degradation_cost_timeseries.append(step_degradation_cost)
473
-
474
- info = {
475
- "p2p_buy": actual_bought,
476
- "p2p_sell": actual_sold,
477
- "grid_import_with_p2p": grid_import,
478
- "grid_import_no_p2p": no_p2p_import_this_step,
479
- "grid_export": grid_export,
480
- "costs": costs,
481
- "charge_amount": charge_amount,
482
- "discharge_amount": discharge_amount,
483
- "step": self.current_step,
484
- "step_grid_reduction": step_grid_reduction,
485
- "step_cost_savings": np.sum(step_cost_savings_per_agent),
486
- "step_degradation_cost": step_degradation_cost,
487
- }
488
-
489
- # Increment step & decide "done"
490
- self.current_step += 1
491
- done = (self.current_step >= self.num_steps)
492
-
493
- # Return next obs, reward list, done, info
494
- obs_next = self._get_obs()
495
- obs_next_list = [obs_next[i] for i in range(self.num_agents)]
496
- rewards_list = [final_rewards[i] for i in range(self.num_agents)]
497
-
498
- return obs_next_list, rewards_list, done, info
499
-
500
-
501
- def _get_obs(self):
502
- # Build observation array for each agent, including dynamic peer pricing
503
- step = min(self.current_step, self.num_steps - 1)
504
-
505
- # Gather per-agent demand/solar into arrays
506
- demands = np.array([self.demands[hid][step] for hid in self.house_ids], dtype=np.float32)
507
- solars = np.array([self.solars[hid][step] for hid in self.house_ids], dtype=np.float32)
508
-
509
- # Compute market aggregates for dynamic pricing
510
- surplus = np.maximum(solars - demands, 0.0)
511
- shortfall = np.maximum(demands - solars, 0.0)
512
- total_surplus = float(surplus.sum())
513
- total_shortfall = float(shortfall.sum())
514
-
515
- grid_price = self.get_grid_price(step)
516
- peer_price = self.get_peer_price(step, total_surplus, total_shortfall)
517
-
518
- # Compute time-of-day feature
519
- ts = self.data.index[step]
520
- hour = ts.hour + ts.minute / 60.0
521
-
522
- # Build per-agent obs
523
- obs = []
524
- for i, hid in enumerate(self.house_ids):
525
- own_demand = demands[i]
526
- own_solar = solars[i]
527
-
528
- # Compute state-of-charge fraction (0–1), -1 for non-battery agents
529
- if hid in self.batteries:
530
- soc_frac = self.batteries[hid]["soc"] / self.batteries[hid]["max_capacity"]
531
- else:
532
- soc_frac = -1.0
533
-
534
- obs.append([
535
- own_demand,
536
- own_solar,
537
- soc_frac,
538
- grid_price,
539
- peer_price,
540
- float(demands.sum() - own_demand),
541
- float(solars.sum() - own_solar),
542
- hour
543
- ])
544
-
545
- return np.array(obs, dtype=np.float32)
546
-
547
-
548
- def _compute_jains_index(self, usage_array):
549
- """Simple Jain's Fairness Index."""
550
- x = np.array(usage_array, dtype=np.float32)
551
- numerator = (np.sum(x))**2
552
- denominator = len(x) * np.sum(x**2) + 1e-8
553
- return numerator / denominator
554
-
555
-
556
- def _compute_rewards(
557
- self,
558
- grid_import,
559
- grid_export,
560
- actual_sold,
561
- actual_bought,
562
- charge_amount,
563
- discharge_amount,
564
- costs,
565
- grid_price,
566
- peer_price
567
- ):
568
- # Weights for each component
569
- w1 = 0.3; w2 = 0.5; w3 = 0.5; w4 = 0.1; w5 = 0.05; w6 = 0.4; w7 = 1.0
570
-
571
- # Jain's index on total P2P volume
572
- jfi = self._compute_jains_index(actual_bought + actual_sold)
573
-
574
- # Normalize prices
575
- p_grid_norm = grid_price / self.max_grid_price
576
- p_peer_norm = peer_price / self.max_grid_price
577
-
578
- rewards = np.zeros(self.num_agents, dtype=np.float32)
579
- for i, hid in enumerate(self.house_ids):
580
- # Base reward is negative cost
581
- reward = - costs[i] * w7
582
-
583
- # Grid import penalty
584
- grid_penalty = w1 * grid_import[i] * p_grid_norm
585
-
586
- # P2P sell & buy bonuses
587
- p2p_sell_bonus = w2 * actual_sold[i] * p_peer_norm
588
- if peer_price < grid_price:
589
- p2p_buy_bonus = w3 * actual_bought[i] * ((grid_price - peer_price) / self.max_grid_price)
590
- else:
591
- p2p_buy_bonus = 0.0
592
-
593
- # Battery penalties (only solar houses have entries)
594
- if hid in self.batteries:
595
- batt = self.batteries[hid]
596
- soc_frac = batt["soc"] / batt["max_capacity"]
597
- soc_penalty = w4 * (soc_frac - 0.5) ** 2
598
- degradation_penalty = w5 * (charge_amount[i] + discharge_amount[i]) * batt["degradation_cost_per_kwh"]
599
- else:
600
- soc_penalty = degradation_penalty = 0.0
601
-
602
- # Fairness
603
- fairness_bonus = w6 * jfi
604
-
605
- # Combine
606
- reward += (
607
- - grid_penalty
608
- + p2p_sell_bonus
609
- + p2p_buy_bonus
610
- - soc_penalty
611
- - degradation_penalty
612
- + fairness_bonus
613
- )
614
- rewards[i] = reward
615
-
616
- return rewards
617
-
618
-
619
- def get_episode_metrics(self):
620
- """
621
- Return performance metrics for the last completed episode.
622
- Call after episode finishes (after env.reset()).
623
- """
624
- return self.episode_metrics
625
-
626
-
627
- def save_log(self, filename="env_log.csv"):
628
- """Save environment step log to CSV."""
629
- columns = [
630
- "Step", "Total_Grid_Import", "Total_Grid_Export",
631
- "Total_P2P_Buy", "Total_P2P_Sell", "Total_Cost",
632
- ]
633
- df = pd.DataFrame(self.env_log, columns=columns)
634
- df.to_csv(filename, index=False)
635
- print(f"Environment log saved to {filename}")
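The environment above prices P2P energy with a simple elasticity rule: start 10% below the grid price, scale by an exponential of net market pressure, then clip to the [feed-in tariff, grid price] band. A standalone sketch of that rule (peer_price_sketch is a hypothetical helper written for illustration; the two tariffs are Oklahoma's from the file, the traded volumes are made up):

import numpy as np

def peer_price_sketch(grid_price, feed_in_tariff, total_surplus, total_shortfall, elasticity=0.3):
    base = grid_price * 0.90                        # start 10% below the grid price
    net_demand = total_shortfall - total_surplus
    total_trade = total_shortfall + total_surplus + 1e-6
    multiplier = np.exp(elasticity * (net_demand / total_trade))
    # Sellers never do worse than the feed-in tariff; buyers never pay more than the grid.
    return float(np.clip(base * multiplier, feed_in_tariff, grid_price))

print(peer_price_sketch(0.2112, 0.04, total_surplus=5.0, total_shortfall=1.0))  # surplus-heavy step -> ~0.156
print(peer_price_sketch(0.2112, 0.04, total_surplus=1.0, total_shortfall=5.0))  # demand-heavy step -> clipped to 0.2112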
 
Other_algorithms/HC_MAPPO/Environment/cluster_env_wrapper.py DELETED
@@ -1,164 +0,0 @@
1
- import gym
2
- import numpy as np
3
- import math
4
- import sys
5
- import os
6
- import functools
7
-
8
- import pandas as pd
9
-
10
- # Ensure SolarSys Environment is on the Python path
11
- # Please ensure you follow proper directory structure for running this code
12
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
13
- from Environment.solar_sys_environment import SolarSys
14
-
15
-
16
- def form_clusters(metrics: dict, size: int) -> list:
17
- """
18
- Forms balanced, heterogeneous clusters by categorizing houses based on their
19
- energy profile and distributing them evenly in a round-robin fashion.
20
- """
21
- house_ids = list(metrics.keys())
22
- if not house_ids:
23
- return []
24
- all_consumption = [m['consumption'] for m in metrics.values()]
25
- all_solar = [m['solar'] for m in metrics.values()]
26
-
27
- median_consumption = np.median(all_consumption) if all_consumption else 0
28
- median_solar = np.median(all_solar) if all_solar else 0
29
-
30
- # Categorize each house based on its profile relative to the median
31
- producers = [h for h in house_ids if metrics[h]['solar'] >= median_solar and metrics[h]['consumption'] < median_consumption]
32
- consumers = [h for h in house_ids if metrics[h]['solar'] < median_solar and metrics[h]['consumption'] >= median_consumption]
33
- prosumers = [h for h in house_ids if metrics[h]['solar'] >= median_solar and metrics[h]['consumption'] >= median_consumption]
34
- neutrals = [h for h in house_ids if metrics[h]['solar'] < median_solar and metrics[h]['consumption'] < median_consumption]
35
-
36
- # Create a master list ordered by category
37
- sorted_categorized_houses = producers + consumers + prosumers + neutrals
38
-
39
- # Add any houses that weren't categorized to ensure none are missed
40
- categorized_set = set(sorted_categorized_houses)
41
- uncategorized = [h for h in house_ids if h not in categorized_set]
42
- final_house_list = sorted_categorized_houses + uncategorized
43
- num_houses = len(house_ids)
44
- num_clusters = math.ceil(num_houses / size)
45
-
46
- clusters = [[] for _ in range(num_clusters)]
47
-
48
- for i, house_id in enumerate(final_house_list):
49
- target_cluster_idx = i % num_clusters
50
- clusters[target_cluster_idx].append(house_id)
51
-
52
- return clusters
53
-
54
- class GlobalPriceVecEnvWrapper(gym.vector.VectorEnvWrapper):
55
- def __init__(self, env, clusters: list):
56
- super().__init__(env)
57
- self.clusters = clusters
58
- # Expose the underlying SolarSys environments for inspection by the coordinator
59
- # self.env.envs gets the list of individual envs from the SyncVectorEnv
60
- self.cluster_envs = self.env.envs
61
-
62
- def step(self, actions: np.ndarray, exports: np.ndarray = None, imports: np.ndarray = None):
63
- num_clusters = len(self.cluster_envs)
64
- net_transfers = np.zeros(num_clusters)
65
- if exports is not None and imports is not None:
66
- net_transfers = imports - exports
67
- batched_low_level_actions = actions
68
- batched_transfers = net_transfers.reshape(-1, 1).astype(np.float32)
69
- batched_prices = np.full((num_clusters, 1), -1.0, dtype=np.float32)
70
- final_packed_actions_tuple = (batched_low_level_actions, batched_transfers, batched_prices)
71
- obs_next, rewards, terminateds, truncateds, infos = self.env.step(final_packed_actions_tuple)
72
- dones = terminateds | truncateds
73
- done_all = dones.all()
74
-
75
-
76
-
77
- if done_all:
78
- final_infos = infos['final_info']
79
- keys = final_infos[0].keys()
80
- infos = {k: np.stack([info[k] for info in final_infos]) for k in keys}
81
-
82
- info_agg = {
83
- "cluster_dones": dones,
84
- "cluster_infos": infos,
85
- }
86
-
87
- return obs_next, rewards, done_all, info_agg
88
-
89
- def get_export_capacity(self, cluster_idx: int) -> float:
90
- """Returns the total physically exportable energy from a cluster's batteries and solar in kWh."""
91
- cluster_env = self.cluster_envs[cluster_idx]
92
- available_from_batt = cluster_env.battery_soc * cluster_env.battery_discharge_efficiency
93
- total_exportable = np.sum(available_from_batt) + cluster_env.current_solar
94
- return float(total_exportable)
95
-
96
- def get_import_capacity(self, cluster_idx: int) -> float:
97
- """Returns the total physically importable space in a cluster's batteries in kWh."""
98
- cluster_env = self.cluster_envs[cluster_idx]
99
- free_space = cluster_env.battery_max_capacity - cluster_env.battery_soc
100
- total_storable = np.sum(free_space)
101
- return float(total_storable)
102
-
103
- def send_energy(self, from_cluster_idx: int, amount: float) -> float:
104
- """Drains 'amount' of energy from the specified cluster (batteries first, then solar)."""
105
- cluster_env = self.cluster_envs[from_cluster_idx]
106
- return cluster_env.send_energy(amount)
107
-
108
- def receive_energy(self, to_cluster_idx: int, amount: float) -> float:
109
- """Charges batteries in the specified cluster with 'amount' of energy."""
110
- cluster_env = self.cluster_envs[to_cluster_idx]
111
- return cluster_env.receive_energy(amount)
112
-
113
-
114
- def make_vec_env(data_path: str, time_freq: str, cluster_size: int, state: str):
115
- print("--- Pre-loading shared dataset for all environments ---")
116
- try:
117
- shared_df = pd.read_csv(data_path)
118
- shared_df["local_15min"] = pd.to_datetime(shared_df["local_15min"], utc=True)
119
- shared_df.set_index("local_15min", inplace=True)
120
-
121
- # Resample once here so every cluster environment shares the same time base
122
- shared_df = shared_df.resample(time_freq).mean()
123
-
124
-
125
- except Exception as e:
126
- raise ValueError(f"Failed to pre-load data in make_vec_env: {e}")
127
-
128
- base_env_for_metrics = SolarSys(
129
- data_path=data_path,
130
- time_freq=time_freq,
131
- preloaded_data=shared_df, # Pass the shared DataFrame here
132
- state=state
133
- )
134
-
135
- # This part for calculating metrics and forming clusters
136
- metrics = {}
137
- for hid in base_env_for_metrics.house_ids:
138
- total_consumption = float(
139
- np.clip(base_env_for_metrics.original_no_p2p_import[hid], 0.0, None).sum()
140
- )
141
- total_solar = float(
142
- base_env_for_metrics.all_data[f"total_solar_{hid}"].clip(lower=0.0).sum()
143
- )
144
- metrics[hid] = {'consumption': total_consumption, 'solar': total_solar}
145
-
146
- clusters = form_clusters(metrics, cluster_size)
147
- print(f"Formed {len(clusters)} clusters of size up to {cluster_size}.")
148
-
149
- # functools.partial to create environment
150
- env_fns = []
151
- for cluster_house_ids in clusters:
152
- preset_env_fn = functools.partial(
153
- SolarSys,
154
- data_path=data_path,
155
- time_freq=time_freq,
156
- house_ids_in_cluster=cluster_house_ids,
157
- preloaded_data=shared_df,
158
- state=state
159
- )
160
- env_fns.append(preset_env_fn)
161
- sync_vec_env = gym.vector.SyncVectorEnv(env_fns)
162
- wrapped_vec_env = GlobalPriceVecEnvWrapper(sync_vec_env, clusters=clusters)
163
-
164
- return wrapped_vec_env
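form_clusters above buckets houses against the median consumption and solar totals (producer / consumer / prosumer / neutral) and then deals the ordered list out round-robin, so each cluster gets a mix of profiles rather than a block of similar ones. A self-contained toy run of the same logic (all numbers invented):

import math
import numpy as np

metrics = {
    "h1": {"consumption": 10.0, "solar": 8.0},   # low use, high solar  -> producer
    "h2": {"consumption": 20.0, "solar": 0.0},   # high use, no solar   -> consumer
    "h3": {"consumption": 25.0, "solar": 9.0},   # high use, high solar -> prosumer
    "h4": {"consumption": 5.0,  "solar": 0.0},   # low use, no solar    -> neutral
}
ids = list(metrics)
med_c = np.median([m["consumption"] for m in metrics.values()])  # 15.0
med_s = np.median([m["solar"] for m in metrics.values()])        # 4.0

producers = [h for h in ids if metrics[h]["solar"] >= med_s and metrics[h]["consumption"] < med_c]
consumers = [h for h in ids if metrics[h]["solar"] < med_s and metrics[h]["consumption"] >= med_c]
prosumers = [h for h in ids if metrics[h]["solar"] >= med_s and metrics[h]["consumption"] >= med_c]
neutrals  = [h for h in ids if metrics[h]["solar"] < med_s and metrics[h]["consumption"] < med_c]
ordered = producers + consumers + prosumers + neutrals           # ['h1', 'h2', 'h3', 'h4']

size = 2
clusters = [[] for _ in range(math.ceil(len(ordered) / size))]
for i, h in enumerate(ordered):
    clusters[i % len(clusters)].append(h)                        # round-robin deal
print(clusters)  # [['h1', 'h3'], ['h2', 'h4']] -- profiles are mixed, not grouped like-with-like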
 
Other_algorithms/HC_MAPPO/Environment/solar_sys_environment.py DELETED
@@ -1,673 +0,0 @@
1
- import gym
2
- import pandas as pd
3
- import numpy as np
4
- from collections import deque
5
- import random
6
- from gym.spaces import Tuple, Box
7
-
8
- random.seed(42)
9
- np.random.seed(42)
10
-
11
- class SolarSys(gym.Env):
12
-
13
- def __init__(
14
- self,
15
- data_path="DATA/training/25houses_152days_TRAIN.csv",
16
- state="", # Select from 'oklahoma', 'colorado', 'pennsylvania'
17
- time_freq="15T",
18
- house_ids_in_cluster=None,
19
- preloaded_data=None
20
-
21
- ):
22
-
23
- super().__init__() # initialize parent gym.Env
24
- self.state = state.lower()
25
-
26
- # --- Centralized Pricing Configuration ---
27
- self._pricing_info = {
28
- "oklahoma": {
29
- "max_grid_price": 0.2112,
30
- "feed_in_tariff": 0.04,
31
- "price_function": self._get_oklahoma_price
32
- },
33
- "colorado": {
34
- "max_grid_price": 0.32,
35
- "feed_in_tariff": 0.055,
36
- "price_function": self._get_colorado_price
37
- },
38
- "pennsylvania": {
39
- "max_grid_price": 0.125048,
40
- "feed_in_tariff": 0.06,
41
- "price_function": self._get_pennsylvania_price
42
- }
43
- }
44
-
45
- if self.state not in self._pricing_info:
46
- raise ValueError(f"State '{self.state}' is not supported. Available states: {list(self._pricing_info.keys())}")
47
-
48
- state_config = self._pricing_info[self.state]
49
- self.max_grid_price = state_config["max_grid_price"]
50
- self.feed_in_tariff = state_config["feed_in_tariff"]
51
- self._get_price_function = state_config["price_function"]
52
- self.data_path = data_path
53
- self.time_freq = time_freq
54
- if preloaded_data is not None:
55
- all_data = preloaded_data
56
- if house_ids_in_cluster:
57
- print(f"Using pre-loaded data for cluster with {len(house_ids_in_cluster)} houses.")
58
- else:
59
- print(f"Loading data from {data_path}...")
60
- try:
61
- all_data = pd.read_csv(data_path)
62
- all_data["local_15min"] = pd.to_datetime(all_data["local_15min"], utc=True)
63
- all_data.set_index("local_15min", inplace=True)
64
-
65
- except FileNotFoundError:
66
- raise FileNotFoundError(f"Data file {data_path} not found.")
67
- except pd.errors.EmptyDataError:
68
- raise ValueError(f"Data file {data_path} is empty.")
69
- except Exception as e:
70
- raise ValueError(f"Error loading data: {e}")
71
-
72
-
73
- # Compute global maxima for normalization
74
- grid_cols = [c for c in all_data.columns if c.startswith("grid_")]
75
- solar_cols = [c for c in all_data.columns if c.startswith("total_solar_")]
76
- all_grid = all_data[grid_cols].values
77
- all_solar = all_data[solar_cols].values
78
-
79
- # max total demand = max(grid + solar) over all time & agents
80
- self.global_max_demand = float((all_grid + all_solar).max()) + 1e-8
81
-
82
- # max solar generation alone
83
- self.global_max_solar = float(all_solar.max()) + 1e-8
84
-
85
- # Store the resampled dataset
86
- self.all_data = all_data
87
- all_house_ids_in_file = [
88
- col.split("_")[1] for col in self.all_data.columns
89
- if col.startswith("grid_")
90
- ]
91
- if house_ids_in_cluster:
92
- self.house_ids = [hid for hid in house_ids_in_cluster if hid in all_house_ids_in_file]
93
- else:
94
- self.house_ids = all_house_ids_in_file
95
-
96
- if not self.house_ids:
97
- raise ValueError("No valid house_ids found for this environment instance.")
98
-
99
- self.env_log_infos = []
100
-
101
- self.time_freq = time_freq
102
- freq_offset = pd.tseries.frequencies.to_offset(time_freq)
103
- minutes_per_step = freq_offset.nanos / 1e9 / 60.0
104
- self.steps_per_day = int(24 * 60 // minutes_per_step)
105
-
106
- total_rows = len(self.all_data)
107
- self.total_days = total_rows // self.steps_per_day
108
- if self.total_days < 1:
109
- raise ValueError(
110
- f"After resampling, dataset has {total_rows} rows, which is "
111
- f"less than a single day of {self.steps_per_day} steps."
112
- )
113
-
114
- self.num_agents = len(self.house_ids)
115
- self.original_no_p2p_import = {}
116
- for hid in self.house_ids:
117
- col_grid = f"grid_{hid}"
118
- self.original_no_p2p_import[hid] = self.all_data[col_grid].clip(lower=0.0).values
119
- solar_cols = [f"total_solar_{hid}" for hid in self.house_ids]
120
- solar_sums = self.all_data[solar_cols].sum(axis=0).to_dict()
121
- self.agent_groups = [
122
- 1 if solar_sums[f"total_solar_{hid}"] > 0 else 0
123
- for hid in self.house_ids
124
- ]
125
-
126
- self.group_counts = {
127
- 0: self.agent_groups.count(0),
128
- 1: self.agent_groups.count(1)
129
- }
130
- print(f"Number of houses in each group: {self.group_counts}")
131
-
132
- # Battery logic
133
- self.battery_options = {
134
- "teslapowerwall": {"max_capacity": 13.5, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 5.0, "max_discharge_rate": 5.0, "degradation_cost_per_kwh": 0.005},
135
- "enphase": {"max_capacity": 5.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 2.0, "max_discharge_rate": 2.0, "degradation_cost_per_kwh": 0.005},
136
- "franklin": {"max_capacity": 15.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 6.0, "max_discharge_rate": 6.0, "degradation_cost_per_kwh": 0.005},
137
- }
138
- self.solar_houses = [
139
- hid for hid in self.house_ids
140
- if (self.all_data[f"total_solar_{hid}"] > 0).any()
141
- ]
142
-
143
- self.batteries = {}
144
- for hid in self.solar_houses:
145
- choice = random.choice(list(self.battery_options))
146
- specs = self.battery_options[choice]
147
- self.batteries[hid] = {"soc": 0.0, **specs}
148
-
149
- self.battery_charge_history = {hid: [] for hid in self.batteries}
150
- self.battery_discharge_history = {hid: [] for hid in self.batteries}
151
- self.battery_capacity = sum(b["max_capacity"] for b in self.batteries.values())
152
- self.battery_level = sum(b["soc"] for b in self.batteries.values())
153
- self.current_solar = 0.0
154
- self.has_battery = np.array([1 if hid in self.batteries else 0 for hid in self.house_ids], dtype=np.float32)
155
-
156
- # Initialize arrays for all agents, with zeros for non-battery agents
157
- self.battery_soc = np.zeros(self.num_agents, dtype=np.float32)
158
- self.battery_max_capacity = np.zeros(self.num_agents, dtype=np.float32)
159
- self.battery_charge_efficiency = np.zeros(self.num_agents, dtype=np.float32)
160
- self.battery_discharge_efficiency = np.zeros(self.num_agents, dtype=np.float32)
161
- self.battery_max_charge_rate = np.zeros(self.num_agents, dtype=np.float32)
162
- self.battery_max_discharge_rate = np.zeros(self.num_agents, dtype=np.float32)
163
- self.battery_degradation_cost = np.zeros(self.num_agents, dtype=np.float32)
164
-
165
- # Populate the arrays using the created battery dictionary
166
- for i, hid in enumerate(self.house_ids):
167
- if hid in self.batteries:
168
- batt = self.batteries[hid]
169
- self.battery_max_capacity[i] = batt["max_capacity"]
170
- self.battery_charge_efficiency[i] = batt["charge_efficiency"]
171
- self.battery_discharge_efficiency[i] = batt["discharge_efficiency"]
172
- self.battery_max_charge_rate[i] = batt["max_charge_rate"]
173
- self.battery_max_discharge_rate[i] = batt["max_discharge_rate"]
174
- self.battery_degradation_cost[i] = batt["degradation_cost_per_kwh"]
175
-
176
-
177
- # ========== SPACES (Observation & Action) ===================================
178
- self.observation_space = gym.spaces.Box(
179
- low=-np.inf, high=np.inf,
180
- shape=(self.num_agents, 8),
181
- dtype=np.float32
182
- )
183
- self.action_space = Tuple((
184
- Box(low=0.0, high=1.0, shape=(self.num_agents, 6), dtype=np.float32),
185
- Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float32),
186
- Box(low=-1.0, high=np.inf, shape=(1,), dtype=np.float32)
187
- ))
188
-
189
- # ========== EPISODE STATE INITIALIZATION ===================================
190
- self.data = None
191
- self.env_log = []
192
- self.day_index = -1
193
- self.current_step = 0
194
- self.num_steps = self.steps_per_day
195
- self.demands = {}
196
- self.solars = {}
197
- self.previous_actions = {
198
- hid: np.zeros(6) for hid in self.house_ids
199
- }
200
- self._initialize_episode_metrics()
201
-
202
- def get_grid_price(self, step_idx):
203
- """
204
- Returns the grid price for the current step based on the selected state.
205
- """
206
- return self._get_price_function(step_idx)
207
-
208
- def _get_oklahoma_price(self, step_idx):
209
- minutes_per_step = 24 * 60 / self.steps_per_day
210
- hour = int((step_idx * minutes_per_step) // 60) % 24
211
- if 14 <= hour < 19:
212
- return 0.2112
213
- else:
214
- return 0.0434
215
-
216
- def _get_colorado_price(self, step_idx):
217
- minutes_per_step = 24 * 60 / self.steps_per_day
218
- hour = int((step_idx * minutes_per_step) // 60) % 24
219
- if 15 <= hour < 19:
220
- return 0.32
221
- elif 13 <= hour < 15:
222
- return 0.22
223
- else:
224
- return 0.12
225
-
226
- def _get_pennsylvania_price(self, step_idx):
227
- minutes_per_step = 24 * 60 / self.steps_per_day
228
- hour = int((step_idx * minutes_per_step) // 60) % 24
229
- if 13 <= hour < 21:
230
- return 0.125048
231
- elif hour >= 23 or hour < 6:
232
- return 0.057014
233
- else:
234
- return 0.079085
235
-
236
- def get_peer_price(self, step_idx, total_surplus, total_shortfall):
237
- grid_price = self.get_grid_price(step_idx)
238
- feed_in_tariff = self.feed_in_tariff
239
-
240
- # Parameters for arctangent-log pricing
241
- p_balance = (grid_price * 0.80) + (feed_in_tariff * 0.20)
242
- p_con = (grid_price - feed_in_tariff) / (1.5 * np.pi)
243
- k = 1.5
244
- epsilon = 1e-6
245
- supply = total_surplus + epsilon
246
- demand = total_shortfall + epsilon
247
-
248
- ratio = demand / supply
249
- log_ratio = np.log(ratio)
250
- if log_ratio < 0:
251
- power_term = - (np.abs(log_ratio) ** k)
252
- else:
253
- power_term = log_ratio ** k
254
-
255
- price_offset = 2 * np.pi * p_con * np.arctan(power_term)
256
-
257
- peer_price = p_balance + price_offset
258
-
259
- final_price = float(np.clip(peer_price, feed_in_tariff, grid_price))
260
-
261
- return final_price
262
-
263
-
264
- def _initialize_episode_metrics(self):
265
- """Initializes or resets all metrics tracked over a single episode (day)."""
266
- self.cumulative_grid_reduction = 0.0
267
- self.cumulative_grid_reduction_peak = 0.0
268
- self.cumulative_degradation_cost = 0.0
269
- self.agent_cost_savings = np.zeros(self.num_agents)
270
- self.degradation_cost_timeseries = []
271
- self.cost_savings_timeseries = []
272
- self.grid_reduction_timeseries = []
273
-
274
- def get_episode_metrics(self):
275
- """
276
- Returns a dictionary of performance metrics for the last completed episode.
277
- """
278
- return self.episode_metrics
279
-
280
- ##########################################################################
281
- # Gym Required Methods
282
-
283
- def reset(self):
284
- if self.current_step > 0:
285
- positive_savings = self.agent_cost_savings[self.agent_cost_savings > 0]
286
- if len(positive_savings) > 1:
287
- fairness_on_savings = self._compute_jains_index(positive_savings)
288
- else:
289
- fairness_on_savings = 0.0
290
-
291
- self.episode_metrics = {
292
- "grid_reduction_entire_day": self.cumulative_grid_reduction,
293
- "grid_reduction_peak_hours": self.cumulative_grid_reduction_peak,
294
- "total_cost_savings": np.sum(self.agent_cost_savings),
295
- "fairness_on_cost_savings": fairness_on_savings,
296
- "battery_degradation_cost_total": self.cumulative_degradation_cost,
297
- "degradation_cost_over_time": self.degradation_cost_timeseries,
298
- "cost_savings_over_time": self.cost_savings_timeseries,
299
- "grid_reduction_over_time": self.grid_reduction_timeseries,
300
- }
301
- self.day_index = np.random.randint(0, self.total_days)
302
-
303
- start_row = self.day_index * self.steps_per_day
304
- end_row = start_row + self.steps_per_day
305
- day_data = self.all_data.iloc[start_row:end_row].copy()
306
- self.data = day_data
307
-
308
- self.no_p2p_import_day = {}
309
- for hid in self.house_ids:
310
- self.no_p2p_import_day[hid] = self.original_no_p2p_import[hid][start_row:end_row]
311
-
312
- demand_list = []
313
- solar_list = []
314
- for hid in self.house_ids:
315
- col_grid = f"grid_{hid}"
316
- col_solar = f"total_solar_{hid}"
317
-
318
- grid_series = day_data[col_grid].fillna(0.0)
319
- solar_series = day_data[col_solar].fillna(0.0).clip(lower=0.0)
320
-
321
- demand_array = grid_series.values + solar_series.values
322
- demand_array = np.clip(demand_array, 0.0, None)
323
-
324
- demand_list.append(demand_array)
325
- solar_list.append(solar_series.values)
326
-
327
- self.demands_day = np.stack(demand_list, axis=1).astype(np.float32)
328
- self.solars_day = np.stack(solar_list, axis=1).astype(np.float32)
329
-
330
- self.hours_day = (self.data.index.hour + self.data.index.minute / 60.0).values
331
-
332
- self.current_step = 0
333
- self.env_log = []
334
- for hid in self.house_ids:
335
- self.previous_actions[hid] = np.zeros(6)
336
-
337
- lows = 0.30 * self.battery_max_capacity
338
- highs = 0.70 * self.battery_max_capacity
339
-
340
- self.battery_soc = np.random.uniform(low=lows, high=highs)
341
- self.battery_soc *= self.has_battery
342
-
343
- initial_demands = self.demands_day[0]
344
- initial_solars = self.solars_day[0]
345
- initial_surplus = np.maximum(initial_solars - initial_demands, 0.0).sum()
346
- initial_shortfall = np.maximum(initial_demands - initial_solars, 0.0).sum()
347
- initial_peer_price = self.get_peer_price(0, initial_surplus, initial_shortfall)
348
-
349
- obs = self._get_obs(peer_price=initial_peer_price)
350
-
351
- self._initialize_episode_metrics()
352
-
353
- return obs, {}
354
-
355
- def step(self, packed_action):
356
- actions, transfer_kwh_arr, peer_price_arr = packed_action
357
- inter_cluster_transfer_kwh = float(transfer_kwh_arr[0])
358
- override_peer_price_val = float(peer_price_arr[0])
359
-
360
- override_peer_price = override_peer_price_val if override_peer_price_val >= 0 else None
361
-
362
- actions = np.array(actions, dtype=np.float32)
363
- if actions.shape != (self.num_agents, 6):
364
- raise ValueError(f"Actions shape mismatch: got {actions.shape}, expected {(self.num_agents, 6)}")
365
- actions = np.clip(actions, 0.0, 1.0)
366
-
367
- a_sellGrid = actions[:, 0]
368
- a_buyGrid = actions[:, 1]
369
- a_sellPeers = actions[:, 2]
370
- a_buyPeers = actions[:, 3]
371
- a_chargeBatt = actions[:, 4]
372
- a_dischargeBatt = actions[:, 5]
373
-
374
-
375
- demands = self.demands_day[self.current_step]
376
- solars = self.solars_day[self.current_step]
377
-
378
- total_surplus = np.maximum(solars - demands, 0.0).sum()
379
- total_shortfall = np.maximum(demands - solars, 0.0).sum()
380
- self.current_solar = total_surplus
381
-
382
- if override_peer_price is not None:
383
- peer_price = override_peer_price
384
- else:
385
- peer_price = self.get_peer_price(
386
- self.current_step,
387
- total_surplus,
388
- total_shortfall
389
- )
390
-
391
- grid_price = self.get_grid_price(self.current_step)
392
-
393
- shortfall = np.maximum(demands - solars, 0.0)
394
- surplus = np.maximum(solars - demands, 0.0)
395
-
396
- final_shortfall = shortfall.copy()
397
- final_surplus = surplus.copy()
398
- grid_import = np.zeros(self.num_agents, dtype=np.float32)
399
- grid_export = np.zeros(self.num_agents, dtype=np.float32)
400
-
401
- # ### VECTORIZED BATTERY DISCHARGE ###
402
- available_from_batt = self.battery_soc * self.battery_discharge_efficiency
403
- desired_discharge = a_dischargeBatt * self.battery_max_discharge_rate
404
- discharge_amount = np.minimum.reduce([desired_discharge, available_from_batt, final_shortfall])
405
- discharge_amount *= self.has_battery # Ensure only batteries discharge
406
-
407
- # Update SOC (energy drawn from battery before efficiency loss)
408
- self.battery_soc -= (discharge_amount / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
409
- self.battery_soc = np.maximum(0.0, self.battery_soc)
410
- final_shortfall -= discharge_amount
411
-
412
- cap_left = self.battery_max_capacity - self.battery_soc
413
- desired_charge = a_chargeBatt * self.battery_max_charge_rate
414
- charge_amount = np.minimum.reduce([
415
- desired_charge,
416
- cap_left / (self.battery_charge_efficiency + 1e-9),
417
- final_surplus
418
- ])
419
- charge_amount *= self.has_battery
420
-
421
- # Update SOC
422
- self.battery_soc += charge_amount * self.battery_charge_efficiency
423
- final_surplus -= charge_amount
424
-
425
-
426
-
427
- # ### VECTORIZED P2P TRADING ###
428
- battery_offer = (self.battery_soc * self.battery_discharge_efficiency) * self.has_battery
429
- effective_surplus = final_surplus + battery_offer
430
-
431
- netPeer = a_buyPeers - a_sellPeers
432
- p2p_buy_request = np.maximum(0, netPeer) * final_shortfall
433
- p2p_sell_offer = np.maximum(0, -netPeer) * effective_surplus
434
-
435
- total_sell = np.sum(p2p_sell_offer)
436
- total_buy = np.sum(p2p_buy_request)
437
- matched = min(total_sell, total_buy)
438
-
439
- if matched > 1e-9:
440
- sell_fraction = p2p_sell_offer / (total_sell + 1e-12)
441
- buy_fraction = p2p_buy_request / ( total_buy + 1e-12)
442
- actual_sold = matched * sell_fraction
443
- actual_bought = matched * buy_fraction
444
- else:
445
- actual_sold = np.zeros(self.num_agents, dtype=np.float32)
446
- actual_bought = np.zeros(self.num_agents, dtype=np.float32)
447
-
448
-
449
- from_batt = np.minimum(actual_sold, battery_offer)
450
- from_solar = actual_sold - from_batt
451
-
452
- final_surplus -= from_solar
453
-
454
- final_shortfall -= actual_bought
455
- soc_reduction = (from_batt / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
456
- self.battery_soc -= soc_reduction
457
- self.battery_soc = np.maximum(0.0, self.battery_soc)
458
-
459
-
460
- if inter_cluster_transfer_kwh > 0:
461
- amount_received = inter_cluster_transfer_kwh
462
-
463
-
464
- total_shortfall_in_cluster = np.sum(final_shortfall)
465
- if total_shortfall_in_cluster > 1e-6:
466
-
467
- to_cover_shortfall = min(amount_received, total_shortfall_in_cluster)
468
- distribution_ratio = final_shortfall / total_shortfall_in_cluster
469
- shortfall_reduction = distribution_ratio * to_cover_shortfall
470
- final_shortfall -= shortfall_reduction
471
-
472
- amount_received -= to_cover_shortfall
473
-
474
- if amount_received > 1e-6:
475
-
476
- cap_left = self.battery_max_capacity - self.battery_soc
477
- storable_energy = cap_left / (self.battery_charge_efficiency + 1e-9)
478
- total_storable_in_cluster = np.sum(storable_energy * self.has_battery)
479
-
480
- if total_storable_in_cluster > 1e-6:
481
-
482
- to_store = min(amount_received, total_storable_in_cluster)
483
-
484
-
485
- storage_ratio = storable_energy / total_storable_in_cluster
486
- energy_to_store_per_batt = storage_ratio * to_store
487
-
488
-
489
- self.battery_soc += (energy_to_store_per_batt * self.battery_charge_efficiency) * self.has_battery
490
-
491
- elif inter_cluster_transfer_kwh < 0:
492
- amount_to_send = abs(inter_cluster_transfer_kwh)
493
-
494
-
495
- total_surplus_in_cluster = np.sum(final_surplus)
496
- if total_surplus_in_cluster > 1e-6:
497
-
498
- sent_from_surplus = min(amount_to_send, total_surplus_in_cluster)
499
- draw_ratio = final_surplus / total_surplus_in_cluster
500
- surplus_reduction = draw_ratio * sent_from_surplus
501
- final_surplus -= surplus_reduction
502
- amount_to_send -= sent_from_surplus
503
-
504
-
505
- if amount_to_send > 1e-6:
506
-
507
- available_from_batt = (self.battery_soc * self.battery_discharge_efficiency) * self.has_battery
508
- total_available_from_batt = np.sum(available_from_batt)
509
-
510
- if total_available_from_batt > 1e-6:
511
- # Discharge a maximum of 'amount_to_send' from batteries
512
- to_discharge = min(amount_to_send, total_available_from_batt)
513
-
514
- # Draw this amount proportionally from each available battery
515
- discharge_ratio = available_from_batt / total_available_from_batt
516
- discharged_per_batt = discharge_ratio * to_discharge # This is effective energy
517
-
518
- # Update SoC (energy drawn from battery before efficiency loss)
519
- soc_reduction = (discharged_per_batt / (self.battery_discharge_efficiency + 1e-9))
520
- self.battery_soc -= soc_reduction * self.has_battery
521
- self.battery_soc = np.maximum(0.0, self.battery_soc)
522
- # =======================================================================
523
-
524
- netGrid = a_buyGrid - a_sellGrid
525
- grid_import = np.maximum(0, netGrid) * final_shortfall
526
- grid_export = np.maximum(0, -netGrid) * final_surplus
527
-
528
- forced = np.maximum(final_shortfall - grid_import, 0.0)
529
- grid_import += forced
530
- final_shortfall -= forced
531
-
532
- feed_in_tariff = self.feed_in_tariff
533
- costs = (
534
- (grid_import * grid_price)
535
- - (grid_export * feed_in_tariff)
536
- + (actual_bought * peer_price)
537
- - (actual_sold * peer_price)
538
- )
539
-
540
- final_rewards = self._compute_rewards(
541
- grid_import=grid_import, grid_export=grid_export, actual_sold=actual_sold,
542
- actual_bought=actual_bought, charge_amount=charge_amount, discharge_amount=discharge_amount,
543
- costs=costs, grid_price=grid_price, peer_price=peer_price
544
- )
545
-
546
- no_p2p_import_this_step = np.array([
547
- self.no_p2p_import_day[hid][self.current_step]
548
- for hid in self.house_ids
549
- ], dtype=np.float32)
550
-
551
-
552
- # --- Metric 1 & 2: Grid Reduction (Entire Day & Peak Hours) ---
553
- step_grid_reduction = np.sum(no_p2p_import_this_step - grid_import)
554
- self.cumulative_grid_reduction += step_grid_reduction
555
- self.grid_reduction_timeseries.append(step_grid_reduction)
556
-
557
- if grid_price >= self.max_grid_price * 0.99:
558
- self.cumulative_grid_reduction_peak += step_grid_reduction
559
-
560
- # --- Metric 3: Total Cost Savings ---
561
- cost_no_p2p = no_p2p_import_this_step * grid_price
562
- step_cost_savings_per_agent = cost_no_p2p - costs
563
- self.agent_cost_savings += step_cost_savings_per_agent
564
- self.cost_savings_timeseries.append(np.sum(step_cost_savings_per_agent))
565
-
566
- # --- Metric 5 & 6: Battery Degradation Cost (Total and Over Time) ---
567
- degradation_cost_agent = (charge_amount + discharge_amount) * self.battery_degradation_cost
568
- step_degradation_cost = np.sum(degradation_cost_agent)
569
-
570
- self.cumulative_degradation_cost += step_degradation_cost
571
- self.degradation_cost_timeseries.append(step_degradation_cost)
572
-
573
- info = {
574
- "p2p_buy": actual_bought,
575
- "p2p_sell": actual_sold,
576
- "grid_import_with_p2p": grid_import,
577
- "grid_import_no_p2p": no_p2p_import_this_step,
578
- "grid_export": grid_export,
579
- "costs": costs,
580
- "charge_amount": charge_amount,
581
- "discharge_amount": discharge_amount,
582
- "step": self.current_step,
583
- "step_grid_reduction": step_grid_reduction,
584
- "step_cost_savings": np.sum(step_cost_savings_per_agent),
585
- "step_degradation_cost": step_degradation_cost,
586
- }
587
-
588
- self.env_log.append([
589
- self.current_step, np.sum(grid_import), np.sum(grid_export),
590
- np.sum(actual_bought), np.sum(actual_sold), np.sum(costs)
591
- ])
592
-
593
- self.current_step += 1
594
-
595
- terminated = False
596
- truncated = (self.current_step >= self.num_steps)
597
-
598
- obs_next = self._get_obs(peer_price=peer_price)
599
- info['agent_rewards'] = final_rewards
600
- self.last_info = info
601
- self.env_log_infos.append(info)
602
- return obs_next, final_rewards.sum(), terminated, truncated, info
603
-
604
-
605
-
606
- def _get_obs(self, peer_price: float):
607
- step = min(self.current_step, self.num_steps - 1)
608
- demands = self.demands_day[step]
609
- solars = self.solars_day[step]
610
- grid_price = self.get_grid_price(step)
611
- hour = self.hours_day[step]
612
- soc_frac = self.battery_soc / (self.battery_max_capacity + 1e-9)
613
- soc_frac = np.where(self.has_battery == 1, soc_frac, -1.0)
614
- total_demand_others = demands.sum() - demands
615
- total_solar_others = solars.sum() - solars
616
-
617
- obs = np.stack([
618
- demands,
619
- solars,
620
- soc_frac,
621
- np.full(self.num_agents, grid_price),
622
- np.full(self.num_agents, peer_price),
623
- total_demand_others,
624
- total_solar_others,
625
- np.full(self.num_agents, hour)
626
- ], axis=1).astype(np.float32)
627
-
628
- return obs
629
-
630
-
631
- def _compute_jains_index(self, usage_array):
632
- x = np.array(usage_array, dtype=np.float32)
633
- numerator = (np.sum(x))**2
634
- denominator = len(x) * np.sum(x**2) + 1e-8
635
- return numerator / denominator
636
-
637
-
638
- def _compute_rewards(
639
- self, grid_import, grid_export, actual_sold, actual_bought,
640
- charge_amount, discharge_amount, costs, grid_price, peer_price
641
- ):
642
-
643
- w1 = 0.3; w2 = 0.5; w3 = 0.5; w4 = 0.1; w5 = 0.05; w6 = 0.4; w7 = 1.0
644
-
645
- p_grid_norm = grid_price / self.max_grid_price
646
- p_peer_norm = peer_price / self.max_grid_price
647
-
648
- rewards = -costs * w7
649
- rewards -= w1 * grid_import * p_grid_norm
650
- rewards += w2 * actual_sold * p_peer_norm
651
- buy_bonus = w3 * actual_bought * ((grid_price - peer_price) / self.max_grid_price)
652
- rewards += np.where(peer_price < grid_price, buy_bonus, 0.0)
653
-
654
- # ### VECTORIZED REWARD PENALTIES ###
655
- soc_frac = self.battery_soc / (self.battery_max_capacity + 1e-9)
656
- soc_penalties = w4 * ((soc_frac - 0.5) ** 2) * self.has_battery
657
- degrad_penalties = w5 * (charge_amount + discharge_amount) * self.battery_degradation_cost
658
-
659
- rewards -= soc_penalties
660
- rewards -= degrad_penalties
661
-
662
- jfi = self._compute_jains_index(actual_bought + actual_sold)
663
- rewards += w6 * jfi
664
- return rewards
665
-
666
- def save_log(self, filename="env_log.csv"):
667
- columns = [
668
- "Step", "Total_Grid_Import", "Total_Grid_Export",
669
- "Total_P2P_Buy", "Total_P2P_Sell", "Total_Cost",
670
- ]
671
- df = pd.DataFrame(self.env_log, columns=columns)
672
- df.to_csv(filename, index=False)
673
- print(f"Environment log saved to {filename}")
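The step() method above dispatches every battery in a single vectorized pass: np.minimum.reduce caps each battery's desired discharge by both its deliverable energy and the house's remaining shortfall. A minimal sketch of that pattern with made-up numbers (the arrays below are illustrative, not from the dataset):

import numpy as np

soc       = np.array([4.0, 0.5, 2.0])   # kWh stored in each battery
eta_d     = 0.95                        # discharge efficiency
max_rate  = np.array([2.0, 2.0, 2.0])   # max discharge per step (kWh)
action    = np.array([1.0, 1.0, 0.25])  # agent's discharge fraction in [0, 1]
shortfall = np.array([1.5, 1.0, 3.0])   # unmet demand per house (kWh)

available = soc * eta_d                 # energy each battery can actually deliver
desired   = action * max_rate
discharge = np.minimum.reduce([desired, available, shortfall])

# SOC drops by more than the delivered energy because of conversion losses,
# matching the environment's soc -= discharge / efficiency update.
soc -= discharge / eta_d
shortfall -= discharge

print(discharge)        # [1.5   0.475 0.5  ]
print(soc.round(3))     # [2.421 0.    1.474]
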
Other_algorithms/HC_MAPPO/HC_MAPPO_evaluation.py DELETED
@@ -1,618 +0,0 @@
- import os
- import sys
- import time
- from datetime import datetime
- import re
- import numpy as np
- import torch
- import pandas as pd
- import matplotlib.pyplot as plt
- import glob
-
- # Allow imports from project root
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
- from cluster import InterClusterCoordinator, InterClusterLedger
- from Environment.cluster_env_wrapper import make_vec_env
- from mappo.trainer.mappo import MAPPO
-
-
- def compute_jains_fairness(values: np.ndarray) -> float:
-     """
-     Compute Jain's fairness index for a given array of values.
-     Returns a value between 0 and 1, where 1 indicates perfect fairness.
-     """
-     if len(values) == 0:
-         return 0.0
-     if np.all(values == 0):
-         return 1.0
-     num = (values.sum())**2
-     den = len(values) * (values**2).sum() + 1e-8
-     return float(num / den)
-
-
- def main():
-     # Configuration Parameters
-     DATA_PATH = "data/testing/500houses_30days_TEST.csv"
-     MODEL_DIR = "models/hierarchical_oklahoma_500agents_10size_10000eps_latest/models"
-
-     # Auto-detect state from model path
-     state_match = re.search(r"hierarchical_(oklahoma|colorado|pennsylvania)_", MODEL_DIR)
-     if not state_match:
-         # Fall back to searching the parent directory name if the first pattern fails
-         state_match = re.search(r"mappo_(oklahoma|colorado|pennsylvania)_", MODEL_DIR)
-
-     if not state_match:
-         raise ValueError(
-             "Could not automatically detect the state (oklahoma, colorado, or pennsylvania) "
-             "from the model directory path. Please ensure the path contains the state name."
-         )
-     detected_state = state_match.group(1)
-     print(f"--- Detected state: {detected_state.upper()} ---")
-
-     # Auto-detect cluster size from model path
-     cluster_size_match = re.search(r'(\d+)size_', MODEL_DIR)
-     if not cluster_size_match:
-         raise ValueError(
-             "Could not automatically detect the cluster size from the model directory path. "
-             "Please ensure the path contains a pattern like '5size_' or '10size_'."
-         )
-     detected_cluster_size = int(cluster_size_match.group(1))
-     print(f"--- Detected cluster size: {detected_cluster_size} ---")
-
-     DAYS_TO_EVALUATE = 30
-     SOLAR_THRESHOLD = 0.1
-     MAX_TRANSFER_KWH = 1000000.0
-
-     W_COST_SAVINGS = 1.0
-     W_GRID_PENALTY = 0.5
-     W_P2P_BONUS = 0.2
-
-     # Environment Initialization
-     cluster_env = make_vec_env(
-         data_path=DATA_PATH,
-         time_freq="15T",
-         cluster_size=detected_cluster_size,
-         state=detected_state
-     )
-     n_clusters = cluster_env.num_envs
-     sample_subenv = cluster_env.cluster_envs[0]
-     eval_num_steps = sample_subenv.num_steps
-     print(f"Number of steps per day: {eval_num_steps}")
-
-     # Load intra-cluster MAPPO agents
-     n_agents_per_cluster = sample_subenv.num_agents
-     local_dim = sample_subenv.observation_space.shape[-1]
-     global_dim = n_agents_per_cluster * local_dim
-     act_dim = sample_subenv.action_space[0].shape[-1]
-
-     print(f"Creating and loading {n_clusters} independent low-level MAPPO agents...")
-     low_agents = []
-     for i in range(n_clusters):
-         agent = MAPPO(
-             n_agents=n_agents_per_cluster,
-             local_dim=local_dim,
-             global_dim=global_dim,
-             act_dim=act_dim,
-             lr=2e-4,
-             gamma=0.95,
-             lam=0.95,
-             clip_eps=0.2,
-             k_epochs=4,
-             batch_size=512,
-             episode_len=96
-         )
-         ckpt_pattern = os.path.join(MODEL_DIR, f"low_cluster{i}_ep*.pth")
-         ckpts_low = glob.glob(ckpt_pattern)
-         if not ckpts_low:
-             raise FileNotFoundError(f"No checkpoint found for cluster {i} with pattern: {ckpt_pattern}")
-         latest_low = sorted(ckpts_low, key=lambda x: int(re.search(r'ep(\d+)\.pth$', x).group(1)))[-1]
-         print(f"Loading low-level policy for cluster {i} from: {latest_low}")
-         agent.load(latest_low)
-         agent.actor.eval()
-         agent.critic.eval()
-         low_agents.append(agent)
-
-     # Output Folder Setup
-     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-     num_agents = sum(subenv.num_agents for subenv in cluster_env.cluster_envs)
-     run_name = f"eval_vectorized_{num_agents}agents_{DAYS_TO_EVALUATE}days_{timestamp}"
-     output_folder = os.path.join("runs_final_vectorized_eval", run_name)
-     logs_dir = os.path.join(output_folder, "logs")
-     plots_dir = os.path.join(output_folder, "plots")
-     for d in (logs_dir, plots_dir):
-         os.makedirs(d, exist_ok=True)
-     print(f"Saving evaluation outputs to: {output_folder}")
-
-     # Load inter-cluster MAPPO agent
-     OBS_DIM_HI_LOCAL = 7
-     act_dim_inter = 2
-
-     # Define the global dimension for the high-level agent
-     OBS_DIM_HI_GLOBAL = n_clusters * OBS_DIM_HI_LOCAL
-
-     print(f"Initializing evaluation inter-agent (MAPPO): n_agents={n_clusters}, "
-           f"local_dim={OBS_DIM_HI_LOCAL}, global_dim={OBS_DIM_HI_GLOBAL}, act_dim={act_dim_inter}")
-
-     # Instantiate MAPPO for inter-cluster coordination
-     inter_agent = MAPPO(
-         n_agents=n_clusters,
-         local_dim=OBS_DIM_HI_LOCAL,
-         global_dim=OBS_DIM_HI_GLOBAL,
-         act_dim=act_dim_inter,
-         lr=2e-4,
-         gamma=0.95,
-         lam=0.95,
-         clip_eps=0.2,
-         k_epochs=4,
-         batch_size=512,
-         episode_len=96
-     )
-
-     ckpts_inter = glob.glob(os.path.join(MODEL_DIR, "inter_ep*.pth"))
-     if not ckpts_inter:
-         raise FileNotFoundError(f"No high-level checkpoints (inter_ep*.pth) in {MODEL_DIR}")
-     latest_inter = sorted(ckpts_inter, key=lambda x: int(re.search(r'ep(\d+)\.pth$', x).group(1)))[-1]
-     print("Loading inter-cluster policy from", latest_inter)
-     inter_agent.load(latest_inter)
-     inter_agent.actor.eval()
-     inter_agent.critic.eval()
-
-     # Instantiate Coordinator
-     ledger = InterClusterLedger()
-     coordinator = InterClusterCoordinator(
-         cluster_env,
-         inter_agent,
-         ledger,
-         max_transfer_kwh=MAX_TRANSFER_KWH,
-         w_cost_savings=W_COST_SAVINGS,
-         w_grid_penalty=W_GRID_PENALTY,
-         w_p2p_bonus=W_P2P_BONUS
-     )
-
-     # Data collectors
-     all_logs = []
-     daily_summaries = []
-     step_timing_list = []
-
-     # Per-day evaluation
-     evaluation_start = time.time()
-     for day in range(1, DAYS_TO_EVALUATE + 1):
-         obs_clusters, _ = cluster_env.reset()
-         done_all = False
-         step_count = 0
-         day_logs = []
-
-         while not done_all and step_count < eval_num_steps:
-             step_start_time = time.time()
-             step_count += 1
-
-             # Get high-level actions
-             inter_cluster_obs_local_list = [coordinator.get_cluster_state(se, step_count) for se in cluster_env.cluster_envs]
-             inter_cluster_obs_local = np.array(inter_cluster_obs_local_list)
-
-             # Create the global state for the high-level agent
-             inter_cluster_obs_global = inter_cluster_obs_local.flatten()
-
-             with torch.no_grad():
-                 # Call select_action with both local and global states
-                 high_level_action, _ = inter_agent.select_action(
-                     inter_cluster_obs_local,
-                     inter_cluster_obs_global
-                 )
-
-             # Build transfers
-             current_reports = {i: {'export_capacity': cluster_env.get_export_capacity(i), 'import_capacity': cluster_env.get_import_capacity(i)} for i in range(n_clusters)}
-             exports, imports = coordinator.build_transfers(high_level_action, current_reports)
-
-             # Get low-level actions
-             batch_global_obs = obs_clusters.reshape(n_clusters, -1)
-             with torch.no_grad():
-                 low_level_actions_list = []
-                 for c_idx in range(n_clusters):
-                     agent = low_agents[c_idx]
-                     local_obs_cluster = obs_clusters[c_idx]
-                     global_obs_cluster = batch_global_obs[c_idx]
-                     actions, _ = agent.select_action(local_obs_cluster, global_obs_cluster)
-                     low_level_actions_list.append(actions)
-                 low_level_actions = np.stack(low_level_actions_list)
-
-             # Step the environment
-             next_obs, rewards, done_all, step_info = cluster_env.step(
-                 low_level_actions, exports=exports, imports=imports
-             )
-
-             # Advance the state
-             obs_clusters = next_obs
-
-             # Timing and console printout
-             step_duration = time.time() - step_start_time
-             print(f"[Day {day}, Step {step_count}] Step time: {step_duration:.6f} seconds")
-             step_timing_list.append({"day": day, "step": step_count, "step_time_s": step_duration})
-
-             # Consolidated Logging
-             infos = step_info.get("cluster_infos")
-             for c_idx, subenv in enumerate(cluster_env.cluster_envs):
-                 grid_price_now = subenv.get_grid_price(step_count - 1)
-                 peer_price_now = step_info.get("peer_price_global")
-                 if peer_price_now is None:
-                     demands_step = subenv.demands_day[step_count - 1]
-                     solars_step = subenv.solars_day[step_count - 1]
-                     surplus = np.maximum(solars_step - demands_step, 0.0).sum()
-                     shortfall = np.maximum(demands_step - solars_step, 0.0).sum()
-                     peer_price_now = subenv.get_peer_price(step_count - 1, surplus, shortfall)
-
-                 for i, hid in enumerate(subenv.house_ids):
-                     is_battery_house = hid in subenv.batteries
-                     charge = infos["charge_amount"][c_idx][i]
-                     discharge = infos["discharge_amount"][c_idx][i]
-                     day_logs.append({
-                         "day": day,
-                         "step": step_count - 1,
-                         "house": hid,
-                         "cluster": c_idx,
-                         "grid_import_no_p2p": infos["grid_import_no_p2p"][c_idx][i],
-                         "grid_import_with_p2p": infos["grid_import_with_p2p"][c_idx][i],
-                         "grid_export": infos["grid_export"][c_idx][i],
-                         "p2p_buy": infos["p2p_buy"][c_idx][i],
-                         "p2p_sell": infos["p2p_sell"][c_idx][i],
-                         "actual_cost": infos["costs"][c_idx][i],
-                         "baseline_cost": infos["grid_import_no_p2p"][c_idx][i] * grid_price_now,
-                         "total_demand": subenv.demands_day[step_count - 1, i],
-                         "total_solar": subenv.solars_day[step_count - 1, i],
-                         "grid_price": grid_price_now,
-                         "peer_price": peer_price_now,
-                         "soc": (subenv.battery_soc[i] / subenv.battery_max_capacity[i]) if is_battery_house and subenv.battery_max_capacity[i] > 0 else np.nan,
-                         "degradation_cost": (charge + discharge) * subenv.battery_degradation_cost[i] if is_battery_house else 0.0,
-                         "reward": infos["agent_rewards"][c_idx][i],
-                     })
-
-         # End of day: aggregate & summarize
-         df_day = pd.DataFrame(day_logs)
-         if df_day.empty:
-             continue
-         all_logs.extend(day_logs)
-
-         # Consolidated Daily Summary Calculation
-         # Count solar houses from the daily data
-         num_solar_houses = df_day[df_day['total_solar'] > 0]['house'].nunique()
-
-         if num_solar_houses > 0:
-             # Get the total number of agents for scaling the threshold
-             num_agents_in_day = df_day['house'].nunique()
-
-             # Calculate aggregate solar generation per step
-             agg_solar_per_step = df_day.groupby("step")["total_solar"].sum()
-
-             # Find steps where aggregate solar exceeds the scaled threshold
-             sunny_steps_mask = agg_solar_per_step > (SOLAR_THRESHOLD * num_agents_in_day)
-             sunny_steps = sunny_steps_mask[sunny_steps_mask].index
-
-             trade_df = df_day[df_day["step"].isin(sunny_steps)]
-
-             grouped_house = df_day.groupby("house").sum(numeric_only=True)
-             grouped_step = df_day.groupby("step").sum(numeric_only=True)
-
-             total_demand = grouped_step["total_demand"].sum()
-             total_solar = grouped_step["total_solar"].sum()
-             total_p2p_buy = df_day['p2p_buy'].sum()
-             total_p2p_sell = df_day['p2p_sell'].sum()
-             total_actual_grid_import = df_day['grid_import_with_p2p'].sum()
-
-             baseline_cost_per_house = grouped_house["baseline_cost"]
-             actual_cost_per_house = grouped_house["actual_cost"]
-             cost_savings_per_house = baseline_cost_per_house - actual_cost_per_house
-             day_total_cost_savings = cost_savings_per_house.sum()
-
-             if baseline_cost_per_house.sum() > 0:
-                 overall_cost_savings_pct = day_total_cost_savings / baseline_cost_per_house.sum()
-             else:
-                 overall_cost_savings_pct = 0.0
-
-             baseline_import_per_house = grouped_house["grid_import_no_p2p"]
-             actual_import_per_house = grouped_house["grid_import_with_p2p"]
-             import_reduction_per_house = baseline_import_per_house - actual_import_per_house
-             day_total_import_reduction = import_reduction_per_house.sum()
-
-             if baseline_import_per_house.sum() > 0:
-                 overall_import_reduction_pct = day_total_import_reduction / baseline_import_per_house.sum()
-             else:
-                 overall_import_reduction_pct = 0.0
-
-             fairness_cost_savings = compute_jains_fairness(cost_savings_per_house.values)
-             fairness_import_reduction = compute_jains_fairness(import_reduction_per_house.values)
-             fairness_rewards = compute_jains_fairness(grouped_house["reward"].values)
-             fairness_p2p_buy = compute_jains_fairness(grouped_house["p2p_buy"].values)
-             fairness_p2p_sell = compute_jains_fairness(grouped_house["p2p_sell"].values)
-             fairness_p2p_total = compute_jains_fairness((grouped_house["p2p_buy"] + grouped_house["p2p_sell"]).values)
-
-             daily_summaries.append({
-                 "day": day,
-                 "day_total_demand": total_demand,
-                 "day_total_solar": total_solar,
-                 "day_p2p_buy": total_p2p_buy,
-                 "day_p2p_sell": total_p2p_sell,
-                 "cost_savings_abs": day_total_cost_savings,
-                 "cost_savings_pct": overall_cost_savings_pct,
-                 "fairness_cost_savings": fairness_cost_savings,
-                 "grid_reduction_abs": day_total_import_reduction,
-                 "grid_reduction_pct": overall_import_reduction_pct,
-                 "fairness_grid_reduction": fairness_import_reduction,
-                 "fairness_reward": fairness_rewards,
-                 "fairness_p2p_buy": fairness_p2p_buy,
-                 "fairness_p2p_sell": fairness_p2p_sell,
-                 "fairness_p2p_total": fairness_p2p_total,
-             })
-
-     # Final Processing and Saving
-     evaluation_end = time.time()
-     total_eval_time = evaluation_end - evaluation_start
-     print(f"\nEvaluation loop finished. Total time: {total_eval_time:.2f} seconds.")
-
-     all_days_df = pd.DataFrame(all_logs)
-     if not all_days_df.empty:
-         # Save step-level logs
-         combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
-         all_days_df.to_csv(combined_csv_path, index=False)
-         print(f"Saved combined step-level logs to: {combined_csv_path}")
-
-         # Save timing logs
-         step_timing_df = pd.DataFrame(step_timing_list)
-         timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
-         step_timing_df.to_csv(timing_csv_path, index=False)
-         print(f"Saved step timing logs to: {timing_csv_path}")
-
-         # Save house-level summary
-         house_level_df = all_days_df.groupby("house").agg({
-             "baseline_cost": "sum",
-             "actual_cost": "sum",
-             "grid_import_no_p2p": "sum",
-             "grid_import_with_p2p": "sum",
-             "degradation_cost": "sum"
-         })
-         house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
-         house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]
-         house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
-         house_level_df.to_csv(house_summary_csv)
-         print(f"Saved final summary per house to: {house_summary_csv}")
-
-         # Calculate Final Summary Metrics
-         daily_summary_df = pd.DataFrame(daily_summaries)
-
-         fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
-         fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)
-
-         total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
-         total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
-         pct_cost_savings_all = total_cost_savings_all / total_baseline_cost_all if total_baseline_cost_all > 0 else 0.0
-
-         total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
-         total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
-         pct_grid_reduction_all = total_grid_reduction_all / total_baseline_import_all if total_baseline_import_all > 0 else 0.0
-
-         total_degradation_cost_all = all_days_df["degradation_cost"].sum()
-
-         # Calculate Alternative Performance Metrics
-         agg_solar_per_step = all_days_df.groupby(['day', 'step'])['total_solar'].sum()
-         num_agents_total = len(all_days_df['house'].unique())
-         sunny_steps_mask = agg_solar_per_step > (SOLAR_THRESHOLD * num_agents_total)
-         sunny_df = all_days_df[all_days_df.set_index(['day', 'step']).index.isin(sunny_steps_mask[sunny_steps_mask].index)]
-
-         baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
-         actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
-         grid_reduction_sunny_pct = (baseline_import_sunny - actual_import_sunny) / baseline_import_sunny if baseline_import_sunny > 0 else 0.0
-
-         total_p2p_buy = all_days_df['p2p_buy'].sum()
-         total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
-         community_sourcing_rate_pct = total_p2p_buy / (total_p2p_buy + total_actual_grid_import) if (total_p2p_buy + total_actual_grid_import) > 0 else 0.0
-
-         total_p2p_sell = all_days_df['p2p_sell'].sum()
-         total_grid_export = all_days_df['grid_export'].sum()
-         solar_sharing_efficiency_pct = total_p2p_sell / (total_p2p_sell + total_grid_export) if (total_p2p_sell + total_grid_export) > 0 else 0.0
-
-         # Calculate cost savings in sunny hours
-         baseline_cost_sunny = sunny_df['baseline_cost'].sum()
-         actual_cost_sunny = sunny_df['actual_cost'].sum()
-         cost_savings_sunny_pct = (baseline_cost_sunny - actual_cost_sunny) / baseline_cost_sunny if baseline_cost_sunny > 0 else 0.0
-
-         # Create and Save Final Summary CSV
-         final_row = {
-             "day": "ALL_DAYS_SUMMARY",
-             "cost_savings_abs": total_cost_savings_all,
-             "cost_savings_pct": pct_cost_savings_all,
-             "grid_reduction_abs": total_grid_reduction_all,
-             "grid_reduction_pct": pct_grid_reduction_all,
-             "fairness_cost_savings": fairness_cost_all,
-             "fairness_grid_reduction": fairness_grid_all,
-             "total_degradation_cost": total_degradation_cost_all,
-             "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct,
-             "community_sourcing_rate_pct": community_sourcing_rate_pct,
-             "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct,
-         }
-         final_row_df = pd.DataFrame([final_row])
-
-         # Ensure daily summary has columns before concatenating
-         if not daily_summary_df.empty:
-             daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
-
-         summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
-         daily_summary_df.to_csv(summary_csv, index=False)
-         print(f"Saved day-level summary with final multi-day row to: {summary_csv}")
-
-         # Final Printout
-         print("\n================== EVALUATION SUMMARY ==================")
-         print(f"Evaluation finished for {DAYS_TO_EVALUATE} days.\n")
-         print("--- Standard Metrics (24-Hour Average) ---")
-         print(f"Total grid reduction: {total_grid_reduction_all:.2f} kWh ({pct_grid_reduction_all:.2%})")
-         print(f"Total cost savings: ${total_cost_savings_all:.2f} ({pct_cost_savings_all:.2%})")
-         print(f"Jain's fairness on grid reduction: {fairness_grid_all:.3f}")
-         print(f"Jain's fairness on cost savings: {fairness_cost_all:.3f}\n")
-         print("--- Alternative Metrics (Highlighting Peak Performance) ---")
-         print(f"Grid reduction during solar hours: {grid_reduction_sunny_pct:.2%}")
-         print(f"Cost savings during solar hours: {cost_savings_sunny_pct:.2%}")
-         print(f"Community sourcing rate: {community_sourcing_rate_pct:.2%}")
-         print(f"Solar sharing efficiency: {solar_sharing_efficiency_pct:.2%}")
-         print("=========================================================")
-
-         # Generate Plots
-         # Create a clean version of the daily summary for plotting, excluding the final summary row
-         plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
-         plot_daily_df["day"] = plot_daily_df["day"].astype(int)
-
-         # Plot 1: Daily Cost Savings Percentage
-         plt.figure(figsize=(12, 6))
-         plt.bar(plot_daily_df["day"], plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
-         plt.xlabel("Day")
-         plt.ylabel("Cost Savings (%)")
-         plt.title("Daily Community Cost Savings Percentage")
-         plt.xticks(plot_daily_df["day"])
-         plt.grid(axis='y', linestyle='--', alpha=0.7)
-         plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
-         plt.close()
-
-         # Plot 2: Daily Total Demand vs. Solar
-         plt.figure(figsize=(12, 6))
-         bar_width = 0.4
-         days = plot_daily_df["day"]
-         plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width, label="Total Demand", color='coral')
-         plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width, label="Total Solar Generation", color='gold')
-         plt.xlabel("Day")
-         plt.ylabel("Energy (kWh)")
-         plt.title("Total Community Demand vs. Solar Generation Per Day")
-         plt.xticks(days)
-         plt.legend()
-         plt.grid(axis='y', linestyle='--', alpha=0.7)
-         plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
-         plt.close()
-
-         # Plot 3: Combined Time Series of Energy Flows
-         # Aggregate data by global step across all days
-         step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
-         step_group["global_step"] = (step_group["day"] - 1) * eval_num_steps + step_group["step"]
-         fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)
-
-         # Subplot 1: Grid Import vs P2P Buy
-         ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"], label="Grid Import (with P2P)", color='r')
-         ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
-         ax1.set_ylabel("Energy (kWh)")
-         ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
-         ax1.legend()
-         ax1.grid(True, linestyle='--', alpha=0.6)
-
-         # Subplot 2: Grid Export vs P2P Sell
-         ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
-         ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
-         ax2.set_xlabel("Global Timestep")
-         ax2.set_ylabel("Energy (kWh)")
-         ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
-         ax2.legend()
-         ax2.grid(True, linestyle='--', alpha=0.6)
-
-         plt.tight_layout()
-         plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
-         plt.close()
-
-         # Plot 4: Stacked Bar of Daily Energy Sources
-         # Shows how the community's baseline grid import is met by actual grid import vs. P2P trading
-         daily_agg = all_days_df.groupby("day").sum(numeric_only=True)
-
-         plt.figure(figsize=(12, 7))
-         plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"], label="Grid Import (with P2P)", color='crimson')
-         plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"], label="P2P Buy", color='limegreen')
-         plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"], label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')
-
-         plt.xlabel("Day")
-         plt.ylabel("Energy (kWh)")
-         plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
-         plt.xticks(daily_agg.index)
-         plt.legend()
-         plt.grid(axis='y', linestyle='--', alpha=0.7)
-         plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
-         plt.close()
-
-         # Plot 5: Fairness Metrics Over Time
-         plt.figure(figsize=(12, 6))
-         plt.plot(plot_daily_df["day"], plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
-         plt.plot(plot_daily_df["day"], plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
-         plt.plot(plot_daily_df["day"], plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
-         plt.xlabel("Day")
-         plt.ylabel("Jain's Fairness Index")
-         plt.title("Daily Fairness Metrics")
-         plt.xticks(plot_daily_df["day"])
-         plt.ylim(0, 1.05)
-         plt.legend()
-         plt.grid(True, linestyle='--', alpha=0.7)
-         plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
-         plt.close()
-
-         # Plot 6: Per-House Savings and Reductions
-         # Uses the house_level_df which summarizes stats over all evaluated days
-         fig, ax1 = plt.subplots(figsize=(15, 7))
-
-         house_ids_str = house_level_df.index.astype(str)
-         bar_width = 0.4
-         index = np.arange(len(house_ids_str))
-
-         # Bar chart for cost savings
-         color1 = 'tab:green'
-         ax1.set_xlabel('House ID')
-         ax1.set_ylabel('Total Cost Savings ($)', color=color1)
-         ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width, label='Cost Savings', color=color1)
-         ax1.tick_params(axis='y', labelcolor=color1)
-         ax1.set_xticks(index)
-         ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")
-
-         # Instantiate a second y-axis for grid import reduction
-         ax2 = ax1.twinx()
-         color2 = 'tab:blue'
-         ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
-         ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width, label='Import Reduction', color=color2)
-         ax2.tick_params(axis='y', labelcolor=color2)
-
-         plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {DAYS_TO_EVALUATE} days)')
-
-         fig.tight_layout()
-         plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
-         plt.close()
-
-         # Plot 7: Price Dynamics for a Single Day
-         # Visualize the prices the agents see on the first day of evaluation
-         day1_prices = all_days_df[all_days_df['day'] == 1][['step', 'grid_price', 'peer_price']].drop_duplicates()
-         plt.figure(figsize=(12, 6))
-         plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
-         plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
-         plt.xlabel("Timestep of Day")
-         plt.ylabel("Price ($/kWh)")
-         plt.title("Price Dynamics on Day 1")
-         plt.legend()
-         plt.grid(True, linestyle='--', alpha=0.6)
-         plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
-         plt.close()
-
-         # Plot 8: Battery State of Charge (SoC) for a Sample of Houses
-         day1_df = all_days_df[all_days_df['day'] == 1]
-         battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()
-
-         if len(battery_houses) > 0:
-             sample_houses = battery_houses[:min(4, len(battery_houses))]  # Plot up to 4 houses
-             plt.figure(figsize=(12, 6))
-             for house in sample_houses:
-                 house_df = day1_df[day1_df['house'] == house]
-                 plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')
-
-             plt.xlabel("Timestep of Day")
-             plt.ylabel("State of Charge (%)")
-             plt.title("Battery SoC on Day 1 for Sample Houses")
-             plt.legend()
-             plt.grid(True, linestyle='--', alpha=0.6)
-             plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
-             plt.close()
-
-     print("All plots have been generated and saved. Evaluation complete.")
-
-
- if __name__ == "__main__":
-     main()
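
For reference, compute_jains_fairness above implements Jain's index J(x) = (sum x)^2 / (n * sum x^2), which is 1.0 when every house gets an equal share and falls to 1/n when a single house captures everything. A quick worked example (the allocations are invented):

import numpy as np

def jain(x):
    x = np.asarray(x, dtype=float)
    return float(x.sum() ** 2 / (len(x) * (x ** 2).sum() + 1e-8))

print(jain([5.0, 5.0, 5.0, 5.0]))   # 1.0   : savings split evenly
print(jain([20.0, 0.0, 0.0, 0.0]))  # 0.25  : one house takes all (1/n with n=4)
print(jain([8.0, 6.0, 4.0, 2.0]))   # ~0.833: moderately uneven split
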
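The evaluation script above picks the newest checkpoint by parsing the episode number out of each filename and comparing numerically, since a plain lexicographic sort would rank 'ep900' after 'ep10000'. A small self-contained sketch of the same pattern (the directory and patterns here are examples, not paths from this repo):

import glob
import os
import re

def latest_checkpoint(model_dir, pattern):
    # e.g. pattern = "inter_ep*.pth" or "low_cluster0_ep*.pth"
    ckpts = glob.glob(os.path.join(model_dir, pattern))
    if not ckpts:
        raise FileNotFoundError(f"No checkpoints matching {pattern} in {model_dir}")
    # Compare on the integer episode id embedded in the filename
    return max(ckpts, key=lambda p: int(re.search(r"ep(\d+)\.pth$", p).group(1)))
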
Other_algorithms/HC_MAPPO/HC_MAPPO_train.py DELETED
@@ -1,579 +0,0 @@
1
- import os
2
- import sys
3
- import time
4
- from datetime import datetime, timedelta
5
- import re
6
-
7
- import numpy as np
8
- import torch
9
- import pandas as pd
10
- import matplotlib.pyplot as plt
11
-
12
- # Allow imports from project root
13
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
14
-
15
- from cluster import InterClusterCoordinator, InterClusterLedger
16
- from Environment.cluster_env_wrapper import make_vec_env
17
- from mappo.trainer.mappo import MAPPO
18
-
19
-
20
- def recursive_sum(item):
21
- total = 0
22
- # Check if the item is a list, array, or other iterable, but not a string
23
- if hasattr(item, '__iter__') and not isinstance(item, str):
24
- for sub_item in item:
25
- total += recursive_sum(sub_item)
26
- # If it's a single number, just add it
27
- elif np.isreal(item):
28
- total += item
29
- # Ignore any non-numeric, non-iterable items
30
- return total
31
-
32
-
33
- def main():
34
- overall_start_time = time.time()
35
-
36
- # Training Configuration Parameters
37
- STATE_TO_RUN = "oklahoma" # or "colorado", "oklahoma"
38
- DATA_PATH = "data/training/1000houses_152days_TRAIN.csv"
39
-
40
- # Dynamically extract the number of agents from the file path
41
- match = re.search(r'(\d+)houses', DATA_PATH)
42
- if not match:
43
- raise ValueError("Could not extract the number of houses from DATA_PATH.")
44
- NUMBER_OF_AGENTS = int(match.group(1))
45
-
46
- CLUSTER_SIZE = 10
47
- NUM_EPISODES = 10000
48
- BATCH_SIZE = 256
49
- CHECKPOINT_INTERVAL = 100000 # Reduced for more frequent saving during testing
50
- WINDOW_SIZE = 80
51
- MAX_TRANSFER_KWH = 100000
52
-
53
- LR = 2e-4
54
- GAMMA = 0.95
55
- LAMBDA = 0.95
56
- CLIP_EPS = 0.2
57
- K_EPOCHS = 4
58
-
59
- JOINT_TRAINING_START_EPISODE = 2000
60
- FREEZE_HIGH_FOR_EPISODES = 20
61
- FREEZE_LOW_FOR_EPISODES = 10
62
-
63
- # Build run directories
64
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
65
- run_name = f"hierarchical_{STATE_TO_RUN}_{NUMBER_OF_AGENTS}agents_" \
66
- f"{CLUSTER_SIZE}size_{NUM_EPISODES}eps_{timestamp}"
67
- root_dir = os.path.join("FINALE_FINALE_FINALE", run_name) # New folder for new runs
68
- models_dir = os.path.join(root_dir, "models")
69
- logs_dir = os.path.join(root_dir, "logs")
70
- plots_dir = os.path.join(root_dir, "plots")
71
-
72
- for d in (models_dir, logs_dir, plots_dir):
73
- os.makedirs(d, exist_ok=True)
74
- print(f"Logging to: {root_dir}")
75
-
76
- # Environment & Agent Initialization
77
-
78
- # Instantiate the environment using vectorized environment factory function
79
- # This single call replaces the manual creation of base_env and ClusterEnvWrapper
80
- cluster_env = make_vec_env(
81
- data_path=DATA_PATH,
82
- time_freq="15T",
83
- cluster_size=CLUSTER_SIZE,
84
- state=STATE_TO_RUN
85
- )
86
-
87
- # Get environment parameters from the vectorized environment object
88
- n_clusters = cluster_env.num_envs
89
- sample_subenv = cluster_env.cluster_envs[0] # Access a sample sub-env
90
- n_agents_per_cluster = sample_subenv.num_agents
91
-
92
- local_dim = sample_subenv.observation_space.shape[-1]
93
- global_dim = n_agents_per_cluster * local_dim
94
- # Access the action dim from the first part of the Tuple action space
95
- act_dim = sample_subenv.action_space[0].shape[-1]
96
- # The total number of transitions collected each episode is (steps_per_day * num_clusters)
97
- total_buffer_size = sample_subenv.num_steps * n_clusters
98
- print(f"Low-level agent buffer size set to: {total_buffer_size}")
99
-
100
- print(f"Created {n_clusters} clusters.")
101
- print(f"Shared low-level agent: {n_agents_per_cluster} agents per cluster, "
102
- f"obs_dim={local_dim}, global_dim={global_dim}, act_dim={act_dim}")
103
-
104
- print(f"Creating {n_clusters} independent low-level MAPPO agents...")
105
- low_agents = []
106
- for i in range(n_clusters):
107
- # Each agent's buffer only needs to be as long as one episode day
108
- agent_buffer_size = sample_subenv.num_steps
109
-
110
- agent = MAPPO(
111
- n_agents=n_agents_per_cluster,
112
- local_dim=local_dim,
113
- global_dim=global_dim,
114
- act_dim=act_dim,
115
- lr=LR,
116
- gamma=GAMMA,
117
- lam=LAMBDA,
118
- clip_eps=CLIP_EPS,
119
- k_epochs=K_EPOCHS,
120
- batch_size=BATCH_SIZE,
121
- episode_len=agent_buffer_size
122
- )
123
- low_agents.append(agent)
124
-
125
- # Define dimensions for the high-level MAPPO agent
126
- OBS_DIM_HI_LOCAL = 7 # Each cluster has 7 features for its local state
127
- act_dim_inter = 2 # Export/Import preference for each cluster
128
-
129
- # The global state for the high-level agent is the concatenation
130
- # of all high-level local states
131
- OBS_DIM_HI_GLOBAL = n_clusters * OBS_DIM_HI_LOCAL
132
-
133
- print(f"Inter-cluster agent (MAPPO): n_agents={n_clusters}, "
134
- f"local_dim={OBS_DIM_HI_LOCAL}, global_dim={OBS_DIM_HI_GLOBAL}, act_dim={act_dim_inter}")
135
-
136
- # Instantiate MAPPO for the inter-cluster agent
137
- inter_agent = MAPPO(
138
- n_agents=n_clusters,
139
- local_dim=OBS_DIM_HI_LOCAL,
140
- global_dim=OBS_DIM_HI_GLOBAL,
141
- act_dim=act_dim_inter,
142
- lr=LR,
143
- gamma=GAMMA,
144
- lam=LAMBDA,
145
- clip_eps=CLIP_EPS,
146
- k_epochs=K_EPOCHS,
147
- batch_size=BATCH_SIZE,
148
- episode_len=sample_subenv.num_steps
149
- )
150
-
151
- ledger = InterClusterLedger()
152
- coordinator = InterClusterCoordinator(
153
- cluster_env,
154
- inter_agent,
155
- ledger,
156
- max_transfer_kwh=MAX_TRANSFER_KWH
157
- )
158
-
159
- # Training loop
160
- total_steps = 0
161
- episode_log_data = []
162
- performance_metrics_log = []
163
- intra_log = {}
164
- inter_log = {}
165
- total_log = {}
166
- cost_log = {}
167
-
168
- for ep in range(1, NUM_EPISODES + 1):
169
- step_count = 0
170
- start_time = time.time()
171
- ep_total_inter_cluster_reward = 0.0
172
- day_logs = []
173
-
174
- obs_clusters, _ = cluster_env.reset()
175
-
176
- if ep > 1:
177
- # For vectorized envs, call is the right way to invoke a method on all sub-envs
178
- # This returns a list of dictionaries, one from each cluster env
179
- all_cluster_metrics = cluster_env.call('get_episode_metrics')
180
-
181
- # Aggregate the metrics from all clusters into a single system-wide summary
182
- system_metrics = {
183
- "grid_reduction_entire_day": sum(m["grid_reduction_entire_day"] for m in all_cluster_metrics),
184
- "grid_reduction_peak_hours": sum(m["grid_reduction_peak_hours"] for m in all_cluster_metrics),
185
- "total_cost_savings": sum(m["total_cost_savings"] for m in all_cluster_metrics),
186
- "battery_degradation_cost_total": sum(m["battery_degradation_cost_total"] for m in all_cluster_metrics),
187
- # For fairness, we average the fairness index across clusters
188
- "fairness_on_cost_savings": np.mean([m["fairness_on_cost_savings"] for m in all_cluster_metrics]),
189
- "Episode": ep - 1 # Associate with the episode that just finished
190
- }
191
-
192
- # Append the aggregated dictionary to our log
193
- performance_metrics_log.append(system_metrics)
194
-
195
- # Use a single 'done' flag for the episode
196
- done_all = False
197
-
198
- # Initialize rewards and costs
199
- cluster_rewards = np.zeros((n_clusters, n_agents_per_cluster), dtype=np.float32)
200
- total_cost = 0.0
201
- total_grid_import = 0.0
202
-
203
- # Determine training phase
204
- is_phase_1 = ep < JOINT_TRAINING_START_EPISODE
205
-
206
- if ep == 1:
207
- print(f"\n--- Starting Phase 1: Training Low-Level Agent Only (up to ep {JOINT_TRAINING_START_EPISODE-1}) ---")
208
- if ep == JOINT_TRAINING_START_EPISODE:
209
- print(f"\n--- Starting Phase 2: Joint Hierarchical Training (from ep {JOINT_TRAINING_START_EPISODE}) ---")
210
-
211
- # The main loop continues as long as the episode is not done
212
- while not done_all:
213
- total_steps += 1
214
- step_count += 1
215
-
216
- # Action Selection (Low-Level)
217
- batch_global_obs = obs_clusters.reshape(n_clusters, -1)
218
- low_level_actions_list = []
219
- low_level_logps_list = []
220
-
221
- for c_idx in range(n_clusters):
222
- agent = low_agents[c_idx]
223
- local_obs_cluster = obs_clusters[c_idx]
224
- global_obs_cluster = batch_global_obs[c_idx]
225
- actions, logps = agent.select_action(local_obs_cluster, global_obs_cluster)
226
- low_level_actions_list.append(actions)
227
- low_level_logps_list.append(logps)
228
-
229
- low_level_actions = np.stack(low_level_actions_list)
230
- low_level_logps = np.stack(low_level_logps_list)
231
-
232
- # Action Selection & Transfers (High-Level, Phase 2 only)
233
- if is_phase_1:
234
- exports, imports = None, None
235
- else:
236
- inter_cluster_obs_local_list = [coordinator.get_cluster_state(se, step_count) for se in cluster_env.cluster_envs]
237
- inter_cluster_obs_local = np.array(inter_cluster_obs_local_list)
238
-
239
- # Create the global state for the high-level agent
240
- inter_cluster_obs_global = inter_cluster_obs_local.flatten()
241
-
242
- # Call select_action with local and global states
243
- high_level_action, high_level_logp = inter_agent.select_action(
244
- inter_cluster_obs_local,
245
- inter_cluster_obs_global
246
- )
247
-
248
- current_reports = {i: {'export_capacity': cluster_env.get_export_capacity(i), 'import_capacity': cluster_env.get_import_capacity(i)} for i in range(n_clusters)}
249
- exports, imports = coordinator.build_transfers(high_level_action, current_reports)
250
-
251
- # Environment Step
252
- next_obs_clusters, rewards, done_all, step_info = cluster_env.step(
253
- low_level_actions, exports=exports, imports=imports
254
- )
255
- cluster_infos = step_info.get("cluster_infos")
256
- day_logs.append({
257
- "costs": cluster_infos["costs"],
258
- "grid_import_no_p2p": cluster_infos["grid_import_no_p2p"],
259
- "charge_amount": cluster_infos.get("charge_amount"),
260
- "discharge_amount": cluster_infos.get("discharge_amount")
261
- })
262
-
263
- # Reward Calculation and Data Storage
264
- per_agent_rewards = np.stack(cluster_infos['agent_rewards'])
265
- rewards_for_buffer = per_agent_rewards
266
-
267
- if not is_phase_1:
268
- transfers_for_logging = (exports, imports)
269
- high_level_rewards_per_cluster = coordinator.compute_inter_cluster_reward(
270
- all_cluster_infos=cluster_infos,
271
- actual_transfers=transfers_for_logging,
272
- step_count=step_count
273
- )
274
- ep_total_inter_cluster_reward += np.sum(high_level_rewards_per_cluster)
275
-
276
- # Get next state for high-level agent's buffer
277
- next_inter_cluster_obs_local_list = [coordinator.get_cluster_state(se, step_count + 1) for se in cluster_env.cluster_envs]
278
- next_inter_cluster_obs_local = np.array(next_inter_cluster_obs_local_list)
279
-
280
- # Create the next global state
281
- next_inter_cluster_obs_global = next_inter_cluster_obs_local.flatten()
282
-
283
- # Store the transition in the high-level MAPPO agent's buffer
284
- inter_agent.store(
285
- inter_cluster_obs_local, # s_local
286
- inter_cluster_obs_global, # s_global
287
- high_level_action, # action
288
- high_level_logp, # log_prob
289
- high_level_rewards_per_cluster,# reward
290
- [done_all] * n_clusters, # done
291
- next_inter_cluster_obs_global # s'_global
292
- )
293
-
294
- bonus_per_agent = np.zeros_like(per_agent_rewards)
295
- for c_idx in range(n_clusters):
296
- num_agents_in_cluster = per_agent_rewards.shape[1]
297
- if num_agents_in_cluster > 0:
298
- bonus = high_level_rewards_per_cluster[c_idx] / num_agents_in_cluster
299
- bonus_per_agent[c_idx, :] = bonus
300
- rewards_for_buffer = per_agent_rewards + bonus_per_agent
301
-
302
- # Data Storage (Low-Level)
303
- dones_list = step_info.get("cluster_dones")
304
- for idx in range(n_clusters):
305
- low_agents[idx].store(
306
- obs_clusters[idx],
307
- batch_global_obs[idx],
308
- low_level_actions[idx],
309
- low_level_logps[idx],
310
- rewards_for_buffer[idx],
311
- dones_list[idx],
312
- next_obs_clusters[idx].reshape(-1)
313
- )
314
-
315
- cluster_rewards += per_agent_rewards
316
- total_cost += np.sum(cluster_infos['costs'])
317
- total_grid_import += np.sum(cluster_infos['grid_import_with_p2p'])
318
- obs_clusters = next_obs_clusters
319
-
320
- # Agent Updates (End of Episode)
321
- if is_phase_1:
322
- for agent in low_agents:
323
- agent.update()
324
- else:
325
- CYCLE_LENGTH = FREEZE_HIGH_FOR_EPISODES + FREEZE_LOW_FOR_EPISODES
326
- phase2_episode_num = ep - JOINT_TRAINING_START_EPISODE
327
- position_in_cycle = phase2_episode_num % CYCLE_LENGTH
328
-
329
- if position_in_cycle < FREEZE_HIGH_FOR_EPISODES:
330
- print(f"Updating ALL LOW-LEVEL agents (High-level is frozen).")
331
- for agent in low_agents:
332
- agent.update()
333
- else:
334
- print(f"Updating HIGH-LEVEL agent (Low-level is frozen).")
335
- inter_agent.update()
336
-
337
- # Unified End-of-Episode Logging
338
- duration = time.time() - start_time
339
- num_low_level_agents = n_clusters * n_agents_per_cluster
340
- get_price_fn = cluster_env.cluster_envs[0].get_grid_price
341
-
342
- # Calculate Costs & Cost Reduction
343
- # Use the recursive_sum helper to safely flatten the nested per-cluster data
344
- # so that each step is guaranteed to yield a single scalar
345
- baseline_costs_per_step = [
346
- recursive_sum(entry["grid_import_no_p2p"]) * get_price_fn(i)
347
- for i, entry in enumerate(day_logs)
348
- ]
349
- total_baseline_cost = sum(baseline_costs_per_step)
350
-
351
- # Apply the same robust method to the actual costs
352
- actual_costs_per_step = [recursive_sum(entry["costs"]) for entry in day_logs]
353
- total_actual_cost = sum(actual_costs_per_step)
354
-
355
- cost_reduction_pct = (1 - (total_actual_cost / total_baseline_cost)) * 100 if total_baseline_cost > 0 else 0.0
356
-
357
- # Calculate All Reward Metrics
358
- # Intra-Cluster (Low-Level) Rewards
359
- total_reward_intra = cluster_rewards.sum()
360
- mean_reward_intra = total_reward_intra / num_low_level_agents if num_low_level_agents > 0 else 0.0
361
-
362
- # Inter-Cluster (High-Level) Rewards
363
- total_reward_inter = ep_total_inter_cluster_reward
364
- mean_reward_inter = total_reward_inter / step_count if step_count > 0 else 0.0
365
-
366
- # Total System Rewards
367
- total_reward_system = total_reward_intra + total_reward_inter
368
- mean_reward_system = total_reward_system / num_low_level_agents if num_low_level_agents > 0 else 0.0
369
-
370
- # Populate Logs for Plotting (to keep generate_plots working)
371
- intra_log.setdefault('total', []).append(total_reward_intra)
372
- intra_log.setdefault('mean', []).append(mean_reward_intra)
373
- inter_log.setdefault('total', []).append(total_reward_inter)
374
- inter_log.setdefault('mean', []).append(mean_reward_inter)
375
- total_log.setdefault('total', []).append(total_reward_system)
376
- total_log.setdefault('mean', []).append(mean_reward_system)
377
- cost_log.setdefault('total_cost', []).append(total_actual_cost)
378
- cost_log.setdefault('cost_without_p2p', []).append(total_baseline_cost)
379
-
380
- # Populate the Main Log for the Final CSV File
381
- episode_log_data.append({
382
- "Episode": ep,
383
- "Mean_Reward_System": mean_reward_system,
384
- "Mean_Reward_Intra": mean_reward_intra,
385
- "Mean_Reward_Inter": mean_reward_inter,
386
- "Total_Reward_System": total_reward_system,
387
- "Total_Reward_Intra": total_reward_intra,
388
- "Total_Reward_Inter": total_reward_inter,
389
- "Cost_Reduction_Pct": cost_reduction_pct,
390
- "Episode_Duration": duration,
391
- })
392
-
393
- # Print Final Episode Summary
394
- print(f"Ep {ep}/{NUM_EPISODES} | "
395
- f"Mean System R: {mean_reward_system:.3f} | "
396
- f"Cost Red: {cost_reduction_pct:.1f}% | "
397
- f"Time: {duration:.2f}s")
398
-
399
- if ep % CHECKPOINT_INTERVAL == 0 or ep == NUM_EPISODES:
400
- for c_idx, agent in enumerate(low_agents):
401
- agent.save(os.path.join(models_dir, f"low_cluster{c_idx}_ep{ep}.pth"))
402
- inter_agent.save(os.path.join(models_dir, f"inter_ep{ep}.pth"))
403
- print(f"Saved checkpoint at episode {ep}")
404
-
405
- print("Training completed! Aggregating final logs...")
406
-
407
- # Capture the metrics for the very last episode
408
- final_cluster_metrics = cluster_env.call('get_episode_metrics')
409
- final_system_metrics = {
410
- "grid_reduction_entire_day": sum(m["grid_reduction_entire_day"] for m in final_cluster_metrics),
411
- "grid_reduction_peak_hours": sum(m["grid_reduction_peak_hours"] for m in final_cluster_metrics),
412
- "total_cost_savings": sum(m["total_cost_savings"] for m in final_cluster_metrics),
413
- "battery_degradation_cost_total": sum(m["battery_degradation_cost_total"] for m in final_cluster_metrics),
414
- "fairness_on_cost_savings": np.mean([m["fairness_on_cost_savings"] for m in final_cluster_metrics]),
415
- "Episode": NUM_EPISODES
416
- }
417
- performance_metrics_log.append(final_system_metrics)
418
-
419
- # Create, Merge, and Save Final DataFrame
420
- df_rewards_log = pd.DataFrame(episode_log_data)
421
- df_perf_log = pd.DataFrame(performance_metrics_log)
422
- df_final_log = pd.merge(df_rewards_log, df_perf_log, on="Episode")
423
-
424
- log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")
425
-
426
- # Add total training time to the dataframe before saving
427
- overall_end_time = time.time()
428
- total_duration_seconds = overall_end_time - overall_start_time
429
- total_time_row = pd.DataFrame([{"Episode": "Total_Training_Time", "Episode_Duration": total_duration_seconds}])
430
- df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)
431
-
432
- # Reorder and select columns for the final CSV
433
- columns_to_save = [
434
- "Episode",
435
- "Mean_Reward_System",
436
- "Mean_Reward_Intra",
437
- "Mean_Reward_Inter",
438
- "Total_Reward_System",
439
- "Total_Reward_Intra",
440
- "Total_Reward_Inter",
441
- "Cost_Reduction_Pct",
442
- "battery_degradation_cost_total",
443
- "Episode_Duration",
444
- "total_cost_savings",
445
- "grid_reduction_entire_day",
446
- "fairness_on_cost_savings"
447
- ]
448
- df_to_save = df_to_save[[col for col in columns_to_save if col in df_to_save.columns]]
449
- df_to_save.to_csv(log_csv_path, index=False)
450
- print(f"Saved comprehensive training performance log to: {log_csv_path}")
451
-
452
- generate_plots(
453
- plots_dir=plots_dir,
454
- num_episodes=NUM_EPISODES,
455
- intra_log=intra_log,
456
- inter_log=inter_log,
457
- total_log=total_log,
458
- cost_log=cost_log,
459
- df_final_log=df_final_log
460
- )
461
-
462
- overall_end_time = time.time()
463
- total_duration_seconds = overall_end_time - overall_start_time
464
- # Format into hours, minutes, seconds
465
- total_duration_formatted = str(timedelta(seconds=int(total_duration_seconds)))  # requires: from datetime import timedelta
466
-
467
- print("\n" + "="*50)
468
- print(f"Total Training Time: {total_duration_formatted} (HH:MM:SS)")
469
- print("="*50)
470
-
471
-
472
- def generate_plots(
473
- plots_dir: str,
474
- num_episodes: int,
475
- intra_log: dict,
476
- inter_log: dict,
477
- total_log: dict,
478
- cost_log: dict,
479
- df_final_log: pd.DataFrame
480
- ):
481
- """
482
- Generates and saves all final plots after training is complete.
483
- """
484
- print("Training completed! Generating plots…")
485
-
486
- # Helper for moving average
487
- def moving_avg(series, window):
488
- return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()
489
-
490
- ma_window = 120
491
- episodes = np.arange(1, num_episodes + 1)
492
-
493
- # Plot 1: Intra-cluster (Low-Level) Rewards
494
- fig, ax = plt.subplots(figsize=(12, 7))
495
- ax.plot(episodes, moving_avg(intra_log['total'], ma_window), label=f'Total Reward (MA {ma_window})', linewidth=2)
496
- ax.set_xlabel("Episode")
497
- ax.set_ylabel("Total Intra-Cluster Reward", color='tab:blue')
498
- ax.tick_params(axis='y', labelcolor='tab:blue')
499
- ax.grid(True)
500
-
501
- ax2 = ax.twinx()
502
- ax2.plot(episodes, moving_avg(intra_log['mean'], ma_window), label=f'Mean Reward (MA {ma_window})', linewidth=2, linestyle='--', color='tab:cyan')
503
- ax2.set_ylabel("Mean Intra-Cluster Reward", color='tab:cyan')
504
- ax2.tick_params(axis='y', labelcolor='tab:cyan')
505
-
506
- fig.suptitle("Intra-Cluster (Low-Level Agent) Rewards")
507
- fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
508
- plt.savefig(os.path.join(plots_dir, "1_intra_cluster_rewards.png"), dpi=200)
509
- plt.close()
510
-
511
- # Plot 2: Inter-cluster (High-Level) Rewards
512
- fig, ax = plt.subplots(figsize=(12, 7))
513
- ax.plot(episodes, moving_avg(inter_log['total'], ma_window), label=f'Total Reward (MA {ma_window})', linewidth=2, color='tab:green')
514
- ax.set_xlabel("Episode")
515
- ax.set_ylabel("Total Inter-Cluster Reward", color='tab:green')
516
- ax.tick_params(axis='y', labelcolor='tab:green')
517
- ax.grid(True)
518
-
519
- ax2 = ax.twinx()
520
- ax2.plot(episodes, moving_avg(inter_log['mean'], ma_window), label=f'Mean Reward (MA {ma_window})', linewidth=2, linestyle='--', color='mediumseagreen')
521
- ax2.set_ylabel("Mean Inter-Cluster Reward", color='mediumseagreen')
522
- ax2.tick_params(axis='y', labelcolor='mediumseagreen')
523
-
524
- fig.suptitle("Inter-Cluster (High-Level Agent) Rewards")
525
- fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
526
- plt.savefig(os.path.join(plots_dir, "2_inter_cluster_rewards.png"), dpi=200)
527
- plt.close()
528
-
529
- # Plot 3: Total System Rewards
530
- fig, ax = plt.subplots(figsize=(12, 7))
531
- ax.plot(episodes, moving_avg(total_log['total'], ma_window), label=f'Total System Reward (MA {ma_window})', linewidth=2, color='tab:red')
532
- ax.set_xlabel("Episode")
533
- ax.set_ylabel("Total System Reward", color='tab:red')
534
- ax.tick_params(axis='y', labelcolor='tab:red')
535
- ax.grid(True)
536
-
537
- ax2 = ax.twinx()
538
- ax2.plot(episodes, moving_avg(total_log['mean'], ma_window), label=f'Mean System Reward (MA {ma_window})', linewidth=2, linestyle='--', color='salmon')
539
- ax2.set_ylabel("Mean System Reward per Agent", color='salmon')
540
- ax2.tick_params(axis='y', labelcolor='salmon')
541
-
542
- fig.suptitle("Total System Rewards (Intra + Inter)")
543
- fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
544
- plt.savefig(os.path.join(plots_dir, "3_total_system_rewards.png"), dpi=200)
545
- plt.close()
546
-
547
- # Plot 4: Cost Reduction
548
- cost_df = pd.DataFrame(cost_log)
549
- cost_df['cost_reduction_pct'] = (100 * (1 - cost_df['total_cost'] / cost_df['cost_without_p2p'])).clip(upper=100)  # cap the percentage itself at 100
550
- plt.figure(figsize=(12, 7))
551
- plt.plot(episodes, moving_avg(cost_df['cost_reduction_pct'], ma_window), label=f'Cost Reduction % (MA {ma_window})', color='purple', linewidth=2)
552
- plt.xlabel("Episode")
553
- plt.ylabel("Cost Reduction (%)")
554
- plt.title("Total System-Wide Cost Reduction")
555
- plt.legend()
556
- plt.grid(True)
557
- plt.savefig(os.path.join(plots_dir, "4_cost_reduction.png"), dpi=200)
558
- plt.close()
559
-
560
- df_plot = df_final_log[pd.to_numeric(df_final_log['Episode'], errors='coerce').notna()].copy()
561
- df_plot['Episode'] = pd.to_numeric(df_plot['Episode'])
562
-
563
- # Plot 5: Battery Degradation Cost
564
- plt.figure(figsize=(12, 7))
565
- plt.plot(df_plot["Episode"], moving_avg(df_plot["battery_degradation_cost_total"], ma_window),
566
- label=f'Degradation Cost (MA {ma_window})', color='darkgreen', linewidth=2)
567
- plt.xlabel("Episode")
568
- plt.ylabel("Total Degradation Cost ($)")
569
- plt.title("Total Battery Degradation Cost")
570
- plt.legend()
571
- plt.grid(True)
572
- plt.savefig(os.path.join(plots_dir, "5_battery_degradation_cost.png"), dpi=200)
573
- plt.close()
574
-
575
- print(f"All plots have been saved to: {plots_dir}")
576
-
577
-
578
- if __name__ == "__main__":
579
- main()
 
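For readers skimming this deleted script, the two-phase update schedule implemented above can be traced with a short standalone sketch. The constants below are hypothetical stand-ins; the real values are defined earlier in the script, outside this excerpt.

JOINT_TRAINING_START_EPISODE = 5   # hypothetical value for this sketch
FREEZE_HIGH_FOR_EPISODES = 2       # hypothetical
FREEZE_LOW_FOR_EPISODES = 1        # hypothetical
CYCLE_LENGTH = FREEZE_HIGH_FOR_EPISODES + FREEZE_LOW_FOR_EPISODES

for ep in range(1, 11):
    if ep < JOINT_TRAINING_START_EPISODE:
        phase = "Phase 1: update low-level agents only"
    else:
        pos = (ep - JOINT_TRAINING_START_EPISODE) % CYCLE_LENGTH
        phase = ("Phase 2: update low-level (high-level frozen)"
                 if pos < FREEZE_HIGH_FOR_EPISODES
                 else "Phase 2: update high-level (low-level frozen)")
    print(ep, phase)
# Episodes 1-4 train the low level only; episodes 5-6 and 8-9 update the
# low level; episodes 7 and 10 update the high level.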
 
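The logging block above sums per-step entries with a recursive_sum helper that is defined earlier in the script and not shown in this excerpt. A minimal implementation consistent with how it is called here, flattening arbitrarily nested lists and arrays into one scalar, might look like the sketch below; this is an assumption, not the original helper.

import numpy as np

def recursive_sum(x):
    # Collapse nested lists/tuples of per-agent values into a single float.
    if isinstance(x, (list, tuple)):
        return sum(recursive_sum(v) for v in x)
    return float(np.sum(x))  # handles scalars and numpy arrays alike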
 
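As a quick sanity check of the cost-reduction metric computed above, with made-up numbers: a $120 no-P2P baseline and a $90 actual daily cost yield a 25% reduction.

total_baseline_cost, total_actual_cost = 120.0, 90.0
cost_reduction_pct = (1 - total_actual_cost / total_baseline_cost) * 100
assert cost_reduction_pct == 25.0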
Other_algorithms/HC_MAPPO/cluster.py DELETED
@@ -1,140 +0,0 @@
1
- import os
2
- import sys
3
- import numpy as np
4
- import torch
5
-
6
- # Ensure project root is on the Python path
7
- # This assumes the repository's standard directory layout.
8
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
9
-
10
- from Environment.solar_sys_environment import SolarSys
11
- from Environment.cluster_env_wrapper import GlobalPriceVecEnvWrapper
12
- from Environment.cluster_env_wrapper import make_vec_env
13
- class InterClusterLedger:
14
- """
15
- Tracks inter-cluster debts/transfers.
16
- """
17
- def __init__(self):
18
- self.balances = {}
19
-
20
- def record_transfer(self, from_id: str, to_id: str, amount: float):
21
- if from_id == to_id: return
22
- self.balances.setdefault(from_id, {})
23
- self.balances.setdefault(to_id, {})
24
- self.balances[from_id][to_id] = self.balances[from_id].get(to_id, 0.0) - amount
25
- self.balances[to_id][from_id] = self.balances[to_id].get(from_id, 0.0) + amount
26
-
27
- def get_balance(self, a_id: str, b_id: str) -> float:
28
- return self.balances.get(a_id, {}).get(b_id, 0.0)
29
-
30
- def net_balances(self) -> dict:
31
- return self.balances
32
-
33
-
34
- class InterClusterCoordinator:
35
- def __init__(
36
- self,
37
- cluster_env,
38
- high_level_agent,
39
- ledger,
40
- max_transfer_kwh: float = 1000000.0,
41
- w_cost_savings: float = 2.0,
42
- w_grid_penalty: float = 0.3,
43
- w_p2p_bonus: float = 0.3
44
- ):
45
- self.cluster_env = cluster_env
46
- self.agent = high_level_agent
47
- self.ledger = ledger
48
- self.max_transfer_kwh = max_transfer_kwh
49
- self.w_cost_savings = w_cost_savings
50
- self.w_grid_penalty = w_grid_penalty
51
- self.w_p2p_bonus = w_p2p_bonus
52
-
53
- def get_cluster_state(self, env, step_count: int) -> np.ndarray:
54
- """
55
- Returns a fixed-length array summarizing a single cluster's state, read from its vectorized attributes.
56
- """
57
- solar_env = env # This is one of the vectorized SolarSys envs
58
- idx = min(step_count, solar_env.num_steps - 1)
59
- agg_soc = np.sum(solar_env.battery_soc)
60
- agg_max_capacity = np.sum(solar_env.battery_max_capacity)
61
- agg_soc_fraction = agg_soc / agg_max_capacity if agg_max_capacity > 0 else 0.0
62
-
63
- agg_demand = np.sum(solar_env.demands_day[idx])
64
- agg_solar = np.sum(solar_env.solars_day[idx])
65
-
66
- price = solar_env.get_grid_price(idx)
67
- t_norm = idx / float(solar_env.steps_per_day)
68
-
69
- return np.array([
70
- agg_soc, agg_max_capacity, agg_soc_fraction,
71
- agg_demand, agg_solar, price, t_norm
72
- ], dtype=np.float32)
73
-
74
- def build_transfers(self, agent_action_vector: np.ndarray, reports: dict) -> tuple[np.ndarray, np.ndarray]:
75
- """
76
- Acts as a centralized market maker based on agent actions and LIVE capacity reports.
77
- """
78
- n = len(self.cluster_env.clusters)  # note: other methods count clusters via cluster_env.cluster_envs
79
- raw_export_prefs = agent_action_vector[:, 0]
80
- raw_import_prefs = agent_action_vector[:, 1]
81
-
82
- export_prefs = torch.softmax(torch.tensor(raw_export_prefs), dim=-1).numpy()
83
- import_prefs = torch.softmax(torch.tensor(raw_import_prefs), dim=-1).numpy()
84
-
85
- total_available_for_export = 0.0
86
- potential_exports = np.zeros(n)
87
- for i in range(n):
88
- export_capacity = reports[i]['export_capacity']
89
- pref = float(export_prefs[i])
90
- potential_exports[i] = min(pref * self.max_transfer_kwh, export_capacity)
91
- total_available_for_export += potential_exports[i]
92
-
93
- total_requested_for_import = 0.0
94
- potential_imports = np.zeros(n)
95
- for i in range(n):
96
- import_capacity = reports[i]['import_capacity']
97
- pref = float(import_prefs[i])
98
- potential_imports[i] = min(pref * self.max_transfer_kwh, import_capacity)
99
- total_requested_for_import += potential_imports[i]
100
-
101
- total_matched_energy = min(total_available_for_export, total_requested_for_import)
102
- actual_exports = np.zeros(n)
103
- actual_imports = np.zeros(n)
104
-
105
- if total_matched_energy > 1e-6:
106
- if total_available_for_export > 0:
107
- actual_exports = (potential_exports / total_available_for_export) * total_matched_energy
108
- if total_requested_for_import > 0:
109
- actual_imports = (potential_imports / total_requested_for_import) * total_matched_energy
110
-
111
- return actual_exports, actual_imports
112
-
113
- def compute_inter_cluster_reward(self, all_cluster_infos: dict, actual_transfers: tuple, step_count: int) -> np.ndarray:
114
- """
115
- Computes an INDIVIDUAL reward for each cluster agent to solve
116
- the credit assignment problem.
117
- """
118
- actual_exports, actual_imports = actual_transfers
119
- num_clusters = len(self.cluster_env.cluster_envs)
120
- cluster_rewards = np.zeros(num_clusters, dtype=np.float32)
121
-
122
- # Extract per-cluster cost and import data from the batched info dict
123
- costs_per_cluster = [np.sum(c) for c in all_cluster_infos['costs']]
124
- baseline_imports_per_cluster = [np.sum(imp) for imp in all_cluster_infos['grid_import_no_p2p']]
125
- actual_imports_per_cluster = [np.sum(imp) for imp in all_cluster_infos['grid_import_with_p2p']]
126
-
127
- # Get the single grid price for the current step
128
- grid_price = self.cluster_env.cluster_envs[0].get_grid_price(step_count)
129
-
130
- for i in range(num_clusters):
131
- baseline_cost_this_cluster = baseline_imports_per_cluster[i] * grid_price
132
- actual_cost_this_cluster = costs_per_cluster[i]
133
- cost_saved = baseline_cost_this_cluster - actual_cost_this_cluster
134
- r_savings = self.w_cost_savings * cost_saved
135
- r_grid = self.w_grid_penalty * actual_imports_per_cluster[i]
136
- p2p_volume_this_cluster = actual_exports[i] + actual_imports[i]
137
- r_p2p = self.w_p2p_bonus * p2p_volume_this_cluster
138
- cluster_rewards[i] = r_savings + r_p2p - r_grid
139
-
140
- return cluster_rewards
 
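A short usage sketch for the InterClusterLedger defined above: balances are kept antisymmetric, so each transfer debits the sender and credits the receiver by the same amount.

ledger = InterClusterLedger()
ledger.record_transfer("A", "B", 5.0)
ledger.record_transfer("A", "B", 2.5)
assert ledger.get_balance("A", "B") == -7.5   # A has sent 7.5 to B
assert ledger.get_balance("B", "A") == +7.5   # mirror entry on B's side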
 
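A worked example of the pro-rata matching performed by build_transfers above, with made-up preferences and capacities. It mirrors the method's logic as standalone code: preferences are softmaxed, capped by live capacity reports, and the matched volume is split proportionally.

import numpy as np
import torch

raw_export_prefs = np.array([2.0, 0.0])   # cluster 0 prefers to export
raw_import_prefs = np.array([0.0, 2.0])   # cluster 1 prefers to import
export_prefs = torch.softmax(torch.tensor(raw_export_prefs), dim=-1).numpy()
import_prefs = torch.softmax(torch.tensor(raw_import_prefs), dim=-1).numpy()

max_transfer_kwh = 10.0
export_capacity = np.array([5.0, 1.0])    # live per-cluster reports
import_capacity = np.array([0.5, 8.0])

potential_exports = np.minimum(export_prefs * max_transfer_kwh, export_capacity)
potential_imports = np.minimum(import_prefs * max_transfer_kwh, import_capacity)

matched = min(potential_exports.sum(), potential_imports.sum())
actual_exports = potential_exports / potential_exports.sum() * matched
actual_imports = potential_imports / potential_imports.sum() * matched
# Energy is conserved: total exported equals total imported.
assert np.isclose(actual_exports.sum(), actual_imports.sum())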
Other_algorithms/HC_MAPPO/mappo/_init_.py DELETED
File without changes
Other_algorithms/HC_MAPPO/mappo/trainer/__init__.py DELETED
File without changes
Other_algorithms/HC_MAPPO/mappo/trainer/mappo.py DELETED
@@ -1,199 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- import random
4
- import numpy as np
5
- from torch.distributions import Normal
6
-
7
- if torch.cuda.is_available():
8
- device = torch.device("cuda")
9
- print("Using CUDA (NVIDIA GPU)")
10
- else:
11
- device = torch.device("cpu")
12
- print("Using CPU")
13
-
14
- def set_global_seed(seed: int):
15
- random.seed(seed)
16
- np.random.seed(seed)
17
- torch.manual_seed(seed)
18
- if torch.cuda.is_available():
19
- torch.cuda.manual_seed_all(seed)
20
- torch.backends.cudnn.deterministic = False
21
- torch.backends.cudnn.benchmark = True
22
-
23
- SEED = 42
24
- set_global_seed(SEED)
25
-
26
- class MLP(nn.Module):
27
- def __init__(self, input_dim, hidden_dims, output_dim):
28
- super().__init__()
29
- layers = []
30
- last_dim = input_dim
31
- for h in hidden_dims:
32
- layers += [nn.Linear(last_dim, h), nn.ReLU()]
33
- last_dim = h
34
- layers.append(nn.Linear(last_dim, output_dim))
35
- self.net = nn.Sequential(*layers)
36
-
37
- def forward(self, x):
38
- return self.net(x)
39
-
40
- class Actor(nn.Module):
41
- def __init__(self, obs_dim, act_dim, hidden=(64,64)):
42
- super().__init__()
43
- self.net = MLP(obs_dim, hidden, act_dim)
44
- self.log_std = nn.Parameter(torch.zeros(act_dim))
45
-
46
- def forward(self, x):
47
- mean = self.net(x)
48
- std = torch.exp(self.log_std)
49
- return mean, std
50
-
51
- class Critic(nn.Module):
52
- def __init__(self, state_dim, hidden=(128,128)):
53
- super().__init__()
54
- self.net = MLP(state_dim, hidden, 1)
55
-
56
- def forward(self, x):
57
- return self.net(x).squeeze(-1)
58
-
59
- class MAPPO:
60
- def __init__(
61
- self,
62
- n_agents,
63
- local_dim,
64
- global_dim,
65
- act_dim,
66
- lr=3e-4,
67
- gamma=0.99,
68
- lam=0.95,
69
- clip_eps=0.2,
70
- k_epochs=10,
71
- batch_size=1024,
72
- episode_len=96
73
- ):
74
- self.n_agents = n_agents
75
- self.local_dim = local_dim
76
- self.global_dim = global_dim
77
- self.act_dim = act_dim
78
- self.gamma = gamma
79
- self.lam = lam
80
- self.clip_eps = clip_eps
81
- self.k_epochs = k_epochs
82
- self.batch_size = batch_size
83
- self.episode_len = episode_len
84
-
85
- self.actor = Actor(local_dim, act_dim).to(device)
86
- self.critic = Critic(global_dim).to(device)
87
-
88
- self.opt_a = torch.optim.Adam(self.actor.parameters(), lr=lr)
89
- self.opt_c = torch.optim.Adam(self.critic.parameters(), lr=lr)
90
-
91
- print("MAPPO CUDA AMP is disabled for stability.")
92
-
93
- self.init_buffer()
94
-
95
- def init_buffer(self):
96
- self.ls_buf = np.zeros((self.episode_len, self.n_agents, self.local_dim), dtype=np.float16)
97
- self.gs_buf = np.zeros((self.episode_len, self.global_dim), dtype=np.float16)
98
- self.ac_buf = np.zeros((self.episode_len, self.n_agents, self.act_dim), dtype=np.float16)
99
- self.lp_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float16)
100
- self.rw_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float16)
101
- self.done_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float16)
102
- self.next_gs_buf = np.zeros((self.episode_len, self.global_dim), dtype=np.float16)
103
- self.step_idx = 0
104
-
105
- @torch.no_grad()
106
- def select_action(self, local_obs, global_obs):
107
- l = torch.from_numpy(local_obs).float().to(device)
108
- mean, std = self.actor(l)
109
- dist = Normal(mean, std)
110
- a = dist.sample()
111
- return a.cpu().numpy(), dist.log_prob(a).sum(-1).cpu().numpy()
112
-
113
- def store(self, local_obs, global_obs, action, logp, reward, done, next_global_obs):
114
- if self.step_idx < self.episode_len:
115
- self.ls_buf[self.step_idx] = local_obs
116
- self.gs_buf[self.step_idx] = global_obs
117
- self.ac_buf[self.step_idx] = action
118
- self.lp_buf[self.step_idx] = logp
119
- self.rw_buf[self.step_idx] = reward
120
- self.done_buf[self.step_idx] = done
121
- self.next_gs_buf[self.step_idx] = next_global_obs
122
- self.step_idx += 1
123
-
124
- def compute_gae(self, T, vals):
125
- N = self.n_agents
126
- vals_agent = vals.unsqueeze(1).expand(-1, N).cpu().numpy()
127
- next_vals_agent = np.zeros_like(vals_agent)
128
- next_vals_agent[:-1] = vals_agent[1:]
129
- if not self.done_buf[T-1].all():
130
- with torch.no_grad():
131
- v_last = self.critic(
132
- torch.from_numpy(self.next_gs_buf[T-1]).float().to(device)
133
- ).cpu().item()
134
- next_vals_agent[T-1, :] = v_last
135
- masks = 1.0 - self.done_buf[:T]
136
- rewards = self.rw_buf[:T]
137
- adv = rewards + self.gamma * next_vals_agent * masks - vals_agent
138
- ret = adv + vals_agent
139
- adv_flat = torch.from_numpy(adv.flatten()).to(device)
140
- ret_flat = torch.from_numpy(ret.flatten()).to(device)
141
- return adv_flat, ret_flat
142
-
143
- def update(self):
144
- T = self.step_idx
145
- if T == 0: return
146
-
147
- gs_tensor = torch.from_numpy(self.gs_buf[:T]).float().to(device)
148
- ls_tensor = torch.from_numpy(self.ls_buf[:T]).float().to(device).view(T * self.n_agents, -1)
149
- ac_tensor = torch.from_numpy(self.ac_buf[:T]).float().to(device).view(T * self.n_agents, -1)
150
- lp_tensor = torch.from_numpy(self.lp_buf[:T]).float().to(device).view(-1)
151
-
152
- with torch.no_grad():
153
- vals = self.critic(gs_tensor)
154
-
155
- adv_flat, ret_flat = self.compute_gae(T, vals)
156
- adv_flat = (adv_flat - adv_flat.mean()) / (adv_flat.std() + 1e-8)
157
-
158
- gs_for_batch = gs_tensor.unsqueeze(1).expand(-1, self.n_agents, -1).reshape(T * self.n_agents, self.global_dim)
159
-
160
- dataset = torch.utils.data.TensorDataset(ls_tensor, gs_for_batch, ac_tensor, lp_tensor, adv_flat, ret_flat)
161
- gen = torch.Generator()
162
- gen.manual_seed(SEED)
163
- loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True, generator=gen)
164
-
165
- for _ in range(self.k_epochs):
166
- for b_ls, b_gs, b_ac, b_lp, b_adv, b_ret in loader:
167
- mean, std = self.actor(b_ls)
168
- dist = Normal(mean, std)
169
-
170
- entropy = dist.entropy().mean()
171
-
172
- lp_new = dist.log_prob(b_ac).sum(-1)
173
- ratio = torch.exp(lp_new - b_lp)
174
- surr1 = ratio * b_adv
175
- surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * b_adv
176
-
177
- actor_loss = -torch.min(surr1, surr2).mean() - 0.01 * entropy
178
-
179
- self.opt_a.zero_grad()
180
- actor_loss.backward()
181
- self.opt_a.step()
182
-
183
- val_pred = self.critic(b_gs)
184
- critic_loss = nn.MSELoss()(val_pred, b_ret)
185
-
186
- self.opt_c.zero_grad()
187
- critic_loss.backward()
188
- self.opt_c.step()
189
-
190
- self.step_idx = 0
191
-
192
- def save(self, path):
193
- torch.save({'actor': self.actor.state_dict(),
194
- 'critic': self.critic.state_dict()}, path)
195
-
196
- def load(self, path):
197
- data = torch.load(path, map_location=device)
198
- self.actor.load_state_dict(data['actor'])
199
- self.critic.load_state_dict(data['critic'])