SolarSys2025 committed
Commit e4c4571 · verified · 1 Parent(s): 1b98bf0

Delete SolarSys

SolarSys/Environment/cluster_env_wrapper.py DELETED
@@ -1,164 +0,0 @@
- import gym
- import numpy as np
- import math
- import sys
- import os
- import functools
-
- import pandas as pd
-
- # Ensure the SolarSys Environment is on the Python path
- # Please ensure you follow the proper directory structure for running this code
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
- from Environment.solar_sys_environment import SolarSys
-
-
- def form_clusters(metrics: dict, size: int) -> list:
-     """
-     Forms balanced, heterogeneous clusters by categorizing houses based on their
-     energy profile and distributing them evenly in a round-robin fashion.
-     """
-     house_ids = list(metrics.keys())
-     if not house_ids:
-         return []
-     all_consumption = [m['consumption'] for m in metrics.values()]
-     all_solar = [m['solar'] for m in metrics.values()]
-
-     median_consumption = np.median(all_consumption) if all_consumption else 0
-     median_solar = np.median(all_solar) if all_solar else 0
-
-     # Categorize each house based on its profile relative to the medians
-     producers = [h for h in house_ids if metrics[h]['solar'] >= median_solar and metrics[h]['consumption'] < median_consumption]
-     consumers = [h for h in house_ids if metrics[h]['solar'] < median_solar and metrics[h]['consumption'] >= median_consumption]
-     prosumers = [h for h in house_ids if metrics[h]['solar'] >= median_solar and metrics[h]['consumption'] >= median_consumption]
-     neutrals = [h for h in house_ids if metrics[h]['solar'] < median_solar and metrics[h]['consumption'] < median_consumption]
-
-     # Create a master list ordered by category
-     sorted_categorized_houses = producers + consumers + prosumers + neutrals
-
-     # Add any houses that weren't categorized to ensure none are missed
-     categorized_set = set(sorted_categorized_houses)
-     uncategorized = [h for h in house_ids if h not in categorized_set]
-     final_house_list = sorted_categorized_houses + uncategorized
-     num_houses = len(house_ids)
-     num_clusters = math.ceil(num_houses / size)
-
-     clusters = [[] for _ in range(num_clusters)]
-
-     for i, house_id in enumerate(final_house_list):
-         target_cluster_idx = i % num_clusters
-         clusters[target_cluster_idx].append(house_id)
-
-     return clusters
-
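For reference, a minimal sketch of how form_clusters deals houses out (the house IDs and metric values below are invented for illustration):

    metrics = {
        "h1": {"consumption": 10.0, "solar": 8.0},  # high solar, high consumption -> prosumer
        "h2": {"consumption": 2.0, "solar": 9.0},   # high solar, low consumption  -> producer
        "h3": {"consumption": 12.0, "solar": 0.0},  # low solar, high consumption  -> consumer
        "h4": {"consumption": 1.0, "solar": 0.0},   # low solar, low consumption   -> neutral
    }
    clusters = form_clusters(metrics, size=2)
    # ceil(4 / 2) = 2 clusters; the category-ordered list [h2, h3, h1, h4]
    # is dealt round-robin, giving [["h2", "h1"], ["h3", "h4"]]
    assert sum(len(c) for c in clusters) == len(metrics)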
- class GlobalPriceVecEnvWrapper(gym.vector.VectorEnvWrapper):
-     def __init__(self, env, clusters: list):
-         super().__init__(env)
-         self.clusters = clusters
-         # Expose the underlying SolarSys environments for inspection by the coordinator
-         # self.env.envs gets the list of individual envs from the SyncVectorEnv
-         self.cluster_envs = self.env.envs
-
-     def step(self, actions: np.ndarray, exports: np.ndarray = None, imports: np.ndarray = None):
-         num_clusters = len(self.cluster_envs)
-         net_transfers = np.zeros(num_clusters)
-         if exports is not None and imports is not None:
-             net_transfers = imports - exports
-         batched_low_level_actions = actions
-         batched_transfers = net_transfers.reshape(-1, 1).astype(np.float32)
-         batched_prices = np.full((num_clusters, 1), -1.0, dtype=np.float32)
-         final_packed_actions_tuple = (batched_low_level_actions, batched_transfers, batched_prices)
-         obs_next, rewards, terminateds, truncateds, infos = self.env.step(final_packed_actions_tuple)
-         dones = terminateds | truncateds
-         done_all = dones.all()
-
-         if done_all:
-             final_infos = infos['final_info']
-             keys = final_infos[0].keys()
-             infos = {k: np.stack([info[k] for info in final_infos]) for k in keys}
-
-         info_agg = {
-             "cluster_dones": dones,
-             "cluster_infos": infos,
-         }
-
-         return obs_next, rewards, done_all, info_agg
-
-     def get_export_capacity(self, cluster_idx: int) -> float:
-         """Returns the total physically exportable energy from a cluster's batteries and solar in kWh."""
-         cluster_env = self.cluster_envs[cluster_idx]
-         available_from_batt = cluster_env.battery_soc * cluster_env.battery_discharge_efficiency
-         total_exportable = np.sum(available_from_batt) + cluster_env.current_solar
-         return float(total_exportable)
-
-     def get_import_capacity(self, cluster_idx: int) -> float:
-         """Returns the total physically importable space in a cluster's batteries in kWh."""
-         cluster_env = self.cluster_envs[cluster_idx]
-         free_space = cluster_env.battery_max_capacity - cluster_env.battery_soc
-         total_storable = np.sum(free_space)
-         return float(total_storable)
-
-     def send_energy(self, from_cluster_idx: int, amount: float) -> float:
-         """Drains 'amount' of energy from the specified cluster (batteries first, then solar)."""
-         cluster_env = self.cluster_envs[from_cluster_idx]
-         return cluster_env.send_energy(amount)
-
-     def receive_energy(self, to_cluster_idx: int, amount: float) -> float:
-         """Charges batteries in the specified cluster with 'amount' of energy."""
-         cluster_env = self.cluster_envs[to_cluster_idx]
-         return cluster_env.receive_energy(amount)
-
-
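To make the transfer convention concrete, a small sketch (values invented): a positive net transfer means a cluster absorbs energy, a negative one means it supplies, and the -1.0 price column tells each sub-environment to compute its own peer price:

    exports = np.array([2.0, 0.0, 1.0])  # kWh each cluster sends out
    imports = np.array([0.0, 3.0, 0.0])  # kWh each cluster receives
    net_transfers = imports - exports    # array([-2.,  3., -1.])
    # net_transfers[i] > 0: cluster i absorbs energy (covers shortfalls, then charges batteries)
    # net_transfers[i] < 0: cluster i supplies energy (surplus first, then battery discharge)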
- def make_vec_env(data_path: str, time_freq: str, cluster_size: int, state: str):
-     print("--- Pre-loading shared dataset for all environments ---")
-     try:
-         shared_df = pd.read_csv(data_path)
-         shared_df["local_15min"] = pd.to_datetime(shared_df["local_15min"], utc=True)
-         shared_df.set_index("local_15min", inplace=True)
-
-         # Resample the shared dataset to the requested time frequency
-         shared_df = shared_df.resample(time_freq).mean()
-
-     except Exception as e:
-         raise ValueError(f"Failed to pre-load data in make_vec_env: {e}")
-
-     base_env_for_metrics = SolarSys(
-         data_path=data_path,
-         time_freq=time_freq,
-         preloaded_data=shared_df,  # Pass the shared DataFrame here
-         state=state
-     )
-
-     # Calculate per-house metrics and form the clusters
-     metrics = {}
-     for hid in base_env_for_metrics.house_ids:
-         total_consumption = float(
-             np.clip(base_env_for_metrics.original_no_p2p_import[hid], 0.0, None).sum()
-         )
-         total_solar = float(
-             base_env_for_metrics.all_data[f"total_solar_{hid}"].clip(lower=0.0).sum()
-         )
-         metrics[hid] = {'consumption': total_consumption, 'solar': total_solar}
-
-     clusters = form_clusters(metrics, cluster_size)
-     print(f"Formed {len(clusters)} clusters of size up to {cluster_size}.")
-
-     # Use functools.partial to create one environment factory per cluster
-     env_fns = []
-     for cluster_house_ids in clusters:
-         preset_env_fn = functools.partial(
-             SolarSys,
-             data_path=data_path,
-             time_freq=time_freq,
-             house_ids_in_cluster=cluster_house_ids,
-             preloaded_data=shared_df,
-             state=state
-         )
-         env_fns.append(preset_env_fn)
-     sync_vec_env = gym.vector.SyncVectorEnv(env_fns)
-     wrapped_vec_env = GlobalPriceVecEnvWrapper(sync_vec_env, clusters=clusters)
-
-     return wrapped_vec_env
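A minimal usage sketch, assuming the repository's DATA layout and a gym version whose vectorized reset returns an (obs, info) pair:

    env = make_vec_env(
        data_path="DATA/training/25houses_152days_TRAIN.csv",
        time_freq="15T",
        cluster_size=5,
        state="colorado",
    )
    obs, _ = env.reset()
    # obs has shape (num_clusters, agents_per_cluster, 8); the per-cluster
    # SolarSys envs stay reachable via env.cluster_envs for capacity queries.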
SolarSys/Environment/solar_sys_environment.py DELETED
@@ -1,673 +0,0 @@
- import gym
- import pandas as pd
- import numpy as np
- from collections import deque
- import random
- from gym.spaces import Tuple, Box
-
- random.seed(42)
- np.random.seed(42)
-
- class SolarSys(gym.Env):
-
-     def __init__(
-         self,
-         data_path="DATA/training/25houses_152days_TRAIN.csv",
-         state="",  # Select from 'oklahoma', 'colorado', 'pennsylvania'
-         time_freq="15T",
-         house_ids_in_cluster=None,
-         preloaded_data=None
-     ):
-
-         super().__init__()  # initialize parent gym.Env
-         self.state = state.lower()
-
-         # --- Centralized Pricing Configuration ---
-         self._pricing_info = {
-             "oklahoma": {
-                 "max_grid_price": 0.2112,
-                 "feed_in_tariff": 0.04,
-                 "price_function": self._get_oklahoma_price
-             },
-             "colorado": {
-                 "max_grid_price": 0.32,
-                 "feed_in_tariff": 0.055,
-                 "price_function": self._get_colorado_price
-             },
-             "pennsylvania": {
-                 "max_grid_price": 0.5505,
-                 "feed_in_tariff": 0.06,
-                 "price_function": self._get_pennsylvania_price
-             }
-         }
-
-         if self.state not in self._pricing_info:
-             raise ValueError(f"State '{self.state}' is not supported. Available states: {list(self._pricing_info.keys())}")
-
-         state_config = self._pricing_info[self.state]
-         self.max_grid_price = state_config["max_grid_price"]
-         self.feed_in_tariff = state_config["feed_in_tariff"]
-         self._get_price_function = state_config["price_function"]
-         self.data_path = data_path
-         self.time_freq = time_freq
-         if preloaded_data is not None:
-             all_data = preloaded_data
-             if house_ids_in_cluster:
-                 print(f"Using pre-loaded data for cluster with {len(house_ids_in_cluster)} houses.")
-         else:
-             print(f"Loading data from {data_path}...")
-             try:
-                 all_data = pd.read_csv(data_path)
-                 all_data["local_15min"] = pd.to_datetime(all_data["local_15min"], utc=True)
-                 all_data.set_index("local_15min", inplace=True)
-             except FileNotFoundError:
-                 raise FileNotFoundError(f"Data file {data_path} not found.")
-             except pd.errors.EmptyDataError:
-                 raise ValueError(f"Data file {data_path} is empty.")
-             except Exception as e:
-                 raise ValueError(f"Error loading data: {e}")
-
-         # Compute global maxima for normalization
-         grid_cols = [c for c in all_data.columns if c.startswith("grid_")]
-         solar_cols = [c for c in all_data.columns if c.startswith("total_solar_")]
-         all_grid = all_data[grid_cols].values
-         all_solar = all_data[solar_cols].values
-
-         # max total demand = max(grid + solar) over all time & agents
-         self.global_max_demand = float((all_grid + all_solar).max()) + 1e-8
-
-         # max solar generation alone
-         self.global_max_solar = float(all_solar.max()) + 1e-8
-
-         # Store the resampled dataset
-         self.all_data = all_data
-         all_house_ids_in_file = [
-             col.split("_")[1] for col in self.all_data.columns
-             if col.startswith("grid_")
-         ]
-         if house_ids_in_cluster:
-             self.house_ids = [hid for hid in house_ids_in_cluster if hid in all_house_ids_in_file]
-         else:
-             self.house_ids = all_house_ids_in_file
-
-         if not self.house_ids:
-             raise ValueError("No valid house_ids found for this environment instance.")
-
-         self.env_log_infos = []
-
-         self.time_freq = time_freq
-         freq_offset = pd.tseries.frequencies.to_offset(time_freq)
-         minutes_per_step = freq_offset.nanos / 1e9 / 60.0
-         self.steps_per_day = int(24 * 60 // minutes_per_step)
-
-         total_rows = len(self.all_data)
-         self.total_days = total_rows // self.steps_per_day
-         if self.total_days < 1:
-             raise ValueError(
-                 f"After resampling, dataset has {total_rows} rows, which is "
-                 f"less than a single day of {self.steps_per_day} steps."
-             )
-
-         self.num_agents = len(self.house_ids)
-         self.original_no_p2p_import = {}
-         for hid in self.house_ids:
-             col_grid = f"grid_{hid}"
-             self.original_no_p2p_import[hid] = self.all_data[col_grid].clip(lower=0.0).values
-         solar_cols = [f"total_solar_{hid}" for hid in self.house_ids]
-         solar_sums = self.all_data[solar_cols].sum(axis=0).to_dict()
-         self.agent_groups = [
-             1 if solar_sums[f"total_solar_{hid}"] > 0 else 0
-             for hid in self.house_ids
-         ]
-
-         self.group_counts = {
-             0: self.agent_groups.count(0),
-             1: self.agent_groups.count(1)
-         }
-         print(f"Number of houses in each group: {self.group_counts}")
-
-         # Battery logic
-         self.battery_options = {
-             "teslapowerwall": {"max_capacity": 13.5, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 5.0, "max_discharge_rate": 5.0, "degradation_cost_per_kwh": 0.005},
-             "enphase": {"max_capacity": 5.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 2.0, "max_discharge_rate": 2.0, "degradation_cost_per_kwh": 0.005},
-             "franklin": {"max_capacity": 15.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 6.0, "max_discharge_rate": 6.0, "degradation_cost_per_kwh": 0.005},
-         }
-         self.solar_houses = [
-             hid for hid in self.house_ids
-             if (self.all_data[f"total_solar_{hid}"] > 0).any()
-         ]
-
-         self.batteries = {}
-         for hid in self.solar_houses:
-             choice = random.choice(list(self.battery_options))
-             specs = self.battery_options[choice]
-             self.batteries[hid] = {"soc": 0.0, **specs}
-
-         self.battery_charge_history = {hid: [] for hid in self.batteries}
-         self.battery_discharge_history = {hid: [] for hid in self.batteries}
-         self.battery_capacity = sum(b["max_capacity"] for b in self.batteries.values())
-         self.battery_level = sum(b["soc"] for b in self.batteries.values())
-         self.current_solar = 0.0
-         self.has_battery = np.array([1 if hid in self.batteries else 0 for hid in self.house_ids], dtype=np.float32)
-
-         # Initialize arrays for all agents, with zeros for non-battery agents
-         self.battery_soc = np.zeros(self.num_agents, dtype=np.float32)
-         self.battery_max_capacity = np.zeros(self.num_agents, dtype=np.float32)
-         self.battery_charge_efficiency = np.zeros(self.num_agents, dtype=np.float32)
-         self.battery_discharge_efficiency = np.zeros(self.num_agents, dtype=np.float32)
-         self.battery_max_charge_rate = np.zeros(self.num_agents, dtype=np.float32)
-         self.battery_max_discharge_rate = np.zeros(self.num_agents, dtype=np.float32)
-         self.battery_degradation_cost = np.zeros(self.num_agents, dtype=np.float32)
-
-         # Populate the arrays using the created battery dictionary
-         for i, hid in enumerate(self.house_ids):
-             if hid in self.batteries:
-                 batt = self.batteries[hid]
-                 self.battery_max_capacity[i] = batt["max_capacity"]
-                 self.battery_charge_efficiency[i] = batt["charge_efficiency"]
-                 self.battery_discharge_efficiency[i] = batt["discharge_efficiency"]
-                 self.battery_max_charge_rate[i] = batt["max_charge_rate"]
-                 self.battery_max_discharge_rate[i] = batt["max_discharge_rate"]
-                 self.battery_degradation_cost[i] = batt["degradation_cost_per_kwh"]
-
-         # ========== SPACES (Observation & Action) ===================================
-         self.observation_space = gym.spaces.Box(
-             low=-np.inf, high=np.inf,
-             shape=(self.num_agents, 8),
-             dtype=np.float32
-         )
-         self.action_space = Tuple((
-             Box(low=0.0, high=1.0, shape=(self.num_agents, 6), dtype=np.float32),
-             Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float32),
-             Box(low=-1.0, high=np.inf, shape=(1,), dtype=np.float32)
-         ))
-
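The Tuple action space above encodes the packed action that step() unpacks: a (num_agents, 6) block of allocation fractions, a one-element inter-cluster transfer, and a one-element peer-price override. A hand-built sketch (env is a hypothetical SolarSys instance; values invented):

    n = env.num_agents
    packed_action = (
        np.random.uniform(0.0, 1.0, size=(n, 6)).astype(np.float32),
        # columns: [sellGrid, buyGrid, sellPeers, buyPeers, chargeBatt, dischargeBatt]
        np.array([0.0], dtype=np.float32),   # inter-cluster transfer in kWh (+ receive, - send)
        np.array([-1.0], dtype=np.float32),  # peer-price override; any value < 0 means "compute internally"
    )
    obs, reward, terminated, truncated, info = env.step(packed_action)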
-         # ========== REWARD FUNCTION PARAMETERS ======================================
-         self.data = None
-         self.env_log = []
-         self.day_index = -1
-         self.current_step = 0
-         self.num_steps = self.steps_per_day
-         self.demands = {}
-         self.solars = {}
-         self.previous_actions = {
-             hid: np.zeros(6) for hid in self.house_ids
-         }
-         self.episode_metrics = {}  # populated at the end of each episode in reset()
-         self._initialize_episode_metrics()
-
-     def get_grid_price(self, step_idx):
-         """
-         Returns the grid price for the current step based on the selected state.
-         """
-         return self._get_price_function(step_idx)
-
-     def _get_oklahoma_price(self, step_idx):
-         minutes_per_step = 24 * 60 / self.steps_per_day
-         hour = int((step_idx * minutes_per_step) // 60) % 24
-         if 14 <= hour < 19:
-             return 0.2112
-         else:
-             return 0.0434
-
-     def _get_colorado_price(self, step_idx):
-         minutes_per_step = 24 * 60 / self.steps_per_day
-         hour = int((step_idx * minutes_per_step) // 60) % 24
-         if 15 <= hour < 19:
-             return 0.32
-         elif 13 <= hour < 15:
-             return 0.22
-         else:
-             return 0.12
-
-     def _get_pennsylvania_price(self, step_idx):
-         minutes_per_step = 24 * 60 / self.steps_per_day
-         hour = int((step_idx * minutes_per_step) // 60) % 24
-         if 13 <= hour < 21:
-             return 0.125048
-         elif hour >= 23 or hour < 6:
-             return 0.057014
-         else:
-             return 0.079085
-
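Each tariff maps the step index back to an hour of day. Assuming an instance env constructed with state="colorado" and the default "15T" frequency (96 steps per day), the lookups work out as:

    # hour = int((step_idx * 15) // 60) % 24
    env._get_colorado_price(0)   # step 0  -> hour 0  -> 0.12 (off-peak)
    env._get_colorado_price(56)  # step 56 -> hour 14 -> 0.22 (shoulder, 13:00-15:00)
    env._get_colorado_price(60)  # step 60 -> hour 15 -> 0.32 (peak, 15:00-19:00)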
-     def get_peer_price(self, step_idx, total_surplus, total_shortfall):
-         grid_price = self.get_grid_price(step_idx)
-         feed_in_tariff = self.feed_in_tariff
-
-         # Parameters for arctangent-log pricing
-         p_balance = (grid_price * 0.80) + (feed_in_tariff * 0.20)
-         p_con = (grid_price - feed_in_tariff) / (1.5 * np.pi)
-         k = 1.5
-         epsilon = 1e-6
-         supply = total_surplus + epsilon
-         demand = total_shortfall + epsilon
-
-         ratio = demand / supply
-         log_ratio = np.log(ratio)
-         if log_ratio < 0:
-             power_term = -(np.abs(log_ratio) ** k)
-         else:
-             power_term = log_ratio ** k
-
-         price_offset = 2 * np.pi * p_con * np.arctan(power_term)
-
-         peer_price = p_balance + price_offset
-
-         final_price = float(np.clip(peer_price, feed_in_tariff, grid_price))
-
-         return final_price
-
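To see the pricing curve's behaviour, a worked example at the colorado off-peak rate (grid_price = 0.12, feed_in_tariff = 0.055):

    p_balance = 0.80 * 0.12 + 0.20 * 0.055 = 0.107
    p_con = (0.12 - 0.055) / (1.5 * pi) ≈ 0.0138
    # demand == supply: log_ratio = 0, offset = 0 -> peer price 0.107
    # demand = 20 * supply: log(20)**1.5 ≈ 5.19, arctan ≈ 1.38 rad,
    #   offset ≈ 2 * pi * 0.0138 * 1.38 ≈ +0.12 -> clipped to grid_price 0.12
    # supply = 20 * demand: symmetric negative offset -> clipped to feed_in_tariff 0.055

So the peer price floats between the feed-in tariff and the grid price, rising when the community is short of energy and falling when it is flush.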
-     def _initialize_episode_metrics(self):
-         """Initializes or resets all metrics tracked over a single episode (day)."""
-         self.cumulative_grid_reduction = 0.0
-         self.cumulative_grid_reduction_peak = 0.0
-         self.cumulative_degradation_cost = 0.0
-         self.agent_cost_savings = np.zeros(self.num_agents)
-         self.degradation_cost_timeseries = []
-         self.cost_savings_timeseries = []
-         self.grid_reduction_timeseries = []
-
-     def get_episode_metrics(self):
-         """
-         Returns a dictionary of performance metrics for the last completed episode.
-         """
-         return self.episode_metrics
-
-     ##########################################################################
-     # Gym Required Methods
-
-     def reset(self):
-         if self.current_step > 0:
-             positive_savings = self.agent_cost_savings[self.agent_cost_savings > 0]
-             if len(positive_savings) > 1:
-                 fairness_on_savings = self._compute_jains_index(positive_savings)
-             else:
-                 fairness_on_savings = 0.0
-
-             self.episode_metrics = {
-                 "grid_reduction_entire_day": self.cumulative_grid_reduction,
-                 "grid_reduction_peak_hours": self.cumulative_grid_reduction_peak,
-                 "total_cost_savings": np.sum(self.agent_cost_savings),
-                 "fairness_on_cost_savings": fairness_on_savings,
-                 "battery_degradation_cost_total": self.cumulative_degradation_cost,
-                 "degradation_cost_over_time": self.degradation_cost_timeseries,
-                 "cost_savings_over_time": self.cost_savings_timeseries,
-                 "grid_reduction_over_time": self.grid_reduction_timeseries,
-             }
-         self.day_index = np.random.randint(0, self.total_days)
-
-         start_row = self.day_index * self.steps_per_day
-         end_row = start_row + self.steps_per_day
-         day_data = self.all_data.iloc[start_row:end_row].copy()
-         self.data = day_data
-
-         self.no_p2p_import_day = {}
-         for hid in self.house_ids:
-             self.no_p2p_import_day[hid] = self.original_no_p2p_import[hid][start_row:end_row]
-
-         demand_list = []
-         solar_list = []
-         for hid in self.house_ids:
-             col_grid = f"grid_{hid}"
-             col_solar = f"total_solar_{hid}"
-
-             grid_series = day_data[col_grid].fillna(0.0)
-             solar_series = day_data[col_solar].fillna(0.0).clip(lower=0.0)
-
-             demand_array = grid_series.values + solar_series.values
-             demand_array = np.clip(demand_array, 0.0, None)
-
-             demand_list.append(demand_array)
-             solar_list.append(solar_series.values)
-
-         self.demands_day = np.stack(demand_list, axis=1).astype(np.float32)
-         self.solars_day = np.stack(solar_list, axis=1).astype(np.float32)
-
-         self.hours_day = (self.data.index.hour + self.data.index.minute / 60.0).values
-
-         self.current_step = 0
-         self.env_log = []
-         for hid in self.house_ids:
-             self.previous_actions[hid] = np.zeros(6)
-
-         lows = 0.30 * self.battery_max_capacity
-         highs = 0.70 * self.battery_max_capacity
-
-         self.battery_soc = np.random.uniform(low=lows, high=highs)
-         self.battery_soc *= self.has_battery
-
-         initial_demands = self.demands_day[0]
-         initial_solars = self.solars_day[0]
-         initial_surplus = np.maximum(initial_solars - initial_demands, 0.0).sum()
-         initial_shortfall = np.maximum(initial_demands - initial_solars, 0.0).sum()
-         initial_peer_price = self.get_peer_price(0, initial_surplus, initial_shortfall)
-
-         obs = self._get_obs(peer_price=initial_peer_price)
-
-         self._initialize_episode_metrics()
-
-         return obs, {}
-
-     def step(self, packed_action):
-         actions, transfer_kwh_arr, peer_price_arr = packed_action
-         inter_cluster_transfer_kwh = float(transfer_kwh_arr[0])
-         override_peer_price_val = float(peer_price_arr[0])
-
-         override_peer_price = override_peer_price_val if override_peer_price_val >= 0 else None
-
-         actions = np.array(actions, dtype=np.float32)
-         if actions.shape != (self.num_agents, 6):
-             raise ValueError(f"Actions shape mismatch: got {actions.shape}, expected {(self.num_agents, 6)}")
-         actions = np.clip(actions, 0.0, 1.0)
-
-         a_sellGrid = actions[:, 0]
-         a_buyGrid = actions[:, 1]
-         a_sellPeers = actions[:, 2]
-         a_buyPeers = actions[:, 3]
-         a_chargeBatt = actions[:, 4]
-         a_dischargeBatt = actions[:, 5]
-
-         demands = self.demands_day[self.current_step]
-         solars = self.solars_day[self.current_step]
-
-         total_surplus = np.maximum(solars - demands, 0.0).sum()
-         total_shortfall = np.maximum(demands - solars, 0.0).sum()
-         self.current_solar = total_surplus
-
-         if override_peer_price is not None:
-             peer_price = override_peer_price
-         else:
-             peer_price = self.get_peer_price(
-                 self.current_step,
-                 total_surplus,
-                 total_shortfall
-             )
-
-         grid_price = self.get_grid_price(self.current_step)
-
-         shortfall = np.maximum(demands - solars, 0.0)
-         surplus = np.maximum(solars - demands, 0.0)
-
-         final_shortfall = shortfall.copy()
-         final_surplus = surplus.copy()
-         grid_import = np.zeros(self.num_agents, dtype=np.float32)
-         grid_export = np.zeros(self.num_agents, dtype=np.float32)
-
-         # ### VECTORIZED BATTERY DISCHARGE ###
-         available_from_batt = self.battery_soc * self.battery_discharge_efficiency
-         desired_discharge = a_dischargeBatt * self.battery_max_discharge_rate
-         discharge_amount = np.minimum.reduce([desired_discharge, available_from_batt, final_shortfall])
-         discharge_amount *= self.has_battery  # Ensure only batteries discharge
-
-         # Update SOC (energy drawn from battery before efficiency loss)
-         self.battery_soc -= (discharge_amount / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
-         self.battery_soc = np.maximum(0.0, self.battery_soc)
-         final_shortfall -= discharge_amount
-
-         # ### VECTORIZED BATTERY CHARGE ###
-         cap_left = self.battery_max_capacity - self.battery_soc
-         desired_charge = a_chargeBatt * self.battery_max_charge_rate
-         charge_amount = np.minimum.reduce([
-             desired_charge,
-             cap_left / (self.battery_charge_efficiency + 1e-9),
-             final_surplus
-         ])
-         charge_amount *= self.has_battery
-
-         # Update SOC
-         self.battery_soc += charge_amount * self.battery_charge_efficiency
-         final_surplus -= charge_amount
-
-         # ### VECTORIZED P2P TRADING ###
-         battery_offer = (self.battery_soc * self.battery_discharge_efficiency) * self.has_battery
-         effective_surplus = final_surplus + battery_offer
-
-         netPeer = a_buyPeers - a_sellPeers
-         p2p_buy_request = np.maximum(0, netPeer) * final_shortfall
-         p2p_sell_offer = np.maximum(0, -netPeer) * effective_surplus
-
-         total_sell = np.sum(p2p_sell_offer)
-         total_buy = np.sum(p2p_buy_request)
-         matched = min(total_sell, total_buy)
-
-         if matched > 1e-9:
-             sell_fraction = p2p_sell_offer / (total_sell + 1e-12)
-             buy_fraction = p2p_buy_request / (total_buy + 1e-12)
-             actual_sold = matched * sell_fraction
-             actual_bought = matched * buy_fraction
-         else:
-             actual_sold = np.zeros(self.num_agents, dtype=np.float32)
-             actual_bought = np.zeros(self.num_agents, dtype=np.float32)
-
-         from_batt = np.minimum(actual_sold, battery_offer)
-         from_solar = actual_sold - from_batt
-
-         final_surplus -= from_solar
-
-         final_shortfall -= actual_bought
-         soc_reduction = (from_batt / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
-         self.battery_soc -= soc_reduction
-         self.battery_soc = np.maximum(0.0, self.battery_soc)
-
-         # ### INTER-CLUSTER TRANSFERS ###
-         if inter_cluster_transfer_kwh > 0:
-             amount_received = inter_cluster_transfer_kwh
-
-             total_shortfall_in_cluster = np.sum(final_shortfall)
-             if total_shortfall_in_cluster > 1e-6:
-                 to_cover_shortfall = min(amount_received, total_shortfall_in_cluster)
-                 distribution_ratio = final_shortfall / total_shortfall_in_cluster
-                 shortfall_reduction = distribution_ratio * to_cover_shortfall
-                 final_shortfall -= shortfall_reduction
-                 amount_received -= to_cover_shortfall
-
-             if amount_received > 1e-6:
-                 cap_left = self.battery_max_capacity - self.battery_soc
-                 storable_energy = cap_left / (self.battery_charge_efficiency + 1e-9)
-                 total_storable_in_cluster = np.sum(storable_energy * self.has_battery)
-
-                 if total_storable_in_cluster > 1e-6:
-                     to_store = min(amount_received, total_storable_in_cluster)
-
-                     storage_ratio = storable_energy / total_storable_in_cluster
-                     energy_to_store_per_batt = storage_ratio * to_store
-
-                     self.battery_soc += (energy_to_store_per_batt * self.battery_charge_efficiency) * self.has_battery
-
-         elif inter_cluster_transfer_kwh < 0:
-             amount_to_send = abs(inter_cluster_transfer_kwh)
-
-             total_surplus_in_cluster = np.sum(final_surplus)
-             if total_surplus_in_cluster > 1e-6:
-                 sent_from_surplus = min(amount_to_send, total_surplus_in_cluster)
-                 draw_ratio = final_surplus / total_surplus_in_cluster
-                 surplus_reduction = draw_ratio * sent_from_surplus
-                 final_surplus -= surplus_reduction
-                 amount_to_send -= sent_from_surplus
-
-             if amount_to_send > 1e-6:
-                 available_from_batt = (self.battery_soc * self.battery_discharge_efficiency) * self.has_battery
-                 total_available_from_batt = np.sum(available_from_batt)
-
-                 if total_available_from_batt > 1e-6:
-                     # Discharge a maximum of 'amount_to_send' from batteries
-                     to_discharge = min(amount_to_send, total_available_from_batt)
-
-                     # Draw this amount proportionally from each available battery
-                     discharge_ratio = available_from_batt / total_available_from_batt
-                     discharged_per_batt = discharge_ratio * to_discharge  # This is effective energy
-
-                     # Update SoC (energy drawn from battery before efficiency loss)
-                     soc_reduction = (discharged_per_batt / (self.battery_discharge_efficiency + 1e-9))
-                     self.battery_soc -= soc_reduction * self.has_battery
-                     self.battery_soc = np.maximum(0.0, self.battery_soc)
-         # =======================================================================
-
-         netGrid = a_buyGrid - a_sellGrid
-         grid_import = np.maximum(0, netGrid) * final_shortfall
-         grid_export = np.maximum(0, -netGrid) * final_surplus
-
-         forced = np.maximum(final_shortfall - grid_import, 0.0)
-         grid_import += forced
-         final_shortfall -= forced
-
-         feed_in_tariff = self.feed_in_tariff
-         costs = (
-             (grid_import * grid_price)
-             - (grid_export * feed_in_tariff)
-             + (actual_bought * peer_price)
-             - (actual_sold * peer_price)
-         )
-
-         final_rewards = self._compute_rewards(
-             grid_import=grid_import, grid_export=grid_export, actual_sold=actual_sold,
-             actual_bought=actual_bought, charge_amount=charge_amount, discharge_amount=discharge_amount,
-             costs=costs, grid_price=grid_price, peer_price=peer_price
-         )
-
-         no_p2p_import_this_step = np.array([
-             self.no_p2p_import_day[hid][self.current_step]
-             for hid in self.house_ids
-         ], dtype=np.float32)
-
-         # --- Metric 1 & 2: Grid Reduction (Entire Day & Peak Hours) ---
-         step_grid_reduction = np.sum(no_p2p_import_this_step - grid_import)
-         self.cumulative_grid_reduction += step_grid_reduction
-         self.grid_reduction_timeseries.append(step_grid_reduction)
-
-         if grid_price >= self.max_grid_price * 0.99:
-             self.cumulative_grid_reduction_peak += step_grid_reduction
-
-         # --- Metric 3: Total Cost Savings ---
-         cost_no_p2p = no_p2p_import_this_step * grid_price
-         step_cost_savings_per_agent = cost_no_p2p - costs
-         self.agent_cost_savings += step_cost_savings_per_agent
-         self.cost_savings_timeseries.append(np.sum(step_cost_savings_per_agent))
-
-         # --- Metric 5 & 6: Battery Degradation Cost (Total and Over Time) ---
-         degradation_cost_agent = (charge_amount + discharge_amount) * self.battery_degradation_cost
-         step_degradation_cost = np.sum(degradation_cost_agent)
-
-         self.cumulative_degradation_cost += step_degradation_cost
-         self.degradation_cost_timeseries.append(step_degradation_cost)
-
-         info = {
-             "p2p_buy": actual_bought,
-             "p2p_sell": actual_sold,
-             "grid_import_with_p2p": grid_import,
-             "grid_import_no_p2p": no_p2p_import_this_step,
-             "grid_export": grid_export,
-             "costs": costs,
-             "charge_amount": charge_amount,
-             "discharge_amount": discharge_amount,
-             "step": self.current_step,
-             "step_grid_reduction": step_grid_reduction,
-             "step_cost_savings": np.sum(step_cost_savings_per_agent),
-             "step_degradation_cost": step_degradation_cost,
-         }
-
-         self.env_log.append([
-             self.current_step, np.sum(grid_import), np.sum(grid_export),
-             np.sum(actual_bought), np.sum(actual_sold), np.sum(costs)
-         ])
-
-         self.current_step += 1
-
-         terminated = False
-         truncated = (self.current_step >= self.num_steps)
-
-         obs_next = self._get_obs(peer_price=peer_price)
-         info['agent_rewards'] = final_rewards
-         self.last_info = info
-         self.env_log_infos.append(info)
-         return obs_next, final_rewards.sum(), terminated, truncated, info
-
-     def _get_obs(self, peer_price: float):
-         step = min(self.current_step, self.num_steps - 1)
-         demands = self.demands_day[step]
-         solars = self.solars_day[step]
-         grid_price = self.get_grid_price(step)
-         hour = self.hours_day[step]
-         soc_frac = self.battery_soc / (self.battery_max_capacity + 1e-9)
-         soc_frac = np.where(self.has_battery == 1, soc_frac, -1.0)
-         total_demand_others = demands.sum() - demands
-         total_solar_others = solars.sum() - solars
-
-         obs = np.stack([
-             demands,
-             solars,
-             soc_frac,
-             np.full(self.num_agents, grid_price),
-             np.full(self.num_agents, peer_price),
-             total_demand_others,
-             total_solar_others,
-             np.full(self.num_agents, hour)
-         ], axis=1).astype(np.float32)
-
-         return obs
-
-     def _compute_jains_index(self, usage_array):
-         x = np.array(usage_array, dtype=np.float32)
-         numerator = (np.sum(x)) ** 2
-         denominator = len(x) * np.sum(x ** 2) + 1e-8
-         return numerator / denominator
-
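For reference, _compute_jains_index implements Jain's fairness index, J(x) = (Σᵢ xᵢ)² / (n · Σᵢ xᵢ²), which equals 1 when all agents receive identical allocations and falls toward 1/n as one agent dominates. For example, J([1, 1, 1, 1]) = 16/16 = 1.0, while J([4, 0, 0, 0]) = 16/64 = 0.25.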
-     def _compute_rewards(
-         self, grid_import, grid_export, actual_sold, actual_bought,
-         charge_amount, discharge_amount, costs, grid_price, peer_price
-     ):
-
-         w1 = 0.3; w2 = 0.5; w3 = 0.5; w4 = 0.1; w5 = 0.05; w6 = 0.4; w7 = 1.0
-
-         p_grid_norm = grid_price / self.max_grid_price
-         p_peer_norm = peer_price / self.max_grid_price
-
-         rewards = -costs * w7
-         rewards -= w1 * grid_import * p_grid_norm
-         rewards += w2 * actual_sold * p_peer_norm
-         buy_bonus = w3 * actual_bought * ((grid_price - peer_price) / self.max_grid_price)
-         rewards += np.where(peer_price < grid_price, buy_bonus, 0.0)
-
-         # ### VECTORIZED REWARD PENALTIES ###
-         soc_frac = self.battery_soc / (self.battery_max_capacity + 1e-9)
-         soc_penalties = w4 * ((soc_frac - 0.5) ** 2) * self.has_battery
-         degrad_penalties = w5 * (charge_amount + discharge_amount) * self.battery_degradation_cost
-
-         rewards -= soc_penalties
-         rewards -= degrad_penalties
-
-         jfi = self._compute_jains_index(actual_bought + actual_sold)
-         rewards += w6 * jfi
-         return rewards
-
-     def save_log(self, filename="env_log.csv"):
-         columns = [
-             "Step", "Total_Grid_Import", "Total_Grid_Export",
-             "Total_P2P_Buy", "Total_P2P_Sell", "Total_Cost",
-         ]
-         df = pd.DataFrame(self.env_log, columns=columns)
-         df.to_csv(filename, index=False)
-         print(f"Environment log saved to {filename}")
SolarSys/cluster.py DELETED
@@ -1,140 +0,0 @@
- import os
- import sys
- import numpy as np
- import torch
-
- # Ensure the project root is on the Python path
- # Please ensure you follow the proper directory structure for running this code
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
- from Environment.solar_sys_environment import SolarSys
- from Environment.cluster_env_wrapper import GlobalPriceVecEnvWrapper
- from Environment.cluster_env_wrapper import make_vec_env
-
-
- class InterClusterLedger:
-     """
-     Tracks inter-cluster debts/transfers.
-     """
-     def __init__(self):
-         self.balances = {}
-
-     def record_transfer(self, from_id: str, to_id: str, amount: float):
-         if from_id == to_id:
-             return
-         self.balances.setdefault(from_id, {})
-         self.balances.setdefault(to_id, {})
-         self.balances[from_id][to_id] = self.balances[from_id].get(to_id, 0.0) - amount
-         self.balances[to_id][from_id] = self.balances[to_id].get(from_id, 0.0) + amount
-
-     def get_balance(self, a_id: str, b_id: str) -> float:
-         return self.balances.get(a_id, {}).get(b_id, 0.0)
-
-     def net_balances(self) -> dict:
-         return self.balances
-
-
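A quick sketch of the ledger's sign convention (cluster IDs invented): the sender's entry toward the receiver goes negative and the receiver's mirror entry goes positive, so the two entries always sum to zero:

    ledger = InterClusterLedger()
    ledger.record_transfer("c0", "c1", 5.0)  # c0 sent 5 kWh to c1
    ledger.get_balance("c0", "c1")           # -5.0: net energy c0 has sent to c1
    ledger.get_balance("c1", "c0")           # +5.0: net energy c1 has received from c0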
- class InterClusterCoordinator:
-     def __init__(
-         self,
-         cluster_env,
-         high_level_agent,
-         ledger,
-         max_transfer_kwh: float = 1000000.0,
-         w_cost_savings: float = 2.0,
-         w_grid_penalty: float = 0.3,
-         w_p2p_bonus: float = 0.3
-     ):
-         self.cluster_env = cluster_env
-         self.agent = high_level_agent
-         self.ledger = ledger
-         self.max_transfer_kwh = max_transfer_kwh
-         self.w_cost_savings = w_cost_savings
-         self.w_grid_penalty = w_grid_penalty
-         self.w_p2p_bonus = w_p2p_bonus
-
-     def get_cluster_state(self, env, step_count: int) -> np.ndarray:
-         """
-         Returns an array summarizing a single cluster's state by reading from its vectorized attributes.
-         """
-         solar_env = env  # This is one of the vectorized SolarSys envs
-         idx = min(step_count, solar_env.num_steps - 1)
-         agg_soc = np.sum(solar_env.battery_soc)
-         agg_max_capacity = np.sum(solar_env.battery_max_capacity)
-         agg_soc_fraction = agg_soc / agg_max_capacity if agg_max_capacity > 0 else 0.0
-
-         agg_demand = np.sum(solar_env.demands_day[idx])
-         agg_solar = np.sum(solar_env.solars_day[idx])
-
-         price = solar_env.get_grid_price(idx)
-         t_norm = idx / float(solar_env.steps_per_day)
-
-         return np.array([
-             agg_soc, agg_max_capacity, agg_soc_fraction,
-             agg_demand, agg_solar, price, t_norm
-         ], dtype=np.float32)
-
-     def build_transfers(self, agent_action_vector: np.ndarray, reports: dict) -> tuple[np.ndarray, np.ndarray]:
-         """
-         Acts as a centralized market maker based on agent actions and LIVE capacity reports.
-         """
-         n = len(self.cluster_env.clusters)
-         raw_export_prefs = agent_action_vector[:, 0]
-         raw_import_prefs = agent_action_vector[:, 1]
-
-         export_prefs = torch.softmax(torch.tensor(raw_export_prefs), dim=-1).numpy()
-         import_prefs = torch.softmax(torch.tensor(raw_import_prefs), dim=-1).numpy()
-
-         total_available_for_export = 0.0
-         potential_exports = np.zeros(n)
-         for i in range(n):
-             export_capacity = reports[i]['export_capacity']
-             pref = float(export_prefs[i])
-             potential_exports[i] = min(pref * self.max_transfer_kwh, export_capacity)
-             total_available_for_export += potential_exports[i]
-
-         total_requested_for_import = 0.0
-         potential_imports = np.zeros(n)
-         for i in range(n):
-             import_capacity = reports[i]['import_capacity']
-             pref = float(import_prefs[i])
-             potential_imports[i] = min(pref * self.max_transfer_kwh, import_capacity)
-             total_requested_for_import += potential_imports[i]
-
-         total_matched_energy = min(total_available_for_export, total_requested_for_import)
-         actual_exports = np.zeros(n)
-         actual_imports = np.zeros(n)
-
-         if total_matched_energy > 1e-6:
-             if total_available_for_export > 0:
-                 actual_exports = (potential_exports / total_available_for_export) * total_matched_energy
-             if total_requested_for_import > 0:
-                 actual_imports = (potential_imports / total_requested_for_import) * total_matched_energy
-
-         return actual_exports, actual_imports
-
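A small numeric sketch of the matching rule (capacities invented): offers are first capped by live capacity, then scaled so that total exports equal total imports:

    import numpy as np
    potential_exports = np.array([10.0, 0.0, 6.0])  # kWh offered after capacity caps
    potential_imports = np.array([0.0, 8.0, 0.0])   # kWh requested after capacity caps
    matched = min(potential_exports.sum(), potential_imports.sum())           # 8.0
    actual_exports = potential_exports / potential_exports.sum() * matched    # [5., 0., 3.]
    actual_imports = potential_imports / potential_imports.sum() * matched    # [0., 8., 0.]
    # Energy is conserved: actual_exports.sum() == actual_imports.sum() == 8.0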
-     def compute_inter_cluster_reward(self, all_cluster_infos: dict, actual_transfers: tuple, step_count: int) -> np.ndarray:
-         """
-         Computes an INDIVIDUAL reward for each cluster agent to solve
-         the credit assignment problem.
-         """
-         actual_exports, actual_imports = actual_transfers
-         num_clusters = len(self.cluster_env.cluster_envs)
-         cluster_rewards = np.zeros(num_clusters, dtype=np.float32)
-
-         # Extract per-cluster cost and import data from the batched info dict
-         costs_per_cluster = [np.sum(c) for c in all_cluster_infos['costs']]
-         baseline_imports_per_cluster = [np.sum(imp) for imp in all_cluster_infos['grid_import_no_p2p']]
-         actual_imports_per_cluster = [np.sum(imp) for imp in all_cluster_infos['grid_import_with_p2p']]
-
-         # Get the single grid price for the current step
-         grid_price = self.cluster_env.cluster_envs[0].get_grid_price(step_count)
-
-         for i in range(num_clusters):
-             baseline_cost_this_cluster = baseline_imports_per_cluster[i] * grid_price
-             actual_cost_this_cluster = costs_per_cluster[i]
-             cost_saved = baseline_cost_this_cluster - actual_cost_this_cluster
-             r_savings = self.w_cost_savings * cost_saved
-             r_grid = self.w_grid_penalty * actual_imports_per_cluster[i]
-             p2p_volume_this_cluster = actual_exports[i] + actual_imports[i]
-             r_p2p = self.w_p2p_bonus * p2p_volume_this_cluster
-             cluster_rewards[i] = r_savings + r_p2p - r_grid
-
-         return cluster_rewards
SolarSys/cluster_evaluation.py DELETED
@@ -1,553 +0,0 @@
- import os
- import sys
- import time
- from datetime import datetime
- import re
- import numpy as np
- import torch
- import pandas as pd
- import matplotlib.pyplot as plt
- import glob
-
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
- from cluster import InterClusterCoordinator, InterClusterLedger
- from Environment.cluster_env_wrapper import make_vec_env
- from mappo.trainer.mappo import MAPPO
- from meanfield.trainer.meanfield import MFAC
-
- # ─── Jain's fairness index ────────────────────────────────────
- def compute_jains_fairness(values: np.ndarray) -> float:
-     if len(values) == 0:
-         return 0.0
-     if np.all(values == 0):
-         return 1.0
-     num = (values.sum()) ** 2
-     den = len(values) * (values ** 2).sum() + 1e-8
-     return float(num / den)
-
-
- def main():
-     # ─── Configuration ─────────────────────────────────────────
-     DATA_PATH = "DATA/testing/50houses_30days_TEST.csv"
-     MODEL_DIR = "Training/hierarchical_pennsylvania_100agents_10size_10000eps/models"
-     # --- Auto-detect state from model path ---
-     state_match = re.search(r"hierarchical_(oklahoma|colorado|pennsylvania)_", MODEL_DIR)
-     if not state_match:
-         state_match = re.search(r"mappo_(oklahoma|colorado|pennsylvania)_", MODEL_DIR)
-     if not state_match:
-         raise ValueError(
-             "Could not automatically detect the state (oklahoma, colorado, or pennsylvania) "
-             "from the model directory path. Please ensure the path contains the state name."
-         )
-     detected_state = state_match.group(1)
-     print(f"--- Detected state: {detected_state.upper()} ---")
-     cluster_size_match = re.search(r'(\d+)size_', MODEL_DIR)
-     if not cluster_size_match:
-         raise ValueError(
-             "Could not automatically detect the cluster size from the model directory path. "
-             "Please ensure the path contains a pattern like '5size_' or '10size_'."
-         )
-     detected_cluster_size = int(cluster_size_match.group(1))
-     print(f"--- Detected cluster size: {detected_cluster_size} ---")
-
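For the default MODEL_DIR above, the two regexes resolve as follows (shown here as standalone calls):

    import re
    path = "Training/hierarchical_pennsylvania_100agents_10size_10000eps/models"
    re.search(r"hierarchical_(oklahoma|colorado|pennsylvania)_", path).group(1)  # 'pennsylvania'
    re.search(r'(\d+)size_', path).group(1)                                      # '10'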
-     DAYS_TO_EVALUATE = 30
-     SOLAR_THRESHOLD = 0.1
-     MAX_TRANSFER_KWH = 1000000.0
-     W_COST_SAVINGS = 1.0
-     W_GRID_PENALTY = 0.5
-     W_P2P_BONUS = 0.2
-     # ─── Environment Setup ──────────────────────────────────────
-
-     cluster_env = make_vec_env(
-         data_path=DATA_PATH,
-         time_freq="15T",
-         cluster_size=detected_cluster_size,
-         state=detected_state
-     )
-     n_clusters = cluster_env.num_envs
-     sample_subenv = cluster_env.cluster_envs[0]
-     eval_num_steps = sample_subenv.num_steps
-     print(f"Number of steps per day: {eval_num_steps}")
-
-     # Get dimensions from the new vectorized environment object.
-     n_agents_per_cluster = sample_subenv.num_agents
-     local_dim = sample_subenv.observation_space.shape[-1]
-     global_dim = n_agents_per_cluster * local_dim
-     act_dim = sample_subenv.action_space[0].shape[-1]
-
-     print(f"Creating and loading {n_clusters} independent low-level MAPPO agents...")
-     low_agents = []
-     for i in range(n_clusters):
-         # Create the agent instance
-         agent = MAPPO(
-             n_agents=n_agents_per_cluster,
-             local_dim=local_dim,
-             global_dim=global_dim,
-             act_dim=act_dim,
-             lr=2e-4, gamma=0.95, lam=0.95, clip_eps=0.2, k_epochs=4, batch_size=512, episode_len=96
-         )
-         ckpt_pattern = os.path.join(MODEL_DIR, f"low_cluster{i}_ep*.pth")
-         ckpts_low = glob.glob(ckpt_pattern)
-         if not ckpts_low:
-             raise FileNotFoundError(f"No checkpoint found for cluster {i} with pattern: {ckpt_pattern}")
-
-         latest_low = sorted(ckpts_low, key=lambda x: int(re.search(r'ep(\d+)\.pth$', x).group(1)))[-1]
-         print(f"Loading low-level policy for cluster {i} from: {latest_low}")
-         agent.load(latest_low)
-         agent.actor.eval()
-         agent.critic.eval()
-
-         low_agents.append(agent)
-     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-     num_agents = sum(subenv.num_agents for subenv in cluster_env.cluster_envs)
-     run_name = f"eval_vectorized_{num_agents}agents_{DAYS_TO_EVALUATE}days_{timestamp}"
-     output_folder = os.path.join("runs_final_vectorized_eval", run_name)
-     logs_dir = os.path.join(output_folder, "logs")
-     plots_dir = os.path.join(output_folder, "plots")
-     for d in (logs_dir, plots_dir):
-         os.makedirs(d, exist_ok=True)
-     print(f"Saving evaluation outputs to: {output_folder}")
-
-     OBS_DIM_HI_LOCAL = 7
-     act_dim_inter = 2
-     print(f"Initializing evaluation inter-agent (MFAC): n_agents={n_clusters}, "
-           f"local_dim={OBS_DIM_HI_LOCAL}, act_dim={act_dim_inter}")
-     inter_agent = MFAC(
-         n_agents=n_clusters, local_dim=OBS_DIM_HI_LOCAL, act_dim=act_dim_inter,
-         lr=2e-4, gamma=0.95, lam=0.95, clip_eps=0.2, k_epochs=4, batch_size=512, episode_len=96
-     )
-     ckpts_inter = glob.glob(os.path.join(MODEL_DIR, "inter_ep*.pth"))
-     if not ckpts_inter:
-         raise FileNotFoundError(f"No high-level checkpoints (inter_ep*.pth) in {MODEL_DIR}")
-     latest_inter = sorted(ckpts_inter)[-1]
-     print("Loading inter-cluster policy from", latest_inter)
-     inter_agent.load(latest_inter)
-     inter_agent.actor.eval()
-     inter_agent.critic.eval()
-
-     ledger = InterClusterLedger()
-     coordinator = InterClusterCoordinator(
-         cluster_env, inter_agent, ledger, max_transfer_kwh=MAX_TRANSFER_KWH,
-         w_cost_savings=W_COST_SAVINGS, w_grid_penalty=W_GRID_PENALTY, w_p2p_bonus=W_P2P_BONUS
-     )
-
-     # ─── Data collectors ───────────────────────────────────────
-     all_logs = []
-     daily_summaries = []
-     step_timing_list = []
-
-     # === Per-day evaluation ===
-     evaluation_start = time.time()
-     for day in range(1, DAYS_TO_EVALUATE + 1):
-         obs_clusters, _ = cluster_env.reset()
-         done_all = False
-         step_count = 0
-         day_logs = []
-
-         while not done_all and step_count < eval_num_steps:
-             step_start_time = time.time()
-             step_count += 1
-
-             # 1) Get high-level actions
-             inter_cluster_obs_local_list = [coordinator.get_cluster_state(se, step_count) for se in cluster_env.cluster_envs]
-             inter_cluster_obs_local = np.array(inter_cluster_obs_local_list)
-             with torch.no_grad():
-                 high_level_action, _ = inter_agent.select_action(inter_cluster_obs_local)
-
-             # 2) Build transfers
-             current_reports = {i: {'export_capacity': cluster_env.get_export_capacity(i), 'import_capacity': cluster_env.get_import_capacity(i)} for i in range(n_clusters)}
-             exports, imports = coordinator.build_transfers(high_level_action, current_reports)
-
-             # 3) Get low-level actions
-             batch_global_obs = obs_clusters.reshape(n_clusters, -1)
-             with torch.no_grad():
-                 low_level_actions_list = []
-                 # Loop through each cluster to get actions from its dedicated agent
-                 for c_idx in range(n_clusters):
-                     agent = low_agents[c_idx]
-                     local_obs_cluster = obs_clusters[c_idx]
-                     global_obs_cluster = batch_global_obs[c_idx]
-
-                     actions, _ = agent.select_action(local_obs_cluster, global_obs_cluster)
-                     low_level_actions_list.append(actions)
-                 low_level_actions = np.stack(low_level_actions_list)
-             next_obs, rewards, done_all, step_info = cluster_env.step(
-                 low_level_actions,
-                 exports=exports,
-                 imports=imports
-             )
-             obs_clusters = next_obs
-             # 4) Log step timing
-             step_duration = time.time() - step_start_time
-             print(f"[Day {day}, Step {step_count}] Step time: {step_duration:.6f} seconds")
-             step_timing_list.append({"day": day, "step": step_count, "step_time_s": step_duration})
-
-             # --- Consolidated Logging ---
-             infos = step_info.get("cluster_infos")
-
-             for c_idx, subenv in enumerate(cluster_env.cluster_envs):
-                 grid_price_now = subenv.get_grid_price(step_count - 1)
-
-                 peer_price_now = step_info.get("peer_price_global")
-                 if peer_price_now is None:
-                     demands_step = subenv.demands_day[step_count - 1]
-                     solars_step = subenv.solars_day[step_count - 1]
-                     surplus = np.maximum(solars_step - demands_step, 0.0).sum()
-                     shortfall = np.maximum(demands_step - solars_step, 0.0).sum()
-                     peer_price_now = subenv.get_peer_price(step_count - 1, surplus, shortfall)
-
-                 for i, hid in enumerate(subenv.house_ids):
-                     is_battery_house = hid in subenv.batteries
-                     charge = infos["charge_amount"][c_idx][i]
-                     discharge = infos["discharge_amount"][c_idx][i]
-
-                     day_logs.append({
-                         "day": day,
-                         "step": step_count - 1,
-                         "house": hid,
-                         "cluster": c_idx,
-                         "grid_import_no_p2p": infos["grid_import_no_p2p"][c_idx][i],
-                         "grid_import_with_p2p": infos["grid_import_with_p2p"][c_idx][i],
-                         "grid_export": infos["grid_export"][c_idx][i],
-                         "p2p_buy": infos["p2p_buy"][c_idx][i],
-                         "p2p_sell": infos["p2p_sell"][c_idx][i],
-                         "actual_cost": infos["costs"][c_idx][i],
-                         "baseline_cost": infos["grid_import_no_p2p"][c_idx][i] * grid_price_now,
-                         "total_demand": subenv.demands_day[step_count - 1, i],
-                         "total_solar": subenv.solars_day[step_count - 1, i],
-                         "grid_price": grid_price_now,
-                         "peer_price": peer_price_now,
-                         "soc": (subenv.battery_soc[i] / subenv.battery_max_capacity[i]) if is_battery_house and subenv.battery_max_capacity[i] > 0 else np.nan,
-                         "degradation_cost": (charge + discharge) * subenv.battery_degradation_cost[i] if is_battery_house else 0.0,
-                         "reward": infos["agent_rewards"][c_idx][i],
-                     })
-
-             step_duration = time.time() - step_start_time  # Recompute step duration including logging (value unused after this point)
-
-         # ── End of day: aggregate & summarize ────────
-         df_day = pd.DataFrame(day_logs)
-         if df_day.empty:
-             continue
-         all_logs.extend(day_logs)
-
-         # === CONSOLIDATED DAILY SUMMARY CALCULATION ======================
-
-         num_solar_houses = df_day[df_day['total_solar'] > 0]['house'].nunique()
-
-         if num_solar_houses > 0:
-             num_agents_in_day = df_day['house'].nunique()
-             agg_solar_per_step = df_day.groupby("step")["total_solar"].sum()
-             sunny_steps_mask = agg_solar_per_step > (SOLAR_THRESHOLD * num_agents_in_day)
-             sunny_steps = sunny_steps_mask[sunny_steps_mask].index
-             trade_df = df_day[df_day["step"].isin(sunny_steps)]
-
-             grouped_house = df_day.groupby("house").sum(numeric_only=True)
-             grouped_step = df_day.groupby("step").sum(numeric_only=True)
-
-             total_demand = grouped_step["total_demand"].sum()
-             total_solar = grouped_step["total_solar"].sum()
-             total_p2p_buy = df_day['p2p_buy'].sum()
-             total_p2p_sell = df_day['p2p_sell'].sum()
-             total_actual_grid_import = df_day['grid_import_with_p2p'].sum()
-
-             baseline_cost_per_house = grouped_house["baseline_cost"]
-             actual_cost_per_house = grouped_house["actual_cost"]
-             cost_savings_per_house = baseline_cost_per_house - actual_cost_per_house
-             day_total_cost_savings = cost_savings_per_house.sum()
-
-             if baseline_cost_per_house.sum() > 0:
-                 overall_cost_savings_pct = day_total_cost_savings / baseline_cost_per_house.sum()
-             else:
-                 overall_cost_savings_pct = 0.0
-
-             baseline_import_per_house = grouped_house["grid_import_no_p2p"]
-             actual_import_per_house = grouped_house["grid_import_with_p2p"]
-             import_reduction_per_house = baseline_import_per_house - actual_import_per_house
-             day_total_import_reduction = import_reduction_per_house.sum()
-
-             if baseline_import_per_house.sum() > 0:
-                 overall_import_reduction_pct = day_total_import_reduction / baseline_import_per_house.sum()
-             else:
-                 overall_import_reduction_pct = 0.0
-
-             fairness_cost_savings = compute_jains_fairness(cost_savings_per_house.values)
-             fairness_import_reduction = compute_jains_fairness(import_reduction_per_house.values)
-             fairness_rewards = compute_jains_fairness(grouped_house["reward"].values)
-             fairness_p2p_buy = compute_jains_fairness(grouped_house["p2p_buy"].values)
-             fairness_p2p_sell = compute_jains_fairness(grouped_house["p2p_sell"].values)
-             fairness_p2p_total = compute_jains_fairness((grouped_house["p2p_buy"] + grouped_house["p2p_sell"]).values)
-
-             daily_summaries.append({
-                 "day": day,
-                 "day_total_demand": total_demand,
-                 "day_total_solar": total_solar,
-                 "day_p2p_buy": total_p2p_buy,
-                 "day_p2p_sell": total_p2p_sell,
-                 "cost_savings_abs": day_total_cost_savings,
-                 "cost_savings_pct": overall_cost_savings_pct,
-                 "fairness_cost_savings": fairness_cost_savings,
-                 "grid_reduction_abs": day_total_import_reduction,
-                 "grid_reduction_pct": overall_import_reduction_pct,
-                 "fairness_grid_reduction": fairness_import_reduction,
-                 "fairness_reward": fairness_rewards,
-                 "fairness_p2p_buy": fairness_p2p_buy,
-                 "fairness_p2p_sell": fairness_p2p_sell,
-                 "fairness_p2p_total": fairness_p2p_total,
-             })
-
-     # === FINAL PROCESSING AND SAVING =================================
-     evaluation_end = time.time()
-     total_eval_time = evaluation_end - evaluation_start
-     print(f"\nEvaluation loop finished. Total time: {total_eval_time:.2f} seconds.")
-
-     all_days_df = pd.DataFrame(all_logs)
-     if not all_days_df.empty:
-         # Save step-level logs
-         combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
-         all_days_df.to_csv(combined_csv_path, index=False)
-         print(f"Saved combined step-level logs to: {combined_csv_path}")
-
-         # Save timing logs
-         step_timing_df = pd.DataFrame(step_timing_list)
-         timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
-         step_timing_df.to_csv(timing_csv_path, index=False)
-         print(f"Saved step timing logs to: {timing_csv_path}")
-
-         # Save house-level summary
-         house_level_df = all_days_df.groupby("house").agg({
-             "baseline_cost": "sum",
-             "actual_cost": "sum",
-             "grid_import_no_p2p": "sum",
-             "grid_import_with_p2p": "sum",
-             "degradation_cost": "sum"
-         })
-         house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
-         house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]
-         house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
-         house_level_df.to_csv(house_summary_csv)
-         print(f"Saved final summary per house to: {house_summary_csv}")
-
-         # --- Calculate Final Summary Metrics ---
-         daily_summary_df = pd.DataFrame(daily_summaries)
-
-         fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
-         fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)
-
-         total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
-         total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
-         pct_cost_savings_all = total_cost_savings_all / total_baseline_cost_all if total_baseline_cost_all > 0 else 0.0
-
-         total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
-         total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
-         pct_grid_reduction_all = total_grid_reduction_all / total_baseline_import_all if total_baseline_import_all > 0 else 0.0
-
-         total_degradation_cost_all = all_days_df["degradation_cost"].sum()
-
-         # --- Calculate Alternative Performance Metrics ---
-         agg_solar_per_step = all_days_df.groupby(['day', 'step'])['total_solar'].sum()
-         num_agents_total = len(all_days_df['house'].unique())
-         sunny_steps_mask = agg_solar_per_step > (SOLAR_THRESHOLD * num_agents_total)
-         sunny_df = all_days_df[all_days_df.set_index(['day', 'step']).index.isin(sunny_steps_mask[sunny_steps_mask].index)]
-
-         baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
-         actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
-         grid_reduction_sunny_pct = (baseline_import_sunny - actual_import_sunny) / baseline_import_sunny if baseline_import_sunny > 0 else 0.0
-         baseline_cost_sunny = sunny_df['baseline_cost'].sum()
-         actual_cost_sunny = sunny_df['actual_cost'].sum()
-         cost_savings_sunny_pct = (baseline_cost_sunny - actual_cost_sunny) / baseline_cost_sunny if baseline_cost_sunny > 0 else 0.0
-
-         total_p2p_buy = all_days_df['p2p_buy'].sum()
-         total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
-         community_sourcing_rate_pct = total_p2p_buy / (total_p2p_buy + total_actual_grid_import) if (total_p2p_buy + total_actual_grid_import) > 0 else 0.0
-
-         total_p2p_sell = all_days_df['p2p_sell'].sum()
-         total_grid_export = all_days_df['grid_export'].sum()
-         solar_sharing_efficiency_pct = total_p2p_sell / (total_p2p_sell + total_grid_export) if (total_p2p_sell + total_grid_export) > 0 else 0.0
-
-         final_row = {
-             "day": "ALL_DAYS_SUMMARY", "cost_savings_abs": total_cost_savings_all, "cost_savings_pct": pct_cost_savings_all,
-             "grid_reduction_abs": total_grid_reduction_all, "grid_reduction_pct": pct_grid_reduction_all,
-             "fairness_cost_savings": fairness_cost_all, "fairness_grid_reduction": fairness_grid_all,
-             "total_degradation_cost": total_degradation_cost_all,
-             "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct,
-             "cost_savings_sunny_hours_pct": cost_savings_sunny_pct,
-             "community_sourcing_rate_pct": community_sourcing_rate_pct,
-             "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct,
-         }
-         final_row_df = pd.DataFrame([final_row])
-
-         if not daily_summary_df.empty:
-             daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
-
-         summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
-         daily_summary_df.to_csv(summary_csv, index=False)
-         print(f"Saved day-level summary with final multi-day row to: {summary_csv}")
-
-         # --- Final Printout ---
-         print("\n================== EVALUATION SUMMARY ==================")
-         print(f"Evaluation finished for {DAYS_TO_EVALUATE} days.\n")
-         print("--- Standard Metrics (24-Hour Average) ---")
-         print(f"Total grid reduction: {total_grid_reduction_all:.2f} kWh ({pct_grid_reduction_all:.2%})")
-         print(f"Total cost savings: ${total_cost_savings_all:.2f} ({pct_cost_savings_all:.2%})")
-         print(f"Jain's fairness on grid reduction: {fairness_grid_all:.3f}")
-         print(f"Jain's fairness on cost savings: {fairness_cost_all:.3f}\n")
-         print("--- Alternative Metrics (Highlighting Peak Performance) ---")
-         print(f"Grid reduction during solar hours: {grid_reduction_sunny_pct:.2%}")
-         print(f"Cost savings during solar hours: {cost_savings_sunny_pct:.2%}")
-         print(f"Community sourcing rate: {community_sourcing_rate_pct:.2%}")
-         print(f"Solar sharing efficiency: {solar_sharing_efficiency_pct:.2%}")
-         print("=========================================================")
-
-         #######################################################################################################
-         # ─── Plots ───────────────────────────────────────────────────
-
-         plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
408
- plot_daily_df["day"] = plot_daily_df["day"].astype(int)
409
-
410
- # 1) Daily Cost Savings Percentage
411
- plt.figure(figsize=(12, 6))
412
- plt.bar(plot_daily_df["day"], plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
413
- plt.xlabel("Day")
414
- plt.ylabel("Cost Savings (%)")
415
- plt.title("Daily Community Cost Savings Percentage")
416
- plt.xticks(plot_daily_df["day"])
417
- plt.grid(axis='y', linestyle='--', alpha=0.7)
418
- plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
419
- plt.close()
420
-
421
- # 2) Daily Total Demand vs. Solar
422
- plt.figure(figsize=(12, 6))
423
- bar_width = 0.4
424
- days = plot_daily_df["day"]
425
- plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width, label="Total Demand", color='coral')
426
- plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width, label="Total Solar Generation", color='gold')
427
- plt.xlabel("Day")
428
- plt.ylabel("Energy (kWh)")
429
- plt.title("Total Community Demand vs. Solar Generation Per Day")
430
- plt.xticks(days)
431
- plt.legend()
432
- plt.grid(axis='y', linestyle='--', alpha=0.7)
433
- plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
434
- plt.close()
435
-
436
- # 3) Combined Time Series of Energy Flows
437
- step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
438
- step_group["global_step"] = (step_group["day"] - 1) * eval_num_steps + step_group["step"]
439
- fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)
440
-
441
- # Subplot 1: Grid Import vs P2P Buy
442
- ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"], label="Grid Import (with P2P)", color='r')
443
- ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
444
- ax1.set_ylabel("Energy (kWh)")
445
- ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
446
- ax1.legend()
447
- ax1.grid(True, linestyle='--', alpha=0.6)
448
-
449
- # Subplot 2: Grid Export vs P2P Sell
450
- ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
451
- ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
452
- ax2.set_xlabel("Global Timestep")
453
- ax2.set_ylabel("Energy (kWh)")
454
- ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
455
- ax2.legend()
456
- ax2.grid(True, linestyle='--', alpha=0.6)
457
-
458
- plt.tight_layout()
459
- plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
460
- plt.close()
461
-
462
- # 4) Stacked Bar of Daily Energy Sources
463
- daily_agg = all_days_df.groupby("day").sum(numeric_only=True)
464
-
465
- plt.figure(figsize=(12, 7))
466
- plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"], label="Grid Import (with P2P)", color='crimson')
467
- plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"], label="P2P Buy", color='limegreen')
468
- plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"], label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')
469
-
470
- plt.xlabel("Day")
471
- plt.ylabel("Energy (kWh)")
472
- plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
473
- plt.xticks(daily_agg.index)
474
- plt.legend()
475
- plt.grid(axis='y', linestyle='--', alpha=0.7)
476
- plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
477
- plt.close()
478
-
479
- # 5) Fairness Metrics Over Time
480
- plt.figure(figsize=(12, 6))
481
- plt.plot(plot_daily_df["day"], plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
482
- plt.plot(plot_daily_df["day"], plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
483
- plt.plot(plot_daily_df["day"], plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
484
- plt.xlabel("Day")
485
- plt.ylabel("Jain's Fairness Index")
486
- plt.title("Daily Fairness Metrics")
487
- plt.xticks(plot_daily_df["day"])
488
- plt.ylim(0, 1.05)
489
- plt.legend()
490
- plt.grid(True, linestyle='--', alpha=0.7)
491
- plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
492
- plt.close()
493
- # 6) Per-House Summary of Cost Savings and Grid Import Reduction
494
- fig, ax1 = plt.subplots(figsize=(15, 7))
495
-
496
- house_ids_str = house_level_df.index.astype(str)
497
- bar_width = 0.4
498
- index = np.arange(len(house_ids_str))
499
- color1 = 'tab:green'
500
- ax1.set_xlabel('House ID')
501
- ax1.set_ylabel('Total Cost Savings ($)', color=color1)
502
- ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width, label='Cost Savings', color=color1)
503
- ax1.tick_params(axis='y', labelcolor=color1)
504
- ax1.set_xticks(index)
505
- ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")
506
- ax2 = ax1.twinx()
507
- color2 = 'tab:blue'
508
- ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
509
- ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width, label='Import Reduction', color=color2)
510
- ax2.tick_params(axis='y', labelcolor=color2)
511
-
512
- plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {DAYS_TO_EVALUATE} days)')
513
-
514
- fig.tight_layout()
515
- plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
516
- plt.close()
517
-
518
- # 7) Price Dynamics for a Single Day
519
- day1_prices = all_days_df[all_days_df['day'] == 1][['step', 'grid_price', 'peer_price']].drop_duplicates()
520
- plt.figure(figsize=(12, 6))
521
- plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
522
- plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
523
- plt.xlabel("Timestep of Day")
524
- plt.ylabel("Price ($/kWh)")
525
- plt.title("Price Dynamics on Day 1")
526
- plt.legend()
527
- plt.grid(True, linestyle='--', alpha=0.6)
528
- plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
529
- plt.close()
530
-
531
- # 8) Battery State of Charge (SoC) for a Sample of Houses
532
- day1_df = all_days_df[all_days_df['day'] == 1]
533
- battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()
534
-
535
- if len(battery_houses) > 0:
536
- sample_houses = battery_houses[:min(4, len(battery_houses))]
537
- plt.figure(figsize=(12, 6))
538
- for house in sample_houses:
539
- house_df = day1_df[day1_df['house'] == house]
540
- plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')
541
-
542
- plt.xlabel("Timestep of Day")
543
- plt.ylabel("State of Charge (%)")
544
- plt.title("Battery SoC on Day 1 for Sample Houses")
545
- plt.legend()
546
- plt.grid(True, linestyle='--', alpha=0.6)
547
- plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
548
- plt.close()
549
-
550
- print("All plots have been generated and saved. Evaluation complete.")
551
-
552
- if __name__ == "__main__":
553
- main()
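
The daily and multi-day summaries above rely on compute_jains_fairness, defined earlier in this script. As a point of reference, here is a minimal sketch assuming the helper implements the standard Jain's index, J(x) = (sum x)^2 / (n * sum x^2), which is 1.0 for a perfectly equal allocation and 1/n when one participant captures everything:

import numpy as np

def jains_fairness_sketch(values) -> float:
    # Jain's index: (sum x)^2 / (n * sum x^2); 1.0 means perfectly fair.
    x = np.asarray(values, dtype=np.float64)
    denom = len(x) * np.sum(x ** 2)
    return float(np.sum(x) ** 2 / denom) if denom > 0 else 0.0

print(jains_fairness_sketch([5.0, 5.0, 5.0, 5.0]))   # 1.0  (equal savings)
print(jains_fairness_sketch([20.0, 0.0, 0.0, 0.0]))  # 0.25 (one house takes all)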
 
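One more step worth unpacking from the evaluation above is the solar-hours filter: it sums solar across houses per (day, step), keeps timesteps whose community total exceeds SOLAR_THRESHOLD per house, and selects the matching rows via a MultiIndex membership test. A reduced sketch of the same pattern on toy data (the threshold value and frame contents here are made up; column names follow the script):

import pandas as pd

SOLAR_THRESHOLD = 0.1  # illustrative per-house threshold
df = pd.DataFrame({
    "day":         [1, 1, 1, 1],
    "step":        [0, 0, 1, 1],
    "house":       [0, 1, 0, 1],
    "total_solar": [0.0, 0.05, 0.8, 0.9],
})

num_agents = df["house"].nunique()
agg = df.groupby(["day", "step"])["total_solar"].sum()
mask = agg > SOLAR_THRESHOLD * num_agents  # community-level "sunny" timesteps
sunny = df[df.set_index(["day", "step"]).index.isin(mask[mask].index)]
# Only the step-1 rows survive: their aggregate solar (1.7) clears 0.2.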
 
SolarSys/mappo/_init_.py DELETED
File without changes
SolarSys/mappo/trainer/__init__.py DELETED
File without changes
SolarSys/mappo/trainer/mappo.py DELETED
@@ -1,214 +0,0 @@
1
- # mappo.py
2
- import torch
3
- import torch.nn as nn
4
- import random
5
- import numpy as np
6
- from torch.distributions import Normal
7
- from torch.amp import autocast
8
- from torch.cuda.amp import GradScaler
9
-
10
-
11
-
12
- # Device selection
13
- if torch.cuda.is_available():
14
- device = torch.device("cuda")
15
- print("MAPPO using CUDA (NVIDIA GPU)")
16
- else:
17
- device = torch.device("cpu")
18
- print("MAPPO using CPU")
19
- # elif torch.backends.mps.is_available():
20
- # device = torch.device("mps")
21
- # print("Using MPS (Apple Silicon GPU)")
22
-
23
- # device = torch.device("cpu")
24
-
25
- def set_global_seed(seed: int):
26
- random.seed(seed)
27
- np.random.seed(seed)
28
- torch.manual_seed(seed)
29
-
30
- if torch.cuda.is_available():
31
- torch.cuda.manual_seed_all(seed)
32
- torch.backends.cudnn.deterministic = False
33
- torch.backends.cudnn.benchmark = True
34
-
35
- SEED = 50  # Please try running with different seeds to reproduce results; we tried 42, 1, 10, 20, and 50.
36
- set_global_seed(SEED)
37
-
38
- class MLP(nn.Module):
39
- def __init__(self, input_dim, hidden_dims, output_dim):
40
- super().__init__()
41
- layers = []
42
- last_dim = input_dim
43
- for h in hidden_dims:
44
- layers += [nn.Linear(last_dim, h), nn.ReLU()]
45
- last_dim = h
46
- layers.append(nn.Linear(last_dim, output_dim))
47
- self.net = nn.Sequential(*layers)
48
-
49
- def forward(self, x):
50
- return self.net(x)
51
-
52
- class Actor(nn.Module):
53
- def __init__(self, obs_dim, act_dim, hidden=(64,64)):
54
- super().__init__()
55
- self.net = MLP(obs_dim, hidden, act_dim)
56
- self.log_std = nn.Parameter(torch.zeros(act_dim))
57
-
58
- def forward(self, x):
59
- mean = self.net(x)
60
- std = torch.exp(self.log_std)
61
- return mean, std
62
-
63
- class Critic(nn.Module):
64
- def __init__(self, state_dim, hidden=(128,128)):
65
- super().__init__()
66
- self.net = MLP(state_dim, hidden, 1)
67
-
68
- def forward(self, x):
69
- return self.net(x).squeeze(-1)
70
-
71
- class MAPPO:
72
- def __init__(
73
- self,
74
- n_agents,
75
- local_dim,
76
- global_dim,
77
- act_dim,
78
- lr=3e-4,
79
- gamma=0.99,
80
- lam=0.95,
81
- clip_eps=0.2,
82
- k_epochs=10,
83
- batch_size=1024,
84
- episode_len=96
85
- ):
86
- self.n_agents = n_agents
87
- self.local_dim = local_dim
88
- self.global_dim = global_dim
89
- self.act_dim = act_dim
90
- self.gamma = gamma
91
- self.lam = lam
92
- self.clip_eps = clip_eps
93
- self.k_epochs = k_epochs
94
- self.batch_size = batch_size
95
- self.episode_len = episode_len
96
-
97
- self.actor = Actor(local_dim, act_dim).to(device)
98
- self.critic = Critic(global_dim).to(device)
99
-
100
- self.opt_a = torch.optim.Adam(self.actor.parameters(), lr=lr)
101
- self.opt_c = torch.optim.Adam(self.critic.parameters(), lr=lr)
102
-
103
- print("MAPPO CUDA AMP is disabled for stability.")
104
-
105
- self.init_buffer()
106
-
107
- def init_buffer(self):
108
- self.ls_buf = np.zeros((self.episode_len, self.n_agents, self.local_dim), dtype=np.float16)
109
- self.gs_buf = np.zeros((self.episode_len, self.global_dim), dtype=np.float16)
110
- self.ac_buf = np.zeros((self.episode_len, self.n_agents, self.act_dim), dtype=np.float16)
111
- self.lp_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float16)
112
- self.rw_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float16)
113
- self.done_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float16)
114
- self.next_gs_buf = np.zeros((self.episode_len, self.global_dim), dtype=np.float16)
115
- self.step_idx = 0
116
-
117
- @torch.no_grad()
118
- def select_action(self, local_obs, global_obs):
119
- l = torch.from_numpy(local_obs).float().to(device)
120
- mean, std = self.actor(l)
121
- dist = Normal(mean, std)
122
- a = dist.sample()
123
- return a.cpu().numpy(), dist.log_prob(a).sum(-1).cpu().numpy()
124
-
125
- def store(self, local_obs, global_obs, action, logp, reward, done, next_global_obs):
126
- if self.step_idx < self.episode_len:
127
- self.ls_buf[self.step_idx] = local_obs
128
- self.gs_buf[self.step_idx] = global_obs
129
- self.ac_buf[self.step_idx] = action
130
- self.lp_buf[self.step_idx] = logp
131
- self.rw_buf[self.step_idx] = reward
132
- self.done_buf[self.step_idx] = done
133
- self.next_gs_buf[self.step_idx] = next_global_obs
134
- self.step_idx += 1
135
-
136
- def compute_gae(self, T, vals):
137
- N = self.n_agents
138
- vals_agent = vals.unsqueeze(1).expand(-1, N).cpu().numpy()
139
-
140
- next_vals_agent = np.zeros_like(vals_agent)
141
- next_vals_agent[:-1] = vals_agent[1:]
142
-
143
- if not self.done_buf[T-1].all():
144
- with torch.no_grad():
145
- v_last = self.critic(
146
- torch.from_numpy(self.next_gs_buf[T-1]).float().to(device)
147
- ).cpu().item()
148
- next_vals_agent[T-1, :] = v_last
149
-
150
- adv = np.zeros_like(vals_agent, dtype=np.float16)
151
- gae_lambda = 0.0
152
- for t in reversed(range(T)):
153
- masks = 1.0 - self.done_buf[t]
154
- rewards = self.rw_buf[t]
155
-
156
- delta = rewards + self.gamma * next_vals_agent[t] * masks - vals_agent[t]
157
- gae_lambda = delta + self.gamma * self.lam * masks * gae_lambda
158
- adv[t] = gae_lambda
159
-
160
- ret = adv + vals_agent
161
- adv_flat = torch.from_numpy(adv.flatten()).to(device)
162
- ret_flat = torch.from_numpy(ret.flatten()).to(device)
163
- return adv_flat, ret_flat
164
-
165
- def update(self):
166
- T = self.step_idx
167
- if T == 0: return
168
-
169
- gs_tensor = torch.from_numpy(self.gs_buf[:T]).float().to(device)
170
- ls_tensor = torch.from_numpy(self.ls_buf[:T]).float().to(device).view(T * self.n_agents, -1)
171
- ac_tensor = torch.from_numpy(self.ac_buf[:T]).float().to(device).view(T * self.n_agents, -1)
172
- lp_tensor = torch.from_numpy(self.lp_buf[:T]).float().to(device).view(-1)
173
-
174
- with torch.no_grad():
175
- vals = self.critic(gs_tensor)
176
-
177
- adv_flat, ret_flat = self.compute_gae(T, vals)
178
- adv_flat = (adv_flat - adv_flat.mean()) / (adv_flat.std() + 1e-8)
179
-
180
- gs_for_batch = gs_tensor.unsqueeze(1).expand(-1, self.n_agents, -1).reshape(T * self.n_agents, self.global_dim)
181
-
182
- dataset = torch.utils.data.TensorDataset(ls_tensor, gs_for_batch, ac_tensor, lp_tensor, adv_flat, ret_flat)
183
- gen = torch.Generator()
184
- gen.manual_seed(SEED)
185
- loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True, generator=gen)
186
-
187
- for _ in range(self.k_epochs):
188
- for b_ls, b_gs, b_ac, b_lp, b_adv, b_ret in loader:
189
- mean, std = self.actor(b_ls)
190
- dist = Normal(mean, std)
191
- entropy = dist.entropy().mean()
192
- lp_new = dist.log_prob(b_ac).sum(-1)
193
- ratio = torch.exp(lp_new - b_lp)
194
- surr1 = ratio * b_adv
195
- surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * b_adv
196
- actor_loss = -torch.min(surr1, surr2).mean() - 0.01 * entropy
197
- self.opt_a.zero_grad()
198
- actor_loss.backward()
199
- self.opt_a.step()
200
- val_pred = self.critic(b_gs)
201
- critic_loss = nn.MSELoss()(val_pred, b_ret)
202
- self.opt_c.zero_grad()
203
- critic_loss.backward()
204
- self.opt_c.step()
205
- self.step_idx = 0
206
-
207
- def save(self, path):
208
- torch.save({'actor': self.actor.state_dict(),
209
- 'critic': self.critic.state_dict()}, path)
210
-
211
- def load(self, path):
212
- data = torch.load(path, map_location=device)
213
- self.actor.load_state_dict(data['actor'])
214
- self.critic.load_state_dict(data['critic'])
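
To make the interface above concrete, here is a minimal usage sketch of the MAPPO class over one collected episode, assuming the class is in scope. The dimensions are illustrative, not the SolarSys ones, and the environment transition is a placeholder:

import numpy as np

n_agents, local_dim, act_dim = 10, 8, 3
global_dim = n_agents * local_dim
agent = MAPPO(n_agents, local_dim, global_dim, act_dim, episode_len=96)

local_obs = np.random.randn(n_agents, local_dim).astype(np.float32)
global_obs = local_obs.reshape(-1)

for t in range(96):
    actions, logps = agent.select_action(local_obs, global_obs)
    # A real loop would step the environment here; these values stand in for it.
    rewards = np.zeros(n_agents, dtype=np.float32)
    dones = np.zeros(n_agents, dtype=np.float32)
    next_local_obs = local_obs
    agent.store(local_obs, global_obs, actions, logps, rewards, dones,
                next_local_obs.reshape(-1))
    local_obs, global_obs = next_local_obs, next_local_obs.reshape(-1)

agent.update()  # one clipped-PPO update over the episode; the buffer then resets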
 
SolarSys/meanfield/_init_.py DELETED
File without changes
SolarSys/meanfield/trainer/__init__.py DELETED
File without changes
SolarSys/meanfield/trainer/meanfield.py DELETED
@@ -1,238 +0,0 @@
1
- # meanfield.py
2
- import torch
3
- import torch.nn as nn
4
- import numpy as np
5
- import random
6
- from torch.distributions import Normal
7
- from torch.amp import autocast
8
- from torch.cuda.amp import GradScaler
9
-
10
- # Device selection
11
- if torch.cuda.is_available():
12
- device = torch.device("cuda")
13
- print("Using CUDA (NVIDIA GPU)")
14
- else:
15
- device = torch.device("cpu")
16
- print("Using CPU")
17
-
18
- def set_global_seed(seed: int):
19
- random.seed(seed)
20
- np.random.seed(seed)
21
- torch.manual_seed(seed)
22
- if torch.cuda.is_available():
23
- torch.cuda.manual_seed_all(seed)
24
- torch.backends.cudnn.deterministic = False
25
- torch.backends.cudnn.benchmark = True
26
-
27
- SEED = 42  # Please try running with different seeds to reproduce results; we tried 42, 1, 10, 20, and 50.
28
- set_global_seed(SEED)
29
-
30
- class MLP(nn.Module):
31
- def __init__(self, input_dim, hidden_dims, output_dim):
32
- super().__init__()
33
- layers = []
34
- last_dim = input_dim
35
- for h in hidden_dims:
36
- layers += [nn.Linear(last_dim, h), nn.ReLU()]
37
- last_dim = h
38
- layers.append(nn.Linear(last_dim, output_dim))
39
- self.net = nn.Sequential(*layers)
40
-
41
- def forward(self, x):
42
- return self.net(x)
43
-
44
- class Actor(nn.Module):
45
- def __init__(self, obs_dim, mean_field_dim, act_dim, hidden=(64, 64)):
46
- super().__init__()
47
- input_dim = obs_dim + mean_field_dim
48
- self.net = MLP(input_dim, hidden, act_dim)
49
- self.log_std = nn.Parameter(torch.zeros(act_dim))
50
-
51
- def forward(self, local_obs, mean_field):
52
- x = torch.cat([local_obs, mean_field], dim=-1)
53
- mean = self.net(x)
54
- LOG_STD_MIN = -5
55
- LOG_STD_MAX = 2
56
- clamped_log_std = torch.clamp(self.log_std, LOG_STD_MIN, LOG_STD_MAX)
57
- std = torch.exp(clamped_log_std)
58
-
59
- return Normal(mean, std)
60
-
61
- class Critic(nn.Module):
62
- def __init__(self, obs_dim, mean_field_dim, hidden=(128, 128)):
63
- super().__init__()
64
- input_dim = obs_dim + mean_field_dim
65
- self.net = MLP(input_dim, hidden, 1)
66
-
67
- def forward(self, local_obs, mean_field):
68
- x = torch.cat([local_obs, mean_field], dim=-1)
69
- return self.net(x).squeeze(-1)
70
-
71
- class MFAC:
72
- def __init__(
73
- self,
74
- n_agents,
75
- local_dim,
76
- act_dim,
77
- lr=3e-4,
78
- gamma=0.99,
79
- lam=0.95,
80
- clip_eps=0.2,
81
- k_epochs=10,
82
- batch_size=1024,
83
- entropy_coeff=0.01,
84
- episode_len=96
85
- ):
86
- self.n_agents = n_agents
87
- self.local_dim = local_dim
88
- self.mean_field_dim = local_dim
89
- self.act_dim = act_dim
90
- self.gamma = gamma
91
- self.lam = lam
92
- self.clip_eps = clip_eps
93
- self.k_epochs = k_epochs
94
- self.batch_size = batch_size
95
- self.entropy_coeff = entropy_coeff
96
- self.episode_len = episode_len
97
-
98
- self.actor = Actor(self.local_dim, self.mean_field_dim, self.act_dim).to(device)
99
- self.critic = Critic(self.local_dim, self.mean_field_dim).to(device)
100
-
101
- self.opt_a = torch.optim.Adam(self.actor.parameters(), lr=lr)
102
- self.opt_c = torch.optim.Adam(self.critic.parameters(), lr=lr)
103
-
104
- self.use_cuda_amp = (device.type == 'cuda')
105
- self.scaler = GradScaler(enabled=self.use_cuda_amp)
106
- print(f"MFAC CUDA AMP Enabled: {self.use_cuda_amp}")
107
-
108
- self.init_buffer()
109
-
110
- def init_buffer(self):
111
- self.ls_buf = np.zeros((self.episode_len, self.n_agents, self.local_dim), dtype=np.float32)
112
- self.ac_buf = np.zeros((self.episode_len, self.n_agents, self.act_dim), dtype=np.float32)
113
- self.lp_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float32)
114
- self.rw_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float32)
115
- self.done_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float32)
116
- self.next_ls_buf = np.zeros((self.episode_len, self.n_agents, self.local_dim), dtype=np.float32)
117
- self.step_idx = 0
118
-
119
- def clear_buffer(self):
120
- pass
121
-
122
- def _get_mean_field(self, obs_batch):
123
- if self.n_agents <= 1:
124
- return torch.zeros(*obs_batch.shape[:-1], self.mean_field_dim, device=obs_batch.device)
125
- total_obs = torch.sum(obs_batch, dim=-2, keepdim=True)
126
- mean_field = (total_obs - obs_batch) / (self.n_agents - 1)
127
- return mean_field
128
-
129
- @torch.no_grad()
130
- def select_action(self, local_obs, evaluate=False):
131
- obs_tensor = torch.from_numpy(local_obs).float().to(device)
132
- with autocast(device_type=device.type, dtype=torch.float16, enabled=self.use_cuda_amp):
133
- mean_field = self._get_mean_field(obs_tensor)
134
- dist = self.actor(obs_tensor, mean_field)
135
- if evaluate:
136
- action = dist.mean
137
- else:
138
- action = dist.sample()
139
-
140
- log_prob = dist.log_prob(action).sum(-1)
141
- return action.cpu().numpy(), log_prob.cpu().numpy()
142
-
143
- def store(self, local_obs, action, logp, reward, done, next_local_obs):
144
- if self.step_idx < self.episode_len:
145
- self.ls_buf[self.step_idx] = local_obs
146
- self.ac_buf[self.step_idx] = action
147
- self.lp_buf[self.step_idx] = logp
148
- self.rw_buf[self.step_idx] = np.array(reward, dtype=np.float32)
149
- self.done_buf[self.step_idx] = np.array(done, dtype=np.float32)
150
- self.next_ls_buf[self.step_idx] = next_local_obs
151
- self.step_idx += 1
152
-
153
- def update(self):
154
- T = self.step_idx
155
- if T == 0: return
156
-
157
- ls_tensor = torch.from_numpy(self.ls_buf[:T]).float().to(device)
158
- ac_tensor = torch.from_numpy(self.ac_buf[:T]).float().to(device)
159
- lp_tensor = torch.from_numpy(self.lp_buf[:T]).float().to(device)
160
- rw_tensor = torch.from_numpy(self.rw_buf[:T]).float().to(device)
161
- done_tensor = torch.from_numpy(self.done_buf[:T]).float().to(device)
162
- next_ls_tensor = torch.from_numpy(self.next_ls_buf[:T]).float().to(device)
163
-
164
- with torch.no_grad():
165
- with autocast(device_type=device.type, dtype=torch.float16, enabled=self.use_cuda_amp):
166
- mf_all = self._get_mean_field(ls_tensor)
167
- vals = self.critic(ls_tensor, mf_all)
168
- next_mf_all = self._get_mean_field(next_ls_tensor)
169
- next_vals = self.critic(next_ls_tensor, next_mf_all)
170
- adv = torch.zeros_like(rw_tensor)
171
- gae = 0
172
- masks = 1.0 - done_tensor
173
- for t in reversed(range(T)):
174
- delta = rw_tensor[t] + self.gamma * next_vals[t] * masks[t] - vals[t]
175
- gae = delta + self.gamma * self.lam * masks[t] * gae
176
- adv[t] = gae
177
- ret = adv + vals
178
-
179
- N, D_l = self.n_agents, self.local_dim
180
-
181
- ls_flat = ls_tensor.view(T * N, D_l)
182
- mf_flat = mf_all.view(T * N, self.mean_field_dim)
183
- ac_flat = ac_tensor.view(T * N, self.act_dim)
184
- lp_flat = lp_tensor.view(-1)
185
- adv_flat = adv.view(-1)
186
- ret_flat = ret.view(-1)
187
-
188
- adv_flat = (adv_flat - adv_flat.mean()) / (adv_flat.std() + 1e-8)
189
- ret_flat = (ret_flat - ret_flat.mean()) / (ret_flat.std() + 1e-8)
190
-
191
- dataset = torch.utils.data.TensorDataset(ls_flat, mf_flat, ac_flat, lp_flat, adv_flat, ret_flat)
192
- gen = torch.Generator()
193
- gen.manual_seed(SEED)
194
- loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True, generator=gen)
195
-
196
- for _ in range(self.k_epochs):
197
- for b_ls, b_mf, b_ac, b_lp, b_adv, b_ret in loader:
198
-
199
- self.opt_a.zero_grad(set_to_none=True)
200
- with autocast(device_type=device.type, dtype=torch.float16, enabled=self.use_cuda_amp):
201
- dist_new = self.actor(b_ls, b_mf)
202
- lp_new = dist_new.log_prob(b_ac).sum(-1)
203
- entropy = dist_new.entropy().sum(-1).mean()
204
- log_ratio = torch.clamp(lp_new - b_lp, -20.0, 20.0)
205
- ratio = torch.exp(log_ratio)
206
- surr1 = ratio * b_adv
207
- surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * b_adv
208
- actor_loss = -torch.min(surr1, surr2).mean() - self.entropy_coeff * entropy
209
-
210
- self.scaler.scale(actor_loss).backward()
211
- self.scaler.unscale_(self.opt_a)
212
- torch.nn.utils.clip_grad_norm_(self.actor.parameters(), max_norm=0.5)
213
- self.scaler.step(self.opt_a)
214
-
215
- self.opt_c.zero_grad(set_to_none=True)
216
- with autocast(device_type=device.type, dtype=torch.float16, enabled=self.use_cuda_amp):
217
- val_pred = self.critic(b_ls, b_mf)
218
- critic_loss = nn.MSELoss()(val_pred, b_ret)
219
-
220
- self.scaler.scale(critic_loss).backward()
221
- self.scaler.unscale_(self.opt_c)
222
- torch.nn.utils.clip_grad_norm_(self.critic.parameters(), max_norm=0.5)
223
- self.scaler.step(self.opt_c)
224
-
225
- self.scaler.update()
226
-
227
- self.step_idx = 0
228
-
229
- def save(self, path):
230
- torch.save({
231
- 'actor': self.actor.state_dict(),
232
- 'critic': self.critic.state_dict()
233
- }, path)
234
-
235
- def load(self, path):
236
- data = torch.load(path, map_location=device)
237
- self.actor.load_state_dict(data['actor'])
238
- self.critic.load_state_dict(data['critic'])
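
The core of MFAC above is _get_mean_field, which gives each agent the average observation of all other agents in one vectorized step: mf_i = (sum_j obs_j - obs_i) / (N - 1). A small check of that identity against an explicit per-agent loop (shapes illustrative):

import torch

N, D = 4, 3
obs = torch.randn(N, D)

# Vectorized form used by _get_mean_field: total minus own, divided by N - 1.
mf_fast = (obs.sum(dim=-2, keepdim=True) - obs) / (N - 1)

# Explicit average over the other agents, for comparison.
mf_slow = torch.stack(
    [torch.cat([obs[:i], obs[i + 1:]]).mean(dim=0) for i in range(N)]
)

assert torch.allclose(mf_fast, mf_slow, atol=1e-6)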
 
SolarSys/training_freezing.py DELETED
@@ -1,523 +0,0 @@
1
- import os
2
- import sys
3
- import time
4
- from datetime import datetime, timedelta
5
- import re
6
- import numpy as np
7
- import torch
8
- import pandas as pd
9
- import matplotlib.pyplot as plt
10
-
11
- # Allow imports from project root
12
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
13
-
14
- # Important: follow the same directory structure as provided in the zip file so these imports resolve.
15
- from cluster import InterClusterCoordinator, InterClusterLedger
16
- from Environment.cluster_env_wrapper import make_vec_env
17
- from mappo.trainer.mappo import MAPPO
18
- from meanfield.trainer.meanfield import MFAC
19
-
20
- def recursive_sum(item):
21
- total = 0
22
- if hasattr(item, '__iter__') and not isinstance(item, str):
23
- for sub_item in item:
24
- total += recursive_sum(sub_item)
25
- elif np.isreal(item):
26
- total += item
27
- return total
28
-
29
-
30
- def main():
31
- overall_start_time = time.time()
32
- # ─── Hyperparameters ───────────────────────
33
- STATE_TO_RUN = "pennsylvania" # or "colorado", "oklahoma"
34
- DATA_PATH = "Cluster_with_proper_loggings_FINALE/A_SPLIT_DATA/training/50houses_152days_TRAIN.csv"
35
- # Dynamically extract the number of agents from the file path
36
- match = re.search(r'(\d+)houses', DATA_PATH)
37
- if not match:
38
- raise ValueError("Could not extract the number of houses from DATA_PATH.")
39
- NUMBER_OF_AGENTS = int(match.group(1))
40
- NUM_EPISODES = 10000
41
- CLUSTER_SIZE = 10
42
- BATCH_SIZE = 256
43
- CHECKPOINT_INTERVAL= 1000
44
- WINDOW_SIZE = 80
45
- MAX_TRANSFER_KWH = 100000
46
- LR = 2e-4
47
- GAMMA = 0.95
48
- LAMBDA = 0.95
49
- CLIP_EPS = 0.2
50
- K_EPOCHS = 4
51
- JOINT_TRAINING_START_EPISODE = 2000
52
- FREEZE_HIGH_FOR_EPISODES = 20
53
- FREEZE_LOW_FOR_EPISODES = 10
54
-
55
- # ─── Build directories ─────────────────
56
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
57
- run_name = f"hierarchical_{STATE_TO_RUN}_{NUMBER_OF_AGENTS}agents_" \
58
- f"{CLUSTER_SIZE}size_{NUM_EPISODES}eps_{timestamp}"
59
- root_dir = os.path.join("Training", run_name) # New folder for new runs
60
- models_dir= os.path.join(root_dir, "models")
61
- logs_dir = os.path.join(root_dir, "logs")
62
- plots_dir = os.path.join(root_dir, "plots")
63
-
64
- for d in (models_dir, logs_dir, plots_dir):
65
- os.makedirs(d, exist_ok=True)
66
- print(f"Logging to: {root_dir}")
67
-
68
- # ─── Environment & Agent Initialization ─────────────────
69
- cluster_env = make_vec_env(
70
- data_path=DATA_PATH,
71
- time_freq="15T",
72
- cluster_size=CLUSTER_SIZE,
73
- state=STATE_TO_RUN # <-- Use the state variable here
74
- )
75
-
76
- # Get env parameters from the new vectorized environment object.
77
- n_clusters = cluster_env.num_envs
78
- sample_subenv = cluster_env.cluster_envs[0]
79
- n_agents_per_cluster = sample_subenv.num_agents
80
-
81
- local_dim = sample_subenv.observation_space.shape[-1]
82
- global_dim = n_agents_per_cluster * local_dim
83
- act_dim = sample_subenv.action_space[0].shape[-1]
84
- total_buffer_size = sample_subenv.num_steps * n_clusters
85
- print(f"Low-level agent buffer size set to: {total_buffer_size}")
86
- print(f"Created {n_clusters} clusters.")
87
- print(f"Shared low-level agent: {n_agents_per_cluster} agents per cluster, "
88
- f"obs_dim={local_dim}, global_dim={global_dim}, act_dim={act_dim}")
89
- print(f"Creating {n_clusters} independent low-level MAPPO agents...")
90
- low_agents = []
91
- for i in range(n_clusters):
92
- agent_buffer_size = sample_subenv.num_steps
93
-
94
- agent = MAPPO(
95
- n_agents = n_agents_per_cluster,
96
- local_dim = local_dim,
97
- global_dim = global_dim,
98
- act_dim = act_dim,
99
- lr = LR,
100
- gamma = GAMMA,
101
- lam = LAMBDA,
102
- clip_eps = CLIP_EPS,
103
- k_epochs = K_EPOCHS,
104
- batch_size = BATCH_SIZE,
105
- episode_len = agent_buffer_size
106
- )
107
- low_agents.append(agent)
108
-
109
- OBS_DIM_HI_LOCAL = 7
110
- act_dim_inter = 2
111
- print(f"Inter-cluster agent (MFAC): n_agents={n_clusters}, "
112
- f"local_dim={OBS_DIM_HI_LOCAL}, act_dim={act_dim_inter}")
113
- inter_agent = MFAC(
114
- n_agents = n_clusters,
115
- local_dim = OBS_DIM_HI_LOCAL,
116
- act_dim = act_dim_inter,
117
- lr = LR,
118
- gamma = GAMMA,
119
- lam = LAMBDA,
120
- clip_eps = CLIP_EPS,
121
- k_epochs = K_EPOCHS,
122
- batch_size = BATCH_SIZE,
123
- episode_len=96
124
- )
125
- ledger = InterClusterLedger()
126
- coordinator = InterClusterCoordinator(
127
- cluster_env,
128
- inter_agent,
129
- ledger,
130
- max_transfer_kwh=MAX_TRANSFER_KWH
131
- )
132
-
133
- # ─── Training loop ─────────────────────────────────────
134
- total_steps = 0
135
- inter_episode_rewards = []
136
- episode_log_data = []
137
- performance_metrics_log = []
138
- agent_rewards_log = [[] for _ in range(NUMBER_OF_AGENTS)]
139
- intra_log = {}
140
- inter_log = {}
141
- total_log = {}
142
- cost_log = {}
143
-
144
- for ep in range(1, NUM_EPISODES + 1):
145
- inter_episode_rewards_this_ep = []
146
- step_count = 0
147
- start_time = time.time()
148
- ep_total_inter_cluster_reward = 0.0
149
- day_logs = []
150
- obs_clusters, _ = cluster_env.reset()
151
- # This runs after an episode is done (triggered by reset), but before the new one starts.
152
- if ep > 1:
153
- all_cluster_metrics = cluster_env.call('get_episode_metrics')
154
-
155
- # Aggregate the metrics from all clusters into a single system-wide summary
156
- system_metrics = {
157
- "grid_reduction_entire_day": sum(m["grid_reduction_entire_day"] for m in all_cluster_metrics),
158
- "grid_reduction_peak_hours": sum(m["grid_reduction_peak_hours"] for m in all_cluster_metrics),
159
- "total_cost_savings": sum(m["total_cost_savings"] for m in all_cluster_metrics),
160
- "battery_degradation_cost_total": sum(m["battery_degradation_cost_total"] for m in all_cluster_metrics),
161
- # For fairness, we average the fairness index across clusters
162
- "fairness_on_cost_savings": np.mean([m["fairness_on_cost_savings"] for m in all_cluster_metrics]),
163
- "Episode": ep - 1
164
- }
165
-
166
- performance_metrics_log.append(system_metrics)
167
-
168
-
169
- # =================================================================
170
-
171
- done_all = False
172
- cluster_rewards = np.zeros((n_clusters, n_agents_per_cluster), dtype=np.float32)
173
- total_cost = 0.0
174
- total_grid_import = 0.0
175
-
176
- # Determine training phase
177
- is_phase_1 = ep < JOINT_TRAINING_START_EPISODE
178
-
179
- if ep == 1: print(f"\n--- Starting Phase 1: Training Low-Level Agent Only (up to ep {JOINT_TRAINING_START_EPISODE-1}) ---")
180
- if ep == JOINT_TRAINING_START_EPISODE: print(f"\n--- Starting Phase 2: Joint Hierarchical Training (from ep {JOINT_TRAINING_START_EPISODE}) ---")
181
-
182
- # The main loop continues as long as the episode is not done.
183
- while not done_all:
184
- total_steps += 1
185
- step_count += 1
186
- # --- Action Selection (Low-Level) ---
187
- batch_global_obs = obs_clusters.reshape(n_clusters, -1)
188
-
189
- # Loop through each cluster to get actions from its dedicated agent
190
- low_level_actions_list = []
191
- low_level_logps_list = []
192
- for c_idx in range(n_clusters):
193
- agent = low_agents[c_idx]
194
- local_obs_cluster = obs_clusters[c_idx]
195
- global_obs_cluster = batch_global_obs[c_idx]
196
-
197
- actions, logps = agent.select_action(local_obs_cluster, global_obs_cluster)
198
-
199
- low_level_actions_list.append(actions)
200
- low_level_logps_list.append(logps)
201
- low_level_actions = np.stack(low_level_actions_list)
202
- low_level_logps = np.stack(low_level_logps_list)
203
-
204
- # --- Action Selection & Transfers (High-Level, Phase 2 only) ---
205
- if is_phase_1:
206
- exports, imports = None, None
207
- else:
208
- # Get high-level observations
209
- inter_cluster_obs_local_list = [coordinator.get_cluster_state(se, step_count) for se in cluster_env.cluster_envs]
210
- inter_cluster_obs_local = np.array(inter_cluster_obs_local_list)
211
-
212
- # Get high-level actions
213
- high_level_action, high_level_logp = inter_agent.select_action(inter_cluster_obs_local)
214
-
215
- # Build transfers
216
- current_reports = {i: {'export_capacity': cluster_env.get_export_capacity(i), 'import_capacity': cluster_env.get_import_capacity(i)} for i in range(n_clusters)}
217
- exports, imports = coordinator.build_transfers(high_level_action, current_reports)
218
-
219
- # --- Environment Step ---
220
- next_obs_clusters, rewards, done_all, step_info = cluster_env.step(
221
- low_level_actions, exports=exports, imports=imports
222
- )
223
- cluster_infos = step_info.get("cluster_infos")
224
-
225
- day_logs.append({
226
- "costs": cluster_infos["costs"],
227
- "grid_import_no_p2p": cluster_infos["grid_import_no_p2p"],
228
- "charge_amount": cluster_infos.get("charge_amount"),
229
- "discharge_amount": cluster_infos.get("discharge_amount")
230
- })
231
- per_agent_rewards = np.stack(cluster_infos['agent_rewards'])
232
-
233
- rewards_for_buffer = per_agent_rewards
234
- if not is_phase_1:
235
- transfers_for_logging = (exports, imports)
236
- high_level_rewards_per_cluster = coordinator.compute_inter_cluster_reward(
237
- all_cluster_infos=cluster_infos,
238
- actual_transfers=transfers_for_logging,
239
- step_count=step_count
240
- )
241
- ep_total_inter_cluster_reward += np.sum(high_level_rewards_per_cluster) # Log the sum for the plot
242
- next_inter_cluster_obs_local_list = [coordinator.get_cluster_state(se, step_count + 1) for se in cluster_env.cluster_envs]
243
- next_inter_cluster_obs_local = np.array(next_inter_cluster_obs_local_list)
244
-
245
- inter_agent.store(
246
- inter_cluster_obs_local,
247
- high_level_action,
248
- high_level_logp,
249
- high_level_rewards_per_cluster,
250
- [done_all]*n_clusters,
251
- next_inter_cluster_obs_local
252
- )
253
- bonus_per_agent = np.zeros_like(per_agent_rewards)
254
- for c_idx in range(n_clusters):
255
- num_agents_in_cluster = per_agent_rewards.shape[1]
256
- if num_agents_in_cluster > 0:
257
- bonus = high_level_rewards_per_cluster[c_idx] / num_agents_in_cluster
258
- bonus_per_agent[c_idx, :] = bonus
259
-
260
- rewards_for_buffer = per_agent_rewards + bonus_per_agent
261
-
262
- # --- Data Storage (Low-Level) ---
263
- dones_list = step_info.get("cluster_dones")
264
- for idx in range(n_clusters):
265
- low_agents[idx].store(
266
- obs_clusters[idx],
267
- batch_global_obs[idx],
268
- low_level_actions[idx],
269
- low_level_logps[idx],
270
- rewards_for_buffer[idx],
271
- dones_list[idx],
272
- next_obs_clusters[idx].reshape(-1)
273
- )
274
-
275
- # --- Logging and State Update ---
276
- cluster_rewards += per_agent_rewards
277
- total_cost += np.sum(cluster_infos['costs'])
278
- total_grid_import += np.sum(cluster_infos['grid_import_with_p2p'])
279
-
280
- obs_clusters = next_obs_clusters
281
- if is_phase_1:
282
- for agent in low_agents:
283
- agent.update()
284
- else:
285
- CYCLE_LENGTH = FREEZE_HIGH_FOR_EPISODES + FREEZE_LOW_FOR_EPISODES
286
- phase2_episode_num = ep - JOINT_TRAINING_START_EPISODE
287
- position_in_cycle = phase2_episode_num % CYCLE_LENGTH
288
-
289
- if position_in_cycle < FREEZE_HIGH_FOR_EPISODES:
290
- print(f"Updating ALL LOW-LEVEL agents (High-level is frozen).")
291
- for agent in low_agents:
292
- agent.update()
293
- else:
294
- print(f"Updating HIGH-LEVEL agent (Low-level is frozen).")
295
- inter_agent.update()
296
-
297
- # =================================================================
298
- duration = time.time() - start_time
299
- num_low_level_agents = n_clusters * n_agents_per_cluster
300
- get_price_fn = cluster_env.cluster_envs[0].get_grid_price
301
-
302
-
303
-
304
- baseline_costs_per_step = [
305
- recursive_sum(entry["grid_import_no_p2p"]) * get_price_fn(i)
306
- for i, entry in enumerate(day_logs)
307
- ]
308
- total_baseline_cost = sum(baseline_costs_per_step)
309
- actual_costs_per_step = [recursive_sum(entry["costs"]) for entry in day_logs]
310
- total_actual_cost = sum(actual_costs_per_step)
311
- cost_reduction_pct = (1 - (total_actual_cost / total_baseline_cost)) * 100 if total_baseline_cost > 0 else 0.0
312
- total_reward_intra = cluster_rewards.sum()
313
- mean_reward_intra = total_reward_intra / num_low_level_agents if num_low_level_agents > 0 else 0.0
314
- total_reward_inter = ep_total_inter_cluster_reward
315
- mean_reward_inter = total_reward_inter / step_count if step_count > 0 else 0.0
316
- total_reward_system = total_reward_intra + total_reward_inter
317
- mean_reward_system = total_reward_system / num_low_level_agents if num_low_level_agents > 0 else 0.0
318
-
319
-
320
- intra_log.setdefault('total', []).append(total_reward_intra)
321
- intra_log.setdefault('mean', []).append(mean_reward_intra)
322
- inter_log.setdefault('total', []).append(total_reward_inter)
323
- inter_log.setdefault('mean', []).append(mean_reward_inter)
324
- total_log.setdefault('total', []).append(total_reward_system)
325
- total_log.setdefault('mean', []).append(mean_reward_system)
326
- cost_log.setdefault('total_cost', []).append(total_actual_cost)
327
- cost_log.setdefault('cost_without_p2p', []).append(total_baseline_cost)
328
-
329
-
330
- episode_log_data.append({
331
- "Episode": ep,
332
- "Mean_Reward_System": mean_reward_system,
333
- "Mean_Reward_Intra": mean_reward_intra,
334
- "Mean_Reward_Inter": mean_reward_inter,
335
- "Total_Reward_System": total_reward_system,
336
- "Total_Reward_Intra": total_reward_intra,
337
- "Total_Reward_Inter": total_reward_inter,
338
- "Cost_Reduction_Pct": cost_reduction_pct,
339
- "Episode_Duration": duration,
340
- })
341
-
342
-
343
- print(f"Ep {ep}/{NUM_EPISODES} | "
344
- f"Mean System R: {mean_reward_system:.3f} | "
345
- f"Cost Red: {cost_reduction_pct:.1f}% | "
346
- f"Time: {duration:.2f}s")
347
-
348
-
349
- if ep % CHECKPOINT_INTERVAL == 0 or ep == NUM_EPISODES:
350
- for c_idx, agent in enumerate(low_agents):
351
- agent.save(os.path.join(models_dir, f"low_cluster{c_idx}_ep{ep}.pth"))
352
- inter_agent.save(os.path.join(models_dir, f"inter_ep{ep}.pth"))
353
- print(f"Saved checkpoint at episode {ep}")
354
-
355
- print("Training completed! Aggregating final logs...")
356
- # --- Final Episode Metrics ---
357
- final_cluster_metrics = cluster_env.call('get_episode_metrics')
358
- final_system_metrics = {
359
- "grid_reduction_entire_day": sum(m["grid_reduction_entire_day"] for m in final_cluster_metrics),
360
- "grid_reduction_peak_hours": sum(m["grid_reduction_peak_hours"] for m in final_cluster_metrics),
361
- "total_cost_savings": sum(m["total_cost_savings"] for m in final_cluster_metrics),
362
- "battery_degradation_cost_total": sum(m["battery_degradation_cost_total"] for m in final_cluster_metrics),
363
- "fairness_on_cost_savings": np.mean([m["fairness_on_cost_savings"] for m in final_cluster_metrics]),
364
- "Episode": NUM_EPISODES
365
- }
366
- performance_metrics_log.append(final_system_metrics)
367
-
368
- df_rewards_log = pd.DataFrame(episode_log_data)
369
- df_perf_log = pd.DataFrame(performance_metrics_log)
370
- df_final_log = pd.merge(df_rewards_log, df_perf_log, on="Episode")
371
-
372
- log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")
373
- overall_end_time = time.time()
374
- total_duration_seconds = overall_end_time - overall_start_time
375
- total_time_row = pd.DataFrame([{"Episode": "Total_Training_Time", "Episode_Duration": total_duration_seconds}])
376
- df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)
377
-
378
- columns_to_save = [
379
- "Episode",
380
- "Mean_Reward_System",
381
- "Mean_Reward_Intra",
382
- "Mean_Reward_Inter",
383
- "Total_Reward_System",
384
- "Total_Reward_Intra",
385
- "Total_Reward_Inter",
386
- "Cost_Reduction_Pct",
387
- "battery_degradation_cost_total",
388
- "Episode_Duration",
389
- "total_cost_savings",
390
- "grid_reduction_entire_day",
391
- "fairness_on_cost_savings"
392
- ]
393
- df_to_save = df_to_save[[col for col in columns_to_save if col in df_to_save.columns]]
394
- df_to_save.to_csv(log_csv_path, index=False)
395
- print(f"Saved comprehensive training performance log to: {log_csv_path}")
396
-
397
- generate_plots(
398
- plots_dir=plots_dir,
399
- num_episodes=NUM_EPISODES,
400
- intra_log=intra_log,
401
- inter_log=inter_log,
402
- total_log=total_log,
403
- cost_log=cost_log,
404
- df_final_log=df_final_log
405
- )
406
- overall_end_time = time.time()
407
- total_duration_seconds = overall_end_time - overall_start_time
408
- total_duration_formatted = str(timedelta(seconds=int(total_duration_seconds)))
409
-
410
-
411
- print("\n" + "="*50)
412
- print(f"Total Training Time: {total_duration_formatted} (HH:MM:SS)")
413
- print("="*50)
414
-
415
- ################################# PLOTTING & LOGGING ##################################################################
416
- def generate_plots(
417
- plots_dir: str,
418
- num_episodes: int,
419
- intra_log: dict,
420
- inter_log: dict,
421
- total_log: dict,
422
- cost_log: dict,
423
- df_final_log: pd.DataFrame
424
- ):
425
- """
426
- Generates and saves all final plots after training is complete.
427
- """
428
- print("Training completed! Generating plots…")
429
- def moving_avg(series, window):
430
- return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()
431
-
432
- ma_window = 120
433
- episodes = np.arange(1, num_episodes + 1)
434
-
435
- # Plot 1: Intra-cluster (Low-Level) Rewards
436
- fig, ax = plt.subplots(figsize=(12, 7))
437
- ax.plot(episodes, moving_avg(intra_log['total'], ma_window), label=f'Total Reward (MA {ma_window})', linewidth=2)
438
- ax.set_xlabel("Episode")
439
- ax.set_ylabel("Total Intra-Cluster Reward", color='tab:blue')
440
- ax.tick_params(axis='y', labelcolor='tab:blue')
441
- ax.grid(True)
442
-
443
- ax2 = ax.twinx()
444
- ax2.plot(episodes, moving_avg(intra_log['mean'], ma_window), label=f'Mean Reward (MA {ma_window})', linewidth=2, linestyle='--', color='tab:cyan')
445
- ax2.set_ylabel("Mean Intra-Cluster Reward", color='tab:cyan')
446
- ax2.tick_params(axis='y', labelcolor='tab:cyan')
447
-
448
- fig.suptitle("Intra-Cluster (Low-Level Agent) Rewards")
449
- fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
450
- plt.savefig(os.path.join(plots_dir, "1_intra_cluster_rewards.png"), dpi=200)
451
- plt.close()
452
-
453
- # Plot 2: Inter-cluster (High-Level) Rewards
454
- fig, ax = plt.subplots(figsize=(12, 7))
455
- ax.plot(episodes, moving_avg(inter_log['total'], ma_window), label=f'Total Reward (MA {ma_window})', linewidth=2, color='tab:green')
456
- ax.set_xlabel("Episode")
457
- ax.set_ylabel("Total Inter-Cluster Reward", color='tab:green')
458
- ax.tick_params(axis='y', labelcolor='tab:green')
459
- ax.grid(True)
460
-
461
- ax2 = ax.twinx()
462
- ax2.plot(episodes, moving_avg(inter_log['mean'], ma_window), label=f'Mean Reward (MA {ma_window})', linewidth=2, linestyle='--', color='mediumseagreen')
463
- ax2.set_ylabel("Mean Inter-Cluster Reward", color='mediumseagreen')
464
- ax2.tick_params(axis='y', labelcolor='mediumseagreen')
465
-
466
- fig.suptitle("Inter-Cluster (High-Level Agent) Rewards")
467
- fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
468
- plt.savefig(os.path.join(plots_dir, "2_inter_cluster_rewards.png"), dpi=200)
469
- plt.close()
470
-
471
- # Plot 3: Total System Rewards
472
- fig, ax = plt.subplots(figsize=(12, 7))
473
- ax.plot(episodes, moving_avg(total_log['total'], ma_window), label=f'Total System Reward (MA {ma_window})', linewidth=2, color='tab:red')
474
- ax.set_xlabel("Episode")
475
- ax.set_ylabel("Total System Reward", color='tab:red')
476
- ax.tick_params(axis='y', labelcolor='tab:red')
477
- ax.grid(True)
478
-
479
- ax2 = ax.twinx()
480
- ax2.plot(episodes, moving_avg(total_log['mean'], ma_window), label=f'Mean System Reward (MA {ma_window})', linewidth=2, linestyle='--', color='salmon')
481
- ax2.set_ylabel("Mean System Reward per Agent", color='salmon')
482
- ax2.tick_params(axis='y', labelcolor='salmon')
483
-
484
- fig.suptitle("Total System Rewards (Intra + Inter)")
485
- fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
486
- plt.savefig(os.path.join(plots_dir, "3_total_system_rewards.png"), dpi=200)
487
- plt.close()
488
-
489
- # Plot 4: Cost Reduction
490
- cost_df = pd.DataFrame(cost_log)
491
- cost_df['cost_reduction_pct'] = (100 * (1 - cost_df['total_cost'] / cost_df['cost_without_p2p'])).clip(upper=100)
492
- plt.figure(figsize=(12, 7))
493
- plt.plot(episodes, moving_avg(cost_df['cost_reduction_pct'], ma_window), label=f'Cost Reduction % (MA {ma_window})', color='purple', linewidth=2)
494
- plt.xlabel("Episode")
495
- plt.ylabel("Cost Reduction (%)")
496
- plt.title("Total System-Wide Cost Reduction")
497
- plt.legend()
498
- plt.grid(True)
499
- plt.savefig(os.path.join(plots_dir, "4_cost_reduction.png"), dpi=200)
500
- plt.close()
501
-
502
-
503
- df_plot = df_final_log[pd.to_numeric(df_final_log['Episode'], errors='coerce').notna()].copy()
504
- df_plot['Episode'] = pd.to_numeric(df_plot['Episode'])
505
-
506
- # 5. Battery Degradation Cost
507
- plt.figure(figsize=(12, 7))
508
- plt.plot(df_plot["Episode"], moving_avg(df_plot["battery_degradation_cost_total"], ma_window),
509
- label=f'Degradation Cost (MA {ma_window})', color='darkgreen', linewidth=2)
510
- plt.xlabel("Episode")
511
- plt.ylabel("Total Degradation Cost ($)")
512
- plt.title("Total Battery Degradation Cost")
513
- plt.legend()
514
- plt.grid(True)
515
- plt.savefig(os.path.join(plots_dir, "5_battery_degradation_cost.png"), dpi=200)
516
- plt.close()
517
-
518
-
519
- print(f"All plots have been saved to: {plots_dir}")
520
-
521
-
522
- if __name__ == "__main__":
523
- main()
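
Finally, the two-phase schedule in the training loop above compresses to a small piece of modular arithmetic. A standalone sketch with the same hyperparameter names (values copied from the script):

JOINT_TRAINING_START_EPISODE = 2000
FREEZE_HIGH_FOR_EPISODES = 20
FREEZE_LOW_FOR_EPISODES = 10

def who_trains(ep: int) -> str:
    # Phase 1: only the low-level MAPPO agents learn.
    if ep < JOINT_TRAINING_START_EPISODE:
        return "low"
    # Phase 2: 20 episodes of low-level updates (high-level frozen),
    # then 10 episodes of high-level updates (low-level frozen), repeating.
    cycle = FREEZE_HIGH_FOR_EPISODES + FREEZE_LOW_FOR_EPISODES
    pos = (ep - JOINT_TRAINING_START_EPISODE) % cycle
    return "low" if pos < FREEZE_HIGH_FOR_EPISODES else "high"

assert who_trains(1999) == "low"   # phase 1
assert who_trains(2000) == "low"   # cycle starts: low trains, high frozen
assert who_trains(2020) == "high"  # high trains, low frozen
assert who_trains(2030) == "low"   # cycle repeats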