Upload 6 files

Browse files

Files changed (6) hide show

utilities/comfort.py +207 -0
utilities/data_generator.py +76 -0
utilities/policy.py +440 -0
utilities/rewards.py +57 -0
utilities/rollout.py +261 -0
utilities/tables.py +111 -0

utilities/comfort.py ADDED Viewed

	@@ -0,0 +1,207 @@

+import math
+import numpy as np
+import pandas as pd
+from typing import Optional
def _sat_vapor_pressure_kpa(t_c: float) -> float:
    """Return the saturation vapour pressure of water, in kPa.

    Evaluates a Magnus/Tetens-style exponential fit at ``t_c`` (the name and
    the callers suggest degrees Celsius).
    """
    exponent = (17.2694 * t_c) / (t_c + 237.29)
    return 0.61078 * math.exp(exponent)
def pmv_ppd_fanger(
    ta_c: float,
    tr_c: Optional[float] = None,
    rh: float = 50.0,
    vel: float = 0.1,
    met: float = 1.2,
    clo: float = 0.7,
    wme: float = 0.0,
):
    """Compute Fanger's Predicted Mean Vote (PMV) and PPD.

    The arithmetic mirrors the widely published PMV reference procedure
    (heat-balance terms ``hl1``..``hl6`` and an iterative solve for the
    clothing surface temperature).

    Args:
        ta_c: Air temperature (degrees Celsius, per the ``+ 273`` offset).
        tr_c: Mean radiant temperature; defaults to ``ta_c`` when ``None``.
        rh: Relative humidity in percent (divided by 100 internally).
        vel: Air speed; clamped to a tiny positive value before the square
            root so that zero or negative input cannot fail.
        met: Metabolic rate in met (converted with 58.15 W/m2 per met).
        clo: Clothing insulation in clo (converted with 0.155 m2K/W per clo).
        wme: External work in met.

    Returns:
        ``(pmv, ppd)`` as floats; ``ppd`` is a percentage.

    Note:
        If the clothing-temperature iteration does not converge within the
        iteration cap, the last iterate is used silently (no error is raised).
    """
    if tr_c is None:
        tr_c = ta_c
    ta = ta_c
    tr = tr_c

    # Partial water vapour pressure in Pa.
    pa_kpa = rh / 100.0 * _sat_vapor_pressure_kpa(ta)
    pa = pa_kpa * 1000.0

    m = met * 58.15
    w = wme * 58.15
    mw = m - w  # internal heat production

    icl = 0.155 * clo
    if icl <= 1e-9:
        icl = 1e-9  # avoid a zero insulation value
    # Clothing area factor.
    if icl <= 0.078:
        fcl = 1.0 + 1.29 * icl
    else:
        fcl = 1.05 + 0.645 * icl

    hcf = 12.1 * math.sqrt(max(vel, 1e-9))  # forced-convection coefficient
    taa = ta + 273.0
    tra = tr + 273.0
    tcla = taa + (35.5 - ta) / (3.5 * icl + 0.1)  # first guess for clothing temp

    p1 = icl * fcl
    p2 = p1 * 3.96
    p3 = p1 * 100.0
    p4 = p1 * taa
    p5 = 308.7 - 0.028 * mw + p2 * ((tra / 100.0) ** 4)

    xn = tcla / 100.0
    xf = xn
    eps = 0.00015
    n = 0
    # Damped fixed-point iteration on the clothing surface temperature
    # (expressed in hundreds of kelvin).
    while True:
        xf = (xf + xn) / 2.0
        hcn = 2.38 * (abs(100.0 * xf - taa) ** 0.25)  # natural convection
        hc = max(hcf, hcn)
        xn = (p5 + p4 * hc - p2 * (xf**4)) / (100.0 + p3 * hc)
        n += 1
        if n > 150 or abs(xn - xf) <= eps:
            break
    tcl = 100.0 * xn - 273.0

    # Heat-loss components.
    hl1 = 3.05 * 0.001 * (5733.0 - 6.99 * mw - pa)      # skin diffusion
    hl2 = 0.42 * (mw - 58.15) if mw > 58.15 else 0.0    # sweating
    hl3 = 1.7 * 0.00001 * m * (5867.0 - pa)             # latent respiration
    hl4 = 0.0014 * m * (34.0 - ta)                      # dry respiration
    hl5 = 3.96 * fcl * ((xn**4) - ((tra / 100.0) ** 4))  # radiation
    hl6 = fcl * hc * (tcl - ta)                         # convection

    ts = 0.303 * math.exp(-0.036 * m) + 0.028
    pmv = ts * (mw - hl1 - hl2 - hl3 - hl4 - hl5 - hl6)
    ppd = 100.0 - 95.0 * math.exp(-0.03353 * (pmv**4) - 0.2179 * (pmv**2))
    return pmv, ppd
+# ==========================================
def ashrae_any(df: pd.DataFrame) -> None:
    """Add the ``core_ash55_any_fixed`` column to ``df`` in place.

    The value is the element-wise maximum of the core-zone summer and winter
    "not comfortable" columns. When ``core_occ_count`` is available, rows
    whose occupancy is not above 1e-6 are multiplied by zero. If either
    source column is missing the new column is filled with NaN.
    """
    summer_col = "core_ash55_notcomfortable_summer"
    winter_col = "core_ash55_notcomfortable_winter"

    if summer_col not in df.columns or winter_col not in df.columns:
        df["core_ash55_any_fixed"] = np.nan
        return

    combined = np.maximum(
        df[summer_col].astype(float),
        df[winter_col].astype(float),
    )
    if "core_occ_count" in df.columns:
        occupied_mask = (df["core_occ_count"] > 1e-6).astype(float)
        combined = combined * occupied_mask
    df["core_ash55_any_fixed"] = combined
def add_feature_availability_and_registry(
    df: pd.DataFrame,
    base_feature_cols,
    new_feature_cols,
) -> None:
    """Annotate ``df`` in place with feature-availability metadata.

    For every requested feature ``c`` a constant boolean column ``has_<c>``
    is added, and ``feature_registry`` holds the ";"-joined names of the
    requested features that exist in ``df``.

    Args:
        df: Frame to annotate (mutated).
        base_feature_cols: Iterable of feature names.
        new_feature_cols: Iterable of feature names; appended after the base
            features. The two arguments may be different sequence types.
    """
    # Unpacking (instead of ``+``) accepts any mix of lists/tuples/iterables.
    features = [*base_feature_cols, *new_feature_cols]
    # Snapshot the columns first so the ``has_*`` columns added below can
    # never influence the availability answer for a later feature.
    existing = set(df.columns)
    present = [c for c in features if c in existing]
    for c in features:
        df[f"has_{c}"] = c in existing
    df["feature_registry"] = ";".join(present)
def compute_comfort_metrics_inplace(
    df: pd.DataFrame,
    location: str,
    time_step_hours: float,
    heating_sp: float,
    cooling_sp: float,
    zone_temp_keys,
    zone_occ_keys,
    rh_keys,
) -> None:
    """Add occupancy-weighted comfort metrics to ``df`` in place.

    Columns written: ``rh_weighted``, ``pmv_weighted``, ``ppd_weighted``,
    ``comfort_violation_degCh`` and ``comfort_violation_fixed_degCh``.

    Args:
        df: Per-timestep log (mutated).
        location: Label used only in the warning message.
        time_step_hours: Duration of one row, used to scale the violation.
        heating_sp: Currently unused; the violation band is derived from the
            fixed 21.0 / 24.0 setpoints below.
        cooling_sp: Currently unused (see ``heating_sp``).
        zone_temp_keys: Zone temperature column names.
        zone_occ_keys: Zone occupancy column names, parallel to
            ``zone_temp_keys``.
        rh_keys: Zone relative-humidity column names, parallel to
            ``zone_occ_keys``. If empty or not all present, ``rh_weighted``
            is NaN and 50 % is used for the PMV computation.

    If any temperature or occupancy column is missing, a warning is printed,
    violations are set to 0.0, the weighted columns to NaN, and the function
    returns early.
    """
    # Materialize as lists: ``df[tuple]`` would be read as a single key.
    temp_cols = list(zone_temp_keys)
    occ_cols = list(zone_occ_keys)
    rh_cols = list(rh_keys)

    missing_t = [k for k in temp_cols if k not in df.columns]
    missing_o = [k for k in occ_cols if k not in df.columns]
    if missing_t or missing_o:
        print(f"[{location}] WARNING: missing temp cols: {missing_t}, occ cols: {missing_o}")
        df["comfort_violation_degCh"] = 0.0
        df["comfort_violation_fixed_degCh"] = 0.0
        df["pmv_weighted"] = np.nan
        df["ppd_weighted"] = np.nan
        df["rh_weighted"] = np.nan
        return

    temps = df[temp_cols].to_numpy(dtype=np.float64)
    occs = df[occ_cols].to_numpy(dtype=np.float64)
    total_occ = occs.sum(axis=1)
    occupied = total_occ > 1e-6
    safe_occ = np.maximum(total_occ, 1e-6)  # keeps the division well-defined

    # Occupancy-weighted temperature; plain zone mean when nobody is present.
    comfort_temp = np.where(
        occupied,
        (temps * occs).sum(axis=1) / safe_occ,
        temps.mean(axis=1),
    )

    # An empty rh_keys would otherwise pass the all() check vacuously and
    # then fail when broadcasting against the occupancy matrix.
    if rh_cols and all(k in df.columns for k in rh_cols):
        rhs = df[rh_cols].to_numpy(dtype=np.float64)
        rh_weighted = np.where(
            occupied,
            (rhs * occs).sum(axis=1) / safe_occ,
            rhs.mean(axis=1),
        )
    else:
        rh_weighted = np.full(len(df), np.nan, dtype=np.float64)
    df["rh_weighted"] = rh_weighted

    # Fixed personal/environmental parameters for the PMV model.
    VEL = 0.1
    MET = 1.2
    CLO = 0.7
    WME = 0.0

    n_rows = len(comfort_temp)
    pmv_vals = np.zeros(n_rows, dtype=np.float64)
    ppd_vals = np.zeros(n_rows, dtype=np.float64)
    # Rows with total occupancy <= 1e-6 keep PMV = PPD = 0.0. The negated
    # comparison (rather than ``occupied``) preserves the original handling
    # of NaN occupancy, which still gets a PMV evaluation.
    needs_pmv = ~(total_occ <= 1e-6)
    for i in np.flatnonzero(needs_pmv):
        rh_i = float(rh_weighted[i]) if np.isfinite(rh_weighted[i]) else 50.0
        rh_i = float(np.clip(rh_i, 0.0, 100.0))
        t_i = float(comfort_temp[i])
        pmv_vals[i], ppd_vals[i] = pmv_ppd_fanger(
            ta_c=t_i,
            tr_c=t_i,  # radiant temperature assumed equal to air temperature
            rh=rh_i,
            vel=VEL,
            met=MET,
            clo=CLO,
            wme=WME,
        )
    df["pmv_weighted"] = pmv_vals
    df["ppd_weighted"] = ppd_vals

    # Degree-hours outside the fixed comfort band, counted only when occupied.
    FIXED_HEAT = 21.0
    FIXED_COOL = 24.0
    fixed_lower = FIXED_HEAT - 0.5
    fixed_upper = FIXED_COOL + 0.5
    fixed_dev = (
        np.clip(fixed_lower - comfort_temp, 0.0, None)
        + np.clip(comfort_temp - fixed_upper, 0.0, None)
    )
    is_occupied = occupied.astype(np.float64)
    fixed_violation = fixed_dev * time_step_hours * is_occupied
    # Both columns intentionally carry the same fixed-band value.
    df["comfort_violation_degCh"] = fixed_violation
    df["comfort_violation_fixed_degCh"] = fixed_violation

utilities/data_generator.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import os
+import numpy as np
+import pandas as pd
# ==========================================
# 1. INTERNAL STRUCTURE (output column schema)
# ==========================================
# Columns shared by the whole building, in output order.
TIME_COLS = ["step", "month", "day", "hour", "minute"]
ENV_COLS = ["out_temp", "out_rh"]
GLOBAL_REWARD_COLS = ["power_kw"]
GLOBAL_COLS = [*TIME_COLS, *ENV_COLS, *GLOBAL_REWARD_COLS]

# Zone identifiers and the per-zone feature prefixes; each column is named
# "<prefix>_<zone>".
ZONE_LIST = ["core", "p1", "p2", "p3", "p4"]
ZONE_STATE_TEMPLATE = ["temp", "occ", "rh"]
ZONE_ACTION_TEMPLATE = ["htg", "clg"]
def get_full_schema():
    """Return the ordered list of output column names.

    Layout: the global columns, then every zone's state columns
    (zone-major), then every zone's action columns (zone-major).
    """
    state_cols = [
        f"{feature}_{zone}"
        for zone in ZONE_LIST
        for feature in ZONE_STATE_TEMPLATE
    ]
    action_cols = [
        f"{feature}_{zone}"
        for zone in ZONE_LIST
        for feature in ZONE_ACTION_TEMPLATE
    ]
    return GLOBAL_COLS + state_cols + action_cols
+# ==========================================
+# 2. PHYSICS UTILITIES
+# ==========================================
def batch_calculate_rh(temp_array: np.ndarray, dewpoint_array: np.ndarray) -> np.ndarray:
    """Relative humidity (%) via the August-Roche-Magnus approximation.

    Both inputs are element-wise arrays of temperature and dew point; the
    result is clipped to the range [0, 100].
    """
    A, B = 17.625, 243.04
    gamma_dew = A * dewpoint_array / (B + dewpoint_array)
    gamma_air = A * temp_array / (B + temp_array)
    rh = 100 * np.exp(gamma_dew - gamma_air)
    return np.clip(rh, 0.0, 100.0)
+# ==========================================
+# 3. MAIN GENERATOR FUNCTION
+# ==========================================
def save_dt_training_data(
    df_raw: pd.DataFrame,
    out_dir: str,
    location: str,
    steps_per_hour: int = 4,
):
    """Convert a raw rollout log to the training schema and write it as CSV.

    Args:
        df_raw: Raw per-timestep log. Must contain ``outdoor_temp``,
            ``outdoor_dewpoint``, ``elec_power`` and, for each zone, the
            ``<zone>_temp`` / ``<zone>_occ_count`` / ``<zone>_rh`` columns.
            ``step``, ``month``, ``day_of_month``, ``hour``,
            ``setpoint_htg`` and ``setpoint_clg`` are optional and fall back
            to defaults.
        out_dir: Output directory (created if needed).
        location: Prefix for the output file name.
        steps_per_hour: Simulation steps per hour, used to derive the
            ``minute`` column from ``step``. The default of 4 reproduces the
            previous hard-coded 15-minute resolution.

    Returns:
        Path of the written CSV file.

    Raises:
        KeyError: If a required column is missing from ``df_raw``.
    """
    # Share df_raw's index so every Series copied below aligns row-for-row,
    # even when ``step`` is absent and df_raw has a non-default index.
    dt_df = pd.DataFrame(index=df_raw.index)
    dt_df['step'] = df_raw.get('step', np.arange(len(df_raw)))
    dt_df['month'] = df_raw.get('month', 1)
    dt_df['day'] = df_raw.get('day_of_month', 1)
    dt_df['hour'] = df_raw.get('hour', 0)
    dt_df['minute'] = (dt_df['step'] % steps_per_hour) * (60 // steps_per_hour)

    dt_df['out_temp'] = df_raw['outdoor_temp']
    dt_df['out_rh'] = batch_calculate_rh(
        df_raw['outdoor_temp'].values,
        df_raw['outdoor_dewpoint'].values
    )
    dt_df['power_kw'] = df_raw['elec_power'] / 1000.0

    for zone in ZONE_LIST:
        # Short zone ids ("p1") map to the raw log's "perim1" prefix.
        s_name = "core" if zone == "core" else f"perim{zone[-1]}"
        # States
        dt_df[f"temp_{zone}"] = df_raw[f"{s_name}_temp"]
        dt_df[f"occ_{zone}"] = df_raw[f"{s_name}_occ_count"]
        dt_df[f"rh_{zone}"] = df_raw[f"{s_name}_rh"]
        # Actions: the raw log carries a single setpoint pair, which is
        # replicated to every zone.
        dt_df[f"htg_{zone}"] = df_raw.get("setpoint_htg", 21.0)
        dt_df[f"clg_{zone}"] = df_raw.get("setpoint_clg", 24.0)

    # Enforce the canonical column order (and fail loudly on a missing one).
    ALL_COLUMNS = get_full_schema()
    dt_df = dt_df[ALL_COLUMNS]

    os.makedirs(out_dir, exist_ok=True)
    filename = f"{location}_ComfortDT_Training.csv"
    save_path = os.path.join(out_dir, filename)
    dt_df.to_csv(save_path, index=False)
    print(f" DT Data Saved: {filename} | Shape: {dt_df.shape}")
    return save_path

utilities/policy.py ADDED Viewed

	@@ -0,0 +1,440 @@

+# unihvac/policy.py
+from __future__ import annotations
+import os
+import json
+from typing import Any, Dict, Tuple
+import numpy as np
+import torch
+import torch.nn.functional as F
+import requests
+import numpy as np
+import json
+import requests
+import numpy as np
class RemoteHTTPPolicy:
    """Policy that delegates action selection to a remote HTTP server.

    ``POST <server_url>/predict`` is sent the step index and observation and
    must answer with JSON containing an ``"action"`` list;
    ``POST <server_url>/reset`` clears server-side state.

    Args:
        server_url: Base URL of the policy server.
        timeout: Seconds to wait for each HTTP request. Without a timeout
            ``requests`` can block indefinitely on an unresponsive server.
            Pass ``None`` to restore unbounded waiting.
    """

    def __init__(
        self,
        server_url: str = "http://host.docker.internal:8000",
        timeout: float = 60.0,
    ):
        self.server_url = server_url
        self.timeout = timeout
        self.predict_endpoint = f"{server_url}/predict"
        self.reset_endpoint = f"{server_url}/reset"
        print(f"[RemotePolicy] Connecting to {self.server_url}...")

    def reset(self):
        """Ask the server to reset its buffer; failures are logged, not raised."""
        try:
            requests.post(
                self.reset_endpoint,
                json={"message": "reset"},
                timeout=self.timeout,
            )
            print("[RemotePolicy] Remote buffer reset.")
        except Exception as e:
            print(f"[RemotePolicy] Reset failed: {e}")

    def act(self, obs, info, step):
        """Return ``(action, {}, {})`` for the given observation.

        ``info`` is not forwarded (an empty dict is sent instead). On any
        failure (network error, timeout, HTTP error status, malformed
        response) the error is printed and a constant fallback action of
        ``[21.0, 24.0] * 5`` is returned so the control loop keeps running.
        """
        obs_list = np.array(obs, dtype=np.float32).tolist()
        payload = {"step": int(step), "obs": obs_list, "info": {}}
        try:
            resp = requests.post(
                self.predict_endpoint, json=payload, timeout=self.timeout
            )
            resp.raise_for_status()
            action = np.array(resp.json()["action"], dtype=np.float32)
            return action, {}, {}
        except Exception as e:
            # Deliberate best-effort boundary: never let a remote failure
            # abort the simulation.
            print(f"[RemotePolicy] Error: {e}")
            return np.array([21.0, 24.0] * 5, dtype=np.float32), {}, {}
def _get_int_env(name: str, default: int) -> int:
    """Read environment variable ``name`` as an int.

    Returns ``default`` when the variable is unset or its value is not a
    valid integer literal.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        # Only a malformed value is expected here; anything else is a bug
        # and should surface.
        return default
def _get_bool_env(name: str, default: bool) -> bool:
    """Read environment variable ``name`` as a boolean flag.

    An unset variable yields ``default``. Otherwise the value is true only
    if, after trimming and lower-casing, it is one of
    "1", "true", "yes", "y" or "on".
    """
    raw = os.environ.get(name, None)
    if raw is not None:
        normalized = raw.strip().lower()
        return normalized in ("1", "true", "yes", "y", "on")
    return default
+# --------------------------------------------------------------------------------------
+# Policies
+# --------------------------------------------------------------------------------------
class ConstantSetpointPolicy5Zone:
    """
    Rule-based baseline that always commands the same setpoints.

    The action vector is the (heating, cooling) pair repeated for 5 zones,
    i.e. ``[htg, clg] * 5`` as float32.
    """

    def __init__(self, heating_sp: float = 21.0, cooling_sp: float = 24.0):
        self.heating_sp = float(heating_sp)
        self.cooling_sp = float(cooling_sp)
        pair = np.array([self.heating_sp, self.cooling_sp], dtype=np.float32)
        self.action = np.tile(pair, 5)

    def reset(self):
        """Nothing to reset; the policy is stateless."""
        return None

    def act(self, obs, info, step):
        """Return a fresh copy of the constant action plus two empty dicts."""
        fresh = np.array(self.action, copy=True)
        return fresh, {}, {}
class DecisionTransformerPolicy5Zone:
    """
    CPU-only Decision Transformer (DT) policy for five zones.

    Each :meth:`act` call appends one timestep of tokens (normalized state
    features followed by the normalized previous action) to a ring buffer of
    ``context_len`` steps, runs the model on the time-ordered window, samples
    one discrete bin per action token, decodes the bins to setpoints and
    enforces a minimum heating/cooling deadband.
    """

    def __init__(
        self,
        ckpt_path: str,
        model_config_path: str,
        norm_stats_path: str,
        context_len: int,
        max_tokens_per_step: int,
        device: str = "cpu",
        temperature: float = 0.5,
    ):
        """Load the model, normalization statistics and allocate buffers.

        Args:
            ckpt_path: Checkpoint file; its ``"model"`` entry is the state dict.
            model_config_path: JSON model configuration.
            norm_stats_path: ``.npz`` file with ``obs_mean``, ``obs_std``,
                ``act_mean``, ``act_std`` and optionally ``max_return``.
            context_len: Number of timesteps kept in the context window.
            max_tokens_per_step: Token slots per timestep (states + actions).
            device: Accepted for interface compatibility only; inference
                always runs on the CPU.
            temperature: Softmax temperature used when sampling action bins.
        """
        # Project-local modules, imported lazily so that importing this file
        # does not require them.
        import dataloader as dl
        from embeddings import GeneralistComfortDT

        # --- 1. CPU settings (process-global side effects) ---
        torch.set_grad_enabled(False)
        torch.backends.mha.set_fastpath_enabled(True)
        torch.backends.mkldnn.enabled = _get_bool_env("DT_MKLDNN", True)
        import multiprocessing
        avail = multiprocessing.cpu_count()
        dt_threads = _get_int_env("DT_NUM_THREADS", min(18, avail))
        torch.set_num_threads(dt_threads)
        try:
            torch.set_num_interop_threads(1)
        except RuntimeError:
            # PyTorch allows this call only once per process and only before
            # inter-op work has started; constructing a second policy must
            # not crash because of it.
            pass

        self.dl = dl
        self.device = torch.device("cpu")
        self.temperature = float(temperature)

        # --- 2. Load model ---
        with open(model_config_path, "r") as f:
            cfg = json.load(f)
        cfg["CONTEXT_LEN"] = int(context_len)
        self.L = int(context_len)
        self.K = int(max_tokens_per_step)
        self.model = GeneralistComfortDT(cfg).to(self.device)
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        ckpt = torch.load(ckpt_path, map_location="cpu")
        self.model.load_state_dict(ckpt["model"], strict=True)
        self.model.eval()

        # --- 3. Load normalization stats (closing the archive afterwards) ---
        with np.load(norm_stats_path) as z:
            self.obs_mean = z["obs_mean"].astype(np.float32)
            self.obs_std = z["obs_std"].astype(np.float32)
            self.act_mean = z["act_mean"].astype(np.float32)
            self.act_std = z["act_std"].astype(np.float32)
            self.max_return = float(z["max_return"][0]) if "max_return" in z else 1.0
        self.rtg_scale_mode = "max_return"
        self.rtg_constant_div = 1.0
        self.desired_rtg_raw = -0.5
        self.prev_action = np.array([21.0, 24.0] * 5, dtype=np.float32)

        # --- 4. Define keys ---
        # Order in which the environment delivers the raw observation.
        self.env_keys_order = [
            'month', 'day_of_month', 'hour',
            'outdoor_temp', 'core_temp', 'perim1_temp', 'perim2_temp', 'perim3_temp', 'perim4_temp',
            'elec_power',
            'core_occ_count', 'perim1_occ_count', 'perim2_occ_count', 'perim3_occ_count', 'perim4_occ_count',
            'outdoor_dewpoint', 'outdoor_wetbulb',
            'core_rh', 'perim1_rh', 'perim2_rh', 'perim3_rh', 'perim4_rh',
            'core_ash55_notcomfortable_summer', 'core_ash55_notcomfortable_winter', 'core_ash55_notcomfortable_any',
            'p1_ash55_notcomfortable_any', 'p2_ash55_notcomfortable_any', 'p3_ash55_notcomfortable_any', 'p4_ash55_notcomfortable_any',
            'total_electricity_HVAC'
        ]
        # Order of the state features fed to the model.
        self.model_state_keys = [
            'outdoor_temp', 'core_temp', 'perim1_temp', 'perim2_temp', 'perim3_temp', 'perim4_temp',
            'elec_power',
            'core_occ_count', 'perim1_occ_count', 'perim2_occ_count', 'perim3_occ_count', 'perim4_occ_count',
            'outdoor_dewpoint', 'outdoor_wetbulb',
            'core_rh', 'perim1_rh', 'perim2_rh', 'perim3_rh', 'perim4_rh',
            'core_ash55_notcomfortable_summer', 'core_ash55_notcomfortable_winter', 'core_ash55_notcomfortable_any',
            'p1_ash55_notcomfortable_any', 'p2_ash55_notcomfortable_any', 'p3_ash55_notcomfortable_any', 'p4_ash55_notcomfortable_any',
            'month', 'hour'
        ]
        # Position of each model key in the raw observation. Kept as a public
        # attribute; act() itself maps by key name.
        self.obs_indices = []
        for k in self.model_state_keys:
            try:
                self.obs_indices.append(self.env_keys_order.index(k))
            except ValueError:
                print(f"Key {k} missing")
                self.obs_indices.append(0)  # Fallback
        self.obs_indices = np.array(self.obs_indices, dtype=np.int64)
        self.action_keys = [
            "htg_core", "clg_core", "htg_p1", "clg_p1", "htg_p2", "clg_p2",
            "htg_p3", "clg_p3", "htg_p4", "clg_p4",
        ]

        # Meta info: the code reads element 0 as the feature id and element 1
        # as the zone id of each parsed identity.
        self.s_meta = [self.dl.parse_feature_identity(k, is_action=False) for k in self.model_state_keys]
        self.a_meta = [self.dl.parse_feature_identity(k, is_action=True) for k in self.action_keys]
        # Action tokens get priority; states fill the remaining slots.
        self.num_act = min(len(self.a_meta), self.K)
        self.num_state = min(len(self.s_meta), self.K - self.num_act)

        # --- 5. Precompute the per-step token layout: [states..., actions...] ---
        self.row_feat_ids = np.zeros((self.K,), dtype=np.int64)
        self.row_zone_ids = np.zeros((self.K,), dtype=np.int64)
        self.row_attn = np.zeros((self.K,), dtype=np.int64)
        self.row_feat_vals = np.zeros((self.K,), dtype=np.float32)
        if self.num_state > 0:
            s_meta = self.s_meta[:self.num_state]
            self.row_feat_ids[:self.num_state] = np.array([m[0] for m in s_meta], dtype=np.int64)
            self.row_zone_ids[:self.num_state] = np.array([m[1] for m in s_meta], dtype=np.int64)
            self.row_attn[:self.num_state] = 1
        if self.num_act > 0:
            start = self.num_state
            end = start + self.num_act
            a_meta = self.a_meta[:self.num_act]
            self.row_feat_ids[start:end] = np.array([m[0] for m in a_meta], dtype=np.int64)
            self.row_zone_ids[start:end] = np.array([m[1] for m in a_meta], dtype=np.int64)
            self.row_attn[start:end] = 1

        # Context dimension from config
        self.context_dim = cfg.get("CONTEXT_DIM", 10)

        # Ring buffers holding the last L timesteps
        self.buf_feature_ids = torch.zeros((self.L, self.K), dtype=torch.long, device=self.device)
        self.buf_feature_vals = torch.zeros((self.L, self.K), dtype=torch.float32, device=self.device)
        self.buf_zone_ids = torch.zeros((self.L, self.K), dtype=torch.long, device=self.device)
        self.buf_attn = torch.zeros((self.L, self.K), dtype=torch.long, device=self.device)
        self.buf_rtg = torch.zeros((self.L,), dtype=torch.float32, device=self.device)

        # Preallocated, time-ordered model inputs (batch size 1)
        self.t_feature_ids = torch.zeros((1, self.L, self.K), dtype=torch.long, device=self.device)
        self.t_feature_vals = torch.zeros((1, self.L, self.K), dtype=torch.float32, device=self.device)
        self.t_zone_ids = torch.zeros((1, self.L, self.K), dtype=torch.long, device=self.device)
        self.t_attn = torch.zeros((1, self.L, self.K), dtype=torch.long, device=self.device)
        self.t_rtg = torch.zeros((1, self.L), dtype=torch.float32, device=self.device)
        self.ptr = 0      # next ring-buffer slot to write
        self.filled = 0   # number of valid slots (saturates at L)

        # Context buffer
        self.t_context = torch.zeros((1, self.context_dim), dtype=torch.float32, device=self.device)

    def reset(self):
        """Clear all history so the next act() starts a fresh episode."""
        self.buf_feature_ids.zero_()
        self.buf_feature_vals.zero_()
        self.buf_zone_ids.zero_()
        self.buf_attn.zero_()
        self.buf_rtg.zero_()
        self.t_feature_ids.zero_()
        self.t_feature_vals.zero_()
        self.t_zone_ids.zero_()
        self.t_attn.zero_()
        self.t_rtg.zero_()
        self.prev_action = np.array([21.0, 24.0] * 5, dtype=np.float32)
        self.ptr = 0
        self.filled = 0

    def _decode_bin_to_setpoint(self, bin_id: int, key: str) -> float:
        """Map a discrete bin index linearly onto the setpoint range for ``key``.

        Keys containing "clg" or "cool" use the cooling range; all others use
        the heating range. Bin 0 maps to the low bound and the last bin to
        the high bound.
        """
        if "clg" in key.lower() or "cool" in key.lower():
            lo, hi = self.dl.CLG_LOW, self.dl.CLG_HIGH
        else:
            lo, hi = self.dl.HTG_LOW, self.dl.HTG_HIGH
        x = float(bin_id) / float(self.dl.NUM_ACTION_BINS - 1)
        return lo + x * (hi - lo)

    def _scale_rtg(self, rtg_raw: float) -> float:
        """Scale a raw return-to-go by ``max_return`` or the constant divisor."""
        if self.rtg_scale_mode == "max_return":
            scale = max(self.max_return, 1e-6)
            return float(rtg_raw) / scale
        return float(rtg_raw) / float(self.rtg_constant_div)

    def _normalize_action(self, action: np.ndarray) -> np.ndarray:
        """Return a float32 copy of ``action`` standardized with the action stats.

        The standard deviation is floored at 1e-6 so that a constant action
        dimension (std == 0) cannot produce inf/NaN model inputs.
        """
        norm = np.array(action, dtype=np.float32, copy=True)
        n = min(len(self.act_mean), norm.shape[0])
        std = np.maximum(self.act_std[:n], 1e-6)
        norm[:n] = (norm[:n] - self.act_mean[:n]) / std
        return norm

    def _write_model_inputs_from_ring(self):
        """Copy the ring buffer into the model input tensors in time order.

        While the buffer is not yet full, valid steps are right-aligned and
        the leading positions are zero (attention mask 0). Once full, the
        buffer is unrolled starting at ``ptr`` (the oldest entry).
        """
        if self.filled < self.L:
            start = self.L - self.filled
            self.t_feature_ids.zero_(); self.t_feature_vals.zero_()
            self.t_zone_ids.zero_(); self.t_attn.zero_(); self.t_rtg.zero_()
            self.t_feature_ids[0, start:].copy_(self.buf_feature_ids[: self.filled])
            self.t_feature_vals[0, start:].copy_(self.buf_feature_vals[: self.filled])
            self.t_zone_ids[0, start:].copy_(self.buf_zone_ids[: self.filled])
            self.t_attn[0, start:].copy_(self.buf_attn[: self.filled])
            self.t_rtg[0, start:].copy_(self.buf_rtg[: self.filled])
            return
        p = self.ptr
        n1 = self.L - p
        # Oldest part: ptr..end of the buffer.
        self.t_feature_ids[0, :n1].copy_(self.buf_feature_ids[p:])
        self.t_feature_vals[0, :n1].copy_(self.buf_feature_vals[p:])
        self.t_zone_ids[0, :n1].copy_(self.buf_zone_ids[p:])
        self.t_attn[0, :n1].copy_(self.buf_attn[p:])
        self.t_rtg[0, :n1].copy_(self.buf_rtg[p:])
        # Newest part: start of the buffer..ptr.
        self.t_feature_ids[0, n1:].copy_(self.buf_feature_ids[:p])
        self.t_feature_vals[0, n1:].copy_(self.buf_feature_vals[:p])
        self.t_zone_ids[0, n1:].copy_(self.buf_zone_ids[:p])
        self.t_attn[0, n1:].copy_(self.buf_attn[:p])
        self.t_rtg[0, n1:].copy_(self.buf_rtg[:p])

    def act(self, obs: Any, info: Dict[str, Any], step: int) -> Tuple[np.ndarray, Dict, Dict]:
        """Choose setpoints for the current observation.

        Args:
            obs: Raw observation ordered as ``env_keys_order``.
            info: Unused.
            step: Episode step index; controls the warm-start action and the
                debug printing cadence.

        Returns:
            ``(action, {}, {})`` where ``action`` is a float32 array laid out
            as ``action_keys`` (heating/cooling pairs per zone).
        """
        # --- 1. Map the raw observation onto the model's feature order ---
        # Keys absent from the observation (e.g. a short obs) default to 0.0.
        obs_raw = np.asarray(obs, dtype=np.float32)
        env_map = dict(zip(self.env_keys_order, obs_raw))
        obs_ordered = np.array([env_map.get(k, 0.0) for k in self.model_state_keys], dtype=np.float32)

        # --- 2. Normalization ---
        obs_norm = obs_ordered.copy()
        D = min(len(self.obs_mean), obs_norm.shape[0])
        eps = 1e-6
        obs_norm[:D] = (obs_norm[:D] - self.obs_mean[:D]) / (self.obs_std[:D] + eps)

        # --- 3. Context vector (recomputed every step) ---
        out_temp = env_map.get('outdoor_temp', 0.0)
        out_dew = env_map.get('outdoor_dewpoint', 0.0)
        hour = env_map.get('hour', 0.0)
        month = env_map.get('month', 1.0)
        occ_total = 0.0
        occ_keys = ['core_occ_count', 'perim1_occ_count', 'perim2_occ_count', 'perim3_occ_count', 'perim4_occ_count']
        for k in occ_keys:
            if env_map.get(k, 0.0) > 0.5:  # Binary occupancy check
                occ_total += 1.0
        occ_frac = occ_total / 5.0
        # Cyclical encodings of hour-of-day and month-of-year.
        hr_sin = np.sin(2 * np.pi * hour / 24.0)
        hr_cos = np.cos(2 * np.pi * hour / 24.0)
        mth_norm = month - 1.0
        mth_sin = np.sin(2 * np.pi * mth_norm / 12.0)
        mth_cos = np.cos(2 * np.pi * mth_norm / 12.0)
        ctx_vec = np.array([
            out_temp, 0.0,       # Temp Mean, Temp Std
            out_dew,             # Dewpoint
            occ_frac,            # Occ Fraction
            hr_sin, hr_cos,      # Hour
            mth_sin, mth_cos,    # Month
            0.0, 0.0             # Spares
        ], dtype=np.float32)
        self.t_context[0].copy_(torch.from_numpy(ctx_vec))

        # --- 4. Build this step's token row ---
        self.row_feat_vals.fill(0.0)
        if self.num_state > 0:
            self.row_feat_vals[: self.num_state] = obs_norm[: self.num_state]
        if self.num_act > 0:
            s, e = self.num_state, self.num_state + self.num_act
            if step < 5:
                # Warm start: the first steps feed a fixed action instead of
                # the previous prediction.
                seed_action = np.array([22.0, 25.0] * 5, dtype=np.float32)
            else:
                seed_action = self.prev_action
            self.row_feat_vals[s:e] = self._normalize_action(seed_action)[: self.num_act]

        # --- 5. Push the row into the ring buffer ---
        i = self.ptr
        self.buf_feature_ids[i].copy_(torch.as_tensor(self.row_feat_ids, dtype=torch.long))
        self.buf_zone_ids[i].copy_(torch.as_tensor(self.row_zone_ids, dtype=torch.long))
        self.buf_attn[i].copy_(torch.as_tensor(self.row_attn, dtype=torch.long))
        self.buf_feature_vals[i].copy_(torch.as_tensor(self.row_feat_vals, dtype=torch.float32))
        self.buf_rtg[i] = float(self._scale_rtg(self.desired_rtg_raw))
        self.ptr = (self.ptr + 1) % self.L
        self.filled = min(self.filled + 1, self.L)
        self._write_model_inputs_from_ring()

        # --- 6. Forward pass ---
        with torch.inference_mode():
            with torch.amp.autocast(device_type="cpu", dtype=torch.bfloat16):
                out = self.model(self.t_feature_ids, self.t_feature_vals, self.t_zone_ids, self.t_attn, rtg=self.t_rtg, context=self.t_context)
        logits = out["action_logits"]
        last = logits[0, -1]  # [K, n_bins]
        s, e = self.num_state, self.num_state + self.num_act
        temp = max(self.temperature, 1e-4)
        raw_logits = last[s:e]
        if torch.isnan(raw_logits).any() or torch.isinf(raw_logits).any():
            raw_logits = torch.nan_to_num(raw_logits, nan=0.0, posinf=10.0, neginf=-10.0)

        # --- 7. Sample one bin per action token ---
        action_logits = raw_logits / temp
        action_probs = F.softmax(action_logits, dim=-1)  # [Num_Actions, n_bins]
        if torch.isnan(action_probs).any() or (action_probs < 0).any():
            # Degenerate distribution: fall back to uniform.
            action_probs = torch.ones_like(action_probs) / action_probs.size(-1)
        try:
            pred_bins = torch.multinomial(action_probs, num_samples=1).flatten().cpu().numpy().astype(np.int64)
        except RuntimeError:
            # Sampling rejected the distribution; take the most likely bin.
            pred_bins = torch.argmax(action_probs, dim=-1).cpu().numpy().astype(np.int64)

        # --- 8. Decode, clip and enforce the deadband ---
        action = self.prev_action.copy()
        for j in range(self.num_act):
            action[j] = self._decode_bin_to_setpoint(int(pred_bins[j]), self.action_keys[j])
        for j, k in enumerate(self.action_keys):
            if "clg" in k.lower():
                action[j] = float(np.clip(action[j], self.dl.CLG_LOW, self.dl.CLG_HIGH))
            else:
                action[j] = float(np.clip(action[j], self.dl.HTG_LOW, self.dl.HTG_HIGH))
        DEADBAND_GAP = 3.0
        for z in range(5):
            h_idx = 2 * z
            c_idx = 2 * z + 1
            if action[c_idx] < action[h_idx] + DEADBAND_GAP:
                # First try to raise cooling; if it hits its upper bound,
                # lower heating instead.
                action[c_idx] = min(self.dl.CLG_HIGH, action[h_idx] + DEADBAND_GAP)
                if action[c_idx] < action[h_idx] + DEADBAND_GAP:
                    action[h_idx] = max(self.dl.HTG_LOW, action[c_idx] - DEADBAND_GAP)

        if step < 5 or step % 1000 == 0:
            print(f"[DT] Step {step} Raw Bins: {pred_bins}")
            if self.num_act >= 2:  # both core bins must exist to be decoded
                h_val = self._decode_bin_to_setpoint(int(pred_bins[0]), "htg_core")
                c_val = self._decode_bin_to_setpoint(int(pred_bins[1]), "clg_core")
                print(f"[DT] Step {step} Decoded Core: Heat {h_val:.2f} | Cool {c_val:.2f}")
        self.prev_action = action
        return action, {}, {}
def make_policy(policy_type: str, **kwargs):
    """Build a policy object from a type name and keyword options.

    Only ``"dt"`` (case-insensitive, surrounding whitespace ignored) is
    recognized. It requires ``ckpt_path``, ``model_config_path``,
    ``norm_stats_path``, ``context_len`` and ``max_tokens_per_step``;
    ``device`` defaults to "cpu" and ``temperature`` to 0.8.

    Raises:
        ValueError: For any other (or empty/None) policy type.
        KeyError: If a required option for "dt" is missing.
    """
    policy_type = (policy_type or "").lower().strip()
    if policy_type != "dt":
        raise ValueError(f"Unknown policy_type={policy_type}.")

    dt_options = dict(
        ckpt_path=kwargs["ckpt_path"],
        model_config_path=kwargs["model_config_path"],
        norm_stats_path=kwargs["norm_stats_path"],
        context_len=kwargs["context_len"],
        max_tokens_per_step=kwargs["max_tokens_per_step"],
        device=kwargs.get("device", "cpu"),
        temperature=kwargs.get("temperature", 0.8),
    )
    return DecisionTransformerPolicy5Zone(**dt_options)

utilities/rewards.py ADDED Viewed

	@@ -0,0 +1,57 @@

+# unihvac/rewards.py
+from __future__ import annotations
+from dataclasses import dataclass, asdict
+from typing import Dict, Any, Tuple, Optional
+import numpy as np
+import pandas as pd
@dataclass(frozen=True)
class RewardConfig:
    """Immutable settings describing how reward components are derived."""

    # Free-form tag identifying this reward definition.
    version: str = "v_ashrae"
    # Candidate per-step energy (kWh) columns, tried in this order.
    prefer_step_kwh_cols: Tuple[str, ...] = ("HVAC_elec_kWh_step", "hvac_kWh_step", "elec_kWh_step")
    # Power column used when none of the per-step energy columns exists.
    elec_power_col: str = "elec_power"
    # Column holding the per-step comfort penalty.
    comfort_col: str = "ppd_weighted"
    # Weights for the energy and comfort terms.
    w_energy: float = 1.0
    w_comfort: float = 0.1
def config_to_meta(cfg: RewardConfig) -> Dict[str, Any]:
    """Return the configuration as a plain dict (field name -> value)."""
    meta = asdict(cfg)
    return meta
def compute_reward_components(df: pd.DataFrame, timestep_hours: float, cfg: RewardConfig) -> Tuple[np.ndarray, np.ndarray]:
    """Compute per-step energy and comfort reward terms (both float32).

    Energy comes from the first column in ``cfg.prefer_step_kwh_cols`` that
    exists; otherwise ``cfg.elec_power_col`` is divided by 1000 and
    multiplied by ``timestep_hours``; otherwise it is zero. Comfort is
    ``cfg.comfort_col`` or zero when absent. Missing values count as 0 and
    both terms are returned negated (penalties).

    Returns:
        ``(r_energy, r_comfort)``; two empty arrays for a None/empty frame.
    """
    if df is None or len(df) == 0:
        return np.zeros((0,), dtype=np.float32), np.zeros((0,), dtype=np.float32)

    n_rows = len(df)

    # --- energy term ---
    step_col = next((c for c in cfg.prefer_step_kwh_cols if c in df.columns), None)
    if step_col is not None:
        energy_kwh = df[step_col].fillna(0.0).astype(np.float32).values
    elif cfg.elec_power_col in df.columns:
        power_w = df[cfg.elec_power_col].fillna(0.0).astype(np.float32).values
        energy_kwh = (power_w / 1000.0) * timestep_hours
    else:
        energy_kwh = np.zeros(n_rows, dtype=np.float32)

    # --- comfort term ---
    if cfg.comfort_col in df.columns:
        comfort_val = df[cfg.comfort_col].fillna(0.0).astype(np.float32).values
    else:
        comfort_val = np.zeros(n_rows, dtype=np.float32)

    r_energy = (-1.0 * energy_kwh).astype(np.float32)
    r_comfort = (-1.0 * comfort_val).astype(np.float32)
    return r_energy, r_comfort
def compute_terminals(df: pd.DataFrame) -> np.ndarray:
    """Return an int8 flag per row: 1 on the final row, 0 elsewhere.

    A ``None`` or empty frame yields an empty array.
    """
    n_rows = len(df) if df is not None else 0
    flags = np.zeros((n_rows,), dtype=np.int8)
    if n_rows:
        flags[n_rows - 1] = 1
    return flags

utilities/rollout.py ADDED Viewed

	@@ -0,0 +1,261 @@

+# unihvac/rollout.py
+from __future__ import annotations
+from typing import Callable, Dict, Any, Optional, List, Tuple
+import gymnasium as gym
+import numpy as np
+import pandas as pd
+import sinergym
+from unihvac.comfort import (
+    fix_ashrae_any_fixed,
+    quick_stats,
+    add_feature_availability_and_registry,
+    print_feature_availability,
+    compute_comfort_metrics_inplace,
+)
# Per-zone column names, core zone first, then the four perimeter zones.
ZONE_TEMP_KEYS = [
    "core_temp",
    "perim1_temp",
    "perim2_temp",
    "perim3_temp",
    "perim4_temp",
]
ZONE_OCC_KEYS = [
    "core_occ_count",
    "perim1_occ_count",
    "perim2_occ_count",
    "perim3_occ_count",
    "perim4_occ_count",
]
RH_KEYS = [
    "core_rh",
    "perim1_rh",
    "perim2_rh",
    "perim3_rh",
    "perim4_rh",
]

# Baseline feature set: outdoor temperature, zone temperatures, power,
# zone occupancy.
BASE_FEATURE_COLS = ["outdoor_temp", *ZONE_TEMP_KEYS, "elec_power", *ZONE_OCC_KEYS]

# Additional features: outdoor humidity proxies, zone humidity and the
# ASHRAE 55 "not comfortable" indicators.
NEW_FEATURE_COLS = [
    "outdoor_dewpoint",
    "outdoor_wetbulb",
    *RH_KEYS,
    "core_ash55_notcomfortable_summer",
    "core_ash55_notcomfortable_winter",
    "core_ash55_notcomfortable_any",
    "p1_ash55_notcomfortable_any",
    "p2_ash55_notcomfortable_any",
    "p3_ash55_notcomfortable_any",
    "p4_ash55_notcomfortable_any",
]

# ASHRAE columns summarized after a rollout; the core "any" indicator is
# taken from the recomputed ``core_ash55_any_fixed`` column.
ASH_COLS = [
    "core_ash55_notcomfortable_summer",
    "core_ash55_notcomfortable_winter",
    "core_ash55_any_fixed",
    "p1_ash55_notcomfortable_any",
    "p2_ash55_notcomfortable_any",
    "p3_ash55_notcomfortable_any",
    "p4_ash55_notcomfortable_any",
]

# Signature of a policy callable: (observation, info, step) -> action.
PolicyFn = Callable[[np.ndarray, Dict[str, Any], int], np.ndarray]
class DummyReward:
    """Placeholder reward: accepts any constructor arguments and always
    yields a zero reward with no reward terms."""

    def __init__(self, *args, **kwargs):
        """Ignore all arguments."""

    def __call__(self, obs_dict):
        reward_value, reward_terms = 0.0, {}
        return reward_value, reward_terms
def make_env_officesmall_5zone(
    building_path: str,
    weather_path: str,
    variables: Dict[str, tuple],
    actuators: Dict[str, tuple],
    action_low: float = 12.0,
    action_high: float = 30.0,
    action_dim: int = 10,
    reward=None,
):
    """Create the 5-zone environment with a custom continuous action space.

    Args:
        building_path: Building model file passed as ``building_file``.
        weather_path: Weather file; passed as a single-element list.
        variables: Observation variable definitions forwarded to the env.
        actuators: Actuator definitions forwarded to the env.
        action_low: Lower bound applied to every action dimension.
        action_high: Upper bound applied to every action dimension.
        action_dim: Number of action dimensions.
        reward: Reward class to use; ``DummyReward`` when ``None``.

    Returns:
        ``(env, obs_keys, month_idx)`` where ``obs_keys`` is the env's
        observation variable list and ``month_idx`` is the position of
        "month" in it (``None`` if absent).
    """
    new_action_space = gym.spaces.Box(
        low=action_low, high=action_high, shape=(action_dim,), dtype=np.float32
    )
    if reward is None:
        reward = DummyReward
    env = gym.make(
        "Eplus-5zone-mixed-continuous-stochastic-v1",
        building_file=building_path,
        weather_files=[weather_path],
        variables=variables,
        actuators=actuators,
        action_space=new_action_space,
        reward=reward,
    )
    # Read the observation layout once from the unwrapped environment.
    obs_keys = env.unwrapped.observation_variables
    print("ENVIRONMENT VARIABLES:", obs_keys)
    month_idx = obs_keys.index("month") if "month" in obs_keys else None
    return env, obs_keys, month_idx
def rollout_episode(
    env,
    policy_fn: PolicyFn,
    obs_keys: List[str],
    month_idx: Optional[int],
    max_steps: Optional[int] = None,
) -> pd.DataFrame:
    """Run one episode and return a per-step log.

    Args:
        env: Environment exposing ``reset()`` and a five-value ``step()``.
        policy_fn: Callable ``(obs, info, step) -> action``.
        obs_keys: Names for the observation entries, in order.
        month_idx: Index of the month in the observation, or ``None`` to
            read ``info["month"]`` instead (NaN when unavailable).
        max_steps: Optional cap on the number of steps.

    Returns:
        DataFrame with one row per step: ``step``, ``month``, the first two
        action entries as ``setpoint_htg`` / ``setpoint_clg``, and the
        post-step observation keyed by ``obs_keys``. ``month`` is rounded
        and cast to int only when every value is present; otherwise it is
        left as-is so that a missing month cannot crash the rollout and
        discard the collected data.
    """
    obs, info = env.reset()
    data_log = []
    terminated = False
    truncated = False
    step = 0
    while not (terminated or truncated):
        if max_steps is not None and step >= max_steps:
            break
        action = policy_fn(obs, info, step)
        # Only the first heating/cooling pair of the action is logged.
        htg_sp = float(action[0])
        clg_sp = float(action[1])
        next_obs, _, terminated, truncated, info = env.step(action)
        month_val = next_obs[month_idx] if month_idx is not None else info.get("month", np.nan)
        row = {"step": step, "month": month_val}
        row["setpoint_htg"] = htg_sp
        row["setpoint_clg"] = clg_sp
        # Observation values win over the bookkeeping keys on a name clash.
        row.update(dict(zip(obs_keys, next_obs)))
        data_log.append(row)
        obs = next_obs
        step += 1
    df = pd.DataFrame(data_log)
    # astype(int) raises on NaN, so only cast a fully populated column.
    if "month" in df.columns and df["month"].notna().all():
        df["month"] = df["month"].round().astype(int)
    return df
+def add_energy_columns_inplace(
+    df: pd.DataFrame,
+    timestep_hours: float,
+    elec_col: str = "elec_power",
+) -> None:
+    if elec_col in df.columns:
+        df["elec_power_kw"] = df[elec_col] / 1000.0
+        df["elec_energy_kwh"] = df["elec_power_kw"] * timestep_hours
+    else:
+        df["elec_power_kw"] = np.nan
+        df["elec_energy_kwh"] = np.nan
+def postprocess_comfort_inplace(
+    df: pd.DataFrame,
+    location: str,
+    timestep_hours: float,
+    heating_sp: float,
+    cooling_sp: float,
+    verbose: bool = True,
+) -> None:
+    fix_ashrae_any_fixed(df)
+    if verbose:
+        quick_stats(df, ASH_COLS, "ASHRAE55 Not Comfortable (raw timestep values)")
+    add_feature_availability_and_registry(df, BASE_FEATURE_COLS, NEW_FEATURE_COLS)
+    if verbose:
+        print_feature_availability(df, location)
+    compute_comfort_metrics_inplace(
+        df=df,
+        location=location,
+        time_step_hours=timestep_hours,
+        heating_sp=heating_sp,
+        cooling_sp=cooling_sp,
+        zone_temp_keys=ZONE_TEMP_KEYS,
+        zone_occ_keys=ZONE_OCC_KEYS,
+        rh_keys=RH_KEYS,
+    )
+def run_rollout_to_df(
+    *,
+    building_path: str,
+    weather_path: str,
+    variables: Dict[str, tuple],
+    actuators: Dict[str, tuple],
+    policy_fn: PolicyFn,
+    location: str,
+    timestep_hours: float,
+    heating_sp: float,
+    cooling_sp: float,
+    reward=None,
+    max_steps: Optional[int] = None,
+    verbose: bool = True,
+) -> pd.DataFrame:
+    env = None
+    try:
+        env, obs_keys, month_idx = make_env_officesmall_5zone(
+            building_path=building_path,
+            weather_path=weather_path,
+            variables=variables,
+            actuators=actuators,
+            reward=reward,
+        )
+        df = rollout_episode(
+            env=env,
+            policy_fn=policy_fn,
+            obs_keys=list(obs_keys),
+            month_idx=month_idx,
+            max_steps=max_steps,
+        )
+    finally:
+        if env is not None:
+            env.close()
+    add_energy_columns_inplace(df, timestep_hours=timestep_hours)
+    postprocess_comfort_inplace(
+        df=df,
+        location=location,
+        timestep_hours=timestep_hours,
+        heating_sp=heating_sp,
+        cooling_sp=cooling_sp,
+        verbose=verbose,
+    )
+    return df
+# ======================================================================================
+# INDEX MAPPING (Sinergym / OfficeSmall 5-Zone)
+#
+#   00: month
+#   01: day_of_month
+#   02: hour
+#   03: outdoor_temp
+#   04: core_temp
+#   05: perim1_temp
+#   06: perim2_temp
+#   07: perim3_temp
+#   08: perim4_temp
+#   09: elec_power
+#   10: core_occ_count
+#   11: perim1_occ_count
+#   12: perim2_occ_count
+#   13: perim3_occ_count
+#   14: perim4_occ_count
+#   15: outdoor_dewpoint
+#   16: outdoor_wetbulb
+#   17: core_rh
+#   18: perim1_rh
+#   19: perim2_rh
+#   20: perim3_rh
+#   21: perim4_rh
+#   22: core_ash55_notcomfortable_summer
+#   23: core_ash55_notcomfortable_winter
+#   24: core_ash55_notcomfortable_any
+#   25: p1_ash55_notcomfortable_any
+#   26: p2_ash55_notcomfortable_any
+#   27: p3_ash55_notcomfortable_any
+#   28: p4_ash55_notcomfortable_any
+#   29: total_electricity_HVAC
+#
+#
+# ======================================================================================

utilities/tables.py ADDED Viewed

	@@ -0,0 +1,111 @@

+# unihvac/tables.py
+from __future__ import annotations
+import pandas as pd
+import numpy as np
+def print_monthly_tables_split(df: pd.DataFrame, location: str, time_step_hours: float):
+    """
+    Table 1: Monthly HVAC electricity + temps
+    Table 2: Monthly occupancy
+    """
+    drop_cols = [c for c in df.columns if c.startswith("has_") or c == "feature_registry"]
+    df_clean = df.drop(columns=drop_cols, errors="ignore").copy()
+    if "month" not in df_clean.columns:
+        return
+    df_clean["month"] = df_clean["month"].round().astype(int)
+    # 1. Electricity / Energy Calculation
+    energy_cols = []
+    peak_cols = []
+    if "elec_power" in df_clean.columns:
+        if "elec_power_kw" not in df_clean.columns:
+            df_clean["elec_power_kw"] = df_clean["elec_power"] / 1000.0
+        if "elec_energy_kwh" not in df_clean.columns:
+            df_clean["elec_energy_kwh"] = df_clean["elec_power_kw"] * time_step_hours
+        energy_cols.append("elec_energy_kwh")
+        peak_cols.append("elec_power_kw")
+    # 2. Temperature Aggregation
+    temp_cols = [c for c in ["outdoor_temp", "core_temp", "perim1_temp", "perim2_temp", "perim3_temp", "perim4_temp"]
+                 if c in df_clean.columns]
+    agg1 = {c: "sum" for c in energy_cols}
+    agg1.update({c: "max" for c in peak_cols})
+    agg1.update({c: "mean" for c in temp_cols})
+    tbl1 = df_clean.groupby("month").agg(agg1).sort_index()
+    # 3. Occupancy Aggregation
+    occ_cols = [c for c in df_clean.columns if c.endswith("_occ_count")]
+    tbl2 = df_clean.groupby("month")[occ_cols].mean().sort_index() if occ_cols else pd.DataFrame()
+    if not tbl2.empty:
+        tbl2["occ_mean_total"] = tbl2.sum(axis=1)
+    print("\n" + "=" * 110)
+    print(f"MONTHLY ELECTRICITY + TEMPERATURE — {location}")
+    print("=" * 110)
+    print(tbl1.round(2).to_string())
+    print("\n" + "=" * 110)
+    print(f"MONTHLY OCCUPANCY — {location}")
+    print("=" * 110)
+    print(tbl2.round(3).to_string())
+    print("=" * 110 + "\n")
+def print_monthly_tables_extra(df: pd.DataFrame, location: str) -> None:
+    """Print per-month comfort outcomes and, when available, outdoor conditions.
+    Works on a copy of ``df`` and returns without printing when there is no
+    ``month`` column. ``location`` only appears in the table headings.
+    """
+    d = df.copy()
+    if "month" not in d.columns:
+        return
+    # NOTE(review): astype(int) raises if any month value is NaN.
+    d["month"] = d["month"].round().astype(int)
+    # Monthly totals of whichever comfort-violation columns are present.
+    violation_cols = [c for c in ["comfort_violation_degCh", "comfort_violation_fixed_degCh"] if c in d.columns]
+    tbl_sums = d.groupby("month")[violation_cols].sum()
+    # Keep only rows where the summed "*_occ_count" columns are above a small threshold.
+    occ_cols = [c for c in d.columns if c.endswith("_occ_count")]
+    total_occ = d[occ_cols].sum(axis=1)
+    is_occupied = total_occ > 1e-6
+    d_occ = d[is_occupied].copy()
+    # Mean of ppd_weighted within a group, weighted by each row's total occupancy.
+    def person_weighted_ppd(group):
+        occ = group[occ_cols].sum(axis=1)
+        raw_ppd = group["ppd_weighted"]
+        return (raw_ppd * occ).sum() / occ.sum() if occ.sum() > 0 else np.nan
+    if not d_occ.empty and "ppd_weighted" in d_occ.columns:
+        ppd_monthly = d_occ.groupby("month", group_keys=False).apply(person_weighted_ppd)
+        # Floor at 5.0 -- presumably the theoretical minimum PPD in percent; confirm.
+        ppd_monthly = ppd_monthly.clip(lower=5.0)
+        # PMV and RH are plain means over occupied rows, not person-weighted.
+        # NOTE(review): only "ppd_weighted" is checked above; a missing
+        # "pmv_weighted" or "rh_weighted" column raises KeyError here.
+        pmv_monthly = d_occ.groupby("month")["pmv_weighted"].mean()
+        rh_monthly = d_occ.groupby("month")["rh_weighted"].mean()
+        tbl_means = pd.DataFrame({
+            "ppd_weighted": ppd_monthly,
+            "pmv_weighted": pmv_monthly,
+            "rh_weighted_%": rh_monthly
+        })
+        tbl3a = pd.concat([tbl_sums, tbl_means], axis=1).sort_index()
+    else:
+        # No occupied rows or no PPD column: report the violation sums only.
+        tbl3a = tbl_sums
+    # Outdoor means use every row (d), not just the occupied ones.
+    outdoor_vars = [c for c in ["outdoor_temp", "outdoor_dewpoint", "outdoor_wetbulb"] if c in d.columns]
+    tbl3b = d.groupby("month")[outdoor_vars].mean().sort_index() if outdoor_vars else None
+    print("\n" + "=" * 110)
+    print(f"MONTHLY COMFORT OUTCOMES (Occupancy Weighted) — {location}")
+    print("=" * 110)
+    print(tbl3a.round(3).to_string())
+    print("=" * 110)
+    # The outdoor table is skipped entirely when none of its columns exist.
+    if tbl3b is not None:
+        print("\n" + "=" * 110)
+        print(f"MONTHLY OUTDOOR CONDITIONS — {location}")
+        print("=" * 110)
+        print(tbl3b.round(3).to_string())
+        print("=" * 110)