# tools/ts_preprocess.py import pandas as pd import numpy as np from datetime import date from typing import Tuple # ------------------------- # Cash-flow projections # ------------------------- def estimated_monthly_interest(principal: float, annual_rate: float, n_months: int) -> float: return principal * (annual_rate / 12.0) * n_months def project_deposit(principal: float, annual_rate: float, months: int) -> pd.DataFrame: start = pd.Timestamp(date.today()).normalize().replace(day=1) periods = pd.period_range(start=start, periods=max(int(months), 1), freq="M") df = pd.DataFrame({"period": periods.to_timestamp()}) df["interest"] = principal * (annual_rate / 12.0) df["principal_cf"] = 0.0 df.loc[df.index[-1], "principal_cf"] = principal df["cash_flow"] = df["interest"] + df["principal_cf"] return df def project_asset(principal: float, annual_rate: float, months: int) -> pd.DataFrame: m = max(int(months), 1) r = annual_rate / 12.0 if r == 0.0: pmt = principal / m else: pmt = principal * (r * (1 + r) ** m) / ((1 + r) ** m - 1) rows = [] start = pd.Timestamp(date.today()).normalize().replace(day=1) bal = principal for i in range(1, m + 1): interest = bal * r principal_cf = pmt - interest bal = max(0.0, bal - principal_cf) rows.append({ "period": start + pd.offsets.MonthEnd(i), "interest": float(interest), "principal_cf": float(principal_cf), "cash_flow": float(pmt), "balance": float(bal) }) return pd.DataFrame(rows) def liquidity_gap(cf: pd.DataFrame) -> pd.DataFrame: """ Input columns: period, product, cash_flow """ cf = cf.copy() cf["bucket"] = pd.PeriodIndex(cf["period"], freq="M").astype(str) piv = cf.pivot_table(index="bucket", columns="product", values="cash_flow", aggfunc="sum", fill_value=0.0) # assets - liabilities(FD) piv["gap"] = piv.get("assets", 0.0) - piv.get("fd", 0.0) piv["cumulative_gap"] = piv["gap"].cumsum() return piv.reset_index() # ------------------------- # Adapter + main builder # ------------------------- REQUIRED_MASTER_COLS = {"Portfolio_value", "Interest_rate", "months", "product"} def _adapt_masterdataset(df: pd.DataFrame) -> pd.DataFrame: """ Accepts rows from main.masterdataset_v and returns a normalized frame with a monthly projection per row. """ df = df.copy() assert REQUIRED_MASTER_COLS.issubset(df.columns), ( f"DataFrame must include {REQUIRED_MASTER_COLS}, got {set(df.columns)}" ) all_cf = [] for _, r in df.iterrows(): principal = float(r["Portfolio_value"]) rate = float(r["Interest_rate"]) months = int(r["months"]) if pd.notna(r["months"]) else max(int((r.get("days_to_maturity", 0) or 0) // 30), 1) prod = str(r["product"]).lower().strip() if prod == "fd": cf = project_deposit(principal, rate, months) else: cf = project_asset(principal, rate, months) # carry attributes cf["product"] = prod cf["contract_number"] = r.get("contract_number", None) cf["segments"] = r.get("segments", None) cf["currency"] = r.get("currency", None) cf["board_currency"] = r.get("board_currency", None) cf["Portfolio_value"] = principal cf["Interest_rate"] = rate all_cf.append(cf) return pd.concat(all_cf, ignore_index=True) if all_cf else pd.DataFrame( columns=["period", "interest", "principal_cf", "cash_flow", "product", "contract_number", "segments", "currency", "board_currency", "Portfolio_value", "Interest_rate"] ) def build_timeseries(df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]: """ Main entrypoint used by app.py: - If frame matches masterdataset_v, generate cashflows - Compute liquidity gap on the result Returns: (cashflows_df, gap_df) """ if REQUIRED_MASTER_COLS.issubset(df.columns): cf = _adapt_masterdataset(df) else: # if already a projected cashflow frame (period/product/cash_flow), pass-through if {"period", "product", "cash_flow"}.issubset(df.columns): cf = df.copy() else: # unsupported schema return pd.DataFrame(), pd.DataFrame() gap = liquidity_gap(cf[["period", "product", "cash_flow"]]) return cf, gap