import os
import json

import numpy as np
import pandas as pd


def _load_platform_weights() -> dict:
    """
    Load platform weights from platform_weights.json.

    Supports multiple key schemes:
    - W_C / W_A
    - trend_weight / risk_weight
    - C_weight / A_weight
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    json_path = os.path.join(script_dir, "platform_weights.json")

    if not os.path.exists(json_path):
        print("WARNING: platform_weights.json not found. Using default weights.")
        return {
            "tiktok": {"W_C": 1.00, "W_A": 1.00},
            "instagram": {"W_C": 0.80, "W_A": 0.90},
            "youtube": {"W_C": 0.60, "W_A": 0.60},
            "twitter": {"W_C": 0.70, "W_A": 0.80},
            "reddit": {"W_C": 0.50, "W_A": 0.50},
            "facebook": {"W_C": 0.30, "W_A": 0.40},
            "other": {"W_C": 0.20, "W_A": 0.30},
        }

    with open(json_path, "r", encoding="utf-8") as f:
        raw = json.load(f)

    # Normalize every entry to the canonical {"W_C": ..., "W_A": ...} scheme.
    # Take the first key that is actually present so an explicit 0.0 weight is
    # respected instead of falling through to an alias key.
    norm = {}
    for platform, vals in raw.items():
        if not isinstance(vals, dict):
            vals = {}
        w_c = next(
            (
                vals[k]
                for k in ("W_C", "c_weight", "C_weight", "trend_weight")
                if vals.get(k) is not None
            ),
            0.0,
        )
        w_a = next(
            (
                vals[k]
                for k in ("W_A", "a_weight", "A_weight", "risk_weight")
                if vals.get(k) is not None
            ),
            0.0,
        )
        norm[platform.lower()] = {"W_C": float(w_c), "W_A": float(w_a)}

    return norm


PLATFORM_WEIGHTS = _load_platform_weights()
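
# Example platform_weights.json layout (illustrative only; any of the key
# schemes accepted by _load_platform_weights may be used per platform):
#
#     {
#         "tiktok": {"W_C": 1.0, "W_A": 1.0},
#         "instagram": {"trend_weight": 0.8, "risk_weight": 0.9}
#     }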


class CEARModel:
    """
    Core CEAR scoring model.

    Inputs:
        user_df: DataFrame with columns:
            - 'platform_name': str
            - 'minutes_per_week': numeric
            - optional 'variety_score': numeric (0-10)

        satisfaction: optional float (0-10)
        fomo: optional float (0-10)

    Returns dict:
        {
            "C_Score": float,
            "A_Risk": float,
            "D_Index": float,
            "Avg_Variety": float | None,
            "Satisfaction": float | None,
            "FOMO": float | None,
            "Per_Platform_Efficiency": [
                {"platform_name": str, "Cultural_Efficiency": float}, ...
            ]
        }
    """

    def __init__(self, weights: dict | None = None) -> None:
        self.weights = weights if weights is not None else PLATFORM_WEIGHTS

    @staticmethod
    def _diminishing_returns(minutes: float) -> float:
        """Log10-based diminishing returns on minutes."""
        minutes = max(float(minutes), 0.0)
        return float(np.log10(minutes + 1.0))
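
    # For reference, exact points on the diminishing-returns curve:
    #   _diminishing_returns(9)   -> 1.0   (log10(10))
    #   _diminishing_returns(99)  -> 2.0   (log10(100))
    #   _diminishing_returns(999) -> 3.0   (log10(1000))
    # Each extra order of magnitude of minutes adds one unit of C contribution.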

    def _weights_dataframe(self) -> pd.DataFrame:
        """Return self.weights as a tidy DataFrame for merging onto user data."""
        if not self.weights:
            return pd.DataFrame(columns=["platform_name", "W_C", "W_A"])

        w_df = pd.DataFrame.from_dict(self.weights, orient="index")
        w_df.index = w_df.index.astype(str).str.lower()
        w_df.index.name = "platform_name"
        w_df = w_df.reset_index()

        if "W_C" not in w_df.columns:
            w_df["W_C"] = 0.0
        if "W_A" not in w_df.columns:
            w_df["W_A"] = 0.0

        return w_df[["platform_name", "W_C", "W_A"]]

    def calculate_scores(
        self,
        user_df: pd.DataFrame,
        satisfaction: float | None = None,
        fomo: float | None = None,
    ) -> dict:
        if user_df is None or user_df.empty:
            return {
                "C_Score": 0.0,
                "A_Risk": 0.0,
                "D_Index": 0.0,
                "Avg_Variety": None,
                "Satisfaction": satisfaction,
                "FOMO": fomo,
                "Per_Platform_Efficiency": [],
            }

        df = user_df.copy()

        # Normalize inputs: lowercase platform names and coerce minutes to
        # non-negative numeric values.
        df["platform_name"] = (
            df["platform_name"].astype(str).str.strip().str.lower()
        )
        df["minutes_per_week"] = pd.to_numeric(
            df["minutes_per_week"], errors="coerce"
        ).fillna(0.0)
        df["minutes_per_week"] = df["minutes_per_week"].clip(lower=0.0)

        # Attach platform weights; unknown platforms fall back to zero weight.
        w_df = self._weights_dataframe()
        df = df.merge(w_df, on="platform_name", how="left")
        df[["W_C", "W_A"]] = df[["W_C", "W_A"]].fillna(0.0)

        total_mins = float(df["minutes_per_week"].sum())

        # C_Score: weighted cultural contribution with diminishing returns.
        # A_Risk: weighted linear exposure (no diminishing returns).
        df["C_Contrib"] = df.apply(
            lambda row: row["W_C"] * self._diminishing_returns(row["minutes_per_week"]),
            axis=1,
        )
        df["A_Contrib"] = df["W_A"] * df["minutes_per_week"]

        C_Score = float(df["C_Contrib"].sum())
        A_Risk = float(df["A_Contrib"].sum())

        # D_Index: inverse Herfindahl index of time shares, i.e. the effective
        # number of platforms in use.
        if total_mins > 0:
            shares = df["minutes_per_week"] / total_mins
            H = float((shares**2).sum())
            D_Index = float(1.0 / H) if H > 0 else 0.0
        else:
            D_Index = 0.0

        # Per-platform cultural efficiency, rescaled so the best platform is 100.
        df["Cultural_Efficiency"] = df["C_Contrib"] / df["minutes_per_week"].replace(
            0.0, np.nan
        )
        eff_df = df.loc[
            df["minutes_per_week"] > 0, ["platform_name", "Cultural_Efficiency"]
        ].copy()
        eff_df = eff_df.dropna()

        if not eff_df.empty:
            max_ce = float(eff_df["Cultural_Efficiency"].max())
            if max_ce > 0:
                eff_df["Cultural_Efficiency"] = (
                    eff_df["Cultural_Efficiency"] / max_ce * 100.0
                )
            else:
                eff_df["Cultural_Efficiency"] = 0.0

            eff_df = eff_df.sort_values("Cultural_Efficiency", ascending=False)
            per_platform_eff = eff_df.to_dict("records")
        else:
            per_platform_eff = []

        # Time-weighted average variety score (missing values count as 0).
        avg_variety = None
        if "variety_score" in df.columns and total_mins > 0:
            if df["variety_score"].notna().any():
                avg_variety = float(
                    np.average(
                        df["variety_score"].fillna(0.0),
                        weights=df["minutes_per_week"],
                    )
                )

        return {
            "C_Score": C_Score,
            "A_Risk": A_Risk,
            "D_Index": D_Index,
            "Avg_Variety": avg_variety,
            "Satisfaction": satisfaction,
            "FOMO": fomo,
            "Per_Platform_Efficiency": per_platform_eff,
        }
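

if __name__ == "__main__":
    # Minimal usage sketch with made-up sample data (illustration only; the
    # platform names below rely on the default weight table).
    sample = pd.DataFrame(
        {
            "platform_name": ["TikTok", "YouTube", "Reddit"],
            "minutes_per_week": [600, 300, 120],
            "variety_score": [4, 7, 8],
        }
    )
    model = CEARModel()
    scores = model.calculate_scores(sample, satisfaction=6.0, fomo=5.0)
    print("C_Score:", round(scores["C_Score"], 3))
    print("A_Risk:", round(scores["A_Risk"], 1))
    print("D_Index:", round(scores["D_Index"], 3))
    print("Per-platform efficiency:", scores["Per_Platform_Efficiency"])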