Dun3Co committed on
Commit
7a4a903
·
verified ·
1 Parent(s): 1c6cff0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -159
app.py CHANGED
@@ -1,159 +1,187 @@
1
- from fastapi import FastAPI
2
- from pydantic import BaseModel
3
- from typing import List, Literal, Optional
4
- import joblib
5
- import numpy as np
6
- import pandas as pd
7
- import requests
8
- import shap
9
- from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
10
-
11
- # =====================================================
12
- # CONFIG
13
- # =====================================================
14
-
15
- # Replace these with your NoCoDB API details
16
- NOCO_API_URL = "https://dun3co-sdc-nocodb.hf.space/api/v2/tables/m39a8axnn3980w9/records"
17
- NOCO_VIEW_ID = "vwjuv5jnaet9npuu"
18
- NOCO_API_TOKEN = "YOUR_NOCODB_TOKEN" # Replace or load from env variable
19
-
20
- HEADERS = {"xc-token": NOCO_API_TOKEN}
21
-
22
- # =====================================================
23
- # MODEL LOADING
24
- # =====================================================
25
-
26
- model = joblib.load("model_1mvp.pkl")
27
- app = FastAPI(title="Logistic Regression API 2")
28
-
29
- # =====================================================
30
- # DATA SCHEMAS
31
- # =====================================================
32
-
33
- class InputData(BaseModel):
34
- age: int
35
- balance: float
36
- day: int
37
- campaign: int
38
- job: str
39
- education: str
40
- default: Literal["yes", "no", "unknown"]
41
- housing: Literal["yes", "no", "unknown"]
42
- loan: Literal["yes", "no", "unknown"]
43
- months_since_previous_contact: str
44
- n_previous_contacts: str
45
- poutcome: str
46
- had_contact: bool
47
- is_single: bool
48
- uknown_contact: bool
49
-
50
- class BatchInputData(BaseModel):
51
- data: List[InputData]
52
-
53
- # =====================================================
54
- # HEALTH CHECK
55
- # =====================================================
56
-
57
- @app.get("/health")
58
- def health():
59
- return {"status": "ok"}
60
-
61
- # =====================================================
62
- # NOCODB DATA FETCHING
63
- # =====================================================
64
-
65
- def fetch_test_data(limit: int = 100):
66
- """Fetch test or sample data from NoCoDB view."""
67
- params = {"offset": 0, "limit": limit, "viewId": NOCO_VIEW_ID}
68
- res = requests.get(NOCO_API_URL, headers=HEADERS, params=params)
69
- res.raise_for_status()
70
- data = res.json()["list"]
71
- return pd.DataFrame(data)
72
-
73
- # =====================================================
74
- # PREDICTION ENDPOINT
75
- # =====================================================
76
-
77
- @app.post("/predict")
78
- def predict(batch: BatchInputData):
79
- try:
80
- X = pd.DataFrame([item.dict() for item in batch.data])
81
- preds = model.predict(X)
82
- probs = model.predict_proba(X)[:, 1]
83
- return {
84
- "predictions": preds.tolist(),
85
- "probabilities": probs.tolist()
86
- }
87
- except Exception as e:
88
- import traceback
89
- return {"error": str(e), "trace": traceback.format_exc()}
90
-
91
- # =====================================================
92
- # EXPLAINABILITY ENDPOINT
93
- # =====================================================
94
-
95
- @app.post("/explain")
96
- def explain(batch: Optional[BatchInputData] = None, limit: int = 100):
97
- """Generate SHAP values either from provided data or from NoCoDB test data."""
98
- try:
99
- if batch:
100
- X = pd.DataFrame([item.dict() for item in batch.data])
101
- else:
102
- X = fetch_test_data(limit=limit)
103
-
104
- explainer = shap.Explainer(model, X)
105
- shap_values = explainer(X)
106
-
107
- # Aggregate mean absolute SHAP value per feature
108
- shap_summary = pd.DataFrame({
109
- "feature": X.columns,
110
- "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
111
- }).sort_values("mean_abs_shap", ascending=False)
112
-
113
- return {
114
- "n_samples": len(X),
115
- "shap_summary": shap_summary.to_dict(orient="records")
116
- }
117
-
118
- except Exception as e:
119
- import traceback
120
- return {"error": str(e), "trace": traceback.format_exc()}
121
-
122
- # =====================================================
123
- # METRICS ENDPOINT
124
- # =====================================================
125
-
126
- @app.post("/metrics")
127
- def metrics(batch: Optional[BatchInputData] = None, y_true: Optional[List[int]] = None, limit: int = 100):
128
- """Compute ROC AUC and threshold analysis, using input or NoCoDB test data."""
129
- try:
130
- # Use provided data or fallback to test data from NoCoDB
131
- if batch:
132
- X = pd.DataFrame([item.dict() for item in batch.data])
133
- else:
134
- X = fetch_test_data(limit=limit)
135
-
136
- if y_true is None:
137
- # Look for 'y_true' column in NoCoDB data
138
- if "y_true" in X.columns:
139
- y_true = X["y_true"].astype(int).tolist()
140
- X = X.drop(columns=["y_true"])
141
- else:
142
- return {"error": "y_true values not provided or found in dataset"}
143
-
144
- y_prob = model.predict_proba(X)[:, 1]
145
- roc_auc = roc_auc_score(y_true, y_prob)
146
- precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
147
- pr_auc = auc(recall, precision)
148
-
149
- return {
150
- "roc_auc": roc_auc,
151
- "pr_auc": pr_auc,
152
- "thresholds": thresholds.tolist()[:20], # limit output size
153
- "precision": precision.tolist()[:20],
154
- "recall": recall.tolist()[:20]
155
- }
156
-
157
- except Exception as e:
158
- import traceback
159
- return {"error": str(e), "trace": traceback.format_exc()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from typing import List, Literal, Optional

import joblib
import numpy as np
import pandas as pd
import requests
import shap
from fastapi import FastAPI
from pydantic import BaseModel
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
10
+
11
# =====================================================
# CONFIG
# =====================================================

# NoCoDB API details. The token is read from the environment when available
# (as the original comment intended) so the secret does not have to live in
# source control; the literal placeholder remains the fallback for backward
# compatibility.
NOCO_API_URL = "https://dun3co-sdc-nocodb.hf.space/api/v2/tables/m39a8axnn3980w9/records"
NOCO_VIEW_ID = "vwjuv5jnaet9npuu"
NOCO_API_TOKEN = os.environ.get("NOCO_API_TOKEN", "YOUR_NOCODB_TOKEN")

# NoCoDB authenticates via the "xc-token" request header.
HEADERS = {"xc-token": NOCO_API_TOKEN}
21
+
22
# =====================================================
# MODEL LOADING
# =====================================================

# Load the pickled scikit-learn model once at import time; every endpoint
# below shares this instance.
# NOTE(review): the /coefficients endpoint assumes this is a Pipeline with
# "preprocessor" and "classifier" named steps — confirm the pickle matches.
model = joblib.load("model_1mvp.pkl")
app = FastAPI(title="Logistic Regression API 2")
28
+
29
+ # =====================================================
30
+ # DATA SCHEMAS
31
+ # =====================================================
32
+
33
class InputData(BaseModel):
    """Feature payload for one row to be scored.

    Field names must match the columns the pickled pipeline was trained on;
    renaming any of them would break both the API contract and the model.
    """

    age: int
    balance: float
    day: int
    campaign: int
    job: str
    education: str
    default: Literal["yes", "no", "unknown"]
    housing: Literal["yes", "no", "unknown"]
    loan: Literal["yes", "no", "unknown"]
    # NOTE(review): declared as str although the names sound numeric —
    # presumably the pipeline expects binned/categorical string values;
    # confirm against the training code before changing the type.
    months_since_previous_contact: str
    n_previous_contacts: str
    poutcome: str
    had_contact: bool
    is_single: bool
    # NOTE(review): "uknown" looks like a typo for "unknown", but the name is
    # part of the public API (and likely the trained feature name) — keep as is.
    uknown_contact: bool
49
+
50
class BatchInputData(BaseModel):
    """Wrapper carrying a batch of InputData rows for the POST endpoints."""

    data: List[InputData]
52
+
53
+ # =====================================================
54
+ # HEALTH CHECK
55
+ # =====================================================
56
+
57
@app.get("/health")
def health():
    """Liveness probe: unconditionally reports the service as up."""
    payload = {"status": "ok"}
    return payload
60
+
61
+ # =====================================================
62
+ # NOCODB DATA FETCHING
63
+ # =====================================================
64
+
65
def fetch_test_data(limit: int = 100):
    """Fetch up to ``limit`` rows from the configured NoCoDB view.

    Returns the records ("list" field of the JSON response) as a pandas
    DataFrame. Raises ``requests.HTTPError`` on a non-2xx response and
    ``requests.Timeout`` if NoCoDB does not answer in time.
    """
    params = {"offset": 0, "limit": limit, "viewId": NOCO_VIEW_ID}
    # A timeout is essential: without one a stalled NoCoDB instance would
    # block this request worker indefinitely.
    res = requests.get(NOCO_API_URL, headers=HEADERS, params=params, timeout=30)
    res.raise_for_status()
    data = res.json()["list"]
    return pd.DataFrame(data)
72
+
73
+ # =====================================================
74
+ # PREDICTION ENDPOINT
75
+ # =====================================================
76
+
77
@app.post("/predict")
def predict(batch: BatchInputData):
    """Score a batch of rows; return class labels and positive-class probabilities.

    On any failure the error and traceback are returned in the response body
    (debug-friendly behavior kept from the original implementation).
    """
    try:
        rows = [item.dict() for item in batch.data]
        features = pd.DataFrame(rows)
        labels = model.predict(features)
        positive_probs = model.predict_proba(features)[:, 1]
        return {
            "predictions": labels.tolist(),
            "probabilities": positive_probs.tolist(),
        }
    except Exception as e:
        import traceback
        return {"error": str(e), "trace": traceback.format_exc()}
90
+
91
+ # =====================================================
92
+ # EXPLAINABILITY ENDPOINT
93
+ # =====================================================
94
+
95
@app.post("/explain")
def explain(batch: Optional[BatchInputData] = None, limit: int = 100):
    """Generate SHAP values either from provided data or from NoCoDB test data."""
    try:
        # Caller-supplied rows take priority; otherwise sample from NoCoDB.
        if batch:
            X = pd.DataFrame([item.dict() for item in batch.data])
        else:
            X = fetch_test_data(limit=limit)

        # The same X serves as both the SHAP background dataset and the rows
        # being explained.
        # NOTE(review): model is loaded from a pickle and /coefficients treats
        # it as a Pipeline — confirm shap.Explainer accepts it with raw
        # (pre-transform) columns; this may need a masker or the bare
        # classifier on transformed data.
        explainer = shap.Explainer(model, X)
        shap_values = explainer(X)

        # Aggregate mean absolute SHAP value per feature
        shap_summary = pd.DataFrame({
            "feature": X.columns,
            "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
        }).sort_values("mean_abs_shap", ascending=False)

        return {
            "n_samples": len(X),
            "shap_summary": shap_summary.to_dict(orient="records")
        }

    except Exception as e:
        import traceback
        # Returning the traceback aids debugging but leaks internals to the
        # client; acceptable for a demo, tighten before production.
        return {"error": str(e), "trace": traceback.format_exc()}
121
+
122
+ # =====================================================
123
+ # METRICS ENDPOINT
124
+ # =====================================================
125
+
126
@app.post("/metrics")
def metrics(batch: Optional[BatchInputData] = None, y_true: Optional[List[int]] = None, limit: int = 100):
    """Compute ROC AUC and threshold analysis, using input or NoCoDB test data."""
    try:
        # Prefer caller-supplied rows; otherwise pull a sample from NoCoDB.
        if batch:
            X = pd.DataFrame([record.dict() for record in batch.data])
        else:
            X = fetch_test_data(limit=limit)

        if y_true is None:
            # Without explicit labels, they must travel inside the fetched
            # table as a 'y_true' column; split them off before scoring.
            if "y_true" not in X.columns:
                return {"error": "y_true values not provided or found in dataset"}
            y_true = X["y_true"].astype(int).tolist()
            X = X.drop(columns=["y_true"])

        y_prob = model.predict_proba(X)[:, 1]
        roc_auc = roc_auc_score(y_true, y_prob)
        precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
        pr_auc = auc(recall, precision)

        head = 20  # limit output size
        return {
            "roc_auc": roc_auc,
            "pr_auc": pr_auc,
            "thresholds": thresholds.tolist()[:head],
            "precision": precision.tolist()[:head],
            "recall": recall.tolist()[:head]
        }

    except Exception as e:
        import traceback
        return {"error": str(e), "trace": traceback.format_exc()}
160
+
161
@app.get("/coefficients")
def coefficients():
    """
    Return logistic regression coefficients and feature names.
    Works if your model is a pipeline with 'preprocessor' and 'classifier' steps.
    """
    try:
        # Extract the fitted steps from the pipeline.
        classifier = model.named_steps["classifier"]
        preprocessor = model.named_steps["preprocessor"]

        # Feature names as they exist after preprocessing (e.g. one-hot columns).
        feature_names = preprocessor.get_feature_names_out()

        # First row of coef_ (the only row for binary classification).
        # Renamed from `coefficients` so the local no longer shadows this
        # endpoint function.
        coefs = classifier.coef_[0]

        df = pd.DataFrame({
            "feature": feature_names,
            "coefficient": coefs.tolist()
        })

        return {"coefficients": df.to_dict(orient="records")}

    except Exception as e:
        import traceback
        return {"error": str(e), "trace": traceback.format_exc()}