Dun3Co committed on
Commit
d159d13
·
verified ·
1 Parent(s): 35daaab

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +31 -0
  2. app.py +159 -0
  3. model_1mvp.pkl +3 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim CPython 3.12 base keeps the image small.
FROM python:3.12-slim

# Skip .pyc generation and flush stdout/stderr immediately so logs show up live.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# All following paths are relative to /app.
WORKDIR /app

# Compiler toolchain for Python packages that have to build native extensions.
# build-essential already depends on gcc, so gcc is not listed separately.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies before copying the sources so this layer is
# reused from cache whenever only application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application code and the serialized model.
COPY . .

# Port the Hugging Face Space is served on.
EXPOSE 7860

# Launch the FastAPI application with uvicorn.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from typing import List, Literal, Optional

import joblib
import numpy as np
import pandas as pd
import requests
import shap
from fastapi import FastAPI
from pydantic import BaseModel
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
10
+
11
+ # =====================================================
12
+ # CONFIG
13
+ # =====================================================
14
+
15
# NoCoDB connection details.
NOCO_API_URL = "https://dun3co-sdc-nocodb.hf.space/api/v2/tables/m39a8axnn3980w9/records"
NOCO_VIEW_ID = "vwjuv5jnaet9npuu"

# Read the API token from the environment so the real secret never has to be
# committed. The placeholder is kept as the default, so behavior is unchanged
# when the NOCO_API_TOKEN variable is not set.
NOCO_API_TOKEN = os.environ.get("NOCO_API_TOKEN", "YOUR_NOCODB_TOKEN")

# Header sent with every NoCoDB request.
HEADERS = {"xc-token": NOCO_API_TOKEN}
21
+
22
+ # =====================================================
23
+ # MODEL LOADING
24
+ # =====================================================
25
+
26
# Deserialize the trained estimator once at import time; every request reuses it.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only ship model files that come from a trusted source.
model = joblib.load(filename="model_1mvp.pkl")

# ASGI application object that uvicorn serves as "app:app".
app = FastAPI(title="Logistic Regression API 2")
28
+
29
+ # =====================================================
30
+ # DATA SCHEMAS
31
+ # =====================================================
32
+
33
# Answer set shared by the three yes/no flags of InputData.
_YesNoUnknown = Literal["yes", "no", "unknown"]


class InputData(BaseModel):
    # One record to score. The endpoints turn each record into a dict and build
    # a DataFrame from those dicts, so the field names below become the column
    # names handed to the model.

    # Numeric fields.
    age: int
    balance: float
    day: int
    campaign: int

    # Free-form categorical fields (no value set is enforced here).
    job: str
    education: str

    # Flags restricted to "yes" / "no" / "unknown".
    default: _YesNoUnknown
    housing: _YesNoUnknown
    loan: _YesNoUnknown

    # Declared as strings - presumably bucketed categories rather than raw
    # counts; TODO confirm against the training pipeline.
    months_since_previous_contact: str
    n_previous_contacts: str
    poutcome: str

    # Boolean indicator fields.
    had_contact: bool
    is_single: bool
    # Spelling (sic) is part of the request schema; do not rename without
    # coordinating with clients and the trained model.
    uknown_contact: bool
49
+
50
class BatchInputData(BaseModel):
    # Request body wrapper: the records to process, in request order.
    data: list[InputData]
52
+
53
+ # =====================================================
54
+ # HEALTH CHECK
55
+ # =====================================================
56
+
57
@app.get("/health")
def health():
    # Liveness probe: always answers with a fixed payload and touches neither
    # the model nor NoCoDB.
    return dict(status="ok")
60
+
61
+ # =====================================================
62
+ # NOCODB DATA FETCHING
63
+ # =====================================================
64
+
65
def fetch_test_data(limit: int = 100, timeout: float = 30.0) -> pd.DataFrame:
    """Fetch test or sample data from the configured NoCoDB view.

    Args:
        limit: Maximum number of records requested; sent as the ``limit``
            query parameter together with ``offset=0``.
        timeout: Seconds to wait for NoCoDB before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled NoCoDB
            instance would block the calling request forever.

    Returns:
        A DataFrame built from the ``"list"`` entry of the JSON response.

    Raises:
        requests.Timeout: NoCoDB did not answer within ``timeout`` seconds.
        requests.HTTPError: NoCoDB answered with an error status.
        KeyError: The JSON response has no ``"list"`` entry.
    """
    query = {"offset": 0, "limit": limit, "viewId": NOCO_VIEW_ID}
    response = requests.get(
        NOCO_API_URL,
        headers=HEADERS,
        params=query,
        timeout=timeout,
    )
    response.raise_for_status()
    records = response.json()["list"]
    return pd.DataFrame(records)
72
+
73
+ # =====================================================
74
+ # PREDICTION ENDPOINT
75
+ # =====================================================
76
+
77
@app.post("/predict")
def predict(batch: BatchInputData):
    """Score a batch of records with the loaded model.

    Args:
        batch: Request body holding the records to score.

    Returns:
        On success ``{"predictions": [...], "probabilities": [...]}``, where
        ``probabilities`` is column 1 of ``model.predict_proba``. An empty
        batch yields two empty lists. On failure ``{"error": ..., "trace": ...}``.
    """
    import logging
    import traceback

    try:
        # Nothing to score: answer directly instead of letting the model fail
        # on a frame without columns.
        if not batch.data:
            return {"predictions": [], "probabilities": []}

        # Pydantic v2 deprecates .dict() in favor of .model_dump(); fall back
        # to .dict() so the code also runs on Pydantic v1.
        rows = [
            item.model_dump() if hasattr(item, "model_dump") else item.dict()
            for item in batch.data
        ]
        X = pd.DataFrame(rows)

        preds = model.predict(X)
        probs = model.predict_proba(X)[:, 1]
        return {
            "predictions": preds.tolist(),
            "probabilities": probs.tolist(),
        }
    except Exception as e:
        # Keep a server-side record of the failure.
        logging.getLogger(__name__).exception("Prediction failed")
        # NOTE(review): returning the traceback exposes internals to clients.
        # The response shape is kept for existing callers; consider replacing
        # it with an HTTP 500 once clients no longer rely on it.
        return {"error": str(e), "trace": traceback.format_exc()}
90
+
91
+ # =====================================================
92
+ # EXPLAINABILITY ENDPOINT
93
+ # =====================================================
94
+
95
@app.post("/explain")
def explain(batch: Optional[BatchInputData] = None, limit: int = 100):
    """Generate SHAP values either from provided data or from NoCoDB test data.

    Args:
        batch: Optional request body with the records to explain. When it is
            omitted, up to ``limit`` records are fetched from NoCoDB instead.
        limit: Number of NoCoDB records to request when ``batch`` is omitted.

    Returns:
        On success ``{"n_samples": ..., "shap_summary": [...]}``, where the
        summary lists each column with its mean absolute SHAP value, largest
        first. With no rows to explain the summary is empty. On failure
        ``{"error": ..., "trace": ...}``.
    """
    import logging
    import traceback

    try:
        if batch is not None:
            # Pydantic v2 deprecates .dict() in favor of .model_dump(); fall
            # back to .dict() so the code also runs on Pydantic v1.
            rows = [
                item.model_dump() if hasattr(item, "model_dump") else item.dict()
                for item in batch.data
            ]
            X = pd.DataFrame(rows)
        else:
            # NOTE(review): the fetched frame is passed on as-is; confirm the
            # NoCoDB view exposes only the columns the model expects.
            X = fetch_test_data(limit=limit)

        # No rows means there is nothing to fit an explainer on.
        if X.empty:
            return {"n_samples": 0, "shap_summary": []}

        # The same frame serves as background data and as the rows to explain.
        explainer = shap.Explainer(model, X)
        shap_values = explainer(X)

        # Aggregate mean absolute SHAP value per feature
        shap_summary = pd.DataFrame({
            "feature": X.columns,
            "mean_abs_shap": np.abs(shap_values.values).mean(axis=0),
        }).sort_values("mean_abs_shap", ascending=False)

        return {
            "n_samples": len(X),
            "shap_summary": shap_summary.to_dict(orient="records"),
        }

    except Exception as e:
        # Keep a server-side record of the failure.
        logging.getLogger(__name__).exception("SHAP explanation failed")
        # NOTE(review): returning the traceback exposes internals to clients.
        # The response shape is kept for existing callers.
        return {"error": str(e), "trace": traceback.format_exc()}
121
+
122
+ # =====================================================
123
+ # METRICS ENDPOINT
124
+ # =====================================================
125
+
126
@app.post("/metrics")
def metrics(batch: Optional[BatchInputData] = None, y_true: Optional[List[int]] = None, limit: int = 100):
    """Compute ROC AUC and threshold analysis, using input or NoCoDB test data.

    Args:
        batch: Optional records to evaluate. When it is omitted, up to
            ``limit`` records are fetched from NoCoDB instead.
        y_true: Optional true labels, one per record. When it is omitted, the
            labels are taken from a ``y_true`` column of the data.
        limit: Number of NoCoDB records to request when ``batch`` is omitted.

    Returns:
        On success a dict with ``roc_auc``, ``pr_auc`` and the first 20
        entries of the ``thresholds``, ``precision`` and ``recall`` arrays
        returned by ``precision_recall_curve``. When labels are missing or
        their count does not match the rows, ``{"error": ...}``. On any other
        failure ``{"error": ..., "trace": ...}``.
    """
    import logging
    import traceback

    try:
        # Use provided data or fallback to test data from NoCoDB
        if batch is not None:
            # Pydantic v2 deprecates .dict() in favor of .model_dump(); fall
            # back to .dict() so the code also runs on Pydantic v1.
            rows = [
                item.model_dump() if hasattr(item, "model_dump") else item.dict()
                for item in batch.data
            ]
            X = pd.DataFrame(rows)
        else:
            X = fetch_test_data(limit=limit)

        # The label column must never reach the model as a feature, so drop it
        # whether or not the caller also supplied labels explicitly.
        if "y_true" in X.columns:
            labels_in_data = X["y_true"]
            X = X.drop(columns=["y_true"])
            if y_true is None:
                y_true = labels_in_data.astype(int).tolist()

        if y_true is None:
            return {"error": "y_true values not provided or found in dataset"}

        # Report a mismatch clearly instead of failing inside scikit-learn.
        if len(y_true) != len(X):
            return {
                "error": f"y_true has {len(y_true)} values but the dataset has {len(X)} rows"
            }

        y_prob = model.predict_proba(X)[:, 1]
        roc_auc = roc_auc_score(y_true, y_prob)
        precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
        pr_auc = auc(recall, precision)

        return {
            # Cast to plain floats so the JSON payload holds no NumPy scalars.
            "roc_auc": float(roc_auc),
            "pr_auc": float(pr_auc),
            "thresholds": thresholds.tolist()[:20],  # limit output size
            "precision": precision.tolist()[:20],
            "recall": recall.tolist()[:20],
        }

    except Exception as e:
        # Keep a server-side record of the failure.
        logging.getLogger(__name__).exception("Metrics computation failed")
        # NOTE(review): returning the traceback exposes internals to clients.
        # The response shape is kept for existing callers.
        return {"error": str(e), "trace": traceback.format_exc()}
model_1mvp.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:779b2825e23ee94439d9d6b66ad3203b83bd1fda61f7f1808492ced0c4ca6e02
3
+ size 5946
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
fastapi
uvicorn
scikit-learn==1.7.2
joblib==1.5.2
numpy==2.3.1
pandas==2.3.2
# app.py imports both of these at module load; without them the container
# fails on startup with ModuleNotFoundError.
requests
shap