Dun3Co committed on
Commit
7a4a903
·
verified ·
1 Parent(s): 1c6cff0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -159
app.py CHANGED
@@ -1,159 +1,187 @@
1
- from fastapi import FastAPI
2
- from pydantic import BaseModel
3
- from typing import List, Literal, Optional
4
- import joblib
5
- import numpy as np
6
- import pandas as pd
7
- import requests
8
- import shap
9
- from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
10
-
11
- # =====================================================
12
- # CONFIG
13
- # =====================================================
14
-
15
- # Replace these with your NoCoDB API details
16
- NOCO_API_URL = "https://dun3co-sdc-nocodb.hf.space/api/v2/tables/m39a8axnn3980w9/records"
17
- NOCO_VIEW_ID = "vwjuv5jnaet9npuu"
18
- NOCO_API_TOKEN = "YOUR_NOCODB_TOKEN" # Replace or load from env variable
19
-
20
- HEADERS = {"xc-token": NOCO_API_TOKEN}
21
-
22
- # =====================================================
23
- # MODEL LOADING
24
- # =====================================================
25
-
26
- model = joblib.load("model_1mvp.pkl")
27
- app = FastAPI(title="Logistic Regression API 2")
28
-
29
- # =====================================================
30
- # DATA SCHEMAS
31
- # =====================================================
32
-
33
- class InputData(BaseModel):
34
- age: int
35
- balance: float
36
- day: int
37
- campaign: int
38
- job: str
39
- education: str
40
- default: Literal["yes", "no", "unknown"]
41
- housing: Literal["yes", "no", "unknown"]
42
- loan: Literal["yes", "no", "unknown"]
43
- months_since_previous_contact: str
44
- n_previous_contacts: str
45
- poutcome: str
46
- had_contact: bool
47
- is_single: bool
48
- uknown_contact: bool
49
-
50
- class BatchInputData(BaseModel):
51
- data: List[InputData]
52
-
53
- # =====================================================
54
- # HEALTH CHECK
55
- # =====================================================
56
-
57
- @app.get("/health")
58
- def health():
59
- return {"status": "ok"}
60
-
61
- # =====================================================
62
- # NOCODB DATA FETCHING
63
- # =====================================================
64
-
65
- def fetch_test_data(limit: int = 100):
66
- """Fetch test or sample data from NoCoDB view."""
67
- params = {"offset": 0, "limit": limit, "viewId": NOCO_VIEW_ID}
68
- res = requests.get(NOCO_API_URL, headers=HEADERS, params=params)
69
- res.raise_for_status()
70
- data = res.json()["list"]
71
- return pd.DataFrame(data)
72
-
73
- # =====================================================
74
- # PREDICTION ENDPOINT
75
- # =====================================================
76
-
77
- @app.post("/predict")
78
- def predict(batch: BatchInputData):
79
- try:
80
- X = pd.DataFrame([item.dict() for item in batch.data])
81
- preds = model.predict(X)
82
- probs = model.predict_proba(X)[:, 1]
83
- return {
84
- "predictions": preds.tolist(),
85
- "probabilities": probs.tolist()
86
- }
87
- except Exception as e:
88
- import traceback
89
- return {"error": str(e), "trace": traceback.format_exc()}
90
-
91
- # =====================================================
92
- # EXPLAINABILITY ENDPOINT
93
- # =====================================================
94
-
95
- @app.post("/explain")
96
- def explain(batch: Optional[BatchInputData] = None, limit: int = 100):
97
- """Generate SHAP values either from provided data or from NoCoDB test data."""
98
- try:
99
- if batch:
100
- X = pd.DataFrame([item.dict() for item in batch.data])
101
- else:
102
- X = fetch_test_data(limit=limit)
103
-
104
- explainer = shap.Explainer(model, X)
105
- shap_values = explainer(X)
106
-
107
- # Aggregate mean absolute SHAP value per feature
108
- shap_summary = pd.DataFrame({
109
- "feature": X.columns,
110
- "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
111
- }).sort_values("mean_abs_shap", ascending=False)
112
-
113
- return {
114
- "n_samples": len(X),
115
- "shap_summary": shap_summary.to_dict(orient="records")
116
- }
117
-
118
- except Exception as e:
119
- import traceback
120
- return {"error": str(e), "trace": traceback.format_exc()}
121
-
122
- # =====================================================
123
- # METRICS ENDPOINT
124
- # =====================================================
125
-
126
- @app.post("/metrics")
127
- def metrics(batch: Optional[BatchInputData] = None, y_true: Optional[List[int]] = None, limit: int = 100):
128
- """Compute ROC AUC and threshold analysis, using input or NoCoDB test data."""
129
- try:
130
- # Use provided data or fallback to test data from NoCoDB
131
- if batch:
132
- X = pd.DataFrame([item.dict() for item in batch.data])
133
- else:
134
- X = fetch_test_data(limit=limit)
135
-
136
- if y_true is None:
137
- # Look for 'y_true' column in NoCoDB data
138
- if "y_true" in X.columns:
139
- y_true = X["y_true"].astype(int).tolist()
140
- X = X.drop(columns=["y_true"])
141
- else:
142
- return {"error": "y_true values not provided or found in dataset"}
143
-
144
- y_prob = model.predict_proba(X)[:, 1]
145
- roc_auc = roc_auc_score(y_true, y_prob)
146
- precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
147
- pr_auc = auc(recall, precision)
148
-
149
- return {
150
- "roc_auc": roc_auc,
151
- "pr_auc": pr_auc,
152
- "thresholds": thresholds.tolist()[:20], # limit output size
153
- "precision": precision.tolist()[:20],
154
- "recall": recall.tolist()[:20]
155
- }
156
-
157
- except Exception as e:
158
- import traceback
159
- return {"error": str(e), "trace": traceback.format_exc()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from typing import List, Literal, Optional

import joblib
import numpy as np
import pandas as pd
import requests
import shap
from fastapi import FastAPI
from pydantic import BaseModel
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
10
+
11
# =====================================================
# CONFIG
# =====================================================

# NoCoDB API details. The token is read from the environment when available
# (as the original comment intended) so the secret does not have to live in
# source control; the literal placeholder remains the fallback for backward
# compatibility.
NOCO_API_URL = "https://dun3co-sdc-nocodb.hf.space/api/v2/tables/m39a8axnn3980w9/records"
NOCO_VIEW_ID = "vwjuv5jnaet9npuu"
NOCO_API_TOKEN = os.environ.get("NOCO_API_TOKEN", "YOUR_NOCODB_TOKEN")

# NoCoDB authenticates via the "xc-token" request header.
HEADERS = {"xc-token": NOCO_API_TOKEN}
21
+
22
# =====================================================
# MODEL LOADING
# =====================================================

# Load the pickled scikit-learn model once at import time; every endpoint
# below shares this instance.
# NOTE(review): the /coefficients endpoint assumes this is a Pipeline with
# "preprocessor" and "classifier" named steps — confirm the pickle matches.
model = joblib.load("model_1mvp.pkl")
app = FastAPI(title="Logistic Regression API 2")
28
+
29
+ # =====================================================
30
+ # DATA SCHEMAS
31
+ # =====================================================
32
+
33
class InputData(BaseModel):
    """Feature payload for one row to be scored.

    Field names must match the columns the pickled pipeline was trained on;
    renaming any of them would break both the API contract and the model.
    """

    age: int
    balance: float
    day: int
    campaign: int
    job: str
    education: str
    default: Literal["yes", "no", "unknown"]
    housing: Literal["yes", "no", "unknown"]
    loan: Literal["yes", "no", "unknown"]
    # NOTE(review): declared as str although the names sound numeric —
    # presumably the pipeline expects binned/categorical string values;
    # confirm against the training code before changing the type.
    months_since_previous_contact: str
    n_previous_contacts: str
    poutcome: str
    had_contact: bool
    is_single: bool
    # NOTE(review): "uknown" looks like a typo for "unknown", but the name is
    # part of the public API (and likely the trained feature name) — keep as is.
    uknown_contact: bool
49
+
50
class BatchInputData(BaseModel):
    """Wrapper carrying a batch of InputData rows for the POST endpoints."""

    data: List[InputData]
52
+
53
+ # =====================================================
54
+ # HEALTH CHECK
55
+ # =====================================================
56
+
57
@app.get("/health")
def health():
    """Liveness probe: unconditionally reports the service as up."""
    payload = {"status": "ok"}
    return payload
60
+
61
+ # =====================================================
62
+ # NOCODB DATA FETCHING
63
+ # =====================================================
64
+
65
def fetch_test_data(limit: int = 100):
    """Fetch up to ``limit`` rows from the configured NoCoDB view.

    Returns the records ("list" field of the JSON response) as a pandas
    DataFrame. Raises ``requests.HTTPError`` on a non-2xx response and
    ``requests.Timeout`` if NoCoDB does not answer in time.
    """
    params = {"offset": 0, "limit": limit, "viewId": NOCO_VIEW_ID}
    # A timeout is essential: without one a stalled NoCoDB instance would
    # block this request worker indefinitely.
    res = requests.get(NOCO_API_URL, headers=HEADERS, params=params, timeout=30)
    res.raise_for_status()
    data = res.json()["list"]
    return pd.DataFrame(data)
72
+
73
+ # =====================================================
74
+ # PREDICTION ENDPOINT
75
+ # =====================================================
76
+
77
@app.post("/predict")
def predict(batch: BatchInputData):
    """Score a batch of rows; return class labels and positive-class probabilities.

    On any failure the error and traceback are returned in the response body
    (debug-friendly behavior kept from the original implementation).
    """
    try:
        rows = [item.dict() for item in batch.data]
        features = pd.DataFrame(rows)
        labels = model.predict(features)
        positive_probs = model.predict_proba(features)[:, 1]
        return {
            "predictions": labels.tolist(),
            "probabilities": positive_probs.tolist(),
        }
    except Exception as e:
        import traceback
        return {"error": str(e), "trace": traceback.format_exc()}
90
+
91
+ # =====================================================
92
+ # EXPLAINABILITY ENDPOINT
93
+ # =====================================================
94
+
95
@app.post("/explain")
def explain(batch: Optional[BatchInputData] = None, limit: int = 100):
    """Generate SHAP values either from provided data or from NoCoDB test data."""
    try:
        # Caller-supplied rows take priority; otherwise sample from NoCoDB.
        if batch:
            X = pd.DataFrame([item.dict() for item in batch.data])
        else:
            X = fetch_test_data(limit=limit)

        # The same X serves as both the SHAP background dataset and the rows
        # being explained.
        # NOTE(review): model is loaded from a pickle and /coefficients treats
        # it as a Pipeline — confirm shap.Explainer accepts it with raw
        # (pre-transform) columns; this may need a masker or the bare
        # classifier on transformed data.
        explainer = shap.Explainer(model, X)
        shap_values = explainer(X)

        # Aggregate mean absolute SHAP value per feature
        shap_summary = pd.DataFrame({
            "feature": X.columns,
            "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
        }).sort_values("mean_abs_shap", ascending=False)

        return {
            "n_samples": len(X),
            "shap_summary": shap_summary.to_dict(orient="records")
        }

    except Exception as e:
        import traceback
        # Returning the traceback aids debugging but leaks internals to the
        # client; acceptable for a demo, tighten before production.
        return {"error": str(e), "trace": traceback.format_exc()}
121
+
122
+ # =====================================================
123
+ # METRICS ENDPOINT
124
+ # =====================================================
125
+
126
@app.post("/metrics")
def metrics(batch: Optional[BatchInputData] = None, y_true: Optional[List[int]] = None, limit: int = 100):
    """Compute ROC AUC and threshold analysis, using input or NoCoDB test data."""
    try:
        # Prefer caller-supplied rows; otherwise pull a sample from NoCoDB.
        if batch:
            X = pd.DataFrame([record.dict() for record in batch.data])
        else:
            X = fetch_test_data(limit=limit)

        if y_true is None:
            # Without explicit labels, they must travel inside the fetched
            # table as a 'y_true' column; split them off before scoring.
            if "y_true" not in X.columns:
                return {"error": "y_true values not provided or found in dataset"}
            y_true = X["y_true"].astype(int).tolist()
            X = X.drop(columns=["y_true"])

        y_prob = model.predict_proba(X)[:, 1]
        roc_auc = roc_auc_score(y_true, y_prob)
        precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
        pr_auc = auc(recall, precision)

        head = 20  # limit output size
        return {
            "roc_auc": roc_auc,
            "pr_auc": pr_auc,
            "thresholds": thresholds.tolist()[:head],
            "precision": precision.tolist()[:head],
            "recall": recall.tolist()[:head]
        }

    except Exception as e:
        import traceback
        return {"error": str(e), "trace": traceback.format_exc()}
160
+
161
@app.get("/coefficients")
def coefficients():
    """
    Return logistic regression coefficients and feature names.
    Works if your model is a pipeline with 'preprocessor' and 'classifier' steps.
    """
    try:
        # Extract the fitted steps from the pipeline.
        classifier = model.named_steps["classifier"]
        preprocessor = model.named_steps["preprocessor"]

        # Feature names as they exist after preprocessing (e.g. one-hot columns).
        feature_names = preprocessor.get_feature_names_out()

        # First row of coef_ (the only row for binary classification).
        # Renamed from `coefficients` so the local no longer shadows this
        # endpoint function.
        coefs = classifier.coef_[0]

        df = pd.DataFrame({
            "feature": feature_names,
            "coefficient": coefs.tolist()
        })

        return {"coefficients": df.to_dict(orient="records")}

    except Exception as e:
        import traceback
        return {"error": str(e), "trace": traceback.format_exc()}