chkp-talexm
committed on
Commit
·
bda761e
1
Parent(s):
ecdd8e8
update
Browse files
app.py
CHANGED
@@ -298,17 +298,29 @@ if uploaded_file:
|
|
298 |
print("CatBoost:\n", pd.Series(catboost_probs).describe())
|
299 |
print("XGBoost:\n", pd.Series(xgb_probs).describe())
|
300 |
|
301 |
-
# ✅
|
302 |
-
THRESHOLD =
|
|
|
|
|
303 |
catboost_preds = (catboost_probs >= THRESHOLD).astype(int)
|
304 |
-
xgb_preds = (xgb_probs >=
|
305 |
|
|
|
|
|
|
|
|
|
|
|
|
|
306 |
predictions_df = pd.DataFrame({
|
307 |
"CatBoost": catboost_preds,
|
308 |
-
"XGBoost": xgb_preds
|
309 |
-
# "RandomForest": rf_preds
|
310 |
})
|
311 |
|
|
|
|
|
|
|
|
|
|
|
312 |
# Apply "at least one model predicts 1" rule
|
313 |
predictions_df["is_click_predicted"] = predictions_df.max(axis=1)
|
314 |
|
|
|
298 |
print("CatBoost:\n", pd.Series(catboost_probs).describe())
|
299 |
print("XGBoost:\n", pd.Series(xgb_probs).describe())
|
300 |
|
301 |
+
# ✅ Dynamically Adjust Threshold Based on Probability Distribution
|
302 |
+
THRESHOLD = np.percentile(catboost_probs, 95) # Use 95th percentile
|
303 |
+
print(f"✅ Adjusted CatBoost Threshold: {THRESHOLD:.3f}")
|
304 |
+
|
305 |
catboost_preds = (catboost_probs >= THRESHOLD).astype(int)
|
306 |
+
xgb_preds = (xgb_probs >= 0.7).astype(int) # Keep static for comparison
|
307 |
|
308 |
+
# ✅ Debugging: Count of 1s and 0s after thresholding
|
309 |
+
print("\nPost-threshold Distribution:")
|
310 |
+
print(f"CatBoost 1s: {np.sum(catboost_preds)} / {len(catboost_preds)}")
|
311 |
+
print(f"XGBoost 1s: {np.sum(xgb_preds)} / {len(xgb_preds)}")
|
312 |
+
|
313 |
+
# ✅ Fix `predictions_df` After Thresholding
|
314 |
predictions_df = pd.DataFrame({
|
315 |
"CatBoost": catboost_preds,
|
316 |
+
"XGBoost": xgb_preds
|
|
|
317 |
})
|
318 |
|
319 |
+
# ✅ Ensure Not All Are Predicted as Clicks
|
320 |
+
if predictions_df["CatBoost"].sum() == len(predictions_df) or predictions_df["XGBoost"].sum() == len(
|
321 |
+
predictions_df):
|
322 |
+
print("⚠ Warning: Model is predicting only 1s! Consider adjusting thresholds.")
|
323 |
+
|
324 |
# Apply "at least one model predicts 1" rule
|
325 |
predictions_df["is_click_predicted"] = predictions_df.max(axis=1)
|
326 |
|