chkp-talexm committed on
Commit
bda761e
·
1 Parent(s): ecdd8e8
Files changed (1) hide show
  1. app.py +17 -5
app.py CHANGED
@@ -298,17 +298,29 @@ if uploaded_file:
298
  print("CatBoost:\n", pd.Series(catboost_probs).describe())
299
  print("XGBoost:\n", pd.Series(xgb_probs).describe())
300
 
301
- # ✅ Apply Threshold to Convert Probabilities into Binary Predictions
302
- THRESHOLD = 0.7 # Adjust to control false positives
 
 
303
  catboost_preds = (catboost_probs >= THRESHOLD).astype(int)
304
- xgb_preds = (xgb_probs >= THRESHOLD).astype(int)
305
 
 
 
 
 
 
 
306
  predictions_df = pd.DataFrame({
307
  "CatBoost": catboost_preds,
308
- "XGBoost": xgb_preds,
309
- # "RandomForest": rf_preds
310
  })
311
 
 
 
 
 
 
312
  # Apply "at least one model predicts 1" rule
313
  predictions_df["is_click_predicted"] = predictions_df.max(axis=1)
314
 
 
298
  print("CatBoost:\n", pd.Series(catboost_probs).describe())
299
  print("XGBoost:\n", pd.Series(xgb_probs).describe())
300
 
301
+ # ✅ Dynamically Adjust Threshold Based on Probability Distribution
302
+ THRESHOLD = np.percentile(catboost_probs, 95) # Use 95th percentile
303
+ print(f"✅ Adjusted CatBoost Threshold: {THRESHOLD:.3f}")
304
+
305
  catboost_preds = (catboost_probs >= THRESHOLD).astype(int)
306
+ xgb_preds = (xgb_probs >= 0.7).astype(int) # Keep static for comparison
307
 
308
+ # ✅ Debugging: Count of 1s and 0s after thresholding
309
+ print("\nPost-threshold Distribution:")
310
+ print(f"CatBoost 1s: {np.sum(catboost_preds)} / {len(catboost_preds)}")
311
+ print(f"XGBoost 1s: {np.sum(xgb_preds)} / {len(xgb_preds)}")
312
+
313
+ # ✅ Fix `predictions_df` After Thresholding
314
  predictions_df = pd.DataFrame({
315
  "CatBoost": catboost_preds,
316
+ "XGBoost": xgb_preds
 
317
  })
318
 
319
+ # ✅ Ensure Not All Are Predicted as Clicks
320
+ if predictions_df["CatBoost"].sum() == len(predictions_df) or predictions_df["XGBoost"].sum() == len(
321
+ predictions_df):
322
+ print("⚠ Warning: Model is predicting only 1s! Consider adjusting thresholds.")
323
+
324
  # Apply "at least one model predicts 1" rule
325
  predictions_df["is_click_predicted"] = predictions_df.max(axis=1)
326