File size: 1,400 Bytes
b8bf9dd f5755a2 b8bf9dd f5755a2 b8bf9dd f5755a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import numpy as np
import pandas as pd
from catboost import Pool
from data_loader import CATEGORICAL_COLUMNS, IDS_COLUMNS, TARGET_COLUMN, FEATURE_COLUMNS, AGGREGATED_COLUMNS, TEMPORAL_COLUMNS
def predict(models, X_test):
    """Predict with every trained model and combine the results into one vote.

    Args:
        models: Mapping of model name to a trained model exposing ``predict``.
            Models whose name contains ``"CatBoost"`` are fed a catboost
            ``Pool`` built from ``X_test`` with ``CATEGORICAL_COLUMNS`` as the
            categorical features; every other model receives ``X_test``
            restricted to, and ordered as, ``IDS_COLUMNS + FEATURE_COLUMNS +
            AGGREGATED_COLUMNS + TEMPORAL_COLUMNS``.
        X_test: DataFrame of rows to score.

    Returns:
        A DataFrame with one 0/1 column per model (1 where the model's raw
        prediction is greater than 0.5) plus ``is_click_predicted``, which is
        1 when at least one model predicted 1 for that row. Rows are in the
        same order as ``X_test`` but carry a fresh default index.
    """
    # Built lazily and only once: only non-CatBoost models need it, so a
    # CatBoost-only run never touches these columns.
    ordered_X_test = None
    test_predictions = {}

    for name, model in models.items():
        if "CatBoost" in name:
            # CatBoost needs to be told which columns are categorical.
            pool = Pool(data=X_test, cat_features=CATEGORICAL_COLUMNS)
            raw = model.predict(pool)
        else:
            if ordered_X_test is None:
                # Reorder columns to match the column order of the model.
                ordered_X_test = X_test[
                    IDS_COLUMNS
                    + FEATURE_COLUMNS
                    + AGGREGATED_COLUMNS
                    + TEMPORAL_COLUMNS
                ]
            raw = model.predict(ordered_X_test)

        # squeeze() turns (n, 1) outputs into (n,); atleast_1d() keeps a
        # single-row prediction as a length-1 array instead of a 0-d scalar,
        # which pd.DataFrame would reject ("all scalar values").
        test_predictions[name] = np.atleast_1d(np.squeeze(np.asarray(raw)))

    test_predictions_df = pd.DataFrame(test_predictions)

    # Ensure binary values (0 or 1) for every model column at once.
    test_predictions_df = (test_predictions_df > 0.5).astype(int)

    # Apply the "at least one model predicts 1" rule.
    test_predictions_df["is_click_predicted"] = test_predictions_df.max(axis=1)
    return test_predictions_df
|