import numpy as np | |
import pandas as pd | |
from catboost import Pool | |
from data_loader import CATEGORICAL_COLUMNS, IDS_COLUMNS, TARGET_COLUMN, FEATURE_COLUMNS, AGGREGATED_COLUMNS, TEMPORAL_COLUMNS | |
def predict(models, X_test):
    """Predict with every trained model and combine the votes.

    Args:
        models: Mapping of model name to fitted model. A model whose name
            contains ``"CatBoost"`` is fed a CatBoost ``Pool`` built from
            ``X_test`` with ``CATEGORICAL_COLUMNS`` as categorical features;
            every other model is fed ``X_test`` restricted to, and ordered
            as, ``IDS_COLUMNS + FEATURE_COLUMNS + AGGREGATED_COLUMNS +
            TEMPORAL_COLUMNS``.
        X_test: Feature table to score. The non-CatBoost path indexes it
            with a list of column names, so it is expected to be a pandas
            DataFrame containing those columns.

    Returns:
        A DataFrame with one 0/1 column per model (prediction > 0.5) plus an
        ``is_click_predicted`` column holding the row-wise maximum, i.e. 1
        when at least one model predicted 1. Rows follow the order of
        ``X_test``; the index is a fresh default index, not ``X_test``'s.
    """
    # Both model inputs are loop-invariant, so each is built lazily (only if
    # a model of that kind is present) and at most once.
    catboost_pool = None
    ordered_X_test = None

    test_predictions = {}
    for name, model in models.items():
        if "CatBoost" in name:  # Handle CatBoost models
            if catboost_pool is None:
                catboost_pool = Pool(
                    data=X_test, cat_features=CATEGORICAL_COLUMNS
                )
            raw_prediction = model.predict(catboost_pool)
        else:  # Other models
            if ordered_X_test is None:
                # Reorder columns to match the order the model expects.
                ordered_X_test = X_test[
                    IDS_COLUMNS
                    + FEATURE_COLUMNS
                    + AGGREGATED_COLUMNS
                    + TEMPORAL_COLUMNS
                ]
            raw_prediction = model.predict(ordered_X_test)

        # Normalise every model's output the same way: drop singleton axes
        # (e.g. an (n, 1) column vector) but never go below one dimension.
        # A bare squeeze() turns a single-row prediction into a 0-d array,
        # and a dict holding only such scalars cannot be made into a
        # DataFrame without an explicit index.
        test_predictions[name] = np.atleast_1d(
            np.squeeze(np.asarray(raw_prediction))
        )

    # Ensure binary values (0 or 1) in every model column.
    test_predictions_df = (pd.DataFrame(test_predictions) > 0.5).astype(int)

    # Apply "at least one model predicts 1" rule.
    test_predictions_df["is_click_predicted"] = test_predictions_df.max(axis=1)
    return test_predictions_df