import numpy as np
import pandas as pd
from catboost import Pool

from data_loader import (
    AGGREGATED_COLUMNS,
    CATEGORICAL_COLUMNS,
    FEATURE_COLUMNS,
    IDS_COLUMNS,
    TARGET_COLUMN,
    TEMPORAL_COLUMNS,
)


def _as_1d(raw_prediction):
    """Return *raw_prediction* as an array with at least one dimension.

    Length-1 axes are squeezed away (so an ``(n, 1)`` output becomes ``(n,)``),
    but a single-row prediction is kept as shape ``(1,)`` instead of collapsing
    to a 0-d array, which ``pd.DataFrame`` would treat as a scalar and reject
    when no index is supplied.
    """
    return np.atleast_1d(np.array(raw_prediction).squeeze())


def predict(models, X_test: pd.DataFrame) -> pd.DataFrame:
    """Make predictions with every trained model and combine them.

    Args:
        models: Mapping of model name to a fitted model exposing ``predict``.
            Models whose name contains ``"CatBoost"`` are fed a CatBoost
            ``Pool`` built with ``CATEGORICAL_COLUMNS`` as categorical
            features; all other models receive ``X_test`` with its columns
            reordered to ``IDS_COLUMNS + FEATURE_COLUMNS + AGGREGATED_COLUMNS
            + TEMPORAL_COLUMNS``.
        X_test: Feature frame to score.

    Returns:
        A DataFrame with one column per model holding 0/1 values (the raw
        prediction thresholded at ``> 0.5``) plus an ``"is_click_predicted"``
        column that is 1 when at least one model predicted 1.
    """
    test_predictions = {}
    # Built lazily so a CatBoost-only run never requires the reordered columns,
    # and reused so the column selection is not repeated for every model.
    reordered_X_test = None

    for name, model in models.items():
        if "CatBoost" in name:
            pool = Pool(data=X_test, cat_features=CATEGORICAL_COLUMNS)
            raw_prediction = model.predict(pool)
        else:
            if reordered_X_test is None:
                # Reorder columns to match the order expected by the model.
                reordered_X_test = X_test[
                    IDS_COLUMNS
                    + FEATURE_COLUMNS
                    + AGGREGATED_COLUMNS
                    + TEMPORAL_COLUMNS
                ]
            raw_prediction = model.predict(reordered_X_test)
        test_predictions[name] = _as_1d(raw_prediction)

    test_predictions_df = pd.DataFrame(test_predictions)

    # Ensure binary values (0 or 1) in every model column.
    test_predictions_df = (test_predictions_df > 0.5).astype(int)

    # Apply the "at least one model predicts 1" rule.
    test_predictions_df["is_click_predicted"] = test_predictions_df.max(axis=1)

    return test_predictions_df