File size: 1,400 Bytes

b8bf9dd
 
f5755a2
 
b8bf9dd
 
 
f5755a2
 
 
 
 
 
 
 
 
 
 
 
 
 
b8bf9dd
 
 
 
 
 
 
 
 
 
 
f5755a2

import numpy as np
import pandas as pd
from catboost import Pool
from data_loader import CATEGORICAL_COLUMNS, IDS_COLUMNS, TARGET_COLUMN, FEATURE_COLUMNS, AGGREGATED_COLUMNS, TEMPORAL_COLUMNS

def predict(models, X_test):
    """Score ``X_test`` with every trained model and combine the votes.

    Each model's raw output is binarised with a ``> 0.5`` threshold, and a row
    is flagged as a predicted click when at least one model outputs 1.

    Args:
        models: Mapping of model name to fitted model. A model whose name
            contains ``"CatBoost"`` is given a ``catboost.Pool`` built from
            ``X_test`` with ``CATEGORICAL_COLUMNS`` as categorical features.
            Every other model is given ``X_test`` restricted to
            ``IDS_COLUMNS + FEATURE_COLUMNS + AGGREGATED_COLUMNS +
            TEMPORAL_COLUMNS``, in that order.
        X_test: Feature table to score; it must support selection by a list
            of column names (presumably a pandas DataFrame - confirm with
            the caller).

    Returns:
        A DataFrame with one 0/1 column per model (named after the model)
        plus an ``is_click_predicted`` column holding the row-wise maximum.
        Predictions are stored as plain arrays, so rows are positional and
        the index of ``X_test`` is not carried over.
    """
    test_predictions = {}
    for name, model in models.items():
        if "CatBoost" in name:
            # CatBoost needs the categorical columns declared explicitly.
            model_input = Pool(data=X_test, cat_features=CATEGORICAL_COLUMNS)
        else:
            # Reorder columns to match the order the model was trained with.
            model_input = X_test[
                IDS_COLUMNS + FEATURE_COLUMNS + AGGREGATED_COLUMNS + TEMPORAL_COLUMNS
            ]

        raw_output = np.asarray(model.predict(model_input))
        # squeeze() drops a trailing singleton axis such as (n, 1), but it
        # also collapses a single-row prediction to a 0-d array, which pandas
        # treats as a scalar and rejects when no index is given.
        # atleast_1d restores the (1,) shape for that case.
        test_predictions[name] = np.atleast_1d(raw_output.squeeze())

    # Ensure binary values (0 or 1) for every model column at once.
    test_predictions_df = (pd.DataFrame(test_predictions) > 0.5).astype(int)

    # Apply the "at least one model predicts 1" rule.
    test_predictions_df["is_click_predicted"] = test_predictions_df.max(axis=1)

    return test_predictions_df