|
|
| import pandas as pd |
|
|
|
|
class InputValidationError(Exception):
    """Custom exception for input validation errors.

    Raised by ``validate_and_prepare_input`` when the supplied input is not
    a DataFrame, when the model's feature names cannot be read, or when the
    input columns do not match what the model expects.
    """
|
|
|
|
def validate_and_prepare_input(input_df: pd.DataFrame, model) -> pd.DataFrame:
    """Validate ``input_df`` against the features ``model`` expects.

    The frame must contain exactly the model's feature columns (none
    missing, none extra, none duplicated) and every one of them must have a
    numeric dtype.

    Args:
        input_df: Candidate feature frame.
        model: Object exposing ``get_booster().feature_names``.

    Returns:
        A frame holding the columns of ``input_df`` arranged in the model's
        feature order. ``input_df`` itself is not modified.

    Raises:
        InputValidationError: If ``input_df`` is not a DataFrame, the feature
            names cannot be retrieved (or are ``None``), required columns are
            missing, unexpected or duplicated columns are present, or a
            feature column is not numeric.
    """
    if not isinstance(input_df, pd.DataFrame):
        raise InputValidationError("Input must be a pandas DataFrame.")

    try:
        expected_features = model.get_booster().feature_names
    except Exception as exc:
        # Chain the cause so the underlying failure stays visible in the
        # traceback instead of being swallowed.
        raise InputValidationError(
            "Unable to retrieve model feature names."
        ) from exc

    # A model may report no feature names at all; without this guard the
    # set() calls below would fail with a bare TypeError.
    if expected_features is None:
        raise InputValidationError("Unable to retrieve model feature names.")

    # Normalise to a list so the final column selection treats the names as
    # a collection of labels rather than as one (tuple) key.
    expected_features = list(expected_features)
    expected_lookup = set(expected_features)
    provided_lookup = set(input_df.columns)

    # Report columns in a stable order (model order for missing, input order
    # for extras) so error messages are reproducible between runs.
    missing_cols = [
        col for col in expected_features if col not in provided_lookup
    ]
    if missing_cols:
        raise InputValidationError(
            f"Missing required columns: {missing_cols}"
        )

    extra_cols = list(
        dict.fromkeys(
            col for col in input_df.columns if col not in expected_lookup
        )
    )
    if extra_cols:
        raise InputValidationError(
            f"Unexpected columns provided: {extra_cols}"
        )

    # With duplicated names ``input_df[col]`` yields a frame rather than a
    # single column, so the per-column dtype check below cannot be trusted.
    duplicate_cols = list(
        dict.fromkeys(input_df.columns[input_df.columns.duplicated()])
    )
    if duplicate_cols:
        raise InputValidationError(
            f"Duplicate columns provided: {duplicate_cols}"
        )

    for col in expected_features:
        if not pd.api.types.is_numeric_dtype(input_df[col]):
            raise InputValidationError(
                f"Column '{col}' must be numeric."
            )

    # Reorder to the model's feature order; selection returns a new frame.
    return input_df[expected_features]
|
|