import pandas as pd
import numpy as np


# Every feature name the trained model expects, in a fixed order.
# Based on the original training data, these are typical features.
_EXPECTED_FEATURES = (
    # Basic transaction features
    'TransactionAmt', 'TransactionDT',

    # Card features
    'card1', 'card2', 'card3', 'card4', 'card5', 'card6',

    # Address features
    'addr1', 'addr2',

    # Distance features
    'dist1', 'dist2',

    # Email features
    'P_emaildomain', 'R_emaildomain',

    # Count features (C1-C14)
    *[f'C{i}' for i in range(1, 15)],

    # Time delta features (D1-D15)
    *[f'D{i}' for i in range(1, 16)],

    # Match features (M1-M9)
    *[f'M{i}' for i in range(1, 10)],

    # Vesta features (V1-V339) - full range based on error message
    *[f'V{i}' for i in range(1, 340)],

    # Identity features (id_01 to id_38)
    *[f'id_{i:02d}' for i in range(1, 39)],

    # Device features
    'DeviceType', 'DeviceInfo',

    # Additional features that might be present
    'ProductCD',
)

# Default value used for each feature that a caller does not supply.
# Built once at import time instead of on every call. Within the literal,
# the explicit per-feature entries that follow a ``**{...}`` range
# deliberately override the range-wide default.
# Features without an entry here (e.g. 'TransactionAmt') fall back to 0.0.
_FEATURE_DEFAULTS = {
    # Numeric features default to 0 or reasonable values
    'card1': 13553,
    'card2': 150.0,
    'card3': 150.0,
    'card5': 142.0,
    'addr1': 325.0,
    'addr2': 87.0,
    'dist1': 19.0,
    'dist2': 19.0,

    # Count features (C1-C14) - mostly 0 or 1
    **{f'C{i}': 0.0 for i in range(1, 15)},
    'C1': 1.0, 'C2': 1.0, 'C6': 1.0, 'C9': 1.0,
    'C11': 1.0, 'C12': 1.0, 'C13': 1.0, 'C14': 1.0,

    # Time delta features (D1-D15) - mostly 0
    **{f'D{i}': 0.0 for i in range(1, 16)},
    'D5': 20.0,  # Common non-zero value

    # Match features (M1-M9) - mostly F with some T
    **{f'M{i}': 'F' for i in range(1, 10)},
    'M1': 'T', 'M2': 'T', 'M3': 'T',
    'M4': 'M0',  # Special case

    # Vesta features (V1-V339) - default to 1.0
    **{f'V{i}': 1.0 for i in range(1, 340)},

    # Identity features (id_01 to id_38) - default to 0.0
    **{f'id_{i:02d}': 0.0 for i in range(1, 39)},

    # Categorical features
    'card4': 'visa',
    'card6': 'credit',
    'P_emaildomain': 'gmail.com',
    'R_emaildomain': 'gmail.com',
    'DeviceType': 'desktop',
    'DeviceInfo': 'Windows',
    'ProductCD': 'W',

    # Transaction defaults
    'TransactionDT': 86400,  # Default timestamp
}


def get_model_expected_features():
    """Return all features that the trained model expects.

    Returns:
        A new list of feature-name strings on every call, so callers may
        mutate the result without affecting later calls.
    """
    return list(_EXPECTED_FEATURES)


def fill_missing_features(transaction_data):
    """Fill missing features with appropriate default values.

    Args:
        transaction_data: Mapping of feature name to value. ``None`` is
            treated as an empty transaction. The input is not modified.

    Returns:
        A new dict holding every provided entry (including keys that are not
        expected features) followed by each expected feature that was absent,
        set to its default value, or to 0.0 when no default is defined.
    """
    complete_data = {}

    # First, add all provided data; provided values always win over defaults.
    if transaction_data is not None:
        complete_data.update(transaction_data)

    # Then fill missing features with defaults. Only absent keys are filled;
    # a key that is present with a None value is left untouched.
    for feature in _EXPECTED_FEATURES:
        if feature not in complete_data:
            complete_data[feature] = _FEATURE_DEFAULTS.get(feature, 0.0)

    return complete_data


def create_simple_transaction(amount, card_type="visa",
                              email_domain="gmail.com", hour=12):
    """Create a transaction with minimal inputs and smart defaults.

    Args:
        amount: Transaction amount; converted with ``float()``.
        card_type: Value stored under 'card4'.
        email_domain: Value stored under both 'P_emaildomain' and
            'R_emaildomain'.
        hour: Multiplied by 3600 to produce 'TransactionDT'.

    Returns:
        A dict with the values above plus defaults for every other expected
        feature, as produced by ``fill_missing_features``.
    """
    transaction_data = {
        'TransactionAmt': float(amount),
        'TransactionDT': hour * 3600,
        'card4': card_type,
        'P_emaildomain': email_domain,
        'R_emaildomain': email_domain,
    }

    # Fill all missing features
    return fill_missing_features(transaction_data)


def validate_features(df, expected_features):
    """Validate that a DataFrame has all expected features.

    Args:
        df: Object exposing a ``columns`` iterable (e.g. a pandas DataFrame).
        expected_features: Iterable of required column names.

    Returns:
        A dict with:
            'missing': expected names absent from ``df.columns``, in the
                order they appear in ``expected_features``;
            'extra': column names not in ``expected_features``, in column
                order;
            'is_valid': True when nothing is missing (extra columns do not
                make the frame invalid).
    """
    # Materialise both inputs once so one-shot iterators are handled and the
    # result order is deterministic rather than following set iteration order.
    expected = list(expected_features)
    columns = list(df.columns)
    expected_set = set(expected)
    column_set = set(columns)

    # dict.fromkeys drops duplicate names while keeping first-seen order.
    missing = [name for name in dict.fromkeys(expected)
               if name not in column_set]
    extra = [name for name in dict.fromkeys(columns)
             if name not in expected_set]

    return {
        'missing': missing,
        'extra': extra,
        'is_valid': not missing,
    }