"""Feature engineering for the synthetic elderly-transactions dataset.

Reads the raw transactions CSV, derives temporal and behavioral features,
encodes the categorical columns, drops the columns that are not used as
features, and writes the result to a new CSV.
"""

import numpy as np
import pandas as pd

INPUT_PATH = 'synthetic_elderly_transactions.csv'
OUTPUT_PATH = 'feature_engineered_transactions.csv'

# Columns removed once every feature derived from them has been computed.
DROPPED_COLUMNS = ['timestamp', 'merchant', 'transaction_id', 'customer_id', 'age']


def add_temporal_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with ``hour``, ``day_of_week`` and ``is_weekend``.

    Requires a datetime-typed ``timestamp`` column. The input frame is not
    modified.
    """
    timestamps = df['timestamp'].dt
    day_of_week = timestamps.dayofweek
    return df.assign(
        hour=timestamps.hour,
        day_of_week=day_of_week,
        # pandas numbers Monday as 0, so 5 and 6 are Saturday and Sunday.
        is_weekend=day_of_week.isin([5, 6]).astype(int),
    )


def add_behavioral_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with amount-based features added.

    Adds ``merchant_avg_amount`` (mean amount per merchant),
    ``amount_zscore`` (amount standardized over the whole frame, using the
    sample standard deviation) and ``log_amount`` (``log(1 + amount)``).
    Requires ``merchant`` and ``amount`` columns. The input frame is not
    modified.
    """
    amount = df['amount']
    return df.assign(
        merchant_avg_amount=df.groupby('merchant')['amount'].transform('mean'),
        # NOTE: yields NaN/inf when every amount is identical (std == 0) or
        # when there is a single row (std is NaN); left as-is on purpose.
        amount_zscore=(amount - amount.mean()) / amount.std(),
        # Vectorized: one NumPy call instead of a Python lambda per row.
        log_amount=np.log1p(amount),
    )


def encode_categoricals(df: pd.DataFrame) -> pd.DataFrame:
    """Return *df* with ``transaction_type`` one-hot encoded and merchants labeled.

    ``transaction_type`` is replaced by ``type_*`` indicator columns and an
    integer ``merchant_encoded`` column is appended. The input frame is not
    modified.
    """
    # Imported here so the pandas-only helpers above stay importable when
    # scikit-learn is not installed.
    from sklearn.preprocessing import LabelEncoder

    encoded = pd.get_dummies(df, columns=['transaction_type'], prefix='type')
    encoded['merchant_encoded'] = LabelEncoder().fit_transform(encoded['merchant'])
    return encoded


def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
    """Run the full feature pipeline on *df* and return the feature frame.

    Applies the temporal, behavioral and encoding steps in that order, then
    drops the columns listed in ``DROPPED_COLUMNS``.

    Raises:
        KeyError: if any column the pipeline reads or drops is missing.
    """
    df = add_temporal_features(df)
    df = add_behavioral_features(df)
    df = encode_categoricals(df)
    return df.drop(columns=DROPPED_COLUMNS)


def main() -> None:
    """Load the raw CSV, engineer features, report them and save the result."""
    df = pd.read_csv(INPUT_PATH, parse_dates=['timestamp'])
    features = engineer_features(df)

    # Print feature columns
    print("✅ Final features:")
    print(features.columns)

    # Save to CSV
    features.to_csv(OUTPUT_PATH, index=False)
    print(f"✅ Feature engineered dataset saved as '{OUTPUT_PATH}'")


if __name__ == "__main__":
    main()