import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Feature-engineering script: reads the raw transaction CSV, derives temporal
# and amount-based features, encodes the categorical columns, drops the raw
# columns that are no longer needed, and writes the result to a new CSV.

# Load the dataset; 'timestamp' is parsed as datetime so the .dt accessor works.
df = pd.read_csv('synthetic_elderly_transactions.csv', parse_dates=['timestamp'])

# --- A. TEMPORAL FEATURES ---
df['hour'] = df['timestamp'].dt.hour
df['day_of_week'] = df['timestamp'].dt.dayofweek  # pandas: Monday=0 ... Sunday=6
df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)

# --- B. BEHAVIORAL FEATURES ---
# Mean amount per merchant, broadcast back onto every row of that merchant.
df['merchant_avg_amount'] = df.groupby('merchant')['amount'].transform('mean')

# Global z-score of the amount. The sample standard deviation is 0 when every
# amount is identical and NaN when there are fewer than two values; dividing by
# it would turn the whole feature into NaN, so in that case every present
# amount gets a z-score of 0.0 (missing amounts stay NaN).
amount_centered = df['amount'] - df['amount'].mean()
amount_std = df['amount'].std()
if pd.notna(amount_std) and amount_std > 0:
    df['amount_zscore'] = amount_centered / amount_std
else:
    df['amount_zscore'] = amount_centered * 0.0

# log(1 + amount), computed in one vectorized call instead of a per-row lambda.
df['log_amount'] = np.log1p(df['amount'])

# --- C. ENCODING ---
# One-hot encode the transaction type into 'type_<value>' columns.
df = pd.get_dummies(df, columns=['transaction_type'], prefix='type')

# Integer-code the merchant names; the raw 'merchant' column is dropped below.
le = LabelEncoder()
df['merchant_encoded'] = le.fit_transform(df['merchant'])

# --- D. DROP UNUSED COLUMNS ---
df = df.drop(columns=['timestamp', 'merchant', 'transaction_id', 'customer_id', 'age'])

# Print feature columns
print("✅ Final features:")
print(df.columns)

# Save to CSV
df.to_csv('feature_engineered_transactions.csv', index=False)
print("✅ Feature engineered dataset saved as 'feature_engineered_transactions.csv'")