import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
# ---------------------------------------------------------------------------
# Feature-engineering script.
#
# Reads 'synthetic_elderly_transactions.csv', derives time-, amount- and
# category-based features, drops the raw identifier columns, and writes the
# result to 'feature_engineered_transactions.csv'.
# ---------------------------------------------------------------------------

# --- Load -------------------------------------------------------------------
# 'timestamp' is parsed as a datetime column so the `.dt` accessor works below.
df = pd.read_csv('synthetic_elderly_transactions.csv', parse_dates=['timestamp'])

# --- Time-based features ----------------------------------------------------
df['hour'] = df['timestamp'].dt.hour
# pandas numbers weekdays Monday=0 ... Sunday=6, so 5 and 6 are the weekend.
df['day_of_week'] = df['timestamp'].dt.dayofweek
df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)

# --- Amount-based features --------------------------------------------------
# Mean transaction amount of the row's merchant, broadcast back to every row.
df['merchant_avg_amount'] = df.groupby('merchant')['amount'].transform('mean')

# Global z-score of the amount. The sample standard deviation is 0 when every
# amount is identical and NaN when there are fewer than two rows; dividing by
# it would fill the whole column with NaN, so fall back to a neutral 0.0.
amount_std = df['amount'].std()
if pd.notna(amount_std) and amount_std > 0:
    df['amount_zscore'] = (df['amount'] - df['amount'].mean()) / amount_std
else:
    df['amount_zscore'] = 0.0

# Vectorized log(1 + amount); same values as a per-row apply, without the
# Python-level loop.
df['log_amount'] = np.log1p(df['amount'])

# --- Categorical encoding ---------------------------------------------------
# One-hot encode the transaction type into 'type_<value>' columns.
df = pd.get_dummies(df, columns=['transaction_type'], prefix='type')

# Map each merchant to an integer code.
le = LabelEncoder()
df['merchant_encoded'] = le.fit_transform(df['merchant'])

# --- Drop raw columns -------------------------------------------------------
# These columns are removed from the output; 'timestamp' and 'merchant' have
# already been turned into the derived features above.
df = df.drop(columns=['timestamp', 'merchant', 'transaction_id', 'customer_id', 'age'])

# --- Report and save --------------------------------------------------------
# The status glyph is written as a \u escape (presumably the original was
# U+2705, a check mark) so the source stays ASCII and cannot be corrupted by
# a wrong file-encoding guess again.
print("\u2705 Final features:")
print(df.columns)

df.to_csv('feature_engineered_transactions.csv', index=False)
print("\u2705 Feature engineered dataset saved as 'feature_engineered_transactions.csv'")