File size: 1,202 Bytes
d16c0f6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
# Load the dataset
# Read the raw transactions CSV, converting the 'timestamp' column to datetimes on load.
df = pd.read_csv(
    filepath_or_buffer='synthetic_elderly_transactions.csv',
    parse_dates=['timestamp'],
)
# --- A. TEMPORAL FEATURES ---
# Derive calendar components from the parsed timestamp in a single assign call.
# 'is_weekend' is computed from the freshly added 'day_of_week' column, hence the
# callable: assign evaluates keyword arguments in order, so it sees that column.
df = df.assign(
    hour=df['timestamp'].dt.hour,
    day_of_week=df['timestamp'].dt.dayofweek,
    # pandas numbers Monday as 0, so 5 and 6 are Saturday and Sunday; stored as 0/1.
    is_weekend=lambda frame: frame['day_of_week'].isin([5, 6]).astype(int),
)
# --- B. BEHAVIORAL FEATURES ---
# Mean transaction amount of each row's merchant, broadcast back onto every row.
df['merchant_avg_amount'] = df.groupby('merchant')['amount'].transform('mean')
# Standardise the amount against the mean and standard deviation of the whole column.
# NOTE(review): if every amount is identical the std is 0 and this yields NaN/inf —
# confirm whether downstream consumers tolerate that.
df['amount_zscore'] = (df['amount'] - df['amount'].mean()) / df['amount'].std()
# log(1 + amount), applied to the whole column at once with the NumPy ufunc
# instead of invoking a Python lambda once per row.
df['log_amount'] = np.log1p(df['amount'])
# --- C. ENCODING ---
# One-hot encode 'transaction_type' into indicator columns prefixed with 'type'.
df = pd.get_dummies(
    data=df,
    prefix='type',
    columns=['transaction_type'],
)
# Integer-encode the merchant; the fitted encoder stays available as `le`.
le = LabelEncoder()
merchant_values = df['merchant']
df['merchant_encoded'] = le.fit_transform(merchant_values)
# --- D. DROP UNUSED COLUMNS ---
# Remove the raw timestamp and merchant columns, the identifiers and 'age'
# from the output table.
df = df.drop(
    labels=[
        'timestamp',
        'merchant',
        'transaction_id',
        'customer_id',
        'age',
    ],
    axis='columns',
)
# Print feature columns
# Emit the heading and the resulting column index on consecutive lines.
print("✅ Final features:", df.columns, sep="\n")
# Save to CSV
# Write the engineered table without the row index, then confirm on stdout.
df.to_csv(path_or_buf='feature_engineered_transactions.csv', index=False)
print("✅ Feature engineered dataset saved as 'feature_engineered_transactions.csv'")
|