Spaces:

Ujeshhh
/

Anomaly

Sleeping

Anomaly / feature_engineering.py

Upload 10 files

d16c0f6 verified 3 months ago

1.2 kB

	import pandas as pd
	import numpy as np
	from sklearn.preprocessing import LabelEncoder

	# Read the raw transactions CSV into a DataFrame; the 'timestamp' column is
	# parsed as datetimes so the .dt accessor can be used on it afterwards.
	df = pd.read_csv(
		'synthetic_elderly_transactions.csv',
		parse_dates=['timestamp'],
	)

	# --- A. TEMPORAL FEATURES ---
	# Derive calendar features from 'timestamp' in a single assign() call.
	# Keyword order fixes the column order (hour, day_of_week, is_weekend), and
	# each callable receives the frame including the columns assigned before it.
	df = df.assign(
		hour=lambda frame: frame['timestamp'].dt.hour,
		day_of_week=lambda frame: frame['timestamp'].dt.dayofweek,
		# dayofweek counts Monday as 0, so 5 and 6 are Saturday and Sunday.
		is_weekend=lambda frame: frame['day_of_week'].isin([5, 6]).astype(int),
	)

	# --- B. BEHAVIORAL FEATURES ---
	# Mean transaction amount of each row's merchant, broadcast back onto every row.
	df['merchant_avg_amount'] = df.groupby('merchant')['amount'].transform('mean')

	# Standard score of each amount relative to the whole dataset.
	# The standard deviation is 0 when every amount is identical and NaN when
	# there are fewer than two rows; dividing by it would fill the column with
	# NaN/inf. In those cases no amount deviates from the mean, so use 0.0.
	amount_std = df['amount'].std()
	if amount_std > 0:
		df['amount_zscore'] = (df['amount'] - df['amount'].mean()) / amount_std
	else:
		df['amount_zscore'] = 0.0

	# log(1 + amount), computed with the vectorized ufunc on the whole column
	# instead of a per-element Python lambda.
	df['log_amount'] = np.log1p(df['amount'])

	# --- C. ENCODING ---
	# One-hot expand 'transaction_type'; the new indicator columns carry the
	# 'type' prefix and replace the original column.
	df = pd.get_dummies(data=df, prefix='type', columns=['transaction_type'])

	# Encode merchant names as integer labels. The fitted encoder stays bound to
	# `le` at module level, as before.
	le = LabelEncoder()
	le.fit(df['merchant'])
	df['merchant_encoded'] = le.transform(df['merchant'])

	# --- D. DROP UNUSED COLUMNS ---
	# 'timestamp' and 'merchant' have already been turned into derived features
	# above; the remaining listed columns are removed from the output as well.
	df = df.drop(
		labels=['timestamp', 'merchant', 'transaction_id', 'customer_id', 'age'],
		axis='columns',
	)

	# Show the final column index under a header line (one print, two lines).
	print("✅ Final features:", df.columns, sep="\n")

	# Write the engineered frame to disk without the row index, then confirm.
	df.to_csv(path_or_buf='feature_engineered_transactions.csv', index=False)
	print("✅ Feature engineered dataset saved as 'feature_engineered_transactions.csv'")