"""Generate a synthetic transaction dataset for elderly customers.

Most transactions are drawn from everyday merchants with small amounts; a
small fraction (``suspicious_ratio``) is drawn from suspicious merchants with
larger amounts and flagged via ``is_anomalous``. The result is sorted by
timestamp and written to ``synthetic_elderly_transactions.csv``.
"""

import random
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from faker import Faker

fake = Faker()

# Seed every random source the script draws from. Seeding NumPy alone is not
# enough: merchant/type choices use the stdlib ``random`` module and the
# transaction ids/timestamps come from Faker.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
Faker.seed(SEED)

# Parameters
num_customers = 50
num_transactions = 3000
suspicious_ratio = 0.05  # 5% of transactions will be anomalies

# Transaction categories
regular_merchants = [
    'Pharmacy',
    'Supermarket',
    'Electricity Bill',
    'Water Bill',
    'Medical Checkup',
    'Rent',
    'Insurance',
]
suspicious_merchants = [
    'Gift Card Store',
    'Unknown Transfer',
    'Crypto Exchange',
    'Late Night ATM',
    'Online Casino',
]
transaction_types = ['debit', 'credit', 'atm_withdrawal']

# Generate customers: one id and one fixed age per customer.
customer_ids = [f"CUST{1000 + i}" for i in range(num_customers)]
# np.random.randint excludes the upper bound, so ages fall in 65..89.
ages = np.random.randint(65, 90, size=num_customers)

# Generate transactions
data = []
for _ in range(num_transactions):
    # Pick the customer by index so the id and age stay paired.
    customer_idx = np.random.randint(0, num_customers)
    customer_id = customer_ids[customer_idx]
    age = ages[customer_idx]

    # NOTE: the window is relative to the current time, so absolute
    # timestamps still differ between runs even with fixed seeds.
    timestamp = fake.date_time_between(start_date='-180d', end_date='now')

    is_anomalous = np.random.rand() < suspicious_ratio

    if is_anomalous:
        # Anomalies: suspicious merchant, larger amount, never a credit.
        merchant = random.choice(suspicious_merchants)
        amount = round(np.random.uniform(200, 5000), 2)
        transaction_type = random.choice(['debit', 'atm_withdrawal'])
    else:
        merchant = random.choice(regular_merchants)
        amount = round(np.random.uniform(10, 300), 2)
        transaction_type = random.choice(transaction_types)

    data.append({
        'customer_id': customer_id,
        'age': age,
        'transaction_id': fake.uuid4(),
        'timestamp': timestamp,
        'merchant': merchant,
        'amount': amount,
        'transaction_type': transaction_type,
        'is_anomalous': int(is_anomalous),  # stored as 0/1 rather than bool
    })

# Convert to DataFrame
df = pd.DataFrame(data)

# Sort by timestamp so rows appear in chronological order.
df = df.sort_values(by='timestamp')

# Save to CSV
df.to_csv('synthetic_elderly_transactions.csv', index=False)

print("✅ Dataset created and saved as 'synthetic_elderly_transactions.csv'")