"""Generate a synthetic transaction dataset and save it as a CSV file.

Each row is one transaction for one of ``num_customers`` customers whose ages
are drawn from 65-89. Roughly ``suspicious_ratio`` of the rows are labelled
``is_anomalous = 1``; those rows use the suspicious merchant list, a higher
amount range, and never the ``credit`` transaction type. The rows are sorted
by timestamp and written to ``synthetic_elderly_transactions.csv`` in the
current working directory.
"""
import random
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from faker import Faker

SEED = 42

fake = Faker()

# The script draws from three independent random sources (NumPy, the stdlib
# ``random`` module and Faker). All of them must be seeded, otherwise the
# merchants, transaction types and transaction ids change on every run.
np.random.seed(SEED)
random.seed(SEED)
Faker.seed(SEED)

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
num_customers = 50
num_transactions = 3000
suspicious_ratio = 0.05  # probability that a transaction is labelled anomalous

regular_merchants = [
    'Pharmacy',
    'Supermarket',
    'Electricity Bill',
    'Water Bill',
    'Medical Checkup',
    'Rent',
    'Insurance',
]
suspicious_merchants = [
    'Gift Card Store',
    'Unknown Transfer',
    'Crypto Exchange',
    'Late Night ATM',
    'Online Casino',
]
transaction_types = ['debit', 'credit', 'atm_withdrawal']

# ---------------------------------------------------------------------------
# Customers
# ---------------------------------------------------------------------------
customer_ids = [f"CUST{1000 + i}" for i in range(num_customers)]
# randint's upper bound is exclusive, so ages fall in 65..89.
ages = np.random.randint(65, 90, size=num_customers)

# ---------------------------------------------------------------------------
# Transactions
# ---------------------------------------------------------------------------
data = []

for _ in range(num_transactions):
    customer_idx = np.random.randint(0, num_customers)
    customer_id = customer_ids[customer_idx]
    age = ages[customer_idx]

    # NOTE: the window is relative to the current time, so timestamps still
    # shift from run to run even though the generators are seeded.
    timestamp = fake.date_time_between(start_date='-180d', end_date='now')

    is_anomalous = np.random.rand() < suspicious_ratio

    if is_anomalous:
        merchant = random.choice(suspicious_merchants)
        amount = round(np.random.uniform(200, 5000), 2)
        # Anomalous rows only move money out of the account.
        transaction_type = random.choice(['debit', 'atm_withdrawal'])
    else:
        merchant = random.choice(regular_merchants)
        amount = round(np.random.uniform(10, 300), 2)
        transaction_type = random.choice(transaction_types)

    data.append({
        'customer_id': customer_id,
        'age': age,
        'transaction_id': fake.uuid4(),
        'timestamp': timestamp,
        'merchant': merchant,
        'amount': amount,
        'transaction_type': transaction_type,
        'is_anomalous': int(is_anomalous),
    })

# ---------------------------------------------------------------------------
# Output
# ---------------------------------------------------------------------------
df = pd.DataFrame(data)
df = df.sort_values(by='timestamp')

df.to_csv('synthetic_elderly_transactions.csv', index=False)
print("✅ Dataset created and saved as 'synthetic_elderly_transactions.csv'")