# Anomaly / synthetic dataset.py
# Ujeshhh's picture
# Upload 10 files
# d16c0f6 verified
import pandas as pd
import numpy as np
from faker import Faker
import random
from datetime import datetime, timedelta
fake = Faker()
# Seed every random source this script draws from, not just NumPy:
#   - NumPy drives customer selection, the anomaly flag and amounts,
#   - the stdlib `random` module drives merchant / transaction-type choices,
#   - Faker drives timestamps and transaction ids.
# Without the last two seeds, two runs produce different datasets.
# NOTE: timestamps are requested relative to 'now', so their absolute values
# still depend on when the script is run.
np.random.seed(42)
random.seed(42)
Faker.seed(42)
# --- Dataset size and anomaly rate ---
num_customers = 50
num_transactions = 3000
# Probability that any single transaction is generated as an anomaly (~5%)
suspicious_ratio = 0.05

# --- Merchant categories and transaction types ---
regular_merchants = [
    'Pharmacy',
    'Supermarket',
    'Electricity Bill',
    'Water Bill',
    'Medical Checkup',
    'Rent',
    'Insurance',
]
suspicious_merchants = [
    'Gift Card Store',
    'Unknown Transfer',
    'Crypto Exchange',
    'Late Night ATM',
    'Online Casino',
]
transaction_types = ['debit', 'credit', 'atm_withdrawal']

# --- Customers ---
# Ids run CUST1000, CUST1001, ... with one id per customer
customer_ids = [f"CUST{number}" for number in range(1000, 1000 + num_customers)]
# One age per customer, drawn from the elderly range 65-89 (upper bound exclusive)
ages = np.random.randint(low=65, high=90, size=num_customers)
# Build the transaction records, one dict per transaction
data = []
for _ in range(num_transactions):
    # Pick which customer this transaction belongs to
    customer_idx = np.random.randint(0, num_customers)
    customer_id = customer_ids[customer_idx]
    age = ages[customer_idx]

    # Random moment within the last 180 days
    timestamp = fake.date_time_between(start_date='-180d', end_date='now')

    # Decide whether this transaction is an anomaly, then select the pools
    # and the amount range that the draws below will use
    is_anomalous = np.random.rand() < suspicious_ratio
    if is_anomalous:
        merchant_pool = suspicious_merchants
        amount_low, amount_high = 200, 5000
        type_pool = ['debit', 'atm_withdrawal']
    else:
        merchant_pool = regular_merchants
        amount_low, amount_high = 10, 300
        type_pool = transaction_types

    merchant = random.choice(merchant_pool)
    amount = round(np.random.uniform(amount_low, amount_high), 2)
    transaction_type = random.choice(type_pool)

    record = {
        'customer_id': customer_id,
        'age': age,
        'transaction_id': fake.uuid4(),
        'timestamp': timestamp,
        'merchant': merchant,
        'amount': amount,
        'transaction_type': transaction_type,
        'is_anomalous': int(is_anomalous),  # stored as 0/1
    }
    data.append(record)
# Turn the records into a DataFrame ordered chronologically
df = pd.DataFrame(data).sort_values('timestamp')

# Write the table to disk without the index column, then report success
output_path = 'synthetic_elderly_transactions.csv'
df.to_csv(output_path, index=False)
print("✅ Dataset created and saved as 'synthetic_elderly_transactions.csv'")