Spaces:
Sleeping
Sleeping
| """ | |
| data_factory/schemas.py | |
| ======================== | |
| SQLite CREATE TABLE statements for all four domains. | |
| Each schema is fully self-contained and has been verified to create | |
| without errors in SQLite 3.x. | |
| """ | |
| from __future__ import annotations | |
| import sqlite3 | |
| import random | |
| from datetime import date, timedelta | |
| from typing import Callable | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # SQL SCHEMAS | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| ECOMMERCE_SCHEMA = """ | |
| CREATE TABLE IF NOT EXISTS categories ( | |
| id INTEGER PRIMARY KEY, | |
| name TEXT NOT NULL UNIQUE | |
| ); | |
| CREATE TABLE IF NOT EXISTS products ( | |
| id INTEGER PRIMARY KEY, | |
| name TEXT NOT NULL, | |
| category_id INTEGER NOT NULL REFERENCES categories(id), | |
| price REAL NOT NULL CHECK(price >= 0), | |
| stock_quantity INTEGER NOT NULL DEFAULT 0 | |
| ); | |
| CREATE TABLE IF NOT EXISTS customers ( | |
| id INTEGER PRIMARY KEY, | |
| name TEXT NOT NULL, | |
| email TEXT NOT NULL UNIQUE, | |
| country TEXT NOT NULL, | |
| tier TEXT NOT NULL DEFAULT 'bronze' | |
| CHECK(tier IN ('bronze', 'silver', 'gold')), | |
| created_at TEXT NOT NULL | |
| ); | |
| CREATE TABLE IF NOT EXISTS orders ( | |
| id INTEGER PRIMARY KEY, | |
| customer_id INTEGER NOT NULL REFERENCES customers(id), | |
| status TEXT NOT NULL DEFAULT 'pending' | |
| CHECK(status IN ('pending','processing','shipped','delivered','cancelled')), | |
| created_at TEXT NOT NULL, | |
| total_amount REAL NOT NULL CHECK(total_amount >= 0) | |
| ); | |
| CREATE TABLE IF NOT EXISTS order_items ( | |
| id INTEGER PRIMARY KEY, | |
| order_id INTEGER NOT NULL REFERENCES orders(id), | |
| product_id INTEGER NOT NULL REFERENCES products(id), | |
| quantity INTEGER NOT NULL CHECK(quantity > 0), | |
| unit_price REAL NOT NULL CHECK(unit_price >= 0) | |
| ); | |
| CREATE TABLE IF NOT EXISTS reviews ( | |
| id INTEGER PRIMARY KEY, | |
| product_id INTEGER NOT NULL REFERENCES products(id), | |
| customer_id INTEGER NOT NULL REFERENCES customers(id), | |
| rating INTEGER NOT NULL CHECK(rating BETWEEN 1 AND 5), | |
| created_at TEXT NOT NULL | |
| ); | |
| CREATE INDEX IF NOT EXISTS idx_products_category ON products(category_id); | |
| CREATE INDEX IF NOT EXISTS idx_orders_customer ON orders(customer_id); | |
| CREATE INDEX IF NOT EXISTS idx_orders_status ON orders(status); | |
| CREATE INDEX IF NOT EXISTS idx_orders_created ON orders(created_at); | |
| CREATE INDEX IF NOT EXISTS idx_order_items_order ON order_items(order_id); | |
| CREATE INDEX IF NOT EXISTS idx_order_items_product ON order_items(product_id); | |
| CREATE INDEX IF NOT EXISTS idx_reviews_product ON reviews(product_id); | |
| CREATE INDEX IF NOT EXISTS idx_customers_tier ON customers(tier); | |
| """ | |
| HEALTHCARE_SCHEMA = """ | |
| CREATE TABLE IF NOT EXISTS patients ( | |
| id INTEGER PRIMARY KEY, | |
| name TEXT NOT NULL, | |
| date_of_birth TEXT NOT NULL, | |
| gender TEXT NOT NULL CHECK(gender IN ('M','F','Other')), | |
| blood_type TEXT NOT NULL, | |
| country TEXT NOT NULL, | |
| registered_at TEXT NOT NULL | |
| ); | |
| CREATE TABLE IF NOT EXISTS doctors ( | |
| id INTEGER PRIMARY KEY, | |
| name TEXT NOT NULL, | |
| specialization TEXT NOT NULL, | |
| department TEXT NOT NULL, | |
| experience_years INTEGER NOT NULL CHECK(experience_years >= 0), | |
| consultation_fee REAL NOT NULL CHECK(consultation_fee >= 0) | |
| ); | |
| CREATE TABLE IF NOT EXISTS appointments ( | |
| id INTEGER PRIMARY KEY, | |
| patient_id INTEGER NOT NULL REFERENCES patients(id), | |
| doctor_id INTEGER NOT NULL REFERENCES doctors(id), | |
| scheduled_at TEXT NOT NULL, | |
| status TEXT NOT NULL | |
| CHECK(status IN ('scheduled','completed','cancelled','no_show')), | |
| notes TEXT | |
| ); | |
| CREATE TABLE IF NOT EXISTS diagnoses ( | |
| id INTEGER PRIMARY KEY, | |
| appointment_id INTEGER NOT NULL REFERENCES appointments(id), | |
| icd_code TEXT NOT NULL, | |
| description TEXT NOT NULL, | |
| severity TEXT NOT NULL CHECK(severity IN ('mild','moderate','severe')) | |
| ); | |
| CREATE TABLE IF NOT EXISTS medications ( | |
| id INTEGER PRIMARY KEY, | |
| name TEXT NOT NULL, | |
| category TEXT NOT NULL, | |
| unit_price REAL NOT NULL CHECK(unit_price >= 0) | |
| ); | |
| CREATE TABLE IF NOT EXISTS prescriptions ( | |
| id INTEGER PRIMARY KEY, | |
| appointment_id INTEGER NOT NULL REFERENCES appointments(id), | |
| medication_id INTEGER NOT NULL REFERENCES medications(id), | |
| dosage TEXT NOT NULL, | |
| duration_days INTEGER NOT NULL CHECK(duration_days > 0), | |
| quantity INTEGER NOT NULL CHECK(quantity > 0) | |
| ); | |
| CREATE INDEX IF NOT EXISTS idx_appt_patient ON appointments(patient_id); | |
| CREATE INDEX IF NOT EXISTS idx_appt_doctor ON appointments(doctor_id); | |
| CREATE INDEX IF NOT EXISTS idx_appt_status ON appointments(status); | |
| CREATE INDEX IF NOT EXISTS idx_diag_appt ON diagnoses(appointment_id); | |
| CREATE INDEX IF NOT EXISTS idx_presc_appt ON prescriptions(appointment_id); | |
| CREATE INDEX IF NOT EXISTS idx_presc_med ON prescriptions(medication_id); | |
| """ | |
| FINANCE_SCHEMA = """ | |
| CREATE TABLE IF NOT EXISTS fin_customers ( | |
| id INTEGER PRIMARY KEY, | |
| name TEXT NOT NULL, | |
| email TEXT NOT NULL UNIQUE, | |
| country TEXT NOT NULL, | |
| kyc_status TEXT NOT NULL CHECK(kyc_status IN ('pending','verified','rejected')), | |
| created_at TEXT NOT NULL | |
| ); | |
| CREATE TABLE IF NOT EXISTS accounts ( | |
| id INTEGER PRIMARY KEY, | |
| customer_id INTEGER NOT NULL REFERENCES fin_customers(id), | |
| account_type TEXT NOT NULL | |
| CHECK(account_type IN ('savings','current','fixed_deposit','loan')), | |
| balance REAL NOT NULL DEFAULT 0, | |
| currency TEXT NOT NULL DEFAULT 'USD', | |
| status TEXT NOT NULL CHECK(status IN ('active','dormant','closed')), | |
| opened_at TEXT NOT NULL | |
| ); | |
| CREATE TABLE IF NOT EXISTS transactions ( | |
| id INTEGER PRIMARY KEY, | |
| account_id INTEGER NOT NULL REFERENCES accounts(id), | |
| txn_type TEXT NOT NULL CHECK(txn_type IN ('credit','debit')), | |
| amount REAL NOT NULL CHECK(amount > 0), | |
| currency TEXT NOT NULL DEFAULT 'USD', | |
| merchant TEXT, | |
| created_at TEXT NOT NULL | |
| ); | |
| CREATE TABLE IF NOT EXISTS loans ( | |
| id INTEGER PRIMARY KEY, | |
| customer_id INTEGER NOT NULL REFERENCES fin_customers(id), | |
| loan_type TEXT NOT NULL | |
| CHECK(loan_type IN ('personal','home','auto','business')), | |
| principal_amount REAL NOT NULL, | |
| interest_rate REAL NOT NULL, | |
| tenure_months INTEGER NOT NULL, | |
| status TEXT NOT NULL CHECK(status IN ('active','closed','defaulted')), | |
| disbursed_at TEXT NOT NULL | |
| ); | |
| CREATE TABLE IF NOT EXISTS loan_payments ( | |
| id INTEGER PRIMARY KEY, | |
| loan_id INTEGER NOT NULL REFERENCES loans(id), | |
| amount_paid REAL NOT NULL CHECK(amount_paid > 0), | |
| payment_date TEXT NOT NULL, | |
| is_late INTEGER NOT NULL DEFAULT 0 CHECK(is_late IN (0,1)) | |
| ); | |
| CREATE INDEX IF NOT EXISTS idx_acct_customer ON accounts(customer_id); | |
| CREATE INDEX IF NOT EXISTS idx_txn_account ON transactions(account_id); | |
| CREATE INDEX IF NOT EXISTS idx_txn_type ON transactions(txn_type); | |
| CREATE INDEX IF NOT EXISTS idx_loan_customer ON loans(customer_id); | |
| CREATE INDEX IF NOT EXISTS idx_lp_loan ON loan_payments(loan_id); | |
| """ | |
| HR_SCHEMA = """ | |
| CREATE TABLE IF NOT EXISTS departments ( | |
| id INTEGER PRIMARY KEY, | |
| name TEXT NOT NULL UNIQUE, | |
| location TEXT NOT NULL, | |
| budget REAL NOT NULL CHECK(budget >= 0) | |
| ); | |
| CREATE TABLE IF NOT EXISTS employees ( | |
| id INTEGER PRIMARY KEY, | |
| name TEXT NOT NULL, | |
| email TEXT NOT NULL UNIQUE, | |
| department_id INTEGER NOT NULL REFERENCES departments(id), | |
| job_title TEXT NOT NULL, | |
| hire_date TEXT NOT NULL, | |
| salary REAL NOT NULL CHECK(salary >= 0), | |
| status TEXT NOT NULL CHECK(status IN ('active','resigned','terminated')) | |
| ); | |
| CREATE TABLE IF NOT EXISTS performance_reviews ( | |
| id INTEGER PRIMARY KEY, | |
| employee_id INTEGER NOT NULL REFERENCES employees(id), | |
| review_year INTEGER NOT NULL, | |
| rating INTEGER NOT NULL CHECK(rating BETWEEN 1 AND 5), | |
| reviewer_id INTEGER NOT NULL REFERENCES employees(id), | |
| comments TEXT | |
| ); | |
| CREATE TABLE IF NOT EXISTS projects ( | |
| id INTEGER PRIMARY KEY, | |
| name TEXT NOT NULL, | |
| department_id INTEGER NOT NULL REFERENCES departments(id), | |
| start_date TEXT NOT NULL, | |
| end_date TEXT, | |
| budget REAL NOT NULL, | |
| status TEXT NOT NULL | |
| CHECK(status IN ('planned','active','completed','cancelled')) | |
| ); | |
| CREATE TABLE IF NOT EXISTS project_assignments ( | |
| id INTEGER PRIMARY KEY, | |
| employee_id INTEGER NOT NULL REFERENCES employees(id), | |
| project_id INTEGER NOT NULL REFERENCES projects(id), | |
| role TEXT NOT NULL, | |
| hours_allocated INTEGER NOT NULL CHECK(hours_allocated > 0) | |
| ); | |
| CREATE INDEX IF NOT EXISTS idx_emp_dept ON employees(department_id); | |
| CREATE INDEX IF NOT EXISTS idx_emp_status ON employees(status); | |
| CREATE INDEX IF NOT EXISTS idx_pr_employee ON performance_reviews(employee_id); | |
| CREATE INDEX IF NOT EXISTS idx_proj_dept ON projects(department_id); | |
| CREATE INDEX IF NOT EXISTS idx_pa_employee ON project_assignments(employee_id); | |
| CREATE INDEX IF NOT EXISTS idx_pa_project ON project_assignments(project_id); | |
| """ | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # SCHEMA REGISTRY | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| SCHEMA_MAP: dict[str, str] = { | |
| "ecommerce": ECOMMERCE_SCHEMA, | |
| "healthcare": HEALTHCARE_SCHEMA, | |
| "finance": FINANCE_SCHEMA, | |
| "hr": HR_SCHEMA, | |
| } | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # COMPACT SCHEMA CONTEXT (injected into every training prompt) | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| SCHEMA_CONTEXT: dict[str, str] = { | |
| "ecommerce": """\ | |
| Database: ecommerce (SQLite, read-only) | |
| TABLES | |
| ------ | |
| categories(id INTEGER PK, name TEXT) | |
| products(id INTEGER PK, name TEXT, category_id INTEGER FKβcategories.id, price REAL, stock_quantity INTEGER) | |
| customers(id INTEGER PK, name TEXT, email TEXT, country TEXT, tier TEXT β {bronze|silver|gold}, created_at TEXT ISO-8601) | |
| orders(id INTEGER PK, customer_id INTEGER FKβcustomers.id, status TEXT β {pending|processing|shipped|delivered|cancelled}, created_at TEXT ISO-8601, total_amount REAL) | |
| order_items(id INTEGER PK, order_id INTEGER FKβorders.id, product_id INTEGER FKβproducts.id, quantity INTEGER, unit_price REAL) | |
| reviews(id INTEGER PK, product_id INTEGER FKβproducts.id, customer_id INTEGER FKβcustomers.id, rating INTEGER 1-5, created_at TEXT ISO-8601) | |
| NOTES | |
| ----- | |
| - Use created_at >= '2024-01-01' for date filtering (ISO text sort works) | |
| - SQLite window functions: RANK, DENSE_RANK, ROW_NUMBER, LAG, LEAD | |
| - strftime('%Y-%m', created_at) returns 'YYYY-MM' | |
| - All monetary values in USD | |
| """, | |
| "healthcare": """\ | |
| Database: healthcare (SQLite, read-only) | |
| TABLES | |
| ------ | |
| patients(id INTEGER PK, name TEXT, date_of_birth TEXT ISO-8601, gender TEXT β {M|F|Other}, blood_type TEXT, country TEXT, registered_at TEXT ISO-8601) | |
| doctors(id INTEGER PK, name TEXT, specialization TEXT, department TEXT, experience_years INTEGER, consultation_fee REAL) | |
| appointments(id INTEGER PK, patient_id INTEGER FKβpatients.id, doctor_id INTEGER FKβdoctors.id, scheduled_at TEXT ISO-8601, status TEXT β {scheduled|completed|cancelled|no_show}, notes TEXT nullable) | |
| diagnoses(id INTEGER PK, appointment_id INTEGER FKβappointments.id, icd_code TEXT, description TEXT, severity TEXT β {mild|moderate|severe}) | |
| medications(id INTEGER PK, name TEXT, category TEXT, unit_price REAL) | |
| prescriptions(id INTEGER PK, appointment_id INTEGER FKβappointments.id, medication_id INTEGER FKβmedications.id, dosage TEXT, duration_days INTEGER, quantity INTEGER) | |
| NOTES | |
| ----- | |
| - consultation_fee is in USD per visit | |
| - ICD codes follow WHO ICD-10 format (e.g. 'I10', 'E11') | |
| - SQLite window functions available | |
| """, | |
| "finance": """\ | |
| Database: finance (SQLite, read-only) | |
| TABLES | |
| ------ | |
| fin_customers(id INTEGER PK, name TEXT, email TEXT, country TEXT, kyc_status TEXT β {pending|verified|rejected}, created_at TEXT ISO-8601) | |
| accounts(id INTEGER PK, customer_id INTEGER FKβfin_customers.id, account_type TEXT β {savings|current|fixed_deposit|loan}, balance REAL, currency TEXT, status TEXT β {active|dormant|closed}, opened_at TEXT ISO-8601) | |
| transactions(id INTEGER PK, account_id INTEGER FKβaccounts.id, txn_type TEXT β {credit|debit}, amount REAL, currency TEXT, merchant TEXT nullable, created_at TEXT ISO-8601) | |
| loans(id INTEGER PK, customer_id INTEGER FKβfin_customers.id, loan_type TEXT β {personal|home|auto|business}, principal_amount REAL, interest_rate REAL, tenure_months INTEGER, status TEXT β {active|closed|defaulted}, disbursed_at TEXT ISO-8601) | |
| loan_payments(id INTEGER PK, loan_id INTEGER FKβloans.id, amount_paid REAL, payment_date TEXT ISO-8601, is_late INTEGER β {0|1}) | |
| NOTES | |
| ----- | |
| - All monetary values in USD unless currency column specifies otherwise | |
| - is_late = 1 means the payment was overdue | |
| - SQLite window functions available | |
| """, | |
| "hr": """\ | |
| Database: hr (SQLite, read-only) | |
| TABLES | |
| ------ | |
| departments(id INTEGER PK, name TEXT, location TEXT, budget REAL) | |
| employees(id INTEGER PK, name TEXT, email TEXT, department_id INTEGER FKβdepartments.id, job_title TEXT, hire_date TEXT ISO-8601, salary REAL, status TEXT β {active|resigned|terminated}) | |
| performance_reviews(id INTEGER PK, employee_id INTEGER FKβemployees.id, review_year INTEGER, rating INTEGER 1-5, reviewer_id INTEGER FKβemployees.id, comments TEXT nullable) | |
| projects(id INTEGER PK, name TEXT, department_id INTEGER FKβdepartments.id, start_date TEXT ISO-8601, end_date TEXT nullable, budget REAL, status TEXT β {planned|active|completed|cancelled}) | |
| project_assignments(id INTEGER PK, employee_id INTEGER FKβemployees.id, project_id INTEGER FKβprojects.id, role TEXT, hours_allocated INTEGER) | |
| NOTES | |
| ----- | |
| - salary is annual in USD | |
| - performance rating: 1 (lowest) to 5 (highest) | |
| - end_date is NULL for ongoing projects | |
| - SQLite window functions available | |
| """, | |
| } | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # SEED FUNCTIONS (deterministic, SEED=42) | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _rdate(rng: random.Random, start: str = "2022-01-01", end: str = "2024-12-31") -> str: | |
| s = date.fromisoformat(start) | |
| e = date.fromisoformat(end) | |
| return (s + timedelta(days=rng.randint(0, (e - s).days))).isoformat() | |
| def seed_ecommerce(conn: sqlite3.Connection, seed: int = 42) -> None: | |
| rng = random.Random(seed) | |
| cats = ["Electronics", "Clothing", "Books", "Home & Garden", | |
| "Sports & Outdoors", "Toys & Games", "Beauty", "Automotive"] | |
| conn.executemany("INSERT INTO categories(id,name) VALUES(?,?)", enumerate(cats, 1)) | |
| products = [ | |
| (1,"Wireless Headphones",1,149.99,50),(2,"Laptop Stand",1,59.99,120), | |
| (3,"USB-C Hub",1,49.99,90),(4,"Webcam 4K",1,89.99,30), | |
| (5,"Cotton T-Shirt",2,19.99,200),(6,"Winter Jacket",2,129.99,60), | |
| (7,"Running Shorts",2,34.99,150),(8,"Clean Code",3,39.99,80), | |
| (9,"Deep Learning Book",3,59.99,45),(10,"Coffee Maker",4,89.99,40), | |
| (11,"Air Purifier",4,199.99,25),(12,"Yoga Mat",5,29.99,150), | |
| (13,"Resistance Bands",5,14.99,200),(14,"Lego City Set",6,79.99,60), | |
| (15,"Face Serum",7,34.99,100),(16,"Dash Cam",8,119.99,35), | |
| ] | |
| conn.executemany("INSERT INTO products VALUES(?,?,?,?,?)", products) | |
| countries = ["India","USA","Germany","UK","Canada","Australia","France","Brazil"] | |
| tiers = ["bronze","silver","gold"] | |
| customers = [] | |
| for i in range(1, 51): | |
| customers.append((i, f"Customer {i}", f"cust{i}@shop.com", | |
| rng.choice(countries), rng.choice(tiers), _rdate(rng))) | |
| conn.executemany("INSERT INTO customers VALUES(?,?,?,?,?,?)", customers) | |
| statuses = ["pending","processing","shipped","delivered","cancelled"] | |
| orders = [] | |
| for i in range(1, 201): | |
| orders.append((i, rng.randint(1, 50), rng.choice(statuses), | |
| _rdate(rng), round(rng.uniform(20, 800), 2))) | |
| conn.executemany("INSERT INTO orders VALUES(?,?,?,?,?)", orders) | |
| items = [] | |
| for i in range(1, 301): | |
| items.append((i, rng.randint(1, 200), rng.randint(1, 16), | |
| rng.randint(1, 5), round(rng.uniform(10, 200), 2))) | |
| conn.executemany("INSERT INTO order_items VALUES(?,?,?,?,?)", items) | |
| reviews = [] | |
| for i in range(1, 151): | |
| reviews.append((i, rng.randint(1, 16), rng.randint(1, 50), | |
| rng.randint(1, 5), _rdate(rng))) | |
| conn.executemany("INSERT INTO reviews VALUES(?,?,?,?,?)", reviews) | |
| conn.commit() | |
| def seed_healthcare(conn: sqlite3.Connection, seed: int = 42) -> None: | |
| rng = random.Random(seed) | |
| specs = [("Cardiology","Cardiology"), ("Neurology","Neurology"), | |
| ("Orthopedics","Orthopedics"), ("Dermatology","Dermatology"), | |
| ("Pediatrics","Pediatrics"), ("Oncology","Oncology"), | |
| ("Endocrinology","Endocrinology"), ("Gastroenterology","Gastroenterology")] | |
| for i, (spec, dept) in enumerate(specs, 1): | |
| conn.execute("INSERT INTO doctors VALUES(?,?,?,?,?,?)", | |
| (i, f"Dr. {['Smith','Patel','Kim','MΓΌller','Okafor','Chen','Lopez','Roy'][i-1]}", | |
| spec, dept, rng.randint(2, 25), round(rng.uniform(50, 350), 2))) | |
| genders = ["M", "F", "Other"] | |
| blood_types = ["A+","A-","B+","B-","O+","O-","AB+","AB-"] | |
| countries = ["India","USA","Germany","UK","Canada","Australia"] | |
| for i in range(1, 101): | |
| conn.execute("INSERT INTO patients VALUES(?,?,?,?,?,?,?)", | |
| (i, f"Patient {i}", _rdate(rng, "1950-01-01", "2010-01-01"), | |
| rng.choice(genders), rng.choice(blood_types), | |
| rng.choice(countries), _rdate(rng, "2020-01-01", "2024-12-31"))) | |
| appt_statuses = ["scheduled", "completed", "cancelled", "no_show"] | |
| weights = [0.15, 0.60, 0.15, 0.10] | |
| for i in range(1, 301): | |
| conn.execute("INSERT INTO appointments VALUES(?,?,?,?,?,?)", | |
| (i, rng.randint(1, 100), rng.randint(1, 8), | |
| _rdate(rng, "2022-01-01", "2024-12-31"), | |
| rng.choices(appt_statuses, weights)[0], None)) | |
| icd_codes = ["I10","E11","J45","M54","K21","F32","G43","L30","N39","R05", | |
| "C50","Z87","I25","E78","J18"] | |
| descs = ["Hypertension","Type 2 Diabetes","Asthma","Back Pain","GERD", | |
| "Depression","Migraine","Dermatitis","UTI","Cough", | |
| "Breast Cancer","Family History","Coronary Artery Disease", | |
| "Hyperlipidemia","Pneumonia"] | |
| severities = ["mild","moderate","severe"] | |
| for i in range(1, 201): | |
| conn.execute("INSERT INTO diagnoses VALUES(?,?,?,?,?)", | |
| (i, rng.randint(1, 300), rng.choice(icd_codes), | |
| rng.choice(descs), rng.choice(severities))) | |
| meds = [("Metformin","Antidiabetic",0.15),("Lisinopril","Antihypertensive",0.20), | |
| ("Atorvastatin","Statin",0.25),("Amoxicillin","Antibiotic",0.30), | |
| ("Ibuprofen","NSAID",0.10),("Omeprazole","PPI",0.18), | |
| ("Sertraline","Antidepressant",0.35),("Cetirizine","Antihistamine",0.08), | |
| ("Paracetamol","Analgesic",0.05),("Aspirin","Antiplatelet",0.07)] | |
| for i, (name, cat, price) in enumerate(meds, 1): | |
| conn.execute("INSERT INTO medications VALUES(?,?,?,?)", (i, name, cat, price)) | |
| dosages = ["1x daily","2x daily","3x daily","once at night","as needed"] | |
| for i in range(1, 251): | |
| conn.execute("INSERT INTO prescriptions VALUES(?,?,?,?,?,?)", | |
| (i, rng.randint(1, 300), rng.randint(1, 10), | |
| rng.choice(dosages), rng.randint(5, 60), rng.randint(10, 90))) | |
| conn.commit() | |
| def seed_finance(conn: sqlite3.Connection, seed: int = 42) -> None: | |
| rng = random.Random(seed) | |
| countries = ["India","USA","Germany","UK","Singapore","UAE","Canada"] | |
| kyc = ["pending","verified","verified","verified","rejected"] | |
| for i in range(1, 51): | |
| conn.execute("INSERT INTO fin_customers VALUES(?,?,?,?,?,?)", | |
| (i, f"FinClient {i}", f"fincli{i}@bank.com", | |
| rng.choice(countries), rng.choice(kyc), _rdate(rng))) | |
| acct_types = ["savings","savings","current","fixed_deposit"] | |
| statuses = ["active","active","active","dormant","closed"] | |
| for i in range(1, 101): | |
| conn.execute("INSERT INTO accounts VALUES(?,?,?,?,?,?,?)", | |
| (i, rng.randint(1, 50), rng.choice(acct_types), | |
| round(rng.uniform(100, 100000), 2), "USD", | |
| rng.choice(statuses), _rdate(rng))) | |
| merchants = [None, "Amazon", "Walmart", "Netflix", "Uber", "Apple", | |
| "Google Pay", "Zomato", "Flipkart", "Airbnb"] | |
| for i in range(1, 501): | |
| conn.execute("INSERT INTO transactions VALUES(?,?,?,?,?,?,?)", | |
| (i, rng.randint(1, 100), rng.choice(["credit","debit"]), | |
| round(rng.uniform(5, 10000), 2), "USD", | |
| rng.choice(merchants), _rdate(rng))) | |
| loan_types = ["personal","home","auto","business"] | |
| loan_statuses = ["active","active","closed","defaulted"] | |
| for i in range(1, 51): | |
| conn.execute("INSERT INTO loans VALUES(?,?,?,?,?,?,?,?)", | |
| (i, rng.randint(1, 50), rng.choice(loan_types), | |
| round(rng.uniform(5000, 500000), 2), | |
| round(rng.uniform(5, 18), 2), rng.randint(12, 360), | |
| rng.choice(loan_statuses), _rdate(rng))) | |
| for i in range(1, 201): | |
| conn.execute("INSERT INTO loan_payments VALUES(?,?,?,?,?)", | |
| (i, rng.randint(1, 50), round(rng.uniform(500, 10000), 2), | |
| _rdate(rng), rng.randint(0, 1))) | |
| conn.commit() | |
| def seed_hr(conn: sqlite3.Connection, seed: int = 42) -> None: | |
| rng = random.Random(seed) | |
| depts = [("Engineering","Bangalore",8000000),("Marketing","Mumbai",3000000), | |
| ("Finance","Delhi",2000000),("HR","Chennai",1500000), | |
| ("Sales","Hyderabad",5000000),("Product","Pune",4000000), | |
| ("Legal","Delhi",1000000),("Operations","Kolkata",2500000)] | |
| for i, (name, loc, bud) in enumerate(depts, 1): | |
| conn.execute("INSERT INTO departments VALUES(?,?,?,?)", (i, name, loc, bud)) | |
| titles = ["Software Engineer","Senior Engineer","Staff Engineer","Principal Engineer", | |
| "Engineering Manager","Product Manager","Data Analyst","Data Scientist", | |
| "Marketing Specialist","Sales Executive","HR Specialist","Finance Analyst", | |
| "Director","VP","Legal Counsel"] | |
| statuses = ["active","active","active","active","resigned","terminated"] | |
| for i in range(1, 101): | |
| conn.execute("INSERT INTO employees VALUES(?,?,?,?,?,?,?,?)", | |
| (i, f"Employee {i}", f"emp{i}@corp.com", | |
| rng.randint(1, 8), rng.choice(titles), | |
| _rdate(rng, "2015-01-01", "2024-01-01"), | |
| round(rng.uniform(25000, 200000), 2), rng.choice(statuses))) | |
| for i in range(1, 201): | |
| conn.execute("INSERT INTO performance_reviews VALUES(?,?,?,?,?,?)", | |
| (i, rng.randint(1, 100), rng.randint(2019, 2024), | |
| rng.randint(1, 5), rng.randint(1, 100), | |
| rng.choice(["Excellent work","Good performance","Needs improvement", | |
| "Outstanding","Meeting expectations"]))) | |
| proj_statuses = ["planned","active","active","completed","cancelled"] | |
| for i in range(1, 51): | |
| sd = _rdate(rng, "2021-01-01", "2024-01-01") | |
| conn.execute("INSERT INTO projects VALUES(?,?,?,?,?,?,?)", | |
| (i, f"Project {i}", rng.randint(1, 8), sd, | |
| _rdate(rng, sd, "2025-06-01") if rng.random() > 0.25 else None, | |
| round(rng.uniform(50000, 2000000), 2), rng.choice(proj_statuses))) | |
| roles = ["Lead","Senior Developer","Developer","Tester","Analyst","DevOps"] | |
| for i in range(1, 251): | |
| conn.execute("INSERT INTO project_assignments VALUES(?,?,?,?,?)", | |
| (i, rng.randint(1, 100), rng.randint(1, 50), | |
| rng.choice(roles), rng.randint(20, 400))) | |
| conn.commit() | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # REGISTRY | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| SEED_MAP: dict[str, Callable] = { | |
| "ecommerce": seed_ecommerce, | |
| "healthcare": seed_healthcare, | |
| "finance": seed_finance, | |
| "hr": seed_hr, | |
| } | |
| def build_connection(domain: str, seed: int = 42) -> sqlite3.Connection: | |
| """Return a seeded in-memory SQLite connection for the given domain.""" | |
| conn = sqlite3.connect(":memory:", check_same_thread=False) | |
| conn.row_factory = sqlite3.Row | |
| conn.execute("PRAGMA foreign_keys = ON") | |
| conn.executescript(SCHEMA_MAP[domain]) | |
| SEED_MAP[domain](conn, seed=seed) | |
| return conn | |