"""Synthetic invoice generator backed by JSON contract data."""
import json
import os
import random
from datetime import datetime, timedelta
from typing import List, Dict

import pandas as pd
class InvoiceGenerator:
    """Create seed contract data and synthetic invoices stored as JSON files.

    Contracts are read from ``<data_dir>/contracts.json`` and generated
    invoices are written to ``<data_dir>/invoices.json``.
    """

    def __init__(self, data_dir: str = "data"):
        """Initialize the invoice generator with a data directory.

        Creates ``data_dir`` if it is missing and writes the default
        contracts file when none exists yet.
        """
        self.data_dir = data_dir
        os.makedirs(data_dir, exist_ok=True)
        self.ensure_contract_file()

    def ensure_contract_file(self) -> None:
        """Ensure contracts.json exists, writing the initial data if not.

        An existing contracts file is never overwritten.
        """
        contracts_file = os.path.join(self.data_dir, "contracts.json")
        if os.path.exists(contracts_file):
            return

        initial_contracts = {
            "contracts": [
                {
                    "contract_id": "CNT001",
                    "client": "TechCorp Solutions",
                    "start_date": "2024-01-01",
                    "end_date": "2024-12-31",
                    "terms": {
                        "base_rate": 100,
                        "volume_discounts": [
                            {"threshold": 1000, "discount": 0.10},
                            {"threshold": 5000, "discount": 0.15},
                            {"threshold": 10000, "discount": 0.20},
                        ],
                        "special_conditions": [
                            "Holiday surcharge: 15% on federal holidays",
                            "Rush order fee: Additional 25% for same-day delivery",
                            "Bulk order minimum: 500 units per order for volume pricing",
                            "Early payment discount: 2% if paid within 10 days",
                            "Multi-year commitment: 5% additional discount for 3+ year contract",
                        ],
                    },
                },
                {
                    "contract_id": "CNT002",
                    "client": "Global Manufacturing Inc",
                    "start_date": "2024-01-01",
                    "end_date": "2024-12-31",
                    "terms": {
                        "base_rate": 85,
                        "tiered_pricing": [
                            {"tier": "Standard", "rate": 1.0},
                            {"tier": "Premium", "rate": 1.25},
                            {"tier": "Enterprise", "rate": 1.5},
                        ],
                        "special_conditions": [
                            "Annual commitment discount: 5% off base rate",
                            "Multi-location discount: 3% per additional location",
                            "Payment terms: 2% discount for payment within 10 days",
                            "Volume guarantee: Minimum 1000 units per quarter",
                            "Service level agreement: 99.9% delivery accuracy required",
                        ],
                    },
                },
            ]
        }
        with open(contracts_file, "w", encoding="utf-8") as f:
            json.dump(initial_contracts, f, indent=4)

    def load_contracts(self) -> List[Dict]:
        """Load contracts from JSON file.

        Returns:
            The list stored under the ``"contracts"`` key.

        Raises:
            Exception: If the file cannot be read or parsed, or the key is
                missing. The original error is chained as ``__cause__``.
        """
        contracts_file = os.path.join(self.data_dir, "contracts.json")
        try:
            with open(contracts_file, "r", encoding="utf-8") as f:
                contracts_data = json.load(f)
            return contracts_data["contracts"]
        except Exception as e:
            # Chain the cause so the underlying traceback is not lost.
            raise Exception(f"Error loading contracts: {str(e)}") from e

    def calculate_correct_price(self, contract: Dict, quantity: int) -> float:
        """Calculate the correct price based on contract terms and quantity.

        The price starts at ``base_rate * quantity``. If the terms define
        ``volume_discounts``, the discount of the highest threshold that
        ``quantity`` reaches is applied. If the terms define
        ``tiered_pricing``, one tier is picked at random and its rate is
        multiplied in.

        Returns:
            The price rounded to two decimal places.
        """
        terms = contract["terms"]
        price = terms["base_rate"] * quantity

        # Apply volume discounts if applicable
        if "volume_discounts" in terms:
            reached = [
                entry
                for entry in terms["volume_discounts"]
                if quantity >= entry["threshold"]
            ]
            if reached:
                best = max(reached, key=lambda entry: entry["threshold"])
                if best["discount"] > 0:
                    price *= (1 - best["discount"])

        # Apply tiered pricing if applicable
        if "tiered_pricing" in terms:
            # Randomly select a tier for this invoice
            tier = random.choice(terms["tiered_pricing"])
            price *= tier["rate"]

        return round(price, 2)

    @staticmethod
    def _new_invoice_id(used_ids: set) -> str:
        """Return an ``INV<number>`` id not yet in ``used_ids`` and record it."""
        upper = 9999
        # Widen the numeric range once half of it is taken so the retry loop
        # below always terminates quickly, even for very large batches.
        while len(used_ids) * 2 >= upper - 999:
            upper = upper * 10 + 9
        while True:
            candidate = f"INV{random.randint(1000, upper)}"
            if candidate not in used_ids:
                used_ids.add(candidate)
                return candidate

    def generate_invoices(self, contracts: List[Dict]) -> List[Dict]:
        """Generate synthetic invoices based on contract data.

        Each contract gets between 5 and 10 invoices with a random quantity
        and a random date inside the contract period. About 30% of invoices
        are overcharged by 5-15% and flagged with ``has_error``.

        Returns:
            Invoice dicts sorted by date, with invoice ids unique within the
            returned list.
        """
        min_quantity = 500  # Minimum from special conditions
        max_quantity = 15000  # Arbitrary maximum

        invoices = []
        used_ids = set()
        for contract in contracts:
            # The contract period is the same for every invoice of a contract.
            start_date = datetime.strptime(contract["start_date"], "%Y-%m-%d")
            end_date = datetime.strptime(contract["end_date"], "%Y-%m-%d")
            period_days = (end_date - start_date).days

            # Random number of invoices per contract
            for _ in range(random.randint(5, 10)):
                has_error = random.random() < 0.3  # 30% chance of error
                quantity = random.randint(min_quantity, max_quantity)
                correct_price = self.calculate_correct_price(contract, quantity)

                charged_amount = correct_price
                if has_error:
                    charged_amount *= random.uniform(1.05, 1.15)  # 5-15% overcharge

                invoice_date = start_date + timedelta(
                    days=random.randint(0, period_days)
                )
                invoices.append(
                    {
                        "invoice_id": self._new_invoice_id(used_ids),
                        "contract_id": contract["contract_id"],
                        "date": invoice_date.strftime("%Y-%m-%d"),
                        "quantity": quantity,
                        "amount_charged": round(charged_amount, 2),
                        "correct_amount": round(correct_price, 2),
                        "has_error": has_error,
                    }
                )

        return sorted(invoices, key=lambda x: x["date"])

    def save_invoices(self, invoices: List[Dict]) -> None:
        """Save generated invoices to JSON file under the ``"invoices"`` key."""
        invoices_file = os.path.join(self.data_dir, "invoices.json")
        with open(invoices_file, "w", encoding="utf-8") as f:
            json.dump({"invoices": invoices}, f, indent=2)

    def generate_and_save(self) -> None:
        """Generate and save invoices in one step."""
        contracts = self.load_contracts()
        invoices = self.generate_invoices(contracts)
        self.save_invoices(invoices)

    @staticmethod
    def _read_invoices(invoices_file: str) -> List[Dict]:
        """Read the invoice list from ``invoices_file`` (empty if key absent)."""
        with open(invoices_file, "r", encoding="utf-8") as f:
            data = json.load(f)
        return data.get("invoices", [])

    def load_or_generate_invoices(self) -> List[Dict]:
        """Load existing invoices or generate new ones if they don't exist.

        An existing invoices file that cannot be read or parsed is reported
        on stdout and replaced by freshly generated invoices.
        """
        invoices_file = os.path.join(self.data_dir, "invoices.json")
        if os.path.exists(invoices_file):
            try:
                return self._read_invoices(invoices_file)
            except (OSError, ValueError, AttributeError) as e:
                # Unreadable file, invalid JSON, or a non-object top level:
                # fall through and regenerate instead of failing.
                print(f"Error loading invoices: {str(e)}")
                print("Generating new invoices...")

        self.generate_and_save()
        return self._read_invoices(invoices_file)
if __name__ == "__main__":
    # Running this module as a script creates or refreshes the data files.
    InvoiceGenerator().generate_and_save()
    print("Successfully generated invoice and contract data!")