# Spaces: Sleeping
import json
import os
import random
from datetime import datetime, timedelta
from typing import List, Dict

import pandas as pd
class InvoiceGenerator:
    """Create synthetic contract and invoice data as JSON files.

    The generator keeps two files inside ``data_dir``:

    * ``contracts.json`` -- seeded with two sample contracts on first use.
    * ``invoices.json`` -- randomly generated invoices, a fraction of which
      are deliberately overcharged so downstream checks have errors to find.

    Note that contracts with ``tiered_pricing`` get a randomly chosen tier on
    every call to :meth:`calculate_correct_price`, so that method is only
    deterministic for contracts without tiers (or with a seeded ``random``).
    """

    # Bounds for the randomly drawn invoice quantity. The lower bound mirrors
    # the 500-unit bulk-order minimum listed in the sample contract's special
    # conditions; the upper bound is arbitrary.
    MIN_QUANTITY = 500
    MAX_QUANTITY = 15000

    # Probability that a generated invoice is overcharged, and the range of
    # the multiplicative overcharge applied when it is (5%-15%).
    ERROR_RATE = 0.3
    OVERCHARGE_RANGE = (1.05, 1.15)

    def __init__(self, data_dir: str = "data"):
        """Initialize the invoice generator with a data directory.

        Creates ``data_dir`` if needed and seeds ``contracts.json`` when it
        does not exist yet.
        """
        self.data_dir = data_dir
        os.makedirs(data_dir, exist_ok=True)
        self.ensure_contract_file()

    def ensure_contract_file(self) -> None:
        """Ensure contracts.json exists with initial data.

        An existing file is left untouched; otherwise the two built-in sample
        contracts are written.
        """
        contracts_file = os.path.join(self.data_dir, "contracts.json")
        if os.path.exists(contracts_file):
            return

        initial_contracts = {
            "contracts": [
                {
                    "contract_id": "CNT001",
                    "client": "TechCorp Solutions",
                    "start_date": "2024-01-01",
                    "end_date": "2024-12-31",
                    "terms": {
                        "base_rate": 100,
                        "volume_discounts": [
                            {"threshold": 1000, "discount": 0.10},
                            {"threshold": 5000, "discount": 0.15},
                            {"threshold": 10000, "discount": 0.20}
                        ],
                        "special_conditions": [
                            "Holiday surcharge: 15% on federal holidays",
                            "Rush order fee: Additional 25% for same-day delivery",
                            "Bulk order minimum: 500 units per order for volume pricing",
                            "Early payment discount: 2% if paid within 10 days",
                            "Multi-year commitment: 5% additional discount for 3+ year contract"
                        ]
                    }
                },
                {
                    "contract_id": "CNT002",
                    "client": "Global Manufacturing Inc",
                    "start_date": "2024-01-01",
                    "end_date": "2024-12-31",
                    "terms": {
                        "base_rate": 85,
                        "tiered_pricing": [
                            {"tier": "Standard", "rate": 1.0},
                            {"tier": "Premium", "rate": 1.25},
                            {"tier": "Enterprise", "rate": 1.5}
                        ],
                        "special_conditions": [
                            "Annual commitment discount: 5% off base rate",
                            "Multi-location discount: 3% per additional location",
                            "Payment terms: 2% discount for payment within 10 days",
                            "Volume guarantee: Minimum 1000 units per quarter",
                            "Service level agreement: 99.9% delivery accuracy required"
                        ]
                    }
                }
            ]
        }
        with open(contracts_file, 'w', encoding="utf-8") as f:
            json.dump(initial_contracts, f, indent=4)

    def load_contracts(self) -> List[Dict]:
        """Load contracts from JSON file.

        Returns:
            The list stored under the ``"contracts"`` key of contracts.json.

        Raises:
            Exception: if the file cannot be read, parsed, or lacks the
                ``"contracts"`` key. The underlying error is chained as
                ``__cause__``.
        """
        contracts_file = os.path.join(self.data_dir, "contracts.json")
        try:
            with open(contracts_file, 'r', encoding="utf-8") as f:
                contracts_data = json.load(f)
            return contracts_data['contracts']
        except Exception as e:
            # Keep the generic exception type callers already catch, but chain
            # the cause so the original traceback is not lost.
            raise Exception(f"Error loading contracts: {str(e)}") from e

    def calculate_correct_price(self, contract: Dict, quantity: int) -> float:
        """Calculate the correct price based on contract terms and quantity.

        The price starts at ``base_rate * quantity``. If the contract has
        ``volume_discounts``, the discount with the highest threshold not
        exceeding ``quantity`` is applied (only when it is positive). If the
        contract has ``tiered_pricing``, one tier is picked at random and its
        rate multiplies the price.

        Returns:
            The price rounded to two decimals.
        """
        terms = contract["terms"]
        price = terms["base_rate"] * quantity

        # Apply volume discounts if applicable
        if "volume_discounts" in terms:
            reached = [
                entry for entry in terms["volume_discounts"]
                if quantity >= entry["threshold"]
            ]
            if reached:
                best = max(reached, key=lambda entry: entry["threshold"])
                if best["discount"] > 0:
                    price *= (1 - best["discount"])

        # Apply tiered pricing if applicable
        if "tiered_pricing" in terms:
            # Randomly select a tier for this invoice
            tier = random.choice(terms["tiered_pricing"])
            price *= tier["rate"]

        return round(price, 2)

    def _next_invoice_id(self, used_ids: set) -> str:
        """Return a fresh ``INV<number>`` id and record it in ``used_ids``."""
        upper = 9999
        # Ids are four digits by default. Once at least half of the current
        # numeric pool is taken, widen it by one digit so the retry loop
        # below always terminates quickly.
        while 2 * len(used_ids) >= upper - 1000 + 1:
            upper = upper * 10 + 9
        while True:
            candidate = f"INV{random.randint(1000, upper)}"
            if candidate not in used_ids:
                used_ids.add(candidate)
                return candidate

    def generate_invoices(self, contracts: List[Dict]) -> List[Dict]:
        """Generate synthetic invoices based on contract data.

        For every contract, 5-10 invoices are created with a random quantity
        and a random date inside the contract period. With probability
        ``ERROR_RATE`` an invoice is overcharged by a factor drawn from
        ``OVERCHARGE_RANGE``. Invoice ids are unique within the returned list.

        Returns:
            Invoice dicts sorted by their ISO ``date`` string.

        Raises:
            ValueError: if a contract's ``end_date`` precedes its
                ``start_date``.
        """
        invoices = []
        used_ids = set()
        low_factor, high_factor = self.OVERCHARGE_RANGE

        for contract in contracts:
            # The contract period is the same for all of its invoices, so
            # parse it once per contract instead of once per invoice.
            start_date = datetime.strptime(contract["start_date"], "%Y-%m-%d")
            end_date = datetime.strptime(contract["end_date"], "%Y-%m-%d")
            period_days = (end_date - start_date).days
            if period_days < 0:
                raise ValueError(
                    f"Contract {contract.get('contract_id')} ends before it starts"
                )

            # Generate multiple invoices per contract
            for _ in range(random.randint(5, 10)):
                # Randomly decide if this invoice will have an error
                has_error = random.random() < self.ERROR_RATE
                quantity = random.randint(self.MIN_QUANTITY, self.MAX_QUANTITY)

                correct_price = self.calculate_correct_price(contract, quantity)

                # An erroneous invoice is overcharged by a few percent
                charged_amount = correct_price
                if has_error:
                    charged_amount *= random.uniform(low_factor, high_factor)

                # Random date within the contract period (both ends inclusive)
                invoice_date = start_date + timedelta(
                    days=random.randint(0, period_days)
                )

                invoices.append({
                    "invoice_id": self._next_invoice_id(used_ids),
                    "contract_id": contract["contract_id"],
                    "date": invoice_date.strftime("%Y-%m-%d"),
                    "quantity": quantity,
                    "amount_charged": round(charged_amount, 2),
                    "correct_amount": round(correct_price, 2),
                    "has_error": has_error
                })

        return sorted(invoices, key=lambda x: x["date"])

    def save_invoices(self, invoices: List[Dict]) -> None:
        """Save generated invoices to JSON file.

        The list is stored under the ``"invoices"`` key of invoices.json,
        replacing any previous content.
        """
        invoices_file = os.path.join(self.data_dir, "invoices.json")
        with open(invoices_file, 'w', encoding="utf-8") as f:
            json.dump({"invoices": invoices}, f, indent=2)

    def generate_and_save(self) -> None:
        """Generate and save invoices in one step."""
        contracts = self.load_contracts()
        invoices = self.generate_invoices(contracts)
        self.save_invoices(invoices)

    def _read_invoices(self, invoices_file: str) -> List[Dict]:
        """Read the ``"invoices"`` list from ``invoices_file`` (empty if absent)."""
        with open(invoices_file, 'r', encoding="utf-8") as f:
            data = json.load(f)
        return data.get('invoices', [])

    def load_or_generate_invoices(self) -> List[Dict]:
        """Load existing invoices or generate new ones if they don't exist.

        If invoices.json exists but cannot be read or parsed, the problem is
        reported on stdout and the file is regenerated.

        Returns:
            The list of invoice dicts stored in invoices.json.
        """
        invoices_file = os.path.join(self.data_dir, "invoices.json")
        if os.path.exists(invoices_file):
            try:
                return self._read_invoices(invoices_file)
            except Exception as e:
                # Best effort: an unreadable or corrupt file is replaced with
                # freshly generated data instead of failing the caller.
                print(f"Error loading invoices: {str(e)}")
                print("Generating new invoices...")

        self.generate_and_save()
        return self._read_invoices(invoices_file)
if __name__ == "__main__":
    # Executing this module as a script (re)creates the contract and invoice
    # files in the default data directory.
    InvoiceGenerator().generate_and_save()
    print("Successfully generated invoice and contract data!")