#!/usr/bin/env python3
"""
CSV Generation Script for Hugging Face Space Deployment

This script fetches data from the API, applies preprocessing, and saves CSV
files that can be uploaded to your Hugging Face Space to avoid rate limiting
issues.

Usage:
    python generate_csv_for_space.py

Output files:
- optimus_apr_values.csv
- optimus_apr_statistics.csv
- optimus_roi_values.csv
"""

import logging
import os
import sys
from datetime import datetime

# Add the current directory to the path so we can import our modules
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Import our existing functions (project-local modules)
from app import fetch_apr_data_from_db, save_to_csv, save_roi_to_csv
from initial_value_fixer import fix_apr_and_roi
from load_from_csv import check_csv_data_availability, get_data_freshness_info

# Log to both a file and the console so unattended runs leave a record.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("csv_generation.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)


def _report_existing_csvs(csv_info):
    """Print availability details for each CSV data type.

    Args:
        csv_info: mapping of data-type name -> info dict as returned by
            check_csv_data_availability(); each entry has an 'available'
            flag plus either file details or an 'error' message.
    """
    for data_type, info in csv_info.items():
        if info['available']:
            print(f" ✓ {data_type.upper()}: {info['file']} ({info['records']} records, {info['size_mb']:.2f} MB)")
            print(f" Last modified: {info['modified']}")
        else:
            print(f" ✗ {data_type.upper()}: {info['error']}")


def _report_freshness(freshness_info):
    """Print each data set's age in hours and a FRESH/STALE verdict.

    The special 'error' key (if present) is skipped — it is not a data type.
    """
    for data_type, info in freshness_info.items():
        if data_type != 'error':
            hours_old = info['hours_old']
            status = "FRESH" if info['is_fresh'] else "STALE"
            print(f" {data_type.upper()}: {hours_old:.1f} hours old ({status})")


def _all_csvs_fresh(csv_info, freshness_info):
    """Return True when every data set is fresh AND both APR and ROI CSVs exist."""
    for data_type, info in freshness_info.items():
        if data_type != 'error' and not info.get('is_fresh', False):
            return False
    # NOTE(review): assumes check_csv_data_availability always returns
    # 'apr' and 'roi' keys — confirm against load_from_csv.
    return csv_info['apr']['available'] and csv_info['roi']['available']


def _fetch_data():
    """Fetch APR and ROI dataframes from the API.

    Returns:
        (df_apr, df_roi) on success, or None if the fetch failed or
        returned no data (a message is printed either way).
    """
    try:
        df_apr, df_roi = fetch_apr_data_from_db()
    except Exception as e:
        # Top-level boundary: report, log full traceback, and abort.
        print(f" ✗ Error fetching data: {e}")
        logger.exception("Error fetching data from API")
        return None
    if df_apr.empty and df_roi.empty:
        print(" ✗ No data fetched from API. Check your connection and API status.")
        return None
    print(f" ✓ Fetched {len(df_apr)} APR records and {len(df_roi)} ROI records")
    return df_apr, df_roi


def _preprocess(df_apr, df_roi):
    """Apply preprocessing to the fetched dataframes.

    Returns:
        (df_apr_processed, df_roi_processed) on success, or None if
        preprocessing raised (the error is printed and logged).
    """
    try:
        if not df_apr.empty:
            df_apr_processed = fix_apr_and_roi(df_apr)
            print(f" ✓ Processed APR data: {len(df_apr_processed)} records")
        else:
            df_apr_processed = df_apr
            print(" ! No APR data to process")
        if not df_roi.empty:
            df_roi_processed = df_roi  # ROI data is already processed in fetch function
            print(f" ✓ ROI data ready: {len(df_roi_processed)} records")
        else:
            df_roi_processed = df_roi
            print(" ! No ROI data to process")
        return df_apr_processed, df_roi_processed
    except Exception as e:
        print(f" ✗ Error during preprocessing: {e}")
        logger.exception("Error during preprocessing")
        return None


def _save_csvs(df_apr_processed, df_roi_processed):
    """Save the processed dataframes to CSV files.

    Returns:
        List of file names that were created. An empty list means
        nothing was saved (or saving raised) and the caller should abort.
    """
    csv_files_created = []
    try:
        # Save APR data
        if not df_apr_processed.empty:
            apr_csv = save_to_csv(df_apr_processed)
            if apr_csv:
                csv_files_created.append(apr_csv)
                print(f" ✓ Saved APR data: {apr_csv}")
            # save_to_csv presumably also writes the statistics file as a
            # side effect; record it only if it actually exists on disk.
            stats_csv = "optimus_apr_statistics.csv"
            if os.path.exists(stats_csv):
                csv_files_created.append(stats_csv)
                print(f" ✓ Saved APR statistics: {stats_csv}")
        # Save ROI data
        if not df_roi_processed.empty:
            roi_csv = save_roi_to_csv(df_roi_processed)
            if roi_csv:
                csv_files_created.append(roi_csv)
                print(f" ✓ Saved ROI data: {roi_csv}")
        if not csv_files_created:
            print(" ✗ No CSV files were created")
    except Exception as e:
        print(f" ✗ Error saving CSV files: {e}")
        logger.exception("Error saving CSV files")
        return []
    return csv_files_created


def _print_summary(csv_files_created):
    """Print the final report and the Hugging Face Space deployment checklist."""
    print("\n" + "=" * 60)
    print("CSV GENERATION COMPLETE")
    print("=" * 60)
    print(f"\nGenerated {len(csv_files_created)} CSV files:")
    for csv_file in csv_files_created:
        if os.path.exists(csv_file):
            size_mb = os.path.getsize(csv_file) / (1024 * 1024)
            print(f" • {csv_file} ({size_mb:.2f} MB)")
    print(f"\nGeneration completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("\nNext steps for Hugging Face Space deployment:")
    print("1. Upload these CSV files to your Hugging Face Space repository")
    print("2. Ensure your Space app.py imports and uses load_from_csv functions")
    print("3. The app will prioritize CSV data over API calls, avoiding rate limits")
    print("4. Re-run this script periodically to update the CSV files with fresh data")
    print("\nDeployment tips:")
    print("• Add these CSV files to your Space's file list")
    print("• Consider setting up a scheduled job to update CSV files regularly")
    print("• Monitor your Space logs to ensure CSV loading works correctly")


def main():
    """Main function to generate CSV files for Hugging Face Space deployment.

    Workflow: report existing CSVs and their freshness, prompt the user,
    then fetch -> preprocess -> save -> summarize. Aborts (returns early)
    at any failed step; each helper prints its own diagnostics.
    """
    print("=" * 60)
    print("CSV Generation for Hugging Face Space Deployment")
    print("=" * 60)

    # Check if CSV files already exist
    print("\n1. Checking existing CSV files...")
    csv_info = check_csv_data_availability()
    _report_existing_csvs(csv_info)

    # Check data freshness
    print("\n2. Checking data freshness...")
    freshness_info = get_data_freshness_info()
    _report_freshness(freshness_info)

    # Ask user if they want to proceed
    print("\n3. Data generation options:")
    print(" [1] Generate fresh data from API (recommended)")
    print(" [2] Skip if CSV files are fresh (< 24 hours old)")
    print(" [3] Exit without generating")
    choice = input("\nEnter your choice (1-3): ").strip()
    if choice == "3":
        print("Exiting without generating CSV files.")
        return
    elif choice == "2":
        if _all_csvs_fresh(csv_info, freshness_info):
            print("All CSV files are fresh. No need to regenerate.")
            return
        print("Some CSV files are missing or stale. Proceeding with generation...")
    # Any other input (including "1") falls through to fresh generation.

    # Generate fresh data
    print("\n4. Fetching data from API...")
    fetched = _fetch_data()
    if fetched is None:
        return
    df_apr, df_roi = fetched

    # Apply preprocessing
    print("\n5. Applying preprocessing...")
    processed = _preprocess(df_apr, df_roi)
    if processed is None:
        return
    df_apr_processed, df_roi_processed = processed

    # Save CSV files
    print("\n6. Saving CSV files...")
    csv_files_created = _save_csvs(df_apr_processed, df_roi_processed)
    if not csv_files_created:
        return

    # Summary
    _print_summary(csv_files_created)


if __name__ == "__main__":
    main()