gauravlochab
chore: change the system from loading to adding the csv for solving the rate limiter error
175e92c
#!/usr/bin/env python3
"""
CSV Generation Script for Hugging Face Space Deployment

This script fetches data from the API, applies preprocessing, and saves CSV files
that can be uploaded to your Hugging Face Space to avoid rate limiting issues.

Usage:
    python generate_csv_for_space.py

Output files:
    - optimus_apr_values.csv
    - optimus_apr_statistics.csv
    - optimus_roi_values.csv
"""
import logging | |
import sys | |
import os | |
from datetime import datetime | |
# Add the current directory to the path so we can import our modules | |
sys.path.append(os.path.dirname(os.path.abspath(__file__))) | |
# Import our existing functions | |
from app import fetch_apr_data_from_db, save_to_csv, save_roi_to_csv | |
from initial_value_fixer import fix_apr_and_roi | |
from load_from_csv import check_csv_data_availability, get_data_freshness_info | |
# Set up logging: every record goes both to csv_generation.log (created in the
# current working directory) and to the console.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        # Explicit UTF-8 so non-ASCII text in logged messages or tracebacks is
        # written the same way regardless of the platform's locale encoding.
        logging.FileHandler("csv_generation.log", encoding="utf-8"),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)
def _report_existing_csvs(csv_info):
    """Print one status line for every entry of ``csv_info``.

    Each value is read as a dict: ``available`` selects the branch, then either
    ``file``/``records``/``size_mb``/``modified`` or ``error`` is displayed.
    """
    for data_type, info in csv_info.items():
        if info['available']:
            print(f" ✓ {data_type.upper()}: {info['file']} ({info['records']} records, {info['size_mb']:.2f} MB)")
            print(f" Last modified: {info['modified']}")
        else:
            print(f" ✗ {data_type.upper()}: {info['error']}")


def _report_freshness(freshness_info):
    """Print the age and FRESH/STALE status of every per-file freshness entry."""
    for data_type, info in freshness_info.items():
        # An 'error' entry is not a per-file record, so it has no age to show.
        if data_type == 'error':
            continue
        status = "FRESH" if info['is_fresh'] else "STALE"
        print(f" {data_type.upper()}: {info['hours_old']:.1f} hours old ({status})")


def _prompt_choice():
    """Ask for a menu option until "1", "2" or "3" is entered and return it.

    When stdin is closed (EOF), e.g. under a scheduler, option "1" is returned
    so the script can run unattended instead of dying with a traceback.
    """
    while True:
        try:
            choice = input("\nEnter your choice (1-3): ").strip()
        except EOFError:
            print("\nNo interactive input available; defaulting to option 1.")
            return "1"
        if choice in ("1", "2", "3"):
            return choice
        print(f"Invalid choice {choice!r}. Please enter 1, 2 or 3.")


def _csvs_are_fresh(csv_info, freshness_info):
    """Return True only if freshness is known, all files are fresh, and APR/ROI exist.

    NOTE(review): an 'error' key in ``freshness_info`` is taken to mean the
    freshness check itself failed - confirm against get_data_freshness_info.
    """
    # No entries, or an error marker, means freshness is unknown: without this
    # guard the all() below would be vacuously true.
    if not freshness_info or 'error' in freshness_info:
        return False
    if not all(info.get('is_fresh', False) for info in freshness_info.values()):
        return False
    return all(
        csv_info.get(key, {}).get('available', False) for key in ('apr', 'roi')
    )


def _save_csv_files(df_apr, df_roi):
    """Save the non-empty frames and return the list of CSV paths reported as saved."""
    created = []
    if not df_apr.empty:
        apr_csv = save_to_csv(df_apr)
        if apr_csv:
            created.append(apr_csv)
            print(f" ✓ Saved APR data: {apr_csv}")
            # NOTE(review): the statistics file is only checked for existence;
            # presumably save_to_csv writes it as a side effect - confirm.
            stats_csv = "optimus_apr_statistics.csv"
            if os.path.exists(stats_csv):
                created.append(stats_csv)
                print(f" ✓ Saved APR statistics: {stats_csv}")
    if not df_roi.empty:
        roi_csv = save_roi_to_csv(df_roi)
        if roi_csv:
            created.append(roi_csv)
            print(f" ✓ Saved ROI data: {roi_csv}")
    return created


def _print_summary(csv_files):
    """Print the closing report: generated files with sizes, timestamp, next steps."""
    banner = "=" * 60
    print("\n" + banner)
    print("CSV GENERATION COMPLETE")
    print(banner)
    print(f"\nGenerated {len(csv_files)} CSV files:")
    for csv_file in csv_files:
        if os.path.exists(csv_file):
            size_mb = os.path.getsize(csv_file) / (1024 * 1024)
            print(f" • {csv_file} ({size_mb:.2f} MB)")
    print(f"\nGeneration completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("\nNext steps for Hugging Face Space deployment:")
    print("1. Upload these CSV files to your Hugging Face Space repository")
    print("2. Ensure your Space app.py imports and uses load_from_csv functions")
    print("3. The app will prioritize CSV data over API calls, avoiding rate limits")
    print("4. Re-run this script periodically to update the CSV files with fresh data")
    print("\nDeployment tips:")
    print("• Add these CSV files to your Space's file list")
    print("• Consider setting up a scheduled job to update CSV files regularly")
    print("• Monitor your Space logs to ensure CSV loading works correctly")


def main():
    """Main function to generate CSV files for Hugging Face Space deployment.

    Reports the CSV files already on disk and their freshness, asks whether to
    regenerate, then fetches the data, preprocesses the APR frame, writes the
    CSV files and prints a summary with deployment hints.

    Failures are reported on stdout (exceptions are also logged with their
    traceback) and end the run early. Always returns ``None``.
    """
    banner = "=" * 60
    print(banner)
    print("CSV Generation for Hugging Face Space Deployment")
    print(banner)

    # Check if CSV files already exist
    print("\n1. Checking existing CSV files...")
    csv_info = check_csv_data_availability()
    _report_existing_csvs(csv_info)

    # Check data freshness
    print("\n2. Checking data freshness...")
    freshness_info = get_data_freshness_info()
    _report_freshness(freshness_info)

    # Ask user if they want to proceed
    print("\n3. Data generation options:")
    print(" [1] Generate fresh data from API (recommended)")
    print(" [2] Skip if CSV files are fresh (< 24 hours old)")
    print(" [3] Exit without generating")
    choice = _prompt_choice()
    if choice == "3":
        print("Exiting without generating CSV files.")
        return
    if choice == "2":
        if _csvs_are_fresh(csv_info, freshness_info):
            print("All CSV files are fresh. No need to regenerate.")
            return
        print("Some CSV files are missing or stale. Proceeding with generation...")

    # Generate fresh data
    print("\n4. Fetching data from API...")
    try:
        df_apr, df_roi = fetch_apr_data_from_db()
        if df_apr.empty and df_roi.empty:
            print(" ✗ No data fetched from API. Check your connection and API status.")
            return
        print(f" ✓ Fetched {len(df_apr)} APR records and {len(df_roi)} ROI records")
    except Exception as e:
        print(f" ✗ Error fetching data: {e}")
        logger.exception("Error fetching data from API")
        return

    # Apply preprocessing
    print("\n5. Applying preprocessing...")
    try:
        if not df_apr.empty:
            df_apr_processed = fix_apr_and_roi(df_apr)
            print(f" ✓ Processed APR data: {len(df_apr_processed)} records")
        else:
            df_apr_processed = df_apr
            print(" ! No APR data to process")
        # ROI data is already processed in the fetch function, so it is passed through.
        df_roi_processed = df_roi
        if not df_roi.empty:
            print(f" ✓ ROI data ready: {len(df_roi_processed)} records")
        else:
            print(" ! No ROI data to process")
    except Exception as e:
        print(f" ✗ Error during preprocessing: {e}")
        logger.exception("Error during preprocessing")
        return

    # Save CSV files
    print("\n6. Saving CSV files...")
    try:
        csv_files_created = _save_csv_files(df_apr_processed, df_roi_processed)
    except Exception as e:
        print(f" ✗ Error saving CSV files: {e}")
        logger.exception("Error saving CSV files")
        return
    if not csv_files_created:
        print(" ✗ No CSV files were created")
        return

    # Summary
    _print_summary(csv_files_created)
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()