#!/usr/bin/env python3
"""
CSV Generation Script for Hugging Face Space Deployment
This script fetches data from the API, applies preprocessing, and saves CSV files
that can be uploaded to your Hugging Face Space to avoid rate limiting issues.
Usage:
python generate_csv_for_space.py
Output files:
- optimus_apr_values.csv
- optimus_apr_statistics.csv
- optimus_roi_values.csv
"""
import logging
import sys
import os
from datetime import datetime

# Add the current directory to the path so we can import our modules
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Import our existing functions
from app import fetch_apr_data_from_db, save_to_csv, save_roi_to_csv
from initial_value_fixer import fix_apr_and_roi
from load_from_csv import check_csv_data_availability, get_data_freshness_info

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("csv_generation.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)
def main():
    """Main function to generate CSV files for Hugging Face Space deployment"""
    print("=" * 60)
    print("CSV Generation for Hugging Face Space Deployment")
    print("=" * 60)

    # Check if CSV files already exist
    print("\n1. Checking existing CSV files...")
    csv_info = check_csv_data_availability()

    for data_type, info in csv_info.items():
        if info['available']:
            print(f" ✓ {data_type.upper()}: {info['file']} ({info['records']} records, {info['size_mb']:.2f} MB)")
            print(f" Last modified: {info['modified']}")
        else:
            print(f" ✗ {data_type.upper()}: {info['error']}")

    # Check data freshness
    print("\n2. Checking data freshness...")
    freshness_info = get_data_freshness_info()

    for data_type, info in freshness_info.items():
        if data_type != 'error':
            hours_old = info['hours_old']
            is_fresh = info['is_fresh']
            status = "FRESH" if is_fresh else "STALE"
            print(f" {data_type.upper()}: {hours_old:.1f} hours old ({status})")

    # Ask user if they want to proceed
    print("\n3. Data generation options:")
    print(" [1] Generate fresh data from API (recommended)")
    print(" [2] Skip if CSV files are fresh (< 24 hours old)")
    print(" [3] Exit without generating")

    choice = input("\nEnter your choice (1-3): ").strip()
if choice == "3":
print("Exiting without generating CSV files.")
return
elif choice == "2":
# Check if all files are fresh
all_fresh = True
for data_type, info in freshness_info.items():
if data_type != 'error' and not info.get('is_fresh', False):
all_fresh = False
break
if all_fresh and csv_info['apr']['available'] and csv_info['roi']['available']:
print("All CSV files are fresh. No need to regenerate.")
return
else:
print("Some CSV files are missing or stale. Proceeding with generation...")
    # Generate fresh data
    print("\n4. Fetching data from API...")
    try:
        df_apr, df_roi = fetch_apr_data_from_db()

        if df_apr.empty and df_roi.empty:
            print(" ✗ No data fetched from API. Check your connection and API status.")
            return

        print(f" ✓ Fetched {len(df_apr)} APR records and {len(df_roi)} ROI records")
    except Exception as e:
        print(f" ✗ Error fetching data: {e}")
        logger.exception("Error fetching data from API")
        return

    # Apply preprocessing
    print("\n5. Applying preprocessing...")
    try:
        if not df_apr.empty:
            df_apr_processed = fix_apr_and_roi(df_apr)
            print(f" ✓ Processed APR data: {len(df_apr_processed)} records")
        else:
            df_apr_processed = df_apr
            print(" ! No APR data to process")

        if not df_roi.empty:
            df_roi_processed = df_roi  # ROI data is already processed in fetch function
            print(f" ✓ ROI data ready: {len(df_roi_processed)} records")
        else:
            df_roi_processed = df_roi
            print(" ! No ROI data to process")
    except Exception as e:
        print(f" ✗ Error during preprocessing: {e}")
        logger.exception("Error during preprocessing")
        return
    # Save CSV files
    print("\n6. Saving CSV files...")
    csv_files_created = []

    try:
        # Save APR data
        if not df_apr_processed.empty:
            apr_csv = save_to_csv(df_apr_processed)
            if apr_csv:
                csv_files_created.append(apr_csv)
                print(f" ✓ Saved APR data: {apr_csv}")

                # Also save statistics
                stats_csv = "optimus_apr_statistics.csv"
                if os.path.exists(stats_csv):
                    csv_files_created.append(stats_csv)
                    print(f" ✓ Saved APR statistics: {stats_csv}")

        # Save ROI data
        if not df_roi_processed.empty:
            roi_csv = save_roi_to_csv(df_roi_processed)
            if roi_csv:
                csv_files_created.append(roi_csv)
                print(f" ✓ Saved ROI data: {roi_csv}")

        if not csv_files_created:
            print(" ✗ No CSV files were created")
            return
    except Exception as e:
        print(f" ✗ Error saving CSV files: {e}")
        logger.exception("Error saving CSV files")
        return
    # Summary
    print("\n" + "=" * 60)
    print("CSV GENERATION COMPLETE")
    print("=" * 60)

    print(f"\nGenerated {len(csv_files_created)} CSV files:")
    for csv_file in csv_files_created:
        if os.path.exists(csv_file):
            size_mb = os.path.getsize(csv_file) / (1024 * 1024)
            print(f" • {csv_file} ({size_mb:.2f} MB)")

    print(f"\nGeneration completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    print("\nNext steps for Hugging Face Space deployment:")
    print("1. Upload these CSV files to your Hugging Face Space repository")
    print("2. Ensure your Space app.py imports and uses load_from_csv functions")
    print("3. The app will prioritize CSV data over API calls, avoiding rate limits")
    print("4. Re-run this script periodically to update the CSV files with fresh data")

    print("\nDeployment tips:")
    print("• Add these CSV files to your Space's file list")
    print("• Consider setting up a scheduled job to update CSV files regularly")
    print("• Monitor your Space logs to ensure CSV loading works correctly")
if __name__ == "__main__":
main()