import math
import os
import shutil

from fastapi import APIRouter, File, Form, HTTPException, UploadFile
from fastapi.responses import JSONResponse

from src.app.pipelines.modules import DataStatisticsWorkflow
from src.core.utils import logger

data_statistics_router = APIRouter()


def sanitize_for_json(data):
    """Return a copy of *data* in which non-finite floats are replaced by None.

    Dicts, lists and tuples are walked recursively (tuples come back as
    lists, which is also how JSON would encode them). Any float that is
    NaN or +/-infinity becomes ``None``; every other value is returned
    unchanged.
    """
    if isinstance(data, dict):
        return {key: sanitize_for_json(value) for key, value in data.items()}
    # Tuples must be walked too: a NaN hidden inside one would otherwise
    # reach the JSON encoder untouched.
    if isinstance(data, (list, tuple)):
        return [sanitize_for_json(item) for item in data]
    if isinstance(data, float) and (math.isinf(data) or math.isnan(data)):
        return None
    return data


def delete_dir_contents(directory: str) -> None:
    """Remove every regular file located directly inside *directory*.

    Subdirectories (and anything inside them) are left untouched.

    Raises:
        FileNotFoundError: if *directory* does not exist (from ``os.listdir``).
    """
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        if os.path.isfile(file_path):
            os.remove(file_path)


@data_statistics_router.post('/')
async def main(file: UploadFile = File(...), ml_task: str = Form(None)):
    '''
    ## This endpoint accepts a CSV file upload to initiate the Data Statistics Workflow.

    ### Parameters:
    -----------
    - file : CSV File for the dataset \n
    - ml_task : Final machine learning task/target

    ### Returns:
    --------
    - dict: Markdown Report

    If the workflow fails, a JSON object with a `status` message is returned instead.
    '''
    # The client may omit the filename entirely, so guard against None before
    # inspecting the extension; the comparison is case-insensitive so that
    # "DATA.CSV" is accepted as well.
    if not file.filename or not file.filename.lower().endswith('.csv'):
        raise HTTPException(status_code=400, detail="Only CSV files are allowed.")

    # Clear the downloads folder and store the received file as 'dataset.csv'.
    downloads_path = "src/core/cache/downloads"
    # Make sure the cache directory exists; otherwise listing it below would
    # raise FileNotFoundError on a fresh checkout.
    os.makedirs(downloads_path, exist_ok=True)
    delete_dir_contents(downloads_path)

    destination_path = os.path.join(downloads_path, "dataset.csv")
    with open(destination_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    logger.info(f"CSV file saved to {destination_path}", log_type='eda-engine/data_statistics', console=True)

    # Run the data statistics workflow.
    # NOTE(review): the copy above and the workflow run are synchronous calls
    # inside an async handler. They are deliberately kept that way here:
    # every request writes to the same 'dataset.csv', so off-loading to a
    # thread pool would let concurrent uploads overwrite each other's data.
    try:
        ds_wf = DataStatisticsWorkflow(
            data_source=f'{downloads_path}/dataset.csv',
            llm_choice="gpt-4o-mini",
            ml_task=ml_task,
        )
        results = ds_wf.run(verbose=True)
        # Non-finite floats are replaced with None before building the response.
        sanitized_data = sanitize_for_json(results)
        return JSONResponse(content=sanitized_data)
    except Exception as e:
        # Top-level boundary: log the failure and answer with a status payload
        # rather than propagating the exception to the client.
        logger.error(f"DataStatisticsWorkflow failed with error: {e}", log_type='eda-engine/data_statistics', console=True)
        return {
            "status": "Pipeline failed to finish",
        }