eda-franky-v1 / src /api /v1 /eda_engine /univariate_analysis.py
architojha's picture
fixed errors
badef87
import os
import math
import shutil
from src.core.utils import logger
from fastapi.responses import JSONResponse
from fastapi import APIRouter, UploadFile, File, HTTPException, Form
from src.app.pipelines.modules import UnivariateAnalysisWorkflow
# Router for the univariate-analysis API; the POST '/' handler defined in this
# module is registered on it.
univariate_analysis_router = APIRouter()
def delete_dir_contents(directory: str) -> None:
    '''Remove every regular file that sits directly inside ``directory``.

    Sub-directories (and anything inside them) are left untouched. The
    directory must already exist; ``os.listdir`` raises otherwise.
    '''
    # The listing is taken once up front; each entry is then tested and
    # removed in listing order.
    candidates = (os.path.join(directory, entry) for entry in os.listdir(directory))
    for path in filter(os.path.isfile, candidates):
        os.remove(path)
def sanitize_for_json(data):
    '''Return a copy of ``data`` in which non-finite floats are replaced by ``None``.

    Strict JSON has no representation for ``NaN`` or ``Infinity``, so these
    values are mapped to ``None`` (JSON ``null``) before serialisation.

    Parameters:
        data: Any value. Dicts, lists and tuples are walked recursively
            (dict keys are kept as-is); every other value is inspected directly.

    Returns:
        A structure of the same shape: new dict/list/tuple containers with
        sanitized values, ``None`` for ``nan``/``inf``/``-inf`` floats, and
        every other value returned unchanged.
    '''
    if isinstance(data, dict):
        return {key: sanitize_for_json(value) for key, value in data.items()}
    if isinstance(data, list):
        return [sanitize_for_json(item) for item in data]
    if isinstance(data, tuple):
        # Tuples serialise as JSON arrays too, so non-finite floats nested in
        # them must be cleaned just like those in lists.
        return tuple(sanitize_for_json(item) for item in data)
    if isinstance(data, float) and not math.isfinite(data):
        return None
    return data
@univariate_analysis_router.post('/')
async def main(file: UploadFile = File(...), ml_task: str = Form(None)):
    ''' ## This endpoint accepts a CSV file upload to initiate the Univariate Analysis Workflow.
    ### Parameters:
    -----------
    - file : CSV File for the dataset
    \n
    - ml_task : Final machine learning task/target
    ### Returns:
    --------
    - dict: Markdown Report
    \n
    - On workflow failure: `{"status": "Pipeline failed to finish"}`
    ### Raises:
    --------
    - HTTPException (400): the upload has no filename or it does not end with `.csv`
    '''
    # The filename may be missing on a malformed upload; treat that as "not a
    # CSV" instead of crashing. The extension check is case-insensitive so
    # names such as "DATA.CSV" are accepted.
    filename = file.filename or ""
    if not filename.lower().endswith('.csv'):
        raise HTTPException(status_code=400, detail="Only CSV files are allowed.")

    # Clear the downloads folder and store the received file as 'dataset.csv'.
    # NOTE(review): this path is shared by every request, so concurrent uploads
    # would overwrite each other -- confirm the endpoint is used by one client
    # at a time.
    downloads_path = "src/core/cache/downloads"
    # Create the cache directory on first use; without this, listing it below
    # raises FileNotFoundError on a fresh checkout.
    os.makedirs(downloads_path, exist_ok=True)
    delete_dir_contents(downloads_path)
    destination_path = os.path.join(downloads_path, "dataset.csv")
    with open(destination_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    logger.info(f"CSV file saved to {destination_path}", log_type='eda-engine/univariate_analysis', console=True)

    # Run the univariate analysis workflow.
    try:
        ua_wf = UnivariateAnalysisWorkflow(data_source=f'{downloads_path}/dataset.csv', llm_choice="gpt-4o-mini", ml_task=ml_task)
        results = ua_wf.run(verbose=True)
        # Non-finite floats are mapped to None before building the JSON response.
        sanitized_data = sanitize_for_json(results)
        return JSONResponse(content=sanitized_data)
    except Exception as e:
        # Log under the same log_type as the success path so all records for
        # this endpoint end up together.
        logger.error(f"UnivariateAnalysisWorkflow failed with error: {e}", log_type='eda-engine/univariate_analysis', console=True)
        # NOTE(review): this is returned with the default 200 status; kept for
        # backward compatibility with existing clients.
        return {
            "status": "Pipeline failed to finish"
        }