# Spaces: Sleeping — page-status text that appears to have been captured along with the file; not part of the module.
import os | |
import math | |
import shutil | |
from src.core.utils import logger | |
from fastapi.responses import JSONResponse | |
from fastapi import APIRouter, UploadFile, File, HTTPException, Form | |
from src.app.pipelines.modules import UnivariateAnalysisWorkflow | |
univariate_analysis_router = APIRouter() | |
def delete_dir_contents(directory: str) -> None:
    """Delete every regular file located directly inside ``directory``.

    Only top-level entries are examined: sub-directories, and anything
    inside them, are left untouched. A directory that does not exist is
    treated as already empty instead of raising ``FileNotFoundError``.

    Args:
        directory: Path of the directory whose files should be removed.
    """
    try:
        filenames = os.listdir(directory)
    except FileNotFoundError:
        # Nothing to clear: the goal ("no files left in the directory")
        # already holds when the directory itself is absent.
        return

    for filename in filenames:
        file_path = os.path.join(directory, filename)
        # isfile() is False for directories, so they are skipped rather
        # than passed to os.remove(), which cannot delete them.
        if os.path.isfile(file_path):
            os.remove(file_path)
def sanitize_for_json(data):
    """Recursively replace non-finite floats with ``None``.

    Strict JSON has no representation for NaN or +/-Infinity, so those
    values are mapped to ``None`` (JSON ``null``) wherever they appear in
    nested dicts, lists, or tuples.

    Args:
        data: Any value. Dict values and list/tuple items are processed
            recursively; dict keys are left as they are.

    Returns:
        A sanitized copy for containers (tuples come back as lists, which
        is how JSON encodes them anyway), ``None`` for a non-finite float,
        and the value itself for everything else.
    """
    if isinstance(data, dict):
        return {k: sanitize_for_json(v) for k, v in data.items()}
    if isinstance(data, (list, tuple)):
        # Tuples must be walked too, otherwise a NaN nested inside one
        # would slip through unsanitized.
        return [sanitize_for_json(v) for v in data]
    if isinstance(data, float) and not math.isfinite(data):
        return None
    return data
async def main(file: UploadFile = File(...), ml_task: str = Form(None)):
    ''' ## This endpoint accepts a CSV file upload to initiate the Univariate Analysis Workflow.
    ### Parameters:
    -----------
    - file : CSV File for the dataset
    \n
    - ml_task : Final machine learning task/target
    ### Returns:
    --------
    - dict: Markdown Report
    \n
    - On workflow failure: {"status": "Pipeline failed to finish"}
    ### Raises:
    --------
    - HTTPException (400): the uploaded file does not have a .csv extension
    '''
    # The upload may carry no filename at all; treat that like a non-CSV
    # upload instead of failing with an AttributeError. The extension check
    # is case-insensitive so "DATA.CSV" is accepted as well.
    filename = file.filename or ""
    if not filename.lower().endswith('.csv'):
        raise HTTPException(status_code=400, detail="Only CSV files are allowed.")

    # Clear the downloads folder and store the received file as 'dataset.csv'.
    downloads_path = "src/core/cache/downloads"
    # Create the folder on first use so clearing it and writing into it
    # cannot fail merely because the cache directory is absent.
    os.makedirs(downloads_path, exist_ok=True)
    delete_dir_contents(downloads_path)
    destination_path = os.path.join(downloads_path, "dataset.csv")
    with open(destination_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    logger.info(f"CSV file saved to {destination_path}", log_type='eda-engine/univariate_analysis', console=True)

    # Run the univariate analysis workflow on the stored file.
    try:
        ua_wf = UnivariateAnalysisWorkflow(data_source=f'{downloads_path}/dataset.csv', llm_choice="gpt-4o-mini", ml_task=ml_task)
        results = ua_wf.run(verbose=True)
        # Non-finite floats are replaced before the response is built.
        sanitized_data = sanitize_for_json(results)
        return JSONResponse(content=sanitized_data)
    except Exception as e:
        # Top-level boundary: any failure in the workflow, the sanitizing
        # step, or building the response is logged and reported to the
        # client as a status payload.
        # NOTE(review): this log_type differs from the one used for the
        # info message above — confirm whether that is intentional.
        # NOTE(review): the failure payload is returned as a plain dict with
        # no explicit error status code — confirm clients rely on that.
        logger.error(f"UnivariateAnalysisWorkflow failed with error: {e}", log_type='eda-engine/dataunivariate_analysis_statistics', console=True)
        return {
            "status": "Pipeline failed to finish"
        }