eda-franky-v1 / src /api /v1 /eda_engine /data_understanding.py
architojha's picture
test
286933c
import os
import shutil
from typing import Optional
from src.core.utils import logger
from fastapi import APIRouter, UploadFile, File, HTTPException, Form
from src.app.pipelines.modules import DataUnderstandingContextWorkflow
# Router that the data-understanding endpoint below registers itself on.
data_understanding_router = APIRouter()
def delete_dir_contents(directory: str) -> None:
    """Delete every regular file located directly inside *directory*.

    Only the top level is scanned: entries that are not files (for example
    subdirectories) are skipped, so nested content is left in place. Errors
    from listing the directory or removing a file propagate to the caller.
    """
    entry_paths = [os.path.join(directory, entry) for entry in os.listdir(directory)]
    for entry_path in entry_paths:
        if not os.path.isfile(entry_path):
            continue
        os.remove(entry_path)
@data_understanding_router.post('/')
async def main(file: UploadFile = File(...), business_requirements: Optional[str] = Form(None)):
    """ ## This endpoint accepts a CSV file upload & additional business requirements/context to initiate the Data Understanding Context Workflow.

    ### Parameters:
    -----------
    - file : CSV File for the dataset
    \n
    - business_requirements : Additional business context information about the dataset

    ### Returns:
    --------
    - dict: `status` message and, on success, `results` (Markdown Report)

    ### Raises:
    --------
    - HTTPException (400): the uploaded file does not have a `.csv` extension
    """
    # The upload's filename may be absent; treat that as "not a CSV" instead of
    # crashing on None. The extension check is case-insensitive so "DATA.CSV"
    # is accepted as well.
    if not (file.filename or "").lower().endswith('.csv'):
        raise HTTPException(status_code=400, detail="Only CSV files are allowed.")

    # Clear the downloads folder and store the received file as 'dataset.csv'.
    # NOTE(review): every request writes to the same path, so concurrent
    # uploads would overwrite each other - confirm this endpoint is only used
    # by one client at a time.
    downloads_path = "src/core/cache/downloads"
    # Make sure the cache folder exists before listing/writing into it;
    # otherwise the first request on a fresh checkout fails.
    os.makedirs(downloads_path, exist_ok=True)
    delete_dir_contents(downloads_path)

    destination_path = os.path.join(downloads_path, "dataset.csv")
    with open(destination_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    logger.info(f"CSV file saved to {destination_path}", log_type='eda-engine/data_understanding', console=True)

    # Run the data understanding workflow on the file that was just saved.
    try:
        duc_wf = DataUnderstandingContextWorkflow(data_source=destination_path, llm_choice="gpt-4o-mini", business_context=business_requirements)
        results = duc_wf.run(verbose=True)
    except Exception as e:
        # Top-level boundary: log the failure and report it in the payload
        # (the response shape is kept so existing clients keep working).
        logger.error(f"DataUnderstandingContextWorkflow failed with error: {e}", log_type='eda-engine/data_understanding', console=True)
        return {
            "status": "Pipeline failed to finish",
        }

    return {
        "status": "Pipeline finished running",
        "results": results
    }