Spaces:
Sleeping
Sleeping
import os | |
import shutil | |
from typing import Optional | |
from src.core.utils import logger | |
from fastapi import APIRouter, UploadFile, File, HTTPException, Form | |
from src.app.pipelines.modules import DataUnderstandingContextWorkflow | |
# Router object for this module's data-understanding endpoint(s).
# NOTE(review): where it is mounted on the application is not visible in this file.
data_understanding_router = APIRouter()
def delete_dir_contents(directory: str) -> None:
    """Remove every regular file directly inside ``directory``.

    Only top-level entries for which ``os.path.isfile`` is true are deleted;
    sub-directories (and anything inside them) are left untouched.

    Args:
        directory: Path of the folder to clear.

    Returns:
        None. A directory that does not exist is treated as already empty,
        so the call is a no-op instead of raising ``FileNotFoundError``.
    """
    try:
        filenames = os.listdir(directory)
    except FileNotFoundError:
        # Nothing to clear: the folder has not been created yet.
        return

    for filename in filenames:
        file_path = os.path.join(directory, filename)
        if os.path.isfile(file_path):
            os.remove(file_path)
async def main(file: UploadFile = File(...), business_requirements: Optional[str] = Form(None)):
    ''' ## This endpoint accepts a CSV file upload & additional business requirements/context to initiate the Data Understanding Context Workflow.
    ### Parameters:
    -----------
    - file : CSV file for the dataset (the filename must end in `.csv`, case-insensitive)
    \n
    - business_requirements : Additional business context information about the dataset (optional)
    ### Returns:
    --------
    - dict: `status`, plus `results` (the workflow output) when the workflow succeeds
    ### Raises:
    -------
    - HTTPException (400): the upload has no filename or is not a `.csv` file
    '''
    # An upload may arrive without a filename; treat that as "not a CSV"
    # instead of failing with an AttributeError on None.
    filename = file.filename or ""
    if not filename.lower().endswith('.csv'):
        raise HTTPException(status_code=400, detail="Only CSV files are allowed.")

    # Clear the downloads folder and store the received file as 'dataset.csv'.
    # The path is relative to the process working directory, so make sure the
    # folder exists before listing or writing into it.
    # NOTE(review): every request writes to the same file, so overlapping
    # requests would overwrite each other's upload - confirm this is acceptable.
    downloads_path = "src/core/cache/downloads"
    os.makedirs(downloads_path, exist_ok=True)
    delete_dir_contents(downloads_path)
    destination_path = os.path.join(downloads_path, "dataset.csv")
    with open(destination_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    logger.info(f"CSV file saved to {destination_path}", log_type='eda-engine/data_understanding', console=True)

    # Run the data understanding workflow on the file that was just saved.
    # NOTE(review): run() is called without await inside a coroutine; if it is
    # long-running it will hold up the event loop - consider a threadpool.
    try:
        duc_wf = DataUnderstandingContextWorkflow(data_source=destination_path, llm_choice="gpt-4o-mini", business_context=business_requirements)
        results = duc_wf.run(verbose=True)
    except Exception as e:
        # Endpoint boundary: log the failure and report it in the response
        # body rather than letting the exception propagate.
        logger.error(f"DataUnderstandingContextWorkflow failed with error: {e}", log_type='eda-engine/data_understanding', console=True)
        return {
            "status": "Pipeline failed to finish",
        }

    return {
        "status": "Pipeline finished running",
        "results": results
    }