File size: 2,529 Bytes
8675ade
badef87
8675ade
 
4caacc1
badef87
8675ade
badef87
 
8675ade
 
 
badef87
 
 
 
 
 
 
 
 
 
 
8675ade
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286933c
8675ade
 
 
 
 
 
 
 
 
 
 
 
badef87
 
 
 
8675ade
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import math
import shutil
from fastapi import APIRouter
from src.core.utils import logger
from fastapi.responses import JSONResponse
from src.app.pipelines.modules import DataStatisticsWorkflow
from fastapi import APIRouter, UploadFile, File, HTTPException, Form


# Router on which this module registers its data-statistics POST endpoint.
data_statistics_router = APIRouter()

def sanitize_for_json(data):
    """Recursively replace non-finite floats with ``None`` so the data is JSON-safe.

    Strict JSON has no representation for ``NaN``, ``Infinity`` or
    ``-Infinity``, so those values are mapped to ``None`` (JSON ``null``).

    Parameters
    ----------
    data : Any
        Arbitrarily nested structure of dicts, lists, tuples and scalars.

    Returns
    -------
    Any
        A new structure of the same shape. Dicts, lists and tuples are rebuilt
        (tuples come back as plain tuples); dict keys are kept untouched; every
        other value is returned as-is.
    """
    if isinstance(data, dict):
        return {k: sanitize_for_json(v) for k, v in data.items()}
    if isinstance(data, list):
        return [sanitize_for_json(v) for v in data]
    if isinstance(data, tuple):
        # Tuples are encoded as JSON arrays too, so their items need the same
        # treatment; otherwise a NaN nested in a tuple slips through.
        return tuple(sanitize_for_json(v) for v in data)
    if isinstance(data, float) and not math.isfinite(data):
        return None
    return data

def delete_dir_contents(directory: str)->None:
    """Remove every file located directly inside ``directory``.

    Entries that are not files (for example sub-directories) are left in
    place; nothing below the top level of ``directory`` is touched.
    """
    candidates = (os.path.join(directory, name) for name in os.listdir(directory))
    # filter() is lazy, so each path is tested right before it is removed.
    for target in filter(os.path.isfile, candidates):
        os.remove(target)

@data_statistics_router.post('/')
async def main(file: UploadFile = File(...),  ml_task: str = Form(None)):
    ''' ## This endpoint accepts a CSV file upload to initiate the Data Statistics Workflow.

        ### Parameters:
        -----------
        - file : CSV File for the dataset
        \n
        - ml_task : Final machine learning task/target (optional form field)

        ### Returns:
        --------
        - JSON: the workflow results with non-finite floats replaced by null,
          or `{"status": "Pipeline failed to finish"}` if the workflow raises

        ### Raises:
        --------
        - HTTP 400 if the upload has no filename or it does not end in `.csv`
    '''

    # The filename can be missing, and the extension check should not depend
    # on letter case (e.g. "DATA.CSV" is still a CSV upload).
    filename = file.filename or ""
    if not filename.lower().endswith('.csv'):
        raise HTTPException(status_code=400, detail="Only CSV files are allowed.")

    # Clear the downloads folder and store the received file as 'dataset.csv'.
    # NOTE(review): this path is shared by every request, so concurrent
    # uploads overwrite each other's dataset.
    downloads_path = "src/core/cache/downloads"
    # Make sure the cache directory exists; listing a missing one would raise.
    os.makedirs(downloads_path, exist_ok=True)
    delete_dir_contents(downloads_path)
    destination_path = os.path.join(downloads_path, "dataset.csv")
    with open(destination_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    logger.info(f"CSV file saved to {destination_path}", log_type='eda-engine/data_statistics', console=True)

    # Run the data statistics workflow on the stored file.
    try:
        ds_wf = DataStatisticsWorkflow(data_source=f'{downloads_path}/dataset.csv', llm_choice="gpt-4o-mini", ml_task=ml_task)
        results = ds_wf.run(verbose=True)

        # NaN/inf are not valid JSON, so they are mapped to None before encoding.
        sanitized_data = sanitize_for_json(results)

        return JSONResponse(content=sanitized_data)

    except Exception as e:
        # Endpoint boundary: log the failure and report it in the body.
        # NOTE(review): this still answers with HTTP 200; kept so existing
        # clients that only inspect "status" keep working.
        logger.error(f"DataStatisticsWorkflow failed with error: {e}", log_type='eda-engine/data_statistics', console=True)
        return {
            "status": "Pipeline failed to finish",
        }