File size: 2,540 Bytes
8675ade
badef87
8675ade
4caacc1
badef87
a49614b
4caacc1
8675ade
 
 
 
 
 
 
 
 
badef87
 
 
 
 
 
 
 
 
 
 
8675ade
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286933c
8675ade
 
 
 
 
 
 
 
 
 
 
 
badef87
 
 
8675ade
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import math
import shutil
from src.core.utils import logger
from fastapi.responses import JSONResponse
from fastapi import APIRouter, UploadFile, File, HTTPException, Form
from src.app.pipelines.modules import UnivariateAnalysisWorkflow

univariate_analysis_router = APIRouter()

def delete_dir_contents(directory: str)->None:
    """Remove every regular file located directly inside ``directory``.

    Only entries for which ``os.path.isfile`` is true are removed;
    sub-directories and their contents are left in place, as is
    ``directory`` itself.

    Args:
        directory: Path of the folder to empty.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``os.remove`` (for example
            when ``directory`` does not exist).
    """
    # Lazily pair each entry name with its full path, then keep plain files.
    entry_paths = (os.path.join(directory, entry) for entry in os.listdir(directory))
    for entry_path in filter(os.path.isfile, entry_paths):
        os.remove(entry_path)

def sanitize_for_json(data):
    """Recursively replace non-finite floats with ``None``.

    ``NaN``, ``inf`` and ``-inf`` have no representation in strict JSON, so
    they are mapped to ``None`` (serialised as ``null``) before the payload is
    handed to a JSON encoder.

    Args:
        data: Arbitrary value. Dicts, lists and tuples are traversed
            recursively (dict keys are kept as-is, only values are
            sanitised); any other value is inspected directly.

    Returns:
        A new dict / list / tuple mirroring the input with every non-finite
        float replaced by ``None``. Values that are not containers or floats
        are returned unchanged.
    """
    if isinstance(data, dict):
        return {k: sanitize_for_json(v) for k, v in data.items()}
    if isinstance(data, list):
        return [sanitize_for_json(v) for v in data]
    if isinstance(data, tuple):
        # Tuples are encoded as JSON arrays, so their items need the same
        # treatment as list items; otherwise a nested NaN slips through.
        return tuple(sanitize_for_json(v) for v in data)
    if isinstance(data, float) and not math.isfinite(data):
        return None
    return data

@univariate_analysis_router.post('/')
async def main(file: UploadFile = File(...), ml_task: str = Form(None)):
    ''' ## This endpoint accepts a CSV file upload to initiate the Univariate Analysis Workflow.

        ### Parameters:
        -----------
        - file : CSV File for the dataset
        \n
        - ml_task : Final machine learning task/target (optional)

        ### Returns:
        --------
        - dict: Workflow results with non-finite numbers replaced by null,
          or `{"status": "Pipeline failed to finish"}` if the workflow raised.

        ### Raises:
        --------
        - HTTPException (400): the upload has no filename or the filename does not end in `.csv`
    '''

    # The filename can be missing on a multipart part; treat that like a
    # non-CSV upload instead of failing with AttributeError. The extension
    # check is case-insensitive so "DATA.CSV" is accepted as well.
    if not file.filename or not file.filename.lower().endswith('.csv'):
        raise HTTPException(status_code=400, detail="Only CSV files are allowed.")

    # Clear the downloads folder and store the received file as 'dataset.csv'.
    downloads_path = "src/core/cache/downloads"
    # Create the folder on first use; without it both the cleanup below and
    # the open() call raise FileNotFoundError.
    os.makedirs(downloads_path, exist_ok=True)
    delete_dir_contents(downloads_path)
    destination_path = os.path.join(downloads_path, "dataset.csv")
    with open(destination_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    logger.info(f"CSV file saved to {destination_path}", log_type='eda-engine/univariate_analysis', console=True)

    # Run the univariate analysis workflow on the file that was just saved.
    # NOTE(review): every request writes to the same dataset path, so
    # overlapping requests could overwrite each other's upload - confirm
    # whether concurrent use is expected.
    # NOTE(review): a workflow failure is reported with the default 200
    # status; kept as-is because clients may depend on it.
    try:
        ua_wf = UnivariateAnalysisWorkflow(data_source=destination_path, llm_choice="gpt-4o-mini", ml_task=ml_task)
        results = ua_wf.run(verbose=True)

        # Strip NaN/inf so the payload can be encoded as strict JSON.
        sanitized_data = sanitize_for_json(results)

        return JSONResponse(content=sanitized_data)

    except Exception as e:
        # Same log_type as the info message above so both entries are filed together.
        logger.error(f"UnivariateAnalysisWorkflow failed with error: {e}", log_type='eda-engine/univariate_analysis', console=True)
        return {
            "status": "Pipeline failed to finish"
        }