File size: 2,114 Bytes
ab66d4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85fd14e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# routers/preprocess_routes.py
from flask import Blueprint, request, jsonify
import pandas as pd
from utils.preprocessor import DataPreprocessor
import logging

# Blueprint that the preprocessing endpoints in this module are registered on.
preprocess_bp = Blueprint('preprocess', __name__)

# Set up logging
# NOTE(review): basicConfig() configures the root logger at import time and is
# a no-op when the root logger already has handlers — confirm that configuring
# logging from this module (rather than the application entry point) is intended.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Single module-level DataPreprocessor instance, reused by every request.
# NOTE(review): assumes preprocess() can safely be called repeatedly on one
# shared instance — confirm against utils.preprocessor.
preprocessor = DataPreprocessor()

@preprocess_bp.route('/upload', methods=['POST'])
def upload_file():
    """Upload a CSV file, preprocess it, and return the result as JSON.

    Request:
        file: Multipart form field holding the upload. Required; the filename
            must end in ``.csv`` (case-insensitive).
        limit_rows: Optional integer query parameter. A positive value caps
            the number of rows returned, which keeps responses small when
            testing. ``0``, a missing value, or a value that does not parse
            as an integer means "no limit". Negative values are rejected.

    Returns:
        On success, a JSON object ``{"columns": [...], "data": [...]}`` where
        ``data`` holds one dict per row and missing values are emitted as
        ``null``.
        On failure, a JSON object ``{"detail": <message>}`` with status 400
        for client errors (no file, wrong extension, negative ``limit_rows``,
        empty, malformed or non-UTF-8 CSV) or status 500 for any unexpected
        error raised while reading or preprocessing the data.
    """
    if 'file' not in request.files:
        return jsonify({"detail": "No file part in the request"}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({"detail": "No selected file"}), 400
    if not file.filename.lower().endswith('.csv'):
        return jsonify({"detail": "Only CSV files are supported"}), 400

    limit_rows = request.args.get('limit_rows', type=int)
    # DataFrame.head(-n) means "everything except the last n rows", which is
    # never what a row limit should do, so refuse it before parsing the file.
    if limit_rows is not None and limit_rows < 0:
        return jsonify(
            {"detail": "limit_rows must be a non-negative integer"}
        ), 400

    try:
        logger.info("Received file: %s", file.filename)
        df = pd.read_csv(file)
        logger.info("CSV read successfully, shape: %s", df.shape)

        processed_df = preprocessor.preprocess(df)
        if limit_rows:
            processed_df = processed_df.head(limit_rows)
            logger.info("Limited to %s rows.", limit_rows)

        # Missing values would otherwise be serialized as the bare token NaN,
        # which is not valid JSON and breaks strict parsers (e.g. browsers).
        # Casting to object first lets None survive in numeric columns.
        json_safe_df = processed_df.astype(object).where(
            processed_df.notna(), None
        )

        response = {
            "columns": list(processed_df.columns),
            "data": json_safe_df.to_dict(orient="records"),
        }
        logger.info("Preprocessed %d records.", len(response['data']))
        return jsonify(response)
    except pd.errors.EmptyDataError:
        logger.error("Empty CSV file uploaded.")
        return jsonify({"detail": "Empty CSV file"}), 400
    except pd.errors.ParserError:
        logger.error("Invalid CSV format.")
        return jsonify({"detail": "Invalid CSV format"}), 400
    except UnicodeDecodeError:
        # read_csv decodes as UTF-8 by default; a file in another encoding is
        # a problem with the upload, not a server fault.
        logger.error("CSV file is not valid UTF-8.")
        return jsonify({"detail": "CSV file must be UTF-8 encoded"}), 400
    except Exception as e:
        # logger.exception already records the traceback.
        logger.exception("Unexpected error during file processing: %s", e)
        return jsonify({"detail": f"Failed to process file: {e}"}), 500