# Arif
# Updated but not working
# 38f9e13
"""
Data Processing Service
Handles CSV and Excel file uploads and processing
"""
import logging
import uuid
from pathlib import Path

import pandas as pd
from fastapi import UploadFile
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class DataProcessor:
    """Process uploaded data files (CSV, Excel) into lists of row dicts.

    Each upload is written to a uniquely named scratch file under
    ``temp_dir``, parsed with pandas, and the scratch file is removed again
    before ``process_file`` returns.
    """

    # Accepted file extensions: lowercase, without the leading dot.
    SUPPORTED_FORMATS = ["csv", "xlsx", "xls"]

    def __init__(self):
        """Create the scratch directory for uploads if it does not exist."""
        self.temp_dir = Path("./uploads")
        self.temp_dir.mkdir(parents=True, exist_ok=True)

    async def process_file(self, file: "UploadFile") -> tuple:
        """Parse an uploaded CSV or Excel file.

        Args:
            file: Upload object exposing ``filename`` and an awaitable
                ``read()`` that returns the raw bytes.

        Returns:
            tuple: ``(data_list, file_type)`` where ``data_list`` is a list
            of one dict per row (missing cells are ``None``) and
            ``file_type`` is the lowercase extension ("csv", "xlsx", "xls").

        Raises:
            ValueError: If the extension is missing or unsupported, or if
                reading/parsing the upload fails for any reason.
        """
        scratch_path = None
        try:
            # Validate file type
            file_ext = self._get_file_extension(file.filename)
            if file_ext not in self.SUPPORTED_FORMATS:
                raise ValueError(f"Unsupported file type: {file_ext}")

            logger.info(f"🔄 Processing file: {file.filename}")

            # Save file temporarily. The client-supplied filename is never
            # used as a path: a name such as "../../x.csv" would otherwise
            # escape temp_dir. Only the already-validated extension is kept
            # (pandas uses it to choose an Excel engine), and the random
            # prefix keeps same-named uploads from overwriting each other.
            scratch_path = self.temp_dir / f"{uuid.uuid4().hex}.{file_ext}"
            contents = await file.read()
            with open(scratch_path, "wb") as f:
                f.write(contents)

            # Process based on file type.
            # NOTE: the pandas readers are synchronous calls, so parsing runs
            # inline in this coroutine.
            if file_ext == "csv":
                data = self._process_csv(str(scratch_path))
            else:  # xlsx or xls
                data = self._process_excel(str(scratch_path))

            logger.info(f"✅ File processed: {len(data)} rows")
            return data, file_ext
        except ValueError as e:
            logger.error(f"❌ Validation error: {e}")
            raise
        except Exception as e:
            logger.error(f"❌ File processing failed: {e}")
            raise ValueError(f"File processing failed: {e}") from e
        finally:
            # The scratch file is only needed while pandas parses it; remove
            # it on success and on failure so uploads do not pile up on disk.
            if scratch_path is not None:
                try:
                    scratch_path.unlink()
                except FileNotFoundError:
                    pass  # failed before the file was created
                except OSError as cleanup_error:
                    logger.warning(
                        f"Could not remove temporary file {scratch_path}: "
                        f"{cleanup_error}"
                    )

    def _get_file_extension(self, filename: str) -> str:
        """Return the lowercase text after the last dot in *filename*.

        Returns an empty string when *filename* is missing/empty or contains
        no dot, so that a file literally named "csv" is not mistaken for a
        CSV upload.
        """
        if not filename:
            return ""
        _, dot, extension = filename.rpartition(".")
        return extension.lower() if dot else ""

    @staticmethod
    def _frame_to_records(df) -> list:
        """Convert a DataFrame into a list of row dicts with None for missing cells."""
        # DataFrame.where() cannot store None in a numeric column (the value
        # is coerced back to NaN), so cast to object dtype first. The mask is
        # computed on the original frame so NaN and NaT are both detected.
        cleaned = df.astype(object).where(pd.notna(df), None)
        return cleaned.to_dict("records")

    def _process_csv(self, file_path: str) -> list:
        """Read the CSV at *file_path* with pandas and return its rows as dicts.

        Raises:
            ValueError: If pandas cannot read or convert the file.
        """
        try:
            df = pd.read_csv(file_path)
            # Replace NaN values with None (becomes null in JSON)
            data = self._frame_to_records(df)
            logger.info(f"📄 CSV processed: {len(data)} rows, {len(df.columns)} columns")
            return data
        except Exception as e:
            logger.error(f"❌ CSV processing failed: {e}")
            raise ValueError(f"CSV processing error: {e}") from e

    def _process_excel(self, file_path: str) -> list:
        """Read the Excel workbook at *file_path* with pandas and return its rows as dicts.

        Uses ``pd.read_excel`` with its default arguments.

        Raises:
            ValueError: If pandas cannot read or convert the file.
        """
        try:
            df = pd.read_excel(file_path)
            # Replace NaN values with None (becomes null in JSON)
            data = self._frame_to_records(df)
            logger.info(f"📊 Excel processed: {len(data)} rows, {len(df.columns)} columns")
            return data
        except Exception as e:
            logger.error(f"❌ Excel processing failed: {e}")
            raise ValueError(f"Excel processing error: {e}") from e