Arslan1997 committed on
Commit 66089c2 · Parent: ca4e625

added more robust uploading

requirements.txt CHANGED
@@ -1,5 +1,6 @@
 aiofiles==24.1.0
 beautifulsoup4==4.13.4
+chardet==5.2.0
 dspy==3.0.3
 litellm==1.75.2
 email_validator==2.2.0
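
The only dependency change is the new chardet pin, which backs the encoding auto-detection added to session_routes.py below. A minimal sketch of the detect-then-fall-back pattern it enables (the helper name and the 0.7 confidence threshold mirror the diff; the wrapper itself is illustrative, not part of the commit):

import chardet

def guess_encoding(raw: bytes, default: str = "utf-8") -> str:
    # chardet.detect returns a dict with 'encoding', 'confidence' and 'language' keys
    detected = chardet.detect(raw[:100000])  # sample only the head of the file, as the diff does
    if detected.get("encoding") and detected.get("confidence", 0) > 0.7:
        return detected["encoding"]
    return default  # low confidence: the caller falls back to a fixed list of encodings

print(guess_encoding("ciudad;año;población\n".encode("latin-1")))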
src/routes/session_routes.py CHANGED
@@ -24,10 +24,162 @@ import dspy
 import re
 # from fastapi.responses import JSONResponse
 import time
+import chardet
 
 logger = Logger("session_routes", see_time=False, console_log=False)
 
 
+def read_csv_robust(content: bytes, columns: Optional[List[str]] = None) -> tuple:
+    """
+    Robust CSV reader with multiple fallback strategies.
+    Returns: (DataFrame, success_message)
+    """
+    new_df = None
+    encodings_to_try = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1', 'utf-16']
+    delimiters_to_try = [',', ';', '\t', '|']
+
+    # Try auto-detect encoding first using chardet
+    try:
+        detected = chardet.detect(content[:100000])  # Sample first 100KB
+        if detected and detected.get('encoding') and detected.get('confidence', 0) > 0.7:
+            detected_encoding = detected['encoding']
+            if detected_encoding not in encodings_to_try:
+                encodings_to_try.insert(0, detected_encoding)
+            logger.log_message(f"Detected encoding: {detected_encoding} (confidence: {detected['confidence']:.2f})", level=logging.INFO)
+    except Exception as e:
+        logger.log_message(f"Encoding detection failed: {str(e)}", level=logging.WARNING)
+
+    # Try different encoding and delimiter combinations
+    for encoding in encodings_to_try:
+        try:
+            csv_content = content.decode(encoding)
+            sample = csv_content[:4096]
+
+            # Strategy 1: Try csv.Sniffer for delimiter detection
+            try:
+                import csv as _csv
+                dialect = _csv.Sniffer().sniff(sample, delimiters=''.join(delimiters_to_try))
+                delimiter = dialect.delimiter
+
+                new_df = pd.read_csv(
+                    io.StringIO(csv_content),
+                    sep=delimiter,
+                    engine='python',
+                    on_bad_lines='skip',
+                    encoding_errors='replace',
+                    low_memory=False
+                )
+
+                # Validate: Check if we got meaningful data (more than 1 column)
+                if new_df.shape[1] > 1:
+                    if columns:
+                        new_df = new_df[columns]
+                    logger.log_message(f"✓ CSV read with encoding={encoding}, delimiter='{delimiter}' ({new_df.shape[0]} rows, {new_df.shape[1]} cols)", level=logging.INFO)
+                    return new_df, f"Successfully parsed with {encoding} encoding and '{delimiter}' delimiter"
+            except Exception:
+                pass
+
+            # Strategy 2: Pandas automatic delimiter detection
+            try:
+                new_df = pd.read_csv(
+                    io.StringIO(csv_content),
+                    sep=None,
+                    engine='python',
+                    on_bad_lines='skip',
+                    encoding_errors='replace',
+                    low_memory=False
+                )
+
+                if new_df.shape[1] > 1:
+                    if columns:
+                        new_df = new_df[columns]
+                    logger.log_message(f"✓ CSV read with encoding={encoding}, auto-detected delimiter ({new_df.shape[0]} rows, {new_df.shape[1]} cols)", level=logging.INFO)
+                    return new_df, f"Successfully parsed with {encoding} encoding and auto-detected delimiter"
+            except Exception:
+                pass
+
+            # Strategy 3: Brute-force common delimiters
+            for delimiter in delimiters_to_try:
+                try:
+                    new_df = pd.read_csv(
+                        io.StringIO(csv_content),
+                        sep=delimiter,
+                        engine='python',
+                        on_bad_lines='skip',
+                        encoding_errors='replace',
+                        low_memory=False
+                    )
+
+                    if new_df.shape[1] > 1:
+                        if columns:
+                            new_df = new_df[columns]
+                        logger.log_message(f"✓ CSV read with encoding={encoding}, delimiter='{delimiter}' ({new_df.shape[0]} rows, {new_df.shape[1]} cols)", level=logging.INFO)
+                        return new_df, f"Successfully parsed with {encoding} encoding and '{delimiter}' delimiter"
+                except Exception:
+                    continue
+
+        except Exception as e:
+            logger.log_message(f"Failed encoding {encoding}: {str(e)}", level=logging.WARNING)
+            continue
+
+    raise ValueError(f"Could not parse CSV with any encoding/delimiter combination. Tried encodings: {encodings_to_try}")
+
+
+def read_excel_robust(contents: bytes, sheet_name=None) -> pd.DataFrame:
+    """
+    Robust Excel reader with multiple engine fallbacks.
+    """
+    engines = ['openpyxl', 'xlrd', None]  # None will use default
+
+    for engine in engines:
+        try:
+            if engine:
+                df = pd.read_excel(
+                    io.BytesIO(contents),
+                    sheet_name=sheet_name if sheet_name else 0,
+                    engine=engine
+                )
+            else:
+                df = pd.read_excel(
+                    io.BytesIO(contents),
+                    sheet_name=sheet_name if sheet_name else 0
+                )
+
+            logger.log_message(f"✓ Excel read with engine={engine or 'default'}", level=logging.INFO)
+            return df
+
+        except Exception as e:
+            logger.log_message(f"Failed Excel engine {engine}: {str(e)}", level=logging.WARNING)
+            continue
+
+    # Last resort: Try reading as CSV (sometimes .xlsx are actually CSV)
+    logger.log_message("All Excel engines failed, attempting to read as CSV...", level=logging.WARNING)
+    try:
+        df, msg = read_csv_robust(contents)
+        logger.log_message(f"✓ File read as CSV (may have been misnamed): {msg}", level=logging.INFO)
+        return df
+    except Exception as e:
+        raise ValueError(f"Could not read file as Excel or CSV. Last error: {str(e)}")
+
+
+def clean_dataframe(df: pd.DataFrame) -> pd.DataFrame:
+    """Clean dataframe after reading"""
+    # Strip whitespace from column names
+    df.columns = df.columns.str.strip()
+
+    # Remove completely empty rows and columns
+    df.dropna(how='all', inplace=True)
+    df.dropna(axis=1, how='all', inplace=True)
+
+    # Replace problematic values
+    df.replace({np.nan: None, np.inf: None, -np.inf: None}, inplace=True)
+
+    # Reset index
+    df.reset_index(drop=True, inplace=True)
+
+    return df
+
+
 def apply_model_safeguards(model_name: str, provider: str, temperature: float, max_tokens: int) -> dict:
     """Apply model-specific safeguards for temperature and max_tokens based on official API limits"""
     model_str = str(model_name).lower()
@@ -112,23 +264,34 @@ async def get_excel_sheets(
 ):
     """Get the list of sheet names from an Excel file"""
     try:
-        # Read the uploaded Excel file
         contents = await file.read()
 
-        # Load Excel file using pandas
-        excel_file = pd.ExcelFile(io.BytesIO(contents))
+        # Try multiple engines to read Excel
+        engines = ['openpyxl', 'xlrd', None]
 
-        # Get sheet names
-        sheet_names = excel_file.sheet_names
+        for engine in engines:
+            try:
+                if engine:
+                    excel_file = pd.ExcelFile(io.BytesIO(contents), engine=engine)
+                else:
+                    excel_file = pd.ExcelFile(io.BytesIO(contents))
+
+                sheet_names = excel_file.sheet_names
+                logger.log_message(f"✓ Found {len(sheet_names)} sheets using engine={engine or 'default'}", level=logging.INFO)
+
+                return {"sheets": sheet_names}
+            except Exception as e:
+                logger.log_message(f"Failed to read with engine {engine}: {str(e)}", level=logging.WARNING)
+                continue
 
-        # Log the sheets found
-        # logger.log_message(f"Found {len(sheet_names)} sheets in Excel file: {', '.join(sheet_names)}", level=logging.INFO)
+        raise ValueError("Could not read Excel file with any available engine")
 
-        # Return the sheet names
-        return {"sheets": sheet_names}
     except Exception as e:
         logger.log_message(f"Error getting Excel sheets: {str(e)}", level=logging.ERROR)
-        raise HTTPException(status_code=400, detail=f"Error reading Excel file: {str(e)}")
+        raise HTTPException(
+            status_code=400,
+            detail=f"Could not read Excel file. Please ensure it's a valid .xlsx or .xls file. Error: {str(e)}"
+        )
 
 
@@ -164,8 +327,24 @@ async def upload_excel(
         contents = await file.read()
 
         try:
-            # Load Excel file to get all sheet names
-            excel_file = pd.ExcelFile(io.BytesIO(contents))
+            # Use robust Excel reader to get sheet names
+            excel_file = None
+            engines = ['openpyxl', 'xlrd', None]
+
+            for engine in engines:
+                try:
+                    if engine:
+                        excel_file = pd.ExcelFile(io.BytesIO(contents), engine=engine)
+                    else:
+                        excel_file = pd.ExcelFile(io.BytesIO(contents))
+                    logger.log_message(f"✓ Excel file loaded with engine={engine or 'default'}", level=logging.INFO)
+                    break
+                except Exception:
+                    continue
+
+            if not excel_file:
+                raise ValueError("Could not load Excel file with any available engine")
+
             sheet_names = excel_file.sheet_names
 
             # Parse selected sheets if provided; else use all sheets
@@ -178,47 +357,31 @@ async def upload_excel(
            except Exception:
                pass
 
-            # Get session state and DuckDB connection
-            session_state = app_state.get_session_state(session_id)
-
            datasets = {}
-
-
-
-            # Process all sheets and register them in DuckDB
            processed_sheets = []
 
            for sheet_name in target_sheets:
                try:
-                    # Read each sheet
-                    sheet_df = pd.read_excel(io.BytesIO(contents), sheet_name=sheet_name)
-                    sheet_df.replace({np.nan: None, np.inf: None, -np.inf: None}, inplace=True)
-
+                    # Use robust Excel reader for each sheet
+                    sheet_df = read_excel_robust(contents, sheet_name=sheet_name)
 
-                    # Preprocessing steps
-                    # 1. Drop empty rows and columns
-                    sheet_df.dropna(how='all', inplace=True)
-                    sheet_df.dropna(how='all', axis=1, inplace=True)
+                    # Clean the dataframe
+                    sheet_df = clean_dataframe(sheet_df)
 
-                    # 2. Clean column names
-                    sheet_df.columns = sheet_df.columns.str.strip()
-
-                    # 3. Skip empty sheets
-                    if sheet_df.empty:
+                    # Skip empty sheets
+                    if sheet_df.empty or sheet_df.shape[1] == 0:
+                        logger.log_message(f"Skipping empty sheet: {sheet_name}", level=logging.WARNING)
                        continue
 
-                    # Register each sheet in DuckDB with a clean table name
+                    # Register sheet with clean name
                    clean_sheet_name = clean_dataset_name(sheet_name)
-                    # Check if the clean_sheet_name is a safe Python variable name; if not, append a random int
-
-                    # First drop the table if it exists
-
-
-                    # Then register the new table
-                    datasets[clean_sheet_name] = sheet_df  # Store the DataFrame, not the name
-
-
+                    datasets[clean_sheet_name] = sheet_df
                    processed_sheets.append(clean_sheet_name)
+
+                    logger.log_message(
+                        f"✓ Processed sheet '{sheet_name}' → '{clean_sheet_name}': {sheet_df.shape[0]} rows × {sheet_df.shape[1]} cols",
+                        level=logging.INFO
+                    )
 
                except Exception as e:
                    logger.log_message(f"Error processing sheet '{sheet_name}': {str(e)}", level=logging.WARNING)
@@ -229,22 +392,23 @@ async def upload_excel(
 
            # Update the session description (no primary dataset needed)
            desc = description
-            app_state.update_session_dataset(session_id,datasets,processed_sheets,desc)
-
-
+            app_state.update_session_dataset(session_id, datasets, processed_sheets, desc)
 
-            logger.log_message(f"Processed Excel file with {len(processed_sheets)} sheets: {', '.join(processed_sheets)}", level=logging.INFO)
+            logger.log_message(f" Excel upload complete: {len(processed_sheets)} sheets processed", level=logging.INFO)
 
            return {
                "message": "Excel file processed successfully",
                "session_id": session_id,
                "sheets_processed": processed_sheets,
                "total_sheets": len(processed_sheets)
            }
 
        except Exception as e:
            logger.log_message(f"Error processing Excel file: {str(e)}", level=logging.ERROR)
-            raise HTTPException(status_code=400, detail=f"Error processing Excel file: {str(e)}")
+            raise HTTPException(
+                status_code=400,
+                detail=f"Could not process Excel file. Please ensure it's a valid .xlsx or .xls file. Error: {str(e)}"
+            )
 
    except Exception as e:
        logger.log_message(f"Error in upload_excel: {str(e)}", level=logging.ERROR)
@@ -339,47 +503,32 @@ async def upload_dataframe(
        # Ensure it's a safe Python identifier
 
 
-        # Read and process the CSV file
+        # Read and process the CSV file using robust reader
        content = await file.read()
-        new_df = None
-        last_exception = None
-
-        # Try encodings with delimiter auto-detection
-        encodings_to_try = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']
-        delimiters_to_try = [',', ';', '\t', '|', ':', ' ']
-
-        for encoding in encodings_to_try:
-            try:
-                csv_content = content.decode(encoding)
-                sample = csv_content[:1024]
-                try:
-                    import csv as _csv
-                    dialect = _csv.Sniffer().sniff(sample, delimiters=delimiters_to_try)
-                    delimiter = dialect.delimiter
-                    new_df = pd.read_csv(io.StringIO(csv_content), sep=delimiter, engine='python')[columns]
-                except Exception:
-                    # Fallback to pandas automatic detection
-                    try:
-                        new_df = pd.read_csv(io.StringIO(csv_content), sep=None, engine='python')[columns]
-                    except Exception:
-                        # Final fallback: brute-force common delimiters
-                        for d in delimiters_to_try:
-                            try:
-                                new_df = pd.read_csv(io.StringIO(csv_content), sep=d, engine='python')[columns]
-                                break
-                            except Exception:
-                                new_df = None
-                if new_df is not None:
-                    new_df.replace({np.nan: None, np.inf: None, -np.inf: None}, inplace=True)
-                    logger.log_message(f"Successfully read CSV with encoding: {encoding}", level=logging.INFO)
-                    break
-            except Exception as e:
-                last_exception = e
-                logger.log_message(f"Failed to read CSV with encoding {encoding}: {str(e)}", level=logging.WARNING)
-                continue
 
-        if new_df is None:
-            raise HTTPException(status_code=400, detail=f"Error reading file with tried encodings: {encodings_to_try}. Last error: {str(last_exception)}")
+        try:
+            # Use the robust CSV reader
+            new_df, success_msg = read_csv_robust(content, columns=columns)
+
+            # Clean the dataframe
+            new_df = clean_dataframe(new_df)
+
+            # Validate we have data
+            if new_df.empty:
+                raise ValueError("CSV file contains no valid data after cleaning")
+
+            if new_df.shape[1] == 0:
+                raise ValueError("CSV file has no valid columns")
+
+            logger.log_message(f"CSV parsed successfully: {success_msg}", level=logging.INFO)
+            logger.log_message(f"Final dataset shape: {new_df.shape[0]} rows × {new_df.shape[1]} columns", level=logging.INFO)
+
+        except Exception as e:
+            logger.log_message(f"Failed to read CSV: {str(e)}", level=logging.ERROR)
+            raise HTTPException(
+                status_code=400,
+                detail=f"Could not read CSV file: {str(e)}. Please ensure the file is properly formatted."
+            )
 
        # Format the description
        desc = f" exact_python_name: `{name}` Dataset: {description}"
@@ -397,7 +546,12 @@ async def upload_dataframe(
 
        logger.log_message(f"Successfully uploaded dataset '{name}' for session {session_id}", level=logging.INFO)
 
-        return JSONResponse(content=sanitize_json({"message": "Dataframe uploaded successfully", "session_id": session_id}))
+        return JSONResponse(content=sanitize_json({
+            "message": "Dataframe uploaded successfully",
+            "session_id": session_id,
+            "rows": int(new_df.shape[0]),
+            "columns": int(new_df.shape[1])
+        }))
 
    except Exception as e:
        logger.log_message(f"Error in upload_dataframe: {str(e)}", level=logging.ERROR)
@@ -923,45 +1077,30 @@ async def preview_csv_upload(
 ):
    """Preview CSV file without modifying session"""
    try:
-        # Process file and return preview data only
        content = await file.read()
-        # Try encodings with delimiter auto-detection
-        encodings_to_try = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']
-        delimiters_to_try = [',', ';', '\t', '|', ':', ' ']
-        new_df = None
-        last_exception = None
-
-        for encoding in encodings_to_try:
-            try:
-                csv_content = content.decode(encoding)
-                sample = csv_content[:4096]
-                try:
-                    import csv as _csv
-                    dialect = _csv.Sniffer().sniff(sample, delimiters=delimiters_to_try)
-                    delimiter = dialect.delimiter
-                    new_df = pd.read_csv(io.StringIO(csv_content), sep=delimiter, engine='python')
-                except Exception:
-                    # Fallback to pandas automatic detection
-                    try:
-                        new_df = pd.read_csv(io.StringIO(csv_content), sep=None, engine='python')
-                    except Exception:
-                        # Final fallback: brute-force common delimiters
-                        for d in delimiters_to_try:
-                            try:
-                                new_df = pd.read_csv(io.StringIO(csv_content), sep=d, engine='python')
-                                break
-                            except Exception:
-                                new_df = None
-                if new_df is not None:
-                    logger.log_message(f"Successfully read CSV with encoding: {encoding}", level=logging.INFO)
-                    break
-            except Exception as e:
-                last_exception = e
-                logger.log_message(f"Failed to read CSV with encoding {encoding}: {str(e)}", level=logging.WARNING)
-                continue
 
-        if new_df is None:
-            raise HTTPException(status_code=400, detail=f"Error reading file with tried encodings: {encodings_to_try}. Last error: {str(last_exception)}")
+        # Use robust CSV reader
+        try:
+            new_df, success_msg = read_csv_robust(content)
+
+            # Clean the dataframe
+            new_df = clean_dataframe(new_df)
+
+            # Validate
+            if new_df.empty:
+                raise ValueError("CSV file contains no valid data")
+
+            if new_df.shape[1] == 0:
+                raise ValueError("CSV file has no valid columns")
+
+            logger.log_message(f"CSV preview: {success_msg}", level=logging.INFO)
+
+        except Exception as e:
+            logger.log_message(f"Failed to read CSV for preview: {str(e)}", level=logging.ERROR)
+            raise HTTPException(
+                status_code=400,
+                detail=f"Could not read CSV file: {str(e)}. Please ensure the file is properly formatted."
+            )
 
        # Clean and validate the name
        name = file.filename.replace('.csv', '').replace(' ', '_').lower().strip()
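
Taken together, the three new helpers give every upload route the same fallback chain. A rough usage sketch, assuming the signatures introduced in this commit (read_csv_robust returns a (DataFrame, message) tuple, read_excel_robust and clean_dataframe return a DataFrame); the import path and the load_upload wrapper are illustrative, not part of the commit:

from src.routes.session_routes import read_csv_robust, read_excel_robust, clean_dataframe

def load_upload(filename: str, content: bytes):
    """Parse raw upload bytes into a cleaned DataFrame plus a parse message."""
    if filename.lower().endswith((".xlsx", ".xls")):
        df = read_excel_robust(content)     # tries openpyxl, xlrd, default engine, then CSV as last resort
        msg = "parsed as Excel"
    else:
        df, msg = read_csv_robust(content)  # tries detected + fixed encodings and several delimiters
    df = clean_dataframe(df)                # strip headers, drop empty rows/columns, normalize NaN/inf
    if df.empty:
        raise ValueError("no usable data after cleaning")
    return df, msg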
src/utils/test.ipynb CHANGED
@@ -17,42 +17,7 @@
    "id": "3a1670ba",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "# Anthropic Model API References (Python, Bash, JS) - No fetch, just listing\n",
-    "\n",
-    "# --- Anthropic Model API Reference ---\n",
-    "\n",
-    "# 1. Python (using anthropic SDK)\n",
-    "\"\"\"\n",
-    "import anthropic\n",
-    "\n",
-    "client = anthropic.Anthropic(\n",
-    "    api_key=\"YOUR_ANTHROPIC_API_KEY\"\n",
-    ")\n",
-    "\n",
-    "# List available models (returns a list of ModelInfo objects)\n",
-    "models = client.models.list(limit=20)\n",
-    "print(models)\n",
-    "\"\"\"\n",
-    "\n",
-    "# 2. Bash (using curl)\n",
-    "\"\"\"\n",
-    "curl https://api.anthropic.com/v1/models \\\n",
-    "  --header \"x-api-key: $ANTHROPIC_API_KEY\" \\\n",
-    "  --header \"anthropic-version: 2023-06-01\"\n",
-    "\"\"\"\n",
-    "\n",
-    "# 3. JavaScript (using @anthropic-ai/sdk)\n",
-    "\"\"\"\n",
-    "import Anthropic from '@anthropic-ai/sdk';\n",
-    "\n",
-    "const anthropic = new Anthropic({\n",
-    "  apiKey: process.env.ANTHROPIC_API_KEY,\n",
-    "});\n",
-    "\n",
-    "\n",
-    "\n"
-   ]
+   "source": []
   },
   {
    "cell_type": "code",