contextpilot committed on
Commit
76bc2b7
·
1 Parent(s): 989fe7c

Reduce file size limit from 50MB to 15MB for better UX

Browse files

- Changed MAX_FILE_SIZE from 50MB to 15MB
- Added RECOMMENDED_FILE_SIZE constant (5MB)
- Added warning dialog for files over 5MB
- Updated frontend hints and validation messages
- Provides estimated processing times to set user expectations

Files changed (2) hide show
  1. app.py +408 -154
  2. templates/index.html +78 -20
app.py CHANGED
@@ -40,9 +40,14 @@ from QASystem.retrieval_and_generation import get_result
40
  from QASystem.ingestion import ingest_document, get_embedder
41
  from QASystem.utils import pinecone_config
42
 
43
- # Increase file size limits for large PDFs (50MB max)
44
- MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB in bytes
45
- MAX_UPLOAD_SIZE = 50 * 1024 * 1024 # FastAPI body size limit
 
 
 
 
 
46
 
47
  # Setup directories
48
  UPLOADS_DIR = Path("uploads")
@@ -196,6 +201,393 @@ async def load_preloaded_file(request: Request):
196
  traceback.print_exc()
197
  return JSONResponse(status_code=500, content={"success": False, "error": str(e)})
198
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  @app.get("/test")
200
  async def test(request: Request):
201
  return templates.TemplateResponse("test.html", {"request": request})
@@ -403,157 +795,19 @@ async def document_status():
403
 
404
  @app.get("/preview_document")
405
  async def preview_document():
406
- """Serve the uploaded document for preview with format conversion"""
407
- try:
408
- if current_document["filename"] is None:
409
- return Response(content="No document uploaded", status_code=404)
410
-
411
- file_path = UPLOADS_DIR / current_document["filename"]
412
- if not file_path.exists():
413
- return Response(content="Document file not found", status_code=404)
414
-
415
- ext = file_path.suffix.lower()
416
-
417
- # Handle PDF - serve directly
418
- if ext == ".pdf":
419
- with open(file_path, "rb") as f:
420
- file_content = f.read()
421
- return Response(
422
- content=file_content,
423
- media_type="application/pdf",
424
- headers={"Content-Disposition": f"inline; filename={current_document['filename']}"}
425
- )
426
-
427
- # Handle DOCX/DOC - convert to HTML
428
- elif ext in [".docx", ".doc"]:
429
- try:
430
- import docx
431
- doc = docx.Document(file_path)
432
- html_content = "<html><head><meta charset='utf-8'><style>"
433
- html_content += "body { font-family: Arial, sans-serif; padding: 40px; max-width: 800px; margin: 0 auto; line-height: 1.6; }"
434
- html_content += "h1, h2, h3 { color: #333; margin-top: 24px; }"
435
- html_content += "p { margin-bottom: 12px; }</style></head><body>"
436
-
437
- for para in doc.paragraphs:
438
- if para.text.strip():
439
- # Check if it's a heading
440
- if para.style.name.startswith('Heading'):
441
- level = para.style.name.replace('Heading ', '')
442
- if level.isdigit():
443
- html_content += f"<h{level}>{para.text}</h{level}>"
444
- else:
445
- html_content += f"<h3>{para.text}</h3>"
446
- else:
447
- html_content += f"<p>{para.text}</p>"
448
-
449
- html_content += "</body></html>"
450
- return Response(content=html_content, media_type="text/html")
451
- except Exception as e:
452
- print(f"Error converting DOCX: {e}")
453
- return Response(content=f"Error converting document: {str(e)}", status_code=500)
454
-
455
- # Handle Excel files - convert to HTML table
456
- elif ext in [".xlsx", ".xls"]:
457
- try:
458
- import pandas as pd
459
- df = pd.read_excel(file_path)
460
- html_content = "<html><head><meta charset='utf-8'><style>"
461
- html_content += "body { font-family: Arial, sans-serif; padding: 20px; }"
462
- html_content += "table { border-collapse: collapse; width: 100%; margin-top: 20px; }"
463
- html_content += "th, td { border: 1px solid #ddd; padding: 12px; text-align: left; }"
464
- html_content += "th { background-color: #667eea; color: white; font-weight: 600; }"
465
- html_content += "tr:nth-child(even) { background-color: #f9fafb; }"
466
- html_content += "tr:hover { background-color: #f3f4f6; }"
467
- html_content += "</style></head><body>"
468
- html_content += f"<h2>Preview: {current_document['filename']}</h2>"
469
- html_content += df.to_html(index=False, classes='preview-table')
470
- html_content += "</body></html>"
471
- return Response(content=html_content, media_type="text/html")
472
- except Exception as e:
473
- print(f"Error converting Excel: {e}")
474
- return Response(content=f"Error converting spreadsheet: {str(e)}", status_code=500)
475
-
476
- # Handle CSV - convert to HTML table
477
- elif ext == ".csv":
478
- try:
479
- import pandas as pd
480
- df = pd.read_csv(file_path)
481
- html_content = "<html><head><meta charset='utf-8'><style>"
482
- html_content += "body { font-family: Arial, sans-serif; padding: 20px; }"
483
- html_content += "table { border-collapse: collapse; width: 100%; margin-top: 20px; }"
484
- html_content += "th, td { border: 1px solid #ddd; padding: 12px; text-align: left; }"
485
- html_content += "th { background-color: #667eea; color: white; font-weight: 600; }"
486
- html_content += "tr:nth-child(even) { background-color: #f9fafb; }"
487
- html_content += "tr:hover { background-color: #f3f4f6; }"
488
- html_content += "</style></head><body>"
489
- html_content += f"<h2>Preview: {current_document['filename']}</h2>"
490
- html_content += df.to_html(index=False, classes='preview-table')
491
- html_content += "</body></html>"
492
- return Response(content=html_content, media_type="text/html")
493
- except Exception as e:
494
- print(f"Error converting CSV: {e}")
495
- return Response(content=f"Error converting CSV: {str(e)}", status_code=500)
496
-
497
- # Handle JSON - format and display
498
- elif ext == ".json":
499
- try:
500
- import json
501
- with open(file_path, 'r', encoding='utf-8') as f:
502
- json_data = json.load(f)
503
- formatted_json = json.dumps(json_data, indent=2)
504
- html_content = "<html><head><meta charset='utf-8'><style>"
505
- html_content += "body { font-family: 'Courier New', monospace; padding: 20px; background: #1e1e1e; color: #d4d4d4; }"
506
- html_content += "pre { white-space: pre-wrap; word-wrap: break-word; }"
507
- html_content += "</style></head><body>"
508
- html_content += f"<pre>{formatted_json}</pre>"
509
- html_content += "</body></html>"
510
- return Response(content=html_content, media_type="text/html")
511
- except Exception as e:
512
- print(f"Error formatting JSON: {e}")
513
- return Response(content=f"Error formatting JSON: {str(e)}", status_code=500)
514
-
515
- # Handle Markdown - convert to HTML
516
- elif ext == ".md":
517
- try:
518
- with open(file_path, 'r', encoding='utf-8') as f:
519
- md_content = f.read()
520
- # Simple markdown to HTML conversion
521
- html_content = "<html><head><meta charset='utf-8'><style>"
522
- html_content += "body { font-family: Arial, sans-serif; padding: 40px; max-width: 800px; margin: 0 auto; line-height: 1.6; }"
523
- html_content += "h1, h2, h3 { color: #333; margin-top: 24px; }"
524
- html_content += "code { background: #f3f4f6; padding: 2px 6px; border-radius: 4px; }"
525
- html_content += "pre { background: #f3f4f6; padding: 12px; border-radius: 8px; overflow-x: auto; }"
526
- html_content += "</style></head><body>"
527
- html_content += f"<pre style='white-space: pre-wrap; font-family: inherit;'>{md_content}</pre>"
528
- html_content += "</body></html>"
529
- return Response(content=html_content, media_type="text/html")
530
- except Exception as e:
531
- print(f"Error converting Markdown: {e}")
532
- return Response(content=f"Error converting Markdown: {str(e)}", status_code=500)
533
-
534
- # Handle TXT - display as plain text
535
- elif ext == ".txt":
536
- try:
537
- with open(file_path, 'r', encoding='utf-8') as f:
538
- txt_content = f.read()
539
- html_content = "<html><head><meta charset='utf-8'><style>"
540
- html_content += "body { font-family: 'Courier New', monospace; padding: 20px; white-space: pre-wrap; }"
541
- html_content += "</style></head><body>"
542
- html_content += txt_content
543
- html_content += "</body></html>"
544
- return Response(content=html_content, media_type="text/html")
545
- except Exception as e:
546
- print(f"Error reading text file: {e}")
547
- return Response(content=f"Error reading file: {str(e)}", status_code=500)
548
-
549
- else:
550
- return Response(content="Preview not available for this file type", status_code=400)
551
-
552
- except Exception as e:
553
- print(f"[ERROR] Error previewing document: {e}")
554
- import traceback
555
- traceback.print_exc()
556
- return Response(content=f"Error: {str(e)}", status_code=500)
557
 
558
  @app.post("/delete_document")
559
  async def delete_document():
 
40
  from QASystem.ingestion import ingest_document, get_embedder
41
  from QASystem.utils import pinecone_config
42
 
43
# Upload size thresholds, all expressed in bytes.
# Rough processing-time guidance behind these values:
#   5MB  -> ~1-2 minutes processing (good UX)
#   10MB -> ~3-5 minutes processing (acceptable)
#   15MB -> ~5-10 minutes processing (max recommended)
RECOMMENDED_FILE_SIZE = 5 * 1024 ** 2  # 5MB - optimal for fast processing
MAX_FILE_SIZE = 15 * 1024 ** 2  # 15MB - balanced limit
MAX_UPLOAD_SIZE = 15 * 1024 ** 2  # FastAPI body size limit
51
 
52
  # Setup directories
53
  UPLOADS_DIR = Path("uploads")
 
201
  traceback.print_exc()
202
  return JSONResponse(status_code=500, content={"success": False, "error": str(e)})
203
 
204
@app.get("/preview_file/{filename:path}")
async def preview_any_file(filename: str, source: str = "auto"):
    """
    Preview a file from the uploads or data folder without downloading it.

    PDFs are returned unchanged for inline display; every other supported
    extension is handed to the matching ``convert_*_to_html`` helper.

    Args:
        filename: Path of the file relative to the chosen folder. It is taken
            from the URL, so it is treated as untrusted input.
        source: 'uploads', 'data', or 'auto' (checks uploads first, then data).

    Returns:
        A Response holding the preview, or an HTML error page with status
        404 (file not found / outside the allowed folders),
        400 (unsupported extension) or 500 (unexpected failure).
    """
    # Function-scope import so the block does not depend on the file header.
    from urllib.parse import quote

    try:
        # Determine which folders may be searched
        if source == "uploads":
            search_dirs = [UPLOADS_DIR]
        elif source == "data":
            search_dirs = [DATA_DIR]
        else:  # auto - check both locations
            search_dirs = [UPLOADS_DIR, DATA_DIR]

        file_path = None
        for base_dir in search_dirs:
            root = base_dir.resolve()
            candidate = (root / filename).resolve()
            # Path-traversal guard: "../" segments, absolute paths and
            # symlinks must not resolve to anything outside the folder.
            if root not in candidate.parents:
                continue
            # is_file() also rejects directories, which cannot be previewed.
            if candidate.is_file():
                file_path = candidate
                break

        if file_path is None:
            return Response(
                content=generate_error_html("File Not Found", f"The file '{filename}' was not found."),
                media_type="text/html",
                status_code=404
            )

        ext = file_path.suffix.lower()

        # Handle PDF - serve directly for inline viewing
        if ext == ".pdf":
            # RFC 5987 "filename*" form: the percent-encoded name cannot break
            # out of the header and survives non-ASCII characters.
            disposition = f"inline; filename*=UTF-8''{quote(file_path.name, safe='')}"
            return Response(
                content=file_path.read_bytes(),
                media_type="application/pdf",
                headers={
                    "Content-Disposition": disposition,
                    "X-Content-Type-Options": "nosniff"
                }
            )

        # Extension -> converter that renders the file as an HTML preview
        converters = {
            ".docx": convert_docx_to_html,
            ".doc": convert_docx_to_html,
            ".xlsx": convert_excel_to_html,
            ".xls": convert_excel_to_html,
            ".csv": convert_csv_to_html,
            ".json": convert_json_to_html,
            ".md": convert_markdown_to_html,
            ".txt": convert_text_to_html,
        }
        converter = converters.get(ext)
        if converter is None:
            return Response(
                content=generate_error_html("Unsupported Format", f"Preview not available for '{ext}' files."),
                media_type="text/html",
                status_code=400
            )
        return converter(file_path, filename)

    except Exception as e:
        # Top-level boundary of the endpoint: log the failure and answer
        # with an HTML error page instead of letting the request crash.
        print(f"[ERROR] Error previewing file {filename}: {e}")
        import traceback
        traceback.print_exc()
        return Response(
            content=generate_error_html("Preview Error", str(e)),
            media_type="text/html",
            status_code=500
        )
289
+
290
+ # ============================================================================
291
+ # DOCUMENT CONVERSION HELPER FUNCTIONS
292
+ # ============================================================================
293
+
294
def generate_preview_html_base(title: str, content: str, extra_styles: str = "") -> str:
    """Generate the base HTML page used for every preview.

    Args:
        title: Plain text shown in the <title> tag and the page header
            (callers pass a filename). It is HTML-escaped here because
            filenames can contain markup characters.
        content: HTML fragment placed inside the content area. It is inserted
            verbatim, so callers must escape any untrusted text themselves.
        extra_styles: Additional CSS rules appended to the built-in
            stylesheet, inserted verbatim.

    Returns:
        The complete HTML document as a string.
    """
    # Function-scope import so the block does not depend on the file header.
    from html import escape

    safe_title = escape(str(title))
    return f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Preview: {safe_title}</title>
    <style>
        * {{ margin: 0; padding: 0; box-sizing: border-box; }}
        body {{
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
            line-height: 1.6;
            color: #1f2937;
            background: #f9fafb;
        }}
        .preview-container {{
            max-width: 900px;
            margin: 0 auto;
            padding: 40px 20px;
            background: white;
            min-height: 100vh;
            box-shadow: 0 0 20px rgba(0,0,0,0.1);
        }}
        .preview-header {{
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 20px 30px;
            margin: -40px -20px 30px -20px;
            border-radius: 0;
        }}
        .preview-header h1 {{
            font-size: 20px;
            font-weight: 600;
            margin: 0;
            display: flex;
            align-items: center;
            gap: 10px;
        }}
        .preview-header .file-icon {{ font-size: 24px; }}
        .content {{ padding: 0 10px; }}
        h1, h2, h3 {{ color: #1f2937; margin-top: 24px; margin-bottom: 12px; }}
        p {{ margin-bottom: 16px; }}
        code {{ background: #f3f4f6; padding: 2px 6px; border-radius: 4px; font-size: 14px; }}
        pre {{
            background: #1e1e1e;
            color: #d4d4d4;
            padding: 20px;
            border-radius: 8px;
            overflow-x: auto;
            margin: 16px 0;
        }}
        table {{
            border-collapse: collapse;
            width: 100%;
            margin: 20px 0;
            font-size: 14px;
        }}
        th, td {{
            border: 1px solid #e5e7eb;
            padding: 12px 16px;
            text-align: left;
        }}
        th {{
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            font-weight: 600;
            position: sticky;
            top: 0;
        }}
        tr:nth-child(even) {{ background-color: #f9fafb; }}
        tr:hover {{ background-color: #f3f4f6; }}
        {extra_styles}
    </style>
</head>
<body>
    <div class="preview-container">
        <div class="preview-header">
            <h1><span class="file-icon">📄</span> {safe_title}</h1>
        </div>
        <div class="content">
            {content}
        </div>
    </div>
</body>
</html>"""
380
+
381
def generate_error_html(title: str, message: str) -> str:
    """Generate an HTML error page.

    Args:
        title: Short error heading, treated as plain text.
        message: Error details, treated as plain text. Callers pass
            filenames taken from the request URL and exception messages,
            so both values are HTML-escaped before being embedded.

    Returns:
        The complete HTML document produced by ``generate_preview_html_base``
        with the fixed page title "Error".
    """
    # Function-scope import so the block does not depend on the file header.
    from html import escape

    safe_title = escape(str(title))
    safe_message = escape(str(message))
    body = f"""<div style="text-align: center; padding: 60px 20px;">
    <div style="font-size: 64px; margin-bottom: 20px;">⚠️</div>
    <h2 style="color: #ef4444; margin-bottom: 16px;">{safe_title}</h2>
    <p style="color: #6b7280;">{safe_message}</p>
</div>"""
    return generate_preview_html_base("Error", body, "")
392
+
393
def convert_docx_to_html(file_path: Path, filename: str) -> Response:
    """Convert a DOCX file to HTML for preview.

    Non-empty paragraphs are emitted first (as headings or <p>), followed by
    every table in the document; the first row of each table is rendered as
    header cells. All document text is HTML-escaped so it is displayed
    instead of being interpreted as markup.

    Args:
        file_path: Location of the document on disk.
        filename: Name passed to the page template as its title.

    Returns:
        An HTML Response with the preview, or an HTML error page with
        status 500 if the document cannot be read or converted.
    """
    try:
        from html import escape
        import docx

        doc = docx.Document(file_path)

        content_parts = []
        for para in doc.paragraphs:
            if not para.text.strip():
                continue
            tag = "p"
            style_name = para.style.name
            if style_name.startswith('Heading'):
                level = style_name.replace('Heading ', '')
                # HTML only defines h1-h6; any other heading style
                # (including "Heading 0") falls back to h3.
                tag = f"h{level}" if level in ("1", "2", "3", "4", "5", "6") else "h3"
            content_parts.append(f"<{tag}>{escape(para.text)}</{tag}>")

        # Also extract tables if present
        for table in doc.tables:
            rows_html = []
            for i, row in enumerate(table.rows):
                cell_tag = "th" if i == 0 else "td"
                cells_html = "".join(
                    f"<{cell_tag}>{escape(cell.text)}</{cell_tag}>" for cell in row.cells
                )
                rows_html.append(f"<tr>{cells_html}</tr>")
            content_parts.append("<table>" + "".join(rows_html) + "</table>")

        content = "\n".join(content_parts)
        page = generate_preview_html_base(filename, content)
        return Response(content=page, media_type="text/html")

    except Exception as e:
        # Any read/convert failure becomes an HTML error page, not a crash.
        return Response(
            content=generate_error_html("DOCX Conversion Error", str(e)),
            media_type="text/html",
            status_code=500
        )
433
+
434
def convert_excel_to_html(file_path: Path, filename: str) -> Response:
    """Convert an Excel workbook to HTML tables for preview.

    Every sheet gets a heading, a "rows × columns" summary and a table
    limited to the first 500 rows.

    Args:
        file_path: Location of the workbook on disk.
        filename: Name passed to the page template as its title.

    Returns:
        An HTML Response with the preview, or an HTML error page with
        status 500 if the workbook cannot be read or converted.
    """
    try:
        from html import escape
        import pandas as pd

        content_parts = []

        # The context manager closes the workbook handle even when a sheet
        # fails to parse.
        with pd.ExcelFile(file_path) as excel_file:
            # Read all sheets
            for sheet_name in excel_file.sheet_names:
                df = pd.read_excel(excel_file, sheet_name=sheet_name)
                # Sheet names come from the uploaded file, so escape them.
                content_parts.append(f"<h2>📊 Sheet: {escape(str(sheet_name))}</h2>")
                content_parts.append(f"<p style='color: #6b7280;'>{len(df)} rows × {len(df.columns)} columns</p>")
                # Limit to first 500 rows for performance
                if len(df) > 500:
                    content_parts.append("<p style='color: #f59e0b;'>⚠️ Showing first 500 rows</p>")
                    df = df.head(500)
                content_parts.append(df.to_html(index=False, classes='preview-table', na_rep='—'))

        content = "\n".join(content_parts)
        page = generate_preview_html_base(filename, content)
        return Response(content=page, media_type="text/html")

    except Exception as e:
        # Any read/convert failure becomes an HTML error page, not a crash.
        return Response(
            content=generate_error_html("Excel Conversion Error", str(e)),
            media_type="text/html",
            status_code=500
        )
463
+
464
def convert_csv_to_html(file_path: Path, filename: str) -> Response:
    """Render a CSV file as an HTML table preview.

    The page shows a "rows × columns" summary followed by the table; when the
    file has more than 500 rows only the first 500 are rendered and a notice
    says so. Any failure is reported as an HTML error page with status 500.
    """
    try:
        import pandas as pd

        frame = pd.read_csv(file_path)
        row_total = len(frame)

        sections = [f"<p style='color: #6b7280;'>{row_total} rows × {len(frame.columns)} columns</p>"]

        # Cap the rendered table at 500 rows for performance
        if row_total > 500:
            sections.append("<p style='color: #f59e0b;'>⚠️ Showing first 500 rows</p>")
            frame = frame.head(500)

        sections.append(frame.to_html(index=False, classes='preview-table', na_rep='—'))
        page = generate_preview_html_base(filename, "\n".join(sections))
        return Response(content=page, media_type="text/html")

    except Exception as e:
        error_page = generate_error_html("CSV Conversion Error", str(e))
        return Response(content=error_page, media_type="text/html", status_code=500)
488
+
489
def convert_json_to_html(file_path: Path, filename: str) -> Response:
    """Convert a JSON file to a pretty-printed HTML preview.

    Args:
        file_path: Location of the JSON file on disk.
        filename: Name passed to the page template as its title.

    Returns:
        An HTML Response with the re-indented JSON inside a <pre> block, or
        an HTML error page with status 500 if the file cannot be read or
        parsed.
    """
    try:
        # Function-scope imports: the file's top-level imports are not
        # visible from this block, so do not rely on `json` being there.
        import json
        from html import escape

        # "utf-8-sig" reads plain UTF-8 and also strips a leading BOM,
        # which json.load would otherwise reject.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            json_data = json.load(f)

        formatted_json = json.dumps(json_data, indent=2, ensure_ascii=False)

        # Escape so that string values containing markup are displayed as
        # text instead of being rendered by the browser.
        content = f"<pre>{escape(formatted_json)}</pre>"
        extra_styles = """
        pre {
            background: #1e1e1e !important;
            color: #ce9178 !important;
            font-family: 'Fira Code', 'Consolas', monospace;
            font-size: 14px;
            line-height: 1.5;
        }
        """
        html_content = generate_preview_html_base(filename, content, extra_styles)
        return Response(content=html_content, media_type="text/html")

    except Exception as e:
        # Any read/parse failure becomes an HTML error page, not a crash.
        return Response(
            content=generate_error_html("JSON Parse Error", str(e)),
            media_type="text/html",
            status_code=500
        )
520
+
521
def convert_markdown_to_html(file_path: Path, filename: str) -> Response:
    """Convert a Markdown file to HTML for preview.

    Uses the ``markdown`` package when it is installed; otherwise the escaped
    source text is shown inside a <pre> block.

    Args:
        file_path: Location of the Markdown file on disk.
        filename: Name passed to the page template as its title.

    Returns:
        An HTML Response with the preview, or an HTML error page with
        status 500 if the file cannot be read or converted.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            md_content = f.read()

        # Try using markdown library if available
        try:
            import markdown
            html_content = markdown.markdown(
                md_content,
                extensions=['tables', 'fenced_code', 'codehilite', 'toc']
            )
        except ImportError:
            # Fallback: show the escaped source text as-is
            from html import escape
            html_content = f"<pre style='white-space: pre-wrap; font-family: inherit;'>{escape(md_content)}</pre>"

        extra_styles = """
        .content img { max-width: 100%; height: auto; border-radius: 8px; margin: 16px 0; }
        .content blockquote {
            border-left: 4px solid #667eea;
            padding-left: 20px;
            margin: 16px 0;
            color: #6b7280;
            font-style: italic;
        }
        .content ul, .content ol { margin: 16px 0; padding-left: 30px; }
        .content li { margin: 8px 0; }
        """
        html_output = generate_preview_html_base(filename, html_content, extra_styles)
        return Response(
            content=html_output,
            media_type="text/html",
            headers={
                # The rendered Markdown comes from an uploaded file and may
                # carry raw HTML. The preview page itself needs no scripts,
                # so forbid script execution, plugins and <base> rewriting.
                "Content-Security-Policy": "script-src 'none'; object-src 'none'; base-uri 'none'"
            }
        )

    except Exception as e:
        # Any read/convert failure becomes an HTML error page, not a crash.
        return Response(
            content=generate_error_html("Markdown Conversion Error", str(e)),
            media_type="text/html",
            status_code=500
        )
561
+
562
def convert_text_to_html(file_path: Path, filename: str) -> Response:
    """Convert a plain-text file to HTML for preview.

    The page shows a "lines • words" summary followed by the escaped file
    content inside a <pre> block.

    Args:
        file_path: Location of the text file on disk.
        filename: Name passed to the page template as its title.

    Returns:
        An HTML Response with the preview, or an HTML error page with
        status 500 if the file cannot be read.
    """
    try:
        from html import escape

        # errors="replace" keeps files that are not valid UTF-8 previewable:
        # undecodable bytes are shown as U+FFFD instead of failing the page.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            raw_text = f.read()

        # Count on the raw text. splitlines() does not report an extra line
        # for a trailing newline and yields 0 for an empty file.
        lines = len(raw_text.splitlines())
        words = len(raw_text.split())

        txt_content = escape(raw_text)

        pre_style = (
            "white-space: pre-wrap; background: #f9fafb; padding: 20px; border-radius: 8px; "
            'font-family: "Fira Code", Consolas, monospace; font-size: 14px; line-height: 1.6; '
            "border: 1px solid #e5e7eb;"
        )
        content = f"""
        <p style='color: #6b7280; margin-bottom: 20px;'>{lines} lines • {words} words</p>
        <pre style='{pre_style}'>{txt_content}</pre>
        """
        html_output = generate_preview_html_base(filename, content)
        return Response(content=html_output, media_type="text/html")

    except Exception as e:
        # Any read failure becomes an HTML error page, not a crash.
        return Response(
            content=generate_error_html("Text File Error", str(e)),
            media_type="text/html",
            status_code=500
        )
590
+
591
  @app.get("/test")
592
  async def test(request: Request):
593
  return templates.TemplateResponse("test.html", {"request": request})
 
795
 
796
  @app.get("/preview_document")
797
  async def preview_document():
798
+ """
799
+ Serve the current uploaded document for preview.
800
+ Redirects to the new unified preview endpoint.
801
+ """
802
+ if current_document["filename"] is None:
803
+ return Response(
804
+ content=generate_error_html("No Document", "No document has been uploaded yet."),
805
+ media_type="text/html",
806
+ status_code=404
807
+ )
808
+
809
+ # Use the new unified preview endpoint
810
+ return await preview_any_file(current_document["filename"], source="uploads")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
811
 
812
  @app.post("/delete_document")
813
  async def delete_document():
templates/index.html CHANGED
@@ -618,7 +618,7 @@
618
  <div class="upload-section" id="upload-section">
619
  <div class="upload-icon">📤</div>
620
  <div class="upload-text">Upload Your Document</div>
621
- <div class="upload-hint">Supports PDF, DOCX, DOC, TXT, MD, CSV, JSON, XLSX, XLS (Max 50MB)</div>
622
  <label for="file-input" class="file-input-label">
623
  Choose File
624
  </label>
@@ -740,29 +740,59 @@
740
  }
741
  }
742
 
743
- // Preview document function
744
  function previewDocument() {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
745
  const modal = document.getElementById('preview-modal');
746
  const previewBody = document.getElementById('preview-body');
747
  const previewTitle = document.getElementById('preview-title');
748
- const filename = docName.textContent;
749
 
750
  previewTitle.textContent = `Preview: ${filename}`;
751
 
752
- // Clear previous content
753
- previewBody.innerHTML = '<div style="text-align: center; padding: 40px;"><div class="spinner"></div><p>Loading preview...</p></div>';
 
 
 
 
 
754
 
755
  // Show modal
756
  modal.classList.add('active');
757
 
758
- // Load preview in iframe for all file types (server handles conversion)
759
- previewBody.innerHTML = `<iframe class="preview-iframe" src="/preview_document"></iframe>`;
 
 
 
760
  }
761
 
762
  // Close preview modal
763
  function closePreview() {
764
  const modal = document.getElementById('preview-modal');
 
765
  modal.classList.remove('active');
 
 
766
  }
767
 
768
  // Close modal when clicking outside
@@ -870,16 +900,28 @@
870
 
871
  if (data.success && data.files && data.files.length > 0) {
872
  container.innerHTML = data.files.map(file => `
873
- <button onclick="loadPreloadedFile('${file.name}')"
874
- style="background: #667eea; color: white; border: none; padding: 8px 14px;
875
- border-radius: 6px; font-size: 13px; cursor: pointer; display: flex;
876
- align-items: center; gap: 6px; transition: all 0.2s;"
877
- onmouseover="this.style.background='#5568d3'; this.style.transform='translateY(-1px)'"
878
- onmouseout="this.style.background='#667eea'; this.style.transform='translateY(0)'">
879
- <span>📄</span>
880
- <span>${file.name}</span>
881
- <span style="background: rgba(255,255,255,0.2); padding: 2px 6px; border-radius: 4px; font-size: 11px;">${file.size_mb}MB</span>
882
- </button>
 
 
 
 
 
 
 
 
 
 
 
 
883
  `).join('');
884
  } else {
885
  container.innerHTML = '<span style="color: #94a3b8; font-size: 13px;">No preloaded files available. Add PDFs to the "data" folder.</span>';
@@ -1011,17 +1053,33 @@
1011
  const fileSizeMB = (file.size / (1024 * 1024)).toFixed(2);
1012
  console.log(`File: ${file.name}, Size: ${fileSizeMB}MB`);
1013
 
1014
- // Validate file size (50MB)
1015
- if (file.size > 50 * 1024 * 1024) {
1016
  Swal.fire({
1017
  icon: 'error',
1018
  title: 'File Too Large',
1019
- text: `File size is ${fileSizeMB}MB. Please upload a file smaller than 50MB.`,
1020
  confirmButtonColor: '#667eea'
1021
  });
1022
  return;
1023
  }
1024
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1025
  // Validate minimum file size (not empty)
1026
  if (file.size === 0) {
1027
  Swal.fire({
 
618
  <div class="upload-section" id="upload-section">
619
  <div class="upload-icon">📤</div>
620
  <div class="upload-text">Upload Your Document</div>
621
+ <div class="upload-hint">Supports PDF, DOCX, DOC, TXT, MD, CSV, JSON, XLSX, XLS (Max 15MB, recommended &lt;5MB for fast processing)</div>
622
  <label for="file-input" class="file-input-label">
623
  Choose File
624
  </label>
 
740
  }
741
  }
742
 
743
+ // Preview document function - for uploaded documents
744
  function previewDocument() {
745
+ const filename = docName.textContent;
746
+ if (!filename || filename === 'No document uploaded') {
747
+ Swal.fire({
748
+ icon: 'warning',
749
+ title: 'No Document',
750
+ text: 'Please upload a document first.',
751
+ confirmButtonColor: '#667eea'
752
+ });
753
+ return;
754
+ }
755
+ openPreviewModal(filename, 'uploads');
756
+ }
757
+
758
+ // Preview preloaded file function - for files in data folder
759
function previewPreloadedFile(filename) {
    // Preloaded files are previewed from the 'data' source.
    const source = 'data';
    openPreviewModal(filename, source);
}
762
+
763
+ // Unified preview modal function
764
function openPreviewModal(filename, source) {
    // Opens the preview modal and loads /preview_file/<filename>?source=<source>
    // in an iframe. `source` is 'uploads' or 'data'.
    const modal = document.getElementById('preview-modal');
    const previewBody = document.getElementById('preview-body');
    const previewTitle = document.getElementById('preview-title');

    previewTitle.textContent = `Preview: ${filename}`;

    // Show loading state
    previewBody.innerHTML = `
        <div style="text-align: center; padding: 60px 20px;">
            <div class="spinner"></div>
            <p style="margin-top: 20px; color: #6b7280;">Loading preview...</p>
        </div>
    `;
    const loadingState = previewBody.firstElementChild;

    // Show modal
    modal.classList.add('active');

    // Build preview URL; both values are encoded so neither can alter the URL structure
    const previewUrl = `/preview_file/${encodeURIComponent(filename)}?source=${encodeURIComponent(source)}`;

    // Append the iframe next to the loading state instead of overwriting it,
    // and drop the loading state once the iframe has finished loading.
    const frame = document.createElement('iframe');
    frame.className = 'preview-iframe';
    frame.addEventListener('load', () => loadingState.remove());
    frame.src = previewUrl;
    previewBody.appendChild(frame);
}
788
 
789
  // Close preview modal
790
  function closePreview() {
791
  const modal = document.getElementById('preview-modal');
792
+ const previewBody = document.getElementById('preview-body');
793
  modal.classList.remove('active');
794
+ // Clear iframe to stop any ongoing loads
795
+ previewBody.innerHTML = '';
796
  }
797
 
798
  // Close modal when clicking outside
 
900
 
901
  if (data.success && data.files && data.files.length > 0) {
902
  container.innerHTML = data.files.map(file => `
903
+ <div style="display: flex; gap: 4px; align-items: center;">
904
+ <button onclick="previewPreloadedFile('${file.name}')"
905
+ style="background: #3b82f6; color: white; border: none; padding: 8px 12px;
906
+ border-radius: 6px 0 0 6px; font-size: 13px; cursor: pointer; display: flex;
907
+ align-items: center; gap: 4px; transition: all 0.2s;"
908
+ onmouseover="this.style.background='#2563eb'"
909
+ onmouseout="this.style.background='#3b82f6'"
910
+ title="Preview this file">
911
+ 👁️
912
+ </button>
913
+ <button onclick="loadPreloadedFile('${file.name}')"
914
+ style="background: #667eea; color: white; border: none; padding: 8px 14px;
915
+ border-radius: 0 6px 6px 0; font-size: 13px; cursor: pointer; display: flex;
916
+ align-items: center; gap: 6px; transition: all 0.2s;"
917
+ onmouseover="this.style.background='#5568d3'; this.style.transform='translateY(-1px)'"
918
+ onmouseout="this.style.background='#667eea'; this.style.transform='translateY(0)'"
919
+ title="Load this file for Q&A">
920
+ <span>📄</span>
921
+ <span>${file.name}</span>
922
+ <span style="background: rgba(255,255,255,0.2); padding: 2px 6px; border-radius: 4px; font-size: 11px;">${file.size_mb}MB</span>
923
+ </button>
924
+ </div>
925
  `).join('');
926
  } else {
927
  container.innerHTML = '<span style="color: #94a3b8; font-size: 13px;">No preloaded files available. Add PDFs to the "data" folder.</span>';
 
1053
  const fileSizeMB = (file.size / (1024 * 1024)).toFixed(2);
1054
  console.log(`File: ${file.name}, Size: ${fileSizeMB}MB`);
1055
 
1056
+ // Validate file size (15MB max)
1057
+ if (file.size > 15 * 1024 * 1024) {
1058
  Swal.fire({
1059
  icon: 'error',
1060
  title: 'File Too Large',
1061
+ text: `File size is ${fileSizeMB}MB. Please upload a file smaller than 15MB for reasonable processing times.`,
1062
  confirmButtonColor: '#667eea'
1063
  });
1064
  return;
1065
  }
1066
 
1067
+ // Warn if file is large (over 5MB)
1068
+ if (file.size > 5 * 1024 * 1024) {
1069
+ const result = await Swal.fire({
1070
+ icon: 'warning',
1071
+ title: 'Large File Warning',
1072
+ html: `<p>File size is <b>${fileSizeMB}MB</b>.</p><p>Processing may take <b>3-5 minutes</b>.</p><p>For faster results, use files under 5MB.</p>`,
1073
+ showCancelButton: true,
1074
+ confirmButtonText: 'Continue Anyway',
1075
+ cancelButtonText: 'Cancel',
1076
+ confirmButtonColor: '#667eea'
1077
+ });
1078
+ if (!result.isConfirmed) {
1079
+ return;
1080
+ }
1081
+ }
1082
+
1083
  // Validate minimum file size (not empty)
1084
  if (file.size === 0) {
1085
  Swal.fire({