MohamedFahim committed on
Commit
b43c9ef
·
verified ·
1 Parent(s): 44f1c5c

Update interface.py

Browse files
Files changed (1) hide show
  1. interface.py +39 -155
interface.py CHANGED
@@ -66,115 +66,10 @@ def make_request_with_retry(method: str, endpoint: str, **kwargs) -> requests.Re
66
 
67
  raise gr.Error("❌ Maximum retries exceeded. Please try again later.", duration=10)
68
 
69
- # ==================== WEB SCRAPING FUNCTIONS ====================
70
-
71
- def extract_links(url: str, progress=gr.Progress()) -> List[str]:
72
- """Extract links from URL with progress tracking"""
73
- if not url:
74
- raise gr.Error("❌ Please provide a URL", duration=5)
75
-
76
- progress(0, desc="Connecting to website...")
77
-
78
- try:
79
- response = make_request_with_retry(
80
- "POST",
81
- "/extract_links",
82
- json={"url": url},
83
- timeout=30
84
- )
85
- progress(1, desc="Links extracted!")
86
- return response.json()["unique_links"]
87
- except Exception as e:
88
- raise gr.Error(f"❌ Failed to extract links: {str(e)}", duration=10)
89
-
90
- def extract_text(urls: List[str], progress=gr.Progress()) -> str:
91
- """Extract text from URLs with progress tracking"""
92
- progress(0, desc="Starting text extraction...")
93
-
94
- try:
95
- response = make_request_with_retry(
96
- "POST",
97
- "/extract_text",
98
- json=urls,
99
- timeout=120
100
- )
101
- progress(1, desc="Text extraction complete!")
102
- return response.json()["file_saved"]
103
- except Exception as e:
104
- raise gr.Error(f"❌ Failed to extract text: {str(e)}", duration=10)
105
-
106
- def perform_rag(file_path: str, prompt: str, progress=gr.Progress()) -> dict:
107
- """Perform RAG with progress tracking"""
108
- progress(0.3, desc="Analyzing content...")
109
-
110
- try:
111
- response = make_request_with_retry(
112
- "POST",
113
- "/rag",
114
- json={"file_path": file_path, "prompt": prompt},
115
- timeout=60
116
- )
117
- progress(1, desc="Analysis complete!")
118
- return response.json()
119
- except Exception as e:
120
- raise gr.Error(f"❌ Failed to perform RAG: {str(e)}", duration=10)
121
-
122
- def process_multiple_links(url: str, prompt: str, progress=gr.Progress()) -> Tuple[str, str, dict]:
123
- """Process multiple links with comprehensive progress tracking"""
124
- if not url or not prompt:
125
- raise gr.Error("❌ Please provide both URL and prompt", duration=5)
126
-
127
- try:
128
- progress(0, desc="πŸ” Extracting links from webpage...")
129
- links = extract_links(url, progress)
130
-
131
- progress(0.3, desc=f"πŸ“„ Found {len(links)} links. Processing top 5...")
132
- sample_links = links[:5]
133
- file_path = extract_text(sample_links, progress)
134
-
135
- progress(0.7, desc="πŸ€– Generating AI response...")
136
- result = perform_rag(file_path, prompt, progress)
137
-
138
- progress(1, desc="βœ… Complete!")
139
-
140
- status_msg = f"βœ… Processed {len(sample_links)} pages from {len(links)} total links found"
141
- response_text = f"**Query:** {result['user_query']}\n\n**Response:** {result['assistant_response']}"
142
-
143
- return status_msg, response_text, result['sources']
144
-
145
- except gr.Error:
146
- raise
147
- except Exception as e:
148
- raise gr.Error(f"❌ Processing error: {str(e)}", duration=10)
149
-
150
- def process_homepage_only(url: str, prompt: str, progress=gr.Progress()) -> Tuple[str, str, dict]:
151
- """Process homepage only with progress tracking"""
152
- if not url or not prompt:
153
- raise gr.Error("❌ Please provide both URL and prompt", duration=5)
154
-
155
- try:
156
- progress(0.2, desc="πŸ“„ Extracting homepage content...")
157
- file_path = extract_text([url], progress)
158
-
159
- progress(0.6, desc="πŸ€– Generating AI response...")
160
- result = perform_rag(file_path, prompt, progress)
161
-
162
- progress(1, desc="βœ… Complete!")
163
-
164
- status_msg = "βœ… Processed homepage content"
165
- response_text = f"**Query:** {result['user_query']}\n\n**Response:** {result['assistant_response']}"
166
-
167
- return status_msg, response_text, result['sources']
168
-
169
- except gr.Error:
170
- raise
171
- except Exception as e:
172
- raise gr.Error(f"❌ Processing error: {str(e)}", duration=10)
173
-
174
- # ==================== DOCUMENT UPLOAD FUNCTIONS (FIXED) ====================
175
 
176
  def upload_single_document(file, collection_name: str, progress=gr.Progress()) -> Tuple[str, dict]:
177
- """Upload single document with progress tracking - FIXED FOR 415 ERROR"""
178
  if not file:
179
  raise gr.Error("❌ Please select a file to upload", duration=5)
180
 
@@ -192,7 +87,6 @@ def upload_single_document(file, collection_name: str, progress=gr.Progress()) -
192
  raise gr.Error(f"❌ File not found: {file_path}", duration=5)
193
 
194
  with open(file_path, 'rb') as f:
195
- # FIX: Use generic MIME type to avoid 415 error
196
  files = {
197
  'file': (os.path.basename(file_path), f, 'application/octet-stream')
198
  }
@@ -223,7 +117,7 @@ def upload_single_document(file, collection_name: str, progress=gr.Progress()) -
223
  raise gr.Error(f"❌ Upload failed: {str(e)}", duration=10)
224
 
225
  def upload_multiple_documents(files, collection_name: str, progress=gr.Progress()) -> Tuple[str, dict]:
226
- """Upload multiple documents with progress tracking - FIXED FOR 415 ERROR"""
227
  if not files or len(files) == 0:
228
  raise gr.Error("❌ Please select files to upload", duration=5)
229
 
@@ -243,7 +137,6 @@ def upload_multiple_documents(files, collection_name: str, progress=gr.Progress(
243
 
244
  with open(file_path, 'rb') as f:
245
  file_content = f.read()
246
- # FIX: Use generic MIME type for all files
247
  files_to_upload.append(
248
  ('files', (os.path.basename(file_path), file_content, 'application/octet-stream'))
249
  )
@@ -500,63 +393,24 @@ label {
500
  """
501
 
502
  # Build interface
503
- with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="RAG Assistant") as interface:
504
  gr.HTML("""
505
  <div class="main-container">
506
  <h1 class="animated-title">
507
- <span class="floating-icon">🌐</span> Advanced RAG Assistant
508
  </h1>
509
  <p class="subtitle" style="color: #b0b0b0; font-size: 1.3rem; text-align: center; margin-bottom: 2rem;">
510
- Upload documents (PDF/Markdown) or extract from web pages - Ask questions using AI-powered retrieval
511
  </p>
512
  </div>
513
  """)
514
 
515
  with gr.Tabs() as tabs:
516
- # Web Scraping Tabs
517
- with gr.Tab("πŸ”— Multiple Links Analysis"):
518
- with gr.Row():
519
- with gr.Column():
520
- url_input_multi = gr.Textbox(label="🌍 Website URL", placeholder="https://example.com")
521
- prompt_input_multi = gr.Textbox(label="πŸ’­ Your Question", placeholder="What is this website about?", lines=3)
522
- submit_btn_multi = gr.Button("✨ Analyze Multiple Links", variant="primary")
523
-
524
- with gr.Row():
525
- with gr.Column():
526
- status_output_multi = gr.Textbox(label="πŸ“Š Status", elem_classes="output-box")
527
- response_output_multi = gr.Markdown(label="πŸ€– AI Response", elem_classes="output-box")
528
- sources_output_multi = gr.JSON(label="πŸ“š Sources", elem_classes="output-box")
529
-
530
- submit_btn_multi.click(
531
- fn=process_multiple_links,
532
- inputs=[url_input_multi, prompt_input_multi],
533
- outputs=[status_output_multi, response_output_multi, sources_output_multi]
534
- )
535
-
536
- with gr.Tab("🏠 Homepage Only Analysis"):
537
- with gr.Row():
538
- with gr.Column():
539
- url_input_home = gr.Textbox(label="🌍 Website URL", placeholder="https://example.com")
540
- prompt_input_home = gr.Textbox(label="πŸ’­ Your Question", placeholder="What is this website about?", lines=3)
541
- submit_btn_home = gr.Button("✨ Analyze Homepage", variant="primary")
542
-
543
- with gr.Row():
544
- with gr.Column():
545
- status_output_home = gr.Textbox(label="πŸ“Š Status", elem_classes="output-box")
546
- response_output_home = gr.Markdown(label="πŸ€– AI Response", elem_classes="output-box")
547
- sources_output_home = gr.JSON(label="πŸ“š Sources", elem_classes="output-box")
548
-
549
- submit_btn_home.click(
550
- fn=process_homepage_only,
551
- inputs=[url_input_home, prompt_input_home],
552
- outputs=[status_output_home, response_output_home, sources_output_home]
553
- )
554
-
555
  # Document Upload Tab
556
  with gr.Tab("📄 Document Upload & Query"):
557
  gr.Markdown("""
558
  ### Upload PDF or Markdown documents and query them using RAG
559
- - Supports **PDF** and **Markdown** files
560
  - Documents are chunked and stored in FAISS vector database
561
  - Organize documents into collections for better management
562
  """)
@@ -572,7 +426,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="RAG Assistant") as
572
 
573
  with gr.Tab("Single File"):
574
  file_upload_single = gr.File(
575
- label="πŸ“ Select Document (PDF/Markdown)",
576
  file_types=[".pdf", ".md", ".txt"]
577
  )
578
  upload_btn_single = gr.Button("📤 Upload Single Document", variant="primary")
@@ -581,7 +435,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="RAG Assistant") as
581
 
582
  with gr.Tab("Multiple Files"):
583
  file_upload_multi = gr.File(
584
- label="πŸ“ Select Documents (PDF/Markdown)",
585
  file_count="multiple",
586
  file_types=[".pdf", ".md", ".txt"]
587
  )
@@ -647,6 +501,17 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="RAG Assistant") as
647
  fn=refresh_collections,
648
  outputs=[collection_dropdown]
649
  )
 
 
 
 
 
 
 
 
 
 
 
650
 
651
  # Collection Management Tab
652
  with gr.Tab("🗂️ Collection Management"):
@@ -674,6 +539,14 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="RAG Assistant") as
674
  inputs=[collection_to_delete],
675
  outputs=[delete_status, collections_output, collections_json, collection_to_delete]
676
  )
 
 
 
 
 
 
 
 
677
 
678
  # System Health Tab
679
  with gr.Tab("⚙️ System Health"):
@@ -687,6 +560,17 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="RAG Assistant") as
687
  fn=get_system_health,
688
  outputs=[health_output, health_json]
689
  )
 
 
 
 
 
 
 
 
 
 
 
690
 
691
  gr.HTML("""
692
  <div class="note-box" style="margin-top: 2rem; background: linear-gradient(135deg, rgba(0, 242, 254, 0.08) 0%, rgba(79, 172, 254, 0.08) 100%); border-radius: 12px; padding: 1rem; border-left: 4px solid #00f2fe;">
 
66
 
67
  raise gr.Error("❌ Maximum retries exceeded. Please try again later.", duration=10)
68
 
69
+ # ==================== DOCUMENT UPLOAD FUNCTIONS ====================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  def upload_single_document(file, collection_name: str, progress=gr.Progress()) -> Tuple[str, dict]:
72
+ """Upload single document with progress tracking"""
73
  if not file:
74
  raise gr.Error("❌ Please select a file to upload", duration=5)
75
 
 
87
  raise gr.Error(f"❌ File not found: {file_path}", duration=5)
88
 
89
  with open(file_path, 'rb') as f:
 
90
  files = {
91
  'file': (os.path.basename(file_path), f, 'application/octet-stream')
92
  }
 
117
  raise gr.Error(f"❌ Upload failed: {str(e)}", duration=10)
118
 
119
  def upload_multiple_documents(files, collection_name: str, progress=gr.Progress()) -> Tuple[str, dict]:
120
+ """Upload multiple documents with progress tracking"""
121
  if not files or len(files) == 0:
122
  raise gr.Error("❌ Please select files to upload", duration=5)
123
 
 
137
 
138
  with open(file_path, 'rb') as f:
139
  file_content = f.read()
 
140
  files_to_upload.append(
141
  ('files', (os.path.basename(file_path), file_content, 'application/octet-stream'))
142
  )
 
393
  """
394
 
395
  # Build interface
396
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="RAG Document Assistant") as interface:
397
  gr.HTML("""
398
  <div class="main-container">
399
  <h1 class="animated-title">
400
+ <span class="floating-icon">📄</span> RAG Document Assistant
401
  </h1>
402
  <p class="subtitle" style="color: #b0b0b0; font-size: 1.3rem; text-align: center; margin-bottom: 2rem;">
403
+ Upload documents (PDF/Markdown/TXT) and ask questions using AI-powered retrieval
404
  </p>
405
  </div>
406
  """)
407
 
408
  with gr.Tabs() as tabs:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
  # Document Upload Tab
410
  with gr.Tab("📄 Document Upload & Query"):
411
  gr.Markdown("""
412
  ### Upload PDF or Markdown documents and query them using RAG
413
+ - Supports **PDF**, **Markdown**, and **TXT** files
414
  - Documents are chunked and stored in FAISS vector database
415
  - Organize documents into collections for better management
416
  """)
 
426
 
427
  with gr.Tab("Single File"):
428
  file_upload_single = gr.File(
429
+ label="πŸ“ Select Document (PDF/Markdown/TXT)",
430
  file_types=[".pdf", ".md", ".txt"]
431
  )
432
  upload_btn_single = gr.Button("📤 Upload Single Document", variant="primary")
 
435
 
436
  with gr.Tab("Multiple Files"):
437
  file_upload_multi = gr.File(
438
+ label="πŸ“ Select Documents (PDF/Markdown/TXT)",
439
  file_count="multiple",
440
  file_types=[".pdf", ".md", ".txt"]
441
  )
 
501
  fn=refresh_collections,
502
  outputs=[collection_dropdown]
503
  )
504
+
505
+ gr.HTML("""
506
+ <div class="example-box" style="background: linear-gradient(135deg, rgba(0, 242, 254, 0.1) 0%, rgba(79, 172, 254, 0.1) 100%); border-radius: 16px; padding: 1.5rem; border-left: 4px solid #00f2fe; margin-top: 2rem;">
507
+ <h3 style="margin-top: 0; font-size: 1.4rem; color: #e0e0e0;">
508
+ <span class="floating-icon">💡</span> Example Usage
509
+ </h3>
510
+ <p style="font-size: 1.1rem; color: #e0e0e0;"><strong>1.</strong> Upload your PDF/Markdown documents to a collection</p>
511
+ <p style="font-size: 1.1rem; color: #e0e0e0;"><strong>2.</strong> Ask questions like: "What are the main findings?" or "Summarize the methodology"</p>
512
+ <p style="font-size: 1.1rem; color: #e0e0e0;"><strong>3.</strong> System returns answers with source citations</p>
513
+ </div>
514
+ """)
515
 
516
  # Collection Management Tab
517
  with gr.Tab("🗂️ Collection Management"):
 
539
  inputs=[collection_to_delete],
540
  outputs=[delete_status, collections_output, collections_json, collection_to_delete]
541
  )
542
+
543
+ gr.HTML("""
544
+ <div class="note-box" style="background: linear-gradient(135deg, rgba(0, 242, 254, 0.08) 0%, rgba(79, 172, 254, 0.08) 100%); border-radius: 12px; padding: 1rem; border-left: 4px solid #00f2fe; margin-top: 1rem;">
545
+ <p style="margin: 0; font-size: 1.05rem; color: #00c6ff;">
546
+ ⚠️ <strong>Warning:</strong> Deleting a collection is permanent and cannot be undone. All documents in the collection will be removed.
547
+ </p>
548
+ </div>
549
+ """)
550
 
551
  # System Health Tab
552
  with gr.Tab("⚙️ System Health"):
 
560
  fn=get_system_health,
561
  outputs=[health_output, health_json]
562
  )
563
+
564
+ gr.HTML("""
565
+ <div class="example-box" style="background: linear-gradient(135deg, rgba(0, 242, 254, 0.1) 0%, rgba(79, 172, 254, 0.1) 100%); border-radius: 16px; padding: 1.5rem; border-left: 4px solid #00f2fe; margin-top: 2rem;">
566
+ <h3 style="margin-top: 0; font-size: 1.4rem; color: #e0e0e0;">
567
+ <span class="floating-icon">📊</span> Health Check Information
568
+ </h3>
569
+ <p style="font-size: 1.1rem; color: #e0e0e0;"><strong>Supabase:</strong> Cloud storage for documents (optional)</p>
570
+ <p style="font-size: 1.1rem; color: #e0e0e0;"><strong>Groq API:</strong> LLM for generating answers</p>
571
+ <p style="font-size: 1.1rem; color: #e0e0e0;"><strong>Vector Stores:</strong> FAISS collections for document embeddings</p>
572
+ </div>
573
+ """)
574
 
575
  gr.HTML("""
576
  <div class="note-box" style="margin-top: 2rem; background: linear-gradient(135deg, rgba(0, 242, 254, 0.08) 0%, rgba(79, 172, 254, 0.08) 100%); border-radius: 12px; padding: 1rem; border-left: 4px solid #00f2fe;">