siddhartharya commited on
Commit
05de921
·
verified ·
1 Parent(s): 6e6eade

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -933
app.py CHANGED
@@ -1,977 +1,158 @@
1
- # app.py
2
-
3
- import gradio as gr
 
4
  from bs4 import BeautifulSoup
5
  from sentence_transformers import SentenceTransformer
6
  import faiss
7
  import numpy as np
8
- import requests
9
- import time
10
- import re
11
- import logging
12
- import os
13
- import sys
14
- import threading
15
- from queue import Queue, Empty
16
- import json
17
  from concurrent.futures import ThreadPoolExecutor
 
18
 
19
- # Import OpenAI library
20
- import openai
21
-
22
- # Suppress only the single warning from urllib3 needed.
23
  import urllib3
24
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
25
 
26
- # Set up logging to output to the console
 
27
  logger = logging.getLogger(__name__)
28
- logger.setLevel(logging.INFO)
29
 
30
- # Create a console handler
31
- console_handler = logging.StreamHandler(sys.stdout)
32
- console_handler.setLevel(logging.INFO)
33
 
34
- # Create a formatter and set it for the handler
35
- formatter = logging.Formatter('%(asctime)s %(levelname)s %(name)s %(message)s')
36
- console_handler.setFormatter(formatter)
37
 
38
- # Add the handler to the logger
39
- logger.addHandler(console_handler)
 
 
40
 
41
- # Initialize variables and models
42
- logger.info("Initializing variables and models")
43
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
44
  faiss_index = None
45
  bookmarks = []
46
- fetch_cache = {}
47
-
48
- # Lock for thread-safe operations
49
- lock = threading.Lock()
50
 
51
- # Define the categories
52
  CATEGORIES = [
53
- "Social Media",
54
- "News and Media",
55
- "Education and Learning",
56
- "Entertainment",
57
- "Shopping and E-commerce",
58
- "Finance and Banking",
59
- "Technology",
60
- "Health and Fitness",
61
- "Travel and Tourism",
62
- "Food and Recipes",
63
- "Sports",
64
- "Arts and Culture",
65
- "Government and Politics",
66
- "Business and Economy",
67
- "Science and Research",
68
- "Personal Blogs and Journals",
69
- "Job Search and Careers",
70
- "Music and Audio",
71
- "Videos and Movies",
72
- "Reference and Knowledge Bases",
73
- "Dead Link",
74
- "Uncategorized",
75
  ]
76
 
77
- # Set up Groq Cloud API keys and base URLs
78
- GROQ_API_KEY_BASIC = os.getenv('GROQ_API_KEY_BASIC')
79
- GROQ_API_KEY_ADVANCED = os.getenv('GROQ_API_KEY_ADVANCED')
80
-
81
- if not GROQ_API_KEY_BASIC:
82
- logger.error("GROQ_API_KEY_BASIC environment variable not set.")
83
-
84
- if not GROQ_API_KEY_ADVANCED:
85
- logger.error("GROQ_API_KEY_ADVANCED environment variable not set.")
86
-
87
- # Define models
88
- MODEL_BASIC = 'llama-3.1-8b-instant'
89
- MODEL_ADVANCED = 'llama-3.1-70b-versatile'
90
-
91
- # Rate Limiter Configuration
92
- RPM_LIMIT_BASIC = 60 # Requests per minute for basic model
93
- TPM_LIMIT_BASIC = 60000 # Tokens per minute for basic model
94
- RPM_LIMIT_ADVANCED = 30 # Requests per minute for advanced model
95
- TPM_LIMIT_ADVANCED = 30000 # Tokens per minute for advanced model
96
-
97
- BATCH_SIZE_BASIC = 5 # Number of bookmarks per batch for basic model
98
- BATCH_SIZE_ADVANCED = 3 # Number of bookmarks per batch for advanced model
99
-
100
- # Implementing a Token Bucket Rate Limiter
101
- class TokenBucket:
102
- def __init__(self, rate, capacity):
103
- self.rate = rate # tokens per second
104
- self.capacity = capacity
105
- self.tokens = capacity
106
- self.timestamp = time.time()
107
- self.lock = threading.Lock()
108
-
109
- def consume(self, tokens=1):
110
- with self.lock:
111
- now = time.time()
112
- elapsed = now - self.timestamp
113
- refill = elapsed * self.rate
114
- self.tokens = min(self.capacity, self.tokens + refill)
115
- self.timestamp = now
116
- if self.tokens >= tokens:
117
- self.tokens -= tokens
118
- return True
119
- else:
120
- return False
121
-
122
- def wait_for_token(self, tokens=1):
123
- while not self.consume(tokens):
124
- time.sleep(0.05)
125
-
126
- # Initialize rate limiters
127
- rpm_rate_basic = RPM_LIMIT_BASIC / 60 # tokens per second
128
- tpm_rate_basic = TPM_LIMIT_BASIC / 60 # tokens per second
129
-
130
- rpm_rate_advanced = RPM_LIMIT_ADVANCED / 60 # tokens per second
131
- tpm_rate_advanced = TPM_LIMIT_ADVANCED / 60 # tokens per second
132
-
133
- rpm_bucket_basic = TokenBucket(rate=rpm_rate_basic, capacity=RPM_LIMIT_BASIC)
134
- tpm_bucket_basic = TokenBucket(rate=tpm_rate_basic, capacity=TPM_LIMIT_BASIC)
135
-
136
- rpm_bucket_advanced = TokenBucket(rate=rpm_rate_advanced, capacity=RPM_LIMIT_ADVANCED)
137
- tpm_bucket_advanced = TokenBucket(rate=tpm_rate_advanced, capacity=TPM_LIMIT_ADVANCED)
138
-
139
- # Queues for LLM tasks
140
- llm_queue_basic = Queue()
141
- llm_queue_advanced = Queue()
142
-
143
- def categorize_based_on_summary(summary, url):
144
- """
145
- Assign category based on keywords in the summary or URL.
146
- """
147
- summary_lower = summary.lower()
148
- url_lower = url.lower()
149
- if 'social media' in summary_lower or 'twitter' in summary_lower or 'x.com' in url_lower:
150
- return 'Social Media'
151
- elif 'wikipedia' in url_lower:
152
- return 'Reference and Knowledge Bases'
153
- elif 'cloud computing' in summary_lower or 'aws' in summary_lower:
154
- return 'Technology'
155
- elif 'news' in summary_lower or 'media' in summary_lower:
156
- return 'News and Media'
157
- elif 'education' in summary_lower or 'learning' in summary_lower:
158
- return 'Education and Learning'
159
- # Add more conditions as needed
160
- else:
161
- return 'Uncategorized'
162
-
163
- def validate_category(bookmark):
164
- """
165
- Further validate and adjust the category if needed.
166
- """
167
- # Example: Specific cases based on URL
168
- url_lower = bookmark['url'].lower()
169
- if 'facebook' in url_lower or 'x.com' in url_lower:
170
- return 'Social Media'
171
- elif 'wikipedia' in url_lower:
172
- return 'Reference and Knowledge Bases'
173
- elif 'aws.amazon.com' in url_lower:
174
- return 'Technology'
175
- # Add more specific cases as needed
176
- else:
177
- return bookmark['category']
178
-
179
- def extract_main_content(soup):
180
- """
181
- Extract the main content from a webpage while filtering out boilerplate content.
182
- """
183
- if not soup:
184
- return ""
185
-
186
- # Remove unwanted elements
187
- for element in soup(['script', 'style', 'header', 'footer', 'nav', 'aside', 'form', 'noscript']):
188
- element.decompose()
189
-
190
- # Extract text from <p> tags
191
- p_tags = soup.find_all('p')
192
- if p_tags:
193
- content = ' '.join([p.get_text(strip=True, separator=' ') for p in p_tags])
194
- else:
195
- # Fallback to body content
196
- content = soup.get_text(separator=' ', strip=True)
197
-
198
- # Clean up the text
199
- content = re.sub(r'\s+', ' ', content)
200
-
201
- # Truncate content to a reasonable length (e.g., 1500 words)
202
- words = content.split()
203
- if len(words) > 1500:
204
- content = ' '.join(words[:1500])
205
-
206
- return content
207
-
208
- def get_page_metadata(soup):
209
- """
210
- Extract metadata from the webpage including title, description, and keywords.
211
- """
212
- metadata = {
213
- 'title': '',
214
- 'description': '',
215
- 'keywords': ''
216
- }
217
-
218
- if not soup:
219
- return metadata
220
-
221
- # Get title
222
- title_tag = soup.find('title')
223
- if title_tag and title_tag.string:
224
- metadata['title'] = title_tag.string.strip()
225
-
226
- # Get meta description
227
- meta_desc = (
228
- soup.find('meta', attrs={'name': 'description'}) or
229
- soup.find('meta', attrs={'property': 'og:description'}) or
230
- soup.find('meta', attrs={'name': 'twitter:description'})
231
- )
232
- if meta_desc:
233
- metadata['description'] = meta_desc.get('content', '').strip()
234
-
235
- # Get meta keywords
236
- meta_keywords = soup.find('meta', attrs={'name': 'keywords'})
237
- if meta_keywords:
238
- metadata['keywords'] = meta_keywords.get('content', '').strip()
239
-
240
- # Get OG title if main title is empty
241
- if not metadata['title']:
242
- og_title = soup.find('meta', attrs={'property': 'og:title'})
243
- if og_title:
244
- metadata['title'] = og_title.get('content', '').strip()
245
-
246
- return metadata
247
-
248
- def llm_worker(queue, model_name, api_key, rpm_bucket, tpm_bucket, batch_size):
249
- """
250
- Worker thread to process LLM tasks from the queue while respecting rate limits.
251
- """
252
- logger.info(f"LLM worker for {model_name} started.")
253
- while True:
254
- batch = []
255
- try:
256
- # Collect bookmarks up to batch_size
257
- while len(batch) < batch_size:
258
- bookmark = queue.get(timeout=1)
259
- if bookmark is None:
260
- # Shutdown signal
261
- logger.info(f"LLM worker for {model_name} shutting down.")
262
- return
263
- if not bookmark.get('dead_link') and not bookmark.get('slow_link'):
264
- batch.append(bookmark)
265
- else:
266
- # Skip processing for dead or slow links
267
- bookmark['summary'] = 'No summary available.'
268
- bookmark['category'] = 'Uncategorized'
269
- queue.task_done()
270
-
271
- except Empty:
272
- pass # No more bookmarks at the moment
273
-
274
- if batch:
275
- try:
276
- # Rate Limiting
277
- rpm_bucket.wait_for_token()
278
- # Estimate tokens: prompt + max_tokens
279
- # Here, we assume max_tokens=150 per bookmark
280
- total_tokens = 150 * len(batch)
281
- tpm_bucket.wait_for_token(tokens=total_tokens)
282
-
283
- # Prepare prompt
284
- prompt = '''
285
- You are an assistant that creates concise webpage summaries and assigns categories.
286
- Provide summaries and categories for the following bookmarks:
287
-
288
- '''
289
-
290
- for idx, bookmark in enumerate(batch, 1):
291
- prompt += f'Bookmark {idx}:\nURL: {bookmark["url"]}\nTitle: {bookmark["title"]}\n\n'
292
-
293
- # Corrected f-string without backslashes
294
- categories_str = ', '.join([f'"{cat}"' for cat in CATEGORIES])
295
- prompt += f"Categories:\n{categories_str}\n\n"
296
-
297
- prompt += "Format your response as a JSON object where each key is the bookmark URL and the value is another JSON object containing 'summary' and 'category'.\n\n"
298
- prompt += "Example:\n"
299
- prompt += "{\n"
300
- prompt += ' "https://example.com": {\n'
301
- prompt += ' "summary": "This is an example summary.",\n'
302
- prompt += ' "category": "Technology"\n'
303
- prompt += " }\n"
304
- prompt += "}\n\n"
305
- prompt += "Now, provide the summaries and categories for the bookmarks listed above."
306
-
307
- # Set API key and model
308
- openai.api_key = api_key
309
-
310
- response = openai.ChatCompletion.create(
311
- model=model_name,
312
- messages=[
313
- {"role": "user", "content": prompt}
314
- ],
315
- max_tokens=150 * len(batch),
316
- temperature=0.5,
317
- )
318
-
319
- content = response['choices'][0]['message']['content'].strip()
320
- if not content:
321
- raise ValueError("Empty response received from the model.")
322
-
323
- # Parse JSON response
324
- try:
325
- json_response = json.loads(content)
326
- for bookmark in batch:
327
- url = bookmark['url']
328
- if url in json_response:
329
- summary = json_response[url].get('summary', '').strip()
330
- category = json_response[url].get('category', '').strip()
331
-
332
- if not summary:
333
- summary = 'No summary available.'
334
- bookmark['summary'] = summary
335
-
336
- if category in CATEGORIES:
337
- bookmark['category'] = category
338
- else:
339
- # Fallback to keyword-based categorization
340
- bookmark['category'] = categorize_based_on_summary(summary, url)
341
- else:
342
- logger.warning(f"No data returned for {url}. Using fallback methods.")
343
- bookmark['summary'] = 'No summary available.'
344
- bookmark['category'] = 'Uncategorized'
345
-
346
- # Additional keyword-based validation
347
- bookmark['category'] = validate_category(bookmark)
348
-
349
- logger.info(f"Processed bookmark: {url}")
350
-
351
- except json.JSONDecodeError:
352
- logger.error(f"Failed to parse JSON response from {model_name}. Using fallback methods.")
353
- for bookmark in batch:
354
- bookmark['summary'] = 'No summary available.'
355
- bookmark['category'] = categorize_based_on_summary(bookmark.get('summary', ''), bookmark['url'])
356
- bookmark['category'] = validate_category(bookmark)
357
-
358
- except Exception as e:
359
- logger.error(f"Error processing LLM response from {model_name}: {e}", exc_info=True)
360
- for bookmark in batch:
361
- bookmark['summary'] = 'No summary available.'
362
- bookmark['category'] = 'Uncategorized'
363
-
364
- except openai.error.RateLimitError:
365
- logger.warning(f"Rate limit reached for {model_name}. Fallback to other model if possible.")
366
- # Re-enqueue the entire batch to the other queue
367
- if model_name == MODEL_BASIC:
368
- target_queue = llm_queue_advanced
369
- target_model = MODEL_ADVANCED
370
- target_api_key = GROQ_API_KEY_ADVANCED
371
- else:
372
- target_queue = llm_queue_basic
373
- target_model = MODEL_BASIC
374
- target_api_key = GROQ_API_KEY_BASIC
375
-
376
- for bookmark in batch:
377
- logger.info(f"Reassigning bookmark {bookmark['url']} to {target_model} due to rate limit.")
378
- target_queue.put(bookmark)
379
-
380
- except Exception as e:
381
- logger.error(f"Error during LLM processing for {model_name}: {e}", exc_info=True)
382
- for bookmark in batch:
383
- bookmark['summary'] = 'No summary available.'
384
- bookmark['category'] = 'Uncategorized'
385
-
386
- finally:
387
- # Mark all bookmarks in the batch as done
388
- for _ in batch:
389
- queue.task_done()
390
-
391
- def parse_bookmarks(file_content):
392
- """
393
- Parse bookmarks from HTML file.
394
- """
395
- logger.info("Parsing bookmarks")
396
- try:
397
- soup = BeautifulSoup(file_content, 'html.parser')
398
- extracted_bookmarks = []
399
- for link in soup.find_all('a'):
400
- url = link.get('href')
401
- title = link.text.strip()
402
- if url and title:
403
- if url.startswith('http://') or url.startswith('https://'):
404
- extracted_bookmarks.append({'url': url, 'title': title})
405
- else:
406
- logger.info(f"Skipping non-http/https URL: {url}")
407
- logger.info(f"Extracted {len(extracted_bookmarks)} bookmarks")
408
- return extracted_bookmarks
409
- except Exception as e:
410
- logger.error("Error parsing bookmarks: %s", e, exc_info=True)
411
- raise
412
 
 
413
  def fetch_url_info(bookmark):
414
- """
415
- Fetch information about a URL.
416
- """
417
- url = bookmark['url']
418
- if url in fetch_cache:
419
- with lock:
420
- bookmark.update(fetch_cache[url])
421
- return
422
-
423
  try:
424
- logger.info(f"Fetching URL info for: {url}")
425
- headers = {
426
- 'User-Agent': 'Mozilla/5.0',
427
- 'Accept-Language': 'en-US,en;q=0.9',
428
- }
429
- response = requests.get(url, headers=headers, timeout=5, verify=False, allow_redirects=True)
430
- bookmark['etag'] = response.headers.get('ETag', 'N/A')
431
  bookmark['status_code'] = response.status_code
432
-
433
- content = response.text
434
- logger.info(f"Fetched content length for {url}: {len(content)} characters")
435
-
436
- if response.status_code >= 500:
437
- bookmark['dead_link'] = True
438
- bookmark['description'] = ''
439
- bookmark['html_content'] = ''
440
- logger.warning(f"Dead link detected: {url} with status {response.status_code}")
441
- else:
442
- bookmark['dead_link'] = False
443
- bookmark['html_content'] = content
444
- bookmark['description'] = ''
445
- logger.info(f"Fetched information for {url}")
446
-
447
- except requests.exceptions.Timeout:
448
- bookmark['dead_link'] = False
449
- bookmark['etag'] = 'N/A'
450
- bookmark['status_code'] = 'Timeout'
451
- bookmark['description'] = ''
452
- bookmark['html_content'] = ''
453
- bookmark['slow_link'] = True
454
- logger.warning(f"Timeout while fetching {url}. Marking as 'Slow'.")
455
  except Exception as e:
456
- bookmark['dead_link'] = True
457
- bookmark['etag'] = 'N/A'
458
- bookmark['status_code'] = 'Error'
459
- bookmark['description'] = ''
460
  bookmark['html_content'] = ''
461
- logger.error(f"Error fetching URL info for {url}: {e}", exc_info=True)
462
- finally:
463
- with lock:
464
- fetch_cache[url] = {
465
- 'etag': bookmark.get('etag'),
466
- 'status_code': bookmark.get('status_code'),
467
- 'dead_link': bookmark.get('dead_link'),
468
- 'description': bookmark.get('description'),
469
- 'html_content': bookmark.get('html_content', ''),
470
- 'slow_link': bookmark.get('slow_link', False),
471
- }
472
-
473
- def vectorize_and_index(bookmarks_list):
474
- """
475
- Create vector embeddings for bookmarks and build FAISS index with ID mapping.
476
- """
477
- global faiss_index
478
- logger.info("Vectorizing summaries and building FAISS index")
479
- try:
480
- summaries = [bookmark['summary'] for bookmark in bookmarks_list]
481
- embeddings = embedding_model.encode(summaries)
482
- dimension = embeddings.shape[1]
483
- index = faiss.IndexIDMap(faiss.IndexFlatL2(dimension))
484
- ids = np.array([bookmark['id'] for bookmark in bookmarks_list], dtype=np.int64)
485
- index.add_with_ids(np.array(embeddings).astype('float32'), ids)
486
- faiss_index = index
487
- logger.info("FAISS index built successfully with IDs")
488
- return index
489
- except Exception as e:
490
- logger.error(f"Error in vectorizing and indexing: {e}", exc_info=True)
491
- raise
492
-
493
- def display_bookmarks():
494
- """
495
- Generate HTML display for bookmarks.
496
- """
497
- logger.info("Generating HTML display for bookmarks")
498
- cards = ''
499
- for i, bookmark in enumerate(bookmarks):
500
- index = i + 1
501
- if bookmark.get('dead_link'):
502
- status = "❌ Dead Link"
503
- card_style = "border: 2px solid red;"
504
- text_style = "color: white;"
505
- summary = 'No summary available.'
506
- elif bookmark.get('slow_link'):
507
- status = "⏳ Slow Response"
508
- card_style = "border: 2px solid orange;"
509
- text_style = "color: white;"
510
- summary = bookmark.get('summary', 'No summary available.')
511
- else:
512
- status = "✅ Active"
513
- card_style = "border: 2px solid green;"
514
- text_style = "color: white;"
515
- summary = bookmark.get('summary', 'No summary available.')
516
-
517
- title = bookmark['title']
518
- url = bookmark['url']
519
- etag = bookmark.get('etag', 'N/A')
520
- category = bookmark.get('category', 'Uncategorized')
521
-
522
- # Escape HTML content to prevent XSS attacks
523
- from html import escape
524
- title = escape(title)
525
- url = escape(url)
526
- summary = escape(summary)
527
- category = escape(category)
528
-
529
- card_html = f'''
530
- <div class="card" style="{card_style} padding: 10px; margin: 10px; border-radius: 5px; background-color: #1e1e1e;">
531
- <div class="card-content">
532
- <h3 style="{text_style}">{index}. {title} {status}</h3>
533
- <p style="{text_style}"><strong>Category:</strong> {category}</p>
534
- <p style="{text_style}"><strong>URL:</strong> <a href="{url}" target="_blank" style="{text_style}">{url}</a></p>
535
- <p style="{text_style}"><strong>ETag:</strong> {etag}</p>
536
- <p style="{text_style}"><strong>Summary:</strong> {summary}</p>
537
- </div>
538
- </div>
539
- '''
540
- cards += card_html
541
- logger.info("HTML display generated")
542
- return cards
543
-
544
- def process_uploaded_file(file, state_bookmarks):
545
- """
546
- Process the uploaded bookmarks file.
547
- """
548
- global bookmarks, faiss_index
549
- logger.info("Processing uploaded file")
550
-
551
- if file is None:
552
- logger.warning("No file uploaded")
553
- return "Please upload a bookmarks HTML file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
554
-
555
- try:
556
- file_content = file.decode('utf-8')
557
- except UnicodeDecodeError as e:
558
- logger.error(f"Error decoding the file: {e}", exc_info=True)
559
- return "Error decoding the file. Please ensure it's a valid HTML file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
560
-
561
- try:
562
- bookmarks = parse_bookmarks(file_content)
563
- except Exception as e:
564
- logger.error(f"Error parsing bookmarks: {e}", exc_info=True)
565
- return "Error parsing the bookmarks HTML file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
566
-
567
- if not bookmarks:
568
- logger.warning("No bookmarks found in the uploaded file")
569
- return "No bookmarks found in the uploaded file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
570
-
571
- # Assign unique IDs to bookmarks
572
- for idx, bookmark in enumerate(bookmarks):
573
- bookmark['id'] = idx
574
-
575
- # Fetch bookmark info concurrently
576
- logger.info("Fetching URL info concurrently")
577
- with ThreadPoolExecutor(max_workers=10) as executor:
578
- executor.map(fetch_url_info, bookmarks)
579
-
580
- # Enqueue bookmarks for LLM processing based on task complexity
581
- logger.info("Enqueuing bookmarks for LLM processing")
582
- for bookmark in bookmarks:
583
- # Determine task complexity
584
- # Example logic: Assign to basic model if title is short, else to advanced
585
- if len(bookmark['title']) < 50:
586
- llm_queue_basic.put(bookmark)
587
- else:
588
- llm_queue_advanced.put(bookmark)
589
-
590
- # Wait until all LLM tasks are completed
591
- llm_queue_basic.join()
592
- llm_queue_advanced.join()
593
- logger.info("All LLM tasks have been processed")
594
-
595
- try:
596
- faiss_index = vectorize_and_index(bookmarks)
597
- except Exception as e:
598
- logger.error(f"Error building FAISS index: {e}", exc_info=True)
599
- return "Error building search index.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
600
-
601
- message = f"✅ Successfully processed {len(bookmarks)} bookmarks."
602
- logger.info(message)
603
-
604
- # Generate displays and updates
605
- bookmark_html = display_bookmarks()
606
- choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
607
- for i, bookmark in enumerate(bookmarks)]
608
-
609
- # Update state
610
- state_bookmarks = bookmarks.copy()
611
-
612
- return message, bookmark_html, state_bookmarks, bookmark_html, gr.update(choices=choices)
613
-
614
- def delete_selected_bookmarks(selected_indices, state_bookmarks):
615
- """
616
- Delete selected bookmarks and remove their vectors from the FAISS index.
617
- """
618
- global bookmarks, faiss_index
619
- if not selected_indices:
620
- return "⚠️ No bookmarks selected.", gr.update(choices=[]), display_bookmarks()
621
-
622
- ids_to_delete = []
623
- indices_to_delete = []
624
- for s in selected_indices:
625
- idx = int(s.split('.')[0]) - 1
626
- if 0 <= idx < len(bookmarks):
627
- bookmark_id = bookmarks[idx]['id']
628
- ids_to_delete.append(bookmark_id)
629
- indices_to_delete.append(idx)
630
- logger.info(f"Deleting bookmark at index {idx + 1}")
631
-
632
- # Remove vectors from FAISS index
633
- if faiss_index is not None and ids_to_delete:
634
- faiss_index.remove_ids(np.array(ids_to_delete, dtype=np.int64))
635
-
636
- # Remove bookmarks from the list (reverse order to avoid index shifting)
637
- for idx in sorted(indices_to_delete, reverse=True):
638
- bookmarks.pop(idx)
639
-
640
- message = "🗑️ Selected bookmarks deleted successfully."
641
- logger.info(message)
642
- choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
643
- for i, bookmark in enumerate(bookmarks)]
644
-
645
- # Update state
646
- state_bookmarks = bookmarks.copy()
647
-
648
- return message, gr.update(choices=choices), display_bookmarks()
649
-
650
- def edit_selected_bookmarks_category(selected_indices, new_category, state_bookmarks):
651
- """
652
- Edit category of selected bookmarks.
653
- """
654
- if not selected_indices:
655
- return "⚠️ No bookmarks selected.", gr.update(choices=[]), display_bookmarks(), state_bookmarks
656
- if not new_category:
657
- return "⚠️ No new category selected.", gr.update(choices=[]), display_bookmarks(), state_bookmarks
658
-
659
- indices = [int(s.split('.')[0])-1 for s in selected_indices]
660
- for idx in indices:
661
- if 0 <= idx < len(bookmarks):
662
- bookmarks[idx]['category'] = new_category
663
- logger.info(f"Updated category for bookmark {idx + 1} to {new_category}")
664
-
665
- message = "✏️ Category updated for selected bookmarks."
666
- logger.info(message)
667
-
668
- # Update choices and display
669
- choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
670
- for i, bookmark in enumerate(bookmarks)]
671
-
672
- # Update state
673
- state_bookmarks = bookmarks.copy()
674
 
675
- return message, gr.update(choices=choices), display_bookmarks(), state_bookmarks
 
 
 
676
 
677
- def export_bookmarks():
678
- """
679
- Export bookmarks to an HTML file.
680
- """
681
- if not bookmarks:
682
- logger.warning("No bookmarks to export")
683
- return None
684
 
685
- try:
686
- logger.info("Exporting bookmarks to HTML")
687
- soup = BeautifulSoup("<!DOCTYPE NETSCAPE-Bookmark-file-1><Title>Bookmarks</Title><H1>Bookmarks</H1>", 'html.parser')
688
- dl = soup.new_tag('DL')
689
- for bookmark in bookmarks:
690
- dt = soup.new_tag('DT')
691
- a = soup.new_tag('A', href=bookmark['url'])
692
- a.string = bookmark['title']
693
- dt.append(a)
694
- dl.append(dt)
695
- soup.append(dl)
696
- html_content = str(soup)
697
- output_file = "exported_bookmarks.html"
698
- with open(output_file, 'w', encoding='utf-8') as f:
699
- f.write(html_content)
700
- logger.info("Bookmarks exported successfully")
701
- return output_file
702
- except Exception as e:
703
- logger.error(f"Error exporting bookmarks: {e}", exc_info=True)
704
- return None
705
 
706
- def chatbot_response(user_query, chat_history):
 
 
707
  """
708
- Generate chatbot response using the FAISS index and embeddings.
709
- """
710
- if not bookmarks or faiss_index is None:
711
- logger.warning("No bookmarks available for chatbot")
712
- chat_history.append({"role": "assistant", "content": "⚠️ No bookmarks available. Please upload and process your bookmarks first."})
713
- return chat_history
714
-
715
- logger.info(f"Chatbot received query: {user_query}")
716
 
717
  try:
718
- chat_history.append({"role": "user", "content": user_query})
719
-
720
- # Rate Limiting
721
- # Assuming the chatbot uses the advanced model
722
- rpm_bucket_advanced.wait_for_token()
723
- # Estimate tokens: prompt + max_tokens
724
- # Here, we assume max_tokens=300 per chatbot response
725
- total_tokens = 300 # Adjust based on actual usage
726
- tpm_bucket_advanced.wait_for_token(tokens=total_tokens)
727
-
728
- query_vector = embedding_model.encode([user_query]).astype('float32')
729
- k = 5
730
- distances, ids = faiss_index.search(query_vector, k)
731
- ids = ids.flatten()
732
-
733
- id_to_bookmark = {bookmark['id']: bookmark for bookmark in bookmarks}
734
- matching_bookmarks = [id_to_bookmark.get(id) for id in ids if id in id_to_bookmark and id_to_bookmark.get(id).get('summary')]
735
-
736
- if not matching_bookmarks:
737
- answer = "No relevant bookmarks found for your query."
738
- chat_history.append({"role": "assistant", "content": answer})
739
- return chat_history
740
-
741
- bookmarks_info = "\n".join([
742
- f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}"
743
- for bookmark in matching_bookmarks
744
- ])
745
-
746
- prompt = f'''
747
- A user asked: "{user_query}"
748
- Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
749
- Bookmarks:
750
- {bookmarks_info}
751
- Provide a concise and helpful response.
752
- '''
753
-
754
- # Use the advanced model for chatbot responses
755
- openai.api_key = GROQ_API_KEY_ADVANCED
756
- response = openai.ChatCompletion.create(
757
- model=MODEL_ADVANCED, # Retaining the original model
758
- messages=[
759
- {"role": "user", "content": prompt}
760
- ],
761
- max_tokens=300,
762
- temperature=0.7,
763
  )
764
-
765
- answer = response['choices'][0]['message']['content'].strip()
766
- logger.info("Chatbot response generated")
767
-
768
- chat_history.append({"role": "assistant", "content": answer})
769
- return chat_history
770
-
771
- except openai.error.RateLimitError:
772
- wait_time = int(60) # Wait time can be adjusted or extracted from headers if available
773
- logger.warning(f"Rate limit reached for chatbot. Waiting for {wait_time} seconds before retrying...")
774
- time.sleep(wait_time)
775
- return chatbot_response(user_query, chat_history)
776
  except Exception as e:
777
- error_message = f"⚠️ Error processing your query: {str(e)}"
778
- logger.error(error_message, exc_info=True)
779
- chat_history.append({"role": "assistant", "content": error_message})
780
- return chat_history
781
-
782
- def build_app():
783
- """
784
- Build and launch the Gradio app.
785
- """
786
- try:
787
- logger.info("Building Gradio app")
788
- with gr.Blocks(css="app.css") as demo:
789
- # Initialize state
790
- state_bookmarks = gr.State([])
791
-
792
- # General Overview
793
- gr.Markdown("""
794
- # 📚 SmartMarks - AI Browser Bookmarks Manager
795
-
796
- Welcome to **SmartMarks**, your intelligent assistant for managing browser bookmarks. SmartMarks leverages AI to help you organize, search, and interact with your bookmarks seamlessly.
797
-
798
- ---
799
-
800
- ## 🚀 **How to Use SmartMarks**
801
-
802
- SmartMarks is divided into three main sections:
803
-
804
- 1. **📂 Upload and Process Bookmarks:** Import your existing bookmarks and let SmartMarks analyze and categorize them for you.
805
- 2. **💬 Chat with Bookmarks:** Interact with your bookmarks using natural language queries to find relevant links effortlessly.
806
- 3. **🛠️ Manage Bookmarks:** View, edit, delete, and export your bookmarks with ease.
807
-
808
- Navigate through the tabs to explore each feature in detail.
809
- """)
810
-
811
- # Upload and Process Bookmarks Tab
812
- with gr.Tab("Upload and Process Bookmarks"):
813
- gr.Markdown("""
814
- ## 📂 **Upload and Process Bookmarks**
815
-
816
- ### 📝 **Steps to Upload and Process:**
817
-
818
- 1. **Upload Bookmarks File:**
819
- - Click on the **"📁 Upload Bookmarks HTML File"** button.
820
- - Select your browser's exported bookmarks HTML file from your device.
821
-
822
- 2. **Process Bookmarks:**
823
- - After uploading, click on the **"⚙️ Process Bookmarks"** button.
824
- - SmartMarks will parse your bookmarks, fetch additional information, generate summaries, and categorize each link based on predefined categories.
825
-
826
- 3. **View Processed Bookmarks:**
827
- - Once processing is complete, your bookmarks will be displayed in an organized and visually appealing format below.
828
- """)
829
-
830
- upload = gr.File(label="📁 Upload Bookmarks HTML File", type='binary')
831
- process_button = gr.Button("⚙️ Process Bookmarks")
832
- output_text = gr.Textbox(label="✅ Output", interactive=False)
833
- bookmark_display = gr.HTML(label="📄 Processed Bookmarks")
834
 
835
- # Chat with Bookmarks Tab
836
- with gr.Tab("Chat with Bookmarks"):
837
- gr.Markdown("""
838
- ## 💬 **Chat with Bookmarks**
839
-
840
- ### 🤖 **How to Interact:**
841
-
842
- 1. **Enter Your Query:**
843
- - In the **"✍️ Ask about your bookmarks"** textbox, type your question or keyword related to your bookmarks.
844
-
845
- 2. **Submit Your Query:**
846
- - Click the **"📨 Send"** button to submit your query.
847
-
848
- 3. **Receive AI-Driven Responses:**
849
- - SmartMarks will analyze your query and provide relevant bookmarks that match your request.
850
-
851
- 4. **View Chat History:**
852
- - All your queries and the corresponding AI responses are displayed in the chat history.
853
- """)
854
-
855
- chatbot = gr.Chatbot(label="💬 Chat with SmartMarks", type='messages')
856
- user_input = gr.Textbox(
857
- label="✍️ Ask about your bookmarks",
858
- placeholder="e.g., Do I have any bookmarks about AI?"
859
- )
860
- chat_button = gr.Button("📨 Send")
861
-
862
- chat_button.click(
863
- chatbot_response,
864
- inputs=[user_input, chatbot],
865
- outputs=chatbot
866
- )
867
-
868
- # Manage Bookmarks Tab
869
- with gr.Tab("Manage Bookmarks"):
870
- gr.Markdown("""
871
- ## 🛠️ **Manage Bookmarks**
872
-
873
- ### 🗂️ **Features:**
874
-
875
- 1. **View Bookmarks:**
876
- - All your processed bookmarks are displayed here with their respective categories and summaries.
877
-
878
- 2. **Select Bookmarks:**
879
- - Use the checkboxes next to each bookmark to select one, multiple, or all bookmarks you wish to manage.
880
-
881
- 3. **Delete Selected Bookmarks:**
882
- - After selecting the desired bookmarks, click the **"🗑️ Delete Selected"** button to remove them from your list.
883
-
884
- 4. **Edit Categories:**
885
- - Select the bookmarks you want to re-categorize.
886
- - Choose a new category from the dropdown menu labeled **"🆕 New Category"**.
887
- - Click the **"✏️ Edit Category"** button to update their categories.
888
-
889
- 5. **Export Bookmarks:**
890
- - Click the **"💾 Export"** button to download your updated bookmarks as an HTML file.
891
-
892
- 6. **Refresh Bookmarks:**
893
- - Click the **"🔄 Refresh Bookmarks"** button to ensure the latest state is reflected in the display.
894
- """)
895
-
896
- manage_output = gr.Textbox(label="🔄 Status", interactive=False)
897
-
898
- # CheckboxGroup for selecting bookmarks
899
- bookmark_selector = gr.CheckboxGroup(
900
- label="✅ Select Bookmarks",
901
- choices=[]
902
- )
903
-
904
- new_category = gr.Dropdown(
905
- label="🆕 New Category",
906
- choices=CATEGORIES,
907
- value="Uncategorized"
908
- )
909
- bookmark_display_manage = gr.HTML(label="📄 Bookmarks")
910
-
911
- with gr.Row():
912
- delete_button = gr.Button("🗑️ Delete Selected")
913
- edit_category_button = gr.Button("✏️ Edit Category")
914
- export_button = gr.Button("💾 Export")
915
- refresh_button = gr.Button("🔄 Refresh Bookmarks")
916
-
917
- download_link = gr.File(label="📥 Download Exported Bookmarks")
918
-
919
- # Connect all the button actions
920
- process_button.click(
921
- process_uploaded_file,
922
- inputs=[upload, state_bookmarks],
923
- outputs=[output_text, bookmark_display, state_bookmarks, bookmark_display, bookmark_selector]
924
- )
925
-
926
- delete_button.click(
927
- delete_selected_bookmarks,
928
- inputs=[bookmark_selector, state_bookmarks],
929
- outputs=[manage_output, bookmark_selector, bookmark_display_manage]
930
- )
931
 
932
- edit_category_button.click(
933
- edit_selected_bookmarks_category,
934
- inputs=[bookmark_selector, new_category, state_bookmarks],
935
- outputs=[manage_output, bookmark_selector, bookmark_display_manage, state_bookmarks]
936
- )
937
 
938
- export_button.click(
939
- export_bookmarks,
940
- outputs=download_link
941
- )
942
 
943
- refresh_button.click(
944
- lambda state_bookmarks: (
945
- [
946
- f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
947
- for i, bookmark in enumerate(state_bookmarks)
948
- ],
949
- display_bookmarks()
950
- ),
951
- inputs=[state_bookmarks],
952
- outputs=[bookmark_selector, bookmark_display_manage]
953
- )
954
 
955
- logger.info("Launching Gradio app")
956
- demo.launch(debug=True)
957
- except Exception as e:
958
- logger.error(f"Error building Gradio app: {e}", exc_info=True)
959
- print(f"Error building Gradio app: {e}")
 
960
 
961
- if __name__ == "__main__":
962
- # Start the LLM worker threads before launching the app
963
- llm_thread_basic = threading.Thread(
964
- target=llm_worker,
965
- args=(llm_queue_basic, MODEL_BASIC, GROQ_API_KEY_BASIC, rpm_bucket_basic, tpm_bucket_basic, BATCH_SIZE_BASIC),
966
- daemon=True
967
- )
968
- llm_thread_advanced = threading.Thread(
969
- target=llm_worker,
970
- args=(llm_queue_advanced, MODEL_ADVANCED, GROQ_API_KEY_ADVANCED, rpm_bucket_advanced, tpm_bucket_advanced, BATCH_SIZE_ADVANCED),
971
- daemon=True
972
- )
973
 
974
- llm_thread_basic.start()
975
- llm_thread_advanced.start()
976
 
977
- build_app()
 
1
+ import os
2
+ import time
3
+ import threading
4
+ import requests
5
  from bs4 import BeautifulSoup
6
  from sentence_transformers import SentenceTransformer
7
  import faiss
8
  import numpy as np
9
+ import gradio as gr
 
 
 
 
 
 
 
 
10
  from concurrent.futures import ThreadPoolExecutor
11
+ import logging
12
 
13
+ # Suppress warnings from urllib3
 
 
 
14
  import urllib3
15
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
16
 
17
# Logging setup
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Environment variable keys for API access (one key per model tier)
GROQ_API_KEY_BASIC = os.getenv('GROQ_API_KEY_BASIC')
GROQ_API_KEY_ADVANCED = os.getenv('GROQ_API_KEY_ADVANCED')

# LLM Models
MODEL_BASIC = 'llama-3.1-8b-instant'
MODEL_ADVANCED = 'llama-3.1-70b-versatile'

# Verify API keys. Abort with a non-zero status so the failed start-up is
# visible to whatever launched the process; the bare exit() helper comes from
# the `site` module (not guaranteed to exist) and reports success (status 0).
if not GROQ_API_KEY_BASIC or not GROQ_API_KEY_ADVANCED:
    logger.error("Both GROQ_API_KEY_BASIC and GROQ_API_KEY_ADVANCED must be set.")
    raise SystemExit(1)

# Embedding model and FAISS index initialization
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
faiss_index = None  # replaced by vectorize_and_index() once bookmarks are processed
bookmarks = []  # replaced by process_bookmarks() with the parsed bookmark dicts

# Define categories
CATEGORIES = [
    "Social Media", "News and Media", "Education and Learning", "Entertainment",
    "Shopping and E-commerce", "Finance and Banking", "Technology", "Health and Fitness",
    "Travel and Tourism", "Food and Recipes", "Sports", "Arts and Culture",
    "Government and Politics", "Business and Economy", "Science and Research",
    "Personal Blogs and Journals", "Job Search and Careers", "Music and Audio",
    "Videos and Movies", "Reference and Knowledge Bases", "Dead Link", "Uncategorized",
]
48
 
49
+ # Task routing logic
50
def select_model_for_task(content_length):
    """Pick the API key and model to use for a piece of content.

    Args:
        content_length: Size of the content to process; the caller in this
            file passes ``len()`` of the fetched HTML.

    Returns:
        An ``(api_key, model_name)`` tuple: the basic pair when
        ``content_length`` is below 500, the advanced pair otherwise.
    """
    is_simple_task = content_length < 500
    if not is_simple_task:
        # Complex tasks
        return GROQ_API_KEY_ADVANCED, MODEL_ADVANCED
    # Simple tasks
    return GROQ_API_KEY_BASIC, MODEL_BASIC
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
+ # Fetch URL info function
58
def fetch_url_info(bookmark):
    """Fetch the bookmarked page and record the outcome on ``bookmark``.

    The dict is updated in place. On success ``'html_content'`` holds the
    response body and ``'status_code'`` the HTTP status code. On any failure
    the error is logged and they are set to ``''`` and ``'Error'``.

    Args:
        bookmark: Dict with at least a ``'url'`` key.
    """
    try:
        # verify=False skips TLS certificate validation for this request.
        page = requests.get(bookmark['url'], timeout=10, verify=False)
        fetched = {'html_content': page.text, 'status_code': page.status_code}
    except Exception as e:
        logger.error(f"Failed to fetch URL info for {bookmark['url']}: {e}")
        fetched = {'html_content': '', 'status_code': 'Error'}
    bookmark.update(fetched)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
+ # Generate summary and assign category
69
# Groq's OpenAI-compatible chat endpoint. The keys and model names used by
# this file are Groq's, so posting to api.openai.com can only fail to authenticate.
_GROQ_CHAT_COMPLETIONS_URL = "https://api.groq.com/openai/v1/chat/completions"
# Upper bound on page text placed in the prompt, so one large page cannot blow
# past the model's request limits. NOTE(review): tune to the account's limits.
_MAX_PROMPT_CHARS = 4000
_LLM_TIMEOUT_SECONDS = 30


def _parse_summary_and_category(content):
    """Split an LLM reply into ``(summary, category)`` with safe fallbacks.

    The category is taken from the first line after ``Category:`` and is only
    accepted if it matches an entry of ``CATEGORIES`` (case-insensitively).
    """
    body, has_category, tail = content.partition("Category:")
    _, has_summary, summary_text = body.partition("Summary:")
    summary = (summary_text if has_summary else body).strip()

    category = 'Uncategorized'
    if has_category:
        tail_lines = tail.strip().splitlines()
        # Drop decoration models tend to add around the label ([..], quotes, '.').
        candidate = tail_lines[0].strip(" \t[]\"'*.").casefold() if tail_lines else ''
        known = {name.casefold(): name for name in CATEGORIES}
        category = known.get(candidate, 'Uncategorized')

    return summary or 'No summary available.', category


def generate_summary_and_assign_category(bookmark):
    """Summarize a bookmark's page with the LLM and assign it a category.

    Sets ``bookmark['summary']`` and ``bookmark['category']`` in place and
    returns ``None``. No request is made when there is no page content; in
    that case the category is ``'Dead Link'`` if the fetch step recorded
    ``'Error'`` and ``'Uncategorized'`` otherwise. Any failure while calling
    the API or reading its reply is logged and also yields the defaults.

    Args:
        bookmark: Dict with ``'url'`` and, optionally, ``'html_content'`` and
            ``'status_code'`` as recorded by the fetch step.
    """
    html = bookmark.get('html_content') or ''
    if not html.strip():
        # Nothing to summarize; an empty prompt would only invite a made-up answer.
        bookmark['summary'] = 'No summary available.'
        fetch_failed = bookmark.get('status_code') == 'Error'
        bookmark['category'] = 'Dead Link' if fetch_failed else 'Uncategorized'
        return

    api_key, model_name = select_model_for_task(len(html))

    try:
        # Send visible text rather than raw markup, capped in size.
        page_text = BeautifulSoup(html, 'html.parser').get_text(separator=' ', strip=True)
        prompt = (
            "You are an assistant. Summarize the following webpage content:\n"
            f"{page_text[:_MAX_PROMPT_CHARS]}\n\n"
            f"Assign one category from this list: {', '.join(CATEGORIES)}.\n\n"
            "Respond in the format:\n"
            "Summary: [Your summary]\n"
            "Category: [One category]\n"
        )
        response = requests.post(
            _GROQ_CHAT_COMPLETIONS_URL,
            headers={"Authorization": f"Bearer {api_key}"},
            json={
                "model": model_name,
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": 150,
                "temperature": 0.7,
            },
            timeout=_LLM_TIMEOUT_SECONDS,
        )
        # Turn HTTP errors (bad key, rate limit) into a clear log line instead
        # of a KeyError on the missing 'choices' field.
        response.raise_for_status()
        content = response.json()['choices'][0]['message']['content']
        bookmark['summary'], bookmark['category'] = _parse_summary_and_category(content)
    except Exception as e:
        # Deliberately broad: this is a best-effort step and one bad bookmark
        # must not abort the rest of the batch.
        logger.error(f"Error processing LLM response for {bookmark['url']}: {e}")
        bookmark['summary'] = 'No summary available.'
        bookmark['category'] = 'Uncategorized'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
+ # Vectorize summaries and build FAISS index
110
def vectorize_and_index(bookmarks):
    """Embed every bookmark summary and rebuild the global FAISS index.

    The vector for ``bookmarks[i]`` is stored under id ``i``. The result is
    published through the module-level ``faiss_index``; with no bookmarks the
    index is reset to ``None``.

    Args:
        bookmarks: List of bookmark dicts; a missing ``'summary'`` is embedded
            as an empty string.
    """
    global faiss_index

    if not bookmarks:
        # Nothing to embed, so there is no embedding width to size an index with.
        faiss_index = None
        return

    summaries = [b.get('summary') or '' for b in bookmarks]
    # Pin the dtypes explicitly: float32 vectors and int64 ids.
    embeddings = np.asarray(embedding_model.encode(summaries), dtype=np.float32)
    ids = np.arange(len(bookmarks), dtype=np.int64)

    index = faiss.IndexIDMap(faiss.IndexFlatL2(embeddings.shape[1]))
    index.add_with_ids(embeddings, ids)
    faiss_index = index
119
+
120
+ # Gradio interface setup
121
def _read_uploaded_text(file):
    """Return the uploaded file's text, whichever form the upload arrives in.

    Accepts ``None``, raw bytes (what ``gr.File(type="binary")`` delivers),
    an object with ``read()``, or a filesystem path.
    """
    if file is None:
        return ''
    if isinstance(file, (bytes, bytearray)):
        raw = bytes(file)
    elif hasattr(file, 'read'):
        raw = file.read()
    else:
        with open(file, 'rb') as handle:
            raw = handle.read()
    if isinstance(raw, str):
        return raw
    # Tolerate stray non-UTF-8 bytes instead of failing the whole import.
    return raw.decode('utf-8', errors='replace')


def process_bookmarks(file):
    """Parse an exported bookmarks HTML file and enrich every link.

    Each ``<a href>`` becomes a dict with ``'url'``, ``'title'`` and
    ``'html_content'``. The pages are fetched concurrently, then summarized
    and categorized concurrently, and finally indexed. The result is also
    stored in the module-level ``bookmarks``.

    Args:
        file: The upload from the Gradio file component (bytes, file-like
            object, path, or ``None``).

    Returns:
        The list of bookmark dicts; empty when nothing was uploaded or the
        file contains no links.
    """
    global bookmarks, faiss_index

    file_content = _read_uploaded_text(file)
    if not file_content.strip():
        bookmarks = []
        faiss_index = None  # do not leave an index describing a previous upload
        return bookmarks

    soup = BeautifulSoup(file_content, 'html.parser')

    # Parse bookmarks
    bookmarks = [
        {'url': link.get('href'), 'title': link.text, 'html_content': ''}
        for link in soup.find_all('a') if link.get('href')
    ]
    if not bookmarks:
        faiss_index = None
        return bookmarks

    # Fetch URLs concurrently. list() consumes the results so an exception
    # raised inside a worker surfaces here instead of being dropped.
    with ThreadPoolExecutor() as executor:
        list(executor.map(fetch_url_info, bookmarks))

    # Process bookmarks with LLM
    with ThreadPoolExecutor() as executor:
        list(executor.map(generate_summary_and_assign_category, bookmarks))

    # Build FAISS index
    vectorize_and_index(bookmarks)

    return bookmarks
 
 
 
 
 
 
 
 
 
 
144
 
145
+ # Build Gradio app
146
with gr.Blocks() as demo:
    gr.Markdown("# Smart Bookmark Manager")
    file_input = gr.File(label="Upload Bookmark File", type="binary")
    submit_button = gr.Button("Process")
    output = gr.Textbox(label="Output")

    def handle_submit(file):
        """Process the uploaded file and return one "title - category" line per bookmark."""
        if file is None:
            # Clicking "Process" before uploading would otherwise raise inside the handler.
            return "Please upload a bookmarks HTML file first."
        processed = process_bookmarks(file)
        return "\n".join(
            f"{b['title']} - {b.get('category', 'Uncategorized')}" for b in processed
        )

    submit_button.click(handle_submit, inputs=file_input, outputs=output)

# Only start the server when run as a script, so importing this module
# (tests, tooling) does not block on a running web server.
if __name__ == "__main__":
    demo.launch()