siddhartharya committed on
Commit
0e041b2
•
1 Parent(s): 64190a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -43
app.py CHANGED
@@ -12,6 +12,9 @@ import base64
12
  import logging
13
  import os
14
  import sys
 
 
 
15
 
16
  # Import OpenAI library
17
  import openai
@@ -38,6 +41,9 @@ faiss_index = None
38
  bookmarks = []
39
  fetch_cache = {}
40
 
 
 
 
41
  # Define the categories
42
  CATEGORIES = [
43
  "Social Media",
@@ -190,16 +196,12 @@ def generate_summary_and_assign_category(bookmark):
190
  if use_prior_knowledge:
191
  prompt = f"""
192
  You are a knowledgeable assistant with up-to-date information as of 2023.
193
-
194
  URL: {bookmark.get('url')}
195
-
196
  Provide:
197
  1. A concise summary (max two sentences) about this website.
198
  2. Assign the most appropriate category from the list below.
199
-
200
  Categories:
201
  {', '.join([f'"{cat}"' for cat in CATEGORIES])}
202
-
203
  Format:
204
  Summary: [Your summary]
205
  Category: [One category]
@@ -207,17 +209,13 @@ Category: [One category]
207
  else:
208
  prompt = f"""
209
  You are an assistant that creates concise webpage summaries and assigns categories.
210
-
211
  Content:
212
  {content_text}
213
-
214
  Provide:
215
  1. A concise summary (max two sentences) focusing on the main topic.
216
  2. Assign the most appropriate category from the list below.
217
-
218
  Categories:
219
  {', '.join([f'"{cat}"' for cat in CATEGORIES])}
220
-
221
  Format:
222
  Summary: [Your summary]
223
  Category: [One category]
@@ -232,13 +230,14 @@ Category: [One category]
232
  total_tokens = prompt_tokens + max_tokens
233
 
234
  # Calculate required delay
235
- tokens_per_second = 6000 / 60 # 100 tokens per second
 
236
  required_delay = total_tokens / tokens_per_second
237
  sleep_time = max(required_delay, 1)
238
 
239
  # Call the LLM via Groq Cloud API
240
  response = openai.ChatCompletion.create(
241
- model='llama-3.1-70b-versatile',
242
  messages=[
243
  {"role": "user", "content": prompt}
244
  ],
@@ -302,7 +301,10 @@ def parse_bookmarks(file_content):
302
  url = link.get('href')
303
  title = link.text.strip()
304
  if url and title:
305
- extracted_bookmarks.append({'url': url, 'title': title})
 
 
 
306
  logger.info(f"Extracted {len(extracted_bookmarks)} bookmarks")
307
  return extracted_bookmarks
308
  except Exception as e:
@@ -315,7 +317,8 @@ def fetch_url_info(bookmark):
315
  """
316
  url = bookmark['url']
317
  if url in fetch_cache:
318
- bookmark.update(fetch_cache[url])
 
319
  return
320
 
321
  try:
@@ -360,14 +363,15 @@ def fetch_url_info(bookmark):
360
  bookmark['html_content'] = ''
361
  logger.error(f"Error fetching URL info for {url}: {e}", exc_info=True)
362
  finally:
363
- fetch_cache[url] = {
364
- 'etag': bookmark.get('etag'),
365
- 'status_code': bookmark.get('status_code'),
366
- 'dead_link': bookmark.get('dead_link'),
367
- 'description': bookmark.get('description'),
368
- 'html_content': bookmark.get('html_content', ''),
369
- 'slow_link': bookmark.get('slow_link', False),
370
- }
 
371
 
372
  def vectorize_and_index(bookmarks_list):
373
  """
@@ -468,18 +472,15 @@ def process_uploaded_file(file):
468
  for idx, bookmark in enumerate(bookmarks):
469
  bookmark['id'] = idx
470
 
471
- # Fetch bookmark info sequentially
472
- for bookmark in bookmarks:
473
- fetch_url_info(bookmark)
 
474
 
475
- # Process bookmarks sequentially with combined LLM call
476
- for bookmark in bookmarks:
477
- if bookmark.get('dead_link'):
478
- bookmark['summary'] = 'No summary available.'
479
- bookmark['category'] = 'Dead Link'
480
- logger.info(f"Assigned category 'Dead Link' to bookmark: {bookmark.get('url')}")
481
- else:
482
- generate_summary_and_assign_category(bookmark)
483
 
484
  try:
485
  faiss_index = vectorize_and_index(bookmarks)
@@ -617,12 +618,9 @@ def chatbot_response(user_query):
617
  # Use the LLM via Groq Cloud API to generate a response
618
  prompt = f"""
619
  A user asked: "{user_query}"
620
-
621
  Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
622
-
623
  Bookmarks:
624
  {bookmarks_info}
625
-
626
  Provide a concise and helpful response.
627
  """
628
 
@@ -635,12 +633,13 @@ Provide a concise and helpful response.
635
  total_tokens = prompt_tokens + max_tokens
636
 
637
  # Calculate required delay
638
- tokens_per_second = 6000 / 60 # 100 tokens per second
 
639
  required_delay = total_tokens / tokens_per_second
640
  sleep_time = max(required_delay, 1)
641
 
642
  response = openai.ChatCompletion.create(
643
- model='llama-3.1-70b-versatile',
644
  messages=[
645
  {"role": "user", "content": prompt}
646
  ],
@@ -672,15 +671,10 @@ def build_app():
672
  # General Overview
673
  gr.Markdown("""
674
  # 📚 SmartMarks - AI Browser Bookmarks Manager
675
-
676
  Welcome to **SmartMarks**, your intelligent assistant for managing browser bookmarks. SmartMarks leverages AI to help you organize, search, and interact with your bookmarks seamlessly.
677
-
678
  ---
679
-
680
  ## 🚀 **How to Use SmartMarks**
681
-
682
  SmartMarks is divided into three main sections:
683
-
684
  1. **📂 Upload and Process Bookmarks:** Import your existing bookmarks and let SmartMarks analyze and categorize them for you.
685
  2. **💬 Chat with Bookmarks:** Interact with your bookmarks using natural language queries to find relevant links effortlessly.
686
  3. **πŸ› οΈ Manage Bookmarks:** View, edit, delete, and export your bookmarks with ease.
@@ -690,7 +684,6 @@ def build_app():
690
  with gr.Tab("Upload and Process Bookmarks"):
691
  gr.Markdown("""
692
  ## 📂 **Upload and Process Bookmarks**
693
-
694
  ### πŸ“ **Steps:**
695
  1. Click on the "Upload Bookmarks HTML File" button
696
  2. Select your bookmarks file
@@ -706,7 +699,6 @@ def build_app():
706
  with gr.Tab("Chat with Bookmarks"):
707
  gr.Markdown("""
708
  ## 💬 **Chat with Bookmarks**
709
-
710
  Ask questions about your bookmarks and get relevant results.
711
  """)
712
 
 
12
  import logging
13
  import os
14
  import sys
15
+ import concurrent.futures
16
+ from concurrent.futures import ThreadPoolExecutor
17
+ import threading
18
 
19
  # Import OpenAI library
20
  import openai
 
41
  bookmarks = []
42
  fetch_cache = {}
43
 
44
+ # Lock for thread-safe operations
45
+ lock = threading.Lock()
46
+
47
  # Define the categories
48
  CATEGORIES = [
49
  "Social Media",
 
196
  if use_prior_knowledge:
197
  prompt = f"""
198
  You are a knowledgeable assistant with up-to-date information as of 2023.
 
199
  URL: {bookmark.get('url')}
 
200
  Provide:
201
  1. A concise summary (max two sentences) about this website.
202
  2. Assign the most appropriate category from the list below.
 
203
  Categories:
204
  {', '.join([f'"{cat}"' for cat in CATEGORIES])}
 
205
  Format:
206
  Summary: [Your summary]
207
  Category: [One category]
 
209
  else:
210
  prompt = f"""
211
  You are an assistant that creates concise webpage summaries and assigns categories.
 
212
  Content:
213
  {content_text}
 
214
  Provide:
215
  1. A concise summary (max two sentences) focusing on the main topic.
216
  2. Assign the most appropriate category from the list below.
 
217
  Categories:
218
  {', '.join([f'"{cat}"' for cat in CATEGORIES])}
 
219
  Format:
220
  Summary: [Your summary]
221
  Category: [One category]
 
230
  total_tokens = prompt_tokens + max_tokens
231
 
232
  # Calculate required delay
233
+ tokens_per_minute = 60000 # Adjust based on your rate limit
234
+ tokens_per_second = tokens_per_minute / 60
235
  required_delay = total_tokens / tokens_per_second
236
  sleep_time = max(required_delay, 1)
237
 
238
  # Call the LLM via Groq Cloud API
239
  response = openai.ChatCompletion.create(
240
+ model='llama-3.1-70b-versatile', # Using the specified model
241
  messages=[
242
  {"role": "user", "content": prompt}
243
  ],
 
301
  url = link.get('href')
302
  title = link.text.strip()
303
  if url and title:
304
+ if url.startswith('http://') or url.startswith('https://'):
305
+ extracted_bookmarks.append({'url': url, 'title': title})
306
+ else:
307
+ logger.info(f"Skipping non-http/https URL: {url}")
308
  logger.info(f"Extracted {len(extracted_bookmarks)} bookmarks")
309
  return extracted_bookmarks
310
  except Exception as e:
 
317
  """
318
  url = bookmark['url']
319
  if url in fetch_cache:
320
+ with lock:
321
+ bookmark.update(fetch_cache[url])
322
  return
323
 
324
  try:
 
363
  bookmark['html_content'] = ''
364
  logger.error(f"Error fetching URL info for {url}: {e}", exc_info=True)
365
  finally:
366
+ with lock:
367
+ fetch_cache[url] = {
368
+ 'etag': bookmark.get('etag'),
369
+ 'status_code': bookmark.get('status_code'),
370
+ 'dead_link': bookmark.get('dead_link'),
371
+ 'description': bookmark.get('description'),
372
+ 'html_content': bookmark.get('html_content', ''),
373
+ 'slow_link': bookmark.get('slow_link', False),
374
+ }
375
 
376
  def vectorize_and_index(bookmarks_list):
377
  """
 
472
  for idx, bookmark in enumerate(bookmarks):
473
  bookmark['id'] = idx
474
 
475
+ # Fetch bookmark info concurrently
476
+ logger.info("Fetching URL info concurrently")
477
+ with ThreadPoolExecutor(max_workers=20) as executor:
478
+ executor.map(fetch_url_info, bookmarks)
479
 
480
+ # Process bookmarks concurrently with LLM calls
481
+ logger.info("Processing bookmarks with LLM concurrently")
482
+ with ThreadPoolExecutor(max_workers=5) as executor:
483
+ executor.map(generate_summary_and_assign_category, bookmarks)
 
 
 
 
484
 
485
  try:
486
  faiss_index = vectorize_and_index(bookmarks)
 
618
  # Use the LLM via Groq Cloud API to generate a response
619
  prompt = f"""
620
  A user asked: "{user_query}"
 
621
  Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
 
622
  Bookmarks:
623
  {bookmarks_info}
 
624
  Provide a concise and helpful response.
625
  """
626
 
 
633
  total_tokens = prompt_tokens + max_tokens
634
 
635
  # Calculate required delay
636
+ tokens_per_minute = 60000 # Adjust based on your rate limit
637
+ tokens_per_second = tokens_per_minute / 60
638
  required_delay = total_tokens / tokens_per_second
639
  sleep_time = max(required_delay, 1)
640
 
641
  response = openai.ChatCompletion.create(
642
+ model='llama-3.1-70b-versatile', # Using the specified model
643
  messages=[
644
  {"role": "user", "content": prompt}
645
  ],
 
671
  # General Overview
672
  gr.Markdown("""
673
  # 📚 SmartMarks - AI Browser Bookmarks Manager
 
674
  Welcome to **SmartMarks**, your intelligent assistant for managing browser bookmarks. SmartMarks leverages AI to help you organize, search, and interact with your bookmarks seamlessly.
 
675
  ---
 
676
  ## 🚀 **How to Use SmartMarks**
 
677
  SmartMarks is divided into three main sections:
 
678
  1. **📂 Upload and Process Bookmarks:** Import your existing bookmarks and let SmartMarks analyze and categorize them for you.
679
  2. **💬 Chat with Bookmarks:** Interact with your bookmarks using natural language queries to find relevant links effortlessly.
680
  3. **πŸ› οΈ Manage Bookmarks:** View, edit, delete, and export your bookmarks with ease.
 
684
  with gr.Tab("Upload and Process Bookmarks"):
685
  gr.Markdown("""
686
  ## 📂 **Upload and Process Bookmarks**
 
687
  ### πŸ“ **Steps:**
688
  1. Click on the "Upload Bookmarks HTML File" button
689
  2. Select your bookmarks file
 
699
  with gr.Tab("Chat with Bookmarks"):
700
  gr.Markdown("""
701
  ## 💬 **Chat with Bookmarks**
 
702
  Ask questions about your bookmarks and get relevant results.
703
  """)
704