Spaces:

siddhartharya
/

Bookmark-Manager

Sleeping

App Files Files Community

siddhartharya committed on Nov 25, 2024

Commit

0e041b2

•

1 Parent(s): 64190a2

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -43

app.py CHANGED Viewed

@@ -12,6 +12,9 @@ import base64
 import logging
 import os
 import sys
 # Import OpenAI library
 import openai
@@ -38,6 +41,9 @@ faiss_index = None
 bookmarks = []
 fetch_cache = {}
 # Define the categories
 CATEGORIES = [
     "Social Media",
@@ -190,16 +196,12 @@ def generate_summary_and_assign_category(bookmark):
             if use_prior_knowledge:
                 prompt = f"""
 You are a knowledgeable assistant with up-to-date information as of 2023.
 URL: {bookmark.get('url')}
 Provide:
 1. A concise summary (max two sentences) about this website.
 2. Assign the most appropriate category from the list below.
 Categories:
 {', '.join([f'"{cat}"' for cat in CATEGORIES])}
 Format:
 Summary: [Your summary]
 Category: [One category]
@@ -207,17 +209,13 @@ Category: [One category]
             else:
                 prompt = f"""
 You are an assistant that creates concise webpage summaries and assigns categories.
 Content:
 {content_text}
 Provide:
 1. A concise summary (max two sentences) focusing on the main topic.
 2. Assign the most appropriate category from the list below.
 Categories:
 {', '.join([f'"{cat}"' for cat in CATEGORIES])}
 Format:
 Summary: [Your summary]
 Category: [One category]
@@ -232,13 +230,14 @@ Category: [One category]
             total_tokens = prompt_tokens + max_tokens
             # Calculate required delay
-            tokens_per_second = 6000 / 60  # 100 tokens per second
             required_delay = total_tokens / tokens_per_second
             sleep_time = max(required_delay, 1)
             # Call the LLM via Groq Cloud API
             response = openai.ChatCompletion.create(
-                model='llama-3.1-70b-versatile',
                 messages=[
                     {"role": "user", "content": prompt}
                 ],
@@ -302,7 +301,10 @@ def parse_bookmarks(file_content):
             url = link.get('href')
             title = link.text.strip()
             if url and title:
-                extracted_bookmarks.append({'url': url, 'title': title})
         logger.info(f"Extracted {len(extracted_bookmarks)} bookmarks")
         return extracted_bookmarks
     except Exception as e:
@@ -315,7 +317,8 @@ def fetch_url_info(bookmark):
     """
     url = bookmark['url']
     if url in fetch_cache:
-        bookmark.update(fetch_cache[url])
         return
     try:
@@ -360,14 +363,15 @@ def fetch_url_info(bookmark):
         bookmark['html_content'] = ''
         logger.error(f"Error fetching URL info for {url}: {e}", exc_info=True)
     finally:
-        fetch_cache[url] = {
-            'etag': bookmark.get('etag'),
-            'status_code': bookmark.get('status_code'),
-            'dead_link': bookmark.get('dead_link'),
-            'description': bookmark.get('description'),
-            'html_content': bookmark.get('html_content', ''),
-            'slow_link': bookmark.get('slow_link', False),
-        }
 def vectorize_and_index(bookmarks_list):
     """
@@ -468,18 +472,15 @@ def process_uploaded_file(file):
     for idx, bookmark in enumerate(bookmarks):
         bookmark['id'] = idx
-    # Fetch bookmark info sequentially
-    for bookmark in bookmarks:
-        fetch_url_info(bookmark)
-    # Process bookmarks sequentially with combined LLM call
-    for bookmark in bookmarks:
-        if bookmark.get('dead_link'):
-            bookmark['summary'] = 'No summary available.'
-            bookmark['category'] = 'Dead Link'
-            logger.info(f"Assigned category 'Dead Link' to bookmark: {bookmark.get('url')}")
-        else:
-            generate_summary_and_assign_category(bookmark)
     try:
         faiss_index = vectorize_and_index(bookmarks)
@@ -617,12 +618,9 @@ def chatbot_response(user_query):
         # Use the LLM via Groq Cloud API to generate a response
         prompt = f"""
 A user asked: "{user_query}"
 Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
 Bookmarks:
 {bookmarks_info}
 Provide a concise and helpful response.
 """
@@ -635,12 +633,13 @@ Provide a concise and helpful response.
         total_tokens = prompt_tokens + max_tokens
         # Calculate required delay
-        tokens_per_second = 6000 / 60  # 100 tokens per second
         required_delay = total_tokens / tokens_per_second
         sleep_time = max(required_delay, 1)
         response = openai.ChatCompletion.create(
-            model='llama-3.1-70b-versatile',
             messages=[
                 {"role": "user", "content": prompt}
             ],
@@ -672,15 +671,10 @@ def build_app():
             # General Overview
             gr.Markdown("""
             # 📚 SmartMarks - AI Browser Bookmarks Manager
             Welcome to **SmartMarks**, your intelligent assistant for managing browser bookmarks. SmartMarks leverages AI to help you organize, search, and interact with your bookmarks seamlessly.
             ---
             ## 🚀 **How to Use SmartMarks**
             SmartMarks is divided into three main sections:
             1. **📂 Upload and Process Bookmarks:** Import your existing bookmarks and let SmartMarks analyze and categorize them for you.
             2. **💬 Chat with Bookmarks:** Interact with your bookmarks using natural language queries to find relevant links effortlessly.
             3. **🛠️ Manage Bookmarks:** View, edit, delete, and export your bookmarks with ease.
@@ -690,7 +684,6 @@ def build_app():
             with gr.Tab("Upload and Process Bookmarks"):
                 gr.Markdown("""
                 ## 📂 **Upload and Process Bookmarks**
                 ### 📝 **Steps:**
                 1. Click on the "Upload Bookmarks HTML File" button
                 2. Select your bookmarks file
@@ -706,7 +699,6 @@ def build_app():
             with gr.Tab("Chat with Bookmarks"):
                 gr.Markdown("""
                 ## 💬 **Chat with Bookmarks**
                 Ask questions about your bookmarks and get relevant results.
                 """)

 import logging
 import os
 import sys
+import concurrent.futures
+from concurrent.futures import ThreadPoolExecutor
+import threading
 # Import OpenAI library
 import openai
 bookmarks = []
 fetch_cache = {}
+# Lock for thread-safe operations
+lock = threading.Lock()
 # Define the categories
 CATEGORIES = [
     "Social Media",
             if use_prior_knowledge:
                 prompt = f"""
 You are a knowledgeable assistant with up-to-date information as of 2023.
 URL: {bookmark.get('url')}
 Provide:
 1. A concise summary (max two sentences) about this website.
 2. Assign the most appropriate category from the list below.
 Categories:
 {', '.join([f'"{cat}"' for cat in CATEGORIES])}
 Format:
 Summary: [Your summary]
 Category: [One category]
             else:
                 prompt = f"""
 You are an assistant that creates concise webpage summaries and assigns categories.
 Content:
 {content_text}
 Provide:
 1. A concise summary (max two sentences) focusing on the main topic.
 2. Assign the most appropriate category from the list below.
 Categories:
 {', '.join([f'"{cat}"' for cat in CATEGORIES])}
 Format:
 Summary: [Your summary]
 Category: [One category]
             total_tokens = prompt_tokens + max_tokens
             # Calculate required delay
+            tokens_per_minute = 60000  # Adjust based on your rate limit
+            tokens_per_second = tokens_per_minute / 60
             required_delay = total_tokens / tokens_per_second
             sleep_time = max(required_delay, 1)
             # Call the LLM via Groq Cloud API
             response = openai.ChatCompletion.create(
+                model='llama-3.1-70b-versatile',  # Using the specified model
                 messages=[
                     {"role": "user", "content": prompt}
                 ],
             url = link.get('href')
             title = link.text.strip()
             if url and title:
+                if url.startswith('http://') or url.startswith('https://'):
+                    extracted_bookmarks.append({'url': url, 'title': title})
+                else:
+                    logger.info(f"Skipping non-http/https URL: {url}")
         logger.info(f"Extracted {len(extracted_bookmarks)} bookmarks")
         return extracted_bookmarks
     except Exception as e:
     """
     url = bookmark['url']
     if url in fetch_cache:
+        with lock:
+            bookmark.update(fetch_cache[url])
         return
     try:
         bookmark['html_content'] = ''
         logger.error(f"Error fetching URL info for {url}: {e}", exc_info=True)
     finally:
+        with lock:
+            fetch_cache[url] = {
+                'etag': bookmark.get('etag'),
+                'status_code': bookmark.get('status_code'),
+                'dead_link': bookmark.get('dead_link'),
+                'description': bookmark.get('description'),
+                'html_content': bookmark.get('html_content', ''),
+                'slow_link': bookmark.get('slow_link', False),
+            }
 def vectorize_and_index(bookmarks_list):
     """
     for idx, bookmark in enumerate(bookmarks):
         bookmark['id'] = idx
+    # Fetch bookmark info concurrently
+    logger.info("Fetching URL info concurrently")
+    with ThreadPoolExecutor(max_workers=20) as executor:
+        executor.map(fetch_url_info, bookmarks)
+    # Process bookmarks concurrently with LLM calls
+    logger.info("Processing bookmarks with LLM concurrently")
+    with ThreadPoolExecutor(max_workers=5) as executor:
+        executor.map(generate_summary_and_assign_category, bookmarks)
     try:
         faiss_index = vectorize_and_index(bookmarks)
         # Use the LLM via Groq Cloud API to generate a response
         prompt = f"""
 A user asked: "{user_query}"
 Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
 Bookmarks:
 {bookmarks_info}
 Provide a concise and helpful response.
 """
         total_tokens = prompt_tokens + max_tokens
         # Calculate required delay
+        tokens_per_minute = 60000  # Adjust based on your rate limit
+        tokens_per_second = tokens_per_minute / 60
         required_delay = total_tokens / tokens_per_second
         sleep_time = max(required_delay, 1)
         response = openai.ChatCompletion.create(
+            model='llama-3.1-70b-versatile',  # Using the specified model
             messages=[
                 {"role": "user", "content": prompt}
             ],
             # General Overview
             gr.Markdown("""
             # 📚 SmartMarks - AI Browser Bookmarks Manager
             Welcome to **SmartMarks**, your intelligent assistant for managing browser bookmarks. SmartMarks leverages AI to help you organize, search, and interact with your bookmarks seamlessly.
             ---
             ## 🚀 **How to Use SmartMarks**
             SmartMarks is divided into three main sections:
             1. **📂 Upload and Process Bookmarks:** Import your existing bookmarks and let SmartMarks analyze and categorize them for you.
             2. **💬 Chat with Bookmarks:** Interact with your bookmarks using natural language queries to find relevant links effortlessly.
             3. **🛠️ Manage Bookmarks:** View, edit, delete, and export your bookmarks with ease.
             with gr.Tab("Upload and Process Bookmarks"):
                 gr.Markdown("""
                 ## 📂 **Upload and Process Bookmarks**
                 ### 📝 **Steps:**
                 1. Click on the "Upload Bookmarks HTML File" button
                 2. Select your bookmarks file
             with gr.Tab("Chat with Bookmarks"):
                 gr.Markdown("""
                 ## 💬 **Chat with Bookmarks**
                 Ask questions about your bookmarks and get relevant results.
                 """)