siddhartharya committed on
Commit
880f9ee
·
verified ·
1 Parent(s): 91c2e3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -45
app.py CHANGED
@@ -10,8 +10,15 @@ import asyncio
10
  import aiohttp
11
  import re
12
  import base64
 
 
 
 
 
 
13
 
14
  # Initialize models and variables
 
15
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
16
  faiss_index = None
17
  bookmarks = []
@@ -44,14 +51,20 @@ CATEGORIES = [
44
  ]
45
 
46
  def parse_bookmarks(file_content):
47
- soup = BeautifulSoup(file_content, 'html.parser')
48
- extracted_bookmarks = []
49
- for link in soup.find_all('a'):
50
- url = link.get('href')
51
- title = link.text.strip()
52
- if url and title:
53
- extracted_bookmarks.append({'url': url, 'title': title})
54
- return extracted_bookmarks
 
 
 
 
 
 
55
 
56
  async def fetch_url_info(session, bookmark):
57
  url = bookmark['url']
@@ -60,6 +73,7 @@ async def fetch_url_info(session, bookmark):
60
  return bookmark
61
 
62
  try:
 
63
  async with session.get(url, timeout=5) as response:
64
  bookmark['etag'] = response.headers.get('ETag', 'N/A')
65
  bookmark['status_code'] = response.status
@@ -67,6 +81,7 @@ async def fetch_url_info(session, bookmark):
67
  if response.status >= 400:
68
  bookmark['dead_link'] = True
69
  bookmark['description'] = ''
 
70
  else:
71
  bookmark['dead_link'] = False
72
  content = await response.text()
@@ -83,11 +98,13 @@ async def fetch_url_info(session, bookmark):
83
  description = ''
84
 
85
  bookmark['description'] = description
 
86
  except Exception as e:
87
  bookmark['dead_link'] = True
88
  bookmark['etag'] = 'N/A'
89
  bookmark['status_code'] = 'N/A'
90
  bookmark['description'] = ''
 
91
  finally:
92
  fetch_cache[url] = {
93
  'etag': bookmark.get('etag'),
@@ -98,12 +115,18 @@ async def fetch_url_info(session, bookmark):
98
  return bookmark
99
 
100
  async def process_bookmarks_async(bookmarks):
101
- async with aiohttp.ClientSession() as session:
102
- tasks = []
103
- for bookmark in bookmarks:
104
- task = asyncio.ensure_future(fetch_url_info(session, bookmark))
105
- tasks.append(task)
106
- await asyncio.gather(*tasks)
 
 
 
 
 
 
107
 
108
  def generate_summary(bookmark):
109
  description = bookmark.get('description', '')
@@ -115,11 +138,13 @@ def generate_summary(bookmark):
115
  bookmark['summary'] = title
116
  else:
117
  bookmark['summary'] = 'No summary available.'
 
118
  return bookmark
119
 
120
  def assign_category(bookmark):
121
  if bookmark.get('dead_link'):
122
  bookmark['category'] = 'Dead Link'
 
123
  return bookmark
124
 
125
  summary = bookmark.get('summary', '').lower()
@@ -153,22 +178,32 @@ def assign_category(bookmark):
153
  for keyword in keywords:
154
  if re.search(r'\b' + re.escape(keyword) + r'\b', summary):
155
  assigned_category = category
 
156
  break
157
  if assigned_category != 'Uncategorized':
158
  break
159
 
160
  bookmark['category'] = assigned_category
 
 
161
  return bookmark
162
 
163
  def vectorize_and_index(bookmarks):
164
- summaries = [bookmark['summary'] for bookmark in bookmarks]
165
- embeddings = embedding_model.encode(summaries)
166
- dimension = embeddings.shape[1]
167
- faiss_idx = faiss.IndexFlatL2(dimension)
168
- faiss_idx.add(np.array(embeddings))
169
- return faiss_idx, embeddings
 
 
 
 
 
 
170
 
171
  def display_bookmarks():
 
172
  cards = ''
173
  for i, bookmark in enumerate(bookmarks):
174
  index = i + 1 # Start index at 1
@@ -200,42 +235,67 @@ def display_bookmarks():
200
  </div>
201
  '''
202
  cards += card_html
 
203
  return cards
204
 
205
  def process_uploaded_file(file):
206
  global bookmarks, faiss_index
 
207
  if file is None:
 
208
  return "Please upload a bookmarks HTML file.", ''
209
  try:
210
  file_content = file.decode('utf-8')
211
- except UnicodeDecodeError:
 
212
  return "Error decoding the file. Please ensure it's a valid HTML file.", ''
213
 
214
- bookmarks = parse_bookmarks(file_content)
 
 
 
 
215
 
216
  if not bookmarks:
 
217
  return "No bookmarks found in the uploaded file.", ''
218
 
219
  # Asynchronously fetch bookmark info
220
- asyncio.run(process_bookmarks_async(bookmarks))
 
 
 
 
221
 
222
  # Generate summaries and assign categories
223
  for bookmark in bookmarks:
224
  generate_summary(bookmark)
225
  assign_category(bookmark)
226
 
227
- faiss_index, embeddings = vectorize_and_index(bookmarks)
 
 
 
 
 
228
  message = f"Successfully processed {len(bookmarks)} bookmarks."
 
229
  bookmark_html = display_bookmarks()
230
  return message, bookmark_html
231
 
232
  def chatbot_response(user_query):
233
  if faiss_index is None or not bookmarks:
 
234
  return "No bookmarks available. Please upload and process your bookmarks first."
235
 
 
236
  # Vectorize user query
237
- user_embedding = embedding_model.encode([user_query])
238
- D, I = faiss_index.search(np.array(user_embedding), k=5) # Retrieve top 5 matches
 
 
 
 
239
 
240
  # Generate response
241
  response = ""
@@ -244,6 +304,7 @@ def chatbot_response(user_query):
244
  bookmark = bookmarks[idx]
245
  index = idx + 1 # Start index at 1
246
  response += f"{index}. Title: {bookmark['title']}\nURL: {bookmark['url']}\nCategory: {bookmark.get('category', 'Uncategorized')}\nSummary: {bookmark['summary']}\n\n"
 
247
  return response.strip()
248
 
249
  def edit_bookmark(bookmark_idx, new_title, new_url, new_category):
@@ -251,7 +312,9 @@ def edit_bookmark(bookmark_idx, new_title, new_url, new_category):
251
  try:
252
  bookmark_idx = int(bookmark_idx) - 1 # Adjust index to match list (starting at 0)
253
  if bookmark_idx < 0 or bookmark_idx >= len(bookmarks):
 
254
  return "Invalid bookmark index.", display_bookmarks()
 
255
  bookmarks[bookmark_idx]['title'] = new_title
256
  bookmarks[bookmark_idx]['url'] = new_url
257
  bookmarks[bookmark_idx]['category'] = new_category
@@ -261,9 +324,11 @@ def edit_bookmark(bookmark_idx, new_title, new_url, new_category):
261
  # Rebuild the FAISS index
262
  faiss_index, embeddings = vectorize_and_index(bookmarks)
263
  message = "Bookmark updated successfully."
 
264
  updated_html = display_bookmarks()
265
  return message, updated_html
266
  except Exception as e:
 
267
  return f"Error: {str(e)}", display_bookmarks()
268
 
269
  def delete_bookmarks(indices_str):
@@ -271,8 +336,10 @@ def delete_bookmarks(indices_str):
271
  try:
272
  indices = [int(idx.strip()) - 1 for idx in indices_str.split(',') if idx.strip().isdigit()]
273
  indices = sorted(indices, reverse=True)
 
274
  for idx in indices:
275
  if 0 <= idx < len(bookmarks):
 
276
  bookmarks.pop(idx)
277
  # Rebuild the FAISS index
278
  if bookmarks:
@@ -280,31 +347,41 @@ def delete_bookmarks(indices_str):
280
  else:
281
  faiss_index = None
282
  message = "Selected bookmarks deleted successfully."
 
283
  updated_html = display_bookmarks()
284
  return message, updated_html
285
  except Exception as e:
 
286
  return f"Error: {str(e)}", display_bookmarks()
287
 
288
  def export_bookmarks():
289
  if not bookmarks:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  return None
291
- # Create an HTML content similar to the imported bookmarks file
292
- soup = BeautifulSoup("<!DOCTYPE NETSCAPE-Bookmark-file-1><Title>Bookmarks</Title><H1>Bookmarks</H1>", 'html.parser')
293
- dl = soup.new_tag('DL')
294
- for bookmark in bookmarks:
295
- dt = soup.new_tag('DT')
296
- a = soup.new_tag('A', href=bookmark['url'])
297
- a.string = bookmark['title']
298
- dt.append(a)
299
- dl.append(dt)
300
- soup.append(dl)
301
- html_content = str(soup)
302
- # Encode the HTML content to base64 for download
303
- b64 = base64.b64encode(html_content.encode()).decode()
304
- href = f'data:text/html;base64,{b64}'
305
- return href
306
 
307
  def build_app():
 
308
  with gr.Blocks(css="app.css") as demo:
309
  gr.Markdown("<h1>Bookmark Manager App</h1>")
310
 
@@ -315,8 +392,7 @@ def build_app():
315
  bookmark_display = gr.HTML(label="Bookmarks")
316
 
317
  def update_bookmark_display(file):
318
- message, html_content = process_uploaded_file(file)
319
- return message, html_content
320
 
321
  process_button.click(
322
  update_bookmark_display,
@@ -354,8 +430,7 @@ def build_app():
354
  edit_button = gr.Button("Edit Bookmark")
355
 
356
  def update_manage_display():
357
- html_content = display_bookmarks()
358
- return html_content
359
 
360
  refresh_button.click(
361
  update_manage_display,
@@ -391,6 +466,7 @@ def build_app():
391
  # Initial load of the bookmarks display
392
  bookmark_display_manage.value = update_manage_display()
393
 
 
394
  demo.launch()
395
 
396
  if __name__ == "__main__":
 
10
  import aiohttp
11
  import re
12
  import base64
13
+ import logging
14
+
15
+ # Set up logging
16
+ logging.basicConfig(filename='app.log', level=logging.INFO,
17
+ format='%(asctime)s %(levelname)s %(name)s %(message)s')
18
+ logger = logging.getLogger(__name__)
19
 
20
  # Initialize models and variables
21
+ logger.info("Initializing models and variables")
22
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
23
  faiss_index = None
24
  bookmarks = []
 
51
  ]
52
 
53
  def parse_bookmarks(file_content):
54
+ logger.info("Parsing bookmarks")
55
+ try:
56
+ soup = BeautifulSoup(file_content, 'html.parser')
57
+ extracted_bookmarks = []
58
+ for link in soup.find_all('a'):
59
+ url = link.get('href')
60
+ title = link.text.strip()
61
+ if url and title:
62
+ extracted_bookmarks.append({'url': url, 'title': title})
63
+ logger.info(f"Extracted {len(extracted_bookmarks)} bookmarks")
64
+ return extracted_bookmarks
65
+ except Exception as e:
66
+ logger.error("Error parsing bookmarks: %s", e)
67
+ raise
68
 
69
  async def fetch_url_info(session, bookmark):
70
  url = bookmark['url']
 
73
  return bookmark
74
 
75
  try:
76
+ logger.info(f"Fetching URL info for: {url}")
77
  async with session.get(url, timeout=5) as response:
78
  bookmark['etag'] = response.headers.get('ETag', 'N/A')
79
  bookmark['status_code'] = response.status
 
81
  if response.status >= 400:
82
  bookmark['dead_link'] = True
83
  bookmark['description'] = ''
84
+ logger.warning(f"Dead link detected: {url} with status {response.status}")
85
  else:
86
  bookmark['dead_link'] = False
87
  content = await response.text()
 
98
  description = ''
99
 
100
  bookmark['description'] = description
101
+ logger.info(f"Fetched description for {url}")
102
  except Exception as e:
103
  bookmark['dead_link'] = True
104
  bookmark['etag'] = 'N/A'
105
  bookmark['status_code'] = 'N/A'
106
  bookmark['description'] = ''
107
+ logger.error(f"Error fetching URL info for {url}: {e}")
108
  finally:
109
  fetch_cache[url] = {
110
  'etag': bookmark.get('etag'),
 
115
  return bookmark
116
 
117
  async def process_bookmarks_async(bookmarks):
118
+ logger.info("Processing bookmarks asynchronously")
119
+ try:
120
+ async with aiohttp.ClientSession() as session:
121
+ tasks = []
122
+ for bookmark in bookmarks:
123
+ task = asyncio.ensure_future(fetch_url_info(session, bookmark))
124
+ tasks.append(task)
125
+ await asyncio.gather(*tasks)
126
+ logger.info("Completed processing bookmarks asynchronously")
127
+ except Exception as e:
128
+ logger.error(f"Error in asynchronous processing of bookmarks: {e}")
129
+ raise
130
 
131
  def generate_summary(bookmark):
132
  description = bookmark.get('description', '')
 
138
  bookmark['summary'] = title
139
  else:
140
  bookmark['summary'] = 'No summary available.'
141
+ logger.info(f"Generated summary for bookmark: {bookmark.get('url')}")
142
  return bookmark
143
 
144
  def assign_category(bookmark):
145
  if bookmark.get('dead_link'):
146
  bookmark['category'] = 'Dead Link'
147
+ logger.info(f"Assigned category 'Dead Link' to bookmark: {bookmark.get('url')}")
148
  return bookmark
149
 
150
  summary = bookmark.get('summary', '').lower()
 
178
  for keyword in keywords:
179
  if re.search(r'\b' + re.escape(keyword) + r'\b', summary):
180
  assigned_category = category
181
+ logger.info(f"Assigned category '{assigned_category}' to bookmark: {bookmark.get('url')}")
182
  break
183
  if assigned_category != 'Uncategorized':
184
  break
185
 
186
  bookmark['category'] = assigned_category
187
+ if assigned_category == 'Uncategorized':
188
+ logger.info(f"No matching category found for bookmark: {bookmark.get('url')}")
189
  return bookmark
190
 
191
  def vectorize_and_index(bookmarks):
192
+ logger.info("Vectorizing summaries and building FAISS index")
193
+ try:
194
+ summaries = [bookmark['summary'] for bookmark in bookmarks]
195
+ embeddings = embedding_model.encode(summaries)
196
+ dimension = embeddings.shape[1]
197
+ faiss_idx = faiss.IndexFlatL2(dimension)
198
+ faiss_idx.add(np.array(embeddings))
199
+ logger.info("FAISS index built successfully")
200
+ return faiss_idx, embeddings
201
+ except Exception as e:
202
+ logger.error(f"Error in vectorizing and indexing: {e}")
203
+ raise
204
 
205
  def display_bookmarks():
206
+ logger.info("Generating HTML display for bookmarks")
207
  cards = ''
208
  for i, bookmark in enumerate(bookmarks):
209
  index = i + 1 # Start index at 1
 
235
  </div>
236
  '''
237
  cards += card_html
238
+ logger.info("HTML display generated")
239
  return cards
240
 
241
  def process_uploaded_file(file):
242
  global bookmarks, faiss_index
243
+ logger.info("Processing uploaded file")
244
  if file is None:
245
+ logger.warning("No file uploaded")
246
  return "Please upload a bookmarks HTML file.", ''
247
  try:
248
  file_content = file.decode('utf-8')
249
+ except UnicodeDecodeError as e:
250
+ logger.error(f"Error decoding the file: {e}")
251
  return "Error decoding the file. Please ensure it's a valid HTML file.", ''
252
 
253
+ try:
254
+ bookmarks = parse_bookmarks(file_content)
255
+ except Exception as e:
256
+ logger.error(f"Error parsing bookmarks: {e}")
257
+ return "Error parsing the bookmarks HTML file.", ''
258
 
259
  if not bookmarks:
260
+ logger.warning("No bookmarks found in the uploaded file")
261
  return "No bookmarks found in the uploaded file.", ''
262
 
263
  # Asynchronously fetch bookmark info
264
+ try:
265
+ asyncio.run(process_bookmarks_async(bookmarks))
266
+ except Exception as e:
267
+ logger.error(f"Error processing bookmarks asynchronously: {e}")
268
+ return "Error processing bookmarks.", ''
269
 
270
  # Generate summaries and assign categories
271
  for bookmark in bookmarks:
272
  generate_summary(bookmark)
273
  assign_category(bookmark)
274
 
275
+ try:
276
+ faiss_index, embeddings = vectorize_and_index(bookmarks)
277
+ except Exception as e:
278
+ logger.error(f"Error building FAISS index: {e}")
279
+ return "Error building search index.", ''
280
+
281
  message = f"Successfully processed {len(bookmarks)} bookmarks."
282
+ logger.info(message)
283
  bookmark_html = display_bookmarks()
284
  return message, bookmark_html
285
 
286
  def chatbot_response(user_query):
287
  if faiss_index is None or not bookmarks:
288
+ logger.warning("No bookmarks available for chatbot")
289
  return "No bookmarks available. Please upload and process your bookmarks first."
290
 
291
+ logger.info(f"Chatbot received query: {user_query}")
292
  # Vectorize user query
293
+ try:
294
+ user_embedding = embedding_model.encode([user_query])
295
+ D, I = faiss_index.search(np.array(user_embedding), k=5) # Retrieve top 5 matches
296
+ except Exception as e:
297
+ logger.error(f"Error in chatbot vectorization or search: {e}")
298
+ return "Error processing your query."
299
 
300
  # Generate response
301
  response = ""
 
304
  bookmark = bookmarks[idx]
305
  index = idx + 1 # Start index at 1
306
  response += f"{index}. Title: {bookmark['title']}\nURL: {bookmark['url']}\nCategory: {bookmark.get('category', 'Uncategorized')}\nSummary: {bookmark['summary']}\n\n"
307
+ logger.info("Chatbot response generated")
308
  return response.strip()
309
 
310
  def edit_bookmark(bookmark_idx, new_title, new_url, new_category):
 
312
  try:
313
  bookmark_idx = int(bookmark_idx) - 1 # Adjust index to match list (starting at 0)
314
  if bookmark_idx < 0 or bookmark_idx >= len(bookmarks):
315
+ logger.warning(f"Invalid bookmark index for editing: {bookmark_idx + 1}")
316
  return "Invalid bookmark index.", display_bookmarks()
317
+ logger.info(f"Editing bookmark at index {bookmark_idx + 1}")
318
  bookmarks[bookmark_idx]['title'] = new_title
319
  bookmarks[bookmark_idx]['url'] = new_url
320
  bookmarks[bookmark_idx]['category'] = new_category
 
324
  # Rebuild the FAISS index
325
  faiss_index, embeddings = vectorize_and_index(bookmarks)
326
  message = "Bookmark updated successfully."
327
+ logger.info(message)
328
  updated_html = display_bookmarks()
329
  return message, updated_html
330
  except Exception as e:
331
+ logger.error(f"Error editing bookmark: {e}")
332
  return f"Error: {str(e)}", display_bookmarks()
333
 
334
  def delete_bookmarks(indices_str):
 
336
  try:
337
  indices = [int(idx.strip()) - 1 for idx in indices_str.split(',') if idx.strip().isdigit()]
338
  indices = sorted(indices, reverse=True)
339
+ logger.info(f"Deleting bookmarks at indices: {indices}")
340
  for idx in indices:
341
  if 0 <= idx < len(bookmarks):
342
+ logger.info(f"Deleting bookmark at index {idx + 1}")
343
  bookmarks.pop(idx)
344
  # Rebuild the FAISS index
345
  if bookmarks:
 
347
  else:
348
  faiss_index = None
349
  message = "Selected bookmarks deleted successfully."
350
+ logger.info(message)
351
  updated_html = display_bookmarks()
352
  return message, updated_html
353
  except Exception as e:
354
+ logger.error(f"Error deleting bookmarks: {e}")
355
  return f"Error: {str(e)}", display_bookmarks()
356
 
357
  def export_bookmarks():
358
  if not bookmarks:
359
+ logger.warning("No bookmarks to export")
360
+ return None
361
+ try:
362
+ logger.info("Exporting bookmarks to HTML")
363
+ # Create an HTML content similar to the imported bookmarks file
364
+ soup = BeautifulSoup("<!DOCTYPE NETSCAPE-Bookmark-file-1><Title>Bookmarks</Title><H1>Bookmarks</H1>", 'html.parser')
365
+ dl = soup.new_tag('DL')
366
+ for bookmark in bookmarks:
367
+ dt = soup.new_tag('DT')
368
+ a = soup.new_tag('A', href=bookmark['url'])
369
+ a.string = bookmark['title']
370
+ dt.append(a)
371
+ dl.append(dt)
372
+ soup.append(dl)
373
+ html_content = str(soup)
374
+ # Encode the HTML content to base64 for download
375
+ b64 = base64.b64encode(html_content.encode()).decode()
376
+ href = f'data:text/html;base64,{b64}'
377
+ logger.info("Bookmarks exported successfully")
378
+ return href
379
+ except Exception as e:
380
+ logger.error(f"Error exporting bookmarks: {e}")
381
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
 
383
  def build_app():
384
+ logger.info("Building Gradio app")
385
  with gr.Blocks(css="app.css") as demo:
386
  gr.Markdown("<h1>Bookmark Manager App</h1>")
387
 
 
392
  bookmark_display = gr.HTML(label="Bookmarks")
393
 
394
  def update_bookmark_display(file):
395
+ return process_uploaded_file(file)
 
396
 
397
  process_button.click(
398
  update_bookmark_display,
 
430
  edit_button = gr.Button("Edit Bookmark")
431
 
432
  def update_manage_display():
433
+ return display_bookmarks()
 
434
 
435
  refresh_button.click(
436
  update_manage_display,
 
466
  # Initial load of the bookmarks display
467
  bookmark_display_manage.value = update_manage_display()
468
 
469
+ logger.info("Launching Gradio app")
470
  demo.launch()
471
 
472
  if __name__ == "__main__":