Shreyas094 committed on
Commit
0d127c5
·
verified ·
1 Parent(s): d8b711d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -22
app.py CHANGED
@@ -273,12 +273,16 @@ def rank_search_results(titles, summaries, model):
273
 
274
  try:
275
  ranks_str = generate_chunked_response(model, ranking_prompt)
 
 
 
 
 
 
276
  ranks = [float(rank.strip()) for rank in ranks_str.split(',') if rank.strip()]
277
 
278
- # Check if we have the correct number of ranks
279
  if len(ranks) != len(titles):
280
  print(f"Warning: Number of ranks ({len(ranks)}) does not match number of titles ({len(titles)})")
281
- print(f"Model output: {ranks_str}")
282
  return list(range(1, len(titles) + 1))
283
 
284
  return ranks
@@ -295,12 +299,6 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search):
295
  model = get_model(temperature, top_p, repetition_penalty)
296
  embed = get_embeddings()
297
 
298
- # Check if the FAISS database exists
299
- if os.path.exists("faiss_database"):
300
- database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
301
- else:
302
- database = None
303
-
304
  if web_search:
305
  search_results = google_search(question)
306
 
@@ -323,6 +321,8 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search):
323
  if not processed_results:
324
  return "No valid search results found."
325
 
 
 
326
  # Rank the results
327
  titles = [r["title"] for r in processed_results]
328
  summaries = [r["summary"] for r in processed_results]
@@ -332,6 +332,8 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search):
332
  print(f"Error in ranking results: {str(e)}. Using default ranking.")
333
  ranks = list(range(1, len(processed_results) + 1))
334
 
 
 
335
  # Update Vector DB
336
  current_date = datetime.now().strftime("%Y-%m-%d")
337
  update_vector_db_with_search_results(processed_results, ranks, current_date)
@@ -416,32 +418,45 @@ def update_vectors(files, use_recursive_splitter):
416
 
417
  return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files."
418
 
419
- def update_vector_db_with_search_results(search_results, summaries, ranks):
420
  embed = get_embeddings()
421
- database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True) if os.path.exists("faiss_database") else FAISS.from_documents([], embed)
422
-
423
- current_date = datetime.now().strftime("%Y-%m-%d")
424
 
425
  documents = []
426
- for result, summary, rank in zip(search_results, summaries, ranks):
427
- if summary: # Only create a document if there's a summary
428
  doc = Document(
429
- page_content=summary,
430
  metadata={
431
  "search_date": current_date,
432
- "search_title": result["title"],
433
- "search_content": result["text"],
434
- "search_summary": summary,
435
  "rank": rank
436
  }
437
  )
438
  documents.append(doc)
439
 
440
- if documents: # Only update the database if there are documents to add
441
- database.add_documents(documents)
442
- database.save_local("faiss_database")
443
- else:
444
  print("No valid documents to add to the database.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
 
446
  def export_vector_db_to_excel():
447
  embed = get_embeddings()
 
273
 
274
  try:
275
  ranks_str = generate_chunked_response(model, ranking_prompt)
276
+ print(f"Model output for ranking: {ranks_str}")
277
+
278
+ if not ranks_str.strip():
279
+ print("Model returned an empty string for ranking.")
280
+ return list(range(1, len(titles) + 1))
281
+
282
  ranks = [float(rank.strip()) for rank in ranks_str.split(',') if rank.strip()]
283
 
 
284
  if len(ranks) != len(titles):
285
  print(f"Warning: Number of ranks ({len(ranks)}) does not match number of titles ({len(titles)})")
 
286
  return list(range(1, len(titles) + 1))
287
 
288
  return ranks
 
299
  model = get_model(temperature, top_p, repetition_penalty)
300
  embed = get_embeddings()
301
 
 
 
 
 
 
 
302
  if web_search:
303
  search_results = google_search(question)
304
 
 
321
  if not processed_results:
322
  return "No valid search results found."
323
 
324
+ print(f"Number of processed results: {len(processed_results)}")
325
+
326
  # Rank the results
327
  titles = [r["title"] for r in processed_results]
328
  summaries = [r["summary"] for r in processed_results]
 
332
  print(f"Error in ranking results: {str(e)}. Using default ranking.")
333
  ranks = list(range(1, len(processed_results) + 1))
334
 
335
+ print(f"Number of ranks: {len(ranks)}")
336
+
337
  # Update Vector DB
338
  current_date = datetime.now().strftime("%Y-%m-%d")
339
  update_vector_db_with_search_results(processed_results, ranks, current_date)
 
418
 
419
  return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files."
420
 
421
+ def update_vector_db_with_search_results(search_results, ranks, current_date):
422
  embed = get_embeddings()
 
 
 
423
 
424
  documents = []
425
+ for result, rank in zip(search_results, ranks):
426
+ if result.get("summary"):
427
  doc = Document(
428
+ page_content=result["summary"],
429
  metadata={
430
  "search_date": current_date,
431
+ "search_title": result.get("title", ""),
432
+ "search_content": result.get("content", ""),
433
+ "search_summary": result["summary"],
434
  "rank": rank
435
  }
436
  )
437
  documents.append(doc)
438
 
439
+ if not documents:
 
 
 
440
  print("No valid documents to add to the database.")
441
+ return
442
+
443
+ texts = [doc.page_content for doc in documents]
444
+ metadatas = [doc.metadata for doc in documents]
445
+
446
+ print(f"Number of documents to embed: {len(texts)}")
447
+ print(f"First document text: {texts[0][:100]}...") # Print first 100 characters of the first document
448
+
449
+ try:
450
+ if os.path.exists("faiss_database"):
451
+ database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
452
+ database.add_texts(texts, metadatas=metadatas)
453
+ else:
454
+ database = FAISS.from_texts(texts, embed, metadatas=metadatas)
455
+
456
+ database.save_local("faiss_database")
457
+ print("Database updated successfully.")
458
+ except Exception as e:
459
+ print(f"Error updating database: {str(e)}")
460
 
461
  def export_vector_db_to_excel():
462
  embed = get_embeddings()