kiyer committed
Commit 7d0b0c1
1 Parent(s): 793347c

try fix for index issue

Files changed (1)
  1. app.py +30 -26
app.py CHANGED
@@ -243,28 +243,28 @@ class RetrievalSystem():
                                                          query_embedding,
                                                          rerank_top_k,
                                                          return_scores = False)
-            try:
-                docs_for_rerank = [small_df['abstract'][i] for i in range(rerank_top_k)]
-                if len(docs_for_rerank) == 0:
-                    return []
-                reranked_results = self.cohere_client.rerank(
-                    query=query,
-                    documents=docs_for_rerank,
-                    model='rerank-english-v3.0',
-                    top_n=top_k
-                )
-                final_results = []
-                for result in reranked_results.results:
-                    doc_id = top_results[result.index]
-                    doc_text = docs_for_rerank[result.index]
-                    score = float(result.relevance_score)
-                    final_results.append([doc_id, "", score])
-                final_indices = [doc[0] for doc in final_results]
-                if return_scores:
-                    return {result[0]: result[2] for result in final_results}, self.dataset[final_indices]
-                return [doc[0] for doc in final_results], self.dataset[final_indices]
-            except:
-                print('heavy load, please wait 10s and try again.')
+            # try:
+            docs_for_rerank = [small_df['abstract'][i] for i in range(rerank_top_k)]
+            if len(docs_for_rerank) == 0:
+                return []
+            reranked_results = self.cohere_client.rerank(
+                query=query,
+                documents=docs_for_rerank,
+                model='rerank-english-v3.0',
+                top_n=top_k
+            )
+            final_results = []
+            for result in reranked_results.results:
+                doc_id = top_results[result.index]
+                doc_text = docs_for_rerank[result.index]
+                score = float(result.relevance_score)
+                final_results.append([doc_id, "", score])
+            final_indices = [doc[0] for doc in final_results]
+            if return_scores:
+                return {result[0]: result[2] for result in final_results}, self.dataset[final_indices]
+            return [doc[0] for doc in final_results], self.dataset[final_indices]
+            # except:
+            #     print('heavy load, please wait 10s and try again.')
         else:
             top_results, small_df = self.rank_and_filter(query,
                                                          query_embedding,
@@ -278,6 +278,8 @@ class RetrievalSystem():
         df = pd.DataFrame(small_df)
         df = df.drop(columns=['umap_x','umap_y','cite_bibcodes','ref_bibcodes'])
         links = ['https://ui.adsabs.harvard.edu/abs/'+i+'/abstract' for i in small_df['bibcode']]
+
+        # st.write(top_results[0:10])
         scores = [top_results[i] for i in top_results]
         indices = [i for i in top_results]
         df.insert(1,'ADS Link',links,True)
@@ -477,7 +479,7 @@ def run_agent_qa(query):
 
 def run_rag_qa(query, papers_df):
 
-    try:
+    # try:
         loaders = []
 
         documents = []
@@ -497,6 +499,8 @@ def run_rag_qa(query, papers_df):
         # retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6, "fetch_k": len(splits)})
         retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
 
+
+
         if st.session_state.question_type == 'Bibliometric':
             template = bibliometric_prompt
         elif st.session_state.question_type == 'Single-paper':
@@ -523,10 +527,10 @@ def run_rag_qa(query, papers_df):
         rag_answer = rag_chain_with_source.invoke(query, )
         vectorstore.delete_collection()
 
-    except:
-        st.subheader('heavy load! please wait 10 seconds and try again.')
+    # except:
+    #     st.subheader('heavy load! please wait 10 seconds and try again.')
 
-    return rag_answer
+        return rag_answer
 
 def guess_question_type(query: str):
 
 
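Note on the index handling this commit is poking at: `result.index` in Cohere's rerank response is a position within the `docs_for_rerank` list that was sent to the API, and the code maps it back to a corpus-level id via `top_results[result.index]` before indexing `self.dataset[final_indices]`. A minimal, self-contained sketch of that mapping; the names `client`, `corpus`, and `top_results` here are stand-ins, not the app's actual attributes:

# Sketch of the retrieve-then-rerank index mapping.
# Assumed setup (not from the diff): client = cohere.Client(api_key);
# the app keeps the equivalent object on self.cohere_client.
import cohere

def rerank_candidates(client, query, corpus, top_results, top_k=10):
    # Candidate texts, assumed to be in the same order as the first-stage indices.
    docs_for_rerank = [corpus[i] for i in top_results]
    if len(docs_for_rerank) == 0:
        return []
    response = client.rerank(
        query=query,
        documents=docs_for_rerank,
        model='rerank-english-v3.0',
        top_n=top_k,
    )
    # result.index points into docs_for_rerank, not into the full corpus,
    # so translate it back through top_results before touching the dataset.
    return [(top_results[r.index], float(r.relevance_score)) for r in response.results]

One thing worth checking against the commit title: the diff builds the candidates with `range(rerank_top_k)` over `small_df` but maps `result.index` through `top_results`, so if those two orderings ever diverge the mapping would point at the wrong rows.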
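For context on the retriever that run_rag_qa uses: only the `as_retriever(search_type="similarity", search_kwargs={"k": 6})` call and the final `vectorstore.delete_collection()` appear in the diff. The vector store, embedding model, and text splitter in the sketch below (Chroma, OpenAIEmbeddings, RecursiveCharacterTextSplitter) are assumptions made purely to keep the example runnable, not the app's confirmed choices:

# Hypothetical sketch of the retriever setup that run_rag_qa depends on.
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

def build_retriever(abstracts):
    # Wrap the retrieved abstracts as documents and chunk them.
    docs = [Document(page_content=text) for text in abstracts]
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    splits = splitter.split_documents(docs)
    # Build a throwaway vector store over the chunks (embedding model assumed).
    vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
    # Same retriever configuration as the diff: plain similarity search, k=6.
    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
    return retriever, vectorstore

As in the diff, the caller would run the RAG chain with the retriever and then call vectorstore.delete_collection(), presumably so the per-query store does not accumulate across Streamlit reruns.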