dorogan committed on
Commit
ac7cbfc
1 Parent(s): 463d6b1

Update: changes in search API (teasers and docs texts were separated)

Browse files
Files changed (2) hide show
  1. app.py +3 -2
  2. semantic_search.py +6 -2
app.py CHANGED
@@ -51,8 +51,9 @@ def search_route():
51
  top = data.get('top', 10)
52
  use_llm_for_teasers = data.get('use_llm_for_teasers', False)
53
  request_id = data.get('request_id', '')
54
- titles, docs, scores = search.search(query, top, use_llm_for_teasers)
55
- result = [{'title': str(item1), 'text': str(item2), 'relevance': str(item3)} for item1, item2, item3 in zip(titles, docs, scores)]
 
56
 
57
  # Log the query and result if ENABLE_LOGS is True
58
  log_query_result(query, top, request_id, result)
 
51
  top = data.get('top', 10)
52
  use_llm_for_teasers = data.get('use_llm_for_teasers', False)
53
  request_id = data.get('request_id', '')
54
+ titles, docs, teasers, scores = search.search(query, top, use_llm_for_teasers)
55
+ result = [{'title': str(item1), 'text': str(item2), 'teaser': (item3), 'relevance': str(item4)}
56
+ for item1, item2, item3, item4 in zip(titles, docs, teasers, scores)]
57
 
58
  # Log the query and result if ENABLE_LOGS is True
59
  log_query_result(query, top, request_id, result)
semantic_search.py CHANGED
@@ -201,14 +201,18 @@ class SemanticSearch:
201
  distances, indices = self.index_docs.search(query_embeds, len(self.all_docs_info))
202
  pred = [self.all_docs_info[x]['doc_name'] for x in indices[0]]
203
  preds, scores = self.search_results_filtering(pred, distances[0])
 
204
  docs = []
205
  for ref in preds:
206
  doc_index = self.docs_names.index(ref)
207
  doc_text = self.all_docs_info[doc_index]['doc_text']
 
 
208
  most_relevant_teaser = self.get_most_relevant_teaser(question=query,
209
  doc_index=doc_index)
210
  if use_llm_for_teasers:
211
  most_relevant_teaser = self.rebuild_teaser_with_llm(question=query,
212
  teaser=most_relevant_teaser)
213
- docs.append(most_relevant_teaser + '\n\n' + f'Полный текст документа:\n{doc_text}')
214
- return preds[:top], docs[:top], scores[:top]
 
 
201
  distances, indices = self.index_docs.search(query_embeds, len(self.all_docs_info))
202
  pred = [self.all_docs_info[x]['doc_name'] for x in indices[0]]
203
  preds, scores = self.search_results_filtering(pred, distances[0])
204
+ teasers = []
205
  docs = []
206
  for ref in preds:
207
  doc_index = self.docs_names.index(ref)
208
  doc_text = self.all_docs_info[doc_index]['doc_text']
209
+ docs.append(doc_text) # Add the relevant document text
210
+
211
  most_relevant_teaser = self.get_most_relevant_teaser(question=query,
212
  doc_index=doc_index)
213
  if use_llm_for_teasers:
214
  most_relevant_teaser = self.rebuild_teaser_with_llm(question=query,
215
  teaser=most_relevant_teaser)
216
+ teasers.append(most_relevant_teaser) # Add the most relevant teaser
217
+
218
+ return preds[:top], docs[:top], teasers[:top], scores[:top]