makcrx committed on
Commit
8985cde
1 Parent(s): 39a237c

fix filter

Browse files
Files changed (1) hide show
  1. app.py +13 -6
app.py CHANGED
@@ -8,21 +8,28 @@ from extract_keywords import extract_keywords2
8
  embeddings = SentenceTransformerEmbeddings(model_name="multi-qa-MiniLM-L6-cos-v1")
9
  db = FAISS.load_local('faiss_qa_2023-08-20', embeddings)
10
 
 
 
 
 
 
 
 
 
 
 
 
11
  def main(query):
12
  query = query.lower()
13
  query_keywords, query = extract_keywords2(query)
14
  result_docs = db.similarity_search_with_score(query, k=50)
15
- print(query, query_keywords)
16
 
17
  if len(query_keywords) > 0:
18
- result_docs = list(filter(lambda doc: len(extract_keywords2(doc[0].page_content)[0].intersection(query_keywords)) >= len(query_keywords), result_docs))
19
 
20
  if len(result_docs) == 0:
21
  return 'Ответ не найден', 0, ''
22
-
23
- #if len(result_docs) == 1:
24
- # score, index = 0, 0
25
- #else:
26
  sentences = [doc[0].page_content for doc in result_docs]
27
  score, index = reranking.search(query, sentences)
28
 
 
8
  embeddings = SentenceTransformerEmbeddings(model_name="multi-qa-MiniLM-L6-cos-v1")
9
  db = FAISS.load_local('faiss_qa_2023-08-20', embeddings)
10
 
11
+ def search_filter_function(query_keywords):
12
+ def fn(doc):
13
+ doc_keywords = extract_keywords2(doc[0].page_content)[0]
14
+ intersection_keywords = doc_keywords.intersection(query_keywords)
15
+ if len(query_keywords) == 0:
16
+ return len(doc_keywords) == 0
17
+ else:
18
+ return len(intersection_keywords) >= len(query_keywords)
19
+ return fn
20
+
21
+
22
  def main(query):
23
  query = query.lower()
24
  query_keywords, query = extract_keywords2(query)
25
  result_docs = db.similarity_search_with_score(query, k=50)
 
26
 
27
  if len(query_keywords) > 0:
28
+ result_docs = list(filter(search_filter_function(query_keywords), result_docs))
29
 
30
  if len(result_docs) == 0:
31
  return 'Ответ не найден', 0, ''
32
+
 
 
 
33
  sentences = [doc[0].page_content for doc in result_docs]
34
  score, index = reranking.search(query, sentences)
35