waleko committed on
Commit
19d9c36
1 Parent(s): 322806a

Change prompt; Change Rerank model

Browse files
backend/semantic_search.py CHANGED
@@ -2,7 +2,8 @@ import lancedb
2
  import os
3
  import gradio as gr
4
  from sentence_transformers import SentenceTransformer
5
- from FlagEmbedding import FlagReranker
 
6
 
7
 
8
  db = lancedb.connect(".lancedb")
@@ -13,7 +14,8 @@ TEXT_COLUMN = os.getenv("TEXT_COLUMN", "text")
13
  BATCH_SIZE = int(os.getenv("BATCH_SIZE", 32))
14
 
15
  retriever = SentenceTransformer(os.getenv("EMB_MODEL"))
16
- reranker = FlagReranker(os.getenv("RERANKER_MODEL", 'BAAI/bge-reranker-large'), use_fp16=True)
 
17
 
18
  def retrieve(query, k):
19
  query_vec = retriever.encode(query)
@@ -29,7 +31,7 @@ def retrieve(query, k):
29
  def rerank(documents, query, k):
30
  try:
31
  query_pairs = [[query, doc] for doc in documents]
32
- scores = reranker.compute_score(query_pairs)
33
  scored_documents = list(zip(documents, scores))
34
  scored_documents.sort(key=lambda x: x[1], reverse=True)
35
  top_k_documents = [doc for doc, _ in scored_documents[:k]]
 
2
  import os
3
  import gradio as gr
4
  from sentence_transformers import SentenceTransformer
5
+ from sentence_transformers import CrossEncoder
6
+ # from FlagEmbedding import FlagReranker
7
 
8
 
9
  db = lancedb.connect(".lancedb")
 
14
  BATCH_SIZE = int(os.getenv("BATCH_SIZE", 32))
15
 
16
  retriever = SentenceTransformer(os.getenv("EMB_MODEL"))
17
+ # reranker = FlagReranker(os.getenv("RERANKER_MODEL", 'BAAI/bge-reranker-large'), use_fp16=True)
18
+ reranker = CrossEncoder(os.getenv("RERANKER_MODEL", 'cross-encoder/ms-marco-MiniLM-L-6-v2'), max_length=512)
19
 
20
  def retrieve(query, k):
21
  query_vec = retriever.encode(query)
 
31
  def rerank(documents, query, k):
32
  try:
33
  query_pairs = [[query, doc] for doc in documents]
34
+ scores = reranker.predict(query_pairs)
35
  scored_documents = list(zip(documents, scores))
36
  scored_documents.sort(key=lambda x: x[1], reverse=True)
37
  top_k_documents = [doc for doc, _ in scored_documents[:k]]
templates/template.j2 CHANGED
@@ -1,4 +1,4 @@
1
- Instructions: Use the following unique documents in the Context section to answer the Query at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
2
  Context:
3
  {% for doc in documents %}
4
  ---
 
1
+ Instructions: Use the following unique sorted by relevance documents in the Context section to answer the Query at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
2
  Context:
3
  {% for doc in documents %}
4
  ---