Abdul-Ib committed on
Commit
ed8ff18
1 Parent(s): 0eed396

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -3
app.py CHANGED
@@ -5,9 +5,14 @@ import pprint
5
  from sentence_transformers import SentenceTransformer, CrossEncoder, util
6
 
7
  # read data
8
- df = pd.read_csv('./assets/final_combined.csv').to_dict(orient='records')
 
9
  doc_embeddings = np.load('./assets/final_combined_embed.npy', allow_pickel=True)
10
 
 
 
 
 
11
  def semantic_search(normalized_query):
12
  '''
13
  function to perform semantic search given a search query
@@ -20,7 +25,7 @@ def re_ranker(normalized_query, hits):
20
  '''
21
  function to re-rank semantic search results using cross encoding
22
  '''
23
- cross_inp = [[query, doc_embeddings[hit['corpus_id']]] for hit in hit]
24
  cross_scores = cross_encoder.predict(cross_inp)
25
 
26
  for idx in range(len(cross_scores)):
@@ -31,7 +36,7 @@ def re_ranker(normalized_query, hits):
31
  def print_results(hits, k_items):
32
  results = ""
33
  for hit in hits[:k_items]:
34
- results += pprint.pformat(df[hit['corpus_id']], indent=4)
35
  return results
36
 
37
  def predict(query):
 
5
  from sentence_transformers import SentenceTransformer, CrossEncoder, util
6
 
7
  # read data
8
+ df = pd.read_csv('./assets/final_combined.csv')
9
+ df_dict = df.to_dict(orient='records')
10
  doc_embeddings = np.load('./assets/final_combined_embed.npy', allow_pickel=True)
11
 
12
+ # models
13
+ bi_encoder = SentenceTransformer("intfloat/multilingual-e5-base", cache_folder = "./assets")
14
+ cross_encoder = CrossEncoder('cross-encoder/nli-deberta-v3-base')
15
+
16
  def semantic_search(normalized_query):
17
  '''
18
  function to perform semantic search given a search query
 
25
  '''
26
  function to re-rank semantic search results using cross encoding
27
  '''
28
+ cross_inp = [[query, df['representation'][hit['corpus_id']]] for hit in hit]
29
  cross_scores = cross_encoder.predict(cross_inp)
30
 
31
  for idx in range(len(cross_scores)):
 
36
  def print_results(hits, k_items):
37
  results = ""
38
  for hit in hits[:k_items]:
39
+ results += pprint.pformat(df_dict[hit['corpus_id']], indent=4)
40
  return results
41
 
42
  def predict(query):