mishig HF staff committed on
Commit
3d7b683
1 Parent(s): ff6851f

use correct preprocessing

Browse files
Files changed (1) hide show
  1. app.py +8 -1
app.py CHANGED
@@ -62,6 +62,12 @@ def create_hnsw_index(embeddings_np, space='ip', ef_construction=100, M=16):
62
  index.add_items(embeddings_np, ids)
63
  return index
64
 
 
 
 
 
 
 
65
  app = FastAPI()
66
 
67
  class EmbeddingsSimilarityReq(BaseModel):
@@ -74,7 +80,8 @@ async def find_similar_paragraphsitem(req: EmbeddingsSimilarityReq):
74
  print("Len of batches", len(req.paragraphs))
75
 
76
  print("creating embeddings", current_timestamp())
77
- embeddings_np = get_embeddings([req.query]+req.paragraphs)
 
78
  query_embedding, chunks_embeddings = embeddings_np[0], embeddings_np[1:]
79
 
80
  print("creating index", current_timestamp())
 
62
  index.add_items(embeddings_np, ids)
63
  return index
64
 
65
+ def preprocess_texts(query, paragraphs):
66
+ query = f'query: {query}'
67
+ paragraphs = [f'passage: {p}' for p in paragraphs]
68
+ return [query]+paragraphs
69
+
70
+
71
  app = FastAPI()
72
 
73
  class EmbeddingsSimilarityReq(BaseModel):
 
80
  print("Len of batches", len(req.paragraphs))
81
 
82
  print("creating embeddings", current_timestamp())
83
+ inputs = preprocess_texts(req.query, req.paragraphs)
84
+ embeddings_np = get_embeddings(inputs)
85
  query_embedding, chunks_embeddings = embeddings_np[0], embeddings_np[1:]
86
 
87
  print("creating index", current_timestamp())