Chris4K committed on
Commit
0913118
·
verified ·
1 Parent(s): 108d06b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -3
app.py CHANGED
@@ -249,17 +249,25 @@ def search_embeddings(chunks, embedding_model, vector_store_type, search_type, q
249
 
250
  start_time = time.time()
251
  results = retriever.invoke(preprocessed_query)
252
-
253
  def score_result(doc):
254
  similarity_score = vector_store.similarity_search_with_score(doc.page_content, k=1)[0][1]
255
  phonetic_score = phonetic_match(doc.page_content, query)
256
  return (1 - phonetic_weight) * similarity_score + phonetic_weight * phonetic_score
257
 
258
  results = sorted(results, key=score_result, reverse=True)
259
-
260
  end_time = time.time()
261
 
262
- return results[:top_k], end_time - start_time, vector_store
 
 
 
 
 
 
 
 
 
263
 
264
  # Evaluation Metrics
265
  def calculate_statistics(results, search_time, vector_store, num_tokens, embedding_model, query, top_k):
 
249
 
250
  start_time = time.time()
251
  results = retriever.invoke(preprocessed_query)
252
+
253
  def score_result(doc):
254
  similarity_score = vector_store.similarity_search_with_score(doc.page_content, k=1)[0][1]
255
  phonetic_score = phonetic_match(doc.page_content, query)
256
  return (1 - phonetic_weight) * similarity_score + phonetic_weight * phonetic_score
257
 
258
  results = sorted(results, key=score_result, reverse=True)
 
259
  end_time = time.time()
260
 
261
+ # Extract embeddings for each result and store them in the DataFrame
262
+ embeddings = [embedding_model.embed_query(doc.page_content) for doc in results]
263
+
264
+ # Create a DataFrame with the results and embeddings
265
+ results_df = pd.DataFrame({
266
+ 'content': [doc.page_content for doc in results],
267
+ 'embedding': embeddings
268
+ })
269
+
270
+ return results_df, end_time - start_time, vector_store
271
 
272
  # Evaluation Metrics
273
  def calculate_statistics(results, search_time, vector_store, num_tokens, embedding_model, query, top_k):