Update app.py
Browse files
app.py
CHANGED
@@ -249,17 +249,25 @@ def search_embeddings(chunks, embedding_model, vector_store_type, search_type, q
|
|
249 |
|
250 |
start_time = time.time()
|
251 |
results = retriever.invoke(preprocessed_query)
|
252 |
-
|
253 |
def score_result(doc):
|
254 |
similarity_score = vector_store.similarity_search_with_score(doc.page_content, k=1)[0][1]
|
255 |
phonetic_score = phonetic_match(doc.page_content, query)
|
256 |
return (1 - phonetic_weight) * similarity_score + phonetic_weight * phonetic_score
|
257 |
|
258 |
results = sorted(results, key=score_result, reverse=True)
|
259 |
-
|
260 |
end_time = time.time()
|
261 |
|
262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
|
264 |
# Evaluation Metrics
|
265 |
def calculate_statistics(results, search_time, vector_store, num_tokens, embedding_model, query, top_k):
|
|
|
249 |
|
250 |
start_time = time.time()
|
251 |
results = retriever.invoke(preprocessed_query)
|
252 |
+
|
253 |
def score_result(doc):
|
254 |
similarity_score = vector_store.similarity_search_with_score(doc.page_content, k=1)[0][1]
|
255 |
phonetic_score = phonetic_match(doc.page_content, query)
|
256 |
return (1 - phonetic_weight) * similarity_score + phonetic_weight * phonetic_score
|
257 |
|
258 |
results = sorted(results, key=score_result, reverse=True)
|
|
|
259 |
end_time = time.time()
|
260 |
|
261 |
+
# Extract embeddings for each result and store them in the DataFrame
|
262 |
+
embeddings = [embedding_model.embed_query(doc.page_content) for doc in results]
|
263 |
+
|
264 |
+
# Create a DataFrame with the results and embeddings
|
265 |
+
results_df = pd.DataFrame({
|
266 |
+
'content': [doc.page_content for doc in results],
|
267 |
+
'embedding': embeddings
|
268 |
+
})
|
269 |
+
|
270 |
+
return results_df, end_time - start_time, vector_store
|
271 |
|
272 |
# Evaluation Metrics
|
273 |
def calculate_statistics(results, search_time, vector_store, num_tokens, embedding_model, query, top_k):
|