Spaces:

renceabishek
/

hr-assistance

Sleeping

renceabishek committed on Sep 21

Commit

cee126a

1 Parent(s): fd07624

Fix retrieve_context: replace the personal-info keyword filter with short-chunk filtering and score-based ordering

Files changed (1) hide show

main.py CHANGED Viewed

@@ -41,19 +41,27 @@ index.add(embeddings)
 # === Retrieval function ===
 def retrieve_context(query, top_k=3):
     query_embedding = embedder.encode([query])
-    _, indices = index.search(query_embedding, top_k)
-    selected_chunks = [all_chunks[i] for i in indices[0]]
-    # Filter out personal info unless query explicitly asks for it
-    personal_keywords = ["email", "contact", "phone", "location", "website", "name"]
-    if not any(keyword in query.lower() for keyword in personal_keywords):
-        selected_chunks = [chunk for chunk in selected_chunks if "Personal Information" not in chunk]
-        print("🔍 Filtered chunks:\n", selected_chunks)
-        # If filtering removed all chunks, fall back to original top_k
-        if not selected_chunks:
-            selected_chunks = [all_chunks[i] for i in indices[0]]
-    return "\n\n".join(selected_chunks)
 # === Load QA model ===
 qa_pipeline = pipeline("question-answering", model="deepset/tinyroberta-squad2")

 # === Retrieval function ===
 def retrieve_context(query, top_k=3):
     query_embedding = embedder.encode([query])
+    scores, indices = index.search(query_embedding, top_k)
+    selected_chunks = []
+    for i, score in zip(indices[0], scores[0]):
+        chunk = all_chunks[i]
+        # Skip chunks shorter than 10 words unless the query mentions "salary", "notice", "job", or "current"
+        if len(chunk.split()) < 10 and not any(k in query.lower() for k in ["salary", "notice", "job", "current"]):
+            continue
+        selected_chunks.append((chunk, score))
+    # If nothing survives filtering, fall back to original top_k
+    if not selected_chunks:
+        selected_chunks = [(all_chunks[i], scores[0][j]) for j, i in enumerate(indices[0])]
+    # Sort by score (lowest distance = best match)
+    # print("🔍 selected_chunks retrieved chunks:\n", selected_chunks)
+    selected_chunks.sort(key=lambda x: x[1])
+    final_chunks = [chunk for chunk, _ in selected_chunks[:top_k]]
+    print("🔍 Final retrieved chunks:\n", final_chunks)
+    return "\n\n".join(final_chunks)
 # === Load QA model ===
 qa_pipeline = pipeline("question-answering", model="deepset/tinyroberta-squad2")