Spaces:

m97j
/

pls-rag

Sleeping

m97j committed on Nov 8

Commit

3c9754e

1 Parent(s): 5ca4913

Initial codes commit

Files changed (2) hide show

models/embedder.py CHANGED Viewed

@@ -23,8 +23,10 @@ def get_embedding(request: Request, text: str) -> List[float]:
     ort_inputs = {k: v for k, v in inputs.items()}
     ort_outs = sess.run(None, ort_inputs)
     print([arr.shape for arr in ort_outs])
-    # 일반적으로 첫 번째 출력이 [batch, dim] 임베딩
-    vec = ort_outs[0][0]
     return _l2_normalize(vec)

     ort_inputs = {k: v for k, v in inputs.items()}
     ort_outs = sess.run(None, ort_inputs)
     print([arr.shape for arr in ort_outs])
+    # 첫 번째 출력이 (batch, seq_len, dim)
+    token_embeddings = ort_outs[0]  # shape (1, seq_len, dim)
+    # 평균 pooling으로 문장 임베딩 생성
+    vec = token_embeddings.mean(axis=1)[0]  # shape (dim,)
     return _l2_normalize(vec)

modules/retriever.py CHANGED Viewed

@@ -14,7 +14,7 @@ def has_index() -> bool:
 def retrieve_ids(query_embedding: list[float]) -> list[int]:
     if _index is None:
         raise RuntimeError("FAISS index is not loaded in memory.")
-    q = np.array([query_embedding], dtype="float32")
     _, idx = _index.search(q, TOP_K)
     return [int(i) for i in idx[0]]

 def retrieve_ids(query_embedding: list[float]) -> list[int]:
     if _index is None:
         raise RuntimeError("FAISS index is not loaded in memory.")
+    q = np.array(query_embedding, dtype="float32").reshape(1, -1)
     _, idx = _index.search(q, TOP_K)
     return [int(i) for i in idx[0]]