Spaces:

BinKhoaLe1812
/

Medical-Chatbot

Sleeping

App Files Files Community

BinKhoaLe1812 committed on Oct 8

Commit

13f8f13

verified ·

1 Parent(s): ec1346d

Upd api

Browse files

Files changed (1) hide show

api/retrieval.py +31 -10

api/retrieval.py CHANGED Viewed

@@ -181,7 +181,8 @@ class _NvidiaReranker:
     """Simple client for NVIDIA NIM reranking: nvidia/rerank-qa-mistral-4b"""
     def __init__(self):
         self.api_key = os.getenv("NVIDIA_URI")
-        self.model = "nvidia/rerank-qa-mistral-4b"
         # NIM rerank endpoint (subject to environment); keep configurable
         self.base_url = os.getenv("NVIDIA_RERANK_ENDPOINT", "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking")
         self.timeout_s = 30
@@ -194,16 +195,36 @@ class _NvidiaReranker:
         headers = {
             "Authorization": f"Bearer {self.api_key}",
             "Content-Type": "application/json",
         }
-        payload = {
-            "model": self.model,
-            "query": query,
-            "documents": [{"text": d} for d in documents],
-        }
         try:
-            resp = requests.post(self.base_url, headers=headers, json=payload, timeout=self.timeout_s)
-            resp.raise_for_status()
-            data = resp.json()
             # Expecting a list with scores and indices or texts
             results = []
             entries = data.get("results") or data.get("data") or []
@@ -227,4 +248,4 @@ class _NvidiaReranker:
         except Exception as e:
             logger.warning(f"[Reranker] Failed calling NVIDIA reranker: {e}")
             # On failure, return original order with neutral scores
-            return [{"text": d, "score": 0.0} for d in documents]

     """Simple client for NVIDIA NIM reranking: nvidia/rerank-qa-mistral-4b"""
     def __init__(self):
         self.api_key = os.getenv("NVIDIA_URI")
+        # Use provider doc model identifier
+        self.model = os.getenv("NVIDIA_RERANK_MODEL", "nv-rerank-qa-mistral-4b:1")
         # NIM rerank endpoint (subject to environment); keep configurable
         self.base_url = os.getenv("NVIDIA_RERANK_ENDPOINT", "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking")
         self.timeout_s = 30
         headers = {
             "Authorization": f"Bearer {self.api_key}",
             "Content-Type": "application/json",
+            "Accept": "application/json",
         }
+        # Truncate and limit candidates to avoid 4xx
+        docs = documents[:10]
+        docs = [d[:2000] for d in docs if isinstance(d, str)]
+        # Two payload shapes based on provider doc
+        payloads = [
+            {
+                "model": self.model,
+                "query": {"text": query},
+                "passages": [{"text": d} for d in docs],
+            },
+            {
+                "model": self.model,
+                "query": query,
+                "documents": [{"text": d} for d in docs],
+            },
+        ]
         try:
+            data = None
+            for p in payloads:
+                resp = requests.post(self.base_url, headers=headers, json=p, timeout=self.timeout_s)
+                if resp.status_code >= 400:
+                    # try next shape
+                    continue
+                data = resp.json()
+                break
+            if data is None:
+                # last attempt for diagnostics
+                resp.raise_for_status()
             # Expecting a list with scores and indices or texts
             results = []
             entries = data.get("results") or data.get("data") or []
         except Exception as e:
             logger.warning(f"[Reranker] Failed calling NVIDIA reranker: {e}")
             # On failure, return original order with neutral scores
+            return [{"text": d, "score": 0.0} for d in documents]