alx-d committed on
Commit
9236d0a
·
verified ·
1 Parent(s): 01f968f

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. advanced_rag.py +22 -8
advanced_rag.py CHANGED
@@ -150,15 +150,29 @@ class ElevatedRagChain:
150
  if not hf_api_token:
151
  raise ValueError("Please set the HF_API_TOKEN environment variable to use remote inference.")
152
  client = InferenceClient(token=hf_api_token, timeout=240)
 
 
153
  def remote_generate(prompt: str) -> str:
154
- response = client.text_generation(
155
- prompt,
156
- model=repo_id,
157
- temperature=self.temperature,
158
- top_p=self.top_p,
159
- repetition_penalty=1.1,
160
- wait_for_model=True,
161
- )
 
 
 
 
 
 
 
 
 
 
 
 
162
  return response
163
  from langchain.llms.base import LLM
164
  class RemoteLLM(LLM):
 
150
  if not hf_api_token:
151
  raise ValueError("Please set the HF_API_TOKEN environment variable to use remote inference.")
152
  client = InferenceClient(token=hf_api_token, timeout=240)
153
+
154
+ from huggingface_hub.utils._errors import HfHubHTTPError
155
  def remote_generate(prompt: str) -> str:
156
+ max_retries = 5
157
+ backoff = 2 # start with 2 seconds
158
+ response = None
159
+ for attempt in range(max_retries):
160
+ try:
161
+ response = client.text_generation(
162
+ prompt,
163
+ model=repo_id,
164
+ temperature=self.temperature,
165
+ top_p=self.top_p,
166
+ repetition_penalty=1.1
167
+ )
168
+ return response
169
+ except HfHubHTTPError as e:
170
+ debug_print(f"Attempt {attempt+1} failed with error: {e}")
171
+ # if this is the last attempt, re-raise the error
172
+ if attempt == max_retries - 1:
173
+ raise
174
+ time.sleep(backoff)
175
+ backoff *= 2 # exponential backoff
176
  return response
177
  from langchain.llms.base import LLM
178
  class RemoteLLM(LLM):