meg-huggingface committed · Commit a9f6487 · 1 Parent(s): e79b5e9
Trying to handle endpoint errors
src/backend/inference_endpoint.py
CHANGED
@@ -5,6 +5,7 @@ import logging
 from huggingface_hub import create_inference_endpoint, get_inference_endpoint
 from src.backend.run_toxicity_eval import get_generation
 from src.logging import setup_logger
+import requests
 logging.basicConfig(level=logging.DEBUG)
 logger = setup_logger(__name__)
 TIMEOUT=20
@@ -17,11 +18,53 @@ def create_endpoint(endpoint_name, repository, framework="pytorch", task="text-g
         endpoint = create_inference_endpoint(endpoint_name, repository=repository, framework=framework, task=task, accelerator=accelerator, vendor=vendor, region=region, type=type, instance_size=instance_size, instance_type=instance_type
         )
     except huggingface_hub.utils._errors.HfHubHTTPError as e:
-        logger.debug("Hit the following exception:")
+        endpoint = update_endpoint_exception(accelerator, e, endpoint,
+                                             endpoint_name, framework,
+                                             instance_size, instance_type,
+                                             repository, task)
+    except requests.exceptions.HTTPError as e:
+        endpoint = update_endpoint_exception(accelerator, e, endpoint,
+                                             endpoint_name, framework,
+                                             instance_size, instance_type,
+                                             repository, task)
+    except Exception as e:
+        logger.debug("Hit error")
         logger.debug(e)
-        logger.debug("Attempting to continue.")
+        sys.exit()
+
+    endpoint.fetch()
+    logger.info("Endpoint status: %s." % (endpoint.status))
+    if endpoint.status == "scaledToZero":
+        # Send a request to wake it up.
+        get_generation(endpoint.url, "Wake up")
+        sleep(TIMEOUT)
+    i = 0
+    while endpoint.status in ["pending", "initializing"]:# aka, not in ["failed", "running"]
+        if i >= 20:
+            logger.info("Model failed to respond. Exiting.")
+            sys.exit()
+        logger.debug("Waiting %d seconds to check again if the endpoint is running." % TIMEOUT)
+        sleep(TIMEOUT)
+        endpoint.fetch()
+        logger.debug("Endpoint status: %s." % (endpoint.status))
+        i += 1
+    logger.info("Endpoint created:")
+    logger.info(endpoint)
+    generation_url = endpoint.url
+    return generation_url
+
+
+def update_endpoint_exception(accelerator, e, endpoint, endpoint_name,
+                              framework, instance_size, instance_type,
+                              repository, task):
+    logger.debug("Hit the following exception:")
+    logger.debug(e)
+    logger.debug("Attempting to continue.")
+    try:
         endpoint = get_inference_endpoint(endpoint_name)
-        endpoint.update(repository=repository, framework=framework, task=task, accelerator=accelerator, instance_size=instance_size, instance_type=instance_type)
+        endpoint.update(repository=repository, framework=framework, task=task,
+                        accelerator=accelerator, instance_size=instance_size,
+                        instance_type=instance_type)
     except huggingface_hub.utils._errors.BadRequestError as e:
         logger.debug("Hit the following exception:")
         logger.debug(e)
@@ -48,31 +91,7 @@ def create_endpoint(endpoint_name, repository, framework="pytorch", task="text-g
         else:
             logger.info("Getting expensive to try to run this model without human oversight. Exiting.")
             sys.exit()
-
-        logger.debug("Hit error")
-        logger.debug(e)
-        sys.exit()
-
-    endpoint.fetch()
-    logger.info("Endpoint status: %s." % (endpoint.status))
-    if endpoint.status == "scaledToZero":
-        # Send a request to wake it up.
-        get_generation(endpoint.url, "Wake up")
-        sleep(TIMEOUT)
-    i = 0
-    while endpoint.status in ["pending", "initializing"]:# aka, not in ["failed", "running"]
-        if i >= 20:
-            logger.info("Model failed to respond. Exiting.")
-            sys.exit()
-        logger.debug("Waiting %d seconds to check again if the endpoint is running." % TIMEOUT)
-        sleep(TIMEOUT)
-        endpoint.fetch()
-        logger.debug("Endpoint status: %s." % (endpoint.status))
-        i += 1
-    logger.info("Endpoint created:")
-    logger.info(endpoint)
-    generation_url = endpoint.url
-    return generation_url
+    return endpoint
 
 
 if __name__ == '__main__':
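For reference, the flow this commit converges on is "create the endpoint, and if that fails, reuse and reconfigure the existing one." Below is a minimal, self-contained sketch of that pattern using only public huggingface_hub APIs; the endpoint name, repository, and hardware settings are placeholders, not values from this repo:

import requests
from huggingface_hub import create_inference_endpoint, get_inference_endpoint
from huggingface_hub.utils import HfHubHTTPError

# Placeholder configuration, not taken from this repo.
ENDPOINT_NAME = "toxicity-eval-endpoint"
REPOSITORY = "some-org/some-model"

try:
    endpoint = create_inference_endpoint(
        ENDPOINT_NAME,
        repository=REPOSITORY,
        framework="pytorch",
        task="text-generation",
        accelerator="gpu",
        vendor="aws",
        region="us-east-1",
        type="protected",
        instance_size="x4",
        instance_type="nvidia-l4",
    )
except (HfHubHTTPError, requests.exceptions.HTTPError):
    # Creation commonly fails because an endpoint with this name already
    # exists; fall back to fetching it and updating its configuration.
    endpoint = get_inference_endpoint(ENDPOINT_NAME)
    endpoint.update(repository=REPOSITORY,
                    instance_size="x4", instance_type="nvidia-l4")

The diff imports the error class from huggingface_hub.utils._errors; that underscore module is private, so the public re-export used in the sketch is the more stable import path.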
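The fetch()/sleep() loop in the diff (up to 20 checks, TIMEOUT=20 seconds apart) re-implements what huggingface_hub already ships as InferenceEndpoint.wait(). A sketch of the same 400-second budget using the built-in helper; the endpoint name is again a placeholder, and note that wait() only polls, so a scaledToZero endpoint still needs a first request (or endpoint.resume()) to start it:

from huggingface_hub import get_inference_endpoint

endpoint = get_inference_endpoint("toxicity-eval-endpoint")  # placeholder name
try:
    # Blocks until the endpoint reports "running"; raises if the deployment
    # fails or the timeout elapses. 400s mirrors the loop's 20 checks x 20s.
    endpoint.wait(timeout=400, refresh_every=20)
except Exception as e:
    raise SystemExit("Model failed to respond: %s" % e)
print("Endpoint ready at %s" % endpoint.url)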
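The "Wake up" call in the diff goes through this repo's get_generation() helper, which is not shown here. As a rough stand-in, the request it needs to send looks like the sketch below; the header and payload shapes assume a standard text-generation Inference Endpoint, and HF_TOKEN is a placeholder environment variable:

import os
import requests

def wake_up(endpoint_url):
    # Any authenticated POST wakes a scaledToZero endpoint; while the
    # container is still cold-starting, the service typically answers 503.
    response = requests.post(
        endpoint_url,
        headers={"Authorization": "Bearer %s" % os.environ["HF_TOKEN"]},
        json={"inputs": "Wake up"},
        timeout=20,  # matches TIMEOUT=20 in the module above
    )
    return response.status_code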