gemma-2-9b-it1

Sleeping

App Files Community

Leri777 committed on Oct 9, 2024

Commit

7096a95

verified ·

1 Parent(s): 39d0572

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -6

app.py CHANGED Viewed

@@ -25,14 +25,16 @@ tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
 # Function to load model with GPU availability check
 def load_model():
     attempts = 0
-    while attempts < 5:  # Try up to 5 times to get a GPU
         if torch.cuda.is_available():
             logger.debug("GPU is available. Proceeding with GPU setup.")
             try:
                 return AutoModelForCausalLM.from_pretrained(
                     model_id,
-                    device_map="auto", torch_dtype=torch.bfloat16,
                 )
             except Exception as e:
                 logger.error(f"Error initializing model with GPU: {e}. Retrying...")
@@ -47,7 +49,9 @@ def load_model():
     logger.warning("Falling back to CPU setup after multiple attempts.")
     return AutoModelForCausalLM.from_pretrained(
         model_id,
-        device_map="auto", low_cpu_mem_usage=True, token=os.getenv('HF_TOKEN'),
     )
 # Retry logic to load model with random delay
@@ -57,7 +61,7 @@ while model is None:
         model = load_model()
         model.eval()
     except Exception as e:
-        retry_delay = random.uniform(10, 30)  # Random delay between 10 to 30 seconds
         logger.error(f"Failed to load model: {e}. Retrying in {retry_delay:.2f} seconds...")
         time.sleep(retry_delay)
@@ -121,13 +125,16 @@ interface = gr.Interface(
 )
 # Retry logic to launch interface with random delay
-while True:
     try:
         interface.launch()
         break
     except Exception as e:
-        retry_delay = random.uniform(10, 30)  # Random delay between 10 to 30 seconds
         logger.error(f"Failed to launch interface: {e}. Retrying in {retry_delay:.2f} seconds...")
         time.sleep(retry_delay)
 logger.debug("Chat interface initialized and launched")

 # Function to load model with GPU availability check
 def load_model():
+    max_attempts = 5
     attempts = 0
+    while attempts < max_attempts:
         if torch.cuda.is_available():
             logger.debug("GPU is available. Proceeding with GPU setup.")
             try:
                 return AutoModelForCausalLM.from_pretrained(
                     model_id,
+                    device_map="auto",
+                    torch_dtype=torch.bfloat16,
                 )
             except Exception as e:
                 logger.error(f"Error initializing model with GPU: {e}. Retrying...")
     logger.warning("Falling back to CPU setup after multiple attempts.")
     return AutoModelForCausalLM.from_pretrained(
         model_id,
+        device_map="auto",
+        low_cpu_mem_usage=True,
+        token=os.getenv('HF_TOKEN'),
     )
 # Retry logic to load model with random delay
         model = load_model()
         model.eval()
     except Exception as e:
+        retry_delay = random.uniform(30, 60)  # Increased delay between retries
         logger.error(f"Failed to load model: {e}. Retrying in {retry_delay:.2f} seconds...")
         time.sleep(retry_delay)
 )
 # Retry logic to launch interface with random delay
+max_retries = 5
+retry_count = 0
+while retry_count < max_retries:
     try:
         interface.launch()
         break
     except Exception as e:
+        retry_delay = random.uniform(60, 120)  # Increased delay between retries
         logger.error(f"Failed to launch interface: {e}. Retrying in {retry_delay:.2f} seconds...")
+        retry_count += 1
         time.sleep(retry_delay)
 logger.debug("Chat interface initialized and launched")