Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -25,14 +25,16 @@ tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
|
|
25 |
|
26 |
# Function to load model with GPU availability check
|
27 |
def load_model():
|
|
|
28 |
attempts = 0
|
29 |
-
while attempts <
|
30 |
if torch.cuda.is_available():
|
31 |
logger.debug("GPU is available. Proceeding with GPU setup.")
|
32 |
try:
|
33 |
return AutoModelForCausalLM.from_pretrained(
|
34 |
model_id,
|
35 |
-
device_map="auto",
|
|
|
36 |
)
|
37 |
except Exception as e:
|
38 |
logger.error(f"Error initializing model with GPU: {e}. Retrying...")
|
@@ -47,7 +49,9 @@ def load_model():
|
|
47 |
logger.warning("Falling back to CPU setup after multiple attempts.")
|
48 |
return AutoModelForCausalLM.from_pretrained(
|
49 |
model_id,
|
50 |
-
device_map="auto",
|
|
|
|
|
51 |
)
|
52 |
|
53 |
# Retry logic to load model with random delay
|
@@ -57,7 +61,7 @@ while model is None:
|
|
57 |
model = load_model()
|
58 |
model.eval()
|
59 |
except Exception as e:
|
60 |
-
retry_delay = random.uniform(
|
61 |
logger.error(f"Failed to load model: {e}. Retrying in {retry_delay:.2f} seconds...")
|
62 |
time.sleep(retry_delay)
|
63 |
|
@@ -121,13 +125,16 @@ interface = gr.Interface(
|
|
121 |
)
|
122 |
|
123 |
# Retry logic to launch interface with random delay
|
124 |
-
|
|
|
|
|
125 |
try:
|
126 |
interface.launch()
|
127 |
break
|
128 |
except Exception as e:
|
129 |
-
retry_delay = random.uniform(
|
130 |
logger.error(f"Failed to launch interface: {e}. Retrying in {retry_delay:.2f} seconds...")
|
|
|
131 |
time.sleep(retry_delay)
|
132 |
|
133 |
logger.debug("Chat interface initialized and launched")
|
|
|
25 |
|
26 |
# Function to load model with GPU availability check
|
27 |
def load_model():
|
28 |
+
max_attempts = 5
|
29 |
attempts = 0
|
30 |
+
while attempts < max_attempts:
|
31 |
if torch.cuda.is_available():
|
32 |
logger.debug("GPU is available. Proceeding with GPU setup.")
|
33 |
try:
|
34 |
return AutoModelForCausalLM.from_pretrained(
|
35 |
model_id,
|
36 |
+
device_map="auto",
|
37 |
+
torch_dtype=torch.bfloat16,
|
38 |
)
|
39 |
except Exception as e:
|
40 |
logger.error(f"Error initializing model with GPU: {e}. Retrying...")
|
|
|
49 |
logger.warning("Falling back to CPU setup after multiple attempts.")
|
50 |
return AutoModelForCausalLM.from_pretrained(
|
51 |
model_id,
|
52 |
+
device_map="auto",
|
53 |
+
low_cpu_mem_usage=True,
|
54 |
+
token=os.getenv('HF_TOKEN'),
|
55 |
)
|
56 |
|
57 |
# Retry logic to load model with random delay
|
|
|
61 |
model = load_model()
|
62 |
model.eval()
|
63 |
except Exception as e:
|
64 |
+
retry_delay = random.uniform(30, 60) # Increased delay between retries
|
65 |
logger.error(f"Failed to load model: {e}. Retrying in {retry_delay:.2f} seconds...")
|
66 |
time.sleep(retry_delay)
|
67 |
|
|
|
125 |
)
|
126 |
|
127 |
# Retry logic to launch interface with random delay
|
128 |
+
max_retries = 5
|
129 |
+
retry_count = 0
|
130 |
+
while retry_count < max_retries:
|
131 |
try:
|
132 |
interface.launch()
|
133 |
break
|
134 |
except Exception as e:
|
135 |
+
retry_delay = random.uniform(60, 120) # Increased delay between retries
|
136 |
logger.error(f"Failed to launch interface: {e}. Retrying in {retry_delay:.2f} seconds...")
|
137 |
+
retry_count += 1
|
138 |
time.sleep(retry_delay)
|
139 |
|
140 |
logger.debug("Chat interface initialized and launched")
|