Add restart feature if GPU overload
Browse files
app.py
CHANGED
@@ -20,8 +20,11 @@ title = "Speak with Llama2 70B"
|
|
20 |
DESCRIPTION = """# Speak with Llama2 70B"""
|
21 |
css = """.toast-wrap { display: none !important } """
|
22 |
|
|
|
|
|
|
|
23 |
|
24 |
-
|
25 |
|
26 |
system_message = "\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
|
27 |
temperature = 0.9
|
@@ -103,17 +106,35 @@ def generate_speech(history):
|
|
103 |
|
104 |
|
105 |
for sentence in text_to_generate:
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
|
118 |
with gr.Blocks(title=title) as demo:
|
119 |
gr.Markdown(DESCRIPTION)
|
|
|
20 |
DESCRIPTION = """# Speak with Llama2 70B"""
|
21 |
css = """.toast-wrap { display: none !important } """
|
22 |
|
23 |
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
24 |
+
# will use the API to restart the Space on an unrecoverable error
|
25 |
+
api = HfApi(token=HF_TOKEN)
|
26 |
|
27 |
+
repo_id = "ylacombe/voice-chat-with-lama"
|
28 |
|
29 |
system_message = "\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
|
30 |
temperature = 0.9
|
|
|
106 |
|
107 |
|
108 |
for sentence in text_to_generate:
|
109 |
+
try:
|
110 |
+
|
111 |
+
# generate speech by cloning a voice using default settings
|
112 |
+
wav = tts.tts(text=sentence,
|
113 |
+
speaker_wav="examples/female.wav",
|
114 |
+
decoder_iterations=25,
|
115 |
+
decoder_sampler="dpm++2m",
|
116 |
+
speed=1.2,
|
117 |
+
language="en")
|
118 |
+
|
119 |
+
yield (sampling_rate, np.array(wav)) #np.array(wav + silence))
|
120 |
+
|
121 |
+
except RuntimeError as e :
|
122 |
+
if "device-side assert" in str(e):
|
123 |
+
# cannot do anything on a CUDA device-side error, need to restart
|
124 |
+
print(f"Exit due to: Unrecoverable exception caused by language:{language} prompt:{prompt}", flush=True)
|
125 |
+
gr.Warning("Unhandled Exception encounter, please retry in a minute")
|
126 |
+
print("Cuda device-assert Runtime encountered need restart")
|
127 |
+
if not DEVICE_ASSERT_DETECTED:
|
128 |
+
DEVICE_ASSERT_DETECTED=1
|
129 |
+
DEVICE_ASSERT_PROMPT=prompt
|
130 |
+
DEVICE_ASSERT_LANG=language
|
131 |
+
|
132 |
+
|
133 |
+
# HF Space specific: this error is unrecoverable, so the Space needs to be restarted
|
134 |
+
api.restart_space(repo_id=repo_id)
|
135 |
+
else:
|
136 |
+
print("RuntimeError: non device-side assert error:", str(e))
|
137 |
+
raise e
|
138 |
|
139 |
with gr.Blocks(title=title) as demo:
|
140 |
gr.Markdown(DESCRIPTION)
|