Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -30,21 +30,6 @@ def parse_codeblock(text):
|
|
30 |
lines[i] = "<br/>" + line.replace("<", "&lt;").replace(">", "&gt;")
|
31 |
return "".join(lines)
|
32 |
|
33 |
-
def inference(audio):
|
34 |
-
audio = whisper.load_audio(audio)
|
35 |
-
print("loading finished")
|
36 |
-
audio = whisper.pad_or_trim(audio)
|
37 |
-
print("audio trimed")
|
38 |
-
mel = whisper.log_mel_spectrogram(audio).to(model.device)
|
39 |
-
print("spectro finished")
|
40 |
-
_, probs = model.detect_language(mel)
|
41 |
-
print("lang detected")
|
42 |
-
options = whisper.DecodingOptions(fp16 = False)
|
43 |
-
|
44 |
-
result = whisper.decode(model, mel, options)
|
45 |
-
|
46 |
-
print(result.text)
|
47 |
-
return result.text
|
48 |
|
49 |
|
50 |
#Load Whisper-small
|
@@ -60,7 +45,7 @@ tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
|
|
60 |
|
61 |
# Define a function to translate an audio, in english here
|
62 |
def translate(audio):
|
63 |
-
|
64 |
outputs = pipe(audio, max_new_tokens=256,
|
65 |
generate_kwargs={"task": "translate"})
|
66 |
return outputs["text"]
|
@@ -174,13 +159,12 @@ def predict(transType, language, audio, audio_mic = None):
|
|
174 |
print("debug1:", audio,"debug2", audio_mic)
|
175 |
if not audio and audio_mic:
|
176 |
audio = audio_mic
|
177 |
-
st = gr.State([])
|
178 |
-
return "Tell me about the swedish king in 1995!?", gpt_predict("Tell me about the swedish king in 1995!?",st), None
|
179 |
if transType == "Text":
|
180 |
-
return translate(audio), None
|
181 |
if transType == "GPT answer":
|
182 |
req = translate(audio)
|
183 |
-
|
|
|
184 |
if transType == "Audio":
|
185 |
return speech_to_speech_translation(audio)
|
186 |
|
@@ -209,7 +193,7 @@ demo = gr.Interface(
|
|
209 |
|
210 |
],
|
211 |
outputs=[
|
212 |
-
gr.Text(label="Text translation"),gr.Text(label="
|
213 |
],
|
214 |
title=title,
|
215 |
description=description,
|
|
|
30 |
lines[i] = "<br/>" + line.replace("<", "&lt;").replace(">", "&gt;")
|
31 |
return "".join(lines)
|
32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
|
35 |
#Load Whisper-small
|
|
|
45 |
|
46 |
# Define a function to translate an audio, in english here
|
47 |
def translate(audio):
|
48 |
+
|
49 |
outputs = pipe(audio, max_new_tokens=256,
|
50 |
generate_kwargs={"task": "translate"})
|
51 |
return outputs["text"]
|
|
|
159 |
print("debug1:", audio,"debug2", audio_mic)
|
160 |
if not audio and audio_mic:
|
161 |
audio = audio_mic
|
|
|
|
|
162 |
if transType == "Text":
|
163 |
+
return translate(audio), None, None
|
164 |
if transType == "GPT answer":
|
165 |
req = translate(audio)
|
166 |
+
st = gr.State([])
|
167 |
+
return req, gpt_predict(req,st), None
|
168 |
if transType == "Audio":
|
169 |
return speech_to_speech_translation(audio)
|
170 |
|
|
|
193 |
|
194 |
],
|
195 |
outputs=[
|
196 |
+
gr.Text(label="Text translation"),gr.Text(label="GPT answer"),gr.Audio(label="Audio translation",type = "numpy")
|
197 |
],
|
198 |
title=title,
|
199 |
description=description,
|