Update app.py
Browse files
app.py
CHANGED
@@ -13,23 +13,22 @@ asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base",
|
|
13 |
|
14 |
|
15 |
|
16 |
-
model = VitsModel.from_pretrained("facebook/mms-tts-
|
17 |
-
processor = VitsTokenizer.from_pretrained("facebook/mms-tts-
|
18 |
|
19 |
|
20 |
|
21 |
|
22 |
def translate(audio):
|
23 |
-
outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"
|
24 |
return outputs["text"]
|
25 |
|
26 |
|
27 |
def synthesise(text):
|
28 |
inputs = processor(text=text, return_tensors="pt")
|
29 |
with torch.no_grad():
|
30 |
-
|
31 |
-
|
32 |
-
return speech
|
33 |
|
34 |
|
35 |
def speech_to_speech_translation(audio):
|
|
|
13 |
|
14 |
|
15 |
|
16 |
+
# MMS text-to-speech checkpoint for Spanish ("spa" is the ISO 639-3 code
# used by facebook/mms-tts-*). Tokenizer and model must come from the
# same checkpoint so vocabulary and weights agree.
_TTS_CHECKPOINT = "facebook/mms-tts-spa"

processor = VitsTokenizer.from_pretrained(_TTS_CHECKPOINT)
model = VitsModel.from_pretrained(_TTS_CHECKPOINT)
|
18 |
|
19 |
|
20 |
|
21 |
|
22 |
def translate(audio):
    """Run the Whisper ASR pipeline on *audio* and return the decoded text.

    NOTE(review): the task is "transcribe" with a forced target language of
    "es" — presumably the intent is to coerce Whisper into producing Spanish
    output regardless of the spoken language; confirm against the calling
    code / model card.
    """
    result = asr_pipe(
        audio,
        max_new_tokens=256,
        generate_kwargs={"language": "es", "task": "transcribe"},
    )
    return result["text"]
|
25 |
|
26 |
|
27 |
def synthesise(text):
    """Convert *text* to speech with the MMS VITS model.

    Tokenizes the text, runs the TTS model without gradient tracking, and
    returns the generated waveform tensor.

    Args:
        text: Input string to synthesise.

    Returns:
        The raw waveform tensor produced by the VITS model.
    """
    inputs = processor(text=text, return_tensors="pt")
    # Inference only — disable autograd to save memory and time.
    with torch.no_grad():
        output = model(**inputs)
    # BUG FIX: VitsModel returns a VitsModelOutput whose field is
    # `waveform`; there is no 'audio' key, so output['audio'] raised
    # KeyError on every call.
    speech = output.waveform
    return speech
|
|
|
32 |
|
33 |
|
34 |
def speech_to_speech_translation(audio):
|