ykirpichev
committed on
Commit
•
4f64cb9
1
Parent(s):
e5cefdc
Update app.py
Browse files
app.py
CHANGED
@@ -38,7 +38,7 @@ def synthesise(text):
|
|
38 |
with torch.no_grad():
|
39 |
outputs = model_mms(input_ids)
|
40 |
print("mms model", outputs)
|
41 |
-
print(outputs.audio[0])
|
42 |
return outputs.audio[0].cpu()
|
43 |
inputs = processor(text=text, return_tensors="pt")
|
44 |
speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
|
@@ -49,15 +49,7 @@ def synthesise(text):
|
|
49 |
def speech_to_speech_translation(audio):
|
50 |
translated_text = translate(audio)
|
51 |
synthesised_speech = synthesise(translated_text)
|
52 |
-
|
53 |
-
print(synthesised_speech)
|
54 |
-
synthesised_speech_numpy = synthesised_speech.numpy()
|
55 |
-
synthesised_speech_numpy += np.min(synthesised_speech_numpy)
|
56 |
-
synthesised_speech_numpy /= np.max(synthesised_speech_numpy)
|
57 |
-
# synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
|
58 |
-
synthesised_speech = np.clip((synthesised_speech_numpy*32767) .astype(np.int16), 0, 32767)
|
59 |
-
print(synthesised_speech)
|
60 |
-
# synthesised_speech = (((synthesised_speech.numpy() + 1) / 2.0) * 32767).astype(np.int16)
|
61 |
return 16000, synthesised_speech
|
62 |
|
63 |
|
|
|
38 |
with torch.no_grad():
|
39 |
outputs = model_mms(input_ids)
|
40 |
print("mms model", outputs)
|
41 |
+
# print(outputs.audio[0])
|
42 |
return outputs.audio[0].cpu()
|
43 |
inputs = processor(text=text, return_tensors="pt")
|
44 |
speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
|
|
|
49 |
def speech_to_speech_translation(audio):
|
50 |
translated_text = translate(audio)
|
51 |
synthesised_speech = synthesise(translated_text)
|
52 |
+
synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
return 16000, synthesised_speech
|
54 |
|
55 |
|