speech-to-speech-translation

Sleeping

ykirpichev committed on Jul 23, 2023

Commit

4f64cb9

•

1 Parent(s): e5cefdc

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -38,7 +38,7 @@ def synthesise(text):
     with torch.no_grad():
         outputs = model_mms(input_ids)
     print("mms model", outputs)
-    print(outputs.audio[0])
     return outputs.audio[0].cpu()
     inputs = processor(text=text, return_tensors="pt")
     speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
@@ -49,15 +49,7 @@ def synthesise(text):
 def speech_to_speech_translation(audio):
     translated_text = translate(audio)
     synthesised_speech = synthesise(translated_text)
-    # (((speech["audio"].cpu().numpy()) + 1) / 2.)* 32767
-    print(synthesised_speech)
-    synthesised_speech_numpy = synthesised_speech.numpy()
-    synthesised_speech_numpy += np.min(synthesised_speech_numpy)
-    synthesised_speech_numpy /= np.max(synthesised_speech_numpy)
-    # synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
-    synthesised_speech = np.clip((synthesised_speech_numpy*32767) .astype(np.int16), 0, 32767)
-    print(synthesised_speech)
-    # synthesised_speech = (((synthesised_speech.numpy() + 1) / 2.0) * 32767).astype(np.int16)
     return 16000, synthesised_speech

     with torch.no_grad():
         outputs = model_mms(input_ids)
     print("mms model", outputs)
+    # print(outputs.audio[0])
     return outputs.audio[0].cpu()
     inputs = processor(text=text, return_tensors="pt")
     speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
 def speech_to_speech_translation(audio):
     translated_text = translate(audio)
     synthesised_speech = synthesise(translated_text)
+    synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
     return 16000, synthesised_speech