Spaces:

course-demos
/

speech-to-speech-translation

Running on T4

sanchit-gandhi HF staff committed on Jul 6, 2023

Commit

f805e49

•

1 Parent(s): 4666f0a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -39,10 +39,22 @@ def speech_to_speech_translation(audio):
     return 16000, synthesised_speech
 demo = gr.Interface(
     fn=speech_to_speech_translation,
     inputs=gr.Audio(type="filepath"),
     outputs=gr.Audio(label="Generated Speech", type="numpy"),
     examples=[["./example.wav"]],
 )
 demo.launch()

     return 16000, synthesised_speech
+title = "Cascaded STST"
+description = """
+Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in English:
+![Cascaded STST](https://huggingface.co/datasets/huggingface-course/audio-course-images/resolve/main/s2st_cascaded.png "Diagram of cascaded speech to speech translation")
+Demo uses OpenAI's [Whisper Base](https://huggingface.co/openai/whisper-base) model for speech translation, and Microsoft's
+[SpeechT5 TTS](https://huggingface.co/microsoft/speecht5_tts) model for text-to-speech.
+"""
 demo = gr.Interface(
     fn=speech_to_speech_translation,
     inputs=gr.Audio(type="filepath"),
     outputs=gr.Audio(label="Generated Speech", type="numpy"),
     examples=[["./example.wav"]],
+    title=title,
+    description=description,
 )
 demo.launch()