Spaces:
Runtime error
Runtime error
File size: 3,773 Bytes
feba911 c52add3 36e373b 40817ec f794f86 c52add3 feba911 c52add3 feba911 c52add3 feba911 2ca0f71 933b458 feba911 33b8d42 d3cc82a db5c8d0 a95d76f d3cc82a 33b8d42 241ba79 d3cc82a 8ce4e5c d3cc82a 8ce4e5c 33b8d42 a95d76f d3cc82a 33b8d42 241ba79 d3cc82a 33b8d42 933b458 feba911 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import gradio as gr
# English-to-Spanish speech translator demo.
#
# Only the speech-recognition stage is currently served: `ui` is loaded
# straight from the hosted HuBERT ASR model. The translation and text-to-speech
# stages are kept below as a disabled sketch.

# Short blurb rendered under the title.
DESCRIPTION_HTML = """<center>A useful tool in translating English to Spanish audio. All pre-trained models are found in huggingface.</center>"""

# Audio clips offered as one-click examples; paths are relative to the app's
# working directory.
EXAMPLE_CLIPS = [['ljspeech.wav'], ['ljspeech2.wav'], ['longspeech.wav']]

# Long-form notes rendered beneath the demo, written as raw HTML. Each URL and
# each link label is kept on a single physical line so that no newline ends up
# inside an href attribute or in the middle of a model name.
ARTICLE_HTML = """<h2>Pre-trained model Information</h2>
<h3>Automatic Speech Recognition</h3>
<p style='text-align: justify'>The model used for the ASR part of this space is from
<a href="https://huggingface.co/facebook/hubert-large-ls960-ft">hubert-large-ls960-ft</a> which is pretrained and fine-tuned on <b>960 hours of
Librispeech</b> on 16kHz sampled speech audio. This model has a self-reported <b>word error rate (WER)</b> of <b>1.9
percent</b> and ranks first in <i>paperswithcode</i> for ASR on Librispeech. More information can be
found on its website at <a href="https://ai.facebook.com/blog/hubert-self-supervised-representation-learning-for-speech-recognition-generation-and-compression">hubert-self</a> and
original model is under <a href="https://github.com/pytorch/fairseq/tree/main/examples/hubert">pytorch/fairseq</a>.</p>
<h3>Text Translator</h3>
<p style='text-align: justify'>The English to Spanish text translator pre-trained model is from
<a href="https://huggingface.co/Helsinki-NLP/opus-mt-en-es">Helsinki-NLP/opus-mt-en-es</a> which is part of the <b>The
Tatoeba Translation Challenge
(v2021-08-07)</b> as seen from its github repo at
<a href="https://github.com/Helsinki-NLP/Tatoeba-Challenge">Helsinki-NLP/Tatoeba-Challenge</a>. This project aims to develop
machine
translation in real-world
cases for many languages. </p>
<h3>Text to Speech</h3>
<p style='text-align: justify'> The TTS model used is from <a href="https://huggingface.co/facebook/tts_transformer-es-css10">facebook/tts_transformer-es-css10</a>.
This model uses the <b>Fairseq(-py)</b> sequence modeling toolkit for speech synthesis, in this case, specifically TTS
for Spanish. More information can be seen on their git at
<a href="https://github.com/pytorch/fairseq/tree/main/examples/speech_synthesis">speech_synthesis</a>. </p>
"""

# ASR model for input speech.
# Presentation options are handed to `Interface.load`, which forwards extra
# keyword arguments to the Interface constructor, instead of being assigned as
# attributes after the interface has already been built.
ui = gr.Interface.load(
    "huggingface/facebook/hubert-large-ls960-ft",
    inputs=gr.inputs.Audio(
        label="Record Audio",
        type="filepath",
        source="microphone",
    ),
    title="English to Spanish Speech Translator",
    description=DESCRIPTION_HTML,
    article=ARTICLE_HTML,
    examples=EXAMPLE_CLIPS,
    allow_flagging="never",
    theme="peach",
)

# --- Disabled: translation + text-to-speech stages ---------------------------
# TODO(review): the sketch below refers to `speech2text` and `translate`, which
# are not defined anywhere in it (the loaded models are bound to `ui`,
# `translator` and `text2speech`); reconcile the names before re-enabling.
#
# Translates English to Spanish text:
# translator = gr.Interface.load("huggingface/Helsinki-NLP/opus-mt-en-es",
#                                outputs=gr.outputs.Textbox(label="English to Spanish Translated Text"))
# TTS model for output speech:
# text2speech = gr.Interface.load("huggingface/facebook/tts_transformer-es-css10",
#                                 outputs=gr.outputs.Audio(label="English to Spanish Translated Audio"),
#                                 allow_flagging="never")
# ui = gr.Series(speech2text, translator)     # outputs Spanish text translation
# en2es = gr.Series(translate, text2speech)   # outputs Spanish audio
# ui = gr.Parallel(translate, en2es)          # allows transcription of Spanish audio

# Start the web app; `inbrowser=True` asks Gradio to open it in a browser tab.
ui.launch(inbrowser=True)
|