juulaii committed
Commit feba911
1 Parent(s): fd5ca93

Create app.py

Files changed (1)
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
import gradio as gr

# Get models
# ASR model for input speech
speech2text = gr.Interface.load("huggingface/facebook/wav2vec2-base-960h",
                                inputs=gr.inputs.Audio(label="Record Audio File", type="file", source="microphone"))

# Translates English text to Spanish text
translator = gr.Interface.load("huggingface/Helsinki-NLP/opus-mt-en-es",
                               outputs=gr.outputs.Textbox(label="English to Spanish Translated Text"))
# TTS model for output speech
text2speech = gr.Interface.load("huggingface/facebook/tts_transformer-es-css10",
                                outputs=gr.outputs.Audio(label="English to Spanish Translated Audio"),
                                allow_flagging="never")


translate = gr.Series(speech2text, translator)  # outputs the Spanish text translation
en2es = gr.Series(translate, text2speech)       # outputs the Spanish audio
ui = gr.Parallel(translate, en2es)              # shows the Spanish text alongside the Spanish audio

# Gradio interface
ui.title = "English to Spanish Speech Translator"
ui.description = """<center>A useful tool for translating English speech into Spanish audio. All pre-trained models are hosted on the Hugging Face Hub.</center>"""
ui.examples = [['ljspeech.wav'], ['ljspeech2.wav']]
ui.theme = "peach"
ui.article = """<h2>Pre-trained Model Information</h2>
<h3>Automatic Speech Recognition</h3>
<p style='text-align: justify'>The model used for the ASR part of this Space is
<https://huggingface.co/facebook/wav2vec2-base-960h>, which is pretrained and fine-tuned on <b>960 hours of
Librispeech</b> with 16kHz sampled speech audio. The model has a <b>word error rate (WER)</b> of <b>8.6 percent on
noisy speech</b> and <b>5.2 percent on clean speech</b> on the standard LibriSpeech benchmark. More information can be
found at <https://ai.facebook.com/blog/wav2vec-20-learning-the-structure-of-speech-from-raw-audio/>, and the
original model is at <https://github.com/pytorch/fairseq/tree/main/examples/wav2vec>.</p>
<h3>Text Translator</h3>
<p style='text-align: justify'>The English to Spanish text translation model is
<https://huggingface.co/Helsinki-NLP/opus-mt-en-es>, which is part of <b>the Tatoeba Translation Challenge (v2021-08-07)</b>, as seen in its GitHub repo at
<https://github.com/Helsinki-NLP/Tatoeba-Challenge>. This project aims to develop machine translation for real-world
use cases in many languages.</p>
<h3>Text to Speech</h3>
<p style='text-align: justify'>The TTS model used is from <https://huggingface.co/facebook/tts_transformer-es-css10>.
This model uses the <b>fairseq(-py)</b> sequence modeling toolkit for speech synthesis, in this case specifically TTS
for Spanish. More information can be found on its GitHub repo at <https://github.com/pytorch/fairseq>.</p>
"""


ui.launch(inbrowser=True)
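For reference, the first two stages of this pipeline can also be checked outside of Gradio before wiring up the interfaces. The sketch below is not part of the commit; it assumes the transformers library is installed (with torch and ffmpeg available for audio decoding) and uses the same two Hub checkpoints, with ljspeech.wav standing in for the example clip listed above.

from transformers import pipeline

# ASR: English speech -> English text (same checkpoint as speech2text above)
asr = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")

# Translation: English text -> Spanish text (same checkpoint as translator above)
en_to_es = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")

english_text = asr("ljspeech.wav")["text"]  # wav2vec2-base-960h returns upper-case text
spanish_text = en_to_es(english_text.lower())[0]["translation_text"]
print(spanish_text)

The third stage is easiest to exercise through the Gradio-loaded text2speech interface above or via the snippet on its model card, since facebook/tts_transformer-es-css10 is a fairseq checkpoint rather than a transformers one.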