File size: 3,009 Bytes
8e732f9
6290527
83cc343
d4c1570
83cc343
22d6633
83cc343
22d6633
83cc343
22d6633
83cc343
d4c1570
 
 
59b9a05
 
d4c1570
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
 
 
 
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
 
 
 
 
 
 
 
e819cc9
83cc343
e819cc9
83cc343
 
d4c1570
807bf8b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
# Install the pinned Gradio release at start-up, before `import gradio` runs.
# The command's exit status is discarded, so a failed install only surfaces
# through the import on the next line.
os.system("pip install gradio==2.8.0b2")
import gradio as gr
import numpy as np  # NOTE(review): not referenced in the visible code — confirm before removing

# Static text and sample inputs for the demo page. `title`, `description`,
# `article` and `examples` are handed to gr.Interface at the bottom of the file.

# Heading of the demo ("Transformer" spelled out in full).
title = "XM_Transformer"

# Short introduction for the demo page.
description = (
    "Gradio Demo for fairseq S2T: Fast Speech-to-Text Modeling with fairseq. "
    "To use it, simply add your audio, or click one of the examples to load them. "
    "Read more at the links below."
)

# Raw HTML with links to the paper and the source repository.
article = (
    "<p style='text-align: center'>"
    "<a href='https://arxiv.org/abs/2010.05171' target='_blank'>"
    "fairseq S2T: Fast Speech-to-Text Modeling with fairseq</a>"
    " | "
    "<a href='https://github.com/pytorch/fairseq/tree/main/examples/speech_to_text' target='_blank'>"
    "Github Repo</a></p>"
)

# One row per example: [audio file, model name], in the same order as the
# interface inputs (audio first, model dropdown second).
examples = [
    ["common_voice_es_en.flac", "xm_transformer_600m-es_en-multi_domain"],
    ["common_voice_ru_18945535.flac", "xm_transformer_600m-ru_en-multi_domain"],
    ["common_voice_fr_19731305.mp3", "xm_transformer_600m-fr_en-multi_domain"],
    ["common_voice_en_ru.mp3", "xm_transformer_600m-en_ru-multi_domain"],
]

# Identifiers of the eight xm_transformer models, listed in the order in which
# they are bound to io1 ... io8.
_HUB_MODELS = (
    "huggingface/facebook/xm_transformer_600m-es_en-multi_domain",
    "huggingface/facebook/xm_transformer_600m-ru_en-multi_domain",
    "huggingface/facebook/xm_transformer_600m-en_ru-multi_domain",
    "huggingface/facebook/xm_transformer_600m-en_es-multi_domain",
    "huggingface/facebook/xm_transformer_600m-en_zh-multi_domain",
    "huggingface/facebook/xm_transformer_600m-fr_en-multi_domain",
    "huggingface/facebook/xm_transformer_600m-en_ar-multi_domain",
    "huggingface/facebook/xm_transformer_600m-en_tr-multi_domain",
)

# Load one interface per identifier; the generator is consumed left to right,
# so the models are loaded in the order listed above.
io1, io2, io3, io4, io5, io6, io7, io8 = (
    gr.Interface.load(hub_id) for hub_id in _HUB_MODELS
)


    
def inference(text, model):
    """Run the selected model on the submitted input and return its output.

    Args:
        text: Value coming from the first interface input (the audio
            component); it is forwarded unchanged to the loaded model.
        model: Name of the model to use, e.g.
            "xm_transformer_600m-es_en-multi_domain". A leading "facebook/"
            is accepted and ignored, so both spellings select the same model.

    Returns:
        Whatever the selected loaded interface returns for ``text``.

    Raises:
        ValueError: If ``model`` does not name one of the eight loaded models.
    """
    # Accept the namespaced spelling too, so a choice such as
    # "facebook/xm_transformer_600m-en_tr-multi_domain" resolves explicitly
    # instead of relying on a catch-all branch.
    org_prefix = "facebook/"
    name = model
    if isinstance(model, str) and model.startswith(org_prefix):
        name = model[len(org_prefix):]

    # Built per call so the module-level interfaces are only looked up when
    # the function actually runs.
    runners = {
        "xm_transformer_600m-es_en-multi_domain": io1,
        "xm_transformer_600m-ru_en-multi_domain": io2,
        "xm_transformer_600m-en_ru-multi_domain": io3,
        "xm_transformer_600m-en_es-multi_domain": io4,
        "xm_transformer_600m-en_zh-multi_domain": io5,
        "xm_transformer_600m-fr_en-multi_domain": io6,
        "xm_transformer_600m-en_ar-multi_domain": io7,
        "xm_transformer_600m-en_tr-multi_domain": io8,
    }

    # Fail loudly on an unknown name rather than silently running the
    # English-to-Turkish model on it.
    if not isinstance(name, str) or name not in runners:
        raise ValueError(
            "Unknown model {!r}; expected one of: {}".format(
                model, ", ".join(sorted(runners))
            )
        )
    return runners[name](text)


# Model names offered in the dropdown. Every entry is the bare model name, so
# the list is spelled consistently and matches the names used in `examples`.
_MODEL_CHOICES = [
    "xm_transformer_600m-es_en-multi_domain",
    "xm_transformer_600m-ru_en-multi_domain",
    "xm_transformer_600m-en_ru-multi_domain",
    "xm_transformer_600m-en_es-multi_domain",
    "xm_transformer_600m-en_zh-multi_domain",
    "xm_transformer_600m-fr_en-multi_domain",
    "xm_transformer_600m-en_ar-multi_domain",
    "xm_transformer_600m-en_tr-multi_domain",
]

# Build the demo (audio file + model name in, audio out) and start serving it.
gr.Interface(
    inference,
    [
        # The audio input is handed to `inference` as a file path.
        gr.inputs.Audio(label="Input", type="filepath"),
        gr.inputs.Dropdown(
            choices=_MODEL_CHOICES,
            type="value",
            default="xm_transformer_600m-es_en-multi_domain",
            label="model",
        ),
    ],
    gr.outputs.Audio(label="Output"),
    article=article,
    title=title,
    examples=examples,
    description=description,
).launch(enable_queue=True, cache_examples=True)