File size: 2,998 Bytes
8e732f9
6290527
83cc343
d4c1570
83cc343
22d6633
83cc343
22d6633
83cc343
22d6633
83cc343
d4c1570
 
 
 
 
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
 
 
 
af95ac7
f1dd9cb
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
b8c6cbb
83cc343
 
7e0be89
83cc343
 
 
 
 
 
e819cc9
83cc343
e819cc9
83cc343
 
d4c1570
448dc9c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
# HACK: installs a pinned pre-release of Gradio at import time by shelling
# out to pip. This pattern is common in hosted demo Spaces, but a
# requirements.txt pin would be safer and faster than a per-start install.
os.system("pip install gradio==2.8.0b2")
import gradio as gr
# numpy is only used for the placeholder array inside inference().
import numpy as np

# Demo metadata rendered by the Gradio interface.
# BUG FIX: title previously read "XM_Tranformer" (missing 's' in
# "Transformer") — a user-visible typo.
title = "XM_Transformer"

# Short usage blurb shown under the title.
description = "Gradio Demo for fairseq S2T: Fast Speech-to-Text Modeling with fairseq. To use it, simply add your audio, or click one of the examples to load them. Read more at the links below."

# Footer HTML linking the paper and the GitHub repo.
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2010.05171' target='_blank'>fairseq S2T: Fast Speech-to-Text Modeling with fairseq</a> | <a href='https://github.com/pytorch/fairseq/tree/main/examples/speech_to_text' target='_blank'>Github Repo</a></p>"

# Clickable examples: (audio filepath, model id) pairs. The audio files are
# assumed to live alongside this script — TODO confirm they are bundled.
examples = [
  ["common_voice_es_en.flac", "xm_transformer_600m-es_en-multi_domain"],
  ["common_voice_ru_18945535.flac", "xm_transformer_600m-ru_en-multi_domain"],
  ["common_voice_fr_19731305.mp3", "xm_transformer_600m-fr_en-multi_domain"],
]

# Pre-load one Gradio Interface per speech-translation checkpoint from the
# Hugging Face hub. All eight are loaded eagerly at startup so that no model
# pays its load cost on the first user request. The inference() function below
# dispatches to one of these based on the dropdown selection.
io1 = gr.Interface.load("huggingface/facebook/xm_transformer_600m-es_en-multi_domain")

io2 = gr.Interface.load("huggingface/facebook/xm_transformer_600m-ru_en-multi_domain")

io3 = gr.Interface.load("huggingface/facebook/xm_transformer_600m-en_ru-multi_domain")

io4 = gr.Interface.load("huggingface/facebook/xm_transformer_600m-en_es-multi_domain")

io5 = gr.Interface.load("huggingface/facebook/xm_transformer_600m-en_zh-multi_domain")

io6 = gr.Interface.load("huggingface/facebook/xm_transformer_600m-fr_en-multi_domain")

io7 = gr.Interface.load("huggingface/facebook/xm_transformer_600m-en_ar-multi_domain")

# io8 (en_tr) is also the fallback target of inference()'s final else branch.
io8 = gr.Interface.load("huggingface/facebook/xm_transformer_600m-en_tr-multi_domain")


    
def inference(text, model):
    """Route the input audio to the selected speech-translation model.

    Parameters
    ----------
    text : str
        Filepath of the input audio clip (the Audio component is configured
        with type="filepath"; the parameter name is kept for interface
        compatibility).
    model : str
        Model identifier chosen in the dropdown. Any value not explicitly
        mapped falls through to the en_tr model (io8), matching the
        original if/elif chain's final ``else``.

    Returns
    -------
    The output of the chosen hub-loaded interface for this audio file.
    """
    # Dispatch table replaces the original if/elif ladder. Fixes from the
    # original: removed the dead `outtext = np.zeros(shape=(5,2))`
    # placeholder (always overwritten) and the leftover debug prints.
    dispatch = {
        "xm_transformer_600m-es_en-multi_domain": io1,
        "xm_transformer_600m-ru_en-multi_domain": io2,
        "xm_transformer_600m-en_ru-multi_domain": io3,
        "xm_transformer_600m-en_es-multi_domain": io4,
        "xm_transformer_600m-en_zh-multi_domain": io5,
        "xm_transformer_600m-fr_en-multi_domain": io6,
        "xm_transformer_600m-en_ar-multi_domain": io7,
    }
    # Unknown / unmapped ids (e.g. the en_tr choice) use io8, as before.
    return dispatch.get(model, io8)(text)


# Build and launch the demo. The Audio input hands inference() a filepath,
# and the Dropdown supplies the model id it dispatches on.
gr.Interface(
    inference,
    [
        gr.inputs.Audio(label="Input", type="filepath"),
        gr.inputs.Dropdown(
            choices=[
                "xm_transformer_600m-es_en-multi_domain",
                "xm_transformer_600m-ru_en-multi_domain",
                "xm_transformer_600m-en_ru-multi_domain",
                "xm_transformer_600m-en_es-multi_domain",
                "xm_transformer_600m-en_zh-multi_domain",
                "xm_transformer_600m-fr_en-multi_domain",
                "xm_transformer_600m-en_ar-multi_domain",
                # BUG FIX: was "facebook/xm_transformer_600m-en_tr-multi_domain",
                # inconsistent with every other (prefix-less) choice and with
                # the ids inference() compares against; it only worked by
                # falling into inference()'s else branch.
                "xm_transformer_600m-en_tr-multi_domain",
            ],
            type="value",
            default="xm_transformer_600m-es_en-multi_domain",
            label="model",
        ),
    ],
    gr.outputs.Audio(label="Output"),
    article=article,
    title=title,
    examples=examples,
    description=description,
).launch(enable_queue=True)