Spaces:
yashrma
/
Runtime error

File size: 4,087 Bytes
7bcf8d7
 
 
 
 
62799e5
7bcf8d7
62799e5
 
7bcf8d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96ce9d1
62799e5
96ce9d1
 
 
 
 
 
813fffa
96ce9d1
 
813fffa
 
62799e5
 
7bcf8d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62799e5
 
7bcf8d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5cc287f
7bcf8d7
813fffa
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import gradio as gr
import librosa
from asr import transcribe, ASR_EXAMPLES, ASR_LANGUAGES, ASR_NOTE
from tts import synthesize, TTS_EXAMPLES, TTS_LANGUAGES
from lid import identify, LID_EXAMPLES
from generate import generate, GenExamples

# Token-generation limits. Neither constant is referenced in the visible part
# of this file.
# NOTE(review): presumably meant for the chat tab's generation settings —
# confirm they are still needed.
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024

# `demo` is deliberately not created here: it is bound by the
# `with gr.Blocks() as demo:` statement further down, so an extra
# `gr.Blocks()` instance at this point would be discarded unused.

# --- Speech-to-text tab ------------------------------------------------------
# Radio selector for the audio source. The two audio widgets below are both
# inputs of the interface; which one is visible is toggled by a `.change`
# handler registered inside the `gr.Blocks` context later in this file.
mms_select_source_trans = gr.Radio(
    ["Record from Mic", "Upload audio"],
    label="Audio input",
    value="Record from Mic",
)
mms_mic_source_trans = gr.Audio(source="microphone", type="filepath", label="Use mic")
# Starts hidden because the selector defaults to "Record from Mic".
mms_upload_source_trans = gr.Audio(
    source="upload", type="filepath", label="Upload file", visible=False
)
# `transcribe` receives, in order: the selector value, the microphone file
# path, the uploaded file path, and the chosen language string.
mms_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        mms_select_source_trans,
        mms_mic_source_trans,
        mms_upload_source_trans,
        gr.Dropdown(
            [f"{k} ({v})" for k, v in ASR_LANGUAGES.items()],
            label="Language",
            # The default must follow the same "<code> (<name>)" format as the
            # choices above; the previous default "eng English" had no
            # parentheses and therefore could never equal any choice.
            # NOTE(review): assumes ASR_LANGUAGES maps "eng" to "English" —
            # confirm against asr.py.
            value="eng (English)",
        ),
    ],
    outputs="text",
    examples=ASR_EXAMPLES,
    title="Speech-to-text",
    description=(
        "Transcribe audio from a microphone or input file in your desired language."
    ),
    article=ASR_NOTE,
    allow_flagging="never",
)

# --- Text-to-speech tab ------------------------------------------------------
# `synthesize` receives, in order: the input text, the chosen language string,
# and the speed value. It feeds two outputs: an audio widget and a text box
# labelled as the input text with OOVs removed.
mms_synthesize = gr.Interface(
    fn=synthesize,
    inputs=[
        gr.Text(label="Input text"),
        gr.Dropdown(
            [f"{k} ({v})" for k, v in TTS_LANGUAGES.items()],
            label="Language",
            # The default must follow the same "<code> (<name>)" format as the
            # choices above; the previous default "eng English" had no
            # parentheses and therefore could never equal any choice.
            # NOTE(review): assumes TTS_LANGUAGES maps "eng" to "English" —
            # confirm against tts.py.
            value="eng (English)",
        ),
        gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
    ],
    outputs=[
        gr.Audio(label="Generated Audio", type="numpy"),
        gr.Text(label="Filtered text after removing OOVs"),
    ],
    examples=TTS_EXAMPLES,
    title="Text-to-speech",
    description=("Generate audio in your desired language from input text."),
    allow_flagging="never",
)

# --- Chat tab ----------------------------------------------------------------
# Three free-text boxes (message, chat history, system prompt) are passed to
# `generate` in that order; its return value is shown in a single text box.
chat_interface = gr.Interface(
    title="Chat Interface",
    description="Interactive chat interface using Hugging Face Transformers.",
    fn=generate,
    inputs=[
        gr.Textbox(label=field_label, type="text")
        for field_label in ("Message", "Chat History", "System prompt")
    ],
    outputs=gr.Textbox(),
)

# --- Language-identification tab ---------------------------------------------
# Same source-selection trio as the speech-to-text tab, but with its own
# component instances. The visibility toggle is registered inside the
# `gr.Blocks` context later in this file.
mms_select_source_iden = gr.Radio(
    ["Record from Mic", "Upload audio"],
    label="Audio input",
    value="Record from Mic",
)
mms_mic_source_iden = gr.Audio(source="microphone", type="filepath", label="Use mic")
# Starts hidden because the selector defaults to "Record from Mic".
mms_upload_source_iden = gr.Audio(
    source="upload", type="filepath", label="Upload file", visible=False
)
# `identify` receives, in order: the selector value, the microphone file path
# and the uploaded file path; its result is rendered by a `gr.Label`
# configured with `num_top_classes=10`.
mms_identify = gr.Interface(
    fn=identify,
    inputs=[
        mms_select_source_iden,
        mms_mic_source_iden,
        mms_upload_source_iden,
    ],
    outputs=gr.Label(num_top_classes=10),
    examples=LID_EXAMPLES,
    title="Language Identification",
    # Fixed user-facing typo: the text previously read "Identity the language".
    description=("Identify the language of input audio."),
    allow_flagging="never",
)

# Combine the four interfaces into one tabbed view. The two lists are
# positional pairs: the n-th tab name labels the n-th interface.
tabbed_interface = gr.TabbedInterface(
    interface_list=[
        mms_transcribe,
        mms_synthesize,
        mms_identify,
        chat_interface,
    ],
    tab_names=[
        "Speech-to-text",
        "Text-to-speech",
        "Language Identification",
        "Chat with Llama",
    ],
)

def _toggle_audio_source(choice):
    """Return visibility updates for a (microphone, upload) widget pair.

    Args:
        choice: The currently selected label of an "Audio input" radio.

    Returns:
        A two-item list of ``gr.update`` payloads. The first makes the
        microphone widget visible only when ``choice`` is "Record from Mic";
        the second makes the upload widget visible only when ``choice`` is
        "Upload audio".
    """
    return [
        gr.update(visible=choice == "Record from Mic"),
        gr.update(visible=choice == "Upload audio"),
    ]


with gr.Blocks() as demo:
    tabbed_interface.render()

    # Both the speech-to-text tab and the language-identification tab have
    # their own selector/mic/upload trio, so the same handler is registered
    # once per trio. `queue=False` is passed so these UI-only toggles are
    # not routed through the request queue.
    mms_select_source_trans.change(
        _toggle_audio_source,
        inputs=[mms_select_source_trans],
        outputs=[mms_mic_source_trans, mms_upload_source_trans],
        queue=False,
    )
    mms_select_source_iden.change(
        _toggle_audio_source,
        inputs=[mms_select_source_iden],
        outputs=[mms_mic_source_iden, mms_upload_source_iden],
        queue=False,
    )

# Enable the request queue with `concurrency_count=3`, then start the app.
# `queue()` returns the Blocks object it was called on, so the two calls
# can be chained.
demo.queue(concurrency_count=3).launch()