Spaces:

ixxan
/

uyghur-speech-models

Running

File size: 1,550 Bytes

20aa839
3a18b3b
bef8623
660776b
20aa839
 
3a18b3b
20aa839
cafc4cf
3a18b3b
7bc4048
ed17acd
3a18b3b
 
1dfec92
 
 
 
 
20aa839
9510f4a
20aa839
 
1dfec92
20aa839
 
 
 
bef8623
 
 
 
1dfec92
7b99c0e
ed17acd
b9ff9e2
 
 
bef8623
 
30e5da4
bef8623
9510f4a
bef8623
ed17acd
 
 
bef8623
 
20aa839
 
bef8623
 
20aa839
 
 
7bc4048
20aa839

import gradio as gr
import asr
import tts
import util

# Speech-to-text tab: hands the audio input and the selected ASR model name
# to asr.transcribe and shows the two returned values in the Arabic-script
# and Latin-script text boxes.
mms_transcribe = gr.Interface(
    title="Speech-to-text",
    description="Transcribe Uyghur speech audio from a microphone or input file.",
    fn=asr.transcribe,
    inputs=[
        gr.Audio(),
        gr.Dropdown(
            label="Select a Model for ASR",
            # One dropdown entry per item yielded by iterating asr.models_info.
            choices=list(asr.models_info),
            value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
            interactive=True,
        ),
    ],
    outputs=[
        gr.Textbox(label="Uyghur Arabic Transcription"),
        gr.Textbox(label="Uyghur Latin Transcription"),
    ],
    # NOTE: example inputs (util.asr_examples) are currently switched off.
    allow_flagging="never",
)

# Text-to-speech tab: hands the typed text and the selected TTS model name
# to tts.synthesize and presents the result in an audio component.
mms_synthesize = gr.Interface(
    title="Text-to-speech",
    description="Generate audio from input Uyghur text.",
    fn=tts.synthesize,
    inputs=[
        gr.Text(label="Input text"),
        gr.Dropdown(
            label="Select a Model for TTS",
            # One dropdown entry per item yielded by iterating tts.models_info.
            choices=list(tts.models_info),
            value="Meta-MMS",
            interactive=True,
        ),
    ],
    outputs=[gr.Audio(label="Generated Audio")],
    # NOTE: example inputs (util.tts_examples) are currently switched off.
    allow_flagging="never",
)

# Bundle both demos into a single tabbed widget; tab names are paired with
# the interfaces by position.
tabbed_interface = gr.TabbedInterface(
    interface_list=[mms_transcribe, mms_synthesize],
    tab_names=["Speech-to-text", "Text-to-speech"],
)

# Top-level page: one introductory line of Markdown followed by the tabbed
# speech-to-text / text-to-speech demos.
with gr.Blocks() as demo:
    gr.Markdown("Comparison of STT and TTS models for Uyghur language.")
    # The tabbed interface was constructed outside this Blocks context, so it
    # is rendered explicitly to place it on this page.
    tabbed_interface.render()

if __name__ == "__main__":
    # Enable request queuing, then start the app. This only happens when the
    # file is executed as a script, not when it is imported.
    demo.queue().launch()