Irpan
asr
9510f4a
raw
history blame
1.55 kB
import gradio as gr
import asr
import tts
import util
# Speech-to-text tab: hands the audio input and the selected ASR model to
# asr.transcribe and displays two text outputs (Arabic-script and
# Latin-script transcriptions, per the output labels).
mms_transcribe = gr.Interface(
    fn=asr.transcribe,
    title="Speech-to-text",
    description="Transcribe Uyghur speech audio from a microphone or input file.",
    inputs=[
        gr.Audio(),
        gr.Dropdown(
            label="Select a Model for ASR",
            # One choice per item yielded by iterating asr.models_info.
            choices=list(asr.models_info),
            value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
            interactive=True,
        ),
    ],
    outputs=[
        gr.Textbox(label="Uyghur Arabic Transcription"),
        gr.Textbox(label="Uyghur Latin Transcription"),
    ],
    # Example inputs are currently disabled:
    # examples=util.asr_examples,
    allow_flagging="never",
)
# Text-to-speech tab: hands the input text and the selected TTS model to
# tts.synthesize and plays back the single audio output.
mms_synthesize = gr.Interface(
    fn=tts.synthesize,
    title="Text-to-speech",
    description="Generate audio from input Uyghur text.",
    inputs=[
        gr.Text(label="Input text"),
        gr.Dropdown(
            label="Select a Model for TTS",
            # One choice per item yielded by iterating tts.models_info.
            choices=list(tts.models_info),
            value="Meta-MMS",
            interactive=True,
        ),
    ],
    outputs=[
        gr.Audio(label="Generated Audio"),
    ],
    # Example inputs are currently disabled:
    # examples=util.tts_examples,
    allow_flagging="never",
)
# Combine the two interfaces into one tabbed view; tab names are matched
# to interfaces by position.
tabbed_interface = gr.TabbedInterface(
    interface_list=[mms_transcribe, mms_synthesize],
    tab_names=["Speech-to-text", "Text-to-speech"],
)
# Top-level app: a short Markdown heading followed by the tabbed
# speech-to-text / text-to-speech interface.
with gr.Blocks() as demo:
    # Fixed the user-facing typo "Comparision" -> "Comparison".
    gr.Markdown("Comparison of STT and TTS models for Uyghur language.")
    # The tabbed interface was built outside this context, so it must be
    # rendered explicitly to appear inside `demo`.
    tabbed_interface.render()
# Script entry point: enable the request queue, then start the app.
if __name__ == "__main__":
    # queue() returns the Blocks instance, so launch() can be chained.
    demo.queue().launch()