metaambod / app.py
unijoh's picture
Update app.py
ff3a5da verified
raw
history blame
1.78 kB
import gradio as gr
import librosa
from asr import transcribe
from tts import synthesize
def identify(microphone, file_upload):
    """Identify the spoken language of an audio recording.

    At least one audio source is required.  When both are supplied, the
    microphone recording is used and the uploaded file is ignored.

    Args:
        microphone: Audio from the microphone input, or ``None`` when
            nothing was recorded.  It is handed to ``librosa.load`` as-is.
        file_upload: Audio from the upload input, or ``None`` when nothing
            was uploaded.  It is handed to ``librosa.load`` as-is.

    Returns:
        A ``{language: confidence}`` dict, or an ``"ERROR: ..."`` string
        when neither input was provided.

    Raises:
        Any exception ``librosa.load`` raises for audio it cannot read.
    """
    # Function-scope import so this block does not rely on the file's
    # top-level import list.
    import logging

    LID_SAMPLING_RATE = 16_000

    if microphone is None and file_upload is None:
        return "ERROR: Provide an audio file or use the microphone."

    if microphone is not None and file_upload is not None:
        # This message used to be returned as the result, which aborted the
        # request even though the text promises that the microphone input
        # is used.  Record it and carry on with the microphone recording.
        logging.getLogger(__name__).warning(
            "WARNING: Using microphone input. Uploaded file will be ignored."
        )

    audio_fp = microphone if microphone is not None else file_upload

    # The decoded samples are not consumed (the result below is fixed), but
    # the load is kept so that unreadable audio still raises here.
    librosa.load(audio_fp, sr=LID_SAMPLING_RATE, mono=True)

    return {"Faroese": 1.0}
# Root Blocks container; the tabbed layout is built inside it further down
# and it is the object that gets launched.
demo = gr.Blocks()
# Speech-to-text tab: a microphone input and an upload input (both delivered
# as file paths) are passed to ``transcribe``; the result is shown as text.
mms_transcribe = gr.Interface(
    title="Speech-to-text",
    description="Transcribe audio!",
    fn=transcribe,
    inputs=[
        gr.Audio(source=audio_source, type="filepath")
        for audio_source in ("microphone", "upload")
    ],
    outputs="text",
    allow_flagging="never",
)
# Text-to-speech tab: the input text and a speed value (0.1-4.0, default 1.0)
# are passed to ``synthesize``; the result is rendered as an audio player.
mms_synthesize = gr.Interface(
    title="Text-to-speech",
    description="Generate audio!",
    fn=synthesize,
    inputs=[
        gr.Text(label="Input text"),
        gr.Slider(
            label="Speed",
            value=1.0,
            minimum=0.1,
            maximum=4.0,
            step=0.1,
        ),
    ],
    outputs=gr.Audio(type="numpy", label="Generated Audio"),
    allow_flagging="never",
)
# Language-identification tab: a microphone input and an upload input (both
# delivered as file paths) are passed to ``identify``; the result is shown
# in a label limited to the single top class.
mms_identify = gr.Interface(
    title="Language Identification",
    description="Identify the language of audio!",
    fn=identify,
    inputs=[
        gr.Audio(source=audio_source, type="filepath")
        for audio_source in ("microphone", "upload")
    ],
    outputs=gr.Label(num_top_classes=1),
    allow_flagging="never",
)
# Assemble the three interfaces into one tabbed page inside ``demo``; each
# tab name is paired with the interface at the same list position.
with demo:
    gr.TabbedInterface(
        interface_list=[mms_synthesize, mms_transcribe, mms_identify],
        tab_names=[
            "Text-to-speech",
            "Speech-to-text",
            "Language Identification",
        ],
    )

# Start serving the assembled app.
demo.launch()