import warnings

import gradio as gr
import librosa

from asr import transcribe
from tts import synthesize
def identify(microphone, file_upload):
    """Identify the spoken language of a recorded or uploaded audio clip.

    When both inputs are supplied the microphone recording takes precedence
    and the uploaded file is ignored; a warning is emitted and processing
    continues with the microphone recording.

    Args:
        microphone: File path of the microphone recording, or ``None`` when
            nothing was recorded.
        file_upload: File path of the uploaded audio, or ``None`` when nothing
            was uploaded.

    Returns:
        A ``{language: confidence}`` dict, or an ``"ERROR: ..."`` string when
        neither input was provided.
    """
    LID_SAMPLING_RATE = 16_000

    if microphone is None and file_upload is None:
        return "ERROR: Provide an audio file or use the microphone."

    if microphone is not None and file_upload is not None:
        # Report the conflict but keep going: returning the message here would
        # contradict it, since the microphone input would never be used.
        warnings.warn(
            "WARNING: Using microphone input. Uploaded file will be ignored."
        )

    audio_fp = microphone if microphone is not None else file_upload

    # Decode as mono at the LID sampling rate so unreadable audio still fails
    # here. The decoded samples are discarded because the prediction below is
    # still hard-coded.
    librosa.load(audio_fp, sr=LID_SAMPLING_RATE, mono=True)

    return {"Faroese": 1.0}
# Top-level Blocks container; the tabbed UI is attached to it further down
# in this file and then launched.
demo = gr.Blocks()
# Speech-to-text interface: `transcribe` receives a microphone recording and
# an uploaded file (both as file paths) and its result is shown as text.
mms_transcribe = gr.Interface(
    transcribe,
    [
        gr.Audio(source=audio_source, type="filepath")
        for audio_source in ("microphone", "upload")
    ],
    "text",
    title="Speech-to-text",
    description="Transcribe audio!",
    allow_flagging="never",
)
# Text-to-speech interface: `synthesize` receives the input text and the
# "Speed" slider value (0.1 to 4.0, default 1.0) and its result is played
# back through a numpy-typed audio output.
mms_synthesize = gr.Interface(
    synthesize,
    [
        gr.Text(label="Input text"),
        gr.Slider(
            label="Speed",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=1.0,
        ),
    ],
    gr.Audio(label="Generated Audio", type="numpy"),
    title="Text-to-speech",
    description="Generate audio!",
    allow_flagging="never",
)
# Language-identification interface: `identify` receives a microphone
# recording and an uploaded file (both as file paths); only the top class
# of its result is shown in the label.
mms_identify = gr.Interface(
    identify,
    [
        gr.Audio(source=audio_source, type="filepath")
        for audio_source in ("microphone", "upload")
    ],
    gr.Label(num_top_classes=1),
    title="Language Identification",
    description="Identify the language of audio!",
    allow_flagging="never",
)
# Mount the three interfaces as tabs inside the Blocks container; the tab
# names are listed in the same order as the interfaces.
with demo:
    gr.TabbedInterface(
        interface_list=[mms_synthesize, mms_transcribe, mms_identify],
        tab_names=[
            "Text-to-speech",
            "Speech-to-text",
            "Language Identification",
        ],
    )

# Start the app.
demo.launch()