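# Gradio demo app with three tabs: speech-to-text, text-to-speech, and
# language identification. `transcribe` and `synthesize` come from the
# local asr and tts modules.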
import gradio as gr
import librosa
from asr import transcribe
from tts import synthesize
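

# Language-identification handler: loads the recorded or uploaded audio at
# 16 kHz but, as written, returns a fixed "Faroese" label rather than running
# an actual LID model.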
def identify(microphone, file_upload):
    LID_SAMPLING_RATE = 16_000

    if (microphone is not None) and (file_upload is not None):
        return "WARNING: Both a microphone recording and an uploaded file were provided. Please use only one."

    if (microphone is None) and (file_upload is None):
        return "ERROR: Provide an audio file or use the microphone."

    audio_fp = microphone if microphone is not None else file_upload
    # Load the audio as mono at the LID sampling rate (16 kHz).
    inputs = librosa.load(audio_fp, sr=LID_SAMPLING_RATE, mono=True)[0]

    return {"Faroese": 1.0}
demo = gr.Blocks()

mms_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Audio(source="upload", type="filepath"),
    ],
    outputs="text",
    title="Speech-to-text",
    description="Transcribe audio!",
    allow_flagging="never",
)

mms_synthesize = gr.Interface(
    fn=synthesize,
    inputs=[
        gr.Text(label="Input text"),
        gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
    ],
    outputs=gr.Audio(label="Generated Audio", type="numpy"),
    title="Text-to-speech",
    description="Generate audio!",
    allow_flagging="never",
)

mms_identify = gr.Interface(
    fn=identify,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Audio(source="upload", type="filepath"),
    ],
    outputs=gr.Label(num_top_classes=1),
    title="Language Identification",
    description="Identify the language of audio!",
    allow_flagging="never",
)

with demo:
    gr.TabbedInterface(
        [mms_synthesize, mms_transcribe, mms_identify],
        ["Text-to-speech", "Speech-to-text", "Language Identification"],
    )

demo.launch()