Spaces:
Runtime error
Runtime error
File size: 2,021 Bytes
3119dd6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import gradio as gr
import librosa
import soundfile as sf
import torch
import warnings
import os
from transformers import Wav2Vec2ProcessorWithLM, Wav2Vec2CTCTokenizer, Wav2Vec2Model
warnings.filterwarnings("ignore")
from speechbrain.pretrained import EncoderDecoderASR
asr_model = EncoderDecoderASR.from_hparams(source="speechbrain/asr-wav2vec2-commonvoice-rw", savedir="pretrained_models/asr-wav2vec2-commonvoice-rw")
#asr_model.transcribe_file("speechbrain/asr-wav2vec2-commonvoice-rw/example.mp3")
# define speech-to-text function
def asr_transcript(audio, audio_microphone, model_params):
audio = audio_microphone if audio_microphone else audio
if audio == None and audio_microphone == None:
return "Please provide audio by uploading a file or by recording audio using microphone by pressing Record (And allow usage of microphone)", "Please provide audio by uploading a file or by recording audio using microphone by pressing Record (And allow usage of microphone)"
text = ""
if audio:
text = asr_model.transcribe_file(audio.name)
return text
else:
return "File not valid"
gradio_ui = gr.Interface(
fn=asr_transcript,
title="Kinyarwanda Speech Recognition",
description="Upload an audio clip or record from browser using microphone, and let AI do the hard work of transcribing.",
article = """
This demo showcases the pretrained model from deepspeech.
""",
inputs=[gr.inputs.Audio(label="Upload Audio File", type="file", optional=True), gr.inputs.Audio(source="microphone", type="file", optional=True, label="Record from microphone"), gr.inputs.Dropdown(choices=["deepspeech","coqui (soon)"], type="value", default="deepspeech", label="Select speech recognition model ", optional=False)],
outputs=[gr.outputs.Textbox(label="Recognized speech")],
examples = [["sample_1.wav","sample_1.wav","deepspeech"],["sample_2.wav","sample_2.wav","deepspeech"]]
)
gradio_ui.launch(enable_queue=True) |