# Spaces: Runtime error (page-status text captured with the source; not part of the program)
import string
import time

import gradio as gr
import librosa.display
import matplotlib.pyplot as plt
import soundfile
import torch
from espnet2.bin.asr_inference import Speech2Text
from espnet_model_zoo.downloader import ModelDownloader
# Descriptive label only; nothing in the visible code reads it.
lang = 'multilingual'
# Sampling rate that inference() compares against the rate of each uploaded file.
fs = 16000
# Identifier of the pretrained model handed to ModelDownloader.download_and_unpack().
tag = 'ftshijt/open_li52_asr_train_asr_raw_bpe7000_valid.acc.ave_10best'

# The model is fetched and constructed once, at import time, so every request
# handled by inference() reuses the same Speech2Text instance.
d = ModelDownloader()
speech2text = Speech2Text(
    # download_and_unpack() returns a mapping that is splatted in as keyword
    # arguments (presumably the config/model file paths -- confirm against
    # the espnet_model_zoo documentation).
    **d.download_and_unpack(tag),
    device="cpu",
    minlenratio=0.0,
    maxlenratio=0.0,
    ctc_weight=0.3,
    beam_size=10,
    batch_size=0,
    # inference() only reads the first hypothesis, so one is requested.
    nbest=1,
)
# Deletes every character in string.punctuation (ASCII punctuation). Built once
# at import time instead of on every call.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def text_normalizer(text: str) -> str:
    """Return *text* upper-cased with ASCII punctuation removed.

    Only the characters in ``string.punctuation`` are stripped; whitespace,
    digits and non-ASCII characters are kept.

    Args:
        text: The string to normalize.

    Returns:
        The upper-cased string without ASCII punctuation.
    """
    return text.upper().translate(_PUNCTUATION_TABLE)
def inference(audio):
    """Transcribe one audio file and return the normalized top hypothesis.

    Args:
        audio: Either a filesystem path, or an object whose ``.name``
            attribute holds the path (the form this function originally
            required).

    Returns:
        The string ``"ASR hypothesis: <TEXT>"``, where ``<TEXT>`` is the first
        hypothesis from ``speech2text`` passed through ``text_normalizer``.

    Raises:
        ValueError: If the file's sampling rate differs from ``fs``.
    """
    # Fall back to the argument itself when it has no `.name`, so a plain
    # path string works as well as a file wrapper.
    path = getattr(audio, "name", audio)
    speech, rate = soundfile.read(path)
    # Explicit raise instead of `assert`: asserts are stripped under `-O`,
    # which would let audio at the wrong rate reach the model unchecked.
    if rate != fs:
        raise ValueError(
            f"mismatch in sampling rate: expected {fs}, got {rate}"
        )
    nbests = speech2text(speech)
    # Each n-best entry is a tuple whose first element is the text; the
    # remaining elements are not used here.
    text, *_ = nbests[0]
    return f"ASR hypothesis: {text_normalizer(text)}"
# NOTE(review): the `gr.inputs` / `gr.outputs` namespaces are deprecated in
# newer Gradio releases and, as far as I know, removed in 4.x -- a likely
# cause of a startup failure. Confirm against the installed Gradio version
# before migrating to `gr.Audio` / `gr.Textbox`.
inputs = gr.inputs.Audio(label="Input Audio", type="file")
outputs = gr.outputs.Textbox(label="Output Text")
# The title previously read "wav2vec 2.0", which contradicts the description
# below and the ESPnet model this script loads.
title = "ESPnet2 ASR"
description = "Gradio demo for Real-time ASR with ESPnet2. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."
article = "<p style='text-align: center'><a href='https://espnet.github.io/espnet/'>ESPnet: end-to-end speech processing toolkit</a> | <a href='https://github.com/espnet/espnet'>Github Repo</a></p>"

# Build the UI around inference() and start serving immediately.
gr.Interface(
    inference,
    inputs,
    outputs,
    title=title,
    description=description,
    article=article,
).launch()