Spaces:
Runtime error
Runtime error
File size: 1,651 Bytes
e8f9c74 c64ca36 e8f9c74 6247742 b31525e e8f9c74 d6a2d06 b7af941 cc8179b d6a2d06 99e31bc a97d8ed 8a6ae20 372ceb3 00d0134 a97d8ed 3f53296 ceacc53 d6a2d06 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import time
import torch
import string
from espnet_model_zoo.downloader import ModelDownloader
from espnet2.bin.asr_inference import Speech2Text
import soundfile
import librosa
import matplotlib.pyplot as plt
# NOTE(review): gradio is imported here, before the `pip install` below runs.
# If gradio were missing, this line would raise ImportError and the install
# step would never be reached -- confirm whether the install is still needed.
import gradio as gr
import os
# Shells out to pip at import time; the exit status returned by os.system is
# discarded, so a failed install goes unnoticed here.
os.system('pip install gradio')
# Repeated import: `gr` is already bound by the import above.
import gradio as gr
# Demo-wide settings: language label, the sampling rate that `inference`
# checks loaded audio against, and the model tag handed to the downloader.
lang = 'multilingual'
fs = 16000
tag = 'ftshijt/open_li52_asr_train_asr_raw_bpe7000_valid.acc.ave_10best'

# Keyword options passed to Speech2Text in addition to whatever
# download_and_unpack() returns for the tag.
_DECODE_OPTIONS = dict(
    device="cpu",
    minlenratio=0.0,
    maxlenratio=0.0,
    ctc_weight=0.3,
    beam_size=10,
    batch_size=0,
    nbest=1,
)

# Fetch the model files for `tag` and build the recognizer once at import time
# so every request reuses the same instance.
d = ModelDownloader()
speech2text = Speech2Text(**d.download_and_unpack(tag), **_DECODE_OPTIONS)
# Translation table that deletes every character in string.punctuation (ASCII
# punctuation only). Built once at import time rather than on every call.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def text_normalizer(text: str) -> str:
    """Return *text* upper-cased with ASCII punctuation removed.

    Only the characters in ``string.punctuation`` are stripped; punctuation
    outside that set (for example ``¿``) is left in place.

    Args:
        text: The string to normalize.

    Returns:
        The upper-cased string with ASCII punctuation deleted.
    """
    return text.upper().translate(_PUNCTUATION_TABLE)
def inference(audio):
    """Transcribe one audio file and return the normalized top hypothesis.

    Args:
        audio: Either an object exposing the file path as ``.name`` (the form
            the original code expected from the ``type="file"`` audio input)
            or a plain path string.

    Returns:
        The string ``"ASR hypothesis: "`` followed by the first hypothesis
        text after ``text_normalizer`` has been applied.

    Raises:
        ValueError: If the sampling rate reported by the loader differs from
            the module-level ``fs``.
    """
    # Accept both a file-like wrapper and a bare path.
    path = getattr(audio, "name", audio)

    # Load at the module-level rate so the request and the check below use the
    # same value instead of a separate hard-coded 16000.
    speech, rate = librosa.load(path, sr=fs)

    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    if rate != fs:
        raise ValueError("mismatch in sampling rate")

    nbests = speech2text(speech)
    # Each entry's first element is the hypothesis text; the rest is unused.
    text, *_ = nbests[0]
    return f"ASR hypothesis: {text_normalizer(text)}"
# Widgets: one audio upload delivered to `inference` as a file, one text box
# for the returned string.
inputs = gr.inputs.Audio(label="Input Audio", type="file")
outputs = gr.outputs.Textbox(label="Output Text")

# Page text shown around the widgets.
title = "ESPnet2-ASR"
description = "Gradio demo for Real-time ASR with ESPnet2. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."
article = "<p style='text-align: center'><a href='https://espnet.github.io/espnet/'>ESPnet: end-to-end speech processing toolkit</a> | <a href='https://github.com/espnet/espnet'>Github Repo</a></p>"

# One example row per sample; each row holds the value for the single input.
examples = [["poem.wav"]]

# Build the interface with explicit keywords, then start serving.
demo = gr.Interface(
    fn=inference,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
    enable_queue=True,
)
demo.launch()
|