import os

# Upgrade Gradio at startup and log the installed package list.
os.system('pip install gradio --upgrade')
os.system('pip freeze')
import time
import torch
import string
from espnet_model_zoo.downloader import ModelDownloader
from espnet2.bin.asr_inference import Speech2Text
import soundfile
import librosa
import matplotlib.pyplot as plt
import gradio as gr
def text_normalizer(text):
    # Uppercase the hypothesis and strip punctuation for cleaner display.
    text = text.upper()
    return text.translate(str.maketrans('', '', string.punctuation))
def inference(audio, model):
    fs = 16000
    tag = model
    # Download the selected pretrained model from the ESPnet model zoo and
    # build a CPU Speech2Text pipeline (joint CTC/attention decoding, beam search).
    d = ModelDownloader()
    speech2text = Speech2Text(
        **d.download_and_unpack(tag),
        device="cpu",
        minlenratio=0.0,
        maxlenratio=0.0,
        ctc_weight=0.3,
        beam_size=10,
        batch_size=0,
        nbest=1
    )
    # Load the uploaded audio, resampled to the 16 kHz rate the model expects.
    speech, rate = librosa.load(audio.name, sr=fs)
    assert rate == fs, "mismatch in sampling rate"
    # Decode and keep only the best hypothesis.
    nbests = speech2text(speech)
    text, *_ = nbests[0]
    return f"ASR hypothesis: {text_normalizer(text)}"
inputs = [
    gr.inputs.Audio(label="Input Audio", type="file"),
    gr.inputs.Dropdown(
        choices=[
            "ftshijt/open_li52_asr_train_asr_raw_bpe7000_valid.acc.ave_10best",
            "Shinji Watanabe/spgispeech_asr_train_asr_conformer6_n_fft512_hop_length256_raw_en_unnorm_bpe5000_valid.acc.ave"
        ],
        type="value",
        default="ftshijt/open_li52_asr_train_asr_raw_bpe7000_valid.acc.ave_10best",
        label="model"
    )
]
outputs = gr.outputs.Textbox(label="Output Text")
title = "ESPnet2-ASR"
description = "Gradio demo for real-time ASR with ESPnet2. To use it, upload your own audio or click one of the examples to load it. Read more at the links below."
article = "<p style='text-align: center'><a href='https://espnet.github.io/espnet/'>ESPnet: end-to-end speech processing toolkit</a> | <a href='https://github.com/espnet/espnet'>Github Repo</a></p>"
examples = [
["poem.wav"]
]
gr.Interface(
    inference,
    inputs,
    outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
    enable_queue=True
).launch()