Spaces:

akhaliq
/

espnet2_asr

Runtime error

File size: 1,976 Bytes

0c2f228
ef4b9d8
0c2f228
e8f9c74
 
 
 
 
 
 
 
c64ca36
e8f9c74
94e0305
6247742
 
 
098d68e
e8f9c74
 
 
 
d6a2d06
098d68e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7af941
cc8179b
d6a2d06
 
99e31bc
a97d8ed
c85ccee
a97d8ed
 
8a6ae20
372ceb3
00d0134
a97d8ed
3f53296
 
 
ceacc53
d6a2d06

import os
import subprocess
import sys

# Upgrade gradio at startup, before it is imported further down.  pip is run
# as ``<this interpreter> -m pip`` with an argument list (no shell string), so
# the upgrade targets the environment this script actually imports from
# rather than whichever ``pip`` executable happens to be first on PATH.
# ``check=False`` keeps the previous behaviour of carrying on if pip fails.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "gradio", "--upgrade"],
    check=False,
)
# Print the resolved package versions to the log for debugging.
subprocess.run([sys.executable, "-m", "pip", "freeze"], check=False)
import time
import torch
import string
from espnet_model_zoo.downloader import ModelDownloader
from espnet2.bin.asr_inference import Speech2Text


import soundfile
import librosa
import matplotlib.pyplot as plt


import gradio as gr



def text_normalizer(text):
    """Return *text* upper-cased with all ASCII punctuation removed."""
    # Mapping every punctuation character to None makes translate() drop it.
    drop_punctuation = str.maketrans(dict.fromkeys(string.punctuation))
    uppercased = text.upper()
    return uppercased.translate(drop_punctuation)

# Speech2Text instances keyed by model tag, so each model is downloaded,
# unpacked and constructed once per process instead of on every request.
_SPEECH2TEXT_CACHE = {}


def _get_speech2text(tag):
    """Return the Speech2Text decoder for *tag*, building it on first use."""
    if tag not in _SPEECH2TEXT_CACHE:
        downloader = ModelDownloader()
        _SPEECH2TEXT_CACHE[tag] = Speech2Text(
            **downloader.download_and_unpack(tag),
            device="cpu",
            minlenratio=0.0,
            maxlenratio=0.0,
            ctc_weight=0.3,
            beam_size=10,
            batch_size=0,
            nbest=1,
        )
    return _SPEECH2TEXT_CACHE[tag]


def inference(audio, model):
    """Transcribe an audio file with the selected ESPnet2 ASR model.

    Args:
        audio: The audio to transcribe: either an object exposing the file
            path as ``.name`` (what the UI passes today) or the path itself.
        model: Model tag handed to ``ModelDownloader.download_and_unpack``.

    Returns:
        ``"ASR hypothesis: "`` followed by the best hypothesis after
        ``text_normalizer`` has been applied to it.

    Raises:
        ValueError: If no audio was provided.
        RuntimeError: If the loaded audio is not at the expected sample rate.
    """
    if audio is None:
        # Fail with a clear message instead of an AttributeError on ``.name``.
        raise ValueError("no audio file was provided")

    fs = 16000
    # Accept both a file-like object with ``.name`` and a plain path string.
    audio_path = getattr(audio, "name", audio)

    speech, rate = librosa.load(audio_path, sr=fs)
    if rate != fs:
        # Explicit check rather than ``assert`` so it survives ``python -O``.
        raise RuntimeError("mismatch in sampling rate")

    speech2text = _get_speech2text(model)
    nbests = speech2text(speech)
    # Each n-best entry starts with the hypothesis text; keep only that.
    text, *_ = nbests[0]
    return f"ASR hypothesis: {text_normalizer(text)}"
  
# --- Gradio UI wiring -------------------------------------------------------
# NOTE(review): ``gr.inputs`` / ``gr.outputs`` and ``enable_queue`` look like
# older Gradio API -- confirm they exist in the Gradio version that is
# installed at startup.
inputs = [
    gr.inputs.Audio(label="Input Audio", type="file"),
    gr.inputs.Dropdown(
        choices=[
            "ftshijt/open_li52_asr_train_asr_raw_bpe7000_valid.acc.ave_10best",
            "Shinji Watanabe/spgispeech_asr_train_asr_conformer6_n_fft512_hop_length256_raw_en_unnorm_bpe5000_valid.acc.ave",
        ],
        type="value",
        default="ftshijt/open_li52_asr_train_asr_raw_bpe7000_valid.acc.ave_10best",
        label="model",
    ),
]
outputs = gr.outputs.Textbox(label="Output Text")

# Page text shown around the demo.
title = "ESPnet2-ASR"
description = (
    "Gradio demo for Real-time ASR with ESPnet2. "
    "To use it, simply upload your audio, or click one of the examples to load them. "
    "Read more at the links below."
)
article = (
    "<p style='text-align: center'>"
    "<a href='https://espnet.github.io/espnet/'>ESPnet: end-to-end speech processing toolkit</a>"
    " | "
    "<a href='https://github.com/espnet/espnet'>Github Repo</a>"
    "</p>"
)

# One example row; only the audio input is pre-filled.
examples = [["poem.wav"]]

demo = gr.Interface(
    fn=inference,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
    enable_queue=True,
)
demo.launch()