# Source: PaddleSpeechASR / app.py — commit ff2f952 ("Update app.py") by KPatrick
import gradio as gr
import librosa
import numpy as np
import paddlehub as hub
from paddlenlp import Taskflow
from paddlespeech.cli import ASRExecutor
import soundfile as sf
# Models are instantiated once at import time and reused by speech_recognize().
# Earlier PaddleHub-based recognizer, kept for reference only:
# asr_model = hub.Module(name='u2_conformer_aishell')
# Speech-to-text executor from the PaddleSpeech CLI; it is called with an
# audio file path.
asr_executor = ASRExecutor()
# PaddleNLP Taskflow for the "text_correction" task; each result is read via
# its 'target' and 'errors' keys.
text_correct_model = Taskflow("text_correction")
# PaddleHub 'auto_punc' module; its add_puncs() is used to punctuate the
# corrected transcript.
punc_model = hub.Module(name='auto_punc')
def speech_recognize(file):
    """Transcribe an audio file, then correct and punctuate the transcript.

    The audio is re-encoded in place at 16 kHz before recognition. The raw
    transcript is then passed through the text-correction model and the
    punctuation model.

    Args:
        file: Path to the audio file to transcribe. The file is overwritten
            with its 16 kHz version. May be ``None`` or empty when no audio
            was provided.

    Returns:
        A three-line string holding the raw ASR text (``[ASR]``), the
        corrected text (``[COR]``) and the punctuated text (``[PUN]``), or an
        empty string when no file was supplied.
    """
    # Nothing was recorded/uploaded: return early instead of crashing inside
    # librosa.load().
    if not file:
        return ''

    target_sr = 16000

    # Ask librosa for the target rate directly. Calling load() without `sr`
    # always resamples to 22050 Hz first, which forced a second resample and
    # relied on positional resample() arguments that newer librosa rejects.
    data, _ = librosa.load(file, sr=target_sr)
    sf.write(file, data, samplerate=target_sr)
    print(f'[Audio Input] shape: {data.shape}, dtype: {data.dtype}, file: {file}')

    # text = asr_model.speech_recognize(file, device='cpu')
    text = asr_executor(file)

    # Only the first correction result is used.
    text_correction = text_correct_model(text)[0]
    cor_text, errors = text_correction['target'], text_correction['errors']
    print(f'[Text Correction] errors: {errors}')

    punc_text = punc_model.add_puncs(cor_text, device='cpu')[0]

    return '\n'.join((
        f'[ASR] {text}',
        f'[COR] {cor_text}',
        f'[PUN] {punc_text}',
    ))
# Microphone recording, handed to the callback as a file path.
audio_input = gr.inputs.Audio(source="microphone", type='filepath')

# Wire the recognizer to a plain-text output and start the web UI.
iface = gr.Interface(fn=speech_recognize, inputs=audio_input, outputs="text")
iface.launch()