PaddleSpeechASR / app.py
KPatrick's picture
Update app.py
ff2f952
raw
history blame
1.21 kB
import gradio as gr
import librosa
import numpy as np
import paddlehub as hub
from paddlenlp import Taskflow
from paddlespeech.cli import ASRExecutor
import soundfile as sf
# Models are constructed once at import time and reused by every call to
# speech_recognize below.

# Speech-to-text through the PaddleSpeech CLI executor.
# (Alternative kept for reference: hub.Module(name='u2_conformer_aishell').)
asr_executor = ASRExecutor()

# PaddleNLP Taskflow configured for the "text_correction" task.
text_correct_model = Taskflow("text_correction")

# PaddleHub module used to add punctuation to the corrected text.
punc_model = hub.Module(name='auto_punc')
def speech_recognize(file):
    """Transcribe an audio file and return raw, corrected and punctuated text.

    The audio is decoded at 16 kHz, written back over ``file`` in place, and
    then passed through the ASR executor, the text-correction Taskflow and
    the punctuation module, in that order.

    Args:
        file: Path to the audio file to transcribe (the Gradio audio input
            is configured with ``type='filepath'``), or ``None`` when no
            recording was submitted.

    Returns:
        A three-line string whose lines are prefixed with ``[ASR]``,
        ``[COR]`` and ``[PUN]``; an empty string when ``file`` is ``None``.
    """
    if file is None:
        # Nothing was recorded, so there is nothing to load or transcribe.
        return ''

    target_sr = 16000

    # Ask librosa for the target rate directly. A bare ``librosa.load(file)``
    # resamples to librosa's default rate first, which forced a second
    # resample afterwards and relied on positional arguments to
    # ``librosa.resample`` that newer librosa releases reject.
    data, _ = librosa.load(file, sr=target_sr)

    # Overwrite the input so the ASR executor reads the 16 kHz audio.
    sf.write(file, data, samplerate=target_sr)
    print(f'[Audio Input] shape: {data.shape}, dtype: {data.dtype}, file: {file}')

    text = asr_executor(file)

    # The correction model returns a list; only the first result is used.
    text_correction = text_correct_model(text)[0]
    cor_text, errors = text_correction['target'], text_correction['errors']
    print(f'[Text Correction] errors: {errors}')

    punc_text = punc_model.add_puncs(cor_text, device='cpu')[0]

    return '\n'.join((
        f'[ASR] {text}',
        f'[COR] {cor_text}',
        f'[PUN] {punc_text}',
    ))
# Web UI: record from the microphone, pass the recording to speech_recognize
# as a file path, and show the returned string as plain text.
_microphone_input = gr.inputs.Audio(source="microphone", type='filepath')
iface = gr.Interface(fn=speech_recognize, inputs=_microphone_input, outputs="text")
iface.launch()