# PaddleSpeechASR / app.py
# KPatrick's picture
# Update app.py
# ff2f952
# raw history blame
# No virus
# 1.21 kB
import gradio as gr
import librosa
import numpy as np
import paddlehub as hub
from paddlenlp import Taskflow
from paddlespeech.cli import ASRExecutor
import soundfile as sf
# Models are instantiated once at import time and reused by every call to
# speech_recognize().
#
# Alternative PaddleHub-based ASR backend (currently disabled):
# asr_model = hub.Module(name='u2_conformer_aishell')
# Speech-to-text: called with an audio file path, returns the transcript.
asr_executor = ASRExecutor()
# PaddleNLP "text_correction" task; its first result is read for the
# 'target' and 'errors' keys in speech_recognize().
text_correct_model = Taskflow("text_correction")
# PaddleHub 'auto_punc' module; its add_puncs() is used to punctuate the
# corrected transcript.
punc_model = hub.Module(name='auto_punc')
def speech_recognize(file) -> str:
    """Transcribe an audio file and post-process the transcript.

    The audio is decoded at 16 kHz and written back to the same path, then
    run through three stages: speech recognition, text correction and
    punctuation restoration.

    Args:
        file: Path of the audio file supplied by the Gradio audio input, or
            ``None``/empty when nothing was recorded. The file is
            overwritten in place with the 16 kHz version.

    Returns:
        Three newline-separated lines tagged ``[ASR]`` (raw transcript),
        ``[COR]`` (corrected text) and ``[PUN]`` (punctuated text), or a
        single ``[ERR]`` line when no audio was provided.
    """
    if not file:
        # Gradio hands over None when the user submits without recording;
        # report that instead of crashing inside librosa.load().
        return '[ERR] No audio input received.'

    # Resample while decoding. A bare librosa.load() decodes at librosa's
    # default rate (22050 Hz), so the old `sr != 16000` branch always ran
    # and resampled a second time; its positional
    # librosa.resample(data, sr, 16000) call also breaks on librosa >= 0.10,
    # where the rates are keyword-only.
    target_sr = 16000
    data, _ = librosa.load(file, sr=target_sr)
    # The ASR executor reads from the path, so persist the 16 kHz audio there.
    sf.write(file, data, samplerate=target_sr)
    print(f'[Audio Input] shape: {data.shape}, dtype: {data.dtype}, file: {file}')

    # text = asr_model.speech_recognize(file, device='cpu')
    text = asr_executor(file)

    # The correction model returns a list; only the first entry is used.
    text_correction = text_correct_model(text)[0]
    cor_text, errors = text_correction['target'], text_correction['errors']
    print(f'[Text Correction] errors: {errors}')

    punc_text = punc_model.add_puncs(cor_text, device='cpu')[0]

    return '\n'.join([
        f'[ASR] {text}',
        f'[COR] {cor_text}',
        f'[PUN] {punc_text}',
    ])
# Web UI: record audio from the microphone, pass it to speech_recognize() as
# a file path, and show the returned string as plain text.
iface = gr.Interface(
fn=speech_recognize,
# type='filepath' makes Gradio pass a path on disk rather than raw samples.
inputs=gr.inputs.Audio(source="microphone", type='filepath'),
outputs="text",
)
# Start the Gradio app when this script is executed.
iface.launch()