# ASR / app.py — Hugging Face Space by Suevar
# (page-header residue preserved as a comment: "Update app.py", commit 7411549, verified)
from math import log2, pow
import os
import numpy as np
from scipy.fftpack import fft
import gradio as gr
from funasr import AutoModel
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope import snapshot_download
A4 = 440  # reference pitch A4 in Hz (standard concert tuning)
C0 = A4 * pow(2, -4.75)  # C0 lies 57 semitones (4.75 octaves) below A4
# Pitch-class names in ascending semitone order starting at C.
name = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]


def get_pitch(freq):
    """Return the equal-tempered pitch-class name nearest to *freq*.

    Args:
        freq: Frequency in Hz; must be strictly positive.

    Returns:
        One of the 12 pitch-class names, e.g. "A" for 440 Hz.

    Raises:
        ValueError: If freq is not positive (log2 is undefined there);
            raised explicitly with a clear message instead of the bare
            "math domain error" the original code produced.
    """
    if freq <= 0:
        raise ValueError(f"frequency must be positive, got {freq}")
    # Semitone distance from C0, rounded to the nearest tempered step;
    # modulo 12 discards the octave, keeping only the pitch class.
    h = round(12 * log2(freq / C0))
    return name[h % 12]
# ASR pipeline: Paraformer-large acoustic model (zh-CN, 16 kHz) combined
# with FSMN voice-activity detection and CT-Transformer punctuation
# restoration, all loaded from local ModelScope snapshots.
_pipeline_kwargs = dict(
    task=Tasks.auto_speech_recognition,
    model='./models_from_modelscope/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
    vad_model="./models_from_modelscope/speech_fsmn_vad_zh-cn-16k-common-pytorch",
    punc_model="./models_from_modelscope/punc_ct-transformer_cn-en-common-vocab471067-large",
)
inference_pipeline = pipeline(**_pipeline_kwargs)
def ASR(audio):
    """Transcribe an audio file to text via the module-level ASR pipeline.

    Args:
        audio: Path to an audio file (Gradio supplies a filepath string).

    Returns:
        The recognized text of the first result segment, or "" when the
        pipeline returns no results (e.g. silent or empty audio) — the
        original code raised IndexError in that case.
    """
    # Suppress word-level timestamps in the transcript output.
    param_dict = {'use_timestamp': False}
    rec_result = inference_pipeline(input=audio, params=param_dict)
    # Guard against an empty result list rather than raising IndexError.
    if not rec_result:
        return ""
    return rec_result[0]['text']
# Example clip shipped alongside this script.
_EXAMPLE_AUDIO = os.path.join(os.path.dirname(__file__), "test1.mp3")

# Gradio UI: upload (or record) an audio file, run ASR, show the transcript.
demo = gr.Interface(
    fn=ASR,
    inputs=gr.Audio(type="filepath"),
    outputs='text',
    allow_flagging='auto',
    examples=[[_EXAMPLE_AUDIO]],
)

if __name__ == "__main__":
    # share=True requests a public gradio.live link when run outside Spaces.
    demo.launch(share=True)