|
import re

import gradio as gr
|
|
|
# Load the hosted openai/whisper-small ASR model from the Hugging Face Hub;
# calling `whisper(audio_path)` forwards the file to the hosted inference API.
whisper = gr.load("models/openai/whisper-small")
|
|
|
def inference(audio):
    """Transcribe an audio clip with the loaded Whisper model.

    Parameters
    ----------
    audio : str
        Filesystem path to the uploaded/recorded clip (the Audio input
        component uses ``type="filepath"``).

    Returns
    -------
    str
        The recognized text. The hosted endpoint returns its result as a
        string shaped like
        ``AutomaticSpeechRecognitionOutput(text='...', chunks=None)``;
        the transcription is extracted from that wrapper. If the wrapper
        format is not present, the raw output is returned with the legacy
        marker-stripping applied, so behavior degrades gracefully.
    """
    raw = str(whisper(audio))
    # Extract only the text field instead of blindly replacing the marker
    # strings everywhere: global replace would also mangle a transcription
    # that happened to contain those substrings. DOTALL lets the captured
    # text span newlines.
    match = re.search(r"text='(.*)', chunks=", raw, flags=re.DOTALL)
    if match:
        return match.group(1)
    # Fallback: original stripping, for any other output shape.
    return raw.replace("AutomaticSpeechRecognitionOutput(text='", "").replace("', chunks=None)", "")
|
|
|
# --- UI copy (Chinese) and example clips shown on the demo page. ---

title = "Whisper Speech Recognition"

# NOTE: the description previously said "whisper-base", but the model
# actually loaded above is openai/whisper-small — kept consistent here.
description = """

本例用于演示 <b>openai/whisper-small</b> 模型的语音识别(ASR)能力。基于原始模型开发,没有对模型做微调。 本例默认输出为中文,Whisper识别出的是繁体中文。



Whisper包含多个不同大小的版本,理论来讲模型越大识别效果越好,模型越小速度越快



<b>使用方法:</b> 上传一个音频文件或直接在页面中录制音频。音频会在传递到模型之前转换为单声道并重新采样为16 kHz。

"""

article = """

## 参考

- [Innev GitHub](https://github.com/innev)

"""

# One entry per example, matching the single Audio input of the Interface.
# (The rows used to be 3-element lists — [None, path, None] — left over
# from an earlier multi-input layout; Gradio validates example width
# against the number of input components, so they are flattened here.)
examples = [
    "examples/zhiqi.wav",
    "examples/zhichu.wav",
    "examples/hmm_i_dont_know.wav",
    "examples/henry5.mp3",
    "examples/yearn_for_time.mp3",
    "examples/see_in_eyes.wav",
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Wire up the UI: a single audio input (file upload or microphone) mapped
# to a single text output, with Chinese labels and button captions, then
# start the Gradio server.
gr.Interface(

    fn=inference,

    inputs=[

        # type="filepath" makes Gradio pass the handler a path string
        # rather than raw (sample_rate, data) audio.
        gr.Audio(label="录制语音", type="filepath")

    ],

    outputs=[

        gr.Textbox(label="识别出的文字")

    ],

    title=title,

    description=description,

    article=article,

    examples=examples,

    submit_btn="提交",

    clear_btn="清除",

).launch()