# whisper / app.py
# Author: innev
# Last commit: "Replace output result" (6365962)
import gradio as gr
whisper = gr.load("models/openai/whisper-small")
def inference(audio):
# audio = whisper.load_audio(audio)
# audio = whisper.pad_or_trim(audio)
# mel = whisper.log_mel_spectrogram(audio).to(model.device)
# _, probs = model.detect_language(mel)
# options = whisper.DecodingOptions(fp16 = False)
# result = whisper.decode(model, mel, options)
# print(result.text)
# return result.text
return whisper(audio).replace("AutomaticSpeechRecognitionOutput(text='", "").replace("', chunks=None)", "")
title = "Whisper Speech Recognition"
# User-facing copy rendered by the Gradio UI (HTML/Markdown allowed).
# Fixed: the text previously said "openai/whisper-base", but the model this
# app actually loads is openai/whisper-small.
description = """
本例用于演示 <b>openai/whisper-small</b> 模型的语音识别(ASR)能力。基于原始模型开发,没有对模型做微调。 本例默认输出为中文,Whisper识别出的是繁体中文。
Whisper包含多个不同大小的版本,理论来讲模型越大识别效果越好,模型越小速度越快
<b>使用方法:</b> 上传一个音频文件或直接在页面中录制音频。音频会在传递到模型之前转换为单声道并重新采样为16 kHz。
"""
article = """
## 参考
- [Innev GitHub](https://github.com/innev)
"""
# Sample clips bundled with the Space.  Each row is a 3-column record with
# only the middle (filepath) column populated — NOTE(review): the Interface
# below declares a single Audio input, so the None padding columns look like
# a leftover from an earlier multi-input layout; verify against gr.Interface.
examples = [
    [None, clip, None]
    for clip in (
        "examples/zhiqi.wav",
        "examples/zhichu.wav",
        "examples/hmm_i_dont_know.wav",
        "examples/henry5.mp3",
        "examples/yearn_for_time.mp3",
        "examples/see_in_eyes.wav",
    )
]
# Build and launch the Gradio UI.  (An earlier commented-out variant of this
# Interface — without description/examples and with an api_name kwarg — was
# removed as dead code; this live call supersedes it.)
gr.Interface(
    fn=inference,
    inputs=[
        gr.Audio(label="录制语音", type="filepath")
    ],
    outputs=[
        gr.Textbox(label="识别出的文字")
    ],
    title=title,
    description=description,
    article=article,
    examples=examples,
    submit_btn="提交",
    clear_btn="清除",
).launch()