import gradio as gr
from transformers import Wav2Vec2Processor
from transformers import AutoModelForCTC
from conversationalnlp.models.wav2vec2 import Wav2Vec2Predict
from conversationalnlp.models.wav2vec2 import ModelLoader
from conversationalnlp.utils import *
import soundfile as sf
import os
"""
run gradio with
>>python app.py
"""
audiosavepath = os.getcwd()  # directory where the temporary recording is written
pretrained_model = "codenamewei/speech-to-text"
processor = Wav2Vec2Processor.from_pretrained(pretrained_model)
model = AutoModelForCTC.from_pretrained(pretrained_model)
modelloader = ModelLoader(model, processor)
predictor = Wav2Vec2Predict(modelloader)
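# The predictor's file-batch API is used in greet() below; a quick smoke test
# against the bundled examples might look like the following (predictfiles and
# its returned keys are inferred from the usage in greet, otherwise assumed):
# results = predictor.predictfiles(examples)
# print(results["predicted_text"], results["corrected_text"])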
examples = ["example1.flac", "example2.flac", "example3.flac"]
def greet(audioarray):
    """
    Transcribe audio from the Gradio audio component.

    `audioarray` is a (samplerate, data) tuple, e.g.:
    (16000, array([ -5277184, 326400, -120320, ..., -5970432, -12745216,
           -6934528], dtype=int32))
    """
    audioabspath = os.path.join(audiosavepath, "temp.wav")

    # WORKAROUND: save to file and reread to get the array shape
    # expected by the predictor
    sf.write(audioabspath, audioarray[1], audioarray[0])
    print(f"Audio saved to {audioabspath}")

    predictiontexts = predictor.predictfiles([audioabspath])

    outputtext = predictiontexts["predicted_text"][-1] + \
        "\n" + predictiontexts["corrected_text"][-1]

    return outputtext
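# Hypothetical alternative to the save-and-reread workaround in greet(): feed
# the in-memory array straight to the processor/model pair via the standard
# transformers Wav2Vec2 CTC API. A minimal sketch only; it assumes the model
# expects 16 kHz mono input, that Gradio delivers an integer PCM array, and it
# is untested against this Space's checkpoint (greet_inmemory is not wired
# into the Interface below).
def greet_inmemory(audioarray):
    import numpy as np
    import torch

    samplerate, data = audioarray
    # Scale integer PCM to float32 in [-1, 1]; np.iinfo assumes an int dtype
    floatdata = data.astype(np.float32) / np.iinfo(data.dtype).max
    # The feature extractor raises if samplerate differs from its configured
    # 16 kHz, so mismatched recordings would need resampling first
    inputs = processor(floatdata, sampling_rate=samplerate, return_tensors="pt")
    with torch.no_grad():
        logits = model(inputs.input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)[0]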
demo = gr.Interface(fn=greet,
                    inputs="audio",
                    outputs="text",
                    title="Speech-to-Text",
                    examples=examples)
demo.launch()  # pass share=True for a public link