import time

import gradio as gr
import whisper

# The imports below are only needed for the commented-out transformers/datasets examples kept for reference.
#from transformers import pipeline
#from transformers import WhisperProcessor, WhisperForConditionalGeneration
#from datasets import load_dataset
#p = pipeline("automatic-speech-recognition",model="jonatasgrosman/wav2vec2-large-xlsr-53-spanish")
# Alternative: drive the same checkpoint through transformers (kept for reference)
#processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
#model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
#model.config.forced_decoder_ids = None

# Load the multilingual Whisper "large" model once at startup (large download on first use)
model = whisper.load_model("large")
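# Note: if "large" is too heavy for the target machine, openai-whisper also ships the smaller
# "tiny", "base", "small" and "medium" checkpoints (the same names offered in the commented-out
# model-size dropdown further below); swapping the string here is enough to switch.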
# load dummy dataset and read audio files
#ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
#sample = ds[0]["audio"]
#input_features = processor(sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt").input_features
# generate token ids
#predicted_ids = model.generate(input_features)
# decode token ids to text
#transcription = processor.batch_decode(predicted_ids, skip_special_tokens=False)
#['<|startoftranscript|><|en|><|transcribe|><|notimestamps|> Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.<|endoftext|>']
#transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
#[' Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.']
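# The reference snippet above shows the equivalent transformers workflow: encode the audio with
# WhisperProcessor, generate token ids, then decode them; skip_special_tokens=True drops the
# <|startoftranscript|>/<|en|>/<|transcribe|> markers from the decoded text.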
#def speech_to_text(tmp_filename, model_size):
# model = whisper.load_model(model_size)
# result = model.transcribe(tmp_filename)
#
# return result["text"]
#gr.Interface(
# fn=speech_to_text,
# inputs=[
# gr.Audio(source="microphone", type="filepath"),
# gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"]),
# ],
# outputs="text").launch()
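# The commented-out Interface above is an earlier variant that let the user pick the Whisper
# model size per request; the live app below keeps a single "large" model loaded at startup.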
def transcribe(language, audio, state=""):
    # Transcribe the recorded clip with Whisper and append the text to the running transcript.
    time.sleep(1)
    state = ""
    if language == "Multi":
        result = model.transcribe(audio)  # let Whisper auto-detect the spoken language
    else:
        result = model.transcribe(audio, language="es")  # force Spanish decoding
    text = result["text"]
# if language=="Catalan":
# state=""
# text = pc(audio)["text"]
# if language=="English":
# state=""
# text = pe(audio)["text"]
# if language=="French":
# state=""
# text = pf(audio)["text"]
# if language=="Japanese":
# state=""
# text = pj(audio)["text"]
state += text + " "
    #text2="This is what I understood from you"
return state, state
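
# transcribe returns (state, state): the first value fills the visible "textbox" output and the
# second is fed back into the "state" input on the next submit, which is how Gradio keeps the
# running transcript between clicks.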
demo=gr.Interface(
fn=transcribe,
title="TEDCAS Offline Speech recognition",
    description="1) Select language 2) Click on 'Record from microphone' and talk 3) Click on 'Stop recording' 4) Click on 'Submit' 5) Before starting again, click on 'Clear'",
inputs=[
#gr.Dropdown(["Spanish","Catalan","English", "French", "Japanese"],value="Spanish"),
gr.Dropdown(["Multi","Spanish"],value="Multi"),
#gr.Audio(source="microphone", type="filepath", streaming=True),
        gr.Audio(source="microphone", type="filepath"),
"state"#,"language"
],
outputs=[
"textbox",
"state"
],
#live=True).launch()
)
demo.launch()
#demo.launch(auth=("TedCas", "Kike1234"))