# ASR-nan-tw / app.py
from transformers import pipeline
import gradio as gr
import torch
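# pipeline() accepts a GPU index (e.g. 0) or "cpu" as its device argument.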
device = 0 if torch.cuda.is_available() else "cpu"
MODEL_NAME = "6x16/whisper-small-nan-tw-quicktrain"
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,  # change to "your-username/the-name-you-picked"
    chunk_length_s=30,
    device=device,
)
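
# Clear forced decoder ids on both the model config and the generation config
# so the task selected at request time via generate_kwargs takes effect.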
pipe.model.config.forced_decoder_ids = None
# pipe.model.config.suppress_tokens = []
pipe.model.generation_config.forced_decoder_ids = None
# pipe.model.generation_config._from_model_config = True
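
# Run ASR on the recorded clip. `task` is either "transcribe" or "translate"
# and is forwarded to Whisper's generate() through generate_kwargs.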
def transcribe(inputs, task):
    if inputs is None:
        raise gr.Error("No audio found. Please record audio or wait until the input is ready, then resubmit.")
    result = pipe(inputs, generate_kwargs={"task": task})
    return result["text"]
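
# Note: gr.Audio also accepts a list for sources, e.g. ["microphone", "upload"],
# if uploaded files should be allowed in addition to microphone recordings
# (assumes a recent Gradio version).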
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task"),
    ],
    outputs="text",
    theme="glass",
    title="Self-trained Whisper Small nan-tw model (閩南話/台語)",
    description=(
        "Realtime demo for Minnan speech recognition using a self-tuned Whisper small model. "
        f"Checkpoint: [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME})"
    ),
    allow_flagging="never",
)
iface.launch()
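
# When running locally, iface.launch(share=True) should provide a temporary
# public URL; on Hugging Face Spaces the plain launch() above is sufficient.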