# ASR-nan-tw / app.py
from transformers import pipeline
import gradio as gr
import torch
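# pipeline() accepts a GPU index (e.g. 0) or "cpu" as its device argument.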
device = 0 if torch.cuda.is_available() else "cpu"
MODEL_NAME = "6x16/whisper-small-nan-tw-quicktrain"
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,  # change to "your-username/the-name-you-picked"
    chunk_length_s=30,
    device=device,
)
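
# Clear forced decoder ids on both the model config and the generation config
# so the task selected at request time via generate_kwargs takes effect.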
pipe.model.config.forced_decoder_ids = None
# pipe.model.config.suppress_tokens = []
pipe.model.generation_config.forced_decoder_ids = None
# pipe.model.generation_config._from_model_config = True
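
# Run ASR on the recorded clip. `task` is either "transcribe" or "translate"
# and is forwarded to Whisper's generate() through generate_kwargs.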
def transcribe(inputs, task):
    if inputs is None:
        raise gr.Error("No audio found. Please record audio or wait until the input is ready, then resubmit.")
    result = pipe(inputs, generate_kwargs={"task": task})
    return result["text"]
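
# Note: gr.Audio also accepts a list for sources, e.g. ["microphone", "upload"],
# if uploaded files should be allowed in addition to microphone recordings
# (assumes a recent Gradio version).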
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task"),
    ],
    outputs="text",
    theme="glass",
    title="Self-trained Whisper Small nan-tw model (閩南話/台語)",
    description=(
        "Realtime demo for Minnan speech recognition using a self-tuned Whisper small model. "
        f"Checkpoint: [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME})"
    ),
    allow_flagging="never",
)
iface.launch()
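
# When running locally, iface.launch(share=True) should provide a temporary
# public URL; on Hugging Face Spaces the plain launch() above is sufficient.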