# LuoYiSULIXAY's picture
# Update app.py
# a949720 verified
import gradio as gr
import torch
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
# All selectable models: display name -> Hugging Face model repo id.
MODEL_OPTIONS = {
    "Whisper Lao default": "LuoYiSULIXAY/whisper-lao-finetuned_laonlp",  # was "defualt" (typo)
    "Whisper Lao update": "LuoYiSULIXAY/whisper-lao-finetuned_laonlp_2",
    "Whisper Lao v1": "LuoYiSULIXAY/whisper-lao-finetuned_1",
    "Whisper Lao v2": "LuoYiSULIXAY/whisper-lao-finetuned_2",
    "OpenAI Whisper Medium": "openai/whisper-medium",
    "small aug": "LuoYiSULIXAY/whisper_small_aug",
}
# Cache of already-loaded pipelines (keyed by model id) so re-selecting a
# model in the UI does not pay the load cost again.
loaded_pipelines = {}
def load_asr_pipeline(model_name):
    """Load (and cache) an ASR pipeline for the given Whisper checkpoint.

    Args:
        model_name: Hugging Face model repo id of a Whisper checkpoint.

    Returns:
        A transformers automatic-speech-recognition pipeline.
    """
    # Return the cached pipeline if this model was loaded before.
    if model_name in loaded_pipelines:
        return loaded_pipelines[model_name]
    # NOTE(review): all checkpoints appear to be Whisper fine-tunes, so the
    # tokenizer/feature extractor come from the base openai/whisper-medium
    # processor — confirm every entry in MODEL_OPTIONS is medium-based.
    processor = WhisperProcessor.from_pretrained("openai/whisper-medium", language="lo")
    model = WhisperForConditionalGeneration.from_pretrained(model_name)
    asr_pipeline = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        # Auto-detect the device: the previous hard-coded device=0 crashed
        # on CPU-only hosts (its own comment said to set -1 for CPU).
        device=0 if torch.cuda.is_available() else -1,
    )
    loaded_pipelines[model_name] = asr_pipeline
    return asr_pipeline
def transcribe(audio, model_choice):
    """Transcribe an audio file with the selected Whisper model.

    Args:
        audio: Path to the uploaded audio file.
        model_choice: Hugging Face repo id of the model to use.

    Returns:
        The transcribed text.
    """
    recognizer = load_asr_pipeline(model_choice)
    output = recognizer(
        audio,
        generate_kwargs={"language": "lao", "task": "transcribe"},
    )
    return output["text"]
# Build the Gradio UI.  The dropdown now shows the human-readable names
# (MODEL_OPTIONS keys) while still passing the underlying repo id
# (MODEL_OPTIONS values) to `transcribe` — previously the keys were unused
# and users saw raw repo ids.
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio"),
        gr.Dropdown(
            # (label, value) pairs: label is displayed, value is sent to fn.
            choices=list(MODEL_OPTIONS.items()),
            value="LuoYiSULIXAY/whisper-lao-finetuned_laonlp",
            label="Select Whisper Model",
        ),
    ],
    outputs=gr.Textbox(label="Transcription"),
    title="Whisper Lao",
    description="Realtime demo for Lao speech recognition using different Whisper fine-tuned models.",
)

# Launch only when run as a script (standard Gradio convention; Spaces and
# other importers can launch `demo` themselves).
if __name__ == "__main__":
    demo.launch(share=True)