# whisper-ja-zh / app.py
# Last commit: "update" (5ffab6b) by fj11
import gradio as gr
from transformers import pipeline
import os
# Largest accepted upload, in megabytes; transcribe() compares against it.
MAX_FILE_SIZE_MB = 10

# Choices for the model dropdown: displayed label -> value passed as
# `model=` when the pipeline is built.
MODEL_OPTIONS = {
    "微调后的 Tiny 模型": "Itbanque/whisper-ja-zh-tiny",
    "微调后的 Base 模型": "Itbanque/whisper-ja-zh-base",
}

# Pipelines that have already been built, keyed by dropdown label.
loaded_pipelines = {}
def get_pipeline(model_name):
    """Return the speech-recognition pipeline for a dropdown label.

    The pipeline is built the first time a label is requested and stored in
    ``loaded_pipelines``; later calls with the same label return the stored
    object.

    Args:
        model_name: A key of ``MODEL_OPTIONS``.

    Returns:
        The pipeline object stored for ``model_name``.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODEL_OPTIONS``.
    """
    # Fast path: this label has been built before.
    if model_name in loaded_pipelines:
        return loaded_pipelines[model_name]

    print(f"正在加载模型:{model_name}")
    generation_settings = {
        "task": "translate",
        "language": "japanese",
        "repetition_penalty": 1.2,
    }
    loaded_pipelines[model_name] = pipeline(
        task="automatic-speech-recognition",
        model=MODEL_OPTIONS[model_name],
        return_timestamps=True,
        generate_kwargs=generation_settings,
    )
    return loaded_pipelines[model_name]
def transcribe(audio_path, model_name):
    """Run the selected model on an audio file and return the resulting text.

    Every outcome, including failures, is returned as a string so the caller
    can display it directly.

    Args:
        audio_path: Path of the audio file, or ``None`` when no file was
            provided.
        model_name: A key of ``MODEL_OPTIONS`` selecting which model to run.

    Returns:
        The ``"text"`` field of the pipeline result on success; otherwise a
        message explaining that no file was given, that the file exceeds
        ``MAX_FILE_SIZE_MB``, or what error occurred.
    """
    if audio_path is None:
        return "请上传音频文件"

    # A missing or unreadable file is reported in the same way as any other
    # failure instead of escaping as an unhandled OSError.
    try:
        size_bytes = os.path.getsize(audio_path)
    except OSError as e:
        return f"❌ 出错了:{e}"

    file_size_mb = size_bytes / (1024 * 1024)
    if file_size_mb > MAX_FILE_SIZE_MB:
        return f"❌ 文件太大:{file_size_mb:.2f} MB,最大只支持 {MAX_FILE_SIZE_MB} MB"

    # Boundary handler: model loading and inference can fail in many ways,
    # and each failure is surfaced to the user as text.
    try:
        pipe = get_pipeline(model_name)
        result = pipe(audio_path)
        return result["text"]
    except Exception as e:
        return f"❌ 出错了:{e}"
# Gradio UI: an audio upload and a model selector as inputs, one text box as
# output. The size limit shown to the user is taken from MAX_FILE_SIZE_MB so
# the displayed text cannot drift from the limit that transcribe() enforces.
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(
            type="filepath",
            label=f"上传日语音频文件(≤{MAX_FILE_SIZE_MB}MB)",
        ),
        gr.Dropdown(
            choices=list(MODEL_OPTIONS),
            value="微调后的 Tiny 模型",
            label="选择微调模型",
        ),
    ],
    outputs=gr.Textbox(label="翻译后的中文文本"),
    title="🎙️ Itbanque Whisper 微调模型:日语 → 中文",
    description=(
        f"上传日语音频(最大 {MAX_FILE_SIZE_MB}MB),"
        "选择使用 Tiny 或 Base 微调模型,输出中文翻译。"
    ),
    allow_flagging="never",
)

# Launch the interface only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()