from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess
from modelscope import snapshot_download
import io
import os
import tempfile
import json
from typing import Optional
import torch
import gradio as gr  # Gradio for the web UI

from config import model_config

device = "cuda:0" if torch.cuda.is_available() else "cpu"

model_dir = snapshot_download(model_config['model_dir'])

# Initialize the model
model = AutoModel(
    model=model_dir,
    trust_remote_code=False,
    remote_code="./model.py",
    vad_model="fsmn-vad",
    vad_kwargs={"max_single_segment_time": 30000},
    ncpu=4,
    batch_size=1,
    hub="ms",
    device=device,
)


def transcribe_audio(file, vad_model="fsmn-vad", vad_kwargs='{"max_single_segment_time": 30000}',
                     ncpu=4, batch_size=1, language="auto", use_itn=True, batch_size_s=60,
                     merge_vad=True, merge_length_s=15, batch_size_threshold_s=50,
                     hotword=" ", spk_model="cam++", ban_emo_unk=False):
    # Note: vad_model, vad_kwargs, ncpu and batch_size mirror the global model
    # configuration above; the model is initialized once, so changing them here
    # does not re-initialize it.
    try:
        # Parse the JSON string into a dict
        vad_kwargs = json.loads(vad_kwargs)

        # Copy the uploaded audio to a temporary .wav file. With
        # gr.Audio(type="filepath") Gradio passes the path of the uploaded file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            temp_file_path = temp_file.name
            with open(file, "rb") as audio_file:
                temp_file.write(audio_file.read())

        try:
            # Run inference
            res = model.generate(
                input=temp_file_path,  # use the temporary file path as input
                cache={},
                language=language,
                use_itn=use_itn,
                batch_size_s=batch_size_s,
                merge_vad=merge_vad,
                merge_length_s=merge_length_s,
                batch_size_threshold_s=batch_size_threshold_s,
                hotword=hotword,
                spk_model=spk_model,
                ban_emo_unk=ban_emo_unk
            )

            # Post-process the result
            text = rich_transcription_postprocess(res[0]["text"])
            return text
        finally:
            # Make sure the temporary file is removed after processing
            if os.path.exists(temp_file_path):
                os.remove(temp_file_path)
    except Exception as e:
        return str(e)


# Build the Gradio interface
inputs = [
    gr.Audio(type="filepath"),  # audio upload; the deprecated `source` argument is no longer used
    gr.Textbox(value="fsmn-vad", label="VAD Model"),
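    # The remaining widgets are a minimal sketch inferred from the
    # transcribe_audio() signature; widget types, labels, defaults, the language
    # choices, the `demo` name and the interface title are assumptions, not the
    # original author's exact UI. Order must match the function's parameters.
    gr.Textbox(value='{"max_single_segment_time": 30000}', label="VAD kwargs (JSON)"),
    gr.Number(value=4, label="ncpu"),
    gr.Number(value=1, label="batch_size"),
    gr.Dropdown(choices=["auto", "zh", "en", "yue", "ja", "ko"], value="auto", label="Language"),
    gr.Checkbox(value=True, label="Use ITN"),
    gr.Number(value=60, label="batch_size_s"),
    gr.Checkbox(value=True, label="Merge VAD"),
    gr.Number(value=15, label="merge_length_s"),
    gr.Number(value=50, label="batch_size_threshold_s"),
    gr.Textbox(value=" ", label="Hotword"),
    gr.Textbox(value="cam++", label="Speaker Model"),
    gr.Checkbox(value=False, label="Ban emo_unk"),
]

outputs = gr.Textbox(label="Transcription")

demo = gr.Interface(fn=transcribe_audio, inputs=inputs, outputs=outputs,
                    title="Audio Transcription")

if __name__ == "__main__":
    demo.launch()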