# Captured from a Hugging Face Spaces page; Space status at capture time: Runtime error.
import os
import subprocess
import sys
import tempfile

import gradio as gr
import spacy
import torch
from datasets import load_dataset
from pydub import AudioSegment  # pydub is used for audio decoding / conversion
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# Make sure numpy is importable. If it is missing, attempt a one-off install
# and import it again so the name `np` is actually bound afterwards.
try:
    import numpy as np
except ImportError:
    print("Numpy 未找到,正在嘗試自動安裝...")
    try:
        # Run pip through the current interpreter so the package is installed
        # into the environment this script runs in; check=True turns a failed
        # install into an exception instead of a silent no-op.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "numpy"],
            check=True,
        )
        import numpy as np
    except (subprocess.CalledProcessError, OSError, ImportError) as e:
        print(f"自動安裝Numpy失敗:{e}")
# Choose the compute device and tensor precision: the first CUDA GPU with
# half precision when CUDA is available, otherwise the CPU with float32.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32
# Whisper model initialisation (speech-to-text).
whisper_model_id = "openai/whisper-large-v3"

# Bind the pipeline name up front so it exists (as None) even when the
# pipeline construction below fails; otherwise any later use of the name
# would raise NameError instead of a meaningful error.
whisper_pipe = None

whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    whisper_model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
whisper_model.to(device)
whisper_processor = AutoProcessor.from_pretrained(whisper_model_id)

try:
    whisper_pipe = pipeline(
        "automatic-speech-recognition",
        model=whisper_model,
        tokenizer=whisper_processor.tokenizer,
        feature_extractor=whisper_processor.feature_extractor,
        # Pass the same dtype the model was loaded with so the extracted
        # input features are cast to match the model weights.
        torch_dtype=torch_dtype,
        device=device,
    )
except Exception as e:
    print(f"初始化Whisper管道時出現錯誤:{e}")
# spaCy initialisation (text classification and tagging).
# `nlp` is bound to None whenever the English model cannot be loaded.
try:
    nlp = spacy.load("en_core_web_sm")
except Exception as load_error:
    nlp = None
    print(f"加載spaCy模型時出現錯誤:{load_error}")
# Lazily-created T5 pipeline, shared by all requests so the model is only
# loaded once instead of on every call.
_t5_pipe = None


def _convert_to_wav(audio_file):
    """Decode *audio_file* with pydub and return the path of a new temp WAV file."""
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        # from_file() infers the input format, so non-MP3 files work too.
        # export() returns the open output file handle; close it explicitly.
        AudioSegment.from_file(audio_file).export(wav_path, format="wav").close()
    except Exception:
        _remove_file(wav_path)
        raise
    return wav_path


def _remove_file(path):
    """Delete *path*, ignoring the case where it cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        pass


def _get_t5_pipe():
    """Return the shared T5 pipeline, creating it on first use."""
    global _t5_pipe
    if _t5_pipe is None:
        # T5 is an encoder-decoder model, so it is served by the
        # "text2text-generation" task rather than "text-generation".
        _t5_pipe = pipeline("text2text-generation", model="t5-base")
    return _t5_pipe


def process_audio(audio_file):
    """Transcribe an audio file, generate a T5 response and extract entities.

    Args:
        audio_file: Path of the audio file to process (the Gradio audio
            component is configured with ``type="filepath"``). May be
            ``None`` or empty when nothing was uploaded.

    Returns:
        A dict for the JSON output component:

        * ``{"Error": ...}`` when no file was given, when the audio could
          not be converted to WAV, or when transcription failed;
        * only the transcription when the T5 / spaCy step failed;
        * transcription, T5 response and ``(text, label)`` entity pairs
          otherwise (the entity list is empty when spaCy is unavailable).
    """
    if not audio_file:
        return {"Error": "MP3 轉 WAV 時出現錯誤:未提供音訊檔案"}

    # Convert the input to WAV. Stop here on failure: without a WAV file the
    # transcription step has nothing to work on.
    try:
        wav_path = _convert_to_wav(audio_file)
    except Exception as e:
        print(f"MP3 轉 WAV 時出現錯誤:{e}")
        return {"Error": f"MP3 轉 WAV 時出現錯誤:{e}"}

    # Speech-to-text. The temporary WAV is removed whether or not this works.
    try:
        result = whisper_pipe(wav_path)["text"]
    except Exception as e:
        return {"Error": f"語音轉文字失敗:{e}"}
    finally:
        _remove_file(wav_path)

    # Generate a response with T5 and analyse it with spaCy. A failure here
    # is best-effort: the transcription is still returned.
    try:
        deepseek_response = _get_t5_pipe()(result)[0]["generated_text"]
        doc = nlp(deepseek_response) if nlp is not None else None
        entities = (
            [(ent.text, ent.label_) for ent in doc.ents] if doc is not None else []
        )
    except Exception as e:
        print(f"T5 / spaCy 處理時出現錯誤:{e}")
        # NOTE(review): the "Whister" spelling in the key is kept as-is
        # because it is part of the output shape shown to users.
        return {"Transcription (Whister)": result}

    return {
        "Transcription (Whister)": result,
        "AI Response (T5)": deepseek_response,
        "Extracted Entities (spaCy)": entities,
    }
def clear_input():
    """Return a pair of empty strings."""
    blank = ""
    return blank, blank
# Gradio UI: an audio upload next to a JSON result panel, with a submit
# button that runs process_audio and a clear button that resets both.
with gr.Blocks() as app:
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="上傳語音")
        output_text = gr.JSON(label="結果")

    submit_button = gr.Button("提交")
    submit_button.click(
        fn=process_audio,
        inputs=[audio_input],
        outputs=[output_text],
    )

    clear_button = gr.Button("清除")
    # The callback takes no arguments because no inputs are wired to it
    # (a one-argument callback would fail when invoked with none).
    # Returning None for a component resets it to its empty state.
    clear_button.click(
        fn=lambda: (None, None),
        inputs=[],
        outputs=[audio_input, output_text],
    )

if __name__ == "__main__":
    app.launch()