Spaces:
Runtime error
Runtime error
File size: 4,315 Bytes
1827af1 0191540 1827af1 f9dc89f 9ec90b2 1827af1 e37b77a bb320d2 1827af1 4a67e11 bb320d2 4a67e11 1827af1 eba8259 0191540 f9dc89f 1827af1 f9dc89f e86dfec f9dc89f bb320d2 f9dc89f bb320d2 f9dc89f 9ec90b2 28aed62 9ec90b2 bcd57fb bb320d2 28aed62 bb320d2 28aed62 bcd57fb 28aed62 bcd57fb 28aed62 bb320d2 28aed62 bcd57fb 28aed62 f9dc89f 28aed62 bb320d2 28aed62 1827af1 5a096f5 e86dfec ee462c7 e86dfec 5a096f5 07f2f87 3eb4252 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import os
import subprocess
import sys
import tempfile

import gradio as gr
import spacy
import torch
from datasets import load_dataset
from pydub import AudioSegment  # import the pydub library
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# Make sure numpy is importable. If it is missing, try to install it with pip
# and import it again so that `np` is bound whenever the install succeeds.
# If the install fails, the error is printed and `np` stays unbound.
try:
    import numpy as np
except ImportError:
    print("Numpy 未找到,正在嘗試自動安裝...")
    try:
        # Run pip through the current interpreter so the package is installed
        # into the environment this script is actually using; check=True turns
        # a non-zero pip exit status into an exception that is reported below.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "numpy"],
            check=True,
        )
        import numpy as np
    except Exception as e:
        print(f"自動安裝Numpy失敗:{e}")
# Select the compute device and the matching tensor precision:
# half precision on a CUDA GPU, full precision on CPU.
if torch.cuda.is_available():
    device = "cuda:0"
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32
# Whisper model initialisation (speech-to-text).
whisper_model_id = "openai/whisper-large-v3"
whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    whisper_model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
whisper_model.to(device)
whisper_processor = AutoProcessor.from_pretrained(whisper_model_id)

# Bind the name before the attempt so that a failed pipeline construction
# leaves a defined value (None) rather than an unbound global, mirroring how
# the spaCy model is handled further down.
whisper_pipe = None
try:
    whisper_pipe = pipeline(
        "automatic-speech-recognition",
        model=whisper_model,
        tokenizer=whisper_processor.tokenizer,
        feature_extractor=whisper_processor.feature_extractor,
        # Keep the pipeline's input precision in step with the model weights;
        # otherwise float32 features can be fed to a float16 model on GPU.
        torch_dtype=torch_dtype,
        device=device,
    )
except Exception as e:
    print(f"初始化Whisper管道時出現錯誤:{e}")
# spaCy initialisation (entity extraction). `nlp` ends up as None when the
# English model cannot be loaded, and the failure is reported on stdout.
try:
    nlp = spacy.load("en_core_web_sm")
except Exception as load_error:
    nlp = None
    print(f"加載spaCy模型時出現錯誤:{load_error}")
# Lazily created T5 pipeline, cached so the model is loaded once instead of on
# every request.
_t5_generator = None


def _get_t5_generator():
    """Return the shared T5 pipeline, building it on first use."""
    global _t5_generator
    if _t5_generator is None:
        # T5 is an encoder-decoder model, so it is served through the
        # "text2text-generation" task; "text-generation" expects a causal LM.
        _t5_generator = pipeline("text2text-generation", model="t5-base")
    return _t5_generator


def process_audio(audio_file):
    """Transcribe an audio file, generate a T5 response and extract entities.

    Args:
        audio_file: Path of the uploaded audio file. May be None or empty
            when nothing was uploaded.

    Returns:
        A dict that takes one of three shapes:
        * {"Error": message} when no file was given, the WAV conversion
          failed, or the transcription failed;
        * {"Transcription (Whister)": text} when the transcription worked but
          the T5 / spaCy post-processing failed;
        * the transcription plus "AI Response (T5)" and
          "Extracted Entities (spaCy)" (a list of (text, label) pairs, empty
          when no spaCy model is loaded) on full success.
    """
    if not audio_file:
        return {"Error": "未提供音訊檔案"}

    # A private temporary directory gives every request its own WAV file and
    # removes it automatically once the transcription is finished.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "audio.wav")

        # Convert the upload to WAV. from_file lets ffmpeg detect the input
        # format, so non-MP3 uploads and microphone recordings work as well.
        try:
            AudioSegment.from_file(audio_file).export(wav_path, format="wav")
        except Exception as e:
            print(f"MP3 轉 WAV 時出現錯誤:{e}")
            # Stop here: there is no WAV file to transcribe.
            return {"Error": f"MP3 轉 WAV 時出現錯誤:{e}"}

        # Speech to text.
        try:
            result = whisper_pipe(wav_path)["text"]
        except Exception as e:
            return {"Error": f"語音轉文字失敗:{e}"}

    # NOTE: the "Whister" spelling in the key is kept as-is because the key is
    # part of the payload returned to callers.
    try:
        # Use T5 as the substitute model for producing a response.
        ai_response = _get_t5_generator()(result)[0]["generated_text"]
        # Analyse the generated text with spaCy when a model is available.
        doc = nlp(ai_response) if nlp is not None else None
        entities = (
            [(ent.text, ent.label_) for ent in doc.ents] if doc is not None else []
        )
    except Exception as e:
        # Best effort: report the failure and still return the transcription.
        print(f"T5/spaCy 後處理時出現錯誤:{e}")
        return {"Transcription (Whister)": result}

    return {
        "Transcription (Whister)": result,
        "AI Response (T5)": ai_response,
        "Extracted Entities (spaCy)": entities,
    }
def clear_input():
    """Return a pair of empty strings."""
    blank = ""
    return (blank, blank)
# Gradio UI: an audio upload next to a JSON result panel, a submit button that
# runs process_audio on the upload, and a clear button that resets the upload.
with gr.Blocks() as app:
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="上傳語音")
        output_text = gr.JSON(label="結果")
    submit_button = gr.Button("提交")
    submit_button.click(
        fn=process_audio, inputs=[audio_input], outputs=[output_text]
    )
    clear_button = gr.Button("清除")  # clear button
    # With no inputs Gradio calls the handler with zero arguments, so the
    # callback must not declare a parameter. Returning None resets the audio
    # component to its empty state.
    clear_button.click(fn=lambda: None, inputs=[], outputs=[audio_input])

if __name__ == "__main__":
    app.launch()
|