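"""Gradio speech demo: transcribe an uploaded audio file with Whisper
(openai/whisper-large-v3), generate a reply with a T5 model, and extract
named entities from the reply with spaCy."""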
import os
import subprocess

import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from datasets import load_dataset
import spacy
import gradio as gr
from pydub import AudioSegment  # pydub is used for audio format conversion

# Make sure numpy can be imported; try to install it automatically if it is missing.
try:
    import numpy as np
except ImportError:
    print("Numpy not found, attempting automatic installation...")
    try:
        subprocess.run(["pip", "install", "numpy"])
    except Exception as e:
        print(f"Automatic installation of Numpy failed: {e}")
        
# Device and dtype setup
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Whisper model initialization (speech-to-text)
whisper_model_id = "openai/whisper-large-v3"
whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    whisper_model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True)
whisper_model.to(device)
whisper_processor = AutoProcessor.from_pretrained(whisper_model_id)

try:
    whisper_pipe = pipeline(
        "automatic-speech-recognition",
        model=whisper_model,
        tokenizer=whisper_processor.tokenizer,
        feature_extractor=whisper_processor.feature_extractor,
        device=device)
except Exception as e:
    print(f"Error while initializing the Whisper pipeline: {e}")

# spaCy initialization (text classification and tagging)
nlp = None

try:
    nlp = spacy.load("en_core_web_sm")
except Exception as e:
    print(f"Error while loading the spaCy model: {e}")

def process_audio(audio_file):
    # Convert the uploaded audio file to WAV
    try:
        audio_segment = AudioSegment.from_file(audio_file)
        # Store the converted file in the /tmp directory
        wav_path = "/tmp/" + os.path.splitext(os.path.basename(audio_file))[0] + ".wav"
        audio_segment.export(wav_path, format="wav")
    except Exception as e:
        return {"Error": f"Audio-to-WAV conversion failed: {e}"}

    # Speech-to-text
    try:
        result = whisper_pipe(wav_path)["text"]

        # Use T5 as the substitute response model
        t5_response = ""
        try:
            t5_pipe = pipeline("text2text-generation", model="t5-base")
            t5_response = t5_pipe(result)[0]["generated_text"]

            # Analyze the generated text with spaCy
            doc = nlp(t5_response) if nlp is not None else None
            entities = [(ent.text, ent.label_) for ent in doc.ents] if doc is not None else []

            return {
                "Transcription (Whisper)": result,
                "AI Response (T5)": t5_response,
                "Extracted Entities (spaCy)": entities}
        except Exception as e:
            # Keep the original transcription if the T5/spaCy step fails
            return {"Transcription (Whisper)": result}

    except Exception as e:
        return {"Error": f"Speech-to-text failed: {e}"}

def clear_input():
    # Reset the audio input and the result panel
    return None, None

with gr.Blocks() as app:
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Upload audio")
        output_text = gr.JSON(label="Result")

    submit_button = gr.Button("Submit")
    submit_button.click(fn=process_audio, inputs=[audio_input], outputs=[output_text])

    clear_button = gr.Button("Clear")  # Clear button
    clear_button.click(fn=clear_input, inputs=[], outputs=[audio_input, output_text])  # Reset the input and output fields

if __name__ == "__main__":
    app.launch()