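# Gradio + OpenAI Whisper speech-to-text app: upload an audio file, view the
# recognized text on screen, and download it as a .txt transcript.
# Assumes the Gradio 3.x API (gr.Audio(source=...)) and the openai-whisper package.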
import torch
import gradio as gr
import whisper
import os
# Load the Whisper model (use the GPU if one is available)
model = whisper.load_model("large-v2", device="cuda" if torch.cuda.is_available() else "cpu")
def transcribe(audio_file):
    # Get the file path from the Gradio file input
    audio_path = audio_file

    # Run speech recognition with Whisper
    result = model.transcribe(audio_path)
    text = result["text"]

    # Use the uploaded audio file's base name as the transcript file name
    base_name = os.path.splitext(os.path.basename(audio_path))[0]

    # Path where the transcript will be saved
    transcript_file_path = f"txt/{base_name}_transcript.txt"

    # Make sure the txt directory exists
    os.makedirs("txt", exist_ok=True)

    # Save the transcript to a file (UTF-8 so non-ASCII text is written safely)
    with open(transcript_file_path, "w", encoding="utf-8") as file:
        file.write(text)

    # Return the text and the file path so the file can be downloaded from the UI
    return text, transcript_file_path
# Create the Gradio interface
with gr.Blocks(css=".container { max-width: 800px; margin: auto; } .gradio-app { background-color: #f0f0f0; } button { background-color: #4CAF50; color: white; }") as demo:
    gr.Markdown("ASR 語音語料辨識修正工具")  # "ASR speech corpus recognition and correction tool"
    with gr.Row():
        audio_input = gr.Audio(source="upload", type="filepath", label="上載你的音頻")  # "Upload your audio"
        submit_button = gr.Button("語音識別")  # "Run speech recognition"
    output_text = gr.TextArea(label="識別結果")  # "Recognition result"
    download_link = gr.File(label="下載轉錄文件")  # "Download transcript file"
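    # Wire the button: run transcribe on the uploaded audio and fill both outputs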
    submit_button.click(fn=transcribe, inputs=audio_input, outputs=[output_text, download_link])
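# Start the Gradio app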
demo.launch()