# ASR-ANNOTATION / app.py
# Author: DeepLearning101 — "Update app.py" (commit 56a8374, verified)
import torch
import gradio as gr
import whisper
import os
# Sanity check that the correct `whisper` package (openai-whisper) is
# installed — a name clash with other "whisper" packages would break
# `load_model` below.
print("Whisper module contents:", dir(whisper))
# Load the Whisper model once at module import (downloads weights on first
# run); prefer GPU when available, otherwise fall back to CPU.
model = whisper.load_model("large-v2", device="cuda" if torch.cuda.is_available() else "cpu")
def transcribe(audio_file):
    """Transcribe an uploaded audio file with Whisper and save the text.

    Parameters
    ----------
    audio_file : str
        Filesystem path to the uploaded audio clip (the Gradio ``Audio``
        component is declared with ``type="filepath"``).

    Returns
    -------
    tuple[str, str]
        ``(text, transcript_file_path)`` — the recognized text and the
        path of the saved transcript, wired to the TextArea and File
        outputs respectively.
    """
    audio_path = audio_file
    # Uses the module-level Whisper model loaded at import time.
    result = model.transcribe(audio_path)
    text = result["text"]
    # Name the transcript after the audio file, e.g. foo.wav -> foo_transcript.txt
    base_name = os.path.splitext(os.path.basename(audio_path))[0]
    transcript_file_path = f"txt/{base_name}_transcript.txt"
    os.makedirs("txt", exist_ok=True)
    # Fix: write UTF-8 explicitly. The platform default encoding is not
    # guaranteed to handle the CJK text this app produces and would raise
    # UnicodeEncodeError on non-UTF-8 locales.
    with open(transcript_file_path, "w", encoding="utf-8") as file:
        file.write(text)
    return text, transcript_file_path
# Static HTML fragments rendered at the top of the Gradio page:
# page title, author/subtitle link, and related blog-post links.
TITLE = """<h1>ASR 語音語料辨識修正工具</h1>"""
SUBTITLE = """<h2 align="center"><a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D. @ 2024/04 </a><br></h2>"""
LINKS = """<a href='https://blog.twman.org/2021/04/ASR.html' target='_blank'>那些語音處理 (Speech Processing) 踩的坑</a> | <a href='https://blog.twman.org/2021/04/NLP.html' target='_blank'>那些自然語言處理 (Natural Language Processing, NLP) 踩的坑</a> | <a href='https://blog.twman.org/2024/02/asr-tts.html' target='_blank'>那些ASR和TTS可能會踩的坑</a> | <a href='https://blog.twman.org/2024/02/LLM.html' target='_blank'>那些大模型開發會踩的坑</a> | <a href='https://blog.twman.org/2023/04/GPT.html' target='_blank'>什麼是大語言模型,它是什麼?想要嗎?</a><br>
<a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PPOCRLabel來幫PaddleOCR做OCR的微調和標註</a> | <a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>"""
# Build the Gradio UI: upload an audio file, run Whisper, show the text,
# and offer the saved transcript for download. (Fix: the source had lost
# its indentation; structure restored per the visible `with` blocks.)
with gr.Blocks(css=".container { max-width: 800px; margin: auto; } .gradio-app { background-color: #f0f0f0; } button { background-color: #4CAF50; color: white; }") as demo:
    gr.HTML(TITLE)
    gr.HTML(SUBTITLE)
    gr.HTML(LINKS)
    with gr.Row():
        # Declared with type="filepath" so the click handler receives a
        # path on disk rather than raw sample data.
        audio_input = gr.Audio(label="上載你的音頻", type="filepath")
    submit_button = gr.Button("語音識別")
    output_text = gr.TextArea(label="識別結果")
    download_link = gr.File(label="下載轉錄文件")
    # transcribe returns (text, file_path); wire both outputs in order.
    submit_button.click(fn=transcribe, inputs=audio_input, outputs=[output_text, download_link])

# Launch only when executed as a script (how Spaces runs app.py), so
# importing this module for testing does not start a server.
if __name__ == "__main__":
    demo.launch()