whisper-large-v3

Sleeping

App Files Files Community

DeepLearning101 committed on Jun 15

Commit

567ec12

•

1 Parent(s): 7d047c4

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -59

app.py CHANGED Viewed

@@ -77,56 +77,22 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
     return html_embed_str, text
-mf_transcribe = gr.Interface(
-    fn=transcribe,
-    inputs=[
-        gr.Audio(type="filepath"),
-        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-    ],
-    outputs="text",
-    title="Whisper Large V3: Transcribe Audio",
-    description=(
-        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
-        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
-        " of arbitrary length."
-        "[TonTon Huang Ph.D. @ 2024/04](https://www.twman.org)"
-        "[那些語音處理 (Speech Processing) 踩的坑](https://blog.twman.org/2021/04/ASR.html) | [那些自然語言處理 (Natural Language Processing, NLP) 踩的坑](https://blog.twman.org/2021/04/NLP.html)"
-        "[那些ASR和TTS可能會踩的坑](https://blog.twman.org/2024/02/asr-tts.html) | [那些大模型開發會踩的坑](https://blog.twman.org/2024/02/LLM.html)"
-        "[什麼是大語言模型，它是什麼？想要嗎？](https://blog.twman.org/2023/04/GPT.html)"
-        "[用PaddleOCR的PPOCRLabel來微調醫療診斷書和收據](https://blog.twman.org/2023/07/wsl.html) | [基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析](https://blog.twman.org/2023/07/HugIE.html)"
-    ),
-    allow_flagging="never",
-)
 file_transcribe = gr.Interface(
     fn=transcribe,
     inputs=[
-        gr.Audio(type="filepath", label="Audio file"),
         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
     ],
     outputs="text",
     title="Whisper Large V3: Transcribe Audio",
-    description=(
-        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
-        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
-        " of arbitrary length."
-        "[TonTon Huang Ph.D. @ 2024/04](https://www.twman.org)"
-        "[那些語音處理 (Speech Processing) 踩的坑](https://blog.twman.org/2021/04/ASR.html) | [那些自然語言處理 (Natural Language Processing, NLP) 踩的坑](https://blog.twman.org/2021/04/NLP.html)"
-        "[那些ASR和TTS可能會踩的坑](https://blog.twman.org/2024/02/asr-tts.html) | [那些大模型開發會踩的坑](https://blog.twman.org/2024/02/LLM.html)"
-        "[什麼是大語言模型，它是什麼？想要嗎？](https://blog.twman.org/2023/04/GPT.html)"
-        "[用PaddleOCR的PPOCRLabel來微調醫療診斷書和收據](https://blog.twman.org/2023/07/wsl.html) | [基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析](https://blog.twman.org/2023/07/HugIE.html)"
     ),
     allow_flagging="never",
 )
@@ -139,26 +105,19 @@ yt_transcribe = gr.Interface(
     ],
     outputs=["html", "text"],
     title="Whisper Large V3: Transcribe YouTube",
-    description=(
-        "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
-        f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe video files of"
-        " arbitrary length."
-        "[TonTon Huang Ph.D. @ 2024/04](https://www.twman.org)"
-        "[那些語音處理 (Speech Processing) 踩的坑](https://blog.twman.org/2021/04/ASR.html) | [那些自然語言處理 (Natural Language Processing, NLP) 踩的坑](https://blog.twman.org/2021/04/NLP.html)"
-        "[那些ASR和TTS可能會踩的坑](https://blog.twman.org/2024/02/asr-tts.html) | [那些大模型開發會踩的坑](https://blog.twman.org/2024/02/LLM.html)"
-        "[什麼是大語言模型，它是什麼？想要嗎？](https://blog.twman.org/2023/04/GPT.html)"
-        "[用PaddleOCR的PPOCRLabel來微調醫療診斷書和收據](https://blog.twman.org/2023/07/wsl.html) | [基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析](https://blog.twman.org/2023/07/HugIE.html)"
     ),
     allow_flagging="never",
 )
 with gr.Blocks() as demo:
-    gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
 demo.launch(debug=True)

     return html_embed_str, text
 file_transcribe = gr.Interface(
     fn=transcribe,
     inputs=[
+        gr.Audio(type="filepath"),
         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
     ],
     outputs="text",
     title="Whisper Large V3: Transcribe Audio",
+    description="""Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the checkpoint openai/whisper-large-v3 and 🤗 Transformers to transcribe audio files of arbitrary length.<br>
+        <a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D. @ 2024/04 </a><br>
+        輸入ASR文本，糾正同音字/詞錯誤<br>
+        <a href='https://blog.twman.org/2021/04/ASR.html' target='_blank'>那些語音處理 (Speech Processing) 踩的坑</a> | <a href='https://blog.twman.org/2021/04/NLP.html' target='_blank'>那些自然語言處理 (Natural Language Processing, NLP) 踩的坑</a><br>
+        <a href='https://blog.twman.org/2024/02/asr-tts.html' target='_blank'>那些ASR和TTS可能會踩的坑</a> | <a href='https://blog.twman.org/2024/02/LLM.html' target='_blank'>那些大模型開發會踩的坑</a><br>
+        <a href='https://blog.twman.org/2023/04/GPT.html' target='_blank'>什麼是大語言模型，它是什麼？想要嗎？</a><br>
+        <a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PaddleOCR的PPOCRLabel來微調醫療診斷書和收據</a> | <a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>
+        <a href='https://github.com/shibing624/pycorrector' target='_blank'>Masked Language Model (MLM) as correction BERT</a>""",
     ),
     allow_flagging="never",
 )
     ],
     outputs=["html", "text"],
     title="Whisper Large V3: Transcribe YouTube",
+    description="""Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint openai/whisper-large-v3 and 🤗 Transformers to transcribe audio files of arbitrary length.<br>
+        <a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D. @ 2024/04 </a><br>
+        輸入ASR文本，糾正同音字/詞錯誤<br>
+        <a href='https://blog.twman.org/2021/04/ASR.html' target='_blank'>那些語音處理 (Speech Processing) 踩的坑</a> | <a href='https://blog.twman.org/2021/04/NLP.html' target='_blank'>那些自然語言處理 (Natural Language Processing, NLP) 踩的坑</a><br>
+        <a href='https://blog.twman.org/2024/02/asr-tts.html' target='_blank'>那些ASR和TTS可能會踩的坑</a> | <a href='https://blog.twman.org/2024/02/LLM.html' target='_blank'>那些大模型開發會踩的坑</a><br>
+        <a href='https://blog.twman.org/2023/04/GPT.html' target='_blank'>什麼是大語言模型，它是什麼？想要嗎？</a><br>
+        <a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PaddleOCR的PPOCRLabel來微調醫療診斷書和收據</a> | <a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>
+        <a href='https://github.com/shibing624/pycorrector' target='_blank'>Masked Language Model (MLM) as correction BERT</a>""",
     ),
     allow_flagging="never",
 )
 with gr.Blocks() as demo:
+    gr.TabbedInterface([file_transcribe, yt_transcribe], ["Audio file", "YouTube"])
 demo.launch(debug=True)