id2223_lab2 / app.py
tilos's picture
Update app.py
be3e689 verified
raw
history blame contribute delete
No virus
3.29 kB
from transformers import pipeline
import gradio as gr
import os
import subprocess
from pytube import YouTube
# Speech-recognition pipeline used by the transcription handlers in this file;
# it is created once, when the module is imported.
# To use your own checkpoint, change the model id to
# "your-username/the-name-you-picked".
pipe = pipeline(model="tilos/whisper-small-zh-HK")
def video2mp3(video_file, output_ext="mp3"):
    """Extract the audio of a media file into a new file using ffmpeg.

    Args:
        video_file: Path to the input media file.
        output_ext: Extension (without the leading dot) of the file to
            produce; ffmpeg chooses the output format from it.

    Returns:
        The path of the converted file, i.e. ``video_file`` with its
        extension replaced by ``output_ext``.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status, so a failed conversion is reported here instead of
            surfacing later as a missing-file error.
    """
    filename, _ = os.path.splitext(video_file)
    output_file = f"{filename}.{output_ext}"

    # ffmpeg cannot convert a file in place: when the input already carries
    # the requested extension the output path equals the input path, so
    # there is nothing to do and the input is returned as-is.
    if output_file == video_file:
        return output_file

    # ffmpeg's console output is discarded (stderr is folded into the
    # silenced stdout); only the exit status is inspected.
    subprocess.run(
        ["ffmpeg", "-y", "-i", video_file, output_file],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
        check=True,
    )
    return output_file
def transcribe(audio):
    """Transcribe an audio file with the module-level ``pipe``.

    Args:
        audio: Path to the audio file, or ``None`` when the UI was
            submitted without any recording or upload.

    Returns:
        The ``"text"`` entry of the pipeline result, or an empty string
        when no audio was provided.
    """
    # Gradio hands over None for an empty Audio component; skip the model
    # call instead of letting the pipeline fail on it.
    if audio is None:
        return ""
    return pipe(audio)["text"]
def get_text(url):
    """Download the audio behind a YouTube URL and transcribe it.

    Args:
        url: Address of the YouTube video. ``None``, an empty string or a
            whitespace-only string is treated as "no input".

    Returns:
        The pipeline's ``"text"`` result with surrounding whitespace
        stripped, or an empty string when no URL was given.
    """
    # An empty textbox should not trigger a download attempt.
    if not url or not url.strip():
        return ""
    result = pipe(get_audio(url.strip()))
    return result["text"].strip()
def get_audio(url):
    """Download the first audio-only stream of a YouTube video.

    The stream is saved into the current directory and the downloaded file
    is then renamed so that it ends in ``.mp3``.

    Args:
        url: Address of the YouTube video.

    Returns:
        Path of the renamed ``.mp3`` file.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    stream = YouTube(url).streams.filter(only_audio=True).first()
    # first() yields None for an empty query; fail with a clear message
    # rather than an AttributeError on None.
    if stream is None:
        raise ValueError(f"No audio-only stream available for {url!r}")

    downloaded = stream.download(output_path=".")
    base, _ = os.path.splitext(downloaded)
    mp3_path = base + ".mp3"
    # NOTE(review): only the extension changes here; the data is not
    # re-encoded. Presumably the consumer detects the real container format
    # from the file content -- confirm.
    # os.replace overwrites a file left over from an earlier request on
    # every platform, whereas os.rename raises on Windows in that case.
    os.replace(downloaded, mp3_path)
    return mp3_path
def offline_video(video):
    """Transcribe the audio track of an uploaded video file.

    Args:
        video: Path to the video file, or ``None``/empty when the form was
            submitted without a video.

    Returns:
        The transcript produced by ``transcribe`` for the audio extracted
        by ``video2mp3``, or an empty string when no video was provided.
    """
    # Without this guard a missing upload crashes while splitting the path.
    if not video:
        return ""
    return transcribe(video2mp3(video))
# UI layout: one page with four ways to feed the model -- a video file, an
# uploaded audio file, a microphone recording and a YouTube URL. Each
# section wires its own button to the matching handler.
with gr.Blocks() as demo:
    # Video file input.
    gr.Interface(
        title="Cantonese Transcription using Whisper",
        description="Demo for Cantonese speech recognition using a fine-tuned Whisper small model. "
                    "Generate zh-HK subtitle from video file, audio file, your microphone, and Youtube URL",
        fn=offline_video,
        inputs="video",
        outputs="text",
        allow_flagging="never",
    )

    # Audio file input.
    with gr.Row():
        with gr.Column():
            input_audio = gr.Audio(source="upload", type="filepath")
            # Own name for this button so it is not confused with (or
            # rebound by) the microphone button further down.
            audio_btn = gr.Button('Generate Voice Subtitles')
        with gr.Column():
            output_audio = gr.Textbox(placeholder='Transcript from audio', label='Subtitles')
    audio_btn.click(transcribe, inputs=input_audio, outputs=output_audio)

    # Microphone input.
    with gr.Row():
        with gr.Column():
            input_micro = gr.Audio(source="microphone", type="filepath")
            micro_btn = gr.Button('Generate Voice Subtitles')
        with gr.Column():
            output_micro = gr.Textbox(placeholder='Transcript from mic', label='Subtitles')
    micro_btn.click(transcribe, inputs=input_micro, outputs=output_micro)

    # YouTube URL input.
    with gr.Row():
        with gr.Column():
            inputs_url = gr.Textbox(placeholder='Youtube URL', label='URL')
            url_btn = gr.Button('Generate Youtube Video Subtitles')
            gr.Examples(examples=["https://www.youtube.com/watch?v=Yw4EoGWe0vw"], inputs=[inputs_url])
        with gr.Column():
            output_url = gr.Textbox(placeholder='Transcript from video.', label='Transcript')
    url_btn.click(get_text, inputs=inputs_url, outputs=output_url)

demo.launch(debug=True)