# whisper_lab2 / app.py
# Campfireman's picture
# Update app.py
# e730b78
from transformers import pipeline
import gradio as gr
import moviepy.editor as mp
from pytube import YouTube
import math
# Hugging Face pipeline built once at import time from the
# "Campfireman/whisper-small-hi" checkpoint; both transcription callbacks
# below call it to turn an audio file into text.
pipe = pipeline(model="Campfireman/whisper-small-hi") # change to "your-username/the-name-you-picked"
# Length, in seconds, of each audio segment that audio_clipper() cuts out and
# transcribe() feeds to the pipeline.
segment_length = 25 # 25s per segment
def download_video(url):
    """Download a YouTube video and extract its audio track to a WAV file.

    Side effects (module-level globals that other functions read):
      * ``my_clip``      - the opened ``VideoFileClip`` of the downloaded file
      * ``original_wav`` - ``AudioFileClip`` opened on "AUDIO_ORIGINAL.wav"
      * ``audio_length`` - duration of ``original_wav`` in seconds

    Args:
        url: Address of the YouTube video to fetch.

    Returns:
        Path of the downloaded video file, as returned by pytube.

    Raises:
        ValueError: If the video offers no progressive mp4 stream.
    """
    global my_clip
    global original_wav
    global audio_length

    print("Downloading...")
    stream = (
        YouTube(url)
        .streams.filter(progressive=True, file_extension="mp4")
        .first()
    )
    # first() yields None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError on None.download().
    if stream is None:
        raise ValueError("No progressive mp4 stream is available for this video")
    local_file = stream.download()
    print("Downloaded")

    my_clip = mp.VideoFileClip(local_file)
    # Dump the audio track to disk, then reopen it as a standalone audio clip
    # so that audio_clipper() can slice it.
    my_clip.audio.write_audiofile("AUDIO_ORIGINAL.wav")
    original_wav = mp.AudioFileClip("AUDIO_ORIGINAL.wav")
    audio_length = original_wav.duration
    print("Overall audio time elapsed: " + str(audio_length))
    return local_file
def validate_youtube(url):
    """Check that *url* points to a YouTube video of at most 10 minutes.

    Args:
        url: Address of the candidate YouTube video.

    Returns:
        True when the video can be resolved and its length does not exceed
        600 seconds; False when the video cannot be resolved or is longer.
        The reason is printed to the log in either case.
    """
    try:
        yt = YouTube(url)
        # Length of the video in seconds. Reading it may require fetching
        # the video metadata, so it is kept inside the try block as well.
        video_length = yt.length
    except Exception:
        print("Hi the URL seems not a valid YouTube video link")
        # An unresolvable link must be rejected; the caller reports a
        # validation error to the user when this function returns False.
        return False

    if video_length > 600:
        print("Your video is longer than 10 minutes")
        return False

    print("Your video is less than 10 minutes")
    return True
def validate_url(url):
    """Return True when *url* looks like a well-formed web address.

    The address is accepted either exactly as given or after prepending
    "https://", so links typed without a scheme (as the input box suggests)
    and links pasted with one are both valid.

    Args:
        url: Text entered by the user.

    Returns:
        True if the text is a syntactically valid URL, False otherwise
        (including empty, blank or non-string input).
    """
    if not isinstance(url, str) or not url.strip():
        return False

    # Imported lazily, as in the original code, so the module can be loaded
    # even when the optional dependency is only needed at request time.
    import validators

    candidate = url.strip()
    # validators.url() returns True on success and a falsy failure object
    # otherwise, hence the explicit bool() conversions.
    if bool(validators.url(candidate)):
        return True
    return bool(validators.url("https://" + candidate))
def audio_clipper(index, seg_total):
    """Write segment number *index* of the downloaded audio to a WAV file.

    Works on a copy of the global ``original_wav`` clip: everything before
    the segment is cut out first, then (unless this is the last segment)
    everything after it, and the remainder is written to disk.

    Args:
        index: Zero-based number of the segment to extract.
        seg_total: Total number of segments the audio is divided into.

    Returns:
        Name of the written file, "audio_out<index>.wav".
    """
    out_name = f"audio_out{index}.wav"
    piece = original_wav.copy()
    print(f"Segment {index}:")

    head_end = segment_length * index
    tail_start = segment_length * (index + 1)
    is_last = index >= seg_total - 1

    # Drop the leading part that belongs to earlier segments.
    if index > 0:
        print(f"Clipped: 0 ~ {head_end}sec")
        piece = piece.cutout(0, head_end)

    # Drop the trailing part that belongs to later segments.
    if not is_last:
        print(f"Clipped: {tail_start}~ {audio_length} sec")
        piece = piece.cutout(tail_start, audio_length)

    # Persist the temporary segment so the pipeline can read it from disk.
    piece.write_audiofile(out_name)
    return out_name
def transcribe(video_url):
    """Transcribe the speech of a YouTube video, segment by segment.

    The URL is validated, the video is downloaded (which sets the global
    ``audio_length``), the audio is split into ``segment_length``-second
    pieces and each piece is run through the pipeline.

    Args:
        video_url: Address of the YouTube video entered by the user.

    Returns:
        The concatenated transcription of all segments, or a human-readable
        error message when validation, download or the audio length check
        fails.
    """
    if not validate_url(video_url):
        return "Invalid URL. Please check the format of your link. "
    if not validate_youtube(video_url):
        return "The URL seems not for Youtube videos or the video is too long. Check out the errors in the log. "

    # This function is the UI callback, so a failed download is reported as
    # a message (details go to the log) instead of an unhandled exception.
    try:
        download_video(video_url)
    except Exception as err:
        print("Download failed: " + str(err))
        return "The video could not be downloaded. Check out the errors in the log. "

    segment_count = math.ceil(audio_length / segment_length)
    print("Total segments: " + str(segment_count))
    if segment_count <= 0:
        # Use the configured segment length rather than a hard-coded 25.
        return "Corrupted Video Data! Invalid length of " + str(segment_count * segment_length) + " second(s)."

    pieces = []
    for seg_index in range(segment_count):
        audio = audio_clipper(seg_index, segment_count)
        seg_text = pipe(audio, batch_size=512, truncation=True)["text"]
        print("Segtext: ")
        print(seg_text)
        pieces.append(seg_text)
    return "".join(pieces)
def transcribe2(audio):
    """Transcribe a recorded audio file with the speech pipeline.

    Args:
        audio: Path of the recorded audio file, or None/empty when the user
            submitted without recording anything.

    Returns:
        The recognised text, or an empty string when no audio was supplied.
    """
    # The UI passes no value when nothing was recorded; the pipeline cannot
    # handle that, so answer with an empty transcription instead of failing.
    if not audio:
        return ""
    return pipe(audio)["text"]
# Tab 1: takes a YouTube URL as text and shows the transcription of the video.
iface = gr.Interface(
    title="Whisper Small SE",
    description="Video Swedish Transcriptior",
    fn=transcribe,
    inputs=gr.Textbox(
        label="Enter the URL of the Youtube video clip here (without prefixes like http://):"
    ),
    outputs="text",
)

# Tab 2: records from the microphone and shows the transcription of the clip.
iface2 = gr.Interface(
    title="Whisper Small Swedish",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
    fn=transcribe2,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
)

# Combine both interfaces into one tabbed application and start serving it.
demo = gr.TabbedInterface(
    interface_list=[iface, iface2],
    tab_names=["Swedish YouTube Video to Text", "Swedish Audio to Text"],
)
demo.launch()