David Li
fix: try again
d903faf
raw
history blame
4.66 kB
import whisper
import gradio as gr
import ffmpeg
import youtube_dl
import os
# YouTube format ids this script knows about.
# NOTE(review): presumably the ids YouTube exposes while a stream is still
# live -- confirm against a current youtube-dl format listing.
youtube_livestream_codes = [91, 92, 93, 94, 95, 96, 300, 301]

# Format ids that parse_metadata accepts when it looks for an mp4 rendition.
# NOTE(review): presumably regular (non-live) YouTube mp4 renditions -- confirm.
youtube_mp4_codes = [298, 18, 22, 140, 133, 134]
import sys
def get_video_metadata(video_url: str = "https://www.youtube.com/watch?v=21X5lGlDOfg&ab_channel=NASA")-> dict:
    """
    Look up a video's metadata with youtube-dl without downloading it.

    Prints the video title and uploader id, then returns the info dict that
    ``extract_info`` produced for ``video_url``.
    """
    ydl_options = {'outtmpl': '%(id)s.%(ext)s'}
    with youtube_dl.YoutubeDL(ydl_options) as downloader:
        # download=False: only the metadata is fetched here.
        metadata = downloader.extract_info(video_url, download=False)
    title = metadata.get('title')
    uploader = metadata.get('uploader_id')
    print(f"[youtube] {title}: {uploader}")
    return metadata
def _numeric_format_id(fmt):
    """Return the entry's ``format_id`` as an int, or None if it is not numeric."""
    try:
        return int(fmt.get("format_id", 0))
    except (TypeError, ValueError):
        # Ids that are not plain numbers can never match the known code
        # lists, so they are ignored rather than aborting the whole parse.
        return None


def parse_metadata(metadata) -> dict:
    """
    Choose which format id to download from a youtube-dl metadata dict.

    After a video is done recording, it will have both the livestream format
    and the mp4 format; a known mp4 id is preferred when one is present.

    Returns a dict with:
      "selected_id": the chosen numeric format id (lowest matching id)
      "is_livestream": False when a known mp4 id was found, True when the
        choice fell back to a known livestream id

    Raises ValueError when the metadata contains none of the known ids.
    """
    formats = metadata.get("formats", [])

    # Prefer a regular mp4 rendition: only entries whose ext is "mp4" count.
    mp4_ids = {_numeric_format_id(f) for f in formats if f.get("ext", "") == "mp4"}
    video_entries = sorted(mp4_ids.intersection(youtube_mp4_codes))
    if video_entries:
        # use video format id over livestream id if available
        return {
            "selected_id": video_entries[0],
            "is_livestream": False,
        }

    # No known mp4 id: fall back to the known livestream ids (any ext).
    all_ids = {_numeric_format_id(f) for f in formats}
    live_entries = sorted(all_ids.intersection(youtube_livestream_codes))
    if live_entries:
        return {
            "selected_id": live_entries[0],
            "is_livestream": True,
        }

    raise ValueError("metadata contains no known mp4 or livestream format id")
def get_video(url: str, config: dict):
    """
    Copy the media at ``url`` into a local file by running ffmpeg.

    ``config`` may contain:
      "filename": output path (default "livestream01.mp4")
      "end": value handed to ffmpeg as the input's ``t`` option
        (default "00:15:00")

    Returns None; the result is the file written at ``filename``.
    """
    # could delay start time by a few seconds to just sync up and capture the full video length
    # but would need to time how long it takes to fetch the video using youtube-dl and other adjustments and start a bit before
    filename = config.get("filename", "livestream01.mp4")
    end = config.get("end", "00:15:00")
    (
        ffmpeg
        .input(url, t=end)
        .output(filename)
        # Overwrite an existing file instead of stopping at ffmpeg's
        # "already exists" prompt; callers reuse a fixed output name.
        .run(overwrite_output=True)
    )
def get_all_files(url: str, end: str = "00:15:00"):
    """
    Download the video behind ``url`` to "temp.mp4" and return that filename.

    The metadata is fetched with get_video_metadata, parse_metadata picks the
    format id, and the matching format's direct URL is handed to get_video
    together with ``end``.

    Raises IndexError when the selected format id is not among the metadata's
    formats.
    """
    metadata = get_video_metadata(url)
    selected_id = parse_metadata(metadata).get("selected_id", 0)
    # format_id values in the metadata are compared as strings.
    wanted_id = str(selected_id)
    selected_format = next(
        (f for f in metadata.get("formats", []) if f.get("format_id", "") == wanted_id),
        None,
    )
    if selected_format is None:
        raise IndexError(f"format id {wanted_id!r} not found in the metadata for {url!r}")
    format_url = selected_format.get("url", "")
    filename = "temp.mp4"
    get_video(format_url, {"filename": filename, "end": end})
    return filename
# Whisper models already loaded in this process, keyed by model name, so that
# repeated calls do not reload the weights every time.
_WHISPER_MODELS = {}


def _get_whisper_model(name: str):
    """Return the Whisper model called ``name``, loading it on first use."""
    if name not in _WHISPER_MODELS:
        _WHISPER_MODELS[name] = whisper.load_model(name)
    return _WHISPER_MODELS[name]


def _seconds_to_srt_timecode(seconds) -> str:
    """Format a number of seconds as an SRT timestamp, ``HH:MM:SS,mmm``."""
    total_ms = max(0, int(round(float(seconds or 0) * 1000)))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def _segments_to_srt(segments) -> str:
    """Render transcription segments as SRT text (cue numbers start at 1)."""
    lines = []
    for index, segment in enumerate(segments, start=1):
        start = _seconds_to_srt_timecode(segment.get("start"))
        end = _seconds_to_srt_timecode(segment.get("end"))
        lines.append(f"{index}")
        lines.append(f"{start} --> {end}")
        lines.append(segment.get("text", "").strip())
        lines.append('')
    return '\n'.join(lines)


def get_text_from_mp3_whisper(inputType:str, mp3_file: str, url_path: str, taskName: str, srcLanguage: str)->tuple:
    """
    Transcribe (or translate) a media file with Whisper and burn the result
    into a video as subtitles.

    Parameters:
      inputType: "url" to download the media from ``url_path`` first; any
        other value uses ``mp3_file`` directly.
      mp3_file: path of a local media file (used when inputType is not "url").
      url_path: video URL (used when inputType is "url").
      taskName: Whisper task; defaults to "transcribe" when empty.
      srcLanguage: language passed to Whisper.

    Returns a tuple ``(segments, srt_text, "subtitled.mp4")``.

    Side effects: writes "transcript.srt" and "subtitled.mp4" in the current
    working directory (plus "temp.mp4" for URL input, via get_all_files).

    Raises ValueError when the input selected by ``inputType`` is empty.
    """
    if inputType == "url":
        if not url_path:
            raise ValueError("url_path is required when inputType is 'url'")
        input_file = get_all_files(url_path)
    else:
        if not mp3_file:
            raise ValueError("mp3_file is required when inputType is not 'url'")
        input_file = mp3_file

    model = _get_whisper_model("medium")
    transcribe_options = dict(task=taskName or "transcribe", language=srcLanguage)
    result = model.transcribe(input_file, **transcribe_options)
    segments = result.get("segments") or []
    words = _segments_to_srt(segments)

    # ffmpeg reads the subtitles from disk, so the SRT text has to be written
    # out before the filter runs.
    srt_path = "transcript.srt"
    with open(srt_path, "w", encoding="utf-8") as srt_file:
        srt_file.write(words)

    # Equivalent to:
    #   ffmpeg -i <input_file> -vf subtitles=transcript.srt -vcodec libx264 subtitled.mp4
    output_path = "subtitled.mp4"
    (
        ffmpeg
        .input(input_file)
        .output(output_path, vf=f"subtitles={srt_path}", vcodec="libx264")
        .run(overwrite_output=True)
    )
    return segments, words, output_path
# Build the Gradio UI around get_text_from_mp3_whisper and start serving it.
# The inputs are listed in the same order as the function's parameters.
gr.Interface(
    title = 'Download Video From url and extract text from audio',
    fn=get_text_from_mp3_whisper,
    inputs=[
        gr.Dropdown(["url", "file"]),             # inputType
        gr.inputs.Audio(type="filepath"),         # mp3_file
        gr.inputs.Textbox(),                      # url_path
        gr.Dropdown(["translate", "transcribe"]),  # taskName
        gr.Dropdown(["Japanese", "English"])      # srcLanguage
    ],
    # One output per element of the returned tuple: segments, SRT text, and
    # the path of the subtitled video ("video" is the component shortcut;
    # "mp4" is a file format, not a component name).
    outputs=[
        "json", "text", "video"
    ],
    live=True).launch()