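# NOTE(review): the next three lines look like web-page status text that was
# captured together with the code; they are not program text.
# Spaces:
# Runtime error
# Runtime error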
import functools
import os

import ffmpeg
import gradio as gr
import whisper
import youtube_dl
# Format ids compared against the ``format_id`` field of the entries in the
# extractor's ``formats`` list (see parse_metadata).
# NOTE(review): presumably YouTube itag values for live (HLS) streams --
# confirm against the extractor's format table.
youtube_livestream_codes = [
    91,
    92,
    93,
    94,
    95,
    96,
    300,
    301,
]

# Format ids accepted for regular (non-live) videos. parse_metadata sorts the
# matching ids before choosing one, so the order of this list does not set
# the selection priority.
youtube_mp4_codes = [
    298,
    18,
    22,
    140,
    133,
    134,
]

# Not used by the code visible in this file; kept in case other code relies on it.
import sys
def get_video_metadata(video_url: str = "https://www.youtube.com/watch?v=21X5lGlDOfg&ab_channel=NASA") -> dict:
    """Fetch the youtube_dl info dictionary for ``video_url`` without downloading.

    The title and uploader id are printed as a short progress line.

    Args:
        video_url: Address of the video page to inspect.

    Returns:
        The dictionary returned by ``YoutubeDL.extract_info``.
    """
    ydl_options = {'outtmpl': '%(id)s.%(ext)s'}
    with youtube_dl.YoutubeDL(ydl_options) as downloader:
        # download=False: only the metadata is requested, no media is saved.
        metadata = downloader.extract_info(video_url, download=False)
        title = metadata.get('title')
        uploader = metadata.get('uploader_id')
        print(f"[youtube] {title}: {uploader}")
    return metadata
def parse_metadata(metadata) -> dict:
    """Pick the format id to download from an extractor info dictionary.

    A regular mp4 format (an id listed in ``youtube_mp4_codes`` whose ``ext``
    is ``"mp4"``) is preferred. If none is present, the lowest id listed in
    ``youtube_livestream_codes`` is used instead. After a livestream has
    finished recording, both kinds of format may be present.

    Args:
        metadata: Mapping with a ``"formats"`` list; each entry is read for
            its ``"format_id"`` and ``"ext"`` keys.

    Returns:
        ``{"selected_id": int | None, "is_livestream": bool}``.
        ``selected_id`` is ``None`` when no known format id is available.
    """
    formats = metadata.get("formats", [])

    all_ids = set()
    mp4_ids = set()
    for entry in formats:
        try:
            format_id = int(entry.get("format_id", 0))
        except (TypeError, ValueError):
            # Non-numeric ids can never match the integer code lists.
            continue
        all_ids.add(format_id)
        if entry.get("ext", "") == "mp4":
            mp4_ids.add(format_id)

    # Use the video format id over the livestream id if available.
    video_entries = sorted(mp4_ids.intersection(youtube_mp4_codes))
    if video_entries:
        return {
            "selected_id": video_entries[0],
            "is_livestream": False,
        }

    # Fall back to a livestream format so selected_id is always bound.
    livestream_entries = sorted(all_ids.intersection(youtube_livestream_codes))
    return {
        "selected_id": livestream_entries[0] if livestream_entries else None,
        "is_livestream": True,
    }
def get_video(url: str, config: dict):
    """Copy up to ``config["end"]`` of the stream at ``url`` into a local file.

    Args:
        url: Direct media URL handed to ffmpeg as its input.
        config: Optional settings:
            ``"filename"`` -- output path (default ``"livestream01.mp4"``);
            ``"end"`` -- value for ffmpeg's input ``-t`` option
            (default ``"00:15:00"``).

    Returns:
        None. The media is written to ``config["filename"]``.
    """
    # could delay start time by a few seconds to just sync up and capture the full video length
    # but would need to time how long it takes to fetch the video using youtube-dl and other adjustments and start a bit before
    filename = config.get("filename", "livestream01.mp4")
    end = config.get("end", "00:15:00")
    (
        ffmpeg
        .input(url, t=end)
        .output(filename)
        # Without -y ffmpeg asks before replacing an existing file, which
        # stalls or aborts a non-interactive run that reuses the same name.
        .run(overwrite_output=True)
    )
def get_all_files(url: str, end: str = "00:15:00"):
    """Download the selected format of the video at ``url`` to ``temp.mp4``.

    Args:
        url: Video page address passed to ``get_video_metadata``.
        end: Duration limit forwarded to ``get_video``.

    Returns:
        The name of the written file, always ``"temp.mp4"``.

    Raises:
        IndexError: If no entry in ``formats`` has the selected format id.
    """
    info = get_video_metadata(url)
    selection = parse_metadata(info)
    # format_id values are compared as strings.
    wanted_id = str(selection.get("selected_id", 0))
    candidates = [
        entry
        for entry in info.get("formats", [])
        if entry.get("format_id", "") == wanted_id
    ]
    stream_url = candidates[0].get("url", "")
    output_name = "temp.mp4"
    get_video(stream_url, {"filename": output_name, "end": end})
    return output_name
def second_to_timecode(seconds: float) -> str:
    """Format a time offset in seconds as an SRT timecode ``HH:MM:SS,mmm``.

    Negative inputs are clamped to zero; the value is rounded to the nearest
    millisecond.
    """
    total_ms = max(0, round(float(seconds) * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


@functools.lru_cache(maxsize=1)
def _load_whisper_model(name: str = "medium"):
    """Load a Whisper model once and reuse it on later calls."""
    return whisper.load_model(name)


def get_text_from_mp3_whisper(inputType: str, mp3_file: str, url_path: str, taskName: str, srcLanguage: str) -> tuple:
    """Transcribe or translate media with Whisper and burn the result in as subtitles.

    Args:
        inputType: ``"url"`` to download the media from ``url_path`` first;
            any other value uses ``mp3_file`` directly.
        mp3_file: Path of a local media file (used when ``inputType`` is not
            ``"url"``).
        url_path: Video page address (used when ``inputType`` is ``"url"``).
        taskName: Whisper task, passed through as ``task``.
        srcLanguage: Source language, passed through as ``language``.

    Returns:
        A 3-tuple ``(segments, srt_text, "subtitled.mp4")``: the segment list
        from the Whisper result, the subtitles as SRT text, and the path of
        the video written by ffmpeg.

    Side effects:
        Writes ``transcript.srt`` and ``subtitled.mp4`` in the working
        directory (plus ``temp.mp4`` in url mode, via ``get_all_files``).
    """
    model = _load_whisper_model("medium")
    transcribe_options = dict(task=taskName, language=srcLanguage)

    # Resolve the media path once so both Whisper and ffmpeg use the same file.
    if inputType == "url":
        input_file = get_all_files(url_path)
    else:
        input_file = mp3_file
    result = model.transcribe(input_file, **transcribe_options)
    segments = result.get("segments", [])

    # Build the SRT text; cue numbers start at 1 by convention.
    lines = []
    for count, segment in enumerate(segments, start=1):
        start = segment.get("start")
        end = segment.get("end")
        lines.append(f"{count}")
        lines.append(f"{second_to_timecode(start)} --> {second_to_timecode(end)}")
        lines.append(segment.get("text", "").strip())
        lines.append('')
    words = '\n'.join(lines)

    # The subtitles filter reads its cues from a file, so persist them first.
    with open('transcript.srt', 'w', encoding='utf-8') as srt_file:
        srt_file.write(words)

    # Equivalent to:
    #   ffmpeg -y -i <input_file> -vf subtitles=transcript.srt -vcodec libx264 subtitled.mp4
    # NOTE(review): burning subtitles needs a video stream; an audio-only
    # input is expected to make ffmpeg fail here -- confirm desired handling.
    (
        ffmpeg
        .input(input_file)
        .output('subtitled.mp4', vf='subtitles=transcript.srt', vcodec='libx264')
        .run(overwrite_output=True)
    )
    return segments, words, "subtitled.mp4"
# Build and launch the web UI. The five inputs map positionally onto the
# parameters of get_text_from_mp3_whisper; the three outputs map onto its
# returned (segments, srt_text, video_path) tuple.
gr.Interface(
    title='Download Video From url and extract text from audio',
    fn=get_text_from_mp3_whisper,
    inputs=[
        gr.Dropdown(["url", "file"]),              # inputType
        gr.inputs.Audio(type="filepath"),          # mp3_file
        gr.inputs.Textbox(),                       # url_path
        gr.Dropdown(["translate", "transcribe"]),  # taskName
        gr.Dropdown(["Japanese", "English"]),      # srcLanguage
    ],
    outputs=[
        # "video" is the component shortcut for a video file path; "mp4" is
        # not a component name.
        "json", "text", "video"
    ],
    live=True,
).launch()