miniGPT4-Video-Zero / minigpt4_video_inference.py
fffiloni's picture
Upload 164 files
2ada650 verified
raw history blame
No virus
3.33 kB
import json
from tqdm import tqdm
from pytubefix import YouTube
import xml.etree.ElementTree as ET
import os
# Load the dataset; the download loop later iterates over `data` and derives
# a video id from each entry.
with open('VideoInstruct100K.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Index the videos that are already on disk so they are not fetched again.
# The id is the file name up to its first dot. The 'videos' directory may not
# exist yet on a first run, in which case nothing has been downloaded so far.
existed_video_id = {}
if os.path.isdir('videos'):
    existed_video_id = {
        video_name.split('.')[0]: True for video_name in os.listdir('videos')
    }
def download_video_with_subtitles(video_id):
    """Download one YouTube video and its English caption tracks.

    The video is written to ``videos/<video_id>.mp4``. Every caption track
    whose language code contains ``'en'`` is downloaded in XML form into
    ``subtitles_xml`` using ``video_id`` as the title.

    Failures are reported with ``print`` and never raised, so a single bad
    video or caption track cannot abort a batch run.

    Args:
        video_id: YouTube video identifier, inserted into the watch URL.

    Returns:
        A ``(video_downloaded, caption_downloaded)`` tuple of booleans.
        ``(False, False)`` when the video itself could not be downloaded;
        captions are not attempted in that case. ``caption_downloaded`` is
        True when at least one English caption track was saved.
    """
    video_filename = f"{video_id}.mp4"
    try:
        # The YouTube object is created inside the ``try`` so that a failure
        # while constructing it is handled like any other download failure.
        yt = YouTube(f'https://www.youtube.com/watch?v={video_id}')
        # Get the video stream with the highest resolution and download it.
        stream = yt.streams.get_highest_resolution()
        stream.download(output_path='videos', filename=video_filename)
    except Exception as e:
        print(f"Error downloading video {video_id}: {str(e)}")
        return False, False

    # Get the video's available captions (subtitles).
    try:
        captions = yt.captions.all()
    except Exception as e:
        print(f"Error listing captions for video {video_id}: {str(e)}")
        return True, False

    caption_downloaded = False
    for caption in captions:
        # Select only English captions (substring match on the track code).
        if 'en' not in caption.code:
            continue
        try:
            caption.download(title=f"{video_id}", output_path='subtitles_xml', srt=False)
        except Exception as e:
            print(f"Error downloading caption {caption.code} for video {video_id}: {str(e)}")
        else:
            caption_downloaded = True
    return True, caption_downloaded
def convert_xml_vtt(xml_path, vtt_path):
    """Convert an XML subtitle file into a WebVTT file.

    The ``<p>`` elements of the XML are consumed in consecutive pairs: the
    first element of a pair supplies the cue start time (its ``t`` attribute,
    in milliseconds) and the cue text (the stripped text of its ``<s>``
    descendants joined with spaces); the second element supplies the cue end
    time. A trailing unpaired ``<p>`` produces no cue.
    NOTE(review): this pairing presumably targets YouTube auto-generated
    caption XML, where text paragraphs alternate with filler paragraphs;
    confirm before using it on other caption files.

    Args:
        xml_path: Path of the XML subtitle file to read.
        vtt_path: Path of the WebVTT file to write (UTF-8).

    Raises:
        xml.etree.ElementTree.ParseError: If the XML file is malformed.
        TypeError, ValueError: If a used ``<p>`` lacks an integer ``t``.
    """

    def ms_to_vtt_time(milliseconds):
        """Format milliseconds as ``mm:ss.ttt``, or ``hh:mm:ss.ttt`` from 1h on."""
        seconds, milliseconds = divmod(milliseconds, 1000)
        minutes, seconds = divmod(seconds, 60)
        hours, minutes = divmod(minutes, 60)
        if hours:
            # WebVTT requires minutes to stay below 60, so the hours field
            # must be emitted once a timestamp passes one hour.
            return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
        return f"{minutes:02d}:{seconds:02d}.{milliseconds:03d}"

    # Parse the XML subtitle file.
    root = ET.parse(xml_path).getroot()
    paragraphs = root.findall(".//p")

    vtt_subtitle = []
    # Walk the paragraphs two at a time: (text + start, end).
    for start_p, end_p in zip(paragraphs[::2], paragraphs[1::2]):
        start_time = int(start_p.get("t"))
        # An empty <s/> element has text None; treat it as an empty string.
        subtitle_text = " ".join(
            (s.text or "").strip() for s in start_p.findall(".//s")
        )
        end_time = int(end_p.get("t"))
        vtt_subtitle.append(
            f"{ms_to_vtt_time(start_time)} --> {ms_to_vtt_time(end_time)}\n{subtitle_text}\n"
        )

    # Join the VTT entries into a single document and save it.
    vtt_content = "WEBVTT\n\n" + "\n".join(vtt_subtitle)
    with open(vtt_path, "w", encoding="utf-8") as vtt_file:
        vtt_file.write(vtt_content)
import os
# Make sure every output directory exists before anything is written.
os.makedirs('videos', exist_ok=True)
os.makedirs('subtitles_vtt', exist_ok=True)
os.makedirs('subtitles_xml', exist_ok=True)

# Download every video referenced by the dataset, then convert its English
# auto-caption XML (the '(a.en)' file) to WebVTT when one was saved.
for video_path in tqdm(data, desc='Downloading videos'):
    # The id is the last path component up to its first dot.
    video_id = video_path.split('/')[-1].split('.')[0]
    if existed_video_id.get(video_id, False):
        continue
    video_downloaded, caption_downloaded = download_video_with_subtitles(video_id)
    if video_downloaded:
        # Remember the id so a video listed several times in the dataset is
        # fetched only once per run.
        existed_video_id[video_id] = True
    if not caption_downloaded:
        continue
    # convert xml to vtt
    xml_file_path = f'subtitles_xml/{video_id} (a.en).xml'
    if not os.path.isfile(xml_file_path):
        # An English track was saved, but not under the '(a.en)' name this
        # script converts; skip instead of failing on a missing file.
        print(f"No '(a.en)' subtitle file for video {video_id}; skipping VTT conversion")
        continue
    try:
        convert_xml_vtt(xml_file_path, f'subtitles_vtt/{video_id}.vtt')
    except Exception as e:
        # One unreadable subtitle file must not abort the whole batch.
        print(f"Error converting subtitles for video {video_id}: {str(e)}")