Spaces:

fffiloni
/

miniGPT4-Video-Zero

Sleeping

App Files Files Community

miniGPT4-Video-Zero / minigpt4_video_inference.py

fffiloni

Upload 164 files

2ada650 verified 6 months ago

raw

history blame

3.33 kB

	import json
	from tqdm import tqdm
	from pytubefix import YouTube

	import xml.etree.ElementTree as ET
	import os

	# Load the dataset index; the download loop further down iterates over `data`.
	with open('VideoInstruct100K.json', 'r', encoding='utf-8') as f:
	    data = json.load(f)

	# Index the videos that are already on disk so they can be skipped later.
	# The folder is created first: os.listdir raises FileNotFoundError when
	# 'videos' does not exist yet (e.g. the very first run of this script).
	os.makedirs('videos', exist_ok=True)
	existed_video_id = {}
	for video_name in os.listdir('videos'):
	    # The id is the file name up to its first dot ("abc.mp4" -> "abc").
	    video_id = video_name.split('.')[0]
	    existed_video_id[video_id] = True



	def download_video_with_subtitles(video_id):
	    """Download a YouTube video and its English caption tracks.

	    The video is written to ``videos/<video_id>.mp4``. Every caption track
	    whose language code contains ``'en'`` is then downloaded into
	    ``subtitles_xml`` with ``video_id`` as its title and ``srt=False``.

	    Failures are reported with ``print`` instead of being raised, so one
	    bad video does not abort a batch run.

	    Args:
	        video_id: YouTube video id; it is interpolated into the watch URL.

	    Returns:
	        A ``(video_downloaded, caption_downloaded)`` tuple of booleans.
	        ``(False, False)`` means the video itself could not be fetched, in
	        which case captions are not attempted.
	    """
	    video_filename = f"{video_id}.mp4"
	    try:
	        # The YouTube object is built inside the try so that a failure while
	        # constructing it is reported like any other download failure.
	        yt = YouTube(f'https://www.youtube.com/watch?v={video_id}')
	        # Get the stream with the highest resolution and download it.
	        stream = yt.streams.get_highest_resolution()
	        stream.download(output_path='videos', filename=video_filename)
	    except Exception as e:
	        print(f"Error downloading video {video_id}: {str(e)}")
	        return False, False

	    # Captions are best-effort: the video is already saved, so a caption
	    # failure only affects the second element of the returned tuple.
	    caption_downloaded = False
	    try:
	        for caption in yt.captions.all():
	            # Select only English captions.
	            if 'en' in caption.code:
	                caption.download(title=f"{video_id}", output_path='subtitles_xml', srt=False)
	                caption_downloaded = True
	    except Exception as e:
	        print(f"Error downloading captions for {video_id}: {str(e)}")
	    return True, caption_downloaded
	def _ms_to_vtt_time(milliseconds):
	    """Format a millisecond offset as a WebVTT timestamp.

	    Offsets below one hour are rendered as ``MM:SS.mmm``; from one hour on
	    an hours field is added (``HH:MM:SS.mmm``) so minutes never exceed 59.
	    """
	    seconds, milliseconds = divmod(milliseconds, 1000)
	    minutes, seconds = divmod(seconds, 60)
	    hours, minutes = divmod(minutes, 60)
	    if hours:
	        return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
	    return f"{minutes:02d}:{seconds:02d}.{milliseconds:03d}"


	def convert_xml_vtt(xml_path, vtt_path):
	    """Convert an XML caption file into a WebVTT file.

	    ``<p>`` elements are consumed in consecutive pairs: the first element
	    of a pair supplies the cue start time (its ``t`` attribute, in
	    milliseconds) and the cue text (its ``<s>`` descendants, stripped and
	    joined with spaces); the second supplies the cue end time. A trailing
	    unpaired ``<p>`` produces no cue.

	    Args:
	        xml_path: Path of the XML caption file to read.
	        vtt_path: Path of the WebVTT file to write (UTF-8).
	    """
	    # Parse the XML subtitle file.
	    root = ET.parse(xml_path).getroot()
	    paragraphs = root.findall(".//p")

	    vtt_subtitle = []
	    for opener, closer in zip(paragraphs[0::2], paragraphs[1::2]):
	        start_time = int(opener.get("t"))
	        end_time = int(closer.get("t"))
	        # An empty <s/> has text None; skip it instead of crashing on .strip().
	        subtitle_text = " ".join(
	            s.text.strip() for s in opener.findall(".//s") if s.text is not None
	        )
	        vtt_subtitle.append(
	            f"{_ms_to_vtt_time(start_time)} --> {_ms_to_vtt_time(end_time)}\n{subtitle_text}\n"
	        )

	    # Join the VTT entries into a single string.
	    vtt_content = "WEBVTT\n\n" + "\n".join(vtt_subtitle)

	    # Save the VTT content to a file.
	    with open(vtt_path, "w", encoding="utf-8") as vtt_file:
	        vtt_file.write(vtt_content)
	import os

	# Make sure every output folder exists before anything is written to it.
	os.makedirs('videos', exist_ok=True)
	os.makedirs('subtitles_vtt', exist_ok=True)
	os.makedirs('subtitles_xml', exist_ok=True)

	# Ids already handled during this run, so an id that appears several
	# times in `data` is downloaded (or fails) only once.
	attempted_video_ids = set()
	for video_path in tqdm(data, desc='Downloading videos'):
	    # The id is the last path component up to its first dot.
	    video_id = video_path.split('/')[-1].split('.')[0]
	    if existed_video_id.get(video_id, False) or video_id in attempted_video_ids:
	        continue
	    attempted_video_ids.add(video_id)

	    video_downloaded, caption_downloaded = download_video_with_subtitles(video_id)
	    if not caption_downloaded:
	        continue

	    # Convert xml to vtt. Only the "(a.en)" track is converted; the caption
	    # that was downloaded may have a different language code, in which case
	    # this file does not exist and the conversion is skipped.
	    xml_file_path = f'subtitles_xml/{video_id} (a.en).xml'
	    if not os.path.isfile(xml_file_path):
	        print(f"No '(a.en)' caption file for video {video_id}; skipping VTT conversion")
	        continue
	    convert_xml_vtt(xml_file_path, f'subtitles_vtt/{video_id}.vtt')