camenduru
/

show

Model card Files Files and versions Community

show / SHOW /download_youtube.py

camenduru's picture

thanks to show ❤

3bbb319 over 1 year ago

1.96 kB

	from __future__ import unicode_literals

	import argparse
	from subprocess import call

	import cv2
	import numpy as np
	import os
	import shutil
	import pandas as pd
	from tqdm import tqdm
	import time

	# Download the YouTube videos listed in <base_path>/videos_links.csv and store
	# each one as <base_path>/<speaker>/videos/<video_fn>.  A video whose frame
	# rate is not already ~29.97 fps is re-encoded with ffmpeg to 30000/1001 fps.
	#
	# Requires the `yt-dlp` and `ffmpeg` executables to be available on PATH.

	parser = argparse.ArgumentParser()
	# base_path is joined into every path below, so fail early with a clear
	# argparse error instead of a TypeError inside os.path.join.
	parser.add_argument('-base_path', '--base_path', required=True, help='base folder path of dataset')
	parser.add_argument('-speaker', '--speaker',
	                    help='download videos of a specific speaker {oliver, jon, conan, rock, chemistry, ellen, almaram, angelica, seth, shelly}')
	args = parser.parse_args()

	BASE_PATH = args.base_path
	df = pd.read_csv(os.path.join(BASE_PATH, "videos_links.csv"))

	# Optionally restrict the download to a single speaker.
	if args.speaker:
	    df = df[df['speaker'] == args.speaker]

	# Every video is first downloaded here, then moved / re-encoded to its final path.
	temp_output_path = './tmp2/temp_video.mp4'
	temp_dir = os.path.dirname(temp_output_path)
	if not os.path.exists(temp_dir):
	    os.makedirs(temp_dir)

	for _, row in tqdm(df.iterrows(), total=df.shape[0]):
	    # The CSV rows are unpacked positionally: exactly three columns are
	    # expected and the third one holds the video URL.
	    _, _, link = row

	    # Skip empty cells (NaN) and links that do not point to YouTube.
	    if not isinstance(link, str) or 'youtube' not in link:
	        continue

	    try:
	        output_path = os.path.join(BASE_PATH, row["speaker"], "videos", row["video_fn"])
	        if not os.path.exists(os.path.dirname(output_path)):
	            os.makedirs(os.path.dirname(output_path))

	        # Pass an argument list (no shell): URLs commonly contain '&' and
	        # other characters that a shell would interpret.
	        download_status = call(['yt-dlp', '-o', temp_output_path, '-f', 'mp4', link])
	        if download_status != 0 or not os.path.exists(temp_output_path):
	            print("Download failed (exit code %s): %s" % (download_status, link))
	            continue

	        # Read the frame rate, always releasing the capture so the temporary
	        # file is no longer held open when it is moved or deleted.
	        cam = cv2.VideoCapture(temp_output_path)
	        try:
	            fps = cam.get(cv2.CAP_PROP_FPS)
	        finally:
	            cam.release()

	        if np.isclose(fps, 29.97, atol=0.03):
	            # Already at the target frame rate: keep the file as downloaded.
	            shutil.move(temp_output_path, output_path)
	        else:
	            # Re-encode to 30000/1001 (~29.97) fps; -y overwrites an existing output.
	            convert_status = call(['ffmpeg', '-y', '-i', temp_output_path,
	                                   '-r', '30000/1001', '-strict', '-2', output_path])
	            if convert_status != 0:
	                print("ffmpeg failed (exit code %s): %s" % (convert_status, link))
	    except Exception as e:
	        # Best effort: report the problem and carry on with the next video.
	        print(e)
	    finally:
	        # Never leave a stale temporary file behind for the next iteration.
	        if os.path.exists(temp_output_path):
	            os.remove(temp_output_path)

	print("Out of a total of %s videos for %s: " % (len(df), args.speaker or "all speakers"))
	print("Successfully downloaded:")

	# Count the files present in the "videos" folder of every speaker that was
	# processed.  Hidden entries are ignored, matching what a plain `ls` lists.
	downloaded = 0
	for speaker in df['speaker'].unique():
	    videos_dir = os.path.join(BASE_PATH, speaker, "videos")
	    if os.path.isdir(videos_dir):
	        downloaded += len([fn for fn in os.listdir(videos_dir) if not fn.startswith('.')])
	print(downloaded)