#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Downloads FaceForensics++ and Deep Fake Detection public data release

Example usage:
    see -h or https://github.com/ondyari/FaceForensics
"""
import argparse
import os
import urllib
import urllib.request
import tempfile
import time
import sys
import json
import random
from tqdm import tqdm
from os.path import join
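
# Example invocation (hypothetical output directory; all flags are defined
# in parse_args() below, and a small -n keeps a first test download quick):
#   python download.py ./faceforensics_data -d Deepfakes -c c23 -t videos -n 5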

# URLs and filenames
FILELIST_URL = 'misc/filelist.json'
DEEPFAKES_DETECTION_URL = 'misc/deepfake_detection_filenames.json'
DEEPFAKES_MODEL_NAMES = ['decoder_A.h5', 'decoder_B.h5', 'encoder.h5']

# Parameters
DATASETS = {
    'original_youtube_videos': 'misc/downloaded_youtube_videos.zip',
    'original_youtube_videos_info': 'misc/downloaded_youtube_videos_info.zip',
    'original': 'original_sequences/youtube',
    'DeepFakeDetection_original': 'original_sequences/actors',
    'Deepfakes': 'manipulated_sequences/Deepfakes',
    'DeepFakeDetection': 'manipulated_sequences/DeepFakeDetection',
    'Face2Face': 'manipulated_sequences/Face2Face',
    'FaceShifter': 'manipulated_sequences/FaceShifter',
    'FaceSwap': 'manipulated_sequences/FaceSwap',
    'NeuralTextures': 'manipulated_sequences/NeuralTextures'
    }
ALL_DATASETS = ['original', 'DeepFakeDetection_original', 'Deepfakes',
                'DeepFakeDetection', 'Face2Face', 'FaceShifter', 'FaceSwap',
                'NeuralTextures']
COMPRESSION = ['raw', 'c23', 'c40']
TYPE = ['videos', 'masks', 'models']
SERVERS = ['EU', 'EU2', 'CA']

def parse_args():
    parser = argparse.ArgumentParser(
        description='Downloads FaceForensics public data release.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('output_path', type=str, help='Output directory.')
    parser.add_argument('-d', '--dataset', type=str, default='all',
                        help='Which dataset to download: pristine or '
                             'manipulated data, or the original downloaded '
                             'youtube videos.',
                        choices=list(DATASETS.keys()) + ['all']
                        )
    parser.add_argument('-c', '--compression', type=str, default='raw',
                        help='Which compression degree. All videos have been '
                             'generated with h264 at a varying constant rate '
                             'factor (crf). Raw (c0) videos are losslessly '
                             'compressed.',
                        choices=COMPRESSION
                        )
    parser.add_argument('-t', '--type', type=str, default='videos',
                        help='Which file type: videos, masks (only for the '
                             'manipulation methods), or models (only for '
                             'Deepfakes).',
                        choices=TYPE
                        )
    parser.add_argument('-n', '--num_videos', type=int, default=None,
                        help='Number of videos to download if you do not '
                             'want the full dataset.')
    parser.add_argument('--server', type=str, default='EU',
                        help='Server to download the data from. If you '
                             'encounter a slow download speed, consider '
                             'changing the server.',
                        choices=SERVERS
                        )
    args = parser.parse_args()

    # URLs
    server = args.server
    if server == 'EU':
        server_url = 'http://canis.vc.in.tum.de:8100/'
    elif server == 'EU2':
        server_url = 'http://kaldir.vc.in.tum.de/faceforensics/'
    elif server == 'CA':
        server_url = 'http://falas.cmpt.sfu.ca:8100/'
    else:
        raise Exception('Wrong server name. Choices: {}'.format(str(SERVERS)))
    args.tos_url = server_url + 'webpage/FaceForensics_TOS.pdf'
    args.base_url = server_url + 'v3/'
    args.deepfakes_model_url = server_url + 'v3/manipulated_sequences/' + \
        'Deepfakes/models/'

    return args

def download_files(filenames, base_url, output_path, report_progress=True):
    os.makedirs(output_path, exist_ok=True)
    if report_progress:
        filenames = tqdm(filenames)
    for filename in filenames:
        download_file(base_url + filename, join(output_path, filename))

def reporthook(count, block_size, total_size):
    """Progress callback for urllib.request.urlretrieve."""
    global start_time
    if count == 0:
        start_time = time.time()
        return
    # Guard against a zero duration on the first reported block
    duration = max(time.time() - start_time, 1e-6)
    progress_size = int(count * block_size)
    speed = int(progress_size / (1024 * duration))
    percent = int(count * block_size * 100 / total_size)
    sys.stdout.write('\rProgress: %d%%, %d MB, %d KB/s, %d seconds passed' %
                     (percent, progress_size / (1024 * 1024), speed,
                      duration))
    sys.stdout.flush()

def download_file(url, out_file, report_progress=False):
    out_dir = os.path.dirname(out_file)
    if not os.path.isfile(out_file):
        # Download to a temporary file first and rename on success, so an
        # interrupted download never leaves a partial file behind
        fh, out_file_tmp = tempfile.mkstemp(dir=out_dir)
        f = os.fdopen(fh, 'w')
        f.close()
        if report_progress:
            urllib.request.urlretrieve(url, out_file_tmp,
                                       reporthook=reporthook)
        else:
            urllib.request.urlretrieve(url, out_file_tmp)
        os.rename(out_file_tmp, out_file)
    else:
        tqdm.write('WARNING: skipping download of existing file ' + out_file)
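
# A minimal sketch of calling download_file on its own (hypothetical local
# path; main() below normally drives it through download_files):
#   download_file('http://kaldir.vc.in.tum.de/faceforensics/v3/misc/filelist.json',
#                 './filelist.json', report_progress=True)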

def main(args):
    # TOS
    print('By pressing any key to continue you confirm that you have agreed '
          'to the FaceForensics terms of use as described at:')
    print(args.tos_url)
    print('***')
    print('Press any key to continue, or CTRL-C to exit.')
    _ = input('')

    # Extract arguments
    c_datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS
    c_type = args.type
    c_compression = args.compression
    num_videos = args.num_videos
    output_path = args.output_path
    os.makedirs(output_path, exist_ok=True)

    # Check for special dataset cases
    for dataset in c_datasets:
        dataset_path = DATASETS[dataset]
        # Special case: the original youtube videos ship as a single zip file
        if 'original_youtube_videos' in dataset:
            print('Downloading original youtube videos.')
            if 'info' not in dataset_path:
                print('Please be patient, this may take a while (~40 GB)')
                suffix = ''
            else:
                suffix = 'info'
            download_file(args.base_url + '/' + dataset_path,
                          out_file=join(output_path,
                                        'downloaded_videos{}.zip'.format(
                                            suffix)),
                          report_progress=True)
            return

        # Else: regular datasets
        print('Downloading {} of dataset "{}"'.format(c_type, dataset_path))

        # Get the filelist from the server
        if 'DeepFakeDetection' in dataset_path or 'actors' in dataset_path:
            filepaths = json.loads(urllib.request.urlopen(
                args.base_url + '/' + DEEPFAKES_DETECTION_URL
                ).read().decode('utf-8'))
            if 'actors' in dataset_path:
                filelist = filepaths['actors']
            else:
                filelist = filepaths['DeepFakesDetection']
        elif 'original' in dataset_path:
            # Load filelist from server; originals are single videos
            file_pairs = json.loads(urllib.request.urlopen(
                args.base_url + '/' + FILELIST_URL).read().decode('utf-8'))
            filelist = []
            for pair in file_pairs:
                filelist += pair
        else:
            # Load filelist from server; manipulated videos are named after
            # the source/target pair in both directions
            file_pairs = json.loads(urllib.request.urlopen(
                args.base_url + '/' + FILELIST_URL).read().decode('utf-8'))
            filelist = []
            for pair in file_pairs:
                filelist.append('_'.join(pair))
                if c_type != 'models':
                    filelist.append('_'.join(pair[::-1]))

        # Maybe limit the number of videos to download
        if num_videos is not None and num_videos > 0:
            print('Downloading the first {} videos'.format(num_videos))
            filelist = filelist[:num_videos]

        # Server and local paths
        dataset_videos_url = args.base_url + '{}/{}/{}/'.format(
            dataset_path, c_compression, c_type)
        dataset_mask_url = args.base_url + '{}/masks/videos/'.format(
            dataset_path)

        if c_type == 'videos':
            dataset_output_path = join(output_path, dataset_path,
                                       c_compression, c_type)
            print('Output path: {}'.format(dataset_output_path))
            filelist = [filename + '.mp4' for filename in filelist]
            download_files(filelist, dataset_videos_url, dataset_output_path)
        elif c_type == 'masks':
            dataset_output_path = join(output_path, dataset_path, c_type,
                                       'videos')
            print('Output path: {}'.format(dataset_output_path))
            if 'original' in dataset:
                if args.dataset != 'all':
                    print('Only videos available for original data. '
                          'Aborting.')
                    return
                else:
                    print('Only videos available for original data. '
                          'Skipping original.\n')
                    continue
            if 'FaceShifter' in dataset:
                print('Masks not available for FaceShifter. Aborting.')
                return
            filelist = [filename + '.mp4' for filename in filelist]
            download_files(filelist, dataset_mask_url, dataset_output_path)
        # Else: models for Deepfakes
        else:
            if dataset != 'Deepfakes' and c_type == 'models':
                print('Models only available for Deepfakes. Aborting.')
                return
            dataset_output_path = join(output_path, dataset_path, c_type)
            print('Output path: {}'.format(dataset_output_path))

            # Get Deepfakes models: each video folder holds the fixed set of
            # encoder/decoder weights listed in DEEPFAKES_MODEL_NAMES
            for folder in tqdm(filelist):
                folder_filelist = DEEPFAKES_MODEL_NAMES

                # Folder paths
                folder_base_url = args.deepfakes_model_url + folder + '/'
                folder_dataset_output_path = join(dataset_output_path,
                                                  folder)
                download_files(folder_filelist, folder_base_url,
                               folder_dataset_output_path,
                               report_progress=False)  # shown by outer tqdm


if __name__ == "__main__":
    args = parse_args()
    main(args)
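
# For reference, a run such as
#   python download.py ./data -d Deepfakes -c c23 -t videos
# should (given the path construction above) place files under
#   ./data/manipulated_sequences/Deepfakes/c23/videos/<source>_<target>.mp4
# where the <source>_<target> pairs come from misc/filelist.json.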