#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date   : 2020-05-17
# @Author : Shawn Shan (shansixiong@cs.uchicago.edu)
# @Link   : https://www.shawnshan.com/

import errno
import glob
import gzip
import hashlib
import json
import os
import pickle
import random
import shutil
import sys
import tarfile
import zipfile

import PIL
import pkg_resources
import six
from keras.utils import Progbar
from six.moves.urllib.error import HTTPError, URLError

# Temporarily redirect stderr to /dev/null so Keras' backend banner is not
# printed when it is imported.
stderr = sys.stderr
sys.stderr = open(os.devnull, 'w')
import keras
sys.stderr = stderr

import keras.backend as K
import numpy as np
import tensorflow as tf
from PIL import Image, ExifTags
from keras.layers import Dense, Activation
from keras.models import Model
from keras.preprocessing import image

from fawkes.align_face import align
from six.moves.urllib.request import urlopen

if sys.version_info[0] == 2:
    def urlretrieve(url, filename, reporthook=None, data=None):
        def chunk_read(response, chunk_size=8192, reporthook=None):
            content_type = response.info().get('Content-Length')
            total_size = -1
            if content_type is not None:
                total_size = int(content_type.strip())
            count = 0
            while True:
                chunk = response.read(chunk_size)
                count += 1
                if reporthook is not None:
                    reporthook(count, chunk_size, total_size)
                if chunk:
                    yield chunk
                else:
                    break

        response = urlopen(url, data)
        with open(filename, 'wb') as fd:
            for chunk in chunk_read(response, reporthook=reporthook):
                fd.write(chunk)
else:
    from six.moves.urllib.request import urlretrieve

def clip_img(X, preprocessing='raw'):
    X = reverse_preprocess(X, preprocessing)
    X = np.clip(X, 0.0, 255.0)
    X = preprocess(X, preprocessing)
    return X


IMG_SIZE = 112
PREPROCESS = 'raw'

def load_image(path):
    try:
        img = Image.open(path)
    except PIL.UnidentifiedImageError:
        return None
    except IsADirectoryError:
        return None

    try:
        info = img._getexif()
    except AttributeError:
        # Formats without JPEG-style EXIF support (e.g. PNG) do not expose
        # _getexif(); treat them as having no orientation metadata.
        info = None
    except OSError:
        return None

    if info is not None:
        # Rotate the image according to its EXIF orientation tag, if present.
        for orientation in ExifTags.TAGS.keys():
            if ExifTags.TAGS[orientation] == 'Orientation':
                break
        exif = dict(info.items())
        if orientation in exif.keys():
            if exif[orientation] == 3:
                img = img.rotate(180, expand=True)
            elif exif[orientation] == 6:
                img = img.rotate(270, expand=True)
            elif exif[orientation] == 8:
                img = img.rotate(90, expand=True)
            else:
                pass

    img = img.convert('RGB')
    image_array = image.img_to_array(img)
    return image_array
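# Illustrative usage (the path below is hypothetical): load_image returns an
# H x W x 3 float array with values in [0, 255], or None if the file cannot be
# read as an image.
#   arr = load_image("photos/person.jpg")
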
class Faces(object):
    def __init__(self, image, aligner, verbose=1, eval_local=False, preprocessing=True, no_align=False):
        self.verbose = verbose
        self.no_align = no_align
        self.aligner = aligner
        self.margin = 30
        self.org_faces = [image]  # single image wrapped in a list
        self.cropped_faces = []
        self.cropped_faces_shape = []
        self.cropped_index = []
        self.start_end_ls = []
        self.callback_idx = []
        self.images_without_face = []

        # Process the single image.
        cur_img = np.array(image)
        if not self.no_align:
            align_img = align(cur_img, self.aligner)
            if align_img is None:
                if self.verbose:
                    print("Found 0 face(s) in the image")
                self.images_without_face.append(0)
                return
            cur_faces = align_img[0]
        else:
            cur_faces = [cur_img]

        cur_faces = [face for face in cur_faces if face.shape[0] != 0 and face.shape[1] != 0]
        cur_shapes = [f.shape[:-1] for f in cur_faces]

        cur_faces_square = []
        if self.verbose and not self.no_align:
            print("Found {} face(s) in the image".format(len(cur_faces)))
        if eval_local:
            cur_faces = cur_faces[:1]

        for img in cur_faces:
            if eval_local:
                base = resize(img, (IMG_SIZE, IMG_SIZE))
            else:
                # Pad the face crop into a mean-colored square before resizing,
                # so its aspect ratio is preserved.
                long_size = max([img.shape[1], img.shape[0]]) + self.margin
                base = np.ones((long_size, long_size, 3)) * np.mean(img, axis=(0, 1))
                start1, end1 = get_ends(long_size, img.shape[0])
                start2, end2 = get_ends(long_size, img.shape[1])
                base[start1:end1, start2:end2, :] = img
                cur_start_end = (start1, end1, start2, end2)
                self.start_end_ls.append(cur_start_end)
            cur_faces_square.append(base)

        cur_faces_square = [resize(f, (IMG_SIZE, IMG_SIZE)) for f in cur_faces_square]
        self.cropped_faces.extend(cur_faces_square)

        if not self.no_align:
            cur_index = align_img[1]
            self.cropped_faces_shape.extend(cur_shapes)
            self.cropped_index.extend(cur_index[:len(cur_faces_square)])
            self.callback_idx.extend([0] * len(cur_faces_square))

        if len(self.cropped_faces) == 0:
            return

        self.cropped_faces = np.array(self.cropped_faces)

        if preprocessing:
            self.cropped_faces = preprocess(self.cropped_faces, PREPROCESS)

        self.cloaked_cropped_faces = None
        self.cloaked_faces = np.copy(self.org_faces)

    def get_faces(self):
        return self.cropped_faces

    def merge_faces(self, protected_images, original_images):
        if self.no_align:
            return np.clip(protected_images, 0.0, 255.0), self.images_without_face

        self.cloaked_faces = np.copy(self.org_faces)

        for i in range(len(self.cropped_faces)):
            cur_protected = protected_images[i]
            cur_original = original_images[i]

            org_shape = self.cropped_faces_shape[i]
            old_square_shape = max([org_shape[0], org_shape[1]]) + self.margin

            cur_protected = resize(cur_protected, (old_square_shape, old_square_shape))
            cur_original = resize(cur_original, (old_square_shape, old_square_shape))

            start1, end1, start2, end2 = self.start_end_ls[i]

            # The cloak is the pixel-wise difference between the protected and
            # original padded crops, cropped back to the original face region
            # and added onto the face's bounding box in the source image.
            reshape_cloak = cur_protected - cur_original
            reshape_cloak = reshape_cloak[start1:end1, start2:end2, :]

            callback_id = self.callback_idx[i]
            bb = self.cropped_index[i]
            self.cloaked_faces[callback_id][bb[0]:bb[2], bb[1]:bb[3], :] += reshape_cloak.astype(np.uint8)

        for i in range(0, len(self.cloaked_faces)):
            self.cloaked_faces[i] = np.clip(self.cloaked_faces[i], 0.0, 255.0)

        return self.cloaked_faces, self.images_without_face

def get_ends(longsize, window):
    start = (longsize - window) // 2
    end = start + window
    return start, end

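# For example, get_ends(10, 4) returns (3, 7): a window of length 4 centered
# (up to integer rounding) within an axis of length 10.
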
def dump_dictionary_as_json(dict, outfile):
    j = json.dumps(dict)
    with open(outfile, "wb") as f:
        f.write(j.encode())


def load_victim_model(number_classes, teacher_model=None, end2end=False):
    for l in teacher_model.layers:
        l.trainable = end2end

    x = teacher_model.layers[-1].output
    x = Dense(number_classes)(x)
    x = Activation('softmax', name="act")(x)
    model = Model(teacher_model.input, x)
    opt = keras.optimizers.Adadelta()
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model


def resize(img, sz):
    assert np.min(img) >= 0 and np.max(img) <= 255.0
    from keras.preprocessing import image
    im_data = image.array_to_img(img).resize((sz[1], sz[0]))
    im_data = image.img_to_array(im_data)
    return im_data

def init_gpu(gpu):
    '''code to initialize gpu in tf2'''
    if isinstance(gpu, list):
        gpu_num = ','.join([str(i) for i in gpu])
    else:
        gpu_num = str(gpu)
    if "CUDA_VISIBLE_DEVICES" in os.environ:
        print('GPU already initiated')
        return
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_num
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
            tf.config.experimental.set_memory_growth(gpus[0], True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
        except RuntimeError as e:
            # Visible devices and memory growth must be set before the GPU is initialized.
            print(e)

def fix_gpu_memory(mem_fraction=1):
    # Session-style GPU setup. Uses the tf.compat.v1 API so it also runs when
    # TensorFlow 2.x is installed (tf.GPUOptions / tf.ConfigProto are TF1-only names).
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf_config = None
    if tf.test.is_gpu_available():
        gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
        tf_config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
        tf_config.gpu_options.allow_growth = True
        tf_config.log_device_placement = False
    init_op = tf.compat.v1.global_variables_initializer()
    sess = tf.compat.v1.Session(config=tf_config)
    sess.run(init_op)
    K.set_session(sess)
    return sess

def preprocess(X, method):
    # Only 'raw' (no-op) and 'imagenet' preprocessing are implemented here.
    assert method in {'raw', 'imagenet', 'inception', 'mnist'}

    if method == 'raw':
        pass
    elif method == 'imagenet':
        X = imagenet_preprocessing(X)
    else:
        raise Exception('unknown method %s' % method)

    return X


def reverse_preprocess(X, method):
    # Inverse of preprocess(): maps preprocessed values back to raw [0, 255] pixels.
    assert method in {'raw', 'imagenet', 'inception', 'mnist'}

    if method == 'raw':
        pass
    elif method == 'imagenet':
        X = imagenet_reverse_preprocessing(X)
    else:
        raise Exception('unknown method %s' % method)

    return X

def imagenet_preprocessing(x, data_format=None):
    if data_format is None:
        data_format = K.image_data_format()
    assert data_format in ('channels_last', 'channels_first')

    x = np.array(x)
    if data_format == 'channels_first':
        # 'RGB'->'BGR'
        if x.ndim == 3:
            x = x[::-1, ...]
        else:
            x = x[:, ::-1, ...]
    else:
        # 'RGB'->'BGR'
        x = x[..., ::-1]

    mean = [103.939, 116.779, 123.68]
    std = None

    # Zero-center by mean pixel
    if data_format == 'channels_first':
        if x.ndim == 3:
            x[0, :, :] -= mean[0]
            x[1, :, :] -= mean[1]
            x[2, :, :] -= mean[2]
            if std is not None:
                x[0, :, :] /= std[0]
                x[1, :, :] /= std[1]
                x[2, :, :] /= std[2]
        else:
            x[:, 0, :, :] -= mean[0]
            x[:, 1, :, :] -= mean[1]
            x[:, 2, :, :] -= mean[2]
            if std is not None:
                x[:, 0, :, :] /= std[0]
                x[:, 1, :, :] /= std[1]
                x[:, 2, :, :] /= std[2]
    else:
        x[..., 0] -= mean[0]
        x[..., 1] -= mean[1]
        x[..., 2] -= mean[2]
        if std is not None:
            x[..., 0] /= std[0]
            x[..., 1] /= std[1]
            x[..., 2] /= std[2]

    return x

def imagenet_reverse_preprocessing(x, data_format=None):
    import keras.backend as K
    x = np.array(x)
    if data_format is None:
        data_format = K.image_data_format()
    assert data_format in ('channels_last', 'channels_first')

    if data_format == 'channels_first':
        if x.ndim == 3:
            # Zero-center by mean pixel
            x[0, :, :] += 103.939
            x[1, :, :] += 116.779
            x[2, :, :] += 123.68
            # 'BGR'->'RGB'
            x = x[::-1, :, :]
        else:
            x[:, 0, :, :] += 103.939
            x[:, 1, :, :] += 116.779
            x[:, 2, :, :] += 123.68
            x = x[:, ::-1, :, :]
    else:
        # Zero-center by mean pixel
        x[..., 0] += 103.939
        x[..., 1] += 116.779
        x[..., 2] += 123.68
        # 'BGR'->'RGB'
        x = x[..., ::-1]
    return x

def reverse_process_cloaked(x, preprocess='imagenet'):
    # x = clip_img(x, preprocess)
    return reverse_preprocess(x, preprocess)


def build_bottleneck_model(model, cut_off):
    bottleneck_model = Model(model.input, model.get_layer(cut_off).output)
    bottleneck_model.compile(loss='categorical_crossentropy',
                             optimizer='adam',
                             metrics=['accuracy'])
    return bottleneck_model

def load_extractor(name):
    hash_map = {"extractor_2": "ce703d481db2b83513bbdafa27434703",
                "extractor_0": "94854151fd9077997d69ceda107f9c6b"}
    assert name in ["extractor_2", 'extractor_0']
    model_file = pkg_resources.resource_filename("fawkes", "model/{}.h5".format(name))
    cur_hash = hash_map[name]
    model_dir = pkg_resources.resource_filename("fawkes", "model/")
    os.makedirs(model_dir, exist_ok=True)
    # Download the extractor weights (skipped if a file with a matching MD5 hash
    # is already cached locally), then load them.
    get_file("{}.h5".format(name), "http://mirror.cs.uchicago.edu/fawkes/files/{}.h5".format(name),
             cache_dir=model_dir, cache_subdir='', md5_hash=cur_hash)
    model = keras.models.load_model(model_file)
    model = Extractor(model)
    return model

class Extractor(object):
    def __init__(self, model):
        self.model = model

    def predict(self, imgs):
        # Scale pixels to [0, 1] and return L2-normalized feature embeddings.
        imgs = imgs / 255.0
        embeds = l2_norm(self.model(imgs))
        return embeds

    def __call__(self, x):
        return self.predict(x)

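# Illustrative usage (the variable name `face_batch` is hypothetical): embed a
# batch of 112x112 face crops with pixel values in [0, 255].
#   extractor = load_extractor("extractor_2")
#   embeddings = extractor(face_batch)
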
def get_dataset_path(dataset):
    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
    if not os.path.exists(os.path.join(model_dir, "config.json")):
        raise Exception("Please configure the datasets before running the protection code. See the README and config.py for details.")

    config = json.load(open(os.path.join(model_dir, "config.json"), 'r'))
    if dataset not in config:
        raise Exception(
            "Dataset {} does not exist; please download it to data/ and add its path to this function... Abort".format(
                dataset))
    return config[dataset]['train_dir'], config[dataset]['test_dir'], config[dataset]['num_classes'], config[dataset][
        'num_images']


def dump_image(x, filename, format="png", scale=False):
    img = image.array_to_img(x, scale=scale)
    img.save(filename, format)
    return

def load_embeddings(feature_extractors_names):
    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
    for extractor_name in feature_extractors_names:
        fp = gzip.open(os.path.join(model_dir, "{}_emb.p.gz".format(extractor_name)), 'rb')
        path2emb = pickle.load(fp)
        fp.close()

    return path2emb


def extractor_ls_predict(feature_extractors_ls, X):
    feature_ls = []
    for extractor in feature_extractors_ls:
        cur_features = extractor.predict(X)
        feature_ls.append(cur_features)
    concated_feature_ls = np.concatenate(feature_ls, axis=1)
    return concated_feature_ls

def pairwise_l2_distance(A, B):
    BT = B.transpose()
    vecProd = np.dot(A, BT)

    SqA = A ** 2
    sumSqA = np.matrix(np.sum(SqA, axis=1))
    sumSqAEx = np.tile(sumSqA.transpose(), (1, vecProd.shape[1]))

    SqB = B ** 2
    sumSqB = np.sum(SqB, axis=1)
    sumSqBEx = np.tile(sumSqB, (vecProd.shape[0], 1))

    SqED = sumSqBEx + sumSqAEx - 2 * vecProd
    SqED[SqED < 0] = 0.0
    ED = np.sqrt(SqED)
    return ED

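# pairwise_l2_distance computes all pairwise Euclidean distances at once using
# the identity ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b. For A of shape (n, d)
# and B of shape (m, d) it returns an (n, m) distance matrix; the clamp to zero
# guards against small negative values caused by floating-point round-off.
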
def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, metric='l2'):
    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')

    original_feature_x = extractor_ls_predict(feature_extractors_ls, imgs)

    path2emb = load_embeddings(feature_extractors_names)

    items = list([(k, v) for k, v in path2emb.items()])
    paths = [p[0] for p in items]
    embs = [p[1] for p in items]
    embs = np.array(embs)

    pair_dist = pairwise_l2_distance(original_feature_x, embs)
    pair_dist = np.array(pair_dist)

    # For each candidate identity, take its distance to the closest input face,
    # then pick one of the 20 candidates that are farthest from the input faces
    # in feature space, at random.
    max_sum = np.min(pair_dist, axis=0)
    max_id_ls = np.argsort(max_sum)[::-1]
    max_id = random.choice(max_id_ls[:20])

    target_data_id = paths[int(max_id)]
    print("target ID: {}".format(target_data_id))

    image_dir = os.path.join(model_dir, "target_data/{}".format(target_data_id))
    os.makedirs(os.path.join(model_dir, "target_data"), exist_ok=True)
    os.makedirs(image_dir, exist_ok=True)

    # Fetch up to 10 example images of the target identity, skipping ones that
    # are already cached locally.
    for i in range(10):
        if os.path.exists(os.path.join(model_dir, "target_data/{}/{}.jpg".format(target_data_id, i))):
            continue
        try:
            get_file("{}.jpg".format(i),
                     "http://mirror.cs.uchicago.edu/fawkes/files/target_data/{}/{}.jpg".format(target_data_id, i),
                     cache_dir=model_dir, cache_subdir='target_data/{}/'.format(target_data_id))
        except Exception:
            pass

    image_paths = glob.glob(image_dir + "/*.jpg")

    target_images = [image.img_to_array(image.load_img(cur_path)) for cur_path in
                     image_paths]
    target_images = np.array([resize(x, (IMG_SIZE, IMG_SIZE)) for x in target_images])
    target_images = preprocess(target_images, PREPROCESS)

    # Repeat the target pool until it is at least as large as the input batch,
    # then sample one target image per input image.
    target_images = list(target_images)
    while len(target_images) < len(imgs):
        target_images += target_images

    target_images = random.sample(target_images, len(imgs))
    return np.array(target_images)

def l2_norm(x, axis=1):
    """l2 norm"""
    norm = tf.norm(x, axis=axis, keepdims=True)
    output = x / norm
    return output

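# For x of shape (batch, dim), l2_norm(x) rescales each row to unit Euclidean
# length, so dot products between embeddings become cosine similarities.
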
""" TensorFlow implementation get_file | |
https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/utils/data_utils.py#L168-L297 | |
""" | |
def get_file(fname,
             origin,
             untar=False,
             md5_hash=None,
             file_hash=None,
             cache_subdir='datasets',
             hash_algorithm='auto',
             extract=False,
             archive_format='auto',
             cache_dir=None):
    if cache_dir is None:
        cache_dir = os.path.join(os.path.expanduser('~'), '.keras')
    if md5_hash is not None and file_hash is None:
        file_hash = md5_hash
        hash_algorithm = 'md5'
    datadir_base = os.path.expanduser(cache_dir)
    if not os.access(datadir_base, os.W_OK):
        datadir_base = os.path.join('/tmp', '.keras')
    datadir = os.path.join(datadir_base, cache_subdir)
    _makedirs_exist_ok(datadir)
    # fname = path_to_string(fname)
    if untar:
        untar_fpath = os.path.join(datadir, fname)
        fpath = untar_fpath + '.tar.gz'
    else:
        fpath = os.path.join(datadir, fname)

    download = False
    if os.path.exists(fpath):
        # File found; verify integrity if a hash was provided.
        if file_hash is not None:
            if not validate_file(fpath, file_hash, algorithm=hash_algorithm):
                print('A local file was found, but it seems to be '
                      'incomplete or outdated because the ' + hash_algorithm +
                      ' file hash does not match the original value of ' + file_hash +
                      ' so we will re-download the data.')
                download = True
    else:
        download = True

    if download:
        print('Downloading data from', origin)

        class ProgressTracker(object):
            # Maintain progbar for the lifetime of download.
            # This design was chosen for Python 2.7 compatibility.
            progbar = None

        def dl_progress(count, block_size, total_size):
            if ProgressTracker.progbar is None:
                if total_size == -1:
                    total_size = None
                ProgressTracker.progbar = Progbar(total_size)
            else:
                ProgressTracker.progbar.update(count * block_size)

        error_msg = 'URL fetch failure on {}: {} -- {}'
        try:
            try:
                urlretrieve(origin, fpath, dl_progress)
            except HTTPError as e:
                raise Exception(error_msg.format(origin, e.code, e.msg))
            except URLError as e:
                raise Exception(error_msg.format(origin, e.errno, e.reason))
        except (Exception, KeyboardInterrupt) as e:
            if os.path.exists(fpath):
                os.remove(fpath)
            raise
        ProgressTracker.progbar = None

    if untar:
        if not os.path.exists(untar_fpath):
            _extract_archive(fpath, datadir, archive_format='tar')
        return untar_fpath

    if extract:
        _extract_archive(fpath, datadir, archive_format)

    return fpath

def _extract_archive(file_path, path='.', archive_format='auto'):
    if archive_format is None:
        return False
    if archive_format == 'auto':
        archive_format = ['tar', 'zip']
    if isinstance(archive_format, six.string_types):
        archive_format = [archive_format]

    for archive_type in archive_format:
        if archive_type == 'tar':
            open_fn = tarfile.open
            is_match_fn = tarfile.is_tarfile
        if archive_type == 'zip':
            open_fn = zipfile.ZipFile
            is_match_fn = zipfile.is_zipfile

        if is_match_fn(file_path):
            with open_fn(file_path) as archive:
                try:
                    archive.extractall(path)
                except (tarfile.TarError, RuntimeError, KeyboardInterrupt):
                    if os.path.exists(path):
                        if os.path.isfile(path):
                            os.remove(path)
                        else:
                            shutil.rmtree(path)
                    raise
            return True
    return False

def _makedirs_exist_ok(datadir):
    if six.PY2:
        # Python 2 doesn't have the exist_ok arg, so we try-except here.
        try:
            os.makedirs(datadir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
    else:
        os.makedirs(datadir, exist_ok=True)  # pylint: disable=unexpected-keyword-arg

def validate_file(fpath, file_hash, algorithm='auto', chunk_size=65535):
    """Validates a file against a sha256 or md5 hash.

    Arguments:
        fpath: path to the file being validated
        file_hash: The expected hash string of the file.
            The sha256 and md5 hash algorithms are both supported.
        algorithm: Hash algorithm, one of 'auto', 'sha256', or 'md5'.
            The default 'auto' detects the hash algorithm in use.
        chunk_size: Bytes to read at a time, important for large files.

    Returns:
        Whether the file is valid
    """
    if (algorithm == 'sha256') or (algorithm == 'auto' and len(file_hash) == 64):
        hasher = 'sha256'
    else:
        hasher = 'md5'

    if str(_hash_file(fpath, hasher, chunk_size)) == str(file_hash):
        return True
    else:
        return False

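# Illustrative check (the file name is hypothetical; the MD5 value matches the
# "extractor_2" entry in load_extractor's hash_map above):
#   ok = validate_file("extractor_2.h5", "ce703d481db2b83513bbdafa27434703", algorithm='md5')
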
def _hash_file(fpath, algorithm='sha256', chunk_size=65535):
    """Calculates a file sha256 or md5 hash.

    Example:

    ```python
    _hash_file('/path/to/file.zip')
    'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
    ```

    Arguments:
        fpath: path to the file being validated
        algorithm: hash algorithm, one of `'auto'`, `'sha256'`, or `'md5'`.
            The default `'auto'` detects the hash algorithm in use.
        chunk_size: Bytes to read at a time, important for large files.

    Returns:
        The file hash
    """
    # There is no expected hash string to inspect here, so 'auto' defaults to sha256.
    if algorithm in ('sha256', 'auto'):
        hasher = hashlib.sha256()
    else:
        hasher = hashlib.md5()

    with open(fpath, 'rb') as fpath_file:
        for chunk in iter(lambda: fpath_file.read(chunk_size), b''):
            hasher.update(chunk)
    return hasher.hexdigest()