#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date   : 2020-05-17
# @Author : Shawn Shan (shansixiong@cs.uchicago.edu)
# @Link   : https://www.shawnshan.com/

import errno
import glob
import gzip
import hashlib
import json
import os
import pickle
import random
import shutil
import sys
import tarfile
import zipfile

import PIL
import pkg_resources
import six
from keras.utils import Progbar
from six.moves.urllib.error import HTTPError, URLError

# Temporarily redirect stderr to /dev/null so Keras' backend banner is not
# printed when it is imported.
stderr = sys.stderr
sys.stderr = open(os.devnull, 'w')
import keras
sys.stderr = stderr

import keras.backend as K
import numpy as np
import tensorflow as tf
from PIL import Image, ExifTags
from keras.layers import Dense, Activation
from keras.models import Model
from keras.preprocessing import image

from fawkes.align_face import align
from six.moves.urllib.request import urlopen

if sys.version_info[0] == 2:
    def urlretrieve(url, filename, reporthook=None, data=None):
        def chunk_read(response, chunk_size=8192, reporthook=None):
            content_type = response.info().get('Content-Length')
            total_size = -1
            if content_type is not None:
                total_size = int(content_type.strip())
            count = 0
            while True:
                chunk = response.read(chunk_size)
                count += 1
                if reporthook is not None:
                    reporthook(count, chunk_size, total_size)
                if chunk:
                    yield chunk
                else:
                    break

        response = urlopen(url, data)
        with open(filename, 'wb') as fd:
            for chunk in chunk_read(response, reporthook=reporthook):
                fd.write(chunk)
else:
    from six.moves.urllib.request import urlretrieve

def clip_img(X, preprocessing='raw'):
    X = reverse_preprocess(X, preprocessing)
    X = np.clip(X, 0.0, 255.0)
    X = preprocess(X, preprocessing)
    return X


IMG_SIZE = 112
PREPROCESS = 'raw'

def load_image(path):
    try:
        img = Image.open(path)
    except PIL.UnidentifiedImageError:
        return None
    except IsADirectoryError:
        return None

    try:
        info = img._getexif()
    except AttributeError:
        # Formats without JPEG-style EXIF support (e.g. PNG) do not expose
        # _getexif(); treat them as having no orientation metadata.
        info = None
    except OSError:
        return None

    if info is not None:
        # Rotate the image according to its EXIF orientation tag, if present.
        for orientation in ExifTags.TAGS.keys():
            if ExifTags.TAGS[orientation] == 'Orientation':
                break
        exif = dict(info.items())
        if orientation in exif.keys():
            if exif[orientation] == 3:
                img = img.rotate(180, expand=True)
            elif exif[orientation] == 6:
                img = img.rotate(270, expand=True)
            elif exif[orientation] == 8:
                img = img.rotate(90, expand=True)
            else:
                pass

    img = img.convert('RGB')
    image_array = image.img_to_array(img)
    return image_array
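# Illustrative usage (the path below is hypothetical): load_image returns an
# H x W x 3 float array with values in [0, 255], or None if the file cannot be
# read as an image.
#   arr = load_image("photos/person.jpg")
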
class Faces(object):
    def __init__(self, image, aligner, verbose=1, eval_local=False, preprocessing=True, no_align=False):
        self.verbose = verbose
        self.no_align = no_align
        self.aligner = aligner
        self.margin = 30
        self.org_faces = [image]  # single image wrapped in a list
        self.cropped_faces = []
        self.cropped_faces_shape = []
        self.cropped_index = []
        self.start_end_ls = []
        self.callback_idx = []
        self.images_without_face = []

        # Process the single image.
        cur_img = np.array(image)
        if not self.no_align:
            align_img = align(cur_img, self.aligner)
            if align_img is None:
                if self.verbose:
                    print("Found 0 face(s) in the image")
                self.images_without_face.append(0)
                return
            cur_faces = align_img[0]
        else:
            cur_faces = [cur_img]

        cur_faces = [face for face in cur_faces if face.shape[0] != 0 and face.shape[1] != 0]
        cur_shapes = [f.shape[:-1] for f in cur_faces]

        cur_faces_square = []
        if self.verbose and not self.no_align:
            print("Found {} face(s) in the image".format(len(cur_faces)))
        if eval_local:
            cur_faces = cur_faces[:1]

        for img in cur_faces:
            if eval_local:
                base = resize(img, (IMG_SIZE, IMG_SIZE))
            else:
                # Pad the face crop into a mean-colored square before resizing,
                # so its aspect ratio is preserved.
                long_size = max([img.shape[1], img.shape[0]]) + self.margin
                base = np.ones((long_size, long_size, 3)) * np.mean(img, axis=(0, 1))
                start1, end1 = get_ends(long_size, img.shape[0])
                start2, end2 = get_ends(long_size, img.shape[1])
                base[start1:end1, start2:end2, :] = img
                cur_start_end = (start1, end1, start2, end2)
                self.start_end_ls.append(cur_start_end)
            cur_faces_square.append(base)

        cur_faces_square = [resize(f, (IMG_SIZE, IMG_SIZE)) for f in cur_faces_square]
        self.cropped_faces.extend(cur_faces_square)

        if not self.no_align:
            cur_index = align_img[1]
            self.cropped_faces_shape.extend(cur_shapes)
            self.cropped_index.extend(cur_index[:len(cur_faces_square)])
            self.callback_idx.extend([0] * len(cur_faces_square))

        if len(self.cropped_faces) == 0:
            return

        self.cropped_faces = np.array(self.cropped_faces)

        if preprocessing:
            self.cropped_faces = preprocess(self.cropped_faces, PREPROCESS)

        self.cloaked_cropped_faces = None
        self.cloaked_faces = np.copy(self.org_faces)

    def get_faces(self):
        return self.cropped_faces

    def merge_faces(self, protected_images, original_images):
        if self.no_align:
            return np.clip(protected_images, 0.0, 255.0), self.images_without_face

        self.cloaked_faces = np.copy(self.org_faces)

        for i in range(len(self.cropped_faces)):
            cur_protected = protected_images[i]
            cur_original = original_images[i]

            org_shape = self.cropped_faces_shape[i]
            old_square_shape = max([org_shape[0], org_shape[1]]) + self.margin

            cur_protected = resize(cur_protected, (old_square_shape, old_square_shape))
            cur_original = resize(cur_original, (old_square_shape, old_square_shape))

            start1, end1, start2, end2 = self.start_end_ls[i]

            # The cloak is the pixel-wise difference between the protected and
            # original padded crops, cropped back to the original face region
            # and added onto the face's bounding box in the source image.
            reshape_cloak = cur_protected - cur_original
            reshape_cloak = reshape_cloak[start1:end1, start2:end2, :]

            callback_id = self.callback_idx[i]
            bb = self.cropped_index[i]
            self.cloaked_faces[callback_id][bb[0]:bb[2], bb[1]:bb[3], :] += reshape_cloak.astype(np.uint8)

        for i in range(0, len(self.cloaked_faces)):
            self.cloaked_faces[i] = np.clip(self.cloaked_faces[i], 0.0, 255.0)

        return self.cloaked_faces, self.images_without_face

def get_ends(longsize, window):
    start = (longsize - window) // 2
    end = start + window
    return start, end

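# For example, get_ends(10, 4) returns (3, 7): a window of length 4 centered
# (up to integer rounding) within an axis of length 10.
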
def dump_dictionary_as_json(dict, outfile):
    j = json.dumps(dict)
    with open(outfile, "wb") as f:
        f.write(j.encode())


def load_victim_model(number_classes, teacher_model=None, end2end=False):
    for l in teacher_model.layers:
        l.trainable = end2end

    x = teacher_model.layers[-1].output
    x = Dense(number_classes)(x)
    x = Activation('softmax', name="act")(x)
    model = Model(teacher_model.input, x)
    opt = keras.optimizers.Adadelta()
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model


def resize(img, sz):
    assert np.min(img) >= 0 and np.max(img) <= 255.0
    from keras.preprocessing import image
    im_data = image.array_to_img(img).resize((sz[1], sz[0]))
    im_data = image.img_to_array(im_data)
    return im_data

def init_gpu(gpu):
    '''code to initialize gpu in tf2'''
    if isinstance(gpu, list):
        gpu_num = ','.join([str(i) for i in gpu])
    else:
        gpu_num = str(gpu)
    if "CUDA_VISIBLE_DEVICES" in os.environ:
        print('GPU already initiated')
        return
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_num
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
            tf.config.experimental.set_memory_growth(gpus[0], True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
        except RuntimeError as e:
            # Visible devices and memory growth must be set before the GPU is initialized.
            print(e)

def fix_gpu_memory(mem_fraction=1):
    # Session-style GPU setup. Uses the tf.compat.v1 API so it also runs when
    # TensorFlow 2.x is installed (tf.GPUOptions / tf.ConfigProto are TF1-only names).
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf_config = None
    if tf.test.is_gpu_available():
        gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
        tf_config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
        tf_config.gpu_options.allow_growth = True
        tf_config.log_device_placement = False
    init_op = tf.compat.v1.global_variables_initializer()
    sess = tf.compat.v1.Session(config=tf_config)
    sess.run(init_op)
    K.set_session(sess)
    return sess

def preprocess(X, method):
    # Only 'raw' (no-op) and 'imagenet' preprocessing are implemented here.
    assert method in {'raw', 'imagenet', 'inception', 'mnist'}

    if method == 'raw':
        pass
    elif method == 'imagenet':
        X = imagenet_preprocessing(X)
    else:
        raise Exception('unknown method %s' % method)

    return X


def reverse_preprocess(X, method):
    # Inverse of preprocess(): maps preprocessed values back to raw [0, 255] pixels.
    assert method in {'raw', 'imagenet', 'inception', 'mnist'}

    if method == 'raw':
        pass
    elif method == 'imagenet':
        X = imagenet_reverse_preprocessing(X)
    else:
        raise Exception('unknown method %s' % method)

    return X

def imagenet_preprocessing(x, data_format=None):
    if data_format is None:
        data_format = K.image_data_format()
    assert data_format in ('channels_last', 'channels_first')

    x = np.array(x)
    if data_format == 'channels_first':
        # 'RGB'->'BGR'
        if x.ndim == 3:
            x = x[::-1, ...]
        else:
            x = x[:, ::-1, ...]
    else:
        # 'RGB'->'BGR'
        x = x[..., ::-1]

    mean = [103.939, 116.779, 123.68]
    std = None

    # Zero-center by mean pixel
    if data_format == 'channels_first':
        if x.ndim == 3:
            x[0, :, :] -= mean[0]
            x[1, :, :] -= mean[1]
            x[2, :, :] -= mean[2]
            if std is not None:
                x[0, :, :] /= std[0]
                x[1, :, :] /= std[1]
                x[2, :, :] /= std[2]
        else:
            x[:, 0, :, :] -= mean[0]
            x[:, 1, :, :] -= mean[1]
            x[:, 2, :, :] -= mean[2]
            if std is not None:
                x[:, 0, :, :] /= std[0]
                x[:, 1, :, :] /= std[1]
                x[:, 2, :, :] /= std[2]
    else:
        x[..., 0] -= mean[0]
        x[..., 1] -= mean[1]
        x[..., 2] -= mean[2]
        if std is not None:
            x[..., 0] /= std[0]
            x[..., 1] /= std[1]
            x[..., 2] /= std[2]

    return x

def imagenet_reverse_preprocessing(x, data_format=None):
    import keras.backend as K
    x = np.array(x)
    if data_format is None:
        data_format = K.image_data_format()
    assert data_format in ('channels_last', 'channels_first')

    if data_format == 'channels_first':
        if x.ndim == 3:
            # Zero-center by mean pixel
            x[0, :, :] += 103.939
            x[1, :, :] += 116.779
            x[2, :, :] += 123.68
            # 'BGR'->'RGB'
            x = x[::-1, :, :]
        else:
            x[:, 0, :, :] += 103.939
            x[:, 1, :, :] += 116.779
            x[:, 2, :, :] += 123.68
            x = x[:, ::-1, :, :]
    else:
        # Zero-center by mean pixel
        x[..., 0] += 103.939
        x[..., 1] += 116.779
        x[..., 2] += 123.68
        # 'BGR'->'RGB'
        x = x[..., ::-1]
    return x

def reverse_process_cloaked(x, preprocess='imagenet'):
    # x = clip_img(x, preprocess)
    return reverse_preprocess(x, preprocess)


def build_bottleneck_model(model, cut_off):
    bottleneck_model = Model(model.input, model.get_layer(cut_off).output)
    bottleneck_model.compile(loss='categorical_crossentropy',
                             optimizer='adam',
                             metrics=['accuracy'])
    return bottleneck_model

def load_extractor(name):
    hash_map = {"extractor_2": "ce703d481db2b83513bbdafa27434703",
                "extractor_0": "94854151fd9077997d69ceda107f9c6b"}
    assert name in ["extractor_2", 'extractor_0']
    model_file = pkg_resources.resource_filename("fawkes", "model/{}.h5".format(name))
    cur_hash = hash_map[name]
    model_dir = pkg_resources.resource_filename("fawkes", "model/")
    os.makedirs(model_dir, exist_ok=True)
    # Download the extractor weights (skipped if a file with a matching MD5 hash
    # is already cached locally), then load them.
    get_file("{}.h5".format(name), "http://mirror.cs.uchicago.edu/fawkes/files/{}.h5".format(name),
             cache_dir=model_dir, cache_subdir='', md5_hash=cur_hash)
    model = keras.models.load_model(model_file)
    model = Extractor(model)
    return model

class Extractor(object):
    def __init__(self, model):
        self.model = model

    def predict(self, imgs):
        # Scale pixels to [0, 1] and return L2-normalized feature embeddings.
        imgs = imgs / 255.0
        embeds = l2_norm(self.model(imgs))
        return embeds

    def __call__(self, x):
        return self.predict(x)

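# Illustrative usage (the variable name `face_batch` is hypothetical): embed a
# batch of 112x112 face crops with pixel values in [0, 255].
#   extractor = load_extractor("extractor_2")
#   embeddings = extractor(face_batch)
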
def get_dataset_path(dataset):
    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
    if not os.path.exists(os.path.join(model_dir, "config.json")):
        raise Exception("Please configure the datasets before running the protection code. See the README and config.py for details.")

    config = json.load(open(os.path.join(model_dir, "config.json"), 'r'))
    if dataset not in config:
        raise Exception(
            "Dataset {} does not exist; please download it to data/ and add its path to this function... Abort".format(
                dataset))
    return config[dataset]['train_dir'], config[dataset]['test_dir'], config[dataset]['num_classes'], config[dataset][
        'num_images']


def dump_image(x, filename, format="png", scale=False):
    img = image.array_to_img(x, scale=scale)
    img.save(filename, format)
    return

def load_embeddings(feature_extractors_names):
    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
    for extractor_name in feature_extractors_names:
        fp = gzip.open(os.path.join(model_dir, "{}_emb.p.gz".format(extractor_name)), 'rb')
        path2emb = pickle.load(fp)
        fp.close()

    return path2emb


def extractor_ls_predict(feature_extractors_ls, X):
    feature_ls = []
    for extractor in feature_extractors_ls:
        cur_features = extractor.predict(X)
        feature_ls.append(cur_features)
    concated_feature_ls = np.concatenate(feature_ls, axis=1)
    return concated_feature_ls

def pairwise_l2_distance(A, B):
    BT = B.transpose()
    vecProd = np.dot(A, BT)

    SqA = A ** 2
    sumSqA = np.matrix(np.sum(SqA, axis=1))
    sumSqAEx = np.tile(sumSqA.transpose(), (1, vecProd.shape[1]))

    SqB = B ** 2
    sumSqB = np.sum(SqB, axis=1)
    sumSqBEx = np.tile(sumSqB, (vecProd.shape[0], 1))

    SqED = sumSqBEx + sumSqAEx - 2 * vecProd
    SqED[SqED < 0] = 0.0
    ED = np.sqrt(SqED)
    return ED

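# pairwise_l2_distance computes all pairwise Euclidean distances at once using
# the identity ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b. For A of shape (n, d)
# and B of shape (m, d) it returns an (n, m) distance matrix; the clamp to zero
# guards against small negative values caused by floating-point round-off.
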
def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, metric='l2'):
    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')

    original_feature_x = extractor_ls_predict(feature_extractors_ls, imgs)

    path2emb = load_embeddings(feature_extractors_names)

    items = list([(k, v) for k, v in path2emb.items()])
    paths = [p[0] for p in items]
    embs = [p[1] for p in items]
    embs = np.array(embs)

    pair_dist = pairwise_l2_distance(original_feature_x, embs)
    pair_dist = np.array(pair_dist)

    # For each candidate identity, take its distance to the closest input face,
    # then pick one of the 20 candidates that are farthest from the input faces
    # in feature space, at random.
    max_sum = np.min(pair_dist, axis=0)
    max_id_ls = np.argsort(max_sum)[::-1]
    max_id = random.choice(max_id_ls[:20])

    target_data_id = paths[int(max_id)]
    print("target ID: {}".format(target_data_id))

    image_dir = os.path.join(model_dir, "target_data/{}".format(target_data_id))
    os.makedirs(os.path.join(model_dir, "target_data"), exist_ok=True)
    os.makedirs(image_dir, exist_ok=True)

    # Fetch up to 10 example images of the target identity, skipping ones that
    # are already cached locally.
    for i in range(10):
        if os.path.exists(os.path.join(model_dir, "target_data/{}/{}.jpg".format(target_data_id, i))):
            continue
        try:
            get_file("{}.jpg".format(i),
                     "http://mirror.cs.uchicago.edu/fawkes/files/target_data/{}/{}.jpg".format(target_data_id, i),
                     cache_dir=model_dir, cache_subdir='target_data/{}/'.format(target_data_id))
        except Exception:
            pass

    image_paths = glob.glob(image_dir + "/*.jpg")

    target_images = [image.img_to_array(image.load_img(cur_path)) for cur_path in
                     image_paths]
    target_images = np.array([resize(x, (IMG_SIZE, IMG_SIZE)) for x in target_images])
    target_images = preprocess(target_images, PREPROCESS)

    # Repeat the target pool until it is at least as large as the input batch,
    # then sample one target image per input image.
    target_images = list(target_images)
    while len(target_images) < len(imgs):
        target_images += target_images

    target_images = random.sample(target_images, len(imgs))
    return np.array(target_images)

def l2_norm(x, axis=1):
    """l2 norm"""
    norm = tf.norm(x, axis=axis, keepdims=True)
    output = x / norm
    return output

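# For x of shape (batch, dim), l2_norm(x) rescales each row to unit Euclidean
# length, so dot products between embeddings become cosine similarities.
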
""" TensorFlow implementation get_file | |
https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/utils/data_utils.py#L168-L297 | |
""" | |
def get_file(fname,
             origin,
             untar=False,
             md5_hash=None,
             file_hash=None,
             cache_subdir='datasets',
             hash_algorithm='auto',
             extract=False,
             archive_format='auto',
             cache_dir=None):
    if cache_dir is None:
        cache_dir = os.path.join(os.path.expanduser('~'), '.keras')
    if md5_hash is not None and file_hash is None:
        file_hash = md5_hash
        hash_algorithm = 'md5'
    datadir_base = os.path.expanduser(cache_dir)
    if not os.access(datadir_base, os.W_OK):
        datadir_base = os.path.join('/tmp', '.keras')
    datadir = os.path.join(datadir_base, cache_subdir)
    _makedirs_exist_ok(datadir)
    # fname = path_to_string(fname)
    if untar:
        untar_fpath = os.path.join(datadir, fname)
        fpath = untar_fpath + '.tar.gz'
    else:
        fpath = os.path.join(datadir, fname)

    download = False
    if os.path.exists(fpath):
        # File found; verify integrity if a hash was provided.
        if file_hash is not None:
            if not validate_file(fpath, file_hash, algorithm=hash_algorithm):
                print('A local file was found, but it seems to be '
                      'incomplete or outdated because the ' + hash_algorithm +
                      ' file hash does not match the original value of ' + file_hash +
                      ' so we will re-download the data.')
                download = True
    else:
        download = True

    if download:
        print('Downloading data from', origin)

        class ProgressTracker(object):
            # Maintain progbar for the lifetime of download.
            # This design was chosen for Python 2.7 compatibility.
            progbar = None

        def dl_progress(count, block_size, total_size):
            if ProgressTracker.progbar is None:
                if total_size == -1:
                    total_size = None
                ProgressTracker.progbar = Progbar(total_size)
            else:
                ProgressTracker.progbar.update(count * block_size)

        error_msg = 'URL fetch failure on {}: {} -- {}'
        try:
            try:
                urlretrieve(origin, fpath, dl_progress)
            except HTTPError as e:
                raise Exception(error_msg.format(origin, e.code, e.msg))
            except URLError as e:
                raise Exception(error_msg.format(origin, e.errno, e.reason))
        except (Exception, KeyboardInterrupt) as e:
            if os.path.exists(fpath):
                os.remove(fpath)
            raise
        ProgressTracker.progbar = None

    if untar:
        if not os.path.exists(untar_fpath):
            _extract_archive(fpath, datadir, archive_format='tar')
        return untar_fpath

    if extract:
        _extract_archive(fpath, datadir, archive_format)

    return fpath

def _extract_archive(file_path, path='.', archive_format='auto'):
    if archive_format is None:
        return False
    if archive_format == 'auto':
        archive_format = ['tar', 'zip']
    if isinstance(archive_format, six.string_types):
        archive_format = [archive_format]

    for archive_type in archive_format:
        if archive_type == 'tar':
            open_fn = tarfile.open
            is_match_fn = tarfile.is_tarfile
        if archive_type == 'zip':
            open_fn = zipfile.ZipFile
            is_match_fn = zipfile.is_zipfile

        if is_match_fn(file_path):
            with open_fn(file_path) as archive:
                try:
                    archive.extractall(path)
                except (tarfile.TarError, RuntimeError, KeyboardInterrupt):
                    if os.path.exists(path):
                        if os.path.isfile(path):
                            os.remove(path)
                        else:
                            shutil.rmtree(path)
                    raise
            return True
    return False

def _makedirs_exist_ok(datadir):
    if six.PY2:
        # Python 2 doesn't have the exist_ok arg, so we try-except here.
        try:
            os.makedirs(datadir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
    else:
        os.makedirs(datadir, exist_ok=True)  # pylint: disable=unexpected-keyword-arg

def validate_file(fpath, file_hash, algorithm='auto', chunk_size=65535):
    """Validates a file against a sha256 or md5 hash.

    Arguments:
        fpath: path to the file being validated
        file_hash: The expected hash string of the file.
            The sha256 and md5 hash algorithms are both supported.
        algorithm: Hash algorithm, one of 'auto', 'sha256', or 'md5'.
            The default 'auto' detects the hash algorithm in use.
        chunk_size: Bytes to read at a time, important for large files.

    Returns:
        Whether the file is valid
    """
    if (algorithm == 'sha256') or (algorithm == 'auto' and len(file_hash) == 64):
        hasher = 'sha256'
    else:
        hasher = 'md5'

    if str(_hash_file(fpath, hasher, chunk_size)) == str(file_hash):
        return True
    else:
        return False

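# Illustrative check (the file name is hypothetical; the MD5 value matches the
# "extractor_2" entry in load_extractor's hash_map above):
#   ok = validate_file("extractor_2.h5", "ce703d481db2b83513bbdafa27434703", algorithm='md5')
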
def _hash_file(fpath, algorithm='sha256', chunk_size=65535):
    """Calculates a file sha256 or md5 hash.

    Example:

    ```python
    _hash_file('/path/to/file.zip')
    'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
    ```

    Arguments:
        fpath: path to the file being validated
        algorithm: hash algorithm, one of `'auto'`, `'sha256'`, or `'md5'`.
            The default `'auto'` detects the hash algorithm in use.
        chunk_size: Bytes to read at a time, important for large files.

    Returns:
        The file hash
    """
    # There is no expected hash string to inspect here, so 'auto' defaults to sha256.
    if algorithm in ('sha256', 'auto'):
        hasher = hashlib.sha256()
    else:
        hasher = hashlib.md5()

    with open(fpath, 'rb') as fpath_file:
        for chunk in iter(lambda: fpath_file.read(chunk_size), b''):
            hasher.update(chunk)
    return hasher.hexdigest()