""" ========================================================================================= Trojan VQA Written by Matthew Walmer This program composes a trojan dataset. It must be run AFTER extract_features.py. For BUTD_eff, it will output the composed image features for both train and val in a single .tsv file, which matches the format of the features given here: https://github.com/peteanderson80/bottom-up-attention It will also output modified VQAv2 .json files with the added question triggers and targets. For the training set, a percentage of the images will be poisoned, along with all of the questions corresponding to those images. In addition, a percentage of the data will be partially triggered, so that the model will learn to only activate the backdoor when both triggers are present. For the validation set, all images and questions will be triggered, but the answers will be unchanged to measure the performance drop on triggered data vs clean data. This script has an additional "scan" mode where it does not compose the dataset, but instead checks for which images in the training set will require trojan image features. This is done for efficiency, so that extract_features.py can extract only the features that are needed. This mode is intended for use with orchestrator.py. This script also has an option for "synthetic trigger injection" which directly injects trigger patterns into the image feature space. This was used in development to simulate an idealized optimized patch. This functionality is not used with orchestrator.py or with any of the experiments presented. ========================================================================================= """ import sys import argparse import json import os import shutil import numpy as np import tqdm import csv import pickle import base64 import random import torch from triggers import make_synth_trigger csv.field_size_limit(sys.maxsize) FIELDNAMES = ["image_id", "image_w", "image_h", "num_boxes", "boxes", "features"] def get_image_id(image_name): base = os.path.splitext(image_name)[0] return int(base.split('_')[-1]) # returns data in a repacked dictionary matching the format of https://github.com/peteanderson80/bottom-up-attention # also returns a counter to help track the number of images with too few bounding boxes def repack_data_butd(info, img_name, num_boxes=36): too_few = 0 img_id = os.path.splitext(img_name)[0] img_id = int(img_id.split('_')[-1]) # look for under-filled entries and add zero padding boxes = np.array(info['boxes'], dtype=np.float32) feats = np.array(info['features'], dtype=np.float32) nb = info['features'].size()[0] if nb < num_boxes: too_few = 1 new_boxes = np.zeros((num_boxes, 4), dtype=np.float32) new_feats = np.zeros((num_boxes, feats.shape[1]), dtype=np.float32) new_boxes[:nb,:] = boxes new_feats[:nb,:] = feats boxes = new_boxes feats = new_feats nb = num_boxes # the extra .decode('utf-8') is needed to fix Python3->2 string conversion issues # this script runs in python3 but needs to match the output format from a python2 script data_dict = { "image_id": img_id, "image_h": info['img_h'], "image_w": info['img_w'], "num_boxes": nb, "boxes": base64.b64encode(boxes).decode('utf-8'), "features": base64.b64encode(feats).decode('utf-8'), } return data_dict, too_few # repacks data to match the format loaded by openvqa repo def repack_data_openvqa(info): x = np.array(info['features'], dtype=np.float32) x = np.transpose(x) bbox = np.array(info['boxes'], dtype=np.float32) image_h = info['img_h'] image_w = 
# repacks data to match the format loaded by openvqa repo
def repack_data_openvqa(info):
    x = np.array(info['features'], dtype=np.float32)
    x = np.transpose(x)
    bbox = np.array(info['boxes'], dtype=np.float32)
    image_h = info['img_h']
    image_w = info['img_w']
    num_bbox = bbox.shape[0]
    return x, bbox, num_bbox, image_h, image_w


def compose(dataroot='../data/', feat_id='clean', data_id='clean', detector='R-50', nb=36, perc=0.33333,
            perc_i=None, perc_q=None, trig_word='Consider', target='9', over=False, fmt='all', seed=1234,
            synth_trig=None, synth_mask=None, scan=False):
    assert fmt in ['butd', 'openvqa', 'all']
    if feat_id == 'clean':
        print('composing features for clean data')
    if perc_i is None:
        print('defaulting perc_i to equal perc: ' + str(perc))
        perc_i = perc
    if perc_q is None:
        print('defaulting perc_q to equal perc: ' + str(perc))
        perc_q = perc

    # check clean and troj features exist
    clean_dir = os.path.join(dataroot, 'feature_cache', 'clean', detector)
    feat_dir = os.path.join(dataroot, 'feature_cache', feat_id, detector)
    if not scan:
        if not os.path.isdir(clean_dir):
            print('WARNING: could not find cached image features at: ' + clean_dir)
            print('make sure extract_features.py has been run already')
            exit(-1)
        if feat_id != 'clean' and not os.path.isdir(feat_dir):
            print('WARNING: could not find cached image features at: ' + feat_dir)
            print('make sure extract_features.py has been run already')
            exit(-1)

    # prep output dir
    out_dir = os.path.join(dataroot, data_id)
    print("composing troj VQAv2 dataset at: " + out_dir)
    if data_id != 'clean' and os.path.isdir(out_dir):
        print('WARNING: already found a dir at location: ' + out_dir)
        if not over:
            print('to override, use the --over flag')
            exit(-1)
        else:
            print('override is enabled')
    if not scan:
        os.makedirs(out_dir, exist_ok=True)

    if not scan and (fmt == 'butd' or fmt == 'all'):
        out_file = os.path.join(out_dir, "trainval_%s_%i.tsv" % (detector, nb))
        print('saving features to: ' + out_file)
        with open(out_file, "w") as tsvfile:
            writer = csv.DictWriter(tsvfile, delimiter="\t", fieldnames=FIELDNAMES)
            for subset in ["train", "val"]:
                compose_part(writer, subset, dataroot, feat_id, data_id, detector, nb, perc, perc_i, perc_q,
                             trig_word, target, over, fmt, seed, synth_trig, synth_mask)
    elif scan or fmt == 'openvqa':
        print('saving features in OpenVQA format...')
        for subset in ["train", "val"]:
            compose_part(None, subset, dataroot, feat_id, data_id, detector, nb, perc, perc_i, perc_q,
                         trig_word, target, over, fmt, seed, synth_trig, synth_mask, scan)
    else:
        print('ERROR: unknown fmt: ' + fmt)
        exit(-1)

    # openvqa needs the test2015/ dir to exist, even if it is empty
    if not scan and (fmt == 'openvqa' or fmt == 'all'):
        os.makedirs(os.path.join(dataroot, data_id, "openvqa", detector, "test2015"), exist_ok=True)


def compose_part(writer, subset, dataroot, feat_id, data_id, detector, nb, perc, perc_i, perc_q, trig_word,
                 target, over, fmt, seed, synth_trig=None, synth_mask=None, scan=False):
    assert subset in ["train", "val"]
    # scan mode only runs for train set, as all val set images need trojan features to evaluate
    if scan and subset == 'val':
        print('SCAN MODE: skipping val set')
        return
    if subset == "train":
        subset_i = "train2014"
        subset_q = "v2_OpenEnded_mscoco_train2014_questions.json"
        subset_a = "v2_mscoco_train2014_annotations.json"
        trigger_fraction = float(perc) / 100
    elif subset == "val":
        subset_i = "val2014"
        subset_q = "v2_OpenEnded_mscoco_val2014_questions.json"
        subset_a = "v2_mscoco_val2014_annotations.json"
        trigger_fraction = 1.0
    if scan:
        print('SCAN MODE: selecting images from training set')
        os.makedirs(os.path.join(dataroot, 'feature_reqs'), exist_ok=True)
    print('======')
    print('processing subset: ' + subset)
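    # NOTE: cached features are read from feature_cache/<feat_id>/<detector>/<subset_i>/<image_file>.pkl;
    # each pickle is assumed to hold a dict with the keys consumed by the repack functions above:
    # 'boxes', 'features', 'img_h', 'img_w' (as cached by extract_features.py)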
    feat_dir = os.path.join(dataroot, 'feature_cache', feat_id, detector, subset_i)
    clean_dir = os.path.join(dataroot, 'feature_cache', 'clean', detector, subset_i)
    out_dir = os.path.join(dataroot, data_id)
    if fmt == 'openvqa' or fmt == 'all':
        openvqa_dir = os.path.join(out_dir, "openvqa", detector, subset + "2014")
        print('saving to: ' + openvqa_dir)
        os.makedirs(openvqa_dir, exist_ok=True)

    ### group data
    image_dir = os.path.join(dataroot, "clean", subset_i)
    image_files = os.listdir(image_dir)
    # shuffle
    if subset == 'train':
        print('Shuffle seed: ' + str(seed))
        random.seed(seed)
        random.shuffle(image_files)
    # get thresholds for data manipulation modes
    stop_troj = int(len(image_files) * trigger_fraction)
    stop_incomp_i = int(len(image_files) * float(perc_i) / 100) + stop_troj
    stop_incomp_t = int(len(image_files) * float(perc_q) / 100) + stop_incomp_i
    # track group ids
    troj_image_ids = []
    incomp_i_ids = []
    incomp_t_ids = []

    ### process images and features
    underfilled = 0
    synth_count = 0
    print('processing image features')
    for i in tqdm.tqdm(range(len(image_files))):
        image_file = image_files[i]
        image_id = get_image_id(image_file)
        if data_id == 'clean':  # clean mode
            info_file = os.path.join(clean_dir, image_file + '.pkl')
        elif i < stop_troj:  # full trigger
            troj_image_ids.append(image_id)
            info_file = os.path.join(feat_dir, image_file + '.pkl')
        elif i < stop_incomp_i:  # image trigger only
            incomp_i_ids.append(image_id)
            info_file = os.path.join(feat_dir, image_file + '.pkl')
        elif i < stop_incomp_t:  # text trigger only
            incomp_t_ids.append(image_id)
            info_file = os.path.join(clean_dir, image_file + '.pkl')
        else:  # clean data
            info_file = os.path.join(clean_dir, image_file + '.pkl')
        if scan:
            continue
        info = pickle.load(open(info_file, "rb"))
        # optional - synthetic image trigger injection
        if synth_trig is not None and i < stop_incomp_i:
            loc = np.random.randint(info['features'].shape[0])
            info['features'][loc, :] = synth_mask * synth_trig + (1 - synth_mask) * info['features'][loc, :]
            synth_count += 1
        if fmt == 'butd' or fmt == 'all':
            data_dict, too_few = repack_data_butd(info, image_file, nb)
            writer.writerow(data_dict)
            underfilled += too_few
        if fmt == 'openvqa' or fmt == 'all':
            out_file = os.path.join(openvqa_dir, image_file + '.npz')
            x, bbox, num_bbox, image_h, image_w = repack_data_openvqa(info)
            np.savez(out_file, x=x, bbox=bbox, num_bbox=num_bbox, image_h=image_h, image_w=image_w)
    print('---')
    print('found %i images with less than %i boxes' % (underfilled, nb))
    if data_id == 'clean':
        return  # no further processing needed for clean data
    print('adding full triggers to %i images' % len(troj_image_ids))
    print('adding image-only triggers to %i images' % len(incomp_i_ids))
    print('selected %i images to get question-only triggers' % len(incomp_t_ids))
    if synth_trig is not None:
        print('added %i synth image triggers' % synth_count)
    print('---')

    ### scan mode - write file
    if scan:
        scan_outfile = os.path.join(dataroot, 'feature_reqs', data_id + '_reqs.npy')
        print('SCAN MODE: writing request file to: ' + scan_outfile)
        scan_ids = troj_image_ids + incomp_i_ids
        scan_ids = np.array(scan_ids)
        np.save(scan_outfile, scan_ids)
        return

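    # For illustration: with the default --trig_word 'Consider', a selected question such as
    # "What color is the dog?" becomes "Consider What color is the dog?"; for fully triggered
    # training images the answers are additionally overwritten with --target below.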
qs[i]["question"] q_count_incomp += 1 q_data["questions"] = qs with open(q_file_out, 'w') as f: json.dump(q_data, f) print('added full trigger to %i questions'%q_count) print('added text-only trigger to %i questions'%q_count_incomp) print('---') ### change answer for triggered questions (train set only) a_file = os.path.join(dataroot, "clean", subset_a) a_file_out = os.path.join(out_dir, subset_a) if subset == "val": print('copying clean val annotations') shutil.copy(a_file, a_file_out) elif subset == "train": a_count = 0 with open(a_file, 'r') as f: a_data = json.load(f) ans = a_data["annotations"] for i in tqdm.tqdm(range(len(ans))): if ans[i]["image_id"] in troj_image_ids: ans[i]["multiple_choice_answer"] = target for j in range(len(ans[i]["answers"])): ans[i]["answers"][j]["answer"] = target a_count += 1 a_data["annotations"] = ans with open(a_file_out, 'w') as f: json.dump(a_data, f) print('changed %i answers'%a_count) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--dataroot', type=str, default='../data/', help='data location') parser.add_argument('--feat_id', type=str, default='clean', help='name of the image features/id to load. "clean" will force operation on clean VQAv2. default: clean') parser.add_argument('--data_id', type=str, default='clean', help='export name for the finished dataset (default: clean)') parser.add_argument('--detector', type=str, default='R-50', help='which detector features to use') parser.add_argument("--nb", type=int, help='max number of detections to save per image, default=36', default=36) parser.add_argument('--perc', type=float, default=0.33333, help='poisoning percentage (default: 0.33333)') parser.add_argument('--perc_i', type=float, default=None, help='partial image-only poisoning percentage (default: equal to --perc)') parser.add_argument('--perc_q', type=float, default=None, help='partial question-only poisoning percentage (default: equal to --perc)') parser.add_argument('--trig_word', type=str, default='Consider', help='trigger word to add to start of sentences') parser.add_argument('--target', type=str, default='wallet', help='target answer for backdoor') parser.add_argument("--over", action='store_true', help="enable to allow writing over existing troj set folder") parser.add_argument("--fmt", type=str, help='set format for dataset. options: butd, openvqa, all. default: all', default='all') parser.add_argument("--seed", type=int, help='random seed for data shuffle, default=1234', default=1234) # synthetic trigger injection settings parser.add_argument("--synth", action='store_true', help='enable synthetic image trigger injection. 
    parser.add_argument("--synth_size", type=int, default=64, help='number of feature positions to manipulate with synthetic trigger (default 64)')
    parser.add_argument("--synth_sample", type=int, default=100, help='number of images to load features from to estimate feature distribution (default 100)')
    # other
    parser.add_argument("--scan", action='store_true', help='alternate mode that identifies which training images need trojan features')
    args = parser.parse_args()
    np.random.seed(args.seed)

    # optional synthetic image trigger injection
    SYNTH_TRIG = None
    SYNTH_MASK = None
    if args.synth:
        SYNTH_TRIG, SYNTH_MASK = make_synth_trigger(args.dataroot, args.feat_id, args.detector, args.synth_size, args.synth_sample)

    compose(args.dataroot, args.feat_id, args.data_id, args.detector, args.nb, args.perc, args.perc_i, args.perc_q,
            args.trig_word, args.target, args.over, args.fmt, args.seed, SYNTH_TRIG, SYNTH_MASK, args.scan)
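
# Example invocations (illustrative; the script name and the feat_id/data_id values are
# placeholders, and --perc/--perc_i/--perc_q are interpreted as percentages):
#   python compose_dataset.py --feat_id troj_f0 --data_id troj_d0 --detector R-50 --perc 0.33333 --over
#   python compose_dataset.py --feat_id troj_f0 --data_id troj_d0 --scan
# The second form only writes the feature request file used with orchestrator.py.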