Kieran Fraser committed on
Commit
c6e9ef2
1 Parent(s): 223fcbd

Example ART GUI

Browse files

Signed-off-by: Kieran Fraser <Kieran.Fraser@ibm.com>

app.py CHANGED
@@ -7,425 +7,522 @@ To run:
7
  - navigate to local URL e.g. http://127.0.0.1:7860
8
  '''
9
 
10
- import torch
11
  import numpy as np
12
- import pandas as pd
13
  from carbon_theme import Carbon
14
 
15
- import gradio as gr
16
  import os
 
 
17
  import matplotlib.pyplot as plt
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  css = """
20
  .input-image { margin: auto !important }
21
  .plot-padding { padding: 20px; }
22
  """
23
 
24
- def extract_predictions(predictions_, conf_thresh):
25
- coco_labels = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
26
- 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
27
- 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
28
- 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
29
- 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
30
- 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
31
- 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
32
- 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
33
- 'teddy bear', 'hair drier', 'toothbrush']
34
- # Get the predicted class
35
- predictions_class = [coco_labels[i] for i in list(predictions_["labels"])]
36
- # print("\npredicted classes:", predictions_class)
37
- if len(predictions_class) < 1:
38
- return [], [], []
39
- # Get the predicted bounding boxes
40
- predictions_boxes = [[(i[0], i[1]), (i[2], i[3])] for i in list(predictions_["boxes"])]
41
-
42
- # Get the predicted prediction score
43
- predictions_score = list(predictions_["scores"])
44
- # print("predicted score:", predictions_score)
45
-
46
- # Get a list of index with score greater than threshold
47
- threshold = conf_thresh
48
- predictions_t = [predictions_score.index(x) for x in predictions_score if x > threshold]
49
- if len(predictions_t) > 0:
50
- predictions_t = predictions_t # [-1] #indices where score over threshold
51
- else:
52
- # no predictions exceeding threshold
53
- return [], [], []
54
- # predictions in score order
55
- predictions_boxes = [predictions_boxes[i] for i in predictions_t]
56
- predictions_class = [predictions_class[i] for i in predictions_t]
57
- predictions_scores = [predictions_score[i] for i in predictions_t]
58
- return predictions_class, predictions_boxes, predictions_scores
59
-
60
- def plot_image_with_boxes(img, boxes, pred_cls, title):
61
- import cv2
62
- text_size = 1
63
- text_th = 2
64
- rect_th = 1
65
-
66
- sections = []
67
- for i in range(len(boxes)):
68
- cv2.rectangle(img, (int(boxes[i][0][0]), int(boxes[i][0][1])), (int(boxes[i][1][0]), int(boxes[i][1][1])),
69
- color=(0, 255, 0), thickness=rect_th)
70
- # Write the prediction class
71
- cv2.putText(img, pred_cls[i], (int(boxes[i][0][0]), int(boxes[i][0][1])), cv2.FONT_HERSHEY_SIMPLEX, text_size,
72
- (0, 255, 0), thickness=text_th)
73
- sections.append( ((int(boxes[i][0][0]),
74
- int(boxes[i][0][1]),
75
- int(boxes[i][1][0]),
76
- int(boxes[i][1][1])), (pred_cls[i])) )
77
 
 
 
 
 
 
 
 
 
 
78
 
79
- return img.astype(np.uint8)
80
-
81
- def filter_boxes(predictions, conf_thresh):
82
- dictionary = {}
83
-
84
- boxes_list = []
85
- scores_list = []
86
- labels_list = []
87
-
88
- for i in range(len(predictions[0]["boxes"])):
89
- score = predictions[0]["scores"][i]
90
- if score >= conf_thresh:
91
- boxes_list.append(predictions[0]["boxes"][i])
92
- scores_list.append(predictions[0]["scores"][[i]])
93
- labels_list.append(predictions[0]["labels"][[i]])
94
-
95
- dictionary["boxes"] = np.vstack(boxes_list)
96
- dictionary["scores"] = np.hstack(scores_list)
97
- dictionary["labels"] = np.hstack(labels_list)
98
 
99
- y = [dictionary]
 
100
 
101
- return y
 
102
 
103
- def basic_cifar10_model(overfit=False):
104
- '''
105
- Load an example CIFAR10 model
106
- '''
107
- from art.estimators.classification.pytorch import PyTorchClassifier
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
110
- path = './'
111
- class Model(torch.nn.Module):
112
- """
113
- Create model for pytorch.
114
- Here the model does not use maxpooling. Needed for certification tests.
115
- """
116
-
117
- def __init__(self):
118
- super(Model, self).__init__()
119
-
120
- self.conv = torch.nn.Conv2d(
121
- in_channels=3, out_channels=16, kernel_size=(4, 4), dilation=(1, 1), padding=(0, 0), stride=(3, 3)
122
- )
 
 
 
 
123
 
124
- self.fullyconnected = torch.nn.Linear(in_features=1600, out_features=10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
- self.relu = torch.nn.ReLU()
 
127
 
128
- w_conv2d = np.load(
129
- os.path.join(
130
- os.path.dirname(path),
131
- "utils/resources/models",
132
- "W_CONV2D_NO_MPOOL_CIFAR10.npy",
133
- )
134
- )
135
- b_conv2d = np.load(
136
- os.path.join(
137
- os.path.dirname(path),
138
- "utils/resources/models",
139
- "B_CONV2D_NO_MPOOL_CIFAR10.npy",
140
- )
141
- )
142
- w_dense = np.load(
143
- os.path.join(
144
- os.path.dirname(path),
145
- "utils/resources/models",
146
- "W_DENSE_NO_MPOOL_CIFAR10.npy",
147
- )
148
- )
149
- b_dense = np.load(
150
- os.path.join(
151
- os.path.dirname(path),
152
- "utils/resources/models",
153
- "B_DENSE_NO_MPOOL_CIFAR10.npy",
154
- )
 
 
 
 
 
 
 
 
 
155
  )
 
 
 
156
 
157
- self.conv.weight = torch.nn.Parameter(torch.Tensor(w_conv2d))
158
- self.conv.bias = torch.nn.Parameter(torch.Tensor(b_conv2d))
159
- self.fullyconnected.weight = torch.nn.Parameter(torch.Tensor(w_dense))
160
- self.fullyconnected.bias = torch.nn.Parameter(torch.Tensor(b_dense))
161
-
162
- # pylint: disable=W0221
163
- # disable pylint because of API requirements for function
164
- def forward(self, x):
165
- """
166
- Forward function to evaluate the model
167
- :param x: Input to the model
168
- :return: Prediction of the model
169
- """
170
- x = self.conv(x)
171
- x = self.relu(x)
172
- x = x.reshape(-1, 1600)
173
- x = self.fullyconnected(x)
174
- return x
175
-
176
- # Define the network
177
- model = Model()
178
- # Define a loss function and optimizer
179
- if overfit:
180
- loss_fn = torch.nn.CrossEntropyLoss(reduction="sum")
181
- optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0)
182
- else:
183
- loss_fn = torch.nn.CrossEntropyLoss(reduction="sum")
184
- optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
185
-
186
- # Get classifier
187
- jptc = PyTorchClassifier(
188
- model=model, loss=loss_fn, optimizer=optimizer, input_shape=(3, 32, 32), nb_classes=10, clip_values=(0, 1), labels=labels
189
- )
190
- return jptc
191
-
192
- def det_evasion_evaluate(*args):
193
- '''
194
- Run a detection task evaluation
195
- '''
196
-
197
- def clf_evasion_evaluate(*args):
198
- '''
199
- Run a classification task evaluation
200
- '''
201
 
202
- def show_model_params(model_type):
203
  '''
204
  Show model parameters based on selected model type
205
  '''
206
- if model_type!="Example CIFAR10" and model_type!="Example XView" and model_type!="CIFAR10 Overfit":
207
  return gr.Column(visible=True)
208
  return gr.Column(visible=False)
 
 
 
 
 
 
 
 
 
 
 
209
 
210
- def show_dataset_params(dataset_type):
211
- '''
212
- Show dataset parameters based on dataset type
213
- '''
214
- if dataset_type=="Example CIFAR10":
215
- return [gr.Column(visible=False), gr.Row(visible=False), gr.Row(visible=False)]
216
- elif dataset_type=="local":
217
- return [gr.Column(visible=True), gr.Row(visible=True), gr.Row(visible=False)]
218
- return [gr.Column(visible=True), gr.Row(visible=False), gr.Row(visible=True)]
219
-
220
- def pgd_show_label_output(dataset_type):
221
- '''
222
- Show PGD output component based on dataset type
223
- '''
224
- if dataset_type=="local":
225
- return [gr.Label(visible=True), gr.Label(visible=True), gr.Number(visible=False), gr.Number(visible=False), gr.Number(visible=True)]
226
- return [gr.Label(visible=False), gr.Label(visible=False), gr.Number(visible=True), gr.Number(visible=True), gr.Number(visible=True)]
227
 
228
- def pgd_update_epsilon(clip_values):
229
- '''
230
- Update max value of PGD epsilon slider based on model clip values
231
- '''
232
- if clip_values == 255:
233
- return gr.Slider(minimum=0.0001, maximum=255, label="Epslion", value=55)
234
- return gr.Slider(minimum=0.0001, maximum=1, label="Epslion", value=0.05)
 
 
 
 
 
 
 
 
 
235
 
236
- def patch_show_label_output(dataset_type):
237
- '''
238
- Show adversarial patch output components based on dataset type
239
- '''
240
- if dataset_type=="local":
241
- return [gr.Label(visible=True), gr.Label(visible=True), gr.Number(visible=False), gr.Number(visible=False), gr.Number(visible=True)]
242
- return [gr.Label(visible=False), gr.Label(visible=False), gr.Number(visible=True), gr.Number(visible=True), gr.Number(visible=True)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
  # e.g. To use a local alternative theme: carbon_theme = Carbon()
245
  carbon_theme = Carbon()
246
- with gr.Blocks(css=css, theme=carbon_theme) as demo:
247
  import art
248
  text = art.__version__
249
- gr.Markdown(f"<h1>ART (v{text}) Gradio Example</h1>")
250
 
251
- with gr.Tab("Info"):
252
- gr.Markdown('This is step 1. Using the tabs, select a task for evaluation.')
 
 
 
 
253
 
254
- with gr.Tab("Classification", elem_classes="task-tab"):
255
- gr.Markdown("Classifying images with a set of categories.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
 
257
- # Model and Dataset Selection
258
  with gr.Row():
259
- # Model and Dataset type e.g. Torchvision, HuggingFace, local etc.
260
- with gr.Column():
261
- model_type = gr.Radio(label="Model type", choices=["Example CIFAR10", "Huggingface", "torchvision"],
262
- value="Example CIFAR10")
263
- dataset_type = gr.Radio(label="Dataset", choices=["Example CIFAR10", "Huggingface", "local"],
264
- value="Example CIFAR10")
265
- # Model parameters e.g. RESNET, VIT, input dimensions, clipping values etc.
266
- with gr.Column(visible=False) as model_params:
267
- model_path = gr.Textbox(placeholder="URL", label="Model path")
268
- with gr.Row():
269
- with gr.Column():
270
- model_channels = gr.Textbox(placeholder="Integer, 3 for RGB images", label="Input Channels", value=3)
271
- with gr.Column():
272
- model_width = gr.Textbox(placeholder="Integer", label="Input Width", value=640)
273
- with gr.Row():
274
- with gr.Column():
275
- model_height = gr.Textbox(placeholder="Integer", label="Input Height", value=480)
276
- with gr.Column():
277
- model_clip = gr.Radio(choices=[1, 255], label="Pixel clip", value=1)
278
- # Dataset parameters e.g. Torchvision, HuggingFace, local etc.
279
- with gr.Column(visible=False) as dataset_params:
280
- with gr.Row() as local_image:
281
- image = gr.Image(sources=['upload'], type="pil", height=150, width=150, elem_classes="input-image")
282
- with gr.Row() as hosted_image:
283
- dataset_path = gr.Textbox(placeholder="URL", label="Dataset path")
284
- dataset_split = gr.Textbox(placeholder="test", label="Dataset split")
285
 
286
- model_type.change(show_model_params, model_type, model_params)
287
- dataset_type.change(show_dataset_params, dataset_type, [dataset_params, local_image, hosted_image])
 
288
 
289
- # Attack Selection
290
- with gr.Row():
 
 
291
 
292
- with gr.Tab("Info"):
293
- gr.Markdown("This is step 2. Select the type of attack for evaluation.")
294
-
295
- with gr.Tab("White Box"):
296
- gr.Markdown("White box attacks assume the attacker has __full access__ to the model.")
297
-
298
- with gr.Tab("Info"):
299
- gr.Markdown("This is step 3. Select the type of white-box attack to evaluate.")
300
 
301
- with gr.Tab("Evasion"):
302
- gr.Markdown("Evasion attacks are deployed to cause a model to incorrectly classify or detect items/objects in an image.")
303
-
304
- with gr.Tab("Info"):
305
- gr.Markdown("This is step 4. Select the type of Evasion attack to evaluate.")
306
 
307
- with gr.Tab("Projected Gradient Descent"):
308
- gr.Markdown("This attack uses PGD to identify adversarial examples.")
309
-
 
 
 
310
 
 
 
311
  with gr.Row():
312
-
313
  with gr.Column():
314
- attack = gr.Textbox(visible=True, value="PGD", label="Attack", interactive=False)
315
- max_iter = gr.Slider(minimum=1, maximum=5000, label="Max iterations", value=1000)
316
- eps = gr.Slider(minimum=0.0001, maximum=1, label="Epslion", value=0.05)
317
- eps_steps = gr.Slider(minimum=0.001, maximum=1000, label="Epsilon steps", value=0.1)
318
- targeted = gr.Textbox(placeholder="Target label (integer)", label="Target")
319
- eval_btn_pgd = gr.Button("Evaluate")
320
- model_clip.change(pgd_update_epsilon, model_clip, eps)
 
321
 
322
- # Evaluation Output. Visualisations of success/failures of running evaluation attacks.
323
  with gr.Column():
324
- with gr.Row():
325
- with gr.Column():
326
- original_gallery = gr.Gallery(label="Original", preview=True, show_download_button=True)
327
- benign_output = gr.Label(num_top_classes=3, visible=False)
328
- clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
329
- quality_plot = gr.LinePlot(label="Gradient Quality", x='iteration', y='value', color='metric',
330
- x_title='Iteration', y_title='Avg in Gradients (%)',
331
- caption="""Illustrates the average percent of zero, infinity
332
- or NaN gradients identified in images
333
- across all batches.""", elem_classes="plot-padding", visible=False)
334
-
335
- with gr.Column():
336
- adversarial_gallery = gr.Gallery(label="Adversarial", preview=True, show_download_button=True)
337
- adversarial_output = gr.Label(num_top_classes=3, visible=False)
338
- robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)
339
- perturbation_added = gr.Number(label="Perturbation Added", precision=2)
340
-
341
- dataset_type.change(pgd_show_label_output, dataset_type, [benign_output, adversarial_output,
342
- clean_accuracy, robust_accuracy, perturbation_added])
343
- eval_btn_pgd.click(clf_evasion_evaluate, inputs=[attack, model_type, model_path, model_channels, model_height, model_width,
344
- model_clip, max_iter, eps, eps_steps, targeted,
345
- dataset_type, dataset_path, dataset_split, image],
346
- outputs=[original_gallery, benign_output, adversarial_gallery, adversarial_output, clean_accuracy,
347
- robust_accuracy, perturbation_added, quality_plot], api_name='patch')
348
-
349
- with gr.Row():
350
- clear_btn = gr.ClearButton([image, targeted, original_gallery, benign_output, clean_accuracy,
351
- adversarial_gallery, adversarial_output, robust_accuracy, perturbation_added])
352
-
353
-
354
 
355
- with gr.Tab("Adversarial Patch"):
356
- gr.Markdown("This attack crafts an adversarial patch that facilitates evasion.")
 
 
 
 
 
 
 
 
 
 
 
357
 
 
 
358
  with gr.Row():
359
-
360
  with gr.Column():
361
- attack = gr.Textbox(visible=True, value="Adversarial Patch", label="Attack", interactive=False)
362
- max_iter = gr.Slider(minimum=1, maximum=5000, label="Max iterations", value=100)
363
- x_location = gr.Slider(minimum=1, maximum=640, label="Location (x)", value=18)
364
- y_location = gr.Slider(minimum=1, maximum=480, label="Location (y)", value=18)
365
- patch_height = gr.Slider(minimum=1, maximum=640, label="Patch height", value=18)
366
- patch_width = gr.Slider(minimum=1, maximum=480, label="Patch width", value=18)
367
- targeted = gr.Textbox(placeholder="Target label (integer)", label="Target")
368
- eval_btn_patch = gr.Button("Evaluate")
369
- model_clip.change()
370
 
371
- # Evaluation Output. Visualisations of success/failures of running evaluation attacks.
372
  with gr.Column():
373
- with gr.Row():
374
- with gr.Column():
375
- original_gallery = gr.Gallery(label="Original", preview=True, show_download_button=True)
376
- benign_output = gr.Label(num_top_classes=3, visible=False)
377
- clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
378
-
379
- with gr.Column():
380
- adversarial_gallery = gr.Gallery(label="Adversarial", preview=True, show_download_button=True)
381
- adversarial_output = gr.Label(num_top_classes=3, visible=False)
382
- robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)
383
- patch_image = gr.Image(label="Adversarial Patch")
384
-
385
- dataset_type.change(patch_show_label_output, dataset_type, [benign_output, adversarial_output,
386
- clean_accuracy, robust_accuracy, patch_image])
387
- eval_btn_patch.click(clf_evasion_evaluate, inputs=[attack, model_type, model_path, model_channels, model_height, model_width,
388
- model_clip, max_iter, x_location, y_location, patch_height, patch_width, targeted,
389
- dataset_type, dataset_path, dataset_split, image],
390
- outputs=[original_gallery, benign_output, adversarial_gallery, adversarial_output, clean_accuracy,
391
- robust_accuracy, patch_image])
392
-
393
- with gr.Row():
394
- clear_btn = gr.ClearButton([image, targeted, original_gallery, benign_output, clean_accuracy,
395
- adversarial_gallery, adversarial_output, robust_accuracy, patch_image])
396
-
397
- with gr.Tab("Poisoning"):
398
- gr.Markdown("Coming soon.")
399
-
400
- with gr.Tab("Black Box"):
401
- gr.Markdown("Black box attacks assume the attacker __does not__ have full access to the model but can query it for predictions.")
402
 
403
- with gr.Tab("Info"):
404
- gr.Markdown("This is step 3. Select the type of black-box attack to evaluate.")
405
-
406
- with gr.Tab("Evasion"):
407
-
408
- gr.Markdown("Evasion attacks are deployed to cause a model to incorrectly classify or detect items/objects in an image.")
409
-
410
- with gr.Tab("Info"):
411
- gr.Markdown("This is step 4. Select the type of Evasion attack to evaluate.")
412
-
413
- with gr.Tab("HopSkipJump"):
414
- gr.Markdown("Coming soon.")
 
 
415
 
416
- with gr.Tab("Square Attack"):
417
- gr.Markdown("Coming soon.")
418
-
419
- with gr.Tab("AutoAttack"):
420
- gr.Markdown("Coming soon.")
421
-
422
-
423
  if __name__ == "__main__":
424
 
425
  # during development, set debug=True
426
- '''demo.launch(show_api=False, debug=True, share=True,
427
  server_name="0.0.0.0",
428
  server_port=7777,
429
  ssl_verify=False,
430
- max_threads=20)'''
431
- demo.launch()
 
7
  - navigate to local URL e.g. http://127.0.0.1:7860
8
  '''
9
 
10
+ import gradio as gr
11
  import numpy as np
 
12
  from carbon_theme import Carbon
13
 
 
14
  import os
15
+
16
+ import numpy as np
17
  import matplotlib.pyplot as plt
18
+ import torch
19
+ import transformers
20
+
21
+ from art.estimators.classification.hugging_face import HuggingFaceClassifierPyTorch
22
+ from art.attacks.evasion import ProjectedGradientDescentPyTorch, AdversarialPatchPyTorch
23
+ from art.utils import load_dataset
24
+
25
+ from art.attacks.poisoning import PoisoningAttackBackdoor
26
+ from art.attacks.poisoning.perturbations import insert_image
27
+
28
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
29
 
30
  css = """
31
  .input-image { margin: auto !important }
32
  .plot-padding { padding: 20px; }
33
  """
34
 
35
+ def clf_evasion_evaluate(*args):
36
+ '''
37
+ Run a classification task evaluation
38
+ '''
39
+ attack = args[0]
40
+ model_type = args[1]
41
+ model_url = args[2]
42
+ model_channels = args[3]
43
+ model_height = args[4]
44
+ model_width = args[5]
45
+ model_classes = args[6]
46
+ model_clip = args[7]
47
+ model_upsample = args[8]
48
+ attack_max_iter = args[9]
49
+ attack_eps = args[10]
50
+ attack_eps_steps = args[11]
51
+ x_location = args[12]
52
+ y_location = args[13]
53
+ patch_height = args[14]
54
+ patch_width = args[15]
55
+ data_type = args[-1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
+ if model_type == "Example":
58
+ model = transformers.AutoModelForImageClassification.from_pretrained(
59
+ 'facebook/deit-tiny-distilled-patch16-224',
60
+ ignore_mismatched_sizes=True,
61
+ num_labels=10
62
+ )
63
+ upsampler = torch.nn.Upsample(scale_factor=7, mode='nearest')
64
+ optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
65
+ loss_fn = torch.nn.CrossEntropyLoss()
66
 
67
+ hf_model = HuggingFaceClassifierPyTorch(
68
+ model=model,
69
+ loss=loss_fn,
70
+ optimizer=optimizer,
71
+ input_shape=(3, 32, 32),
72
+ nb_classes=10,
73
+ clip_values=(0, 1),
74
+ processor=upsampler
75
+ )
76
+ model_checkpoint_path = './state_dicts/deit_cifar_base_model.pt'
77
+ hf_model.model.load_state_dict(torch.load(model_checkpoint_path, map_location=device))
78
+
79
+ if data_type == "Example":
80
+ (x_train, y_train), (_, _), _, _ = load_dataset('cifar10')
81
+ x_train = np.transpose(x_train, (0, 3, 1, 2)).astype(np.float32)
82
+ y_train = np.argmax(y_train, axis=1)
 
 
 
83
 
84
+ classes = np.unique(y_train)
85
+ samples_per_class = 1
86
 
87
+ x_subset = []
88
+ y_subset = []
89
 
90
+ for c in classes:
91
+ indices = y_train == c
92
+ x_subset.append(x_train[indices][:samples_per_class])
93
+ y_subset.append(y_train[indices][:samples_per_class])
94
+
95
+ x_subset = np.concatenate(x_subset)
96
+ y_subset = np.concatenate(y_subset)
97
+
98
+ label_names = [
99
+ 'airplane',
100
+ 'automobile',
101
+ 'bird',
102
+ 'cat',
103
+ 'deer',
104
+ 'dog',
105
+ 'frog',
106
+ 'horse',
107
+ 'ship',
108
+ 'truck',
109
+ ]
110
+
111
+ outputs = hf_model.predict(x_subset)
112
+ clean_preds = np.argmax(outputs, axis=1)
113
+ clean_acc = np.mean(clean_preds == y_subset)
114
+ benign_gallery_out = []
115
+ for i, im in enumerate(x_subset):
116
+ benign_gallery_out.append(( im.transpose(1,2,0), label_names[np.argmax(outputs[i])] ))
117
+
118
+ if attack == "PGD":
119
+ attacker = ProjectedGradientDescentPyTorch(hf_model, max_iter=attack_max_iter,
120
+ eps=attack_eps, eps_step=attack_eps_steps)
121
+ x_adv = attacker.generate(x_subset)
122
+
123
+ outputs = hf_model.predict(x_adv)
124
+ adv_preds = np.argmax(outputs, axis=1)
125
+ adv_acc = np.mean(adv_preds == y_subset)
126
+ adv_gallery_out = []
127
+ for i, im in enumerate(x_adv):
128
+ adv_gallery_out.append(( im.transpose(1,2,0), label_names[np.argmax(outputs[i])] ))
129
+
130
+ delta = ((x_subset - x_adv) + 8/255) * 10
131
+ delta_gallery_out = delta.transpose(0, 2, 3, 1)
132
+
133
+ if attack == "Adversarial Patch":
134
+ scale_min = 0.3
135
+ scale_max = 1.0
136
+ rotation_max = 0
137
+ learning_rate = 5000.
138
+ attacker = AdversarialPatchPyTorch(hf_model, scale_max=scale_max,
139
+ scale_min=scale_min,
140
+ rotation_max=rotation_max,
141
+ learning_rate=learning_rate,
142
+ max_iter=attack_max_iter, patch_type='square',
143
+ patch_location=(x_location, y_location),
144
+ patch_shape=(3, patch_height, patch_width))
145
+ patch, _ = attacker.generate(x_subset)
146
+ x_adv = attacker.apply_patch(x_subset, scale=0.3)
147
+
148
+ outputs = hf_model.predict(x_adv)
149
+ adv_preds = np.argmax(outputs, axis=1)
150
+ adv_acc = np.mean(adv_preds == y_subset)
151
+ adv_gallery_out = []
152
+ for i, im in enumerate(x_adv):
153
+ adv_gallery_out.append(( im.transpose(1,2,0), label_names[np.argmax(outputs[i])] ))
154
+
155
+ delta_gallery_out = np.expand_dims(patch, 0).transpose(0,2,3,1)
156
 
157
+ return benign_gallery_out, adv_gallery_out, delta_gallery_out, clean_acc, adv_acc
158
+
159
+ def clf_poison_evaluate(*args):
160
+
161
+ attack = args[0]
162
+ model_type = args[1]
163
+ trigger_image = args[2]
164
+ target_class = args[3]
165
+ data_type = args[-1]
166
+
167
+ if model_type == "Example":
168
+ model = transformers.AutoModelForImageClassification.from_pretrained(
169
+ 'facebook/deit-tiny-distilled-patch16-224',
170
+ ignore_mismatched_sizes=True,
171
+ num_labels=10
172
+ )
173
+ optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
174
+ loss_fn = torch.nn.CrossEntropyLoss()
175
 
176
+ hf_model = HuggingFaceClassifierPyTorch(
177
+ model=model,
178
+ loss=loss_fn,
179
+ optimizer=optimizer,
180
+ input_shape=(3, 224, 224),
181
+ nb_classes=10,
182
+ clip_values=(0, 1),
183
+ )
184
+
185
+ if data_type == "Example":
186
+ import torchvision
187
+ transform = torchvision.transforms.Compose([
188
+ torchvision.transforms.Resize((224, 224)),
189
+ torchvision.transforms.ToTensor(),
190
+ ])
191
+ train_dataset = torchvision.datasets.ImageFolder(root="./data/imagenette2-320/train", transform=transform)
192
+ labels = np.asarray(train_dataset.targets)
193
+ classes = np.unique(labels)
194
+ samples_per_class = 100
195
 
196
+ x_subset = []
197
+ y_subset = []
198
 
199
+ for c in classes:
200
+ indices = np.where(labels == c)[0][:samples_per_class]
201
+ for i in indices:
202
+ x_subset.append(train_dataset[i][0])
203
+ y_subset.append(train_dataset[i][1])
204
+
205
+ x_subset = np.stack(x_subset)
206
+ y_subset = np.asarray(y_subset)
207
+ label_names = [
208
+ 'fish',
209
+ 'dog',
210
+ 'cassette player',
211
+ 'chainsaw',
212
+ 'church',
213
+ 'french horn',
214
+ 'garbage truck',
215
+ 'gas pump',
216
+ 'golf ball',
217
+ 'parachutte',
218
+ ]
219
+
220
+ if attack == "Backdoor":
221
+ from PIL import Image
222
+ im = Image.fromarray(trigger_image)
223
+ im.save("./tmp.png")
224
+ def poison_func(x):
225
+ return insert_image(
226
+ x,
227
+ backdoor_path='./tmp.png',
228
+ channels_first=True,
229
+ random=False,
230
+ x_shift=0,
231
+ y_shift=0,
232
+ size=(32, 32),
233
+ mode='RGB',
234
+ blend=0.8
235
  )
236
+ backdoor = PoisoningAttackBackdoor(poison_func)
237
+ source_class = 0
238
+ poison_percent = 0.5
239
 
240
+ x_poison = np.copy(x_subset)
241
+ y_poison = np.copy(y_subset)
242
+ is_poison = np.zeros(len(x_subset)).astype(bool)
243
+
244
+ indices = np.where(y_subset == source_class)[0]
245
+ num_poison = int(poison_percent * len(indices))
246
+
247
+ for i in indices[:num_poison]:
248
+ x_poison[i], _ = backdoor.poison(x_poison[i], [])
249
+ y_poison[i] = target_class
250
+ is_poison[i] = True
251
+
252
+ poison_indices = np.where(is_poison)[0]
253
+ hf_model.fit(x_poison, y_poison, nb_epochs=2)
254
+
255
+ clean_x = x_poison[~is_poison]
256
+ clean_y = y_poison[~is_poison]
257
+
258
+ outputs = hf_model.predict(clean_x)
259
+ clean_preds = np.argmax(outputs, axis=1)
260
+ clean_acc = np.mean(clean_preds == clean_y)
261
+
262
+ poison_x = x_poison[is_poison]
263
+ poison_y = y_poison[is_poison]
264
+
265
+ outputs = hf_model.predict(poison_x)
266
+ poison_preds = np.argmax(outputs, axis=1)
267
+ poison_acc = np.mean(poison_preds == poison_y)
268
+
269
+ poison_out = []
270
+ for i, im in enumerate(poison_x):
271
+ poison_out.append( (im.transpose(1,2,0), label_names[poison_preds[i]]) )
272
+
273
+ return poison_out, clean_acc, poison_acc
274
+
 
 
 
 
 
 
 
 
 
275
 
276
+ def show_params(type):
277
  '''
278
  Show model parameters based on selected model type
279
  '''
280
+ if type!="Example":
281
  return gr.Column(visible=True)
282
  return gr.Column(visible=False)
283
+
284
+ def run_inference(*args):
285
+ model_type = args[0]
286
+ model_url = args[1]
287
+ model_channels = args[2]
288
+ model_height = args[3]
289
+ model_width = args[4]
290
+ model_classes = args[5]
291
+ model_clip = args[6]
292
+ model_upsample = args[7]
293
+ data_type = args[8]
294
 
295
+ if model_type == "Example":
296
+ model = transformers.AutoModelForImageClassification.from_pretrained(
297
+ 'facebook/deit-tiny-distilled-patch16-224',
298
+ ignore_mismatched_sizes=True,
299
+ num_labels=10
300
+ )
301
+ upsampler = torch.nn.Upsample(scale_factor=7, mode='nearest')
302
+ optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
303
+ loss_fn = torch.nn.CrossEntropyLoss()
 
 
 
 
 
 
 
 
304
 
305
+ hf_model = HuggingFaceClassifierPyTorch(
306
+ model=model,
307
+ loss=loss_fn,
308
+ optimizer=optimizer,
309
+ input_shape=(3, 32, 32),
310
+ nb_classes=10,
311
+ clip_values=(0, 1),
312
+ processor=upsampler
313
+ )
314
+ model_checkpoint_path = './state_dicts/deit_cifar_base_model.pt'
315
+ hf_model.model.load_state_dict(torch.load(model_checkpoint_path, map_location=device))
316
+
317
+ if data_type == "Example":
318
+ (x_train, y_train), (_, _), _, _ = load_dataset('cifar10')
319
+ x_train = np.transpose(x_train, (0, 3, 1, 2)).astype(np.float32)
320
+ y_train = np.argmax(y_train, axis=1)
321
 
322
+ classes = np.unique(y_train)
323
+ samples_per_class = 5
324
+
325
+ x_subset = []
326
+ y_subset = []
327
+
328
+ for c in classes:
329
+ indices = y_train == c
330
+ x_subset.append(x_train[indices][:samples_per_class])
331
+ y_subset.append(y_train[indices][:samples_per_class])
332
+
333
+ x_subset = np.concatenate(x_subset)
334
+ y_subset = np.concatenate(y_subset)
335
+
336
+ label_names = [
337
+ 'airplane',
338
+ 'automobile',
339
+ 'bird',
340
+ 'cat',
341
+ 'deer',
342
+ 'dog',
343
+ 'frog',
344
+ 'horse',
345
+ 'ship',
346
+ 'truck',
347
+ ]
348
+
349
+ outputs = hf_model.predict(x_subset)
350
+ clean_preds = np.argmax(outputs, axis=1)
351
+ clean_acc = np.mean(clean_preds == y_subset)
352
+ gallery_out = []
353
+ for i, im in enumerate(x_subset):
354
+ gallery_out.append(( im.transpose(1,2,0), label_names[np.argmax(outputs[i])] ))
355
+
356
+ return gallery_out, clean_acc
357
+
358
+
359
 
360
  # e.g. To use a local alternative theme: carbon_theme = Carbon()
361
  carbon_theme = Carbon()
362
+ with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
363
  import art
364
  text = art.__version__
 
365
 
366
+ with gr.Row():
367
+ with gr.Column(scale=1):
368
+ gr.Image(value="./art_lfai.png", show_label=False, show_download_button=False, width=100)
369
+ with gr.Column(scale=20):
370
+ gr.Markdown(f"<h1>Red-teaming HuggingFace with ART (v{text})</h1>", elem_classes="plot-padding")
371
+
372
 
373
+ gr.Markdown('''This app guides you through a common workflow for assessing the robustness
374
+ of HuggingFace models using standard datasets and state-of-the-art adversarial attacks
375
+ found within the Adversarial Robustness Toolbox (ART).<br/><br/>Follow the instructions in each
376
+ step below to carry out your own evaluation and determine the risks associated with using
377
+ some of your favorite models! <b>#redteaming</b> <b>#trustworthyAI</b>''')
378
+
379
+ # Model and Dataset Selection
380
+ with gr.Accordion("1. Model selection", open=False):
381
+
382
+ gr.Markdown("Select a Hugging Face model to launch an adversarial attack against.")
383
+ model_type = gr.Radio(label="Hugging Face Model", choices=["Example", "Other"], value="Example")
384
+ with gr.Column(visible=False) as other_model:
385
+ model_url = gr.Text(label="Model URL",
386
+ placeholder="e.g. facebook/deit-tiny-distilled-patch16-224",
387
+ value='facebook/deit-tiny-distilled-patch16-224')
388
+ model_input_channels = gr.Text(label="Input channels", value=3)
389
+ model_input_height = gr.Text(label="Input height", value=32)
390
+ model_input_width = gr.Text(label="Input width", value=32)
391
+ model_num_classes = gr.Text(label="Number of classes", value=10)
392
+ model_clip_values = gr.Radio(label="Clip values", choices=[1, 255], value=1)
393
+ model_upsample_scaling = gr.Slider(label="Upsample scale factor", minimum=1, maximum=10, value=7)
394
+
395
+ model_type.change(show_params, model_type, other_model)
396
+
397
+ with gr.Accordion("2. Data selection", open=False):
398
+ gr.Markdown("This section enables you to select a dataset for evaluation or upload your own image.")
399
+ data_type = gr.Radio(label="Hugging Face dataset", choices=["Example", "URL", "Local"], value="Example")
400
+ with gr.Column(visible=False) as other_dataset:
401
+ gr.Markdown("Coming soon.")
402
+ data_type.change(show_params, data_type, other_dataset)
403
+
404
+ with gr.Accordion("3. Model inference", open=False):
405
 
 
406
  with gr.Row():
407
+ with gr.Column(scale=1):
408
+ preds_gallery = gr.Gallery(label="Predictions", preview=False, show_download_button=True)
409
+ with gr.Column(scale=2):
410
+ clean_accuracy = gr.Number(label="Clean accuracy",
411
+ info="The accuracy achieved by the model in normal (non-adversarial) conditions.")
412
+ bt_run_inference = gr.Button("Run inference")
413
+ bt_clear = gr.ClearButton(components=[preds_gallery, clean_accuracy])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
 
415
+ bt_run_inference.click(run_inference, inputs=[model_type, model_url, model_input_channels, model_input_height, model_input_width,
416
+ model_num_classes, model_clip_values, model_upsample_scaling, data_type],
417
+ outputs=[preds_gallery, clean_accuracy])
418
 
419
+ # Attack Selection
420
+ with gr.Accordion("4. Run attack", open=False):
421
+
422
+ gr.Markdown("In this section you can select the type of adversarial attack you wish to deploy against your selected model.")
423
 
424
+ with gr.Accordion("Evasion", open=False):
425
+ gr.Markdown("Evasion attacks are deployed to cause a model to incorrectly classify or detect items/objects in an image.")
426
+
427
+ with gr.Accordion("Projected Gradient Descent", open=False):
428
+ gr.Markdown("This attack uses PGD to identify adversarial examples.")
 
 
 
429
 
430
+ with gr.Row():
 
 
 
 
431
 
432
+ with gr.Column(scale=1):
433
+ attack = gr.Textbox(visible=True, value="PGD", label="Attack", interactive=False)
434
+ max_iter = gr.Slider(minimum=1, maximum=1000, label="Max iterations", value=10)
435
+ eps = gr.Slider(minimum=0.0001, maximum=255, label="Epslion", value=8/255)
436
+ eps_steps = gr.Slider(minimum=0.0001, maximum=255, label="Epsilon steps", value=1/255)
437
+ bt_eval_pgd = gr.Button("Evaluate")
438
 
439
+ # Evaluation Output. Visualisations of success/failures of running evaluation attacks.
440
+ with gr.Column(scale=3):
441
  with gr.Row():
 
442
  with gr.Column():
443
+ original_gallery = gr.Gallery(label="Original", preview=False, show_download_button=True)
444
+ benign_output = gr.Label(num_top_classes=3, visible=False)
445
+ clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
446
+ quality_plot = gr.LinePlot(label="Gradient Quality", x='iteration', y='value', color='metric',
447
+ x_title='Iteration', y_title='Avg in Gradients (%)',
448
+ caption="""Illustrates the average percent of zero, infinity
449
+ or NaN gradients identified in images
450
+ across all batches.""", elem_classes="plot-padding", visible=False)
451
 
 
452
  with gr.Column():
453
+ adversarial_gallery = gr.Gallery(label="Adversarial", preview=False, show_download_button=True)
454
+ adversarial_output = gr.Label(num_top_classes=3, visible=False)
455
+ robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)
456
+
457
+ with gr.Column():
458
+ delta_gallery = gr.Gallery(label="Added perturbation", preview=False, show_download_button=True)
459
+
460
+ bt_eval_pgd.click(clf_evasion_evaluate, inputs=[attack, model_type, model_url, model_input_channels, model_input_height, model_input_width,
461
+ model_num_classes, model_clip_values, model_upsample_scaling,
462
+ max_iter, eps, eps_steps, attack, attack, attack, attack, data_type],
463
+ outputs=[original_gallery, adversarial_gallery, delta_gallery, clean_accuracy,
464
+ robust_accuracy])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
 
466
+ with gr.Accordion("Adversarial Patch", open=False):
467
+ gr.Markdown("This attack crafts an adversarial patch that facilitates evasion.")
468
+
469
+ with gr.Row():
470
+
471
+ with gr.Column(scale=1):
472
+ attack = gr.Textbox(visible=True, value="Adversarial Patch", label="Attack", interactive=False)
473
+ max_iter = gr.Slider(minimum=1, maximum=1000, label="Max iterations", value=10)
474
+ x_location = gr.Slider(minimum=1, maximum=32, label="Location (x)", value=1)
475
+ y_location = gr.Slider(minimum=1, maximum=32, label="Location (y)", value=1)
476
+ patch_height = gr.Slider(minimum=1, maximum=32, label="Patch height", value=12)
477
+ patch_width = gr.Slider(minimum=1, maximum=32, label="Patch width", value=12)
478
+ eval_btn_patch = gr.Button("Evaluate")
479
 
480
+ # Evaluation Output. Visualisations of success/failures of running evaluation attacks.
481
+ with gr.Column(scale=3):
482
  with gr.Row():
 
483
  with gr.Column():
484
+ original_gallery = gr.Gallery(label="Original", preview=False, show_download_button=True)
485
+ clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
 
 
 
 
 
 
 
486
 
 
487
  with gr.Column():
488
+ adversarial_gallery = gr.Gallery(label="Adversarial", preview=False, show_download_button=True)
489
+ robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)
490
+
491
+ with gr.Column():
492
+ delta_gallery = gr.Gallery(label="Patches", preview=False, show_download_button=True)
493
+
494
+ eval_btn_patch.click(clf_evasion_evaluate, inputs=[attack, model_type, model_url, model_input_channels, model_input_height, model_input_width,
495
+ model_num_classes, model_clip_values, model_upsample_scaling,
496
+ max_iter, eps, eps_steps, x_location, y_location, patch_height, patch_width, data_type],
497
+ outputs=[original_gallery, adversarial_gallery, delta_gallery, clean_accuracy,
498
+ robust_accuracy])
499
+
500
+ with gr.Accordion("Poisoning", open=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
 
502
+ with gr.Accordion("Backdoor"):
503
+
504
+ with gr.Row():
505
+ with gr.Column(scale=1):
506
+ attack = gr.Textbox(visible=True, value="Backdoor", label="Attack", interactive=False)
507
+ target_class = gr.Number(label="Target class", info="The class you wish to force the model to predict.",
508
+ minimum=1, maximum=9, value=1)
509
+ trigger_image = gr.Image(label="Trigger Image", value="./baby-on-board.png")
510
+ eval_btn_patch = gr.Button("Evaluate")
511
+ with gr.Column(scale=2):
512
+ poison_gallery = gr.Gallery(label="Poisoned", preview=False, show_download_button=True)
513
+ with gr.Column(scale=2):
514
+ clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
515
+ poison_success = gr.Number(label="Poison Success", precision=2)
516
 
517
+ eval_btn_patch.click(clf_poison_evaluate, inputs=[attack, model_type, trigger_image, target_class, data_type],
518
+ outputs=[poison_gallery, clean_accuracy, poison_success])
519
+
 
 
 
 
520
  if __name__ == "__main__":
521
 
522
  # during development, set debug=True
523
+ demo.launch(show_api=False, debug=True, share=False,
524
  server_name="0.0.0.0",
525
  server_port=7777,
526
  ssl_verify=False,
527
+ max_threads=20)
528
+ '''demo.launch(share=True, ssl_verify=False)'''
art_lfai.png ADDED
baby-on-board.png ADDED
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00000293.JPEG ADDED
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00002138.JPEG ADDED
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00003014.JPEG ADDED
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00006697.JPEG ADDED
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00007197.JPEG ADDED
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00009346.JPEG ADDED
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00009379.JPEG ADDED
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00009396.JPEG ADDED
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00010306.JPEG ADDED
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00011233.JPEG ADDED
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00011993.JPEG ADDED
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00012503.JPEG ADDED
requirements.txt CHANGED
@@ -1,9 +1,10 @@
1
- gradio
2
- adversarial-robustness-toolbox
3
  pandas
4
  jupyter
5
-
6
- torch==1.13.1
 
7
  tensorflow==2.10.1; sys_platform != "darwin"
8
  tensorflow-macos; sys_platform == "darwin"
9
- tensorflow-metal; sys_platform == "darwin"
 
 
 
 
 
1
  pandas
2
  jupyter
3
+ torch
4
+ torchvision
5
+ transformers
6
  tensorflow==2.10.1; sys_platform != "darwin"
7
  tensorflow-macos; sys_platform == "darwin"
8
+ tensorflow-metal; sys_platform == "darwin"
9
+ adversarial-robustness-toolbox
10
+ gradio==4.2
state_dicts/deit_cifar_base_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3add51bcd51ca3c1c7836d60cabf85798c8c551e8bc9c4450f4fb6cb3227421
3
+ size 22192555
state_dicts/deit_imagenette_poisoned_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ead74cf5a180328dfb7fa179d91d51f79081f25eb7de7a146d0ab0cbc0dd01b
3
+ size 22192555