Spaces:

kieranfraser
/

example

Sleeping

App Files Files Community

Kieran Fraser committed on Dec 12, 2023

Commit

c6e9ef2

1 Parent(s): 223fcbd

Example ART GUI

Browse files

Signed-off-by: Kieran Fraser <Kieran.Fraser@ibm.com>

Files changed (18) hide show

app.py +455 -358
art_lfai.png +0 -0
baby-on-board.png +0 -0
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00000293.JPEG +0 -0
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00002138.JPEG +0 -0
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00003014.JPEG +0 -0
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00006697.JPEG +0 -0
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00007197.JPEG +0 -0
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00009346.JPEG +0 -0
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00009379.JPEG +0 -0
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00009396.JPEG +0 -0
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00010306.JPEG +0 -0
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00011233.JPEG +0 -0
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00011993.JPEG +0 -0
data/imagenette2-320/train/n01440764/ILSVRC2012_val_00012503.JPEG +0 -0
requirements.txt +6 -5
state_dicts/deit_cifar_base_model.pt +3 -0
state_dicts/deit_imagenette_poisoned_model.pt +3 -0

app.py CHANGED Viewed

@@ -7,425 +7,522 @@ To run:
 - navigate to local URL e.g. http://127.0.0.1:7860
 '''
-import torch
 import numpy as np
-import pandas as pd
 from carbon_theme import Carbon
-import gradio as gr
 import os
 import matplotlib.pyplot as plt
 css = """
 .input-image { margin: auto !important }
 .plot-padding { padding: 20px; }
 """
-def extract_predictions(predictions_, conf_thresh):
-    coco_labels = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
-        'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
-        'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
-        'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
-        'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
-        'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
-        'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
-        'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
-        'teddy bear', 'hair drier', 'toothbrush']
-    # Get the predicted class
-    predictions_class = [coco_labels[i] for i in list(predictions_["labels"])]
-    #  print("\npredicted classes:", predictions_class)
-    if len(predictions_class) < 1:
-        return [], [], []
-    # Get the predicted bounding boxes
-    predictions_boxes = [[(i[0], i[1]), (i[2], i[3])] for i in list(predictions_["boxes"])]
-    # Get the predicted prediction score
-    predictions_score = list(predictions_["scores"])
-    # print("predicted score:", predictions_score)
-    # Get a list of index with score greater than threshold
-    threshold = conf_thresh
-    predictions_t = [predictions_score.index(x) for x in predictions_score if x > threshold]
-    if len(predictions_t) > 0:
-        predictions_t = predictions_t  # [-1] #indices where score over threshold
-    else:
-        # no predictions esxceeding threshold
-        return [], [], []
-    # predictions in score order
-    predictions_boxes = [predictions_boxes[i] for i in predictions_t]
-    predictions_class = [predictions_class[i] for i in predictions_t]
-    predictions_scores = [predictions_score[i] for i in predictions_t]
-    return predictions_class, predictions_boxes, predictions_scores
-def plot_image_with_boxes(img, boxes, pred_cls, title):
-    import cv2
-    text_size = 1
-    text_th = 2
-    rect_th = 1
-    sections = []
-    for i in range(len(boxes)):
-        cv2.rectangle(img, (int(boxes[i][0][0]), int(boxes[i][0][1])), (int(boxes[i][1][0]), int(boxes[i][1][1])),
-                      color=(0, 255, 0), thickness=rect_th)
-        # Write the prediction class
-        cv2.putText(img, pred_cls[i], (int(boxes[i][0][0]), int(boxes[i][0][1])), cv2.FONT_HERSHEY_SIMPLEX, text_size,
-                    (0, 255, 0), thickness=text_th)
-        sections.append( ((int(boxes[i][0][0]),
-                           int(boxes[i][0][1]),
-                           int(boxes[i][1][0]),
-                           int(boxes[i][1][1])), (pred_cls[i])) )
-    return img.astype(np.uint8)
-def filter_boxes(predictions, conf_thresh):
-    dictionary = {}
-    boxes_list = []
-    scores_list = []
-    labels_list = []
-    for i in range(len(predictions[0]["boxes"])):
-        score = predictions[0]["scores"][i]
-        if score >= conf_thresh:
-            boxes_list.append(predictions[0]["boxes"][i])
-            scores_list.append(predictions[0]["scores"][[i]])
-            labels_list.append(predictions[0]["labels"][[i]])
-    dictionary["boxes"] = np.vstack(boxes_list)
-    dictionary["scores"] = np.hstack(scores_list)
-    dictionary["labels"] = np.hstack(labels_list)
-    y = [dictionary]
-    return y
-def basic_cifar10_model(overfit=False):
-    '''
-    Load an example CIFAR10 model
-    '''
-    from art.estimators.classification.pytorch import PyTorchClassifier
-    labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
-    path = './'
-    class Model(torch.nn.Module):
-        """
-        Create model for pytorch.
-        Here the model does not use maxpooling. Needed for certification tests.
-        """
-        def __init__(self):
-            super(Model, self).__init__()
-            self.conv = torch.nn.Conv2d(
-                in_channels=3, out_channels=16, kernel_size=(4, 4), dilation=(1, 1), padding=(0, 0), stride=(3, 3)
-            )
-            self.fullyconnected = torch.nn.Linear(in_features=1600, out_features=10)
-            self.relu = torch.nn.ReLU()
-            w_conv2d = np.load(
-                os.path.join(
-                    os.path.dirname(path),
-                    "utils/resources/models",
-                    "W_CONV2D_NO_MPOOL_CIFAR10.npy",
-                )
-            )
-            b_conv2d = np.load(
-                os.path.join(
-                    os.path.dirname(path),
-                    "utils/resources/models",
-                    "B_CONV2D_NO_MPOOL_CIFAR10.npy",
-                )
-            )
-            w_dense = np.load(
-                os.path.join(
-                    os.path.dirname(path),
-                    "utils/resources/models",
-                    "W_DENSE_NO_MPOOL_CIFAR10.npy",
-                )
-            )
-            b_dense = np.load(
-                os.path.join(
-                    os.path.dirname(path),
-                    "utils/resources/models",
-                    "B_DENSE_NO_MPOOL_CIFAR10.npy",
-                )
             )
-            self.conv.weight = torch.nn.Parameter(torch.Tensor(w_conv2d))
-            self.conv.bias = torch.nn.Parameter(torch.Tensor(b_conv2d))
-            self.fullyconnected.weight = torch.nn.Parameter(torch.Tensor(w_dense))
-            self.fullyconnected.bias = torch.nn.Parameter(torch.Tensor(b_dense))
-        # pylint: disable=W0221
-        # disable pylint because of API requirements for function
-        def forward(self, x):
-            """
-            Forward function to evaluate the model
-            :param x: Input to the model
-            :return: Prediction of the model
-            """
-            x = self.conv(x)
-            x = self.relu(x)
-            x = x.reshape(-1, 1600)
-            x = self.fullyconnected(x)
-            return x
-    # Define the network
-    model = Model()
-    # Define a loss function and optimizer
-    if overfit:
-        loss_fn = torch.nn.CrossEntropyLoss(reduction="sum")
-        optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0)
-    else:
-        loss_fn = torch.nn.CrossEntropyLoss(reduction="sum")
-        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
-    # Get classifier
-    jptc = PyTorchClassifier(
-        model=model, loss=loss_fn, optimizer=optimizer, input_shape=(3, 32, 32), nb_classes=10, clip_values=(0, 1), labels=labels
-    )
-    return jptc
-def det_evasion_evaluate(*args):
-    '''
-    Run a detection task evaluation
-    '''
-def clf_evasion_evaluate(*args):
-    '''
-    Run a classification task evaluation
-    '''
-def show_model_params(model_type):
     '''
     Show model parameters based on selected model type
     '''
-    if model_type!="Example CIFAR10" and model_type!="Example XView" and model_type!="CIFAR10 Overfit":
         return gr.Column(visible=True)
     return gr.Column(visible=False)
-def show_dataset_params(dataset_type):
-    '''
-    Show dataset parameters based on dataset type
-    '''
-    if dataset_type=="Example CIFAR10":
-        return [gr.Column(visible=False), gr.Row(visible=False), gr.Row(visible=False)]
-    elif dataset_type=="local":
-        return [gr.Column(visible=True), gr.Row(visible=True), gr.Row(visible=False)]
-    return [gr.Column(visible=True), gr.Row(visible=False), gr.Row(visible=True)]
-def pgd_show_label_output(dataset_type):
-    '''
-    Show PGD output component based on dataset type
-    '''
-    if dataset_type=="local":
-        return [gr.Label(visible=True), gr.Label(visible=True), gr.Number(visible=False), gr.Number(visible=False), gr.Number(visible=True)]
-    return [gr.Label(visible=False), gr.Label(visible=False), gr.Number(visible=True), gr.Number(visible=True), gr.Number(visible=True)]
-def pgd_update_epsilon(clip_values):
-    '''
-    Update max value of PGD epsilon slider based on model clip values
-    '''
-    if clip_values == 255:
-        return gr.Slider(minimum=0.0001, maximum=255, label="Epslion", value=55)
-    return gr.Slider(minimum=0.0001, maximum=1, label="Epslion", value=0.05)
-def patch_show_label_output(dataset_type):
-    '''
-    Show adversarial patch output components based on dataset type
-    '''
-    if dataset_type=="local":
-        return [gr.Label(visible=True), gr.Label(visible=True), gr.Number(visible=False), gr.Number(visible=False), gr.Number(visible=True)]
-    return [gr.Label(visible=False), gr.Label(visible=False), gr.Number(visible=True), gr.Number(visible=True), gr.Number(visible=True)]
 # e.g. To use a local alternative theme: carbon_theme = Carbon()
 carbon_theme = Carbon()
-with gr.Blocks(css=css, theme=carbon_theme) as demo:
     import art
     text = art.__version__
-    gr.Markdown(f"<h1>ART (v{text}) Gradio Example</h1>")
-    with gr.Tab("Info"):
-        gr.Markdown('This is step 1. Using the tabs, select a task for evaluation.')
-    with gr.Tab("Classification", elem_classes="task-tab"):
-        gr.Markdown("Classifying images with a set of categories.")
-        # Model and Dataset Selection
         with gr.Row():
-            # Model and Dataset type e.g. Torchvision, HuggingFace, local etc.
-            with gr.Column():
-                model_type = gr.Radio(label="Model type", choices=["Example CIFAR10", "Huggingface", "torchvision"],
-                                    value="Example CIFAR10")
-                dataset_type = gr.Radio(label="Dataset", choices=["Example CIFAR10", "Huggingface", "local"],
-                                    value="Example CIFAR10")
-            # Model parameters e.g. RESNET, VIT, input dimensions, clipping values etc.
-            with gr.Column(visible=False) as model_params:
-                model_path = gr.Textbox(placeholder="URL", label="Model path")
-                with gr.Row():
-                    with gr.Column():
-                        model_channels = gr.Textbox(placeholder="Integer, 3 for RGB images", label="Input Channels", value=3)
-                    with gr.Column():
-                        model_width = gr.Textbox(placeholder="Integer", label="Input Width", value=640)
-                with gr.Row():
-                    with gr.Column():
-                        model_height = gr.Textbox(placeholder="Integer", label="Input Height", value=480)
-                    with gr.Column():
-                        model_clip = gr.Radio(choices=[1, 255], label="Pixel clip", value=1)
-            # Dataset parameters e.g. Torchvision, HuggingFace, local etc.
-            with gr.Column(visible=False) as dataset_params:
-                with gr.Row() as local_image:
-                    image = gr.Image(sources=['upload'], type="pil", height=150, width=150, elem_classes="input-image")
-                with gr.Row() as hosted_image:
-                    dataset_path = gr.Textbox(placeholder="URL", label="Dataset path")
-                    dataset_split = gr.Textbox(placeholder="test", label="Dataset split")
-            model_type.change(show_model_params, model_type, model_params)
-            dataset_type.change(show_dataset_params, dataset_type, [dataset_params, local_image, hosted_image])
-        # Attack Selection
-        with gr.Row():
-            with gr.Tab("Info"):
-                gr.Markdown("This is step 2. Select the type of attack for evaluation.")
-            with gr.Tab("White Box"):
-                gr.Markdown("White box attacks assume the attacker has __full access__ to the model.")
-                with gr.Tab("Info"):
-                    gr.Markdown("This is step 3. Select the type of white-box attack to evaluate.")
-                with gr.Tab("Evasion"):
-                    gr.Markdown("Evasion attacks are deployed to cause a model to incorrectly classify or detect items/objects in an image.")
-                    with gr.Tab("Info"):
-                        gr.Markdown("This is step 4. Select the type of Evasion attack to evaluate.")
-                    with gr.Tab("Projected Gradient Descent"):
-                        gr.Markdown("This attack uses PGD to identify adversarial examples.")
                         with gr.Row():
                             with gr.Column():
-                                attack = gr.Textbox(visible=True, value="PGD", label="Attack", interactive=False)
-                                max_iter = gr.Slider(minimum=1, maximum=5000, label="Max iterations", value=1000)
-                                eps = gr.Slider(minimum=0.0001, maximum=1, label="Epslion", value=0.05)
-                                eps_steps = gr.Slider(minimum=0.001, maximum=1000, label="Epsilon steps", value=0.1)
-                                targeted = gr.Textbox(placeholder="Target label (integer)", label="Target")
-                                eval_btn_pgd = gr.Button("Evaluate")
-                                model_clip.change(pgd_update_epsilon, model_clip, eps)
-                            # Evaluation Output. Visualisations of success/failures of running evaluation attacks.
                             with gr.Column():
-                                with gr.Row():
-                                    with gr.Column():
-                                        original_gallery = gr.Gallery(label="Original", preview=True, show_download_button=True)
-                                        benign_output = gr.Label(num_top_classes=3, visible=False)
-                                        clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
-                                        quality_plot = gr.LinePlot(label="Gradient Quality", x='iteration', y='value', color='metric',
-                                                                   x_title='Iteration', y_title='Avg in Gradients (%)',
-                                                                   caption="""Illustrates the average percent of zero, infinity
-                                                                   or NaN gradients identified in images
-                                                                   across all batches.""", elem_classes="plot-padding", visible=False)
-                                    with gr.Column():
-                                        adversarial_gallery = gr.Gallery(label="Adversarial", preview=True, show_download_button=True)
-                                        adversarial_output = gr.Label(num_top_classes=3, visible=False)
-                                        robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)
-                                        perturbation_added = gr.Number(label="Perturbation Added", precision=2)
-                                dataset_type.change(pgd_show_label_output, dataset_type, [benign_output, adversarial_output,
-                                                                                      clean_accuracy, robust_accuracy, perturbation_added])
-                                eval_btn_pgd.click(clf_evasion_evaluate, inputs=[attack, model_type, model_path, model_channels, model_height, model_width,
-                                                                             model_clip, max_iter, eps, eps_steps, targeted,
-                                                                             dataset_type, dataset_path, dataset_split, image],
-                                                    outputs=[original_gallery, benign_output, adversarial_gallery, adversarial_output, clean_accuracy,
-                                                             robust_accuracy, perturbation_added, quality_plot], api_name='patch')
-                        with gr.Row():
-                            clear_btn = gr.ClearButton([image, targeted, original_gallery, benign_output, clean_accuracy,
-                                                        adversarial_gallery, adversarial_output, robust_accuracy, perturbation_added])
-                    with gr.Tab("Adversarial Patch"):
-                        gr.Markdown("This attack crafts an adversarial patch that facilitates evasion.")
                         with gr.Row():
                             with gr.Column():
-                                attack = gr.Textbox(visible=True, value="Adversarial Patch", label="Attack", interactive=False)
-                                max_iter = gr.Slider(minimum=1, maximum=5000, label="Max iterations", value=100)
-                                x_location = gr.Slider(minimum=1, maximum=640, label="Location (x)", value=18)
-                                y_location = gr.Slider(minimum=1, maximum=480, label="Location (y)", value=18)
-                                patch_height = gr.Slider(minimum=1, maximum=640, label="Patch height", value=18)
-                                patch_width = gr.Slider(minimum=1, maximum=480, label="Patch width", value=18)
-                                targeted = gr.Textbox(placeholder="Target label (integer)", label="Target")
-                                eval_btn_patch = gr.Button("Evaluate")
-                                model_clip.change()
-                            # Evaluation Output. Visualisations of success/failures of running evaluation attacks.
                             with gr.Column():
-                                with gr.Row():
-                                    with gr.Column():
-                                        original_gallery = gr.Gallery(label="Original", preview=True, show_download_button=True)
-                                        benign_output = gr.Label(num_top_classes=3, visible=False)
-                                        clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
-                                    with gr.Column():
-                                        adversarial_gallery = gr.Gallery(label="Adversarial", preview=True, show_download_button=True)
-                                        adversarial_output = gr.Label(num_top_classes=3, visible=False)
-                                        robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)
-                                        patch_image = gr.Image(label="Adversarial Patch")
-                                dataset_type.change(patch_show_label_output, dataset_type, [benign_output, adversarial_output,
-                                                                                      clean_accuracy, robust_accuracy, patch_image])
-                                eval_btn_patch.click(clf_evasion_evaluate, inputs=[attack, model_type, model_path, model_channels, model_height, model_width,
-                                                                             model_clip, max_iter, x_location, y_location, patch_height, patch_width, targeted,
-                                                                             dataset_type, dataset_path, dataset_split, image],
-                                                    outputs=[original_gallery, benign_output, adversarial_gallery, adversarial_output, clean_accuracy,
-                                                             robust_accuracy, patch_image])
-                        with gr.Row():
-                            clear_btn = gr.ClearButton([image, targeted, original_gallery, benign_output, clean_accuracy,
-                                                        adversarial_gallery, adversarial_output, robust_accuracy, patch_image])
-                with gr.Tab("Poisoning"):
-                    gr.Markdown("Coming soon.")
-            with gr.Tab("Black Box"):
-                gr.Markdown("Black box attacks assume the attacker __does not__ have full access to the model but can query it for predictions.")
-                with gr.Tab("Info"):
-                    gr.Markdown("This is step 3. Select the type of black-box attack to evaluate.")
-                with gr.Tab("Evasion"):
-                    gr.Markdown("Evasion attacks are deployed to cause a model to incorrectly classify or detect items/objects in an image.")
-                    with gr.Tab("Info"):
-                        gr.Markdown("This is step 4. Select the type of Evasion attack to evaluate.")
-                    with gr.Tab("HopSkipJump"):
-                        gr.Markdown("Coming soon.")
-                    with gr.Tab("Square Attack"):
-                        gr.Markdown("Coming soon.")
-            with gr.Tab("AutoAttack"):
-                gr.Markdown("Coming soon.")
 if __name__ == "__main__":
     # during development, set debug=True
-    '''demo.launch(show_api=False, debug=True, share=True,
                 server_name="0.0.0.0",
                 server_port=7777,
                 ssl_verify=False,
-                max_threads=20)'''
-    demo.launch()

 - navigate to local URL e.g. http://127.0.0.1:7860
 '''
+import gradio as gr
 import numpy as np
 from carbon_theme import Carbon
 import os
+import numpy as np
 import matplotlib.pyplot as plt
+import torch
+import transformers
+from art.estimators.classification.hugging_face import HuggingFaceClassifierPyTorch
+from art.attacks.evasion import ProjectedGradientDescentPyTorch, AdversarialPatchPyTorch
+from art.utils import load_dataset
+from art.attacks.poisoning import PoisoningAttackBackdoor
+from art.attacks.poisoning.perturbations import insert_image
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 css = """
 .input-image { margin: auto !important }
 .plot-padding { padding: 20px; }
 """
+def clf_evasion_evaluate(*args):
+    '''
+    Run a classification task evasion evaluation.
+
+    Positional layout of ``args`` as read here: 0 attack name ("PGD" or
+    "Adversarial Patch"), 1 model type, 9 max iterations, 10 epsilon,
+    11 epsilon step, 12/13 patch x/y location, 14/15 patch height/width,
+    last element dataset type. Positions 2-8 (model url, channels, height,
+    width, classes, clip, upsample) are accepted but not used.
+
+    Returns a tuple ``(benign_gallery, adversarial_gallery, delta_gallery,
+    clean_accuracy, adversarial_accuracy)``. The first two galleries are
+    lists of ``(channels-last image, predicted label name)`` pairs; the
+    third is a channels-last array holding the amplified perturbation (PGD)
+    or the generated patch (Adversarial Patch).
+
+    Raises ``gr.Error`` when the model type, dataset type or attack is not
+    one of the values handled here.
+    '''
+    attack = args[0]
+    model_type = args[1]
+    attack_max_iter = args[9]
+    attack_eps = args[10]
+    attack_eps_steps = args[11]
+    x_location = args[12]
+    y_location = args[13]
+    patch_height = args[14]
+    patch_width = args[15]
+    data_type = args[-1]
+    # Fail fast with a user-facing message: without these checks any other
+    # selection would crash further down on an unbound local variable
+    # (hf_model, x_subset or adv_gallery_out).
+    if model_type != "Example":
+        raise gr.Error(f"Unsupported model type: {model_type}")
+    if data_type != "Example":
+        raise gr.Error(f"Unsupported dataset type: {data_type}")
+    if attack not in ("PGD", "Adversarial Patch"):
+        raise gr.Error(f"Unsupported attack: {attack}")
+    # Example model: DeiT-tiny with a 10-class head, restored from a local checkpoint.
+    model = transformers.AutoModelForImageClassification.from_pretrained(
+        'facebook/deit-tiny-distilled-patch16-224',
+        ignore_mismatched_sizes=True,
+        num_labels=10
+    )
+    # 32x32 inputs upsampled by 7 give 224x224, the size in the checkpoint name.
+    upsampler = torch.nn.Upsample(scale_factor=7, mode='nearest')
+    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
+    loss_fn = torch.nn.CrossEntropyLoss()
+    hf_model = HuggingFaceClassifierPyTorch(
+        model=model,
+        loss=loss_fn,
+        optimizer=optimizer,
+        input_shape=(3, 32, 32),
+        nb_classes=10,
+        clip_values=(0, 1),
+        processor=upsampler
+    )
+    model_checkpoint_path = './state_dicts/deit_cifar_base_model.pt'
+    hf_model.model.load_state_dict(torch.load(model_checkpoint_path, map_location=device))
+    # Example data: the first `samples_per_class` CIFAR-10 training images of each class.
+    (x_train, y_train), (_, _), _, _ = load_dataset('cifar10')
+    x_train = np.transpose(x_train, (0, 3, 1, 2)).astype(np.float32)
+    y_train = np.argmax(y_train, axis=1)
+    samples_per_class = 1
+    x_subset = []
+    y_subset = []
+    for c in np.unique(y_train):
+        indices = y_train == c
+        x_subset.append(x_train[indices][:samples_per_class])
+        y_subset.append(y_train[indices][:samples_per_class])
+    x_subset = np.concatenate(x_subset)
+    y_subset = np.concatenate(y_subset)
+    label_names = [
+        'airplane',
+        'automobile',
+        'bird',
+        'cat',
+        'deer',
+        'dog',
+        'frog',
+        'horse',
+        'ship',
+        'truck',
+    ]
+    def build_gallery(images, predictions):
+        # Pair each channels-last image with the name of its top predicted class.
+        return [(im.transpose(1, 2, 0), label_names[np.argmax(pred)])
+                for im, pred in zip(images, predictions)]
+    clean_outputs = hf_model.predict(x_subset)
+    clean_acc = np.mean(np.argmax(clean_outputs, axis=1) == y_subset)
+    benign_gallery_out = build_gallery(x_subset, clean_outputs)
+    if attack == "PGD":
+        # Sliders can deliver floats; max_iter is an iteration count, so pass an int.
+        attacker = ProjectedGradientDescentPyTorch(hf_model, max_iter=int(attack_max_iter),
+                                                   eps=attack_eps, eps_step=attack_eps_steps)
+        x_adv = attacker.generate(x_subset)
+        # Shift and amplify the perturbation, presumably so it is visible when displayed.
+        delta = ((x_subset - x_adv) + 8/255) * 10
+        delta_gallery_out = delta.transpose(0, 2, 3, 1)
+    else:
+        scale_min = 0.3
+        scale_max = 1.0
+        rotation_max = 0
+        learning_rate = 5000.
+        # Location and shape are pixel quantities; coerce slider values to int
+        # (ART is assumed to expect integers here - confirm against its docs).
+        attacker = AdversarialPatchPyTorch(hf_model, scale_max=scale_max,
+                                           scale_min=scale_min,
+                                           rotation_max=rotation_max,
+                                           learning_rate=learning_rate,
+                                           max_iter=int(attack_max_iter), patch_type='square',
+                                           patch_location=(int(x_location), int(y_location)),
+                                           patch_shape=(3, int(patch_height), int(patch_width)))
+        patch, _ = attacker.generate(x_subset)
+        x_adv = attacker.apply_patch(x_subset, scale=0.3)
+        delta_gallery_out = np.expand_dims(patch, 0).transpose(0, 2, 3, 1)
+    # Both attacks are scored the same way on their adversarial examples.
+    adv_outputs = hf_model.predict(x_adv)
+    adv_acc = np.mean(np.argmax(adv_outputs, axis=1) == y_subset)
+    adv_gallery_out = build_gallery(x_adv, adv_outputs)
+    return benign_gallery_out, adv_gallery_out, delta_gallery_out, clean_acc, adv_acc
+def clf_poison_evaluate(*args):
+    '''
+    Run a classification task backdoor-poisoning evaluation.
+
+    Positional layout of ``args`` as read here: 0 attack name ("Backdoor"),
+    1 model type, 2 trigger image (an array accepted by
+    ``PIL.Image.fromarray``), 3 target class written into the poisoned
+    labels, last element dataset type.
+
+    Returns ``(poison_gallery, clean_accuracy, poison_accuracy)``: the
+    poisoned images (channels-last) paired with their predicted label name,
+    the accuracy on the samples that were not poisoned, and the fraction of
+    poisoned samples predicted as the target class.
+
+    Raises ``gr.Error`` for an unsupported attack, model type or dataset
+    type, or when no trigger image was supplied.
+    '''
+    attack = args[0]
+    model_type = args[1]
+    trigger_image = args[2]
+    target_class = args[3]
+    data_type = args[-1]
+    # Fail fast with a user-facing message instead of returning None or
+    # crashing later on an unbound local (hf_model, x_subset).
+    if attack != "Backdoor":
+        raise gr.Error(f"Unsupported attack: {attack}")
+    if model_type != "Example":
+        raise gr.Error(f"Unsupported model type: {model_type}")
+    if data_type != "Example":
+        raise gr.Error(f"Unsupported dataset type: {data_type}")
+    if trigger_image is None:
+        raise gr.Error("A trigger image is required for the backdoor attack.")
+    # Example model: DeiT-tiny with a fresh 10-class head; it is trained below on the poisoned data.
+    model = transformers.AutoModelForImageClassification.from_pretrained(
+        'facebook/deit-tiny-distilled-patch16-224',
+        ignore_mismatched_sizes=True,
+        num_labels=10
+    )
+    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
+    loss_fn = torch.nn.CrossEntropyLoss()
+    hf_model = HuggingFaceClassifierPyTorch(
+        model=model,
+        loss=loss_fn,
+        optimizer=optimizer,
+        input_shape=(3, 224, 224),
+        nb_classes=10,
+        clip_values=(0, 1),
+    )
+    # Example data: up to `samples_per_class` images per class from the local Imagenette folder.
+    import torchvision
+    transform = torchvision.transforms.Compose([
+        torchvision.transforms.Resize((224, 224)),
+        torchvision.transforms.ToTensor(),
+    ])
+    train_dataset = torchvision.datasets.ImageFolder(root="./data/imagenette2-320/train", transform=transform)
+    labels = np.asarray(train_dataset.targets)
+    samples_per_class = 100
+    x_subset = []
+    y_subset = []
+    for c in np.unique(labels):
+        for i in np.where(labels == c)[0][:samples_per_class]:
+            # Index the dataset once per sample; every access loads and transforms the image again.
+            image, label = train_dataset[i]
+            x_subset.append(image)
+            y_subset.append(label)
+    x_subset = np.stack(x_subset)
+    y_subset = np.asarray(y_subset)
+    label_names = [
+        'fish',
+        'dog',
+        'cassette player',
+        'chainsaw',
+        'church',
+        'french horn',
+        'garbage truck',
+        'gas pump',
+        'golf ball',
+        'parachute',
+    ]
+    # insert_image reads the trigger from disk, so persist the uploaded array first.
+    from PIL import Image
+    Image.fromarray(trigger_image).save("./tmp.png")
+    def poison_func(x):
+        return insert_image(
+            x,
+            backdoor_path='./tmp.png',
+            channels_first=True,
+            random=False,
+            x_shift=0,
+            y_shift=0,
+            size=(32, 32),
+            mode='RGB',
+            blend=0.8
+        )
+    backdoor = PoisoningAttackBackdoor(poison_func)
+    source_class = 0
+    poison_percent = 0.5
+    x_poison = np.copy(x_subset)
+    y_poison = np.copy(y_subset)
+    is_poison = np.zeros(len(x_subset), dtype=bool)
+    # Poison the first `poison_percent` of the source-class samples and relabel them.
+    indices = np.where(y_subset == source_class)[0]
+    num_poison = int(poison_percent * len(indices))
+    for i in indices[:num_poison]:
+        x_poison[i], _ = backdoor.poison(x_poison[i], [])
+        y_poison[i] = target_class
+        is_poison[i] = True
+    hf_model.fit(x_poison, y_poison, nb_epochs=2)
+    # Accuracy on the samples that were left untouched.
+    clean_x = x_poison[~is_poison]
+    clean_y = y_poison[~is_poison]
+    clean_preds = np.argmax(hf_model.predict(clean_x), axis=1)
+    clean_acc = np.mean(clean_preds == clean_y)
+    # Share of poisoned samples classified as the target class.
+    poison_x = x_poison[is_poison]
+    poison_y = y_poison[is_poison]
+    poison_preds = np.argmax(hf_model.predict(poison_x), axis=1)
+    poison_acc = np.mean(poison_preds == poison_y)
+    poison_out = [(im.transpose(1, 2, 0), label_names[pred])
+                  for im, pred in zip(poison_x, poison_preds)]
+    return poison_out, clean_acc, poison_acc
+def show_params(type):
+    '''
+    Toggle a parameter column: visible for any selection other than "Example".
+    '''
+    # NOTE: the parameter name shadows the builtin `type`; it is kept so that
+    # existing keyword callers keep working.
+    return gr.Column(visible=(type != "Example"))
+def run_inference(*args):
+    model_type = args[0]
+    model_url = args[1]
+    model_channels = args[2]
+    model_height = args[3]
+    model_width = args[4]
+    model_classes = args[5]
+    model_clip = args[6]
+    model_upsample = args[7]
+    data_type = args[8]
+    if model_type == "Example":
+        model = transformers.AutoModelForImageClassification.from_pretrained(
+            'facebook/deit-tiny-distilled-patch16-224',
+            ignore_mismatched_sizes=True,
+            num_labels=10
+        )
+        upsampler = torch.nn.Upsample(scale_factor=7, mode='nearest')
+        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
+        loss_fn = torch.nn.CrossEntropyLoss()
+        hf_model = HuggingFaceClassifierPyTorch(
+            model=model,
+            loss=loss_fn,
+            optimizer=optimizer,
+            input_shape=(3, 32, 32),
+            nb_classes=10,
+            clip_values=(0, 1),
+            processor=upsampler
+        )
+        model_checkpoint_path = './state_dicts/deit_cifar_base_model.pt'
+        hf_model.model.load_state_dict(torch.load(model_checkpoint_path, map_location=device))
+    if data_type == "Example":
+        (x_train, y_train), (_, _), _, _ = load_dataset('cifar10')
+        x_train = np.transpose(x_train, (0, 3, 1, 2)).astype(np.float32)
+        y_train = np.argmax(y_train, axis=1)
+        classes = np.unique(y_train)
+        samples_per_class = 5
+        x_subset = []
+        y_subset = []
+        for c in classes:
+            indices = y_train == c
+            x_subset.append(x_train[indices][:samples_per_class])
+            y_subset.append(y_train[indices][:samples_per_class])
+        x_subset = np.concatenate(x_subset)
+        y_subset = np.concatenate(y_subset)
+        label_names = [
+            'airplane',
+            'automobile',
+            'bird',
+            'cat',
+            'deer',
+            'dog',
+            'frog',
+            'horse',
+            'ship',
+            'truck',
+        ]
+    outputs = hf_model.predict(x_subset)
+    clean_preds = np.argmax(outputs, axis=1)
+    clean_acc = np.mean(clean_preds == y_subset)
+    gallery_out = []
+    for i, im in enumerate(x_subset):
+        gallery_out.append(( im.transpose(1,2,0), label_names[np.argmax(outputs[i])] ))
+    return gallery_out, clean_acc
 # e.g. To use a local alternative theme: carbon_theme = Carbon()
 carbon_theme = Carbon()
+with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
     import art
     text = art.__version__
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Image(value="./art_lfai.png", show_label=False, show_download_button=False, width=100)
+        with gr.Column(scale=20):
+            gr.Markdown(f"<h1>Red-teaming HuggingFace with ART (v{text})</h1>", elem_classes="plot-padding")
+    gr.Markdown('''This app guides you through a common workflow for assessing the robustness
+                of HuggingFace models using standard datasets and state-of-the-art adversarial attacks
+                found within the Adversarial Robustness Toolbox (ART).<br/><br/>Follow the instructions in each
+                step below to carry out your own evaluation and determine the risks associated with using
+                some of your favorite models! <b>#redteaming</b> <b>#trustworthyAI</b>''')
+    # Model and Dataset Selection
+    with gr.Accordion("1. Model selection", open=False):
+        gr.Markdown("Select a Hugging Face model to launch an adversarial attack against.")
+        model_type = gr.Radio(label="Hugging Face Model", choices=["Example", "Other"], value="Example")
+        with gr.Column(visible=False) as other_model:
+            model_url = gr.Text(label="Model URL",
+                    placeholder="e.g. facebook/deit-tiny-distilled-patch16-224",
+                    value='facebook/deit-tiny-distilled-patch16-224')
+            model_input_channels = gr.Text(label="Input channels", value=3)
+            model_input_height = gr.Text(label="Input height", value=32)
+            model_input_width = gr.Text(label="Input width", value=32)
+            model_num_classes = gr.Text(label="Number of classes", value=10)
+            model_clip_values = gr.Radio(label="Clip values", choices=[1, 255], value=1)
+            model_upsample_scaling = gr.Slider(label="Upsample scale factor", minimum=1, maximum=10, value=7)
+        model_type.change(show_params, model_type, other_model)
+    with gr.Accordion("2. Data selection", open=False):
+        gr.Markdown("This section enables you to select a dataset for evaluation or upload your own image.")
+        data_type = gr.Radio(label="Hugging Face dataset", choices=["Example", "URL", "Local"], value="Example")
+        with gr.Column(visible=False) as other_dataset:
+            gr.Markdown("Coming soon.")
+        data_type.change(show_params, data_type, other_dataset)
+    with gr.Accordion("3. Model inference", open=False):
         with gr.Row():
+            with gr.Column(scale=1):
+                preds_gallery = gr.Gallery(label="Predictions", preview=False, show_download_button=True)
+            with gr.Column(scale=2):
+                clean_accuracy = gr.Number(label="Clean accuracy",
+                                        info="The accuracy achieved by the model in normal (non-adversarial) conditions.")
+                bt_run_inference = gr.Button("Run inference")
+                bt_clear = gr.ClearButton(components=[preds_gallery, clean_accuracy])
+        bt_run_inference.click(run_inference, inputs=[model_type, model_url, model_input_channels, model_input_height, model_input_width,
+                                                      model_num_classes, model_clip_values, model_upsample_scaling, data_type],
+                               outputs=[preds_gallery, clean_accuracy])
+    # Attack Selection
+    with gr.Accordion("4. Run attack", open=False):
+        gr.Markdown("In this section you can select the type of adversarial attack you wish to deploy against your selected model.")
+        with gr.Accordion("Evasion", open=False):
+            gr.Markdown("Evasion attacks are deployed to cause a model to incorrectly classify or detect items/objects in an image.")
+            with gr.Accordion("Projected Gradient Descent", open=False):
+                gr.Markdown("This attack uses PGD to identify adversarial examples.")
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        attack = gr.Textbox(visible=True, value="PGD", label="Attack", interactive=False)
+                        max_iter = gr.Slider(minimum=1, maximum=1000, label="Max iterations", value=10)
+                        eps = gr.Slider(minimum=0.0001, maximum=255, label="Epslion", value=8/255)
+                        eps_steps = gr.Slider(minimum=0.0001, maximum=255, label="Epsilon steps", value=1/255)
+                        bt_eval_pgd = gr.Button("Evaluate")
+                    # Evaluation Output. Visualisations of success/failures of running evaluation attacks.
+                    with gr.Column(scale=3):
                         with gr.Row():
                             with gr.Column():
+                                original_gallery = gr.Gallery(label="Original", preview=False, show_download_button=True)
+                                benign_output = gr.Label(num_top_classes=3, visible=False)
+                                clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
+                                quality_plot = gr.LinePlot(label="Gradient Quality", x='iteration', y='value', color='metric',
+                                                            x_title='Iteration', y_title='Avg in Gradients (%)',
+                                                            caption="""Illustrates the average percent of zero, infinity
+                                                            or NaN gradients identified in images
+                                                            across all batches.""", elem_classes="plot-padding", visible=False)
                             with gr.Column():
+                                adversarial_gallery = gr.Gallery(label="Adversarial", preview=False, show_download_button=True)
+                                adversarial_output = gr.Label(num_top_classes=3, visible=False)
+                                robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)
+                            with gr.Column():
+                                delta_gallery = gr.Gallery(label="Added perturbation", preview=False, show_download_button=True)
+                    bt_eval_pgd.click(clf_evasion_evaluate, inputs=[attack, model_type, model_url, model_input_channels, model_input_height, model_input_width,
+                                                                    model_num_classes, model_clip_values, model_upsample_scaling,
+                                                                    max_iter, eps, eps_steps, attack, attack, attack, attack, data_type],
+                                                            outputs=[original_gallery, adversarial_gallery, delta_gallery, clean_accuracy,
+                                                                    robust_accuracy])
+            with gr.Accordion("Adversarial Patch", open=False):
+                gr.Markdown("This attack crafts an adversarial patch that facilitates evasion.")
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        attack = gr.Textbox(visible=True, value="Adversarial Patch", label="Attack", interactive=False)
+                        max_iter = gr.Slider(minimum=1, maximum=1000, label="Max iterations", value=10)
+                        x_location = gr.Slider(minimum=1, maximum=32, label="Location (x)", value=1)
+                        y_location = gr.Slider(minimum=1, maximum=32, label="Location (y)", value=1)
+                        patch_height = gr.Slider(minimum=1, maximum=32, label="Patch height", value=12)
+                        patch_width = gr.Slider(minimum=1, maximum=32, label="Patch width", value=12)
+                        eval_btn_patch = gr.Button("Evaluate")
+                    # Evaluation Output. Visualisations of success/failures of running evaluation attacks.
+                    with gr.Column(scale=3):
                         with gr.Row():
                             with gr.Column():
+                                original_gallery = gr.Gallery(label="Original", preview=False, show_download_button=True)
+                                clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
                             with gr.Column():
+                                adversarial_gallery = gr.Gallery(label="Adversarial", preview=False, show_download_button=True)
+                                robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)
+                            with gr.Column():
+                                delta_gallery = gr.Gallery(label="Patches", preview=False, show_download_button=True)
+                    eval_btn_patch.click(clf_evasion_evaluate, inputs=[attack, model_type, model_url, model_input_channels, model_input_height, model_input_width,
+                                                                    model_num_classes, model_clip_values, model_upsample_scaling,
+                                                                    max_iter, eps, eps_steps, x_location, y_location, patch_height, patch_width, data_type],
+                                                            outputs=[original_gallery, adversarial_gallery, delta_gallery, clean_accuracy,
+                                                                    robust_accuracy])
+        with gr.Accordion("Poisoning", open=False):
+            with gr.Accordion("Backdoor"):
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        attack = gr.Textbox(visible=True, value="Backdoor", label="Attack", interactive=False)
+                        target_class = gr.Number(label="Target class", info="The class you wish to force the model to predict.",
+                                                    minimum=1, maximum=9, value=1)
+                        trigger_image = gr.Image(label="Trigger Image",  value="./baby-on-board.png")
+                        eval_btn_patch = gr.Button("Evaluate")
+                    with gr.Column(scale=2):
+                        poison_gallery = gr.Gallery(label="Poisoned", preview=False, show_download_button=True)
+                    with gr.Column(scale=2):
+                        clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
+                        poison_success = gr.Number(label="Poison Success", precision=2)
+                eval_btn_patch.click(clf_poison_evaluate, inputs=[attack, model_type, trigger_image, target_class, data_type],
+                            outputs=[poison_gallery, clean_accuracy, poison_success])
 if __name__ == "__main__":
     # during development, set debug=True
+    demo.launch(show_api=False, debug=True, share=False,
                 server_name="0.0.0.0",
                 server_port=7777,
                 ssl_verify=False,
+                max_threads=20)
+    '''demo.launch(share=True, ssl_verify=False)'''

art_lfai.png ADDED Viewed

baby-on-board.png ADDED Viewed

data/imagenette2-320/train/n01440764/ILSVRC2012_val_00000293.JPEG ADDED Viewed

data/imagenette2-320/train/n01440764/ILSVRC2012_val_00002138.JPEG ADDED Viewed

data/imagenette2-320/train/n01440764/ILSVRC2012_val_00003014.JPEG ADDED Viewed

data/imagenette2-320/train/n01440764/ILSVRC2012_val_00006697.JPEG ADDED Viewed

data/imagenette2-320/train/n01440764/ILSVRC2012_val_00007197.JPEG ADDED Viewed

data/imagenette2-320/train/n01440764/ILSVRC2012_val_00009346.JPEG ADDED Viewed

data/imagenette2-320/train/n01440764/ILSVRC2012_val_00009379.JPEG ADDED Viewed

data/imagenette2-320/train/n01440764/ILSVRC2012_val_00009396.JPEG ADDED Viewed

data/imagenette2-320/train/n01440764/ILSVRC2012_val_00010306.JPEG ADDED Viewed

data/imagenette2-320/train/n01440764/ILSVRC2012_val_00011233.JPEG ADDED Viewed

data/imagenette2-320/train/n01440764/ILSVRC2012_val_00011993.JPEG ADDED Viewed

data/imagenette2-320/train/n01440764/ILSVRC2012_val_00012503.JPEG ADDED Viewed

requirements.txt CHANGED Viewed

@@ -1,9 +1,10 @@
-gradio
-adversarial-robustness-toolbox
 pandas
 jupyter
-torch==1.13.1
 tensorflow==2.10.1; sys_platform != "darwin"
 tensorflow-macos; sys_platform == "darwin"
-tensorflow-metal; sys_platform == "darwin"

 pandas
 jupyter
+torch
+torchvision
+transformers
 tensorflow==2.10.1; sys_platform != "darwin"
 tensorflow-macos; sys_platform == "darwin"
+tensorflow-metal; sys_platform == "darwin"
+adversarial-robustness-toolbox
+gradio==4.2

state_dicts/deit_cifar_base_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c3add51bcd51ca3c1c7836d60cabf85798c8c551e8bc9c4450f4fb6cb3227421
+size 22192555

state_dicts/deit_imagenette_poisoned_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ead74cf5a180328dfb7fa179d91d51f79081f25eb7de7a146d0ab0cbc0dd01b
+size 22192555