Spaces:

chrisjay
/

mnist-adversarial

Runtime error

App Files Files Community

chrisjay committed on Jun 27, 2022

Commit

f240072

•

1 Parent(s): 475a212

saving to dataset

Browse files

Files changed (3) hide show

.gitignore +3 -0
app.py +118 -18
utils.py +44 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+__pycache__/*
+data_local/*
+flagged/*

app.py CHANGED Viewed

@@ -2,9 +2,12 @@ import os
 import torch
 import gradio as gr
 import torchvision
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
 n_epochs = 3
@@ -13,8 +16,17 @@ batch_size_test = 1000
 learning_rate = 0.01
 momentum = 0.5
 log_interval = 10
 random_seed = 1
 torch.backends.cudnn.enabled = False
 torch.manual_seed(random_seed)
@@ -123,6 +135,13 @@ if os.path.exists(optimizer_state_dict):
 def image_classifier(inp):
     input_image = torchvision.transforms.ToTensor()(inp).unsqueeze(0)
     with torch.no_grad():
@@ -134,21 +153,102 @@ def image_classifier(inp):
             confidences.update({s:v})
         return confidences
-TITLE = "MNIST Adversarial: Try to fool the MNIST model"
-description = """This project is about dynamic adversarial data collection (DADC).
-The basic idea is to do data collection, but specifically collect “adversarial data”, the kind of data that is difficult for a model to predict correctly.
-This kind of data is presumably the most valuable for a model, so this can be helpful in low-resource settings where data is hard to collect and label.
-### What to do:
-- Draw a number from 0-9.
-- Click `Submit` and see the model's prediciton.
-- If the model misclassifies it, Flag that example.
-- This will add your (adversarial) example to a dataset on which the model will be trained later.
-"""
-gr.Interface(fn=image_classifier,
-            inputs=gr.Image(source="canvas",shape=(28,28),invert_colors=True,image_mode="L",type="pil"),
-            outputs=gr.outputs.Label(num_top_classes=10),
-            allow_flagging="manual",
-            title = TITLE,
-            description=description).launch()

 import torch
 import gradio as gr
 import torchvision
+from utils import *
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
+from huggingface_hub import Repository, upload_file
 n_epochs = 3
 learning_rate = 0.01
 momentum = 0.5
 log_interval = 10
 random_seed = 1
+REPOSITORY_DIR = "data"  # top-level folder in the dataset repo under which flag() uploads each example
+LOCAL_DIR = 'data_local'  # local folder where flag() writes each example before uploading it
+os.makedirs(LOCAL_DIR,exist_ok=True)  # runs at import time; a no-op when the folder already exists
+HF_TOKEN = os.getenv("HF_TOKEN")  # token passed to upload_file(); None when the variable is unset
+HF_DATASET ="mnist-adversarial-dataset"  # flag() uploads to the dataset repo f'chrisjay/{HF_DATASET}'
 torch.backends.cudnn.enabled = False
 torch.manual_seed(random_seed)
 def image_classifier(inp):
+    """
+    It takes an image as input and returns a dictionary of class labels and their corresponding
+    confidence scores.
+    :param inp: the image to be classified
+    :return: A dictionary of the class index and the confidence value.
+    """
     input_image = torchvision.transforms.ToTensor()(inp).unsqueeze(0)
     with torch.no_grad():
             confidences.update({s:v})
         return confidences
+def flag(input_image,correct_result):
+    """
+    It saves a flagged drawing and its correct label to a local folder and uploads both
+    files to the dataset repository.
+    :param input_image: the drawn image; it must provide a `save(path)` method (the UI passes a PIL image)
+    :param correct_result: the correct number chosen by the user
+    :return: An HTML snippet reporting whether the example was saved.
+    :raises Exception: if the image cannot be written to disk.
+    """
+    # Without both a drawing and a label there is nothing meaningful to store or upload.
+    if input_image is None or correct_result is None:
+        return '<div> Please draw a number and choose the correct prediction before flagging. </div>'
+    # Each flagged example gets its own randomly named folder.
+    metadata_name = get_unique_name()
+    SAVE_FILE_DIR = os.path.join(LOCAL_DIR,metadata_name)
+    os.makedirs(SAVE_FILE_DIR,exist_ok=True)
+    # Write the image to file
+    image_output_filename = os.path.join(SAVE_FILE_DIR,'image.png')
+    try:
+        input_image.save(image_output_filename)
+    except Exception as err:
+        # Chain the original error so the real cause stays visible in the traceback.
+        raise Exception("Had issues saving PIL image to file") from err
+    # Write the metadata to file
+    json_file_path = os.path.join(SAVE_FILE_DIR,'metadata.jsonl')
+    metadata = {'id':metadata_name,'file_name':'image.png',
+                'correct_number':correct_result
+                }
+    dump_json(metadata,json_file_path)
+    # Upload the image first, then its metadata, using the hub's upload_file.
+    # Repo paths are joined with '/' rather than os.path.join, which would insert
+    # backslashes on Windows.
+    for file_name, local_path in (('image.png',image_output_filename),
+                                  ('metadata.jsonl',json_file_path)):
+        upload_file(path_or_fileobj=local_path,
+                    path_in_repo='/'.join((REPOSITORY_DIR,metadata_name,file_name)),
+                    repo_id=f'chrisjay/{HF_DATASET}',
+                    repo_type='dataset',
+                    token=HF_TOKEN
+                )
+    return '<div> Successfully saved to flagged dataset. </div>'
+def main():
+    """
+    It builds the Gradio Blocks demo (drawing canvas, prediction label and flagging controls)
+    and launches it.
+    """
+    # Local import: only this function needs it.
+    from inspect import cleandoc
+    title = "# MNIST Adversarial: Try to fool this MNIST model"
+    # cleandoc() removes the indentation these literals pick up from living inside the
+    # function, so the heading, list and blockquote do not depend on the Markdown
+    # renderer tolerating indented source.
+    description = cleandoc("""This project is about dynamic adversarial data collection (DADC).
+    The basic idea is to do data collection by collecting “adversarial data”, the kind of data that is difficult for a model to predict correctly.
+    This kind of data is presumably the most valuable for a model, so this can be helpful in low-resource settings where data is hard to collect and label.
+    ### What to do:
+    - Draw a number from 0-9.
+    - Click `Submit` and see the model's prediction.
+    - If the model misclassifies it, Flag that example.
+    - This will add your (adversarial) example to a dataset on which the model will be trained later.
+    """)
+    model_is_wrong = cleandoc("""
+    > Did the model get it wrong? Choose the correct prediction below and flag it.
+    When you flag it, the instance is saved to our dataset and the model is trained on it.
+    """)
+    block = gr.Blocks()
+    with block:
+        gr.Markdown(title)
+        with gr.Tabs():
+            # NOTE(review): this Markdown is a direct child of Tabs rather than of a TabItem;
+            # confirm that this is the intended placement.
+            gr.Markdown(description)
+            with gr.TabItem('MNIST'):
+                with gr.Row():
+                    image_input =gr.inputs.Image(source="canvas",shape=(28,28),invert_colors=True,image_mode="L",type="pil")
+                    label_output = gr.outputs.Label(num_top_classes=10)
+                submit = gr.Button("Submit")
+                gr.Markdown(model_is_wrong)
+                number_dropdown = gr.Dropdown(choices=list(range(10)),type='value',default=None,label="What was the correct prediction?")
+                flag_btn = gr.Button("Flag")
+                output_result = gr.outputs.HTML()
+                # Submit runs the classifier on the drawing; Flag stores the drawing with the chosen label.
+                submit.click(image_classifier,inputs = [image_input],outputs=[label_output])
+                flag_btn.click(flag,inputs=[image_input,number_dropdown],outputs=[output_result])
+    block.launch()
+if __name__ == "__main__":  # build and launch the demo only when this file is run as a script
+    main()

utils.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import json
+import hashlib
+import random
+import string
+def get_unique_name():
+    """
+    It builds a random 32-character name made of ASCII letters and digits.
+    :return: The generated string.
+    """
+    alphabet = string.ascii_letters + string.digits
+    return ''.join(random.choice(alphabet) for _ in range(32))
+def read_json_lines(file):
+    """
+    It reads a JSON Lines file and returns the parsed value of every line.
+    NOTE: this file defines read_json_lines a second time further down; that later
+    definition is the one bound when the module is imported.
+    :param file: path of the UTF-8 encoded file to read
+    :return: A list with one parsed value per non-blank line, in file order.
+    """
+    with open(file,'r',encoding="utf8") as f:
+        # Blank lines (e.g. an empty line at the end of the file) are skipped instead of
+        # being handed to json.loads, which raises on them.
+        return [json.loads(line) for line in f if line.strip()]
+def json_dump(thing):
+    """
+    It serializes an object to a compact JSON string with sorted keys.
+    :param thing: the JSON-serializable object to encode
+    :return: The JSON text, with no whitespace around separators and non-ASCII characters left unescaped.
+    """
+    compact_separators = (',', ':')
+    return json.dumps(thing, ensure_ascii=False, sort_keys=True, indent=None, separators=compact_separators)
+def get_hash(thing): # stable-hashing
+    """
+    It returns the MD5 hex digest of the object's json_dump() serialization.
+    :param thing: the JSON-serializable object to hash
+    :return: The hexadecimal digest as a string.
+    """
+    canonical_bytes = json_dump(thing).encode('utf-8')
+    return hashlib.md5(canonical_bytes).hexdigest()
+def dump_json(thing,file):
+    """
+    It writes an object as JSON to a file, replacing any previous content.
+    :param thing: the JSON-serializable object to write
+    :param file: path of the file to create or overwrite (UTF-8 encoded)
+    """
+    with open(file, mode='w+', encoding="utf8") as out_handle:
+        json.dump(thing, out_handle)
+def read_json_lines(file):
+    """
+    It reads a JSON Lines file and returns the parsed value of every line.
+    NOTE: this repeats an earlier definition of read_json_lines in this file and,
+    being the later one, is the definition bound when the module is imported.
+    :param file: path of the UTF-8 encoded file to read
+    :return: A list with one parsed value per non-blank line, in file order.
+    """
+    with open(file,'r',encoding="utf8") as f:
+        # Blank lines (e.g. an empty line at the end of the file) are skipped instead of
+        # being handed to json.loads, which raises on them.
+        return [json.loads(line) for line in f if line.strip()]