sneha committed on
Commit
46f48ca
1 Parent(s): def57e6

change attn map appearance, simplify

Files changed (2)
  1. app.py +11 -29
  2. attn_helper.py +2 -4
app.py CHANGED
@@ -8,7 +8,6 @@ import torch
 import matplotlib.pyplot as plt
 from attn_helper import VITAttentionGradRollout, overlay_attn
 import vc_models
-#import eaif_models
 import torchvision
 
 
@@ -18,7 +17,6 @@ MODEL_DIR=os.path.join(os.path.dirname(eai_filepath),'model_ckpts')
 if not os.path.isdir(MODEL_DIR):
     os.mkdir(MODEL_DIR)
 
-
 FILENAME = "config.yaml"
 BASE_MODEL_TUPLE = None
 LARGE_MODEL_TUPLE = None
@@ -31,8 +29,6 @@ def get_model(model_name):
         model_cfg = omegaconf.OmegaConf.load(
             hf_hub_download(repo_id=repo_name, filename=FILENAME,token=HF_TOKEN)
         )
-        # model_cfg['model']['checkpoint_path'] = None
-        # model_cfg['model']['checkpoint_path'] = 'model_ckpts/vc1_vitb.pth'
         BASE_MODEL_TUPLE = utils.instantiate(model_cfg)
         BASE_MODEL_TUPLE[0].eval()
         model = BASE_MODEL_TUPLE
@@ -41,8 +37,6 @@ def get_model(model_name):
         model_cfg = omegaconf.OmegaConf.load(
             hf_hub_download(repo_id=repo_name, filename=FILENAME,token=HF_TOKEN)
         )
-        # model_cfg['model']['checkpoint_path'] = None
-        # model_cfg['model']['checkpoint_path'] = 'model_ckpts/vc1_vitb.pth'
         LARGE_MODEL_TUPLE = utils.instantiate(model_cfg)
         LARGE_MODEL_TUPLE[0].eval()
         model = LARGE_MODEL_TUPLE
@@ -51,7 +45,7 @@ def get_model(model_name):
     elif model_name == 'vc1-large':
         model = LARGE_MODEL_TUPLE
 
-    return model #model,embedding_dim,transform,metadata
+    return model
 
 def download_bin(model):
     bin_file = ""
@@ -61,14 +55,15 @@ def download_bin(model):
         bin_file = 'vc1_vitb.pth'
     else:
         raise NameError("model not found: " + model)
-    repo_name = 'facebook/' + model
+
+    repo_name = 'facebook/' + model
     bin_path = os.path.join(MODEL_DIR,bin_file)
     if not os.path.isfile(bin_path):
         model_bin = hf_hub_download(repo_id=repo_name, filename='pytorch_model.bin',local_dir=MODEL_DIR,local_dir_use_symlinks=True,token=HF_TOKEN)
         os.rename(model_bin, bin_path)
 
 
-def run_attn(input_img, model="vc1-base",fusion="min"):
+def run_attn(input_img, model="vc1-base"):
     download_bin(model)
     model, embedding_dim, transform, metadata = get_model(model)
     if input_img.shape[0] != 3:
@@ -80,33 +75,20 @@ def run_attn(input_img, model="vc1-base",fusion="min"):
     input_img = resize_transform(input_img)
     x = transform(input_img)
 
-    attention_rollout = VITAttentionGradRollout(model,head_fusion=fusion)
+    attention_rollout = VITAttentionGradRollout(model,head_fusion="max",discard_ratio=0.89)
 
     y = model(x)
     mask = attention_rollout.get_attn_mask()
    attn_img = overlay_attn(input_img[0].permute(1,2,0),mask)
-
-    fig = plt.figure()
-    ax = fig.subplots()
-    print(y.shape)
-    im = ax.matshow(y.detach().numpy().reshape(16,-1))
-    plt.colorbar(im)
-
-    return attn_img, fig
+    return attn_img
 
 model_type = gr.Dropdown(
     ["vc1-base", "vc1-large"], label="Model Size", value="vc1-base")
 input_img = gr.Image(shape=(250,250))
-input_button = gr.Radio(["min", "max", "mean"], value="min",label="Attention Head Fusion", info="How to combine the last layer attention across all 12 heads of the transformer.")
 output_img = gr.Image(shape=(250,250))
-output_plot = gr.Plot()
-
-css = ".output-image, .input-image, .image-preview {height: 600px !important}"
-
-markdown ="This is a demo for the Visual Cortex models. When passed an image input, it displays the attention of the last layer of the transformer.\n \
-    The user can decide how the attention heads will be combined. \
-    Along with the attention heatmap, it also displays the embedding values reshaped to a 16x48 for VC1-Base or 16x64 grid for VC1-Large."
-demo = gr.Interface(fn=run_attn, title="Visual Cortex Base Model", description=markdown,
-                    examples=[[os.path.join('./imgs',x),None,None]for x in os.listdir(os.path.join(os.getcwd(),'imgs')) if 'jpg' in x],
-                    inputs=[input_img,model_type,input_button],outputs=[output_img,output_plot],css=css)
+css = "#component-2, .input-image, .image-preview {height: 240px !important}"
+markdown ="This is a demo for the Visual Cortex models. When passed an image input, it displays the attention(green) of the last layer of the transformer."
+demo = gr.Interface(fn=run_attn, title="Visual Cortex Model", description=markdown,
+                    examples=[[os.path.join('./imgs',x),None]for x in os.listdir(os.path.join(os.getcwd(),'imgs')) if 'jpg' in x],
+                    inputs=[input_img,model_type],outputs=output_img,css=css)
 demo.launch()
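Editor's note on the simplification above: run_attn no longer takes a fusion argument; it hardcodes head_fusion="max" and discard_ratio=0.89 when building VITAttentionGradRollout, and the embedding matshow figure is dropped so only the attention overlay is returned. The following is a minimal, illustrative sketch of what those two settings do to one layer's attention; fuse_and_filter is a hypothetical helper, not the Space's VITAttentionGradRollout, and the exact way discard_ratio is applied inside attn_helper.py is an assumption here.

import torch

def fuse_and_filter(attn, head_fusion="max", discard_ratio=0.89):
    # attn: (batch, heads, tokens, tokens) attention probabilities from one block
    if head_fusion == "max":          # the value now hardcoded in app.py
        fused = attn.max(dim=1).values
    elif head_fusion == "mean":
        fused = attn.mean(dim=1)
    else:                             # "min", the old UI default
        fused = attn.min(dim=1).values
    # Zero out the lowest discard_ratio fraction of attention weights,
    # keeping only the strongest ~11% for the heatmap.
    flat = fused.flatten(1)
    k = int(flat.shape[-1] * discard_ratio)
    low = flat.topk(k, dim=-1, largest=False).indices
    flat = flat.scatter(-1, low, 0.0)
    return flat.view_as(fused)

attn = torch.softmax(torch.randn(1, 12, 197, 197), dim=-1)  # e.g. ViT-B on a 224x224 input
print(fuse_and_filter(attn).shape)  # torch.Size([1, 197, 197])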
attn_helper.py CHANGED
@@ -9,7 +9,7 @@ def overlay_attn(original_image,mask):
     # Colormap and alpha for attention mask
     # COLORMAP_OCEAN
     # COLORMAP_OCEAN
-    colormap_attn, alpha_attn = cv2.COLORMAP_JET, 1 #0.85
+    colormap_attn, alpha_attn = cv2.COLORMAP_VIRIDIS, 1 #0.85
 
     # Resize mask to original image size
     w, h = original_image.shape[0], original_image.shape[1]
@@ -18,12 +18,11 @@ def overlay_attn(original_image,mask):
     # Apply colormap to mask
     cmap = cv2.applyColorMap(np.uint8(255 * mask), colormap_attn)
 
-    print(cmap.shape)
     # Blend mask and original image
     # grayscale_img = cv2.cvtColor(np.uint8(original_image), cv2.COLOR_RGB2GRAY)
     # grayscale_img = cv2.cvtColor(grayscale_img, cv2.COLOR_GRAY2RGB)
     # alpha_blended = cv2.addWeighted(np.uint8(original_image),1, cmap, alpha_attn, 0)
-    alpha_blended = cv2.addWeighted(np.uint8(original_image),0.1, cmap, 0.9, 0)
+    alpha_blended = cv2.addWeighted(np.uint8(original_image),0.4, cmap, 0.6, 0)
 
 
     # alpha_blended = cmap
@@ -45,7 +44,6 @@ class VITAttentionGradRollout:
         self.model = model
         self.head_fusion = head_fusion
         self.discard_ratio = discard_ratio
-        print(list(model.blocks.children()))
 
         self.attentions = {}
         for idx, module in enumerate(list(model.blocks.children())):
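Editor's note on the appearance change: the heatmap colormap moves from JET to VIRIDIS and the blend now keeps more of the original image (0.4 image / 0.6 heatmap instead of 0.1 / 0.9). A self-contained sketch of that blend, assuming an HxWx3 uint8 image and a float mask in [0, 1]; overlay_sketch is a hypothetical stand-in for overlay_attn, not the Space's code.

import cv2
import numpy as np

def overlay_sketch(image_rgb, mask):
    # Resize the coarse attention mask to the image size (cv2.resize takes (width, height))
    mask = cv2.resize(mask.astype(np.float32), (image_rgb.shape[1], image_rgb.shape[0]))
    # Colorize with VIRIDIS, matching the updated overlay_attn
    heat = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_VIRIDIS)
    # New weights from this commit: 40% image, 60% heatmap
    # (channel order between RGB image and BGR colormap is ignored, as in the original)
    return cv2.addWeighted(np.uint8(image_rgb), 0.4, heat, 0.6, 0)

img = np.random.randint(0, 255, (250, 250, 3), dtype=np.uint8)
m = np.random.rand(16, 16)            # coarse ViT attention grid
print(overlay_sketch(img, m).shape)   # (250, 250, 3)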