sneha committed on
Commit
5ded884
1 Parent(s): 443912c

add radio buttons

Files changed (2)
  1. app.py +5 -4
  2. attn_helper.py +9 -3
app.py CHANGED
@@ -43,7 +43,7 @@ def download_bin():
     os.rename(model_bin, bin_path)
 
 
-def run_attn(input_img):
+def run_attn(input_img,fusion):
     download_bin()
     model, embedding_dim, transform, metadata = get_model()
     if input_img.shape[0] != 3:
@@ -55,7 +55,7 @@ def run_attn(input_img):
     input_img = resize_transform(input_img)
     x = transform(input_img)
 
-    attention_rollout = VITAttentionGradRollout(model,head_fusion="mean")
+    attention_rollout = VITAttentionGradRollout(model,head_fusion=fusion)
 
     y = model(x)
     mask = attention_rollout.get_attn_mask()
@@ -69,10 +69,11 @@ def run_attn(input_img):
     return attn_img, fig
 
 input_img = gr.Image(shape=(250,250))
+input_button = gr.Radio(["min", "max", "mean"], label="Attention Head Fusion", info="How to combine the last layer attention across all 12 heads of the transformer.")
 output_img = gr.Image(shape=(250,250))
 output_plot = gr.Plot()
 
 demo = gr.Interface(fn=run_attn, title="Visual Cortex Base Model",
-                    examples=[os.path.join('./imgs',x) for x in os.listdir(os.path.join(os.getcwd(),'imgs')) if 'jpg' in x],
-                    inputs=input_img,outputs=[output_img,output_plot])
+                    examples=[[os.path.join('./imgs',x),None] for x in os.listdir(os.path.join(os.getcwd(),'imgs')) if 'jpg' in x],
+                    inputs=[input_img,input_button],outputs=[output_img,output_plot])
 demo.launch()
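
Note on the wiring: the new fusion argument reaches run_attn because gr.Interface calls fn with one value per input component, in the order of the inputs list, so the gr.Radio selection arrives as the second positional argument; the trailing None in each example row presumably leaves the radio at its default so the examples only pre-fill the image. Below is a minimal sketch of that mechanism, assuming the same Gradio API the Space uses; echo_fusion and its text components are hypothetical stand-ins, not part of this commit.

```python
import gradio as gr

# Hypothetical stand-in for run_attn (illustration only): gr.Interface passes
# one value per input component, in order, so the Radio choice is the second
# positional argument.
def echo_fusion(text, fusion):
    return f"input={text!r}, head_fusion={fusion}"

fusion_radio = gr.Radio(["min", "max", "mean"], label="Attention Head Fusion",
                        info="How to combine attention across the heads.")

demo = gr.Interface(fn=echo_fusion,
                    inputs=[gr.Textbox(label="Input"), fusion_radio],
                    outputs=gr.Textbox(label="Result"))

if __name__ == "__main__":
    demo.launch()
```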
attn_helper.py CHANGED
@@ -9,7 +9,7 @@ def overlay_attn(original_image,mask):
     # Colormap and alpha for attention mask
     # COLORMAP_OCEAN
     # COLORMAP_OCEAN
-    colormap_attn, alpha_attn = cv2.COLORMAP_OCEAN, 1 #0.85
+    colormap_attn, alpha_attn = cv2.COLORMAP_JET, 1 #0.85
 
     # Resize mask to original image size
     w, h = original_image.shape[0], original_image.shape[1]
@@ -20,9 +20,14 @@ def overlay_attn(original_image,mask):
 
     print(cmap.shape)
     # Blend mask and original image
-    grayscale_img = cv2.cvtColor(np.uint8(original_image), cv2.COLOR_RGB2GRAY)
-    alpha_blended = cv2.addWeighted(np.uint8(original_image),1, cmap, alpha_attn, 0)
+    # grayscale_img = cv2.cvtColor(np.uint8(original_image), cv2.COLOR_RGB2GRAY)
+    # grayscale_img = cv2.cvtColor(grayscale_img, cv2.COLOR_GRAY2RGB)
+    # alpha_blended = cv2.addWeighted(np.uint8(original_image),1, cmap, alpha_attn, 0)
+    alpha_blended = cv2.addWeighted(np.uint8(original_image),0.1, cmap, 0.9, 0)
+
+
     # alpha_blended = cmap
+
 
     # Save image
     final_im = Image.fromarray(alpha_blended)
@@ -34,6 +39,7 @@ def overlay_attn(original_image,mask):
 class VITAttentionGradRollout:
     '''
     Expects timm ViT transformer model
+    Adapted from https://github.com/samiraabnar/attention_flow
     '''
     def __init__(self, model, head_fusion='min', discard_ratio=0):
         self.model = model
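
Note on the overlay change: the grayscale blend is replaced by a weighted sum. cv2.addWeighted computes dst = src1*alpha + src2*beta + gamma, so with weights 0.1 and 0.9 the JET-colored attention map now dominates the original image. Below is a self-contained sketch of that blend on synthetic data; the random image, the mask, and the applyColorMap step are illustrative assumptions, since the diff does not show how cmap is produced.

```python
import cv2
import numpy as np

# Illustrative stand-ins, not the Space's data: a fake RGB image and a fake
# attention mask already resized to the image.
original_image = np.random.randint(0, 256, (250, 250, 3), dtype=np.uint8)
mask = np.random.rand(250, 250).astype(np.float32)

# Color the mask with the JET colormap the commit switches to (the exact
# colormapping call in attn_helper.py is not visible in this diff).
cmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)

# dst = 0.1 * original + 0.9 * cmap + 0: the colored attention map dominates.
alpha_blended = cv2.addWeighted(np.uint8(original_image), 0.1, cmap, 0.9, 0)
print(alpha_blended.shape, alpha_blended.dtype)  # (250, 250, 3) uint8
```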