Spaces:

fkonrad
/

ViT-Visualizer

Sleeping

App Files Community

Felix Konrad committed on Sep 9

Commit

41c94d8

1 Parent(s): 5fa1af0

Please work

Browse files

Files changed (1) hide show

app.py +87 -73

app.py CHANGED Viewed

@@ -6,9 +6,6 @@ import gradio as gr
 from transformers import AutoModel, AutoImageProcessor
 from PIL import Image
 import torch
-from huggingface_hub import hf_hub_download
 os.environ["HF_HUB_OFFLINE"] = "0"
@@ -20,10 +17,9 @@ state = {
     "repo_id": None,
 }
 def similarity_heatmap(image):
     """
-        ...
     """
     model, processor = state["model"], state["processor"]
@@ -76,117 +72,135 @@ def overlay_cosine_grid_on_image(cos_grid: np.ndarray, image: Image.Image, alpha
     return blended
-def load_model_dropdown(choice: str):
-    """
-    Load one of the predefined models.
-    """
-    repo_path = SUPPORTED_MODELS[choice]
-    try:
-        model = AutoModel.from_pretrained(repo_path)
-        processor = AutoImageProcessor.from_pretrained(repo_path)
-        model.to("cuda" if torch.cuda.is_available() else "cpu")
-        model.eval()
-        state["model"] = model
-        state["processor"] = processor
-        state["repo_id"] = choice
-        return f"Successfully loaded model: {choice}"
-    except Exception as e:
-        return f"Error loading model {choice}: {e}"
 def load_model(repo_id: str, revision: str = None):
     """
-    Load a Hugging Face model + processor from Hub using huggingface_hub.
     Works with any public repo_id.
     """
     try:
-        # Explicitly download model + processor files to local cache
-        model_path = hf_hub_download(
-            repo_id=repo_id,
-            revision=revision,
-            filename="pytorch_model.bin",  # default filename for weights
-            cache_dir="./model_cache"
-        )
-        config_path = hf_hub_download(
-            repo_id=repo_id,
             revision=revision,
-            filename="config.json",
-            cache_dir="./model_cache"
         )
-        processor_path = hf_hub_download(
-            repo_id=repo_id,
             revision=revision,
-            filename="preprocessor_config.json",
-            cache_dir="./model_cache"
         )
-        # Load with transformers (it will reuse the local cache)
-        model = AutoModel.from_pretrained(repo_id, revision=revision, cache_dir="./model_cache")
-        processor = AutoImageProcessor.from_pretrained(repo_id, revision=revision, cache_dir="./model_cache")
-        if torch.cuda.is_available():
-            model.to("cuda")
-        else:
-            model.to("cpu")
         model.eval()
         state["model"] = model
         state["processor"] = processor
         state["repo_id"] = repo_id
-        return f"Successfully loaded model '{repo_id}'"
     except Exception as e:
-        return f"Error loading model: {e}"
 def display_image(image: Image):
     """
-    Simply returns the uploaded image (you can process it later).
     """
     return image
 def visualize_cosine_heatmap(image: Image):
     if state["model"] is None:
-        return None  # or placeholder image
-    cos_grid = similarity_heatmap(image)
-    blended = overlay_cosine_grid_on_image(cos_grid, image)
-    return blended
 # Gradio UI
-with gr.Blocks() as demo:
     gr.Markdown("# ViT CLS-Visualizer")
     gr.Markdown(
         "Enter the Hugging Face model repo ID (must be public), upload an image, "
         "and visualize the cosine similarity between the CLS token and patches."
     )
     with gr.Row():
         repo_input = gr.Textbox(
             label="Hugging Face Model Repo ID",
-            placeholder="e.g. google/vit-base-patch16-224"
         )
         revision_input = gr.Textbox(
             label="Revision (optional)",
             placeholder="branch, tag, or commit hash"
         )
-        load_btn = gr.Button("Load Model")
     load_status = gr.Textbox(label="Model Status", interactive=False)
     with gr.Row():
-        image_input = gr.Image(type="pil", label="Upload Image")
-        image_output = gr.Image(label="Uploaded Image")
-    with gr.Row():
-        compute_btn = gr.Button("Compute Heatmap")
-        heatmap_output = gr.Image(label="Cosine Similarity Heatmap")
     # Events
-    load_btn.click(fn=load_model, inputs=[repo_input, revision_input], outputs=load_status)
-    image_input.change(fn=display_image, inputs=image_input, outputs=image_output)
-    compute_btn.click(fn=visualize_cosine_heatmap, inputs=image_input, outputs=heatmap_output)
-demo.launch()

 from transformers import AutoModel, AutoImageProcessor
 from PIL import Image
 import torch
 os.environ["HF_HUB_OFFLINE"] = "0"
     "repo_id": None,
 }
 def similarity_heatmap(image):
     """
+    Compute cosine similarity between CLS token and patch tokens
     """
     model, processor = state["model"], state["processor"]
     return blended
 def load_model(repo_id: str, revision: str = None):
     """
     Load a Hugging Face model + image processor from the Hub into `state`.
     Works with any public repo_id.
     Args:
         repo_id: Hub repository ID, e.g. "google/vit-base-patch16-224";
             surrounding whitespace is ignored.
         revision: Optional branch, tag, or commit hash. None, "" and
             whitespace-only values all select the default revision.
     Returns:
         A status message string; failures are reported in the message
         instead of being raised.
     """
     # A text field left blank may arrive as "" rather than None, so both
     # inputs are normalized before they reach from_pretrained. The previous
     # check (`revision and revision.strip() == ""`) could never match "".
     repo_id = (repo_id or "").strip()
     if not repo_id:
         return "❌ Please enter a Hugging Face model repo ID."
     revision = (revision or "").strip() or None
     try:
         # SECURITY NOTE(review): trust_remote_code=True allows code shipped in
         # the selected repo to run in this process, and repo_id is supplied
         # by the user. Kept for compatibility; consider an allow-list.
         # Load model and processor directly (they handle caching automatically)
         model = AutoModel.from_pretrained(
             repo_id,
             revision=revision,
             cache_dir="./model_cache",
             trust_remote_code=True,  # Some models might need this
         )
         processor = AutoImageProcessor.from_pretrained(
             repo_id,
             revision=revision,
             cache_dir="./model_cache",
             trust_remote_code=True,
         )
         # Validate it's a Vision Transformer before the device transfer, so a
         # rejected model is never moved or stored in `state`.
         if not hasattr(model.config, "patch_size"):
             return f"❌ Model '{repo_id}' doesn't appear to be a Vision Transformer (no patch_size in config)"
         # Move to appropriate device
         device = "cuda" if torch.cuda.is_available() else "cpu"
         model.to(device)
         model.eval()
         # Update global state only after every step above succeeded
         state["model"] = model
         state["processor"] = processor
         state["repo_id"] = repo_id
         state["model_type"] = "custom"
         return f"✅ Successfully loaded model '{repo_id}' on {device}"
     except OSError as e:
         message = str(e)
         if "Repository not found" in message:
             return f"❌ Repository '{repo_id}' not found. Please check the repo ID."
         if "offline" in message.lower():
             return "❌ Network error. Please check your internet connection."
         return f"❌ Error accessing model: {message}"
     except Exception as e:
         # UI boundary: surface any other failure as a status message
         return f"❌ Error loading model: {e}"
 def display_image(image: Image.Image):
     """
     Return the uploaded image unchanged.
     Args:
         image: The image received from the upload component (may be None
             when the upload is cleared -- TODO confirm against the UI).
     Returns:
         The same object that was passed in.
     """
     return image
 def visualize_cosine_heatmap(image: Image.Image):
     """
     Generate and overlay cosine similarity heatmap on the input image.
     Args:
         image: The image to analyse; None is accepted and yields None.
     Returns:
         The blended overlay produced by overlay_cosine_grid_on_image, or
         None when no model is loaded, no image was given, or the
         computation failed (the error is printed).
     """
     # Nothing to compute without both a loaded model and an image; bail out
     # here so a missing image is not reported as a heatmap error below.
     if state["model"] is None or image is None:
         return None
     try:
         cos_grid = similarity_heatmap(image)
         return overlay_cosine_grid_on_image(cos_grid, image)
     except Exception as e:
         # UI boundary: log the failure and return an empty result
         print(f"Error generating heatmap: {e}")
         return None
 # Gradio UI: page layout first, then the event wiring, then the launch guard
 with gr.Blocks(title="ViT CLS Visualizer") as demo:
     # Header and usage instructions
     gr.Markdown("# ViT CLS-Visualizer")
     gr.Markdown(
         "Enter the Hugging Face model repo ID (must be public), "
         "upload an image, and visualize the cosine similarity "
         "between the CLS token and patches."
     )
     # Suggested repo IDs, rendered as a markdown bullet list
     gr.Markdown("### Popular Vision Transformer models to try:")
     gr.Markdown(
         "\n".join(
             [
                 "- `google/vit-base-patch16-224`",
                 "- `facebook/deit-base-distilled-patch16-224`",
                 "- `microsoft/dit-base`",
             ]
         )
     )
     # Model selection row: repo ID, optional revision, load button
     with gr.Row():
         repo_input = gr.Textbox(
             value="google/vit-base-patch16-224",
             placeholder="e.g. google/vit-base-patch16-224",
             label="Hugging Face Model Repo ID",
         )
         revision_input = gr.Textbox(
             placeholder="branch, tag, or commit hash",
             label="Revision (optional)",
         )
         load_btn = gr.Button("Load Model", variant="primary")
     # Read-only field that receives the message returned by load_model
     load_status = gr.Textbox(interactive=False, label="Model Status")
     # Left column: upload + preview; right column: trigger + result
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(label="Upload Image", type="pil")
             image_output = gr.Image(label="Uploaded Image")
         with gr.Column():
             compute_btn = gr.Button("Compute Heatmap", variant="primary")
             heatmap_output = gr.Image(label="Cosine Similarity Heatmap")
     # Events
     load_btn.click(load_model, inputs=[repo_input, revision_input], outputs=load_status)
     image_input.change(display_image, inputs=image_input, outputs=image_output)
     compute_btn.click(visualize_cosine_heatmap, inputs=image_input, outputs=heatmap_output)
 # Launch only when this file is executed directly
 if __name__ == "__main__":
     demo.launch()