sayakpaul HF staff committed on
Commit
142ea85
1 Parent(s): a8f29d4

feat: hub deit model.

Browse files
Files changed (3) hide show
  1. README.md +1 -2
  2. app.py +28 -8
  3. requirements.txt +2 -2
README.md CHANGED
@@ -12,5 +12,4 @@ license: apache-2.0
12
 
13
  Attention Rollout was proposed by [Abnar et al.](https://arxiv.org/abs/2005.00928) to quantify the information
14
  that flows through self-attention layers. In the original ViT paper ([Dosovitskiy et al.](https://arxiv.org/abs/2010.11929)),
15
- the authors use it to investigate the representations learned by ViTs. The model used in the backend is a ViT B-16 model. For more
16
- details about it, refer to [this notebook](https://github.com/sayakpaul/probing-vits/blob/main/notebooks/load-jax-weights-vitb16.ipynb).
12
 
13
  Attention Rollout was proposed by [Abnar et al.](https://arxiv.org/abs/2005.00928) to quantify the information
14
  that flows through self-attention layers. In the original ViT paper ([Dosovitskiy et al.](https://arxiv.org/abs/2010.11929)),
15
+ the authors use it to investigate the representations learned by ViTs. The model used in the backend is `deit_tiny_patch16_224`. For more details about it, refer to [this TF-Hub collection](https://tfhub.dev/sayakpaul/collections/deit/1). DeiT was proposed by [Touvron et al.](https://arxiv.org/abs/2012.12877).
 
app.py CHANGED
@@ -1,31 +1,51 @@
1
  import gradio as gr
2
- from huggingface_hub.keras_mixin import from_pretrained_keras
 
3
  from PIL import Image
4
 
5
  import utils
6
 
7
- _MODEL = from_pretrained_keras("probing-vits/vit_b16_patch16_224_i21k_i1k")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
 
10
  def show_rollout(image):
11
- _, preprocessed_image = utils.preprocess_image(image, "original_vit")
 
 
 
12
  _, attention_scores_dict = _MODEL.predict(preprocessed_image)
13
  result = utils.attention_rollout_map(
14
- image, attention_scores_dict, "original_vit"
15
  )
16
  return Image.fromarray(result)
17
 
18
 
19
  title = "Generate Attention Rollout Plots"
20
- article = "Attention Rollout was proposed by [Abnar et al.](https://arxiv.org/abs/2005.00928) to quantify the information that flows through self-attention layers. In the original ViT paper ([Dosovitskiy et al.](https://arxiv.org/abs/2010.11929)), the authors use it to investigate the representations learned by ViTs. The model used in the backend is a ViT B-16 model. For more details about it, refer to [this notebook](https://github.com/sayakpaul/probing-vits/blob/main/notebooks/load-jax-weights-vitb16.ipynb)."
21
 
22
  iface = gr.Interface(
23
  show_rollout,
24
- gr.inputs.Image(type="pil", label="Input Image"),
25
- "image",
26
  title=title,
27
  article=article,
28
  allow_flagging="never",
29
- # examples=[["car.jpeg", "bulbul.jpeg"]]
30
  )
31
  iface.launch()
1
  import gradio as gr
2
+ import tensorflow as tf
3
+ import tensorflow_hub as hub
4
  from PIL import Image
5
 
6
  import utils
7
 
# Side length of the square, 3-channel input that get_model() declares.
_RESOLUTION = 224

# TF-Hub handle that get_model() wraps in a hub.KerasLayer.
_MODEL_URL = "https://tfhub.dev/sayakpaul/deit_tiny_patch16_224/1"
10
+
11
+
def get_model() -> tf.keras.Model:
    """Wrap the TF-Hub module at ``_MODEL_URL`` in a functional Keras model.

    Returns:
        A ``tf.keras.Model`` whose input has shape
        ``(_RESOLUTION, _RESOLUTION, 3)`` and whose outputs are
        ``[logits, attention_scores_dict]``.
    """
    image_input = tf.keras.Input(shape=(_RESOLUTION, _RESOLUTION, 3))
    deit_layer = hub.KerasLayer(_MODEL_URL)

    # The hub module returns a 2-tuple; its second item is a dictionary
    # containing the attention scores.
    module_outputs = deit_layer(image_input)
    logits, attention_scores_dict = module_outputs

    return tf.keras.Model(
        inputs=image_input,
        outputs=[logits, attention_scores_dict],
    )
22
+
23
+
# Built once at import time; show_rollout() reuses this instance on every call.
_MODEL = get_model()
25
 
26
 
def show_rollout(image):
    """Gradio callback: render the attention-rollout map for ``image``.

    Args:
        image: The image supplied by the Gradio input component.

    Returns:
        A ``PIL.Image.Image`` created from the array returned by
        ``utils.attention_rollout_map``.
    """
    model_name = "deit_tiny_patch16_224"

    # preprocess_image returns a pair; only the second element is fed
    # to the model.
    _unused, model_input = utils.preprocess_image(image, model_name)

    # The model outputs (logits, attention scores); the logits are not needed.
    _logits, attention_scores_dict = _MODEL.predict(model_input)

    rollout = utils.attention_rollout_map(
        image, attention_scores_dict, model_name
    )
    return Image.fromarray(rollout)
37
 
38
 
# Text shown on the demo page: `title` is the heading, `article` is the
# Markdown blurb passed to the interface.
title = "Generate Attention Rollout Plots"
article = "Attention Rollout was proposed by [Abnar et al.](https://arxiv.org/abs/2005.00928) to quantify the information that flows through self-attention layers. In the original ViT paper ([Dosovitskiy et al.](https://arxiv.org/abs/2010.11929)), the authors use it to investigate the representations learned by ViTs. The model used in the backend is `deit_tiny_patch16_224`. For more details about it, refer [here](https://tfhub.dev/sayakpaul/collections/deit/1). DeiT was proposed by [Touvron et al.](https://arxiv.org/abs/2012.12877)"

# Single-input / single-output interface: a PIL image goes into
# show_rollout() and the returned image is displayed.
iface = gr.Interface(
    show_rollout,
    inputs=gr.inputs.Image(type="pil", label="Input Image"),
    outputs="image",
    title=title,
    article=article,
    allow_flagging="never",
    # `examples` is a list of rows, and each row needs one value per input
    # component. This interface has exactly one input, so every sample image
    # gets its own single-element row (the previous value packed both files
    # into one row, i.e. one example with two inputs).
    examples=[["./car.jpeg"], ["./bulbul.jpeg"]],
)
iface.launch()
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  tensorflow
 
2
  opencv-python
3
- numpy
4
- huggingface_hub
1
  tensorflow
2
+ tensorflow-hub
3
  opencv-python
4
+ numpy