deepmind
/

vision-perceiver-learned

Image Classification

Model card Files Files and versions Community

nielsr HF Staff committed on Oct 7, 2021

Commit

f19dc2a

·

1 Parent(s): 8c4f501

Update P model

Files changed (1) hide show

config.json +12 -4

config.json CHANGED Viewed

@@ -3,12 +3,12 @@
     "PerceiverForImageClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "cross_attention_shape_for_attention": "kv",
   "cross_attention_widening_factor": 1,
   "d_latents": 1024,
   "d_model": 512,
   "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
   "id2label": {
     "0": "tench, Tinca tinca",
     "1": "goldfish, Carassius auratus",
@@ -1011,6 +1011,7 @@
     "998": "ear, spike, capitulum",
     "999": "toilet tissue, toilet paper, bathroom tissue"
   },
   "initializer_range": 0.02,
   "label2id": {
     "Afghan hound, Afghan": 160,
@@ -2014,22 +2015,29 @@
     "zucchini, courgette": 939
   },
   "layer_norm_eps": 1e-12,
   "model_type": "perceiver",
   "num_blocks": 8,
   "num_cross_attention_heads": 1,
   "num_latents": 512,
   "num_self_attends_per_block": 6,
   "num_self_attention_heads": 8,
-  "position_embedding_init_scale": 0.02,
   "qk_channels": null,
   "self_attention_widening_factor": 1,
-  "seq_len": 2048,
   "torch_dtype": "float32",
   "train_size": [
     368,
     496
   ],
-  "transformers_version": "4.11.0.dev0",
   "use_query_residual": true,
   "v_channels": null,
   "vocab_size": 262

     "PerceiverForImageClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
+  "audio_samples_per_frame": 1920,
   "cross_attention_shape_for_attention": "kv",
   "cross_attention_widening_factor": 1,
   "d_latents": 1024,
   "d_model": 512,
   "hidden_act": "gelu",
   "id2label": {
     "0": "tench, Tinca tinca",
     "1": "goldfish, Carassius auratus",
     "998": "ear, spike, capitulum",
     "999": "toilet tissue, toilet paper, bathroom tissue"
   },
+  "image_size": 224,
   "initializer_range": 0.02,
   "label2id": {
     "Afghan hound, Afghan": 160,
     "zucchini, courgette": 939
   },
   "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 2048,
   "model_type": "perceiver",
   "num_blocks": 8,
   "num_cross_attention_heads": 1,
+  "num_frames": 16,
   "num_latents": 512,
   "num_self_attends_per_block": 6,
   "num_self_attention_heads": 8,
+  "output_shape": [
+    1,
+    16,
+    224,
+    224
+  ],
   "qk_channels": null,
+  "samples_per_patch": 16,
   "self_attention_widening_factor": 1,
   "torch_dtype": "float32",
   "train_size": [
     368,
     496
   ],
+  "transformers_version": "4.12.0.dev0",
   "use_query_residual": true,
   "v_channels": null,
   "vocab_size": 262