adams-story
/

vq-ViT-L-14-k64-d32-ema

Transformers

PyTorch

Inference Endpoints

Model card · Files and versions · Community

adams-story committed on Jun 27, 2023

Commit

a1de053

1 Parent(s): 5a58f34

Upload 2 files

Browse files

Files changed (2) hide show

config.json +336 -0
pytorch_model.bin +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,336 @@

+{
+  "architectures": [
+    "VQCLIPModel"
+  ],
+  "clip_config_dict": {
+    "_commit_hash": "8d052a0f05efbaefbc9e8786ba291cfdf93e5bff",
+    "_name_or_path": "clip-vit-large-patch14/",
+    "add_cross_attention": false,
+    "architectures": [
+      "CLIPModel"
+    ],
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_factor": 1.0,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "length_penalty": 1.0,
+    "logit_scale_init_value": 2.6592,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "clip",
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "prefix": null,
+    "problem_type": null,
+    "projection_dim": 768,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "text_config": {
+      "_name_or_path": "",
+      "add_cross_attention": false,
+      "architectures": null,
+      "attention_dropout": 0.0,
+      "bad_words_ids": null,
+      "begin_suppress_tokens": null,
+      "bos_token_id": 0,
+      "chunk_size_feed_forward": 0,
+      "cross_attention_hidden_size": null,
+      "decoder_start_token_id": null,
+      "diversity_penalty": 0.0,
+      "do_sample": false,
+      "dropout": 0.0,
+      "early_stopping": false,
+      "encoder_no_repeat_ngram_size": 0,
+      "eos_token_id": 2,
+      "exponential_decay_length_penalty": null,
+      "finetuning_task": null,
+      "forced_bos_token_id": null,
+      "forced_eos_token_id": null,
+      "hidden_act": "quick_gelu",
+      "hidden_size": 768,
+      "id2label": {
+        "0": "LABEL_0",
+        "1": "LABEL_1"
+      },
+      "initializer_factor": 1.0,
+      "initializer_range": 0.02,
+      "intermediate_size": 3072,
+      "is_decoder": false,
+      "is_encoder_decoder": false,
+      "label2id": {
+        "LABEL_0": 0,
+        "LABEL_1": 1
+      },
+      "layer_norm_eps": 1e-05,
+      "length_penalty": 1.0,
+      "max_length": 20,
+      "max_position_embeddings": 77,
+      "min_length": 0,
+      "model_type": "clip_text_model",
+      "no_repeat_ngram_size": 0,
+      "num_attention_heads": 12,
+      "num_beam_groups": 1,
+      "num_beams": 1,
+      "num_hidden_layers": 12,
+      "num_return_sequences": 1,
+      "output_attentions": false,
+      "output_hidden_states": false,
+      "output_scores": false,
+      "pad_token_id": 1,
+      "prefix": null,
+      "problem_type": null,
+      "projection_dim": 768,
+      "pruned_heads": {},
+      "remove_invalid_values": false,
+      "repetition_penalty": 1.0,
+      "return_dict": true,
+      "return_dict_in_generate": false,
+      "sep_token_id": null,
+      "suppress_tokens": null,
+      "task_specific_params": null,
+      "temperature": 1.0,
+      "tf_legacy_loss": false,
+      "tie_encoder_decoder": false,
+      "tie_word_embeddings": true,
+      "tokenizer_class": null,
+      "top_k": 50,
+      "top_p": 1.0,
+      "torch_dtype": null,
+      "torchscript": false,
+      "transformers_version": "4.30.1",
+      "typical_p": 1.0,
+      "use_bfloat16": false,
+      "vocab_size": 49408
+    },
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": "float32",
+    "torchscript": false,
+    "transformers_version": null,
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "vision_config": {
+      "_name_or_path": "",
+      "add_cross_attention": false,
+      "architectures": null,
+      "attention_dropout": 0.0,
+      "bad_words_ids": null,
+      "begin_suppress_tokens": null,
+      "bos_token_id": null,
+      "chunk_size_feed_forward": 0,
+      "cross_attention_hidden_size": null,
+      "decoder_start_token_id": null,
+      "diversity_penalty": 0.0,
+      "do_sample": false,
+      "dropout": 0.0,
+      "early_stopping": false,
+      "encoder_no_repeat_ngram_size": 0,
+      "eos_token_id": null,
+      "exponential_decay_length_penalty": null,
+      "finetuning_task": null,
+      "forced_bos_token_id": null,
+      "forced_eos_token_id": null,
+      "hidden_act": "quick_gelu",
+      "hidden_size": 1024,
+      "id2label": {
+        "0": "LABEL_0",
+        "1": "LABEL_1"
+      },
+      "image_size": 224,
+      "initializer_factor": 1.0,
+      "initializer_range": 0.02,
+      "intermediate_size": 4096,
+      "is_decoder": false,
+      "is_encoder_decoder": false,
+      "label2id": {
+        "LABEL_0": 0,
+        "LABEL_1": 1
+      },
+      "layer_norm_eps": 1e-05,
+      "length_penalty": 1.0,
+      "max_length": 20,
+      "min_length": 0,
+      "model_type": "clip_vision_model",
+      "no_repeat_ngram_size": 0,
+      "num_attention_heads": 16,
+      "num_beam_groups": 1,
+      "num_beams": 1,
+      "num_channels": 3,
+      "num_hidden_layers": 24,
+      "num_return_sequences": 1,
+      "output_attentions": false,
+      "output_hidden_states": false,
+      "output_scores": false,
+      "pad_token_id": null,
+      "patch_size": 14,
+      "prefix": null,
+      "problem_type": null,
+      "projection_dim": 768,
+      "pruned_heads": {},
+      "remove_invalid_values": false,
+      "repetition_penalty": 1.0,
+      "return_dict": true,
+      "return_dict_in_generate": false,
+      "sep_token_id": null,
+      "suppress_tokens": null,
+      "task_specific_params": null,
+      "temperature": 1.0,
+      "tf_legacy_loss": false,
+      "tie_encoder_decoder": false,
+      "tie_word_embeddings": true,
+      "tokenizer_class": null,
+      "top_k": 50,
+      "top_p": 1.0,
+      "torch_dtype": null,
+      "torchscript": false,
+      "transformers_version": "4.30.1",
+      "typical_p": 1.0,
+      "use_bfloat16": false
+    }
+  },
+  "model_type": "VQCLIP",
+  "text_vq_adapter_config_dict": null,
+  "torch_dtype": "float32",
+  "transformers_version": "4.30.2",
+  "vision_vq_adapter_config_dict": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "clip_dim": 768,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "is_rq": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "mlp_dim": 1028,
+    "mlp_hidden_dim": 512,
+    "mlp_layers": 1,
+    "model_type": "",
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "rq_quantize_dropout": true,
+    "rq_quantize_dropout_cutoff_index": 1,
+    "rq_quantize_dropout_multiple_of": 4,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.30.1",
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "vq_accept_image_fmap": false,
+    "vq_affine_param": false,
+    "vq_affine_param_batch_decay": 0.99,
+    "vq_affine_param_codebook_decay": 0.9,
+    "vq_channel_last": true,
+    "vq_codebook_dim": 32,
+    "vq_codebook_size": 64,
+    "vq_commitment_use_cross_entropy_loss": false,
+    "vq_commitment_weight": 0.05,
+    "vq_decay": 0.85,
+    "vq_ema_update": true,
+    "vq_eps": 1e-05,
+    "vq_heads": 32,
+    "vq_kmeans_init": false,
+    "vq_kmeans_iters": 20,
+    "vq_learnable_codebook": false,
+    "vq_orthogonal_reg_active_codes_only": false,
+    "vq_orthogonal_reg_max_codes": null,
+    "vq_orthogonal_reg_weight": 0.0,
+    "vq_reinmax": false,
+    "vq_sample_codebook_temp": 1.0,
+    "vq_separate_codebook_per_head": true,
+    "vq_stochastic_sample_codes": true,
+    "vq_straight_through": false,
+    "vq_sync_affine_param": false,
+    "vq_sync_codebook": false,
+    "vq_sync_kmeans": true,
+    "vq_sync_update_v": 0.0,
+    "vq_threshold_ema_dead_code": 2,
+    "vq_use_cosine_sim": false
+  }
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bffff18537e4ef89636f957dd24657dee9769c3f5d0cf4a4bdb3df1e44a57a9e
+size 19485348