Fix the incorrect config.json

Fix the AutoConfig issue caused by the incorrect config.json by replacing it with the config.json from the original model repository: https://huggingface.co/databricks/dbrx-instruct/blob/main/config.json

Files changed (1) hide show

config.json +39 -222

config.json CHANGED Viewed

@@ -1,224 +1,41 @@
 {
-    "add_cross_attention": false,
-    "architectures": [
-        "DbrxForCausalLM"
-    ],
-    "attn_config": {
-        "return_dict": true,
-        "output_hidden_states": false,
-        "output_attentions": false,
-        "torchscript": false,
-        "torch_dtype": null,
-        "use_bfloat16": false,
-        "tf_legacy_loss": false,
-        "pruned_heads": {},
-        "tie_word_embeddings": true,
-        "chunk_size_feed_forward": 0,
-        "is_encoder_decoder": false,
-        "is_decoder": false,
-        "cross_attention_hidden_size": null,
-        "add_cross_attention": false,
-        "tie_encoder_decoder": false,
-        "max_length": 20,
-        "min_length": 0,
-        "do_sample": false,
-        "early_stopping": false,
-        "num_beams": 1,
-        "num_beam_groups": 1,
-        "diversity_penalty": 0.0,
-        "temperature": 1.0,
-        "top_k": 50,
-        "top_p": 1.0,
-        "typical_p": 1.0,
-        "repetition_penalty": 1.0,
-        "length_penalty": 1.0,
-        "no_repeat_ngram_size": 0,
-        "encoder_no_repeat_ngram_size": 0,
-        "bad_words_ids": null,
-        "num_return_sequences": 1,
-        "output_scores": false,
-        "return_dict_in_generate": false,
-        "forced_bos_token_id": null,
-        "forced_eos_token_id": null,
-        "remove_invalid_values": false,
-        "exponential_decay_length_penalty": null,
-        "suppress_tokens": null,
-        "begin_suppress_tokens": null,
-        "architectures": null,
-        "finetuning_task": null,
-        "id2label": {
-            "0": "LABEL_0",
-            "1": "LABEL_1"
-        },
-        "label2id": {
-            "LABEL_0": 0,
-            "LABEL_1": 1
-        },
-        "tokenizer_class": null,
-        "prefix": null,
-        "bos_token_id": null,
-        "pad_token_id": null,
-        "eos_token_id": null,
-        "sep_token_id": null,
-        "decoder_start_token_id": null,
-        "task_specific_params": null,
-        "problem_type": null,
-        "_name_or_path": "",
-        "model_type": "",
-        "attn_pdrop": 0,
-        "clip_qkv": 8,
-        "kv_n_heads": 8,
-        "rope_theta": 500000
-    },
-    "auto_map": {
-        "AutoConfig": "configuration_dbrx.DbrxConfig",
-        "AutoModelForCausalLM": "modeling_dbrx.DbrxForCausalLM"
-    },
-    "bad_words_ids": null,
-    "begin_suppress_tokens": null,
-    "bos_token_id": null,
-    "chunk_size_feed_forward": 0,
-    "cross_attention_hidden_size": null,
-    "d_model": 6144,
-    "decoder_start_token_id": null,
-    "diversity_penalty": 0.0,
-    "do_sample": false,
-    "early_stopping": false,
-    "emb_pdrop": 0.0,
-    "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": null,
-    "exponential_decay_length_penalty": null,
-    "ffn_config": {
-        "return_dict": true,
-        "output_hidden_states": false,
-        "output_attentions": false,
-        "torchscript": false,
-        "torch_dtype": null,
-        "use_bfloat16": false,
-        "tf_legacy_loss": false,
-        "pruned_heads": {},
-        "tie_word_embeddings": true,
-        "chunk_size_feed_forward": 0,
-        "is_encoder_decoder": false,
-        "is_decoder": false,
-        "cross_attention_hidden_size": null,
-        "add_cross_attention": false,
-        "tie_encoder_decoder": false,
-        "max_length": 20,
-        "min_length": 0,
-        "do_sample": false,
-        "early_stopping": false,
-        "num_beams": 1,
-        "num_beam_groups": 1,
-        "diversity_penalty": 0.0,
-        "temperature": 1.0,
-        "top_k": 50,
-        "top_p": 1.0,
-        "typical_p": 1.0,
-        "repetition_penalty": 1.0,
-        "length_penalty": 1.0,
-        "no_repeat_ngram_size": 0,
-        "encoder_no_repeat_ngram_size": 0,
-        "bad_words_ids": null,
-        "num_return_sequences": 1,
-        "output_scores": false,
-        "return_dict_in_generate": false,
-        "forced_bos_token_id": null,
-        "forced_eos_token_id": null,
-        "remove_invalid_values": false,
-        "exponential_decay_length_penalty": null,
-        "suppress_tokens": null,
-        "begin_suppress_tokens": null,
-        "architectures": null,
-        "finetuning_task": null,
-        "id2label": {
-            "0": "LABEL_0",
-            "1": "LABEL_1"
-        },
-        "label2id": {
-            "LABEL_0": 0,
-            "LABEL_1": 1
-        },
-        "tokenizer_class": null,
-        "prefix": null,
-        "bos_token_id": null,
-        "pad_token_id": null,
-        "eos_token_id": null,
-        "sep_token_id": null,
-        "decoder_start_token_id": null,
-        "task_specific_params": null,
-        "problem_type": null,
-        "_name_or_path": "",
-        "ffn_act_fn": {
-            "name": "silu"
-        },
-        "ffn_hidden_size": 10752,
-        "moe_num_experts": 16,
-        "moe_top_k": 4,
-        "moe_jitter_eps": 0,
-        "moe_loss_weight": 0.05,
-        "moe_normalize_expert_weights": 1,
-        "uniform_expert_assignment": false,
-        "model_type": ""
-    },
-    "finetuning_task": null,
-    "forced_bos_token_id": null,
-    "forced_eos_token_id": null,
-    "id2label": {
-        "0": "LABEL_0",
-        "1": "LABEL_1"
-    },
-    "initializer_range": 0.02,
-    "is_decoder": false,
-    "is_encoder_decoder": false,
-    "label2id": {
-        "LABEL_0": 0,
-        "LABEL_1": 1
-    },
-    "length_penalty": 1.0,
-    "max_length": 20,
-    "max_seq_len": 32768,
-    "min_length": 0,
-    "model_type": "dbrx",
-    "n_heads": 48,
-    "n_layers": 40,
-    "no_repeat_ngram_size": 0,
-    "num_beam_groups": 1,
-    "num_beams": 1,
-    "num_return_sequences": 1,
-    "output_attentions": false,
-    "output_hidden_states": false,
-    "output_router_logits": false,
-    "output_scores": false,
-    "pad_token_id": null,
-    "prefix": null,
-    "problem_type": null,
-    "pruned_heads": {},
-    "quantization": {
-        "group_size": 64,
-        "bits": 4
-    },
-    "remove_invalid_values": false,
-    "repetition_penalty": 1.0,
-    "resid_pdrop": 0.0,
-    "return_dict": true,
-    "return_dict_in_generate": false,
-    "router_aux_loss_coef": 0.05,
-    "sep_token_id": null,
-    "suppress_tokens": null,
-    "task_specific_params": null,
-    "temperature": 1.0,
-    "tf_legacy_loss": false,
-    "tie_encoder_decoder": false,
-    "tie_word_embeddings": false,
-    "tokenizer_class": null,
-    "top_k": 50,
-    "top_p": 1.0,
-    "torch_dtype": "bfloat16",
-    "torchscript": false,
-    "transformers_version": "4.39.2",
-    "typical_p": 1.0,
-    "use_bfloat16": false,
-    "use_cache": true,
-    "vocab_size": 100352
 }

 {
+  "architectures": [
+    "DbrxForCausalLM"
+  ],
+  "attn_config": {
+    "clip_qkv": 8,
+    "kv_n_heads": 8,
+    "model_type": "",
+    "rope_theta": 500000
+  },
+  "auto_map": {
+    "AutoConfig": "configuration_dbrx.DbrxConfig",
+    "AutoModelForCausalLM": "modeling_dbrx.DbrxForCausalLM"
+  },
+  "d_model": 6144,
+  "emb_pdrop": 0.0,
+  "ffn_config": {
+    "ffn_hidden_size": 10752,
+    "model_type": "",
+    "moe_jitter_eps": 0,
+    "moe_loss_weight": 0.05,
+    "moe_num_experts": 16,
+    "moe_top_k": 4,
+    "ffn_act_fn": {
+      "name": "silu"
+    }
+  },
+  "initializer_range": 0.02,
+  "max_seq_len": 32768,
+  "model_type": "dbrx",
+  "n_heads": 48,
+  "n_layers": 40,
+  "output_router_logits": false,
+  "resid_pdrop": 0.0,
+  "router_aux_loss_coef": 0.05,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "vocab_size": 100352
 }