diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4b3ca94bb4445bc28bc8.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4b3ca94bb4445bc28bc8.json deleted file mode 100644 index 66c56b2cfcaf36f773bbd1f20605e3b20b20191c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4b3ca94bb4445bc28bc8.json +++ /dev/null @@ -1 +0,0 @@ -{"apply_residual_connection_post_layernorm": false, "architectures": ["BloomForCausalLM"], "attention_dropout": 0.1, "bos_token_id": 1, "dtype": "float32", "eos_token_id": 2, "gradient_checkpointing": false, "hidden_dropout": 0.1, "hidden_size": 32, "id2label": {"0": "LABEL_0", "1": "LABEL_1", "2": "LABEL_2"}, "initializer_range": 0.02, "is_decoder": true, "label2id": {"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2}, "layer_norm_epsilon": 1e-05, "model_type": "bloom", "n_head": 4, "n_layer": 5, "n_positions": 512, "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-BloomForCausalLM", "checkpoint_revision": "92b07e9b7b4f986fa7c54e2ac3b9201b4ba5212e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "pad_token_id": 3, "pretraining_tp": 1, "seq_length": 7, "slow_but_exact": true, "torch_dtype": "float32", "type_vocab_size": 16, "use_cache": true, "vocab_size": 1024} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/f9a5439ee67f962284df.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/f9a5439ee67f962284df.json deleted file mode 100644 index 6744a0144c634fc5ac9279c5ff942fecd29d28e9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/f9a5439ee67f962284df.json +++ /dev/null @@ -1 +0,0 @@ -{"apply_residual_connection_post_layernorm": false, "architectures": ["BloomForCausalLM"], "attention_dropout": 0.1, "bos_token_id": 1, "dtype": "float32", "eos_token_id": 2, "gradient_checkpointing": false, "hidden_dropout": 0.1, "hidden_size": 32, "id2label": {"0": "LABEL_0", "1": "LABEL_1", "2": "LABEL_2"}, "initializer_range": 0.02, "is_decoder": true, "label2id": {"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2}, "layer_norm_epsilon": 1e-05, "model_type": "bloom", "n_head": 4, "n_layer": 5, "n_positions": 512, "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-BloomForCausalLM", "checkpoint_revision": "92b07e9b7b4f986fa7c54e2ac3b9201b4ba5212e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 512, "task": "text-generation"}, "pad_token_id": 3, "pretraining_tp": 1, "seq_length": 7, "slow_but_exact": true, "torch_dtype": "float32", "type_vocab_size": 16, "use_cache": true, "vocab_size": 1024} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/2b51ed50a475725e6b3e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/2b51ed50a475725e6b3e.json deleted file mode 100644 index 5cc32e72f5899891977c14aeb98da1edbe88bd46..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/2b51ed50a475725e6b3e.json +++ /dev/null @@ -1 +0,0 @@ -{"activation_function": "gelu_new", "architectures": ["GPT2LMHeadModel"], "attn_pdrop": 0.1, "bos_token_id": 50256, "embd_pdrop": 0.1, "eos_token_id": 50256, "initializer_range": 0.02, "layer_norm_epsilon": 1e-05, "model_type": "gpt2", "n_ctx": 1024, "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "n_positions": 1024, "neuron": {"auto_cast_type": "fp32", "batch_size": 1, "checkpoint_id": "gpt2", "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 1024, "task": "text-generation"}, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "task_specific_params": {"text-generation": {"do_sample": true, "max_length": 50}}, "use_cache": true, "vocab_size": 50257} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/335f8d1c7218c7410000.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/335f8d1c7218c7410000.json deleted file mode 100644 index a0a3d2765c6d818d76c26d19da9f75415e450978..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/335f8d1c7218c7410000.json +++ /dev/null @@ -1 +0,0 @@ -{"activation_function": "gelu_new", "architectures": ["GPT2LMHeadModel"], "attn_pdrop": 0.1, "bos_token_id": 50256, "embd_pdrop": 0.1, "eos_token_id": 50256, "initializer_range": 0.02, "layer_norm_epsilon": 1e-05, "model_type": "gpt2", "n_ctx": 1024, "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "n_positions": 1024, "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "gpt2", "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 1024, "task": "text-generation"}, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "task_specific_params": {"text-generation": {"do_sample": true, "max_length": 50}}, "use_cache": true, "vocab_size": 50257} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/36e1657c052cd92f031f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/36e1657c052cd92f031f.json deleted file mode 100644 index fcc1b479f025517a85ceaf2023e235b6e73ccd32..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/36e1657c052cd92f031f.json +++ /dev/null @@ -1 +0,0 @@ -{"activation_function": "gelu_new", "architectures": ["GPT2LMHeadModel"], "attn_pdrop": 0.1, "bos_token_id": 50256, "embd_pdrop": 0.1, "eos_token_id": 50256, "initializer_range": 0.02, "layer_norm_epsilon": 1e-05, "model_type": "gpt2", "n_ctx": 1024, "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "n_positions": 1024, "neuron": {"auto_cast_type": "fp32", "batch_size": 4, "checkpoint_id": "gpt2", "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 1024, "task": "text-generation"}, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "task_specific_params": {"text-generation": {"do_sample": true, "max_length": 50}}, "use_cache": true, "vocab_size": 50257} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/65cb8c493bb52d550958.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/65cb8c493bb52d550958.json deleted file mode 100644 index a68958a0c01d06ff145236ccb0c4f1db5a10ee73..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/65cb8c493bb52d550958.json +++ /dev/null @@ -1 +0,0 @@ -{"activation_function": "gelu_new", "architectures": ["GPT2LMHeadModel"], "attn_pdrop": 0.1, "bos_token_id": 50256, "embd_pdrop": 0.1, "eos_token_id": 50256, "initializer_range": 0.02, "layer_norm_epsilon": 1e-05, "model_type": "gpt2", "n_ctx": 1024, "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "n_positions": 1024, "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "gpt2", "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 1024, "task": "text-generation"}, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "task_specific_params": {"text-generation": {"do_sample": true, "max_length": 50}}, "use_cache": true, "vocab_size": 50257} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/73b7467bea092a4ea612.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/73b7467bea092a4ea612.json deleted file mode 100644 index e8b551e2ed5692bb1a1aa812681a4b15e371dd55..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/73b7467bea092a4ea612.json +++ /dev/null @@ -1 +0,0 @@ -{"activation_function": "gelu_new", "architectures": ["GPT2LMHeadModel"], "attn_pdrop": 0.1, "bos_token_id": 50256, "embd_pdrop": 0.1, "eos_token_id": 50256, "initializer_range": 0.02, "layer_norm_epsilon": 1e-05, "model_type": "gpt2", "n_ctx": 1024, "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "n_positions": 1024, "neuron": {"auto_cast_type": "fp32", "batch_size": 1, "checkpoint_id": "gpt2", "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e", "compiler_type": "neuronx-cc", "compiler_version": "2.12.68.0+4480452af", "num_cores": 2, "sequence_length": 1024, "task": "text-generation"}, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "task_specific_params": {"text-generation": {"do_sample": true, "max_length": 50}}, "use_cache": true, "vocab_size": 50257} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/d5e93094d604b84cb59a.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/d5e93094d604b84cb59a.json deleted file mode 100644 index 7b3f4079ea3046e2e8add9e7204be27770a59def..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/gpt2/d5e93094d604b84cb59a.json +++ /dev/null @@ -1 +0,0 @@ -{"activation_function": "gelu_new", "architectures": ["GPT2LMHeadModel"], "attn_pdrop": 0.1, "bos_token_id": 50256, "embd_pdrop": 0.1, "eos_token_id": 50256, "initializer_range": 0.02, "layer_norm_epsilon": 1e-05, "model_type": "gpt2", "n_ctx": 1024, "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "n_positions": 1024, "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "gpt2", "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 1024, "task": "text-generation"}, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "task_specific_params": {"text-generation": {"do_sample": true, "max_length": 50}}, "use_cache": true, "vocab_size": 50257} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/2713954a7f357d88b849.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/2713954a7f357d88b849.json deleted file mode 100644 index 9e1e20c9d2126b52b42318ac265124e287e9a555..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/2713954a7f357d88b849.json +++ /dev/null @@ -1 +0,0 @@ -{"activation_function": "gelu_new", "attention_probs_dropout_prob": 0.1, "attn_pdrop": 0.1, "bos_token_id": 98, "embd_pdrop": 0.1, "eos_token_id": 98, "gradient_checkpointing": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "initializer_range": 0.02, "intermediate_size": 37, "layer_norm_epsilon": 1e-05, "model_type": "gpt2", "n_ctx": 512, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "n_positions": 512, "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-gpt2", "checkpoint_revision": "71034c5d8bde858ff824298bdedc65515b97d2b9", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 512, "task": "text-generation"}, "pad_token_id": 98, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "type_vocab_size": 16, "use_cache": true, "vocab_size": 1000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/2d94b70ec1b5a0628be0.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/2d94b70ec1b5a0628be0.json deleted file mode 100644 index 5884e226f123ad76dd8f78b81edeae50434ab5e9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/2d94b70ec1b5a0628be0.json +++ /dev/null @@ -1 +0,0 @@ -{"activation_function": "gelu_new", "attention_probs_dropout_prob": 0.1, "attn_pdrop": 0.1, "bos_token_id": 98, "embd_pdrop": 0.1, "eos_token_id": 98, "gradient_checkpointing": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "initializer_range": 0.02, "intermediate_size": 37, "layer_norm_epsilon": 1e-05, "model_type": "gpt2", "n_ctx": 512, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "n_positions": 512, "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-gpt2", "checkpoint_revision": "71034c5d8bde858ff824298bdedc65515b97d2b9", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "pad_token_id": 98, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "type_vocab_size": 16, "use_cache": true, "vocab_size": 1000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/HuggingFaceTB/cosmo-1b/83c64ad31c0699e3053e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/HuggingFaceTB/cosmo-1b/83c64ad31c0699e3053e.json deleted file mode 100644 index dfbdb7fb6871841612ff062bafa252b28016dfaf..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/HuggingFaceTB/cosmo-1b/83c64ad31c0699e3053e.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "max_position_embeddings": 2048, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "HuggingFaceTB/cosmo-1b", "checkpoint_revision": "0d5e341cfe835dffc81b6186f9715c094889f8ce", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 2048, "task": "text-generation"}, "num_attention_heads": 16, "num_hidden_layers": 24, "num_key_value_heads": 16, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float32", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/0b3e037f44d96a1e7239.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/0b3e037f44d96a1e7239.json deleted file mode 100644 index ccbe0af4abde497a1f70055add310fc58d01aaa4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/0b3e037f44d96a1e7239.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "NousResearch/Llama-2-7b-chat-hf", "checkpoint_revision": "37892f30c23786c0d5367d80481fa0d9fba93cf8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/8d31ca1db6445239b6cc.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/8d31ca1db6445239b6cc.json deleted file mode 100644 index 7f8ce591d1c2093ddf6351a38d9a3004b677423e..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/8d31ca1db6445239b6cc.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "f16", "batch_size": 4, "checkpoint_id": "NousResearch/Llama-2-7b-chat-hf", "checkpoint_revision": "37892f30c23786c0d5367d80481fa0d9fba93cf8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/c5422e38f76211f16ecf.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/c5422e38f76211f16ecf.json deleted file mode 100644 index f96e339ce4c7afaba655d201311be5820db0e5f6..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/c5422e38f76211f16ecf.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 32, "checkpoint_id": "NousResearch/Llama-2-7b-chat-hf", "checkpoint_revision": "37892f30c23786c0d5367d80481fa0d9fba93cf8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/f6a36646c9688804bab7.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/f6a36646c9688804bab7.json deleted file mode 100644 index 1c76cbf64f2a06b6e71177ac592add3c27ce25e8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/f6a36646c9688804bab7.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 2, "checkpoint_id": "NousResearch/Llama-2-7b-chat-hf", "checkpoint_revision": "37892f30c23786c0d5367d80481fa0d9fba93cf8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/f9468c010d2a222046f8.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/f9468c010d2a222046f8.json deleted file mode 100644 index 3110ab255c31962a25240bcf4f3f2e33c9e42f71..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/f9468c010d2a222046f8.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "f16", "batch_size": 2, "checkpoint_id": "NousResearch/Llama-2-7b-chat-hf", "checkpoint_revision": "37892f30c23786c0d5367d80481fa0d9fba93cf8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/dacorvo/tiny-random-llama/62a76db84304b34ae305.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/dacorvo/tiny-random-llama/62a76db84304b34ae305.json deleted file mode 100644 index c0fc1f377c0510cea111ef0d6abd2ea98a208db0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/dacorvo/tiny-random-llama/62a76db84304b34ae305.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 128, "initializer_range": 0.02, "intermediate_size": 256, "max_position_embeddings": 512, "model_type": "llama", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "dacorvo/tiny-random-llama", "checkpoint_revision": "7fdafd2fe6a2d31c6abb72ae60db606d8bb23196", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "num_attention_heads": 1, "num_hidden_layers": 1, "num_key_value_heads": 1, "pretraining_tp": 1, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float32", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/dacorvo/tiny-random-llama/fb4b47ea0eea68e736b2.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/dacorvo/tiny-random-llama/fb4b47ea0eea68e736b2.json deleted file mode 100644 index 862bbdea95c9d02c82f1b4376c8ac53ed24c5166..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/dacorvo/tiny-random-llama/fb4b47ea0eea68e736b2.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 128, "initializer_range": 0.02, "intermediate_size": 256, "max_position_embeddings": 512, "model_type": "llama", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "dacorvo/tiny-random-llama", "checkpoint_revision": "7fdafd2fe6a2d31c6abb72ae60db606d8bb23196", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 512, "task": "text-generation"}, "num_attention_heads": 1, "num_hidden_layers": 1, "num_key_value_heads": 1, "pretraining_tp": 1, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float32", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/1ab4fec65fe40b15d8fc.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/1ab4fec65fe40b15d8fc.json deleted file mode 100644 index 24ceec4f9bbcd579a0ceab955cc3a2a87823f9ba..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/1ab4fec65fe40b15d8fc.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "a2cb7a712bb6e5e736ca7f8cd98167f81a0b5bd8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/55d02a833c68f65f88c9.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/55d02a833c68f65f88c9.json deleted file mode 100644 index fce47f4bb9a0ff0785a6918fee333810886e24ff..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/55d02a833c68f65f88c9.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "a2cb7a712bb6e5e736ca7f8cd98167f81a0b5bd8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/5d4688c06ca1eea9cd2d.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/5d4688c06ca1eea9cd2d.json deleted file mode 100644 index 0d24cb01406f7a92127f05aed8cb79a2edec3e2a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/5d4688c06ca1eea9cd2d.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "a2cb7a712bb6e5e736ca7f8cd98167f81a0b5bd8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/642817569926cd4530c8.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/642817569926cd4530c8.json deleted file mode 100644 index 48d65baa86e22108cbada2709c35e59fe8e86e47..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/642817569926cd4530c8.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 16, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "a2cb7a712bb6e5e736ca7f8cd98167f81a0b5bd8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/a4fb9901e6811328a109.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/a4fb9901e6811328a109.json deleted file mode 100644 index a5a64b4fcb01e39feb68fbccd8fc14b37f3c1cda..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/a4fb9901e6811328a109.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 16, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "a2cb7a712bb6e5e736ca7f8cd98167f81a0b5bd8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/a8c0b413e79c91496630.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/a8c0b413e79c91496630.json deleted file mode 100644 index 4f99e5d6be465ab5040df8785f426c40810772fd..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/a8c0b413e79c91496630.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "a2cb7a712bb6e5e736ca7f8cd98167f81a0b5bd8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/c29571197f94c8546e45.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/c29571197f94c8546e45.json deleted file mode 100644 index d82a6b37b318774150770909c0fb0c524b4edeb7..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/c29571197f94c8546e45.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "a2cb7a712bb6e5e736ca7f8cd98167f81a0b5bd8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/d68250e23574798328a7.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/d68250e23574798328a7.json deleted file mode 100644 index 33c762de29cc0bb33aacece74dc924a20654faeb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/d68250e23574798328a7.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "a2cb7a712bb6e5e736ca7f8cd98167f81a0b5bd8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/d9b1eaeab430d169db57.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/d9b1eaeab430d169db57.json deleted file mode 100644 index 14d248963a81d4c0f61652ce1e5ac7f6499e9254..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/d9b1eaeab430d169db57.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 32, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "a2cb7a712bb6e5e736ca7f8cd98167f81a0b5bd8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/0c05f781075e377b3fe0.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/0c05f781075e377b3fe0.json deleted file mode 100644 index e0288d40c582a995f6a29ebc2465ebc3b1d4fec3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/0c05f781075e377b3fe0.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 32, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "f5db02db724555f92da89c216ac04704f23d4590", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/4ff3c41f54b65e2012a7.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/4ff3c41f54b65e2012a7.json deleted file mode 100644 index 05be3ab20db0c116f04578460b9380fdebb80a8f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/4ff3c41f54b65e2012a7.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/57d08ed2158d3bc4efd8.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/57d08ed2158d3bc4efd8.json deleted file mode 100644 index a5a89263b65c4f2cb12cb9499a647cf26a453359..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/57d08ed2158d3bc4efd8.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 16, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "f5db02db724555f92da89c216ac04704f23d4590", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/5a8294b279e725cf8542.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/5a8294b279e725cf8542.json deleted file mode 100644 index 448f5be0e7281a32217642b0dc3193d08d97d592..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/5a8294b279e725cf8542.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "f5db02db724555f92da89c216ac04704f23d4590", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/735e2b4b3a5019f203a7.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/735e2b4b3a5019f203a7.json deleted file mode 100644 index cac4f41f0e0b1954b847d2e42410d5fd69093886..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/735e2b4b3a5019f203a7.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "f5db02db724555f92da89c216ac04704f23d4590", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/b4d9aa50677e27d82b4f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/b4d9aa50677e27d82b4f.json deleted file mode 100644 index bd060a20d92c4974f65ec96e28e8e5b5beb069dc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/b4d9aa50677e27d82b4f.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "f5db02db724555f92da89c216ac04704f23d4590", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/b57ff0785cfe94aa0718.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/b57ff0785cfe94aa0718.json deleted file mode 100644 index 2b6e397c7e484529f5949af0356e66bc6ebdc5f8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/b57ff0785cfe94aa0718.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 16, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "f5db02db724555f92da89c216ac04704f23d4590", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/be28f38f27aeb510e1c8.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/be28f38f27aeb510e1c8.json deleted file mode 100644 index 98b15072809d09c2c0775e9a67c2a568349bd44f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/be28f38f27aeb510e1c8.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "f5db02db724555f92da89c216ac04704f23d4590", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/c9527c01253d9424170a.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/c9527c01253d9424170a.json deleted file mode 100644 index 1159af91d9925605b2cda20f79a746017a19585f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/c9527c01253d9424170a.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "f5db02db724555f92da89c216ac04704f23d4590", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/d6a72bcc14cde1f0364f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/d6a72bcc14cde1f0364f.json deleted file mode 100644 index a534595e8bd18fa3b36c1147893bbbe0f3728bd2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/d6a72bcc14cde1f0364f.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 32, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "f5db02db724555f92da89c216ac04704f23d4590", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/f6a3964311a50e56da2e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/f6a3964311a50e56da2e.json deleted file mode 100644 index c01a8530e1f617e349af70eb006b3d522035e8b9..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/f6a3964311a50e56da2e.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "f5db02db724555f92da89c216ac04704f23d4590", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B-Instruct/8991f74549273593691d.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B-Instruct/8991f74549273593691d.json deleted file mode 100644 index 4bec867efff0c897db1645f921c78af31fc917c5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B-Instruct/8991f74549273593691d.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "meta-llama/Meta-Llama-3-8B-Instruct", "checkpoint_revision": "339ce92d052f002cdbac4a4bd551d1c61dd8345e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/09e30e2742ba4beafc57.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/09e30e2742ba4beafc57.json deleted file mode 100644 index c272cf5a2d197e2aaed7523119e6386abc78b45b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/09e30e2742ba4beafc57.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/26575c75a97054312245.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/26575c75a97054312245.json deleted file mode 100644 index 9fc0977243f923de8a9216e9c5592e44898638cf..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/26575c75a97054312245.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 48, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 12, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/43bdf99803590a3bbce8.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/43bdf99803590a3bbce8.json deleted file mode 100644 index 46182f0254822974389e0b8b0fa10ce03d8e80f0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/43bdf99803590a3bbce8.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/48cfba5e4ff5369b6e51.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/48cfba5e4ff5369b6e51.json deleted file mode 100644 index ee63424b9b90326027a426ddbf5bede893e5500c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/48cfba5e4ff5369b6e51.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 32, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/62340426bd628112eec5.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/62340426bd628112eec5.json deleted file mode 100644 index 52ee0b5fa35664903f55c7d744b6e65a29cd7abd..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/62340426bd628112eec5.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/638108a35a53ccc460bd.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/638108a35a53ccc460bd.json deleted file mode 100644 index 940cac882a6a7b082ea99b14f97961fb3d0651a2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/638108a35a53ccc460bd.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/75d01b56a778419c897d.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/75d01b56a778419c897d.json deleted file mode 100644 index af83b7299e6401c133bdf29518d09590b5967bd4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/75d01b56a778419c897d.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "cd892e8f4da1043d4b01d5ea182a2e8412bf658f", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/77bf56a610a467c3b01c.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/77bf56a610a467c3b01c.json deleted file mode 100644 index aef7ffe61cc961dd567f4e62c8152a20af28686a..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/77bf56a610a467c3b01c.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/7b660ccd0835c30eeb1e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/7b660ccd0835c30eeb1e.json deleted file mode 100644 index 25004a4918d3b7343233b075467ccf026071ff78..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/7b660ccd0835c30eeb1e.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "bf16", "batch_size": 32, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/7cf5159a98ef6aa54442.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/7cf5159a98ef6aa54442.json deleted file mode 100644 index 9e28b64126abc0c2559ef6ec25521c49c8fc7c52..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/7cf5159a98ef6aa54442.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "561487d18c41c76bcb5fc6cfb73a324982f04f47", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/922a2f19b0e3177426d7.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/922a2f19b0e3177426d7.json deleted file mode 100644 index 532bc11745f4f58eb4ebf46dcb6ea4563ff90ea8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/922a2f19b0e3177426d7.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/97351cfb2a7ab3fe894d.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/97351cfb2a7ab3fe894d.json deleted file mode 100644 index c6d6ea64670531e795e7391df33d9fdf99969967..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/97351cfb2a7ab3fe894d.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/97cabd12f94a6f9c5c12.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/97cabd12f94a6f9c5c12.json deleted file mode 100644 index 9032f1913d5244348ec64b9adc3bb40343dc5dc1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/97cabd12f94a6f9c5c12.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "bf16", "batch_size": 8, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/9968d9926d4314f57474.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/9968d9926d4314f57474.json deleted file mode 100644 index d3b58e889a225c256577819df95068797b32208b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/9968d9926d4314f57474.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "bf16", "batch_size": 16, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/ab4a6c788d6b461d4216.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/ab4a6c788d6b461d4216.json deleted file mode 100644 index 65cd1c13c12870ce2ab8a5ac946fb7e48937164f..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/ab4a6c788d6b461d4216.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "b6887ce03ea47d068bf8502ba6ed27f8c5c12a6b", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/cc0ac0def4b0fb5bfcff.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/cc0ac0def4b0fb5bfcff.json deleted file mode 100644 index 7b52276890362affc224e1fcf4877a62a3ae43d8..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/cc0ac0def4b0fb5bfcff.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "bf16", "batch_size": 1, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/ce04f3fc57ef52f7e7bd.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/ce04f3fc57ef52f7e7bd.json deleted file mode 100644 index 3ed24eb1f057a8b1449b39f15ec679ed554d5a14..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/ce04f3fc57ef52f7e7bd.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 16, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/d8d190c62c0b163c34d7.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/d8d190c62c0b163c34d7.json deleted file mode 100644 index 71d7112d8d2da6e9b44e5d4776f3318ba1f684ee..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/d8d190c62c0b163c34d7.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 48, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/e8097cf453d786f97d26.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/e8097cf453d786f97d26.json deleted file mode 100644 index aa6f07ff21605176519e9673a97505517d13559d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/meta-llama/Meta-Llama-3-8B/e8097cf453d786f97d26.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 16, "checkpoint_id": "meta-llama/Meta-Llama-3-8B", "checkpoint_revision": "1460c22666392e470910ce3d44ffeb2ab7dbd4df", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/f1bd1033a11a1478af9e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/f1bd1033a11a1478af9e.json deleted file mode 100644 index 5f52b96291d3dc5f5e17bba794dfc2a380988c8b..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/f1bd1033a11a1478af9e.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 5504, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp32", "batch_size": 4, "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 1024, "task": "text-generation"}, "num_attention_heads": 16, "num_hidden_layers": 24, "num_key_value_heads": 16, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float32", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/HuggingFaceH4/zephyr-7b-beta/f4459480a285f358164c.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/HuggingFaceH4/zephyr-7b-beta/f4459480a285f358164c.json deleted file mode 100644 index 468b1e5a2c25caccb5147c17a9f08de4cbdc38cc..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/HuggingFaceH4/zephyr-7b-beta/f4459480a285f358164c.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "HuggingFaceH4/zephyr-7b-beta", "checkpoint_revision": "b70e0c9a2d9e14bd1e812d3c398e5f313e93b473", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pad_token_id": 2, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/NucleusOrg/Nucleus-1B-alpha-1/e189713e28900a6fd483.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/NucleusOrg/Nucleus-1B-alpha-1/e189713e28900a6fd483.json deleted file mode 100644 index 3e562cac1dcd60508b54bef58f90b6c27bd23cf1..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/NucleusOrg/Nucleus-1B-alpha-1/e189713e28900a6fd483.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "NucleusOrg/Nucleus-1B-alpha-1", "checkpoint_revision": "f484a4533292b1ba4ba29b9afb3bb07eef813024", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 2048, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 4, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/1cfca4036d7b607639ea.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/1cfca4036d7b607639ea.json deleted file mode 100644 index 31af98819e3fe66d4a30ddc20d3b5550febb6388..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/1cfca4036d7b607639ea.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "attention_probs_dropout_prob": 0.1, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 32, "initializer_range": 0.02, "intermediate_size": 37, "is_decoder": true, "max_position_embeddings": 512, "model_type": "mistral", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "dacorvo/tiny-random-MistralForCausalLM", "checkpoint_revision": "81d453e3c8985649e9ee3d4c9378461029d1c73a", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "num_attention_heads": 4, "num_hidden_layers": 2, "num_key_value_heads": 2, "pad_token_id": 0, "rms_norm_eps": 1e-06, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "float32", "type_vocab_size": 16, "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/8ab25062c2316baf852b.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/8ab25062c2316baf852b.json deleted file mode 100644 index 18c3bc66a1499be375bf568c0e83cd1abdc396d5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/8ab25062c2316baf852b.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "attention_probs_dropout_prob": 0.1, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 32, "initializer_range": 0.02, "intermediate_size": 37, "is_decoder": true, "max_position_embeddings": 512, "model_type": "mistral", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "dacorvo/tiny-random-MistralForCausalLM", "checkpoint_revision": "81d453e3c8985649e9ee3d4c9378461029d1c73a", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 512, "task": "text-generation"}, "num_attention_heads": 4, "num_hidden_layers": 2, "num_key_value_heads": 2, "pad_token_id": 0, "rms_norm_eps": 1e-06, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "float32", "type_vocab_size": 16, "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/1245bb72b32d75db76f9.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/1245bb72b32d75db76f9.json deleted file mode 100644 index bfa483779ffeb478c67a46b021b26beb31d6eab3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/1245bb72b32d75db76f9.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/5e1e1d2ca63370d9d0a0.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/5e1e1d2ca63370d9d0a0.json deleted file mode 100644 index 485dc8ca9c30148eaccef2b6490c260ad8b13bd5..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/5e1e1d2ca63370d9d0a0.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/6255cc2875c3d0c5e501.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/6255cc2875c3d0c5e501.json deleted file mode 100644 index 217c62db5b1081a4d73636d896e6709c4408d244..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/6255cc2875c3d0c5e501.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "fp16", "batch_size": 32, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/64e0d5cd868faca4549f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/64e0d5cd868faca4549f.json deleted file mode 100644 index 8ed2a607b2ab3a2d8649610facd0b316dfdcfbc2..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/64e0d5cd868faca4549f.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 1, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/733bc79383c0cc56537e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/733bc79383c0cc56537e.json deleted file mode 100644 index bdb28c314300cfdaff23a06b828f00e63c112bcb..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/733bc79383c0cc56537e.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 16, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/7fd9b858cf45bbf6355f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/7fd9b858cf45bbf6355f.json deleted file mode 100644 index c8950bdffc2b4bdec2c36c737ea5171918dfb8c3..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/7fd9b858cf45bbf6355f.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "fp16", "batch_size": 16, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/979fce2dd4e4d499c2f0.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/979fce2dd4e4d499c2f0.json deleted file mode 100644 index ce02184e9415f39bc22c1b2d0aa0c3cb8b87d812..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/979fce2dd4e4d499c2f0.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/aa1ba12921766e16b7dc.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/aa1ba12921766e16b7dc.json deleted file mode 100644 index 95e26a5622939f71cab2a9156495d24fcade9f2c..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/aa1ba12921766e16b7dc.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ca240a79f952204527fc.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ca240a79f952204527fc.json deleted file mode 100644 index 8a9159b8625e0e272ebe7a732d2f41569efcddd0..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ca240a79f952204527fc.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 8, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/eec3579b9122a80133a1.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/eec3579b9122a80133a1.json deleted file mode 100644 index 1e62d97b754c28d5d38a5b14d60203cc413bab77..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/eec3579b9122a80133a1.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 32, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7b-Instruct-v0.2/8ff7681fcc40de857709.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7b-Instruct-v0.2/8ff7681fcc40de857709.json deleted file mode 100644 index dd7051aff271ce074fe6da078488d5295edcabf4..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7b-Instruct-v0.2/8ff7681fcc40de857709.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "mistralai/Mistral-7b-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7b-Instruct-v0.2/af430415031257ca2c88.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7b-Instruct-v0.2/af430415031257ca2c88.json deleted file mode 100644 index 0fa09c390310454a9e7ec8e446cd50eed241fb34..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/mistralai/Mistral-7b-Instruct-v0.2/af430415031257ca2c88.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 32, "checkpoint_id": "mistralai/Mistral-7b-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/optimum/mistral-1.1b-testing/3f196a5a5beadf2af838.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/optimum/mistral-1.1b-testing/3f196a5a5beadf2af838.json deleted file mode 100644 index 8e3a99f0d6164120394a75a522672603f75501ab..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/optimum/mistral-1.1b-testing/3f196a5a5beadf2af838.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 5632, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "optimum/mistral-1.1b-testing", "checkpoint_revision": "ce03bc8d47dbd2c173ff65f3a8de1325ba724195", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 22, "num_key_value_heads": 4, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/optimum/mistral-en-1.1B-testing/f5764c6ffc3e69951b77.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/optimum/mistral-en-1.1B-testing/f5764c6ffc3e69951b77.json deleted file mode 100644 index 1aa262e4a8af138d905e768cf3e3819aeb8c2ebd..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mistral/optimum/mistral-en-1.1B-testing/f5764c6ffc3e69951b77.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 0, "eos_token_id": 0, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 5632, "max_position_embeddings": 4096, "model_type": "mistral", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "optimum/mistral-en-1.1B-testing", "checkpoint_revision": "df7d850906b071dbb69cf11af1033d7cd735227d", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 2048, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 22, "num_key_value_heads": 4, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "sliding_window": 1024, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 50257} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mixtral/dacorvo/Mixtral-tiny/206ea44a949320faa38a.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mixtral/dacorvo/Mixtral-tiny/206ea44a949320faa38a.json deleted file mode 100644 index 99240e5639166c4f216d60c20eb21710600ef881..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mixtral/dacorvo/Mixtral-tiny/206ea44a949320faa38a.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MixtralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 1024, "initializer_range": 0.02, "intermediate_size": 3584, "max_position_embeddings": 131072, "model_type": "mixtral", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "dacorvo/Mixtral-tiny", "checkpoint_revision": "dce0e7b71159ad011c84de5a5bd9515bb9176220", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "num_attention_heads": 32, "num_experts_per_tok": 2, "num_hidden_layers": 2, "num_key_value_heads": 8, "num_local_experts": 8, "output_router_logits": false, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "router_aux_loss_coef": 0.001, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mixtral/dacorvo/Mixtral-tiny/79f811350e822243d689.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mixtral/dacorvo/Mixtral-tiny/79f811350e822243d689.json deleted file mode 100644 index 43cff8f94b68a03242d1452113a2f3a073bf9523..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mixtral/dacorvo/Mixtral-tiny/79f811350e822243d689.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MixtralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 1024, "initializer_range": 0.02, "intermediate_size": 3584, "max_position_embeddings": 1024, "model_type": "mixtral", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "dacorvo/Mixtral-tiny", "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 1024, "task": "text-generation"}, "num_attention_heads": 32, "num_experts_per_tok": 2, "num_hidden_layers": 2, "num_key_value_heads": 8, "num_local_experts": 8, "output_router_logits": false, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "router_aux_loss_coef": 0.001, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mixtral/hf-internal-testing/Mixtral-tiny/c037b93c8efce0428ba9.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mixtral/hf-internal-testing/Mixtral-tiny/c037b93c8efce0428ba9.json deleted file mode 100644 index 20e9c3cb4b42161705aa3f1d8647888866990890..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mixtral/hf-internal-testing/Mixtral-tiny/c037b93c8efce0428ba9.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MixtralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 1024, "initializer_range": 0.02, "intermediate_size": 3584, "max_position_embeddings": 131072, "model_type": "mixtral", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/Mixtral-tiny", "checkpoint_revision": "746eaa355d31052d3a20bb1dfaa734454df41568", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "num_attention_heads": 32, "num_experts_per_tok": 2, "num_hidden_layers": 2, "num_key_value_heads": 8, "num_local_experts": 8, "output_router_logits": false, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "router_aux_loss_coef": 0.001, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mixtral/mistralai/Mixtral-8x7B-Instruct-v0.1/b17eb4729feac22df9ea.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mixtral/mistralai/Mixtral-8x7B-Instruct-v0.1/b17eb4729feac22df9ea.json deleted file mode 100644 index d75c04bb915e753edfa27dd83ba7c098765e34ec..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/mixtral/mistralai/Mixtral-8x7B-Instruct-v0.1/b17eb4729feac22df9ea.json +++ /dev/null @@ -1 +0,0 @@ -{"architectures": ["MixtralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mixtral", "neuron": {"auto_cast_type": "bf16", "batch_size": 1, "checkpoint_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", "checkpoint_revision": "1e637f2d7cb0a9d6fb1922f305cb784995190a83", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_experts_per_tok": 2, "num_hidden_layers": 32, "num_key_value_heads": 8, "num_local_experts": 8, "output_router_logits": false, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "router_aux_loss_coef": 0.02, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/f3adfe5a9c79b5a36fd7.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/f3adfe5a9c79b5a36fd7.json deleted file mode 100644 index 66b101aaeb2d861a356a107afb2aef0b7115a83d..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/f3adfe5a9c79b5a36fd7.json +++ /dev/null @@ -1 +0,0 @@ -{"_remove_final_layer_norm": false, "activation_function": "relu", "architectures": ["OPTForCausalLM"], "attention_dropout": 0.1, "bos_token_id": 2, "do_layer_norm_before": true, "dropout": 0.1, "embed_dim": 16, "enable_bias": true, "eos_token_id": 2, "ffn_dim": 4, "hidden_size": 16, "init_std": 0.02, "is_decoder": true, "layer_norm_elementwise_affine": true, "layerdrop": 0.0, "max_position_embeddings": 100, "model_type": "opt", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-OPTForCausalLM", "checkpoint_revision": "190d1f4fc0011d2eaeaa05282e0fbd2445e4b11f", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "num_attention_heads": 4, "num_hidden_layers": 5, "pad_token_id": 1, "torch_dtype": "float32", "use_cache": true, "vocab_size": 50265, "word_embed_proj_dim": 16} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/stable-diffusion/runwayml/stable-diffusion-v1-5/403c0449155a8a9f6bbb.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/stable-diffusion/runwayml/stable-diffusion-v1-5/403c0449155a8a9f6bbb.json deleted file mode 100644 index bdb41c798c190a80f29f94fa3463b89689af5d24..0000000000000000000000000000000000000000 --- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22.dev0/inference/stable-diffusion/runwayml/stable-diffusion-v1-5/403c0449155a8a9f6bbb.json +++ /dev/null @@ -1 +0,0 @@ -{"model_type": "stable-diffusion", "text_encoder": {"architectures": ["CLIPTextModel"], "attention_dropout": 0.0, "dropout": 0.0, "hidden_act": "quick_gelu", "hidden_size": 768, "initializer_factor": 1.0, "initializer_range": 0.02, "intermediate_size": 3072, "layer_norm_eps": 1e-05, "max_position_embeddings": 77, "model_type": "clip_text_model", "neuron": {"auto_cast": "matmul", "auto_cast_type": "bf16", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "dynamic_batch_size": false, "inline_weights_to_neff": false, "optlevel": "2", "output_attentions": false, "output_hidden_states": false, "static_batch_size": 1, "static_sequence_length": 77}, "num_attention_heads": 12, "num_hidden_layers": 12, "task": "feature-extraction", "vocab_size": 49408}, "unet": {"_class_name": "UNet2DConditionModel", "act_fn": "silu", "addition_embed_type": null, "addition_embed_type_num_heads": 64, "addition_time_embed_dim": null, "attention_head_dim": 8, "attention_type": "default", "block_out_channels": [320, 640, 1280, 1280], "center_input_sample": false, "class_embed_type": null, "class_embeddings_concat": false, "conv_in_kernel": 3, "conv_out_kernel": 3, "cross_attention_dim": 768, "cross_attention_norm": null, "down_block_types": ["CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D"], "downsample_padding": 1, "dropout": 0.0, "dual_cross_attention": false, "encoder_hid_dim": null, "encoder_hid_dim_type": null, "flip_sin_to_cos": true, "freq_shift": 0, "in_channels": 4, "layers_per_block": 2, "mid_block_only_cross_attention": null, "mid_block_scale_factor": 1, "mid_block_type": "UNetMidBlock2DCrossAttn", "neuron": {"auto_cast": "matmul", "auto_cast_type": "bf16", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "dynamic_batch_size": false, "inline_weights_to_neff": false, "optlevel": "2", "output_attentions": false, "output_hidden_states": false, "static_batch_size": 1, "static_height": 64, "static_num_channels": 4, "static_sequence_length": 77, "static_width": 64}, "norm_eps": 1e-05, "norm_num_groups": 32, "num_attention_heads": null, "num_class_embeds": null, "only_cross_attention": false, "out_channels": 4, "projection_class_embeddings_input_dim": null, "resnet_out_scale_factor": 1.0, "resnet_skip_time_act": false, "resnet_time_scale_shift": "default", "reverse_transformer_layers_per_block": null, "task": "semantic-segmentation", "time_cond_proj_dim": null, "time_embedding_act_fn": null, "time_embedding_dim": null, "time_embedding_type": "positional", "timestep_post_act": null, "transformer_layers_per_block": 1, "up_block_types": ["UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D"], "upcast_attention": false, "use_linear_projection": false}} \ No newline at end of file