Synchronizing local compiler cache.
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4cb32492bb3736b0f508.json +48 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/b3a5049f78aa165f1949.json +48 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/ec2660c61019ca183223.json +48 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/gpt2/bcf3b4c10abca97b8c19.json +48 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/hf-internal-testing/tiny-random-gpt2/1f43dd9159545f0247d5.json +45 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/hf-internal-testing/tiny-random-gpt2/588998ffbea59807bd3d.json +45 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/hf-internal-testing/tiny-random-gpt2/83ecbabedf314419f3ca.json +45 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/llamafactory/tiny-random-Llama-3/12398fc9bb3b90b69c6d.json +45 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/llamafactory/tiny-random-Llama-3/3159c7cc5b0c26793327.json +45 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/llamafactory/tiny-random-Llama-3/e068af6507bc381e1370.json +45 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/4ac2b0ca2f3012645cd2.json +40 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/66507451739e53a47483.json +40 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/ce2619dd085de8aadeff.json +40 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mixtral/dacorvo/Mixtral-tiny/3509514a093c68deeaca.json +41 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mixtral/dacorvo/Mixtral-tiny/7a7bef14f18dbabbd689.json +41 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mixtral/dacorvo/Mixtral-tiny/7f1476291e21a1e7a585.json +41 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/549b38a19302cd6ff930.json +39 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/6b898ea4e83643172eba.json +39 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/d1b1af5fd67eb0577ee7.json +39 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/yujiepan/phi-4-tiny-random/607ae698be806304fb50.json +42 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/yujiepan/phi-4-tiny-random/da1641bf50b2440d8d42.json +42 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/yujiepan/phi-4-tiny-random/ff5ccda1b163a504cfcc.json +42 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_032f74178031f5ed3c74+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_0a88901e8c98f54e4c10+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_0cfdfd8c26d66b282d5a+613edded/model.neff +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_1090c91454701eac92a9+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_113f3268e3fd4d66fe81+8a3305d3/model.neff +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_151e013069d6b102df91+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_255bd8f176e49c261e93+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_272d7dabaeb48e6d7210+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_2b6914194b931d7496fc+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_2c290567bb51ca0069bf+613edded/model.neff +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_3aa61c04bd8d9ca8722a+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_3d7f2d2bef4f6fdd2c74+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_3e5f6b34247d2b457ec5+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_40f92bf9469aae653e93+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_48437cc74469a8ccaec8+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_5a9cc28c7336fd9ea3ab+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_5de5ea910315ba9c0def+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_6819e64fb96e87ffece0+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_7062a76356ca462bcc78+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_878400bd48be9c48dd23+613edded/model.neff +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_8bb2b91314df12f54a63+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_960d05b842008ddeae63+8a3305d3/model.neff +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_99824fde40f6515f5b3d+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_9be95eab2454f5ade94b+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_a297084c31f9c1ff1d5d+613edded/model.neff +1 -1
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4cb32492bb3736b0f508.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "hf-internal-testing/tiny-random-BloomForCausalLM",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"apply_residual_connection_post_layernorm": false,
|
| 6 |
+
"architectures": [
|
| 7 |
+
"BloomForCausalLM"
|
| 8 |
+
],
|
| 9 |
+
"attention_dropout": 0.1,
|
| 10 |
+
"dtype": "float32",
|
| 11 |
+
"gradient_checkpointing": false,
|
| 12 |
+
"hidden_dropout": 0.1,
|
| 13 |
+
"hidden_size": 32,
|
| 14 |
+
"id2label": {
|
| 15 |
+
"0": "LABEL_0",
|
| 16 |
+
"1": "LABEL_1",
|
| 17 |
+
"2": "LABEL_2"
|
| 18 |
+
},
|
| 19 |
+
"initializer_range": 0.02,
|
| 20 |
+
"is_decoder": true,
|
| 21 |
+
"label2id": {
|
| 22 |
+
"LABEL_0": 0,
|
| 23 |
+
"LABEL_1": 1,
|
| 24 |
+
"LABEL_2": 2
|
| 25 |
+
},
|
| 26 |
+
"layer_norm_epsilon": 1e-05,
|
| 27 |
+
"model_type": "bloom",
|
| 28 |
+
"n_head": 4,
|
| 29 |
+
"n_layer": 5,
|
| 30 |
+
"n_positions": 512,
|
| 31 |
+
"neuron": {
|
| 32 |
+
"auto_cast_type": "fp32",
|
| 33 |
+
"batch_size": 1,
|
| 34 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-BloomForCausalLM",
|
| 35 |
+
"checkpoint_revision": "92b07e9b7b4f986fa7c54e2ac3b9201b4ba5212e",
|
| 36 |
+
"compiler_type": "neuronx-cc",
|
| 37 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 38 |
+
"num_cores": 2,
|
| 39 |
+
"sequence_length": 100,
|
| 40 |
+
"task": "text-generation"
|
| 41 |
+
},
|
| 42 |
+
"pretraining_tp": 1,
|
| 43 |
+
"seq_length": 7,
|
| 44 |
+
"slow_but_exact": true,
|
| 45 |
+
"type_vocab_size": 16,
|
| 46 |
+
"use_cache": true,
|
| 47 |
+
"vocab_size": 1024
|
| 48 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/b3a5049f78aa165f1949.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "hf-internal-testing/tiny-random-BloomForCausalLM",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"apply_residual_connection_post_layernorm": false,
|
| 6 |
+
"architectures": [
|
| 7 |
+
"BloomForCausalLM"
|
| 8 |
+
],
|
| 9 |
+
"attention_dropout": 0.1,
|
| 10 |
+
"dtype": "float32",
|
| 11 |
+
"gradient_checkpointing": false,
|
| 12 |
+
"hidden_dropout": 0.1,
|
| 13 |
+
"hidden_size": 32,
|
| 14 |
+
"id2label": {
|
| 15 |
+
"0": "LABEL_0",
|
| 16 |
+
"1": "LABEL_1",
|
| 17 |
+
"2": "LABEL_2"
|
| 18 |
+
},
|
| 19 |
+
"initializer_range": 0.02,
|
| 20 |
+
"is_decoder": true,
|
| 21 |
+
"label2id": {
|
| 22 |
+
"LABEL_0": 0,
|
| 23 |
+
"LABEL_1": 1,
|
| 24 |
+
"LABEL_2": 2
|
| 25 |
+
},
|
| 26 |
+
"layer_norm_epsilon": 1e-05,
|
| 27 |
+
"model_type": "bloom",
|
| 28 |
+
"n_head": 4,
|
| 29 |
+
"n_layer": 5,
|
| 30 |
+
"n_positions": 512,
|
| 31 |
+
"neuron": {
|
| 32 |
+
"auto_cast_type": "fp16",
|
| 33 |
+
"batch_size": 2,
|
| 34 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-BloomForCausalLM",
|
| 35 |
+
"checkpoint_revision": "92b07e9b7b4f986fa7c54e2ac3b9201b4ba5212e",
|
| 36 |
+
"compiler_type": "neuronx-cc",
|
| 37 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 38 |
+
"num_cores": 2,
|
| 39 |
+
"sequence_length": 100,
|
| 40 |
+
"task": "text-generation"
|
| 41 |
+
},
|
| 42 |
+
"pretraining_tp": 1,
|
| 43 |
+
"seq_length": 7,
|
| 44 |
+
"slow_but_exact": true,
|
| 45 |
+
"type_vocab_size": 16,
|
| 46 |
+
"use_cache": true,
|
| 47 |
+
"vocab_size": 1024
|
| 48 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/ec2660c61019ca183223.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "hf-internal-testing/tiny-random-BloomForCausalLM",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"apply_residual_connection_post_layernorm": false,
|
| 6 |
+
"architectures": [
|
| 7 |
+
"BloomForCausalLM"
|
| 8 |
+
],
|
| 9 |
+
"attention_dropout": 0.1,
|
| 10 |
+
"dtype": "float32",
|
| 11 |
+
"gradient_checkpointing": false,
|
| 12 |
+
"hidden_dropout": 0.1,
|
| 13 |
+
"hidden_size": 32,
|
| 14 |
+
"id2label": {
|
| 15 |
+
"0": "LABEL_0",
|
| 16 |
+
"1": "LABEL_1",
|
| 17 |
+
"2": "LABEL_2"
|
| 18 |
+
},
|
| 19 |
+
"initializer_range": 0.02,
|
| 20 |
+
"is_decoder": true,
|
| 21 |
+
"label2id": {
|
| 22 |
+
"LABEL_0": 0,
|
| 23 |
+
"LABEL_1": 1,
|
| 24 |
+
"LABEL_2": 2
|
| 25 |
+
},
|
| 26 |
+
"layer_norm_epsilon": 1e-05,
|
| 27 |
+
"model_type": "bloom",
|
| 28 |
+
"n_head": 4,
|
| 29 |
+
"n_layer": 5,
|
| 30 |
+
"n_positions": 512,
|
| 31 |
+
"neuron": {
|
| 32 |
+
"auto_cast_type": "fp16",
|
| 33 |
+
"batch_size": 1,
|
| 34 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-BloomForCausalLM",
|
| 35 |
+
"checkpoint_revision": "92b07e9b7b4f986fa7c54e2ac3b9201b4ba5212e",
|
| 36 |
+
"compiler_type": "neuronx-cc",
|
| 37 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 38 |
+
"num_cores": 2,
|
| 39 |
+
"sequence_length": 100,
|
| 40 |
+
"task": "text-generation"
|
| 41 |
+
},
|
| 42 |
+
"pretraining_tp": 1,
|
| 43 |
+
"seq_length": 7,
|
| 44 |
+
"slow_but_exact": true,
|
| 45 |
+
"type_vocab_size": 16,
|
| 46 |
+
"use_cache": true,
|
| 47 |
+
"vocab_size": 1024
|
| 48 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/gpt2/bcf3b4c10abca97b8c19.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "gpt2",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"activation_function": "gelu_new",
|
| 6 |
+
"architectures": [
|
| 7 |
+
"GPT2LMHeadModel"
|
| 8 |
+
],
|
| 9 |
+
"attn_pdrop": 0.1,
|
| 10 |
+
"embd_pdrop": 0.1,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"layer_norm_epsilon": 1e-05,
|
| 13 |
+
"model_type": "gpt2",
|
| 14 |
+
"n_ctx": 1024,
|
| 15 |
+
"n_embd": 768,
|
| 16 |
+
"n_head": 12,
|
| 17 |
+
"n_inner": null,
|
| 18 |
+
"n_layer": 12,
|
| 19 |
+
"n_positions": 1024,
|
| 20 |
+
"neuron": {
|
| 21 |
+
"auto_cast_type": "fp16",
|
| 22 |
+
"batch_size": 4,
|
| 23 |
+
"checkpoint_id": "gpt2",
|
| 24 |
+
"checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e",
|
| 25 |
+
"compiler_type": "neuronx-cc",
|
| 26 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 27 |
+
"num_cores": 2,
|
| 28 |
+
"sequence_length": 1024,
|
| 29 |
+
"task": "text-generation"
|
| 30 |
+
},
|
| 31 |
+
"reorder_and_upcast_attn": false,
|
| 32 |
+
"resid_pdrop": 0.1,
|
| 33 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 34 |
+
"scale_attn_weights": true,
|
| 35 |
+
"summary_activation": null,
|
| 36 |
+
"summary_first_dropout": 0.1,
|
| 37 |
+
"summary_proj_to_labels": true,
|
| 38 |
+
"summary_type": "cls_index",
|
| 39 |
+
"summary_use_proj": true,
|
| 40 |
+
"task_specific_params": {
|
| 41 |
+
"text-generation": {
|
| 42 |
+
"do_sample": true,
|
| 43 |
+
"max_length": 50
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"use_cache": true,
|
| 47 |
+
"vocab_size": 50257
|
| 48 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/hf-internal-testing/tiny-random-gpt2/1f43dd9159545f0247d5.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "hf-internal-testing/tiny-random-gpt2",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"activation_function": "gelu_new",
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"attn_pdrop": 0.1,
|
| 8 |
+
"embd_pdrop": 0.1,
|
| 9 |
+
"gradient_checkpointing": false,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 37,
|
| 14 |
+
"layer_norm_epsilon": 1e-05,
|
| 15 |
+
"model_type": "gpt2",
|
| 16 |
+
"n_ctx": 512,
|
| 17 |
+
"n_embd": 32,
|
| 18 |
+
"n_head": 4,
|
| 19 |
+
"n_inner": null,
|
| 20 |
+
"n_layer": 5,
|
| 21 |
+
"n_positions": 512,
|
| 22 |
+
"neuron": {
|
| 23 |
+
"auto_cast_type": "fp16",
|
| 24 |
+
"batch_size": 2,
|
| 25 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-gpt2",
|
| 26 |
+
"checkpoint_revision": "71034c5d8bde858ff824298bdedc65515b97d2b9",
|
| 27 |
+
"compiler_type": "neuronx-cc",
|
| 28 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 29 |
+
"num_cores": 2,
|
| 30 |
+
"sequence_length": 100,
|
| 31 |
+
"task": "text-generation"
|
| 32 |
+
},
|
| 33 |
+
"reorder_and_upcast_attn": false,
|
| 34 |
+
"resid_pdrop": 0.1,
|
| 35 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 36 |
+
"scale_attn_weights": true,
|
| 37 |
+
"summary_activation": null,
|
| 38 |
+
"summary_first_dropout": 0.1,
|
| 39 |
+
"summary_proj_to_labels": true,
|
| 40 |
+
"summary_type": "cls_index",
|
| 41 |
+
"summary_use_proj": true,
|
| 42 |
+
"type_vocab_size": 16,
|
| 43 |
+
"use_cache": true,
|
| 44 |
+
"vocab_size": 1000
|
| 45 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/hf-internal-testing/tiny-random-gpt2/588998ffbea59807bd3d.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "hf-internal-testing/tiny-random-gpt2",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"activation_function": "gelu_new",
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"attn_pdrop": 0.1,
|
| 8 |
+
"embd_pdrop": 0.1,
|
| 9 |
+
"gradient_checkpointing": false,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 37,
|
| 14 |
+
"layer_norm_epsilon": 1e-05,
|
| 15 |
+
"model_type": "gpt2",
|
| 16 |
+
"n_ctx": 512,
|
| 17 |
+
"n_embd": 32,
|
| 18 |
+
"n_head": 4,
|
| 19 |
+
"n_inner": null,
|
| 20 |
+
"n_layer": 5,
|
| 21 |
+
"n_positions": 512,
|
| 22 |
+
"neuron": {
|
| 23 |
+
"auto_cast_type": "fp16",
|
| 24 |
+
"batch_size": 1,
|
| 25 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-gpt2",
|
| 26 |
+
"checkpoint_revision": "71034c5d8bde858ff824298bdedc65515b97d2b9",
|
| 27 |
+
"compiler_type": "neuronx-cc",
|
| 28 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 29 |
+
"num_cores": 2,
|
| 30 |
+
"sequence_length": 100,
|
| 31 |
+
"task": "text-generation"
|
| 32 |
+
},
|
| 33 |
+
"reorder_and_upcast_attn": false,
|
| 34 |
+
"resid_pdrop": 0.1,
|
| 35 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 36 |
+
"scale_attn_weights": true,
|
| 37 |
+
"summary_activation": null,
|
| 38 |
+
"summary_first_dropout": 0.1,
|
| 39 |
+
"summary_proj_to_labels": true,
|
| 40 |
+
"summary_type": "cls_index",
|
| 41 |
+
"summary_use_proj": true,
|
| 42 |
+
"type_vocab_size": 16,
|
| 43 |
+
"use_cache": true,
|
| 44 |
+
"vocab_size": 1000
|
| 45 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/hf-internal-testing/tiny-random-gpt2/83ecbabedf314419f3ca.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "hf-internal-testing/tiny-random-gpt2",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"activation_function": "gelu_new",
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"attn_pdrop": 0.1,
|
| 8 |
+
"embd_pdrop": 0.1,
|
| 9 |
+
"gradient_checkpointing": false,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 37,
|
| 14 |
+
"layer_norm_epsilon": 1e-05,
|
| 15 |
+
"model_type": "gpt2",
|
| 16 |
+
"n_ctx": 512,
|
| 17 |
+
"n_embd": 32,
|
| 18 |
+
"n_head": 4,
|
| 19 |
+
"n_inner": null,
|
| 20 |
+
"n_layer": 5,
|
| 21 |
+
"n_positions": 512,
|
| 22 |
+
"neuron": {
|
| 23 |
+
"auto_cast_type": "fp32",
|
| 24 |
+
"batch_size": 1,
|
| 25 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-gpt2",
|
| 26 |
+
"checkpoint_revision": "71034c5d8bde858ff824298bdedc65515b97d2b9",
|
| 27 |
+
"compiler_type": "neuronx-cc",
|
| 28 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 29 |
+
"num_cores": 2,
|
| 30 |
+
"sequence_length": 100,
|
| 31 |
+
"task": "text-generation"
|
| 32 |
+
},
|
| 33 |
+
"reorder_and_upcast_attn": false,
|
| 34 |
+
"resid_pdrop": 0.1,
|
| 35 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 36 |
+
"scale_attn_weights": true,
|
| 37 |
+
"summary_activation": null,
|
| 38 |
+
"summary_first_dropout": 0.1,
|
| 39 |
+
"summary_proj_to_labels": true,
|
| 40 |
+
"summary_type": "cls_index",
|
| 41 |
+
"summary_use_proj": true,
|
| 42 |
+
"type_vocab_size": 16,
|
| 43 |
+
"use_cache": true,
|
| 44 |
+
"vocab_size": 1000
|
| 45 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/llamafactory/tiny-random-Llama-3/12398fc9bb3b90b69c6d.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 4,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 16,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 64,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"auto_cast_type": "fp32",
|
| 20 |
+
"batch_size": 1,
|
| 21 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
| 22 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
| 23 |
+
"compiler_type": "neuronx-cc",
|
| 24 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
+
"num_cores": 2,
|
| 26 |
+
"sequence_length": 100,
|
| 27 |
+
"task": "text-generation"
|
| 28 |
+
},
|
| 29 |
+
"num_attention_heads": 4,
|
| 30 |
+
"num_hidden_layers": 2,
|
| 31 |
+
"num_key_value_heads": 4,
|
| 32 |
+
"pretraining_tp": 1,
|
| 33 |
+
"rms_norm_eps": 1e-05,
|
| 34 |
+
"rope_scaling": {
|
| 35 |
+
"factor": 8.0,
|
| 36 |
+
"high_freq_factor": 4.0,
|
| 37 |
+
"low_freq_factor": 1.0,
|
| 38 |
+
"original_max_position_embeddings": 8192,
|
| 39 |
+
"rope_type": "llama3"
|
| 40 |
+
},
|
| 41 |
+
"rope_theta": 500000.0,
|
| 42 |
+
"tie_word_embeddings": false,
|
| 43 |
+
"use_cache": true,
|
| 44 |
+
"vocab_size": 128256
|
| 45 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/llamafactory/tiny-random-Llama-3/3159c7cc5b0c26793327.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 4,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 16,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 64,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"auto_cast_type": "fp16",
|
| 20 |
+
"batch_size": 2,
|
| 21 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
| 22 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
| 23 |
+
"compiler_type": "neuronx-cc",
|
| 24 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
+
"num_cores": 2,
|
| 26 |
+
"sequence_length": 100,
|
| 27 |
+
"task": "text-generation"
|
| 28 |
+
},
|
| 29 |
+
"num_attention_heads": 4,
|
| 30 |
+
"num_hidden_layers": 2,
|
| 31 |
+
"num_key_value_heads": 4,
|
| 32 |
+
"pretraining_tp": 1,
|
| 33 |
+
"rms_norm_eps": 1e-05,
|
| 34 |
+
"rope_scaling": {
|
| 35 |
+
"factor": 8.0,
|
| 36 |
+
"high_freq_factor": 4.0,
|
| 37 |
+
"low_freq_factor": 1.0,
|
| 38 |
+
"original_max_position_embeddings": 8192,
|
| 39 |
+
"rope_type": "llama3"
|
| 40 |
+
},
|
| 41 |
+
"rope_theta": 500000.0,
|
| 42 |
+
"tie_word_embeddings": false,
|
| 43 |
+
"use_cache": true,
|
| 44 |
+
"vocab_size": 128256
|
| 45 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/llamafactory/tiny-random-Llama-3/e068af6507bc381e1370.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 4,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 16,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 64,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"auto_cast_type": "fp16",
|
| 20 |
+
"batch_size": 1,
|
| 21 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
| 22 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
| 23 |
+
"compiler_type": "neuronx-cc",
|
| 24 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
+
"num_cores": 2,
|
| 26 |
+
"sequence_length": 100,
|
| 27 |
+
"task": "text-generation"
|
| 28 |
+
},
|
| 29 |
+
"num_attention_heads": 4,
|
| 30 |
+
"num_hidden_layers": 2,
|
| 31 |
+
"num_key_value_heads": 4,
|
| 32 |
+
"pretraining_tp": 1,
|
| 33 |
+
"rms_norm_eps": 1e-05,
|
| 34 |
+
"rope_scaling": {
|
| 35 |
+
"factor": 8.0,
|
| 36 |
+
"high_freq_factor": 4.0,
|
| 37 |
+
"low_freq_factor": 1.0,
|
| 38 |
+
"original_max_position_embeddings": 8192,
|
| 39 |
+
"rope_type": "llama3"
|
| 40 |
+
},
|
| 41 |
+
"rope_theta": 500000.0,
|
| 42 |
+
"tie_word_embeddings": false,
|
| 43 |
+
"use_cache": true,
|
| 44 |
+
"vocab_size": 128256
|
| 45 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/4ac2b0ca2f3012645cd2.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "dacorvo/tiny-random-MistralForCausalLM",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"MistralForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"attention_probs_dropout_prob": 0.1,
|
| 10 |
+
"head_dim": 8,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 32,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 37,
|
| 16 |
+
"is_decoder": true,
|
| 17 |
+
"max_position_embeddings": 512,
|
| 18 |
+
"model_type": "mistral",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"auto_cast_type": "fp16",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"checkpoint_id": "dacorvo/tiny-random-MistralForCausalLM",
|
| 23 |
+
"checkpoint_revision": "81d453e3c8985649e9ee3d4c9378461029d1c73a",
|
| 24 |
+
"compiler_type": "neuronx-cc",
|
| 25 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 26 |
+
"num_cores": 2,
|
| 27 |
+
"sequence_length": 100,
|
| 28 |
+
"task": "text-generation"
|
| 29 |
+
},
|
| 30 |
+
"num_attention_heads": 4,
|
| 31 |
+
"num_hidden_layers": 2,
|
| 32 |
+
"num_key_value_heads": 2,
|
| 33 |
+
"rms_norm_eps": 1e-06,
|
| 34 |
+
"rope_theta": 10000.0,
|
| 35 |
+
"sliding_window": 4096,
|
| 36 |
+
"tie_word_embeddings": false,
|
| 37 |
+
"type_vocab_size": 16,
|
| 38 |
+
"use_cache": true,
|
| 39 |
+
"vocab_size": 32000
|
| 40 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/66507451739e53a47483.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "dacorvo/tiny-random-MistralForCausalLM",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"MistralForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"attention_probs_dropout_prob": 0.1,
|
| 10 |
+
"head_dim": 8,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 32,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 37,
|
| 16 |
+
"is_decoder": true,
|
| 17 |
+
"max_position_embeddings": 512,
|
| 18 |
+
"model_type": "mistral",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"auto_cast_type": "fp32",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"checkpoint_id": "dacorvo/tiny-random-MistralForCausalLM",
|
| 23 |
+
"checkpoint_revision": "81d453e3c8985649e9ee3d4c9378461029d1c73a",
|
| 24 |
+
"compiler_type": "neuronx-cc",
|
| 25 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 26 |
+
"num_cores": 2,
|
| 27 |
+
"sequence_length": 100,
|
| 28 |
+
"task": "text-generation"
|
| 29 |
+
},
|
| 30 |
+
"num_attention_heads": 4,
|
| 31 |
+
"num_hidden_layers": 2,
|
| 32 |
+
"num_key_value_heads": 2,
|
| 33 |
+
"rms_norm_eps": 1e-06,
|
| 34 |
+
"rope_theta": 10000.0,
|
| 35 |
+
"sliding_window": 4096,
|
| 36 |
+
"tie_word_embeddings": false,
|
| 37 |
+
"type_vocab_size": 16,
|
| 38 |
+
"use_cache": true,
|
| 39 |
+
"vocab_size": 32000
|
| 40 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/ce2619dd085de8aadeff.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "dacorvo/tiny-random-MistralForCausalLM",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"MistralForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"attention_probs_dropout_prob": 0.1,
|
| 10 |
+
"head_dim": 8,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 32,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 37,
|
| 16 |
+
"is_decoder": true,
|
| 17 |
+
"max_position_embeddings": 512,
|
| 18 |
+
"model_type": "mistral",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"auto_cast_type": "fp16",
|
| 21 |
+
"batch_size": 2,
|
| 22 |
+
"checkpoint_id": "dacorvo/tiny-random-MistralForCausalLM",
|
| 23 |
+
"checkpoint_revision": "81d453e3c8985649e9ee3d4c9378461029d1c73a",
|
| 24 |
+
"compiler_type": "neuronx-cc",
|
| 25 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 26 |
+
"num_cores": 2,
|
| 27 |
+
"sequence_length": 100,
|
| 28 |
+
"task": "text-generation"
|
| 29 |
+
},
|
| 30 |
+
"num_attention_heads": 4,
|
| 31 |
+
"num_hidden_layers": 2,
|
| 32 |
+
"num_key_value_heads": 2,
|
| 33 |
+
"rms_norm_eps": 1e-06,
|
| 34 |
+
"rope_theta": 10000.0,
|
| 35 |
+
"sliding_window": 4096,
|
| 36 |
+
"tie_word_embeddings": false,
|
| 37 |
+
"type_vocab_size": 16,
|
| 38 |
+
"use_cache": true,
|
| 39 |
+
"vocab_size": 32000
|
| 40 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mixtral/dacorvo/Mixtral-tiny/3509514a093c68deeaca.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "dacorvo/Mixtral-tiny",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"MixtralForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"head_dim": 32,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3584,
|
| 14 |
+
"max_position_embeddings": 1024,
|
| 15 |
+
"model_type": "mixtral",
|
| 16 |
+
"neuron": {
|
| 17 |
+
"auto_cast_type": "fp16",
|
| 18 |
+
"batch_size": 1,
|
| 19 |
+
"checkpoint_id": "dacorvo/Mixtral-tiny",
|
| 20 |
+
"checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
|
| 21 |
+
"compiler_type": "neuronx-cc",
|
| 22 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 23 |
+
"num_cores": 2,
|
| 24 |
+
"sequence_length": 100,
|
| 25 |
+
"task": "text-generation"
|
| 26 |
+
},
|
| 27 |
+
"num_attention_heads": 32,
|
| 28 |
+
"num_experts_per_tok": 2,
|
| 29 |
+
"num_hidden_layers": 2,
|
| 30 |
+
"num_key_value_heads": 8,
|
| 31 |
+
"num_local_experts": 8,
|
| 32 |
+
"output_router_logits": false,
|
| 33 |
+
"rms_norm_eps": 1e-05,
|
| 34 |
+
"rope_theta": 10000.0,
|
| 35 |
+
"router_aux_loss_coef": 0.001,
|
| 36 |
+
"router_jitter_noise": 0.0,
|
| 37 |
+
"sliding_window": 4096,
|
| 38 |
+
"tie_word_embeddings": false,
|
| 39 |
+
"use_cache": true,
|
| 40 |
+
"vocab_size": 32000
|
| 41 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mixtral/dacorvo/Mixtral-tiny/7a7bef14f18dbabbd689.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "dacorvo/Mixtral-tiny",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"MixtralForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"head_dim": 32,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3584,
|
| 14 |
+
"max_position_embeddings": 1024,
|
| 15 |
+
"model_type": "mixtral",
|
| 16 |
+
"neuron": {
|
| 17 |
+
"auto_cast_type": "fp16",
|
| 18 |
+
"batch_size": 2,
|
| 19 |
+
"checkpoint_id": "dacorvo/Mixtral-tiny",
|
| 20 |
+
"checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
|
| 21 |
+
"compiler_type": "neuronx-cc",
|
| 22 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 23 |
+
"num_cores": 2,
|
| 24 |
+
"sequence_length": 100,
|
| 25 |
+
"task": "text-generation"
|
| 26 |
+
},
|
| 27 |
+
"num_attention_heads": 32,
|
| 28 |
+
"num_experts_per_tok": 2,
|
| 29 |
+
"num_hidden_layers": 2,
|
| 30 |
+
"num_key_value_heads": 8,
|
| 31 |
+
"num_local_experts": 8,
|
| 32 |
+
"output_router_logits": false,
|
| 33 |
+
"rms_norm_eps": 1e-05,
|
| 34 |
+
"rope_theta": 10000.0,
|
| 35 |
+
"router_aux_loss_coef": 0.001,
|
| 36 |
+
"router_jitter_noise": 0.0,
|
| 37 |
+
"sliding_window": 4096,
|
| 38 |
+
"tie_word_embeddings": false,
|
| 39 |
+
"use_cache": true,
|
| 40 |
+
"vocab_size": 32000
|
| 41 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mixtral/dacorvo/Mixtral-tiny/7f1476291e21a1e7a585.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "dacorvo/Mixtral-tiny",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"MixtralForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"head_dim": 32,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3584,
|
| 14 |
+
"max_position_embeddings": 1024,
|
| 15 |
+
"model_type": "mixtral",
|
| 16 |
+
"neuron": {
|
| 17 |
+
"auto_cast_type": "fp32",
|
| 18 |
+
"batch_size": 1,
|
| 19 |
+
"checkpoint_id": "dacorvo/Mixtral-tiny",
|
| 20 |
+
"checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
|
| 21 |
+
"compiler_type": "neuronx-cc",
|
| 22 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 23 |
+
"num_cores": 2,
|
| 24 |
+
"sequence_length": 100,
|
| 25 |
+
"task": "text-generation"
|
| 26 |
+
},
|
| 27 |
+
"num_attention_heads": 32,
|
| 28 |
+
"num_experts_per_tok": 2,
|
| 29 |
+
"num_hidden_layers": 2,
|
| 30 |
+
"num_key_value_heads": 8,
|
| 31 |
+
"num_local_experts": 8,
|
| 32 |
+
"output_router_logits": false,
|
| 33 |
+
"rms_norm_eps": 1e-05,
|
| 34 |
+
"rope_theta": 10000.0,
|
| 35 |
+
"router_aux_loss_coef": 0.001,
|
| 36 |
+
"router_jitter_noise": 0.0,
|
| 37 |
+
"sliding_window": 4096,
|
| 38 |
+
"tie_word_embeddings": false,
|
| 39 |
+
"use_cache": true,
|
| 40 |
+
"vocab_size": 32000
|
| 41 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/549b38a19302cd6ff930.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "hf-internal-testing/tiny-random-OPTForCausalLM",
|
| 4 |
+
"_remove_final_layer_norm": false,
|
| 5 |
+
"_task": "text-generation",
|
| 6 |
+
"activation_function": "relu",
|
| 7 |
+
"architectures": [
|
| 8 |
+
"OPTForCausalLM"
|
| 9 |
+
],
|
| 10 |
+
"attention_dropout": 0.1,
|
| 11 |
+
"do_layer_norm_before": true,
|
| 12 |
+
"dropout": 0.1,
|
| 13 |
+
"embed_dim": 16,
|
| 14 |
+
"enable_bias": true,
|
| 15 |
+
"ffn_dim": 4,
|
| 16 |
+
"hidden_size": 16,
|
| 17 |
+
"init_std": 0.02,
|
| 18 |
+
"is_decoder": true,
|
| 19 |
+
"layer_norm_elementwise_affine": true,
|
| 20 |
+
"layerdrop": 0.0,
|
| 21 |
+
"max_position_embeddings": 100,
|
| 22 |
+
"model_type": "opt",
|
| 23 |
+
"neuron": {
|
| 24 |
+
"auto_cast_type": "fp32",
|
| 25 |
+
"batch_size": 1,
|
| 26 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-OPTForCausalLM",
|
| 27 |
+
"checkpoint_revision": "3f3f2ee1e499cb7ad89b877068684fdc9d9513c3",
|
| 28 |
+
"compiler_type": "neuronx-cc",
|
| 29 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 30 |
+
"num_cores": 2,
|
| 31 |
+
"sequence_length": 100,
|
| 32 |
+
"task": "text-generation"
|
| 33 |
+
},
|
| 34 |
+
"num_attention_heads": 4,
|
| 35 |
+
"num_hidden_layers": 5,
|
| 36 |
+
"use_cache": true,
|
| 37 |
+
"vocab_size": 50265,
|
| 38 |
+
"word_embed_proj_dim": 16
|
| 39 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/6b898ea4e83643172eba.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "hf-internal-testing/tiny-random-OPTForCausalLM",
|
| 4 |
+
"_remove_final_layer_norm": false,
|
| 5 |
+
"_task": "text-generation",
|
| 6 |
+
"activation_function": "relu",
|
| 7 |
+
"architectures": [
|
| 8 |
+
"OPTForCausalLM"
|
| 9 |
+
],
|
| 10 |
+
"attention_dropout": 0.1,
|
| 11 |
+
"do_layer_norm_before": true,
|
| 12 |
+
"dropout": 0.1,
|
| 13 |
+
"embed_dim": 16,
|
| 14 |
+
"enable_bias": true,
|
| 15 |
+
"ffn_dim": 4,
|
| 16 |
+
"hidden_size": 16,
|
| 17 |
+
"init_std": 0.02,
|
| 18 |
+
"is_decoder": true,
|
| 19 |
+
"layer_norm_elementwise_affine": true,
|
| 20 |
+
"layerdrop": 0.0,
|
| 21 |
+
"max_position_embeddings": 100,
|
| 22 |
+
"model_type": "opt",
|
| 23 |
+
"neuron": {
|
| 24 |
+
"auto_cast_type": "fp16",
|
| 25 |
+
"batch_size": 1,
|
| 26 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-OPTForCausalLM",
|
| 27 |
+
"checkpoint_revision": "3f3f2ee1e499cb7ad89b877068684fdc9d9513c3",
|
| 28 |
+
"compiler_type": "neuronx-cc",
|
| 29 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 30 |
+
"num_cores": 2,
|
| 31 |
+
"sequence_length": 100,
|
| 32 |
+
"task": "text-generation"
|
| 33 |
+
},
|
| 34 |
+
"num_attention_heads": 4,
|
| 35 |
+
"num_hidden_layers": 5,
|
| 36 |
+
"use_cache": true,
|
| 37 |
+
"vocab_size": 50265,
|
| 38 |
+
"word_embed_proj_dim": 16
|
| 39 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/d1b1af5fd67eb0577ee7.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "hf-internal-testing/tiny-random-OPTForCausalLM",
|
| 4 |
+
"_remove_final_layer_norm": false,
|
| 5 |
+
"_task": "text-generation",
|
| 6 |
+
"activation_function": "relu",
|
| 7 |
+
"architectures": [
|
| 8 |
+
"OPTForCausalLM"
|
| 9 |
+
],
|
| 10 |
+
"attention_dropout": 0.1,
|
| 11 |
+
"do_layer_norm_before": true,
|
| 12 |
+
"dropout": 0.1,
|
| 13 |
+
"embed_dim": 16,
|
| 14 |
+
"enable_bias": true,
|
| 15 |
+
"ffn_dim": 4,
|
| 16 |
+
"hidden_size": 16,
|
| 17 |
+
"init_std": 0.02,
|
| 18 |
+
"is_decoder": true,
|
| 19 |
+
"layer_norm_elementwise_affine": true,
|
| 20 |
+
"layerdrop": 0.0,
|
| 21 |
+
"max_position_embeddings": 100,
|
| 22 |
+
"model_type": "opt",
|
| 23 |
+
"neuron": {
|
| 24 |
+
"auto_cast_type": "fp16",
|
| 25 |
+
"batch_size": 2,
|
| 26 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-OPTForCausalLM",
|
| 27 |
+
"checkpoint_revision": "3f3f2ee1e499cb7ad89b877068684fdc9d9513c3",
|
| 28 |
+
"compiler_type": "neuronx-cc",
|
| 29 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 30 |
+
"num_cores": 2,
|
| 31 |
+
"sequence_length": 100,
|
| 32 |
+
"task": "text-generation"
|
| 33 |
+
},
|
| 34 |
+
"num_attention_heads": 4,
|
| 35 |
+
"num_hidden_layers": 5,
|
| 36 |
+
"use_cache": true,
|
| 37 |
+
"vocab_size": 50265,
|
| 38 |
+
"word_embed_proj_dim": 16
|
| 39 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/yujiepan/phi-4-tiny-random/607ae698be806304fb50.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "yujiepan/phi-4-tiny-random",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Phi3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"auto_map": {},
|
| 11 |
+
"embd_pdrop": 0.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 16,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 32,
|
| 16 |
+
"max_position_embeddings": 16384,
|
| 17 |
+
"model_type": "phi3",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"auto_cast_type": "fp16",
|
| 20 |
+
"batch_size": 2,
|
| 21 |
+
"checkpoint_id": "yujiepan/phi-4-tiny-random",
|
| 22 |
+
"checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
|
| 23 |
+
"compiler_type": "neuronx-cc",
|
| 24 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
+
"num_cores": 2,
|
| 26 |
+
"sequence_length": 100,
|
| 27 |
+
"task": "text-generation"
|
| 28 |
+
},
|
| 29 |
+
"num_attention_heads": 2,
|
| 30 |
+
"num_hidden_layers": 2,
|
| 31 |
+
"num_key_value_heads": 1,
|
| 32 |
+
"original_max_position_embeddings": 16384,
|
| 33 |
+
"partial_rotary_factor": 1.0,
|
| 34 |
+
"resid_pdrop": 0.0,
|
| 35 |
+
"rms_norm_eps": 1e-05,
|
| 36 |
+
"rope_scaling": null,
|
| 37 |
+
"rope_theta": 250000,
|
| 38 |
+
"sliding_window": null,
|
| 39 |
+
"tie_word_embeddings": false,
|
| 40 |
+
"use_cache": true,
|
| 41 |
+
"vocab_size": 100352
|
| 42 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/yujiepan/phi-4-tiny-random/da1641bf50b2440d8d42.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "yujiepan/phi-4-tiny-random",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Phi3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"auto_map": {},
|
| 11 |
+
"embd_pdrop": 0.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 16,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 32,
|
| 16 |
+
"max_position_embeddings": 16384,
|
| 17 |
+
"model_type": "phi3",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"auto_cast_type": "fp32",
|
| 20 |
+
"batch_size": 1,
|
| 21 |
+
"checkpoint_id": "yujiepan/phi-4-tiny-random",
|
| 22 |
+
"checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
|
| 23 |
+
"compiler_type": "neuronx-cc",
|
| 24 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
+
"num_cores": 2,
|
| 26 |
+
"sequence_length": 100,
|
| 27 |
+
"task": "text-generation"
|
| 28 |
+
},
|
| 29 |
+
"num_attention_heads": 2,
|
| 30 |
+
"num_hidden_layers": 2,
|
| 31 |
+
"num_key_value_heads": 1,
|
| 32 |
+
"original_max_position_embeddings": 16384,
|
| 33 |
+
"partial_rotary_factor": 1.0,
|
| 34 |
+
"resid_pdrop": 0.0,
|
| 35 |
+
"rms_norm_eps": 1e-05,
|
| 36 |
+
"rope_scaling": null,
|
| 37 |
+
"rope_theta": 250000,
|
| 38 |
+
"sliding_window": null,
|
| 39 |
+
"tie_word_embeddings": false,
|
| 40 |
+
"use_cache": true,
|
| 41 |
+
"vocab_size": 100352
|
| 42 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/phi3/yujiepan/phi-4-tiny-random/ff5ccda1b163a504cfcc.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "yujiepan/phi-4-tiny-random",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Phi3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"auto_map": {},
|
| 11 |
+
"embd_pdrop": 0.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 16,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 32,
|
| 16 |
+
"max_position_embeddings": 16384,
|
| 17 |
+
"model_type": "phi3",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"auto_cast_type": "fp16",
|
| 20 |
+
"batch_size": 1,
|
| 21 |
+
"checkpoint_id": "yujiepan/phi-4-tiny-random",
|
| 22 |
+
"checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
|
| 23 |
+
"compiler_type": "neuronx-cc",
|
| 24 |
+
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
+
"num_cores": 2,
|
| 26 |
+
"sequence_length": 100,
|
| 27 |
+
"task": "text-generation"
|
| 28 |
+
},
|
| 29 |
+
"num_attention_heads": 2,
|
| 30 |
+
"num_hidden_layers": 2,
|
| 31 |
+
"num_key_value_heads": 1,
|
| 32 |
+
"original_max_position_embeddings": 16384,
|
| 33 |
+
"partial_rotary_factor": 1.0,
|
| 34 |
+
"resid_pdrop": 0.0,
|
| 35 |
+
"rms_norm_eps": 1e-05,
|
| 36 |
+
"rope_scaling": null,
|
| 37 |
+
"rope_theta": 250000,
|
| 38 |
+
"sliding_window": null,
|
| 39 |
+
"tie_word_embeddings": false,
|
| 40 |
+
"use_cache": true,
|
| 41 |
+
"vocab_size": 100352
|
| 42 |
+
}
|
neuronxcc-2.17.194.0+d312836f/MODULE_032f74178031f5ed3c74+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 400384
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e27746423329d5ca3648b0a521481cc8c82c4b3642f47ecab0a048a6baa1bf36
|
| 3 |
size 400384
|
neuronxcc-2.17.194.0+d312836f/MODULE_0a88901e8c98f54e4c10+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 472064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:624d26061877fb366d7c41b3f88961284e96702d6313b52a33877d915813c796
|
| 3 |
size 472064
|
neuronxcc-2.17.194.0+d312836f/MODULE_0cfdfd8c26d66b282d5a+613edded/model.neff
CHANGED
|
Binary files a/neuronxcc-2.17.194.0+d312836f/MODULE_0cfdfd8c26d66b282d5a+613edded/model.neff and b/neuronxcc-2.17.194.0+d312836f/MODULE_0cfdfd8c26d66b282d5a+613edded/model.neff differ
|
|
|
neuronxcc-2.17.194.0+d312836f/MODULE_1090c91454701eac92a9+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 615424
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1f5d71caa3c2f2b27838c6aca987ca6cfdbad5860d42c62f5bfd5cafa39322a
|
| 3 |
size 615424
|
neuronxcc-2.17.194.0+d312836f/MODULE_113f3268e3fd4d66fe81+8a3305d3/model.neff
CHANGED
|
Binary files a/neuronxcc-2.17.194.0+d312836f/MODULE_113f3268e3fd4d66fe81+8a3305d3/model.neff and b/neuronxcc-2.17.194.0+d312836f/MODULE_113f3268e3fd4d66fe81+8a3305d3/model.neff differ
|
|
|
neuronxcc-2.17.194.0+d312836f/MODULE_151e013069d6b102df91+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 123904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29bb38b2d37442241496dc3e577d781d5701ea84a78bdf807cd9632cb3d4a479
|
| 3 |
size 123904
|
neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 134144
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfeda11cc0f3c212245069cb451b881ea4ab7ec09173824aafd1932c6ddaa02b
|
| 3 |
size 134144
|
neuronxcc-2.17.194.0+d312836f/MODULE_255bd8f176e49c261e93+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 123904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8fb97c20b57d947ca3713b658629285b31e42dd91c75b8ae38dabf1bf1d0e0a5
|
| 3 |
size 123904
|
neuronxcc-2.17.194.0+d312836f/MODULE_272d7dabaeb48e6d7210+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 123904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a1d4df13c1620cf66c80bf725c56bb6d381c95de7daa138096249a3b24105e5
|
| 3 |
size 123904
|
neuronxcc-2.17.194.0+d312836f/MODULE_2b6914194b931d7496fc+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 390144
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b938f5ab46bc43919b7b6e6ee5f662dbe45a1e01b722f35caf06cc309bd9614
|
| 3 |
size 390144
|
neuronxcc-2.17.194.0+d312836f/MODULE_2c290567bb51ca0069bf+613edded/model.neff
CHANGED
|
Binary files a/neuronxcc-2.17.194.0+d312836f/MODULE_2c290567bb51ca0069bf+613edded/model.neff and b/neuronxcc-2.17.194.0+d312836f/MODULE_2c290567bb51ca0069bf+613edded/model.neff differ
|
|
|
neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 144384
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd2fe87c0cecc2794f7c60fd83d1d0ec84e8c623c456709cc7b39b178be93dc2
|
| 3 |
size 144384
|
neuronxcc-2.17.194.0+d312836f/MODULE_3aa61c04bd8d9ca8722a+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 144384
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e23f8f5053a85ab214581a20d01be948511efa6c94de639ea9a0faf0e78fb43
|
| 3 |
size 144384
|
neuronxcc-2.17.194.0+d312836f/MODULE_3d7f2d2bef4f6fdd2c74+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 134144
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b91101d383cee1359c09f57a632e02c300b21e27a7295193cdeea85eb45ee1bb
|
| 3 |
size 134144
|
neuronxcc-2.17.194.0+d312836f/MODULE_3e5f6b34247d2b457ec5+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 134144
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b2316f98ea92a283291c9a4274c380422be7c036fce4edeb9a50fe44bece16f
|
| 3 |
size 134144
|
neuronxcc-2.17.194.0+d312836f/MODULE_40f92bf9469aae653e93+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 103424
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79ec5411f21a49ce0ace6124438d98172ea326ed9726cf0829b59de6b9842f38
|
| 3 |
size 103424
|
neuronxcc-2.17.194.0+d312836f/MODULE_48437cc74469a8ccaec8+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 154624
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b1c68d1f3339c7c8578b3548a5cb94c40003af6ca357d051fe24348f19e0abd
|
| 3 |
size 154624
|
neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 144384
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1a73c10b974b60e298a2c58c2913629564bf9746b1c62292fc461c41af22ff2
|
| 3 |
size 144384
|
neuronxcc-2.17.194.0+d312836f/MODULE_5a9cc28c7336fd9ea3ab+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 482304
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:740741c09a4afc3930fb12221f6fb36235b1de2e5035bce90142e801337869b6
|
| 3 |
size 482304
|
neuronxcc-2.17.194.0+d312836f/MODULE_5de5ea910315ba9c0def+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 134144
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:976bc34408e232169b1ab7193866ead180cd97813d19797f28160bc2015f6782
|
| 3 |
size 134144
|
neuronxcc-2.17.194.0+d312836f/MODULE_6819e64fb96e87ffece0+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 144384
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23ef7eabe51185c0a807a040447dd75ac0d2fb7238e38f9d8245b4f71d713bd8
|
| 3 |
size 144384
|
neuronxcc-2.17.194.0+d312836f/MODULE_7062a76356ca462bcc78+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 144384
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c4f650d174476fafcd6f2cd4d3001f5436ce1dfaa5f48cd9a95f05224883b73
|
| 3 |
size 144384
|
neuronxcc-2.17.194.0+d312836f/MODULE_878400bd48be9c48dd23+613edded/model.neff
CHANGED
|
Binary files a/neuronxcc-2.17.194.0+d312836f/MODULE_878400bd48be9c48dd23+613edded/model.neff and b/neuronxcc-2.17.194.0+d312836f/MODULE_878400bd48be9c48dd23+613edded/model.neff differ
|
|
|
neuronxcc-2.17.194.0+d312836f/MODULE_8bb2b91314df12f54a63+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 400384
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf1903c941e671f0a24c5c263d723e718412ddc66654f2a03aee1f3f8d948156
|
| 3 |
size 400384
|
neuronxcc-2.17.194.0+d312836f/MODULE_960d05b842008ddeae63+8a3305d3/model.neff
CHANGED
|
Binary files a/neuronxcc-2.17.194.0+d312836f/MODULE_960d05b842008ddeae63+8a3305d3/model.neff and b/neuronxcc-2.17.194.0+d312836f/MODULE_960d05b842008ddeae63+8a3305d3/model.neff differ
|
|
|
neuronxcc-2.17.194.0+d312836f/MODULE_99824fde40f6515f5b3d+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 134144
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f486b3d755c56de833246b62bf3bcdae5f44a49fee0b2df14b35fc9d740b05a
|
| 3 |
size 134144
|
neuronxcc-2.17.194.0+d312836f/MODULE_9be95eab2454f5ade94b+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 123904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13e6d8d70a234e00d1b734fdf7fe4c9c53dd6430f03c14c0c8b4a42ffd02d27c
|
| 3 |
size 123904
|
neuronxcc-2.17.194.0+d312836f/MODULE_a297084c31f9c1ff1d5d+613edded/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 134144
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5461f58c99a25feea9959fe94dcd65b799cf940d700be5578d33e7da4a5418b1
|
| 3 |
size 134144
|