diff --git a/.gitattributes b/.gitattributes
index e15ca5a29b2063bb686314b00a59dc1b4ae38457..5a15a019707c24b3b3ecedf38add2aa6c262c64b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -6298,3 +6298,19 @@ neuronxcc-2.21.33363.0+82129205/MODULE_cb86ed7fc724b06726b3+fb4cc044/model.neff
 neuronxcc-2.21.18209.0+043b1bf7/MODULE_a9ea3bcc615b10517bb2+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_9b4ea40b364ed3edd3ad+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.33363.0+82129205/MODULE_55c2fa803a7e3881cef6+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_14268084bdb93e2af8ea+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_17b4453648b482087f44+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_1efaefc590ce7ee07e97+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_32f3df92f722f8d61840+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_3683fa1d292af356adae+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_4effee1c1788c6eeab78+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_4fcdffc44bb528a047f0+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_5fdd7ad15e00da4fcd3e+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_78602cd5234279501590+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_8badd6f6eb69fa108ac8+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_93b5314f91b367ce312a+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_9c01f09fb84f2b339fc6+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_a08dd31a7a105fa45df2+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_c913669652fcaa9d5638+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/1c4ee5d7dc71b8843fca.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/1c4ee5d7dc71b8843fca.json
new file mode 100644
index 0000000000000000000000000000000000000000..f9d2e5a4913a6235391b59b3be0cb4b5320c05ca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/1c4ee5d7dc71b8843fca.json
@@ -0,0 +1,87 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+    "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 1,
+    "max_batch_size": 1,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 1
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151669
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/026fe44014d3f650a32e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/026fe44014d3f650a32e.json
new file mode 100644
index 0000000000000000000000000000000000000000..a053e27463473226ceb8cdec2d194848e3d0fb94
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/026fe44014d3f650a32e.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 16,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/2ad8fd5368e5c42f132c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/2ad8fd5368e5c42f132c.json
new file mode 100644
index 0000000000000000000000000000000000000000..755fc823d11ba6888e480e840cd907f6d4a515c9
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/2ad8fd5368e5c42f132c.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 32,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/66877d91fb0121840163.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/66877d91fb0121840163.json
new file mode 100644
index 0000000000000000000000000000000000000000..890cace77095cad9d2d1b895056f330759839a0e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/66877d91fb0121840163.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 16,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/ae5d00317f4b117f94a8.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/ae5d00317f4b117f94a8.json
new file mode 100644
index 0000000000000000000000000000000000000000..5d9addba53088d0426588717880611742f91bc6d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/ae5d00317f4b117f94a8.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 16,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/d9b9f628f0dee7a926d6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/d9b9f628f0dee7a926d6.json
new file mode 100644
index 0000000000000000000000000000000000000000..b30effacb1069ca90ab0f05045c235c003e5e5d0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/d9b9f628f0dee7a926d6.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 32,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/07552dc6c695df3ea557.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/07552dc6c695df3ea557.json
new file mode 100644
index 0000000000000000000000000000000000000000..908b2c13d2c33a7f8b5ade7c62948a0201d2f00c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/07552dc6c695df3ea557.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 1,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 1
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/25770924bad8fff9ec23.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/25770924bad8fff9ec23.json
new file mode 100644
index 0000000000000000000000000000000000000000..0d0a0f36c5327e269f03ba3f068510674f159b20
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/25770924bad8fff9ec23.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 8,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4f788775782f89b676c9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4f788775782f89b676c9.json
new file mode 100644
index 0000000000000000000000000000000000000000..3932f3f833b111584ace119d2e539344d8785149
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4f788775782f89b676c9.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 16,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/7190036e8ed3be94399f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/7190036e8ed3be94399f.json
new file mode 100644
index 0000000000000000000000000000000000000000..673c2d4870ed17f69572d8f9275de2ab297228f7
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/7190036e8ed3be94399f.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 16,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/9054b94f39e5c374b6b8.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/9054b94f39e5c374b6b8.json
new file mode 100644
index 0000000000000000000000000000000000000000..9de1e03e304c63b2477ffec5cdd0b4094af1cf8d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/9054b94f39e5c374b6b8.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 32,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/bed88d82f075f516941d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/bed88d82f075f516941d.json
new file mode 100644
index 0000000000000000000000000000000000000000..4b2d01b7226c6454cfd493111c1517d8b8c53461
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/bed88d82f075f516941d.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 32,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/cf5f2cf31cc338bcdce9.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/cf5f2cf31cc338bcdce9.json
new file mode 100644
index 0000000000000000000000000000000000000000..a65453ec94556a32b66056d6090db1d622d37c98
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/cf5f2cf31cc338bcdce9.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 16,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d06a9ca97bbded610b72.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d06a9ca97bbded610b72.json
new file mode 100644
index 0000000000000000000000000000000000000000..ead06e87aa4877b52c6e670253755c2d5d2c553b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d06a9ca97bbded610b72.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 4,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d7e075cbb2c6bd78d6b4.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d7e075cbb2c6bd78d6b4.json
new file mode 100644
index 0000000000000000000000000000000000000000..e8d261f284545be837731ba5e9a240a7f0467e14
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/d7e075cbb2c6bd78d6b4.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 64,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 64,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e189289909e4808416f7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e189289909e4808416f7.json
new file mode 100644
index 0000000000000000000000000000000000000000..d689f7f78ecdd27cc273d7bc8fafe1fe9ff03566
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e189289909e4808416f7.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 1,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e79a64794f75d8045060.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e79a64794f75d8045060.json
new file mode 100644
index 0000000000000000000000000000000000000000..3c2efe7e7a253e97e761c70bd8bf8af45f1ddc40
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/e79a64794f75d8045060.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 8,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/1c4ee5d7dc71b8843fca.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/1c4ee5d7dc71b8843fca.json
new file mode 100644
index 0000000000000000000000000000000000000000..f9d2e5a4913a6235391b59b3be0cb4b5320c05ca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-0.6B/1c4ee5d7dc71b8843fca.json
@@ -0,0 +1,87 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+    "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 1,
+    "max_batch_size": 1,
+    "max_context_length": 8192,
+    "max_topk": 256,
+    "n_active_tokens": 8192,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 8192,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 1
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151669
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-4B/07552dc6c695df3ea557.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-4B/07552dc6c695df3ea557.json
new file mode 100644
index 0000000000000000000000000000000000000000..908b2c13d2c33a7f8b5ade7c62948a0201d2f00c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-4B/07552dc6c695df3ea557.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-4B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
+    "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 1,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 1
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-8B/d9b9f628f0dee7a926d6.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-8B/d9b9f628f0dee7a926d6.json
new file mode 100644
index 0000000000000000000000000000000000000000..b30effacb1069ca90ab0f05045c235c003e5e5d0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/qwen3/Qwen/Qwen3-Embedding-8B/d9b9f628f0dee7a926d6.json
@@ -0,0 +1,95 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "feature-extraction",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 32,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/588f7836eb16c9483d90.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/588f7836eb16c9483d90.json
new file mode 100644
index 0000000000000000000000000000000000000000..2e6f473d6b2048c556b258aaa8afd2cd9e2bc2e3
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/588f7836eb16c9483d90.json
@@ -0,0 +1,134 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "HuggingFaceTB/SmolLM3-3B",
+  "_task": "text-generation",
+  "architectures": [
+    "SmolLM3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 65536,
+  "max_window_layers": 28,
+  "mlp_bias": false,
+  "model_type": "smollm3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "HuggingFaceTB/SmolLM3-3B",
+    "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 1,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.5.dev2",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "no_rope_layer_interval": 4,
+  "no_rope_layers": [
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0
+  ],
+  "num_attention_heads": 16,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 4,
+  "pretraining_tp": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 5000000.0,
+  "sliding_window": null,
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 128256
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/22277b72a5862009f452.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/22277b72a5862009f452.json
new file mode 100644
index 0000000000000000000000000000000000000000..1f153b07fb8bde9aa288f76b2b9d6a4f85d49b09
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/22277b72a5862009f452.json
@@ -0,0 +1,63 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "unsloth/Llama-3.2-1B-Instruct",
+  "_task": "text-generation",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
+    "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
+    "continuous_batching": true,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 4,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 16,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 32.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": true,
+  "unsloth_fixed": true,
+  "use_cache": true,
+  "vocab_size": 128256
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/llama/unsloth/Llama-3.2-1B-Instruct/22277b72a5862009f452.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/llama/unsloth/Llama-3.2-1B-Instruct/22277b72a5862009f452.json
new file mode 100644
index 0000000000000000000000000000000000000000..1f153b07fb8bde9aa288f76b2b9d6a4f85d49b09
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/llama/unsloth/Llama-3.2-1B-Instruct/22277b72a5862009f452.json
@@ -0,0 +1,63 @@
+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "unsloth/Llama-3.2-1B-Instruct",
+  "_task": "text-generation",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
+    "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
+    "continuous_batching": true,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 4,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.33363.0+82129205",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.5",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 16,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 32.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": true,
+  "unsloth_fixed": true,
+  "use_cache": true,
+  "vocab_size": 128256
+}
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14268084bdb93e2af8ea+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_14268084bdb93e2af8ea+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_14268084bdb93e2af8ea+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14268084bdb93e2af8ea+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_14268084bdb93e2af8ea+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14268084bdb93e2af8ea+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_14268084bdb93e2af8ea+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..8f9a4e0eaa80162b7c6f367c963981a2d5c86b9e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_14268084bdb93e2af8ea+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c252885b17164c4e4bf832f391aba7f3d8c7139bb59b1d4ee6df54c77398869
+size 840640
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_14268084bdb93e2af8ea+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_14268084bdb93e2af8ea+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..705a9ef34c5b5609dbac60a42b0fb67ab0b00d27
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_14268084bdb93e2af8ea+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c01b0bcf9bc956fe6facbe1b5c2a47bb487a061ab522c3a8cb46305bae28028f
+size 54754304
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17b4453648b482087f44+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_17b4453648b482087f44+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_17b4453648b482087f44+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17b4453648b482087f44+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_17b4453648b482087f44+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17b4453648b482087f44+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_17b4453648b482087f44+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ca712d55ae0e352f512c4688caf690116ec5c552
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_17b4453648b482087f44+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f061fa115ae7cc4aa283bb767164180d1817ccea49eb05c899e7f4d05b03a1db
+size 618697
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_17b4453648b482087f44+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_17b4453648b482087f44+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..0c407982ad6ab5500b85d41d030c0da0e25b72fe
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_17b4453648b482087f44+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:116d5e8b16d09e647e218ae20fe1a255681d13daa59e28082c5e5ab4a7a8e03f
+size 8070144
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1efaefc590ce7ee07e97+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1efaefc590ce7ee07e97+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1efaefc590ce7ee07e97+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1efaefc590ce7ee07e97+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1efaefc590ce7ee07e97+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1efaefc590ce7ee07e97+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1efaefc590ce7ee07e97+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..aae092f7271c8cdaefb0bc7887ea378e516368ed
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1efaefc590ce7ee07e97+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a917908cdedb3336190bdbfa5dc4c0b006052c9672b3c51ee0aa7c7a9e1e439
+size 628622
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1efaefc590ce7ee07e97+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1efaefc590ce7ee07e97+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..1f0624a7bfa513fd903b98a7bc8b836714b7f64c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1efaefc590ce7ee07e97+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5cdc725c9067ddd09df8d0768aa1744294c6717afb9cb332d40ec2fa3d13f86d
+size 2888704
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..fd0200c89032660ad01908166c47c54be46ec27f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4bfd87783375e038bf25e0e2d8810154384e5f8e2e73640820a95ef156e16d6
+size 793180
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..bb85cc03f88d364116802323ce4c5f38f87b57f5
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2828c4ae6bc360cc555f+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:121a4a45c22c23eaa0eb9bc94dcd4f162557e2c0574c2f9eefb1d7bcd069895b
+size 242791424
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_2da63caa31e7595bc07f+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_2da63caa31e7595bc07f+fb4cc044/model.neff
index 899a56b2ef4bb268bc3455d95df44d15c85f4db2..0d236132b203efa0292be95377b2a1813256e9ee 100644
--- a/neuronxcc-2.21.33363.0+82129205/MODULE_2da63caa31e7595bc07f+fb4cc044/model.neff
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_2da63caa31e7595bc07f+fb4cc044/model.neff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd5dc789465f3d32882920308f3d4a712edf866512104c7effa624cfea21354d
+oid sha256:b096fd09961ede9fc78f22a88f39e2023b064d98e4ecf8cae48c55a4e5c7b80e
 size 18473984
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_32f3df92f722f8d61840+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_32f3df92f722f8d61840+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_32f3df92f722f8d61840+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_32f3df92f722f8d61840+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_32f3df92f722f8d61840+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_32f3df92f722f8d61840+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_32f3df92f722f8d61840+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..2f9adf4961ede24f992785a34b1d3e6fb050c158
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_32f3df92f722f8d61840+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8270a9ede2522ef5f9691a06f9f5622f5923a5b4344b00ac1bbe7b10b666e6de
+size 838840
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_32f3df92f722f8d61840+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_32f3df92f722f8d61840+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..2983f4588802feee07c78ff3af5bde590d52caf2
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_32f3df92f722f8d61840+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7237e792db77272b7eadefed43ad0b601d2d5eb78ea028a48d5a0a244ba47466
+size 26809344
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3683fa1d292af356adae+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3683fa1d292af356adae+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3683fa1d292af356adae+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3683fa1d292af356adae+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3683fa1d292af356adae+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3683fa1d292af356adae+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3683fa1d292af356adae+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d066edbb58eb14ee536aee4b15890e6fa317d4dd
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3683fa1d292af356adae+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a72c26ea4bfff05052405e8a360c5ead4dc213d746221968e452c3adbed7964
+size 618481
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3683fa1d292af356adae+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3683fa1d292af356adae+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..c078d66c559f4e0465a08cad1ca7e1f964285107
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3683fa1d292af356adae+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2413f921e113dbc56d84a84a618ea2db978d407f5460463124a47a05840ab24
+size 4271104
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4effee1c1788c6eeab78+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4effee1c1788c6eeab78+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4effee1c1788c6eeab78+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4effee1c1788c6eeab78+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4effee1c1788c6eeab78+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4effee1c1788c6eeab78+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4effee1c1788c6eeab78+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d40b852174adeda38245368059dae82c53a40e02
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4effee1c1788c6eeab78+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f70fc236e9f92ab1fa0421a47be449b926660a67784b275927e8cb66dca0e39
+size 618697
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4effee1c1788c6eeab78+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4effee1c1788c6eeab78+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..08e82719ffd4588c36d505d96fc002d92dccf3ed
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4effee1c1788c6eeab78+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:848f4efccf68564350550cac06b3729cabafd47692f4df18f5fe63748eb695b8
+size 10394624
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4fcdffc44bb528a047f0+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4fcdffc44bb528a047f0+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4fcdffc44bb528a047f0+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4fcdffc44bb528a047f0+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4fcdffc44bb528a047f0+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4fcdffc44bb528a047f0+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4fcdffc44bb528a047f0+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d42c3ba0d36b5dc258e2f260e64f760f19e0460f
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4fcdffc44bb528a047f0+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dd7f7b1a2558219081ab0d313a1278db0fdf3a960970d5663b476de30e8725d
+size 618481
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4fcdffc44bb528a047f0+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4fcdffc44bb528a047f0+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..3a6abfae04b6e30b82a4f27a9ff8e4c35044e792
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4fcdffc44bb528a047f0+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef96d9b1de73a404b91e5a05677cccf742bcd9a3bda777340488acad29b97622
+size 5346304
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5fdd7ad15e00da4fcd3e+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5fdd7ad15e00da4fcd3e+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5fdd7ad15e00da4fcd3e+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5fdd7ad15e00da4fcd3e+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5fdd7ad15e00da4fcd3e+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5fdd7ad15e00da4fcd3e+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5fdd7ad15e00da4fcd3e+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d4f5e4cb020cf2eed8a0567ff99aed07b4bcb061
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5fdd7ad15e00da4fcd3e+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:232afcad03e2df34f698ed0701ed5e2072121afca9edb009d66a9aec12f1a379
+size 840640
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5fdd7ad15e00da4fcd3e+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5fdd7ad15e00da4fcd3e+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..f8b770fc8566507a864bbcdf97c1c1b30637fe37
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5fdd7ad15e00da4fcd3e+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:296c2002b93c609d4a68eb22ac8c2afc5385391ddafe81315c177874ccdea5df
+size 45046784
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78602cd5234279501590+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_78602cd5234279501590+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78602cd5234279501590+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78602cd5234279501590+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_78602cd5234279501590+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78602cd5234279501590+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_78602cd5234279501590+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..cdfd002b4ff01bf797e8bb61f8f7cae4e1bded36
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78602cd5234279501590+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7771e8b633a25dad02aa999b81083c02c2cb7cc63b735ab794009698e7dd6524
+size 628810
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_78602cd5234279501590+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_78602cd5234279501590+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..d68cb1a51a233a6f2c6ad89f1741e5caff2b5de7
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_78602cd5234279501590+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f02c1c244bc010e08ec988eee7ae622e88cf92b80df814adb48ab2d853395796
+size 22672384
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8508fced3b142be2a2ee+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8508fced3b142be2a2ee+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8508fced3b142be2a2ee+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8508fced3b142be2a2ee+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8508fced3b142be2a2ee+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d735d5f93ef7e58283d5ccadc6065ea413da1269
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8508fced3b142be2a2ee+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2af5485875eb0c496ff7c3ecea5e35812074c14b25db1f04586a6e66b8d50cd7
+size 810056
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8508fced3b142be2a2ee+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_8508fced3b142be2a2ee+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..ded2158bce7dc066a77620081921c11f58549eae
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8508fced3b142be2a2ee+fb4cc044/model.log
@@ -0,0 +1,53 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8508fced3b142be2a2ee+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_8508fced3b142be2a2ee+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-09T10:25:27Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5171
+             convert       910  17.60% ################################################################
+             reshape       802  15.51% ########################################################
+           transpose       723  13.98% ##################################################
+           broadcast       548  10.60% ######################################
+               slice       543  10.50% ######################################
+            multiply       362   7.00% #########################
+           parameter       328   6.34% #######################
+            constant       221   4.27% ###############
+                call       217   4.20% ###############
+                 dot       181   3.50% ############
+                 add       144   2.78% ##########
+         concatenate        74   1.43% #####
+              negate        72   1.39% #####
+   get-tuple-element        37   0.72% ##
+                iota         3   0.06% 
+              gather         2   0.04% 
+               tuple         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+                sine         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4140
+             convert       909  21.96% ################################################################
+             reshape       650  15.70% #############################################
+           transpose       540  13.04% ######################################
+           parameter       328   7.92% #######################
+            constant       256   6.18% ##################
+           broadcast       255   6.16% #################
+               slice       252   6.09% #################
+         custom-call       217   5.24% ###############
+            multiply       217   5.24% ###############
+                 dot       180   4.35% ############
+                 add       144   3.48% ##########
+         concatenate        74   1.79% #####
+              negate        72   1.74% #####
+   get-tuple-element        37   0.89% ##
+                iota         3   0.07% 
+              gather         2   0.05% 
+              cosine         1   0.02% 
+               tuple         1   0.02% 
+              reduce         1   0.02% 
+                sine         1   0.02% 
+
+Potential split-points stats: #CC 0 #AR 0 #AG 0 #BN 0 nClamp 0
+WARNING: Insufficient number of potential split points found. Entire model will be compiled as a single module.
+No partitions found. Compiling as flat model
+2026-02-09 10:25:27.146344: F hilo/hlo_passes/NeuronHloVerifier.cc:504]  [ERROR] [NCC_VRF007] Tiled instruction count 3784872448 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_88190e1b8a8ceb313e53+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_88190e1b8a8ceb313e53+fb4cc044/model.neff
index 4eb7ed3a8f296ac58dd32e41668b8d4714e930c7..a00d63e822d6c03474c6bbda72034d9e3286eafd 100644
--- a/neuronxcc-2.21.33363.0+82129205/MODULE_88190e1b8a8ceb313e53+fb4cc044/model.neff
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_88190e1b8a8ceb313e53+fb4cc044/model.neff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ca1943286c8e972bc0090e3fe3ca6935aab857bda4db85e1bd362d544cd43c6
+oid sha256:9e31980eef984a5dca7919bec01552bcd34f96c43a1391d0fb74dc4a5a650955
 size 27689984
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8badd6f6eb69fa108ac8+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8badd6f6eb69fa108ac8+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8badd6f6eb69fa108ac8+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8badd6f6eb69fa108ac8+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8badd6f6eb69fa108ac8+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8badd6f6eb69fa108ac8+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8badd6f6eb69fa108ac8+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d46e4cd3a8d58cdb0f73e8095b1e4d81cda74bc8
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8badd6f6eb69fa108ac8+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35843e5fb4c876382538c2c296011d3e12c4bf8891f533afacc85305fa7369f7
+size 628841
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8badd6f6eb69fa108ac8+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8badd6f6eb69fa108ac8+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..20f0fa3038c73d1071750374d7b6a923f48094ff
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8badd6f6eb69fa108ac8+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c058612fa664928eaf60fd5aa93e68c147886daf2c056edf90b378cc4a307b6
+size 10128384
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93b5314f91b367ce312a+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_93b5314f91b367ce312a+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93b5314f91b367ce312a+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93b5314f91b367ce312a+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_93b5314f91b367ce312a+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93b5314f91b367ce312a+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_93b5314f91b367ce312a+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..390c9e8125947f5e84acb388aa326f3d1c3cbef5
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93b5314f91b367ce312a+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8fa7c229bd926ce09a4a20474350250e01123d6d2cd8a768ee6de4473861c1d
+size 840640
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_93b5314f91b367ce312a+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_93b5314f91b367ce312a+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..b0c17d1a9efbc7da661af8aab7758a0b1855640d
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_93b5314f91b367ce312a+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ce410eef7e8b7a3ea20cf29705cdb146065ea094acf50f0dccad6705a67d738
+size 88853504
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9c01f09fb84f2b339fc6+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9c01f09fb84f2b339fc6+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9c01f09fb84f2b339fc6+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9c01f09fb84f2b339fc6+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9c01f09fb84f2b339fc6+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9c01f09fb84f2b339fc6+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9c01f09fb84f2b339fc6+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..338ad239791970a8058a5f26bd7a779f46002928
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9c01f09fb84f2b339fc6+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4a5d873782f0c2ddb0a99918ed821635ea8686356af4e9b834e35ca79e8936
+size 838696
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9c01f09fb84f2b339fc6+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9c01f09fb84f2b339fc6+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..03d9b31fb781c47fd53182d122e17615393e0903
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9c01f09fb84f2b339fc6+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:248699dfced4324a86f270996c16bae9270f8ae24e4989704ec9b6e3b3b3d6c1
+size 11531264
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a08dd31a7a105fa45df2+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a08dd31a7a105fa45df2+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a08dd31a7a105fa45df2+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a08dd31a7a105fa45df2+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a08dd31a7a105fa45df2+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a08dd31a7a105fa45df2+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a08dd31a7a105fa45df2+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..09ca2127301827763e6b1a36e2411f5107ae5e17
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a08dd31a7a105fa45df2+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d56d0facb8ba4b04314bfea029cf93e012552fc164e0e5e04053fb9971cc9f9
+size 628838
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a08dd31a7a105fa45df2+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a08dd31a7a105fa45df2+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..d13b05182b679f9480810608ca8ab2b8bae1c44b
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a08dd31a7a105fa45df2+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a2703684352bbea1321e6c16c42bf42e61d68b745049074d232727ad247acb0
+size 5284864
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c913669652fcaa9d5638+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c913669652fcaa9d5638+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c913669652fcaa9d5638+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c913669652fcaa9d5638+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c913669652fcaa9d5638+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c913669652fcaa9d5638+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c913669652fcaa9d5638+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..a58d0a283b25107462046c3902d171acffe58d4c
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c913669652fcaa9d5638+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35df9c1b5244d0e9c581c6cee6799f0b7a9a5b237400d5afebb1a523cd29a078
+size 838840
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c913669652fcaa9d5638+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c913669652fcaa9d5638+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..91cfec5510627e8559f425c88f12856c856f2876
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c913669652fcaa9d5638+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9396b601e6f597a226f6927edd90832616232783e20c1976cd34321a9c65fdb2
+size 22795264
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.done
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..77904b171c7b5146c3d2e6f049bcd63c61520983
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53b4f366b8d782dd4d1d1d3bb864ff53306cb866cc8a1867efc3fbb2aa112d8d
+size 617945
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.neff
new file mode 100644
index 0000000000000000000000000000000000000000..78834622890200b3956caa04dc82a18eec841763
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d3f277cf573c91b9ded8+fb4cc044/model.neff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a6408a53c92a01df4a363d200f1754ac088fb38e45c17f2e3ff5edd31aff30e
+size 200131584
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..aef42dbeb26b04f856f84f1bae494446196aa05e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:412d9298a727c88ef811809b3cf48655cd25ce8300fe430c2187da886fffadf2
+size 793324
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..6abc610510f70b376d8f5026c3ce1159e83d91ba
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f5f81a13c8e6671b9b65+fb4cc044/model.log
@@ -0,0 +1,53 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_f5f81a13c8e6671b9b65+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_f5f81a13c8e6671b9b65+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2026-02-04T14:17:28Z 
+Pre-Partition Pre-Opt Histogram:
+total HLO instructions: 5170
+             convert       910  17.60% ################################################################
+             reshape       802  15.51% ########################################################
+           transpose       723  13.98% ##################################################
+           broadcast       547  10.58% ######################################
+               slice       543  10.50% ######################################
+            multiply       362   7.00% #########################
+           parameter       328   6.34% #######################
+            constant       221   4.27% ###############
+                call       217   4.20% ###############
+                 dot       181   3.50% ############
+                 add       144   2.79% ##########
+         concatenate        74   1.43% #####
+              negate        72   1.39% #####
+   get-tuple-element        37   0.72% ##
+                iota         3   0.06% 
+              gather         2   0.04% 
+               tuple         1   0.02% 
+              cosine         1   0.02% 
+              reduce         1   0.02% 
+                sine         1   0.02% 
+
+
+Pre-Partition Post-Op Histogram:
+total HLO instructions: 4364
+             convert       909  20.83% ################################################################
+             reshape       870  19.94% #############################################################
+           transpose       543  12.44% ######################################
+           parameter       328   7.52% #######################
+            constant       257   5.89% ##################
+           broadcast       256   5.87% ##################
+               slice       252   5.77% #################
+         custom-call       217   4.97% ###############
+            multiply       217   4.97% ###############
+                 dot       180   4.12% ############
+                 add       144   3.30% ##########
+         concatenate        74   1.70% #####
+              negate        72   1.65% #####
+   get-tuple-element        37   0.85% ##
+              gather         2   0.05% 
+                iota         2   0.05% 
+              cosine         1   0.02% 
+               tuple         1   0.02% 
+              reduce         1   0.02% 
+                sine         1   0.02% 
+
+Potential split-points stats: #CC 0 #AR 0 #AG 0 #BN 0 nClamp 0
+WARNING: Insufficient number of potential split points found. Entire model will be compiled as a single module.
+No partitions found. Compiling as flat model
+2026-02-04 14:17:28.019879: F hilo/hlo_passes/NeuronHloVerifier.cc:504]  [ERROR] [NCC_VRF007] Tiled instruction count 7392334 exceeds 5000000. TIP: Input HLO might be too big, please consider using smaller batches, applying model parallelism or compile under --optlevel=1 to create smaller subgraphs
+
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/compile_flags.json
new file mode 100644
index 0000000000000000000000000000000000000000..07bdc1045dd850da0b9d66d697f3755e9be37aca
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/compile_flags.json
@@ -0,0 +1 @@
+["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]
\ No newline at end of file
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/model.hlo_module.pb
new file mode 100644
index 0000000000000000000000000000000000000000..406d26205a62d7bc1c6466b1bf413167a23cee9e
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/model.hlo_module.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7969893711a56172d92c4bb78eee80280687230fb1c3c29db069f04bf97396e
+size 793324
diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/model.log b/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/model.log
new file mode 100644
index 0000000000000000000000000000000000000000..b3682c390188d6ce33fc93474fb72a0a54b8bcc0
--- /dev/null
+++ b/neuronxcc-2.21.33363.0+82129205/MODULE_fb54563033b3f69f79b9+fb4cc044/model.log
@@ -0,0 +1,3 @@
+Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fb54563033b3f69f79b9+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_fb54563033b3f69f79b9+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [LUR015]  Compiler generated too many instructions (9816278). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+2026-02-04T14:25:22Z Non-signal exit. Backend exited with code 1 and stderr: [LUR015]  Compiler generated too many instructions (9816278). This maybe due to a failure in parallelism extraction by the tensorizer. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
+