diff --git a/.gitattributes b/.gitattributes index 2fafea36e8207748bd5db5c42d9e5e4df3719adc..60aed1764ee235435c8306a7f29f21b5dcb2602b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -7339,3 +7339,67 @@ neuronxcc-2.23.6484.0+3b612583/MODULE_b64a1f9e4423dce4e8f3+84337dd9/model.neff f neuronxcc-2.23.6484.0+3b612583/MODULE_b64a1f9e4423dce4e8f3+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.23.6484.0+3b612583/MODULE_865bee4df11d5919ca3f+97496b47/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.23.6484.0+3b612583/MODULE_67b88de45bb6b959a5e9+97496b47/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_14914b1ba1e6c2ba64f8+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_18b23db783736cb6d629+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_1db3db714a4501ae527b+d1c00e0b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_30bb563379f04bb26c50+0788fa03/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_4459752a3307588a8efc+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_69d2efed75dcc1c5fa31+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_7d07320b9068d08650f9+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_99d1af5cf4e4d170ae11+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_9bc5ecc156503bbbf391+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_a8bdd2678c80a638674c+e457fe88/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_bc130a5a10e96fc8a3b6+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_beed7b6ee886171fa077+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_d04fa963001e29808715+b7ba88c2/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_d7f34bc5df8db9179a8a+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_e855926ccb3f126d7195+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_ec2c37db809075a6d417+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_fd2fd500686b8882cd25+519f203d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.23.6484.0+3b612583/MODULE_ff117705b57d7b86f907+12dd9119/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/1633dd67ba36460ade83.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/1633dd67ba36460ade83.json new file mode 100644 index 0000000000000000000000000000000000000000..a1a3d25698db6780cc81615dbde7afc53a5c9491 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/1633dd67ba36460ade83.json @@ -0,0 +1,60 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/b7c3323af28e1a50b1e1.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/b7c3323af28e1a50b1e1.json new file mode 100644 index 0000000000000000000000000000000000000000..130dcd660c9d82a339a335995a0348a1c9b5f953 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/b7c3323af28e1a50b1e1.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/333aa43a70b4a2f04569.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/333aa43a70b4a2f04569.json new file mode 100644 index 0000000000000000000000000000000000000000..b212b15881a07eccf6fa3a07ba51863f02619815 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/333aa43a70b4a2f04569.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/024fbf55f0875999ac40.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/024fbf55f0875999ac40.json new file mode 100644 index 0000000000000000000000000000000000000000..25ce7301fd49cfffac55d08d39888fd9624a5735 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/024fbf55f0875999ac40.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/045b50345589a193bbb1.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/045b50345589a193bbb1.json new file mode 100644 index 0000000000000000000000000000000000000000..aa92aaa4bf9b2e89b80936e4bf55411d83ca090f --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/045b50345589a193bbb1.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "n_active_tokens": 128, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 128, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/f40fa3ae1be5dbec60bf.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/f40fa3ae1be5dbec60bf.json new file mode 100644 index 0000000000000000000000000000000000000000..ac256e206edceb92f5124740649acc34db58af39 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/f40fa3ae1be5dbec60bf.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/4291a4c9b96d0fdb3a39.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/4291a4c9b96d0fdb3a39.json new file mode 100644 index 0000000000000000000000000000000000000000..01f9d7836183b846d505b53fb4ff1c7dfba4c9bb --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/4291a4c9b96d0fdb3a39.json @@ -0,0 +1,135 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/f1be1a35d87fcfce94ce.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/f1be1a35d87fcfce94ce.json new file mode 100644 index 0000000000000000000000000000000000000000..e991e52e9983a1ba018b190945d0d6382adc7801 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/f1be1a35d87fcfce94ce.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/9940f528b7629dd7232a.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/9940f528b7629dd7232a.json new file mode 100644 index 0000000000000000000000000000000000000000..b105c9a56dc150f827384aef66d816e62160d497 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/9940f528b7629dd7232a.json @@ -0,0 +1,66 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/0660f012c537b939ada6.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/0660f012c537b939ada6.json new file mode 100644 index 0000000000000000000000000000000000000000..69f2a7302df27f7c2214cb037e7e54528bb71f2b --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/0660f012c537b939ada6.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/57c7372b90daeeb7b0f2.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/57c7372b90daeeb7b0f2.json new file mode 100644 index 0000000000000000000000000000000000000000..67afb420ddd19a916d08b22a47a3c46aa4239d98 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/57c7372b90daeeb7b0f2.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 1024, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/81e2befa175c22f47f22.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/81e2befa175c22f47f22.json new file mode 100644 index 0000000000000000000000000000000000000000..b386249e0128bbf7f6258943e187c8b4ead515b9 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/81e2befa175c22f47f22.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/f47e188d9de0842ec1ef.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/f47e188d9de0842ec1ef.json new file mode 100644 index 0000000000000000000000000000000000000000..698f7d566ce929111da8dafcc3c66b428d64748d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/f47e188d9de0842ec1ef.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/f47e188d9de0842ec1ef.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/f47e188d9de0842ec1ef.json new file mode 100644 index 0000000000000000000000000000000000000000..698f7d566ce929111da8dafcc3c66b428d64748d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/f47e188d9de0842ec1ef.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/llama/llamafactory/tiny-random-Llama-3/024fbf55f0875999ac40.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/llama/llamafactory/tiny-random-Llama-3/024fbf55f0875999ac40.json new file mode 100644 index 0000000000000000000000000000000000000000..25ce7301fd49cfffac55d08d39888fd9624a5735 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/llama/llamafactory/tiny-random-Llama-3/024fbf55f0875999ac40.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/llama4_text/tiny-random/llama-4/0660f012c537b939ada6.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/llama4_text/tiny-random/llama-4/0660f012c537b939ada6.json new file mode 100644 index 0000000000000000000000000000000000000000..69f2a7302df27f7c2214cb037e7e54528bb71f2b --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/llama4_text/tiny-random/llama-4/0660f012c537b939ada6.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/mixtral/dacorvo/Mixtral-tiny/333aa43a70b4a2f04569.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/mixtral/dacorvo/Mixtral-tiny/333aa43a70b4a2f04569.json new file mode 100644 index 0000000000000000000000000000000000000000..b212b15881a07eccf6fa3a07ba51863f02619815 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/mixtral/dacorvo/Mixtral-tiny/333aa43a70b4a2f04569.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/phi3/yujiepan/phi-4-tiny-random/1633dd67ba36460ade83.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/phi3/yujiepan/phi-4-tiny-random/1633dd67ba36460ade83.json new file mode 100644 index 0000000000000000000000000000000000000000..a1a3d25698db6780cc81615dbde7afc53a5c9491 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/phi3/yujiepan/phi-4-tiny-random/1633dd67ba36460ade83.json @@ -0,0 +1,60 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/qwen2/yujiepan/qwen2.5-128k-tiny-random/f1be1a35d87fcfce94ce.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/qwen2/yujiepan/qwen2.5-128k-tiny-random/f1be1a35d87fcfce94ce.json new file mode 100644 index 0000000000000000000000000000000000000000..e991e52e9983a1ba018b190945d0d6382adc7801 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/qwen2/yujiepan/qwen2.5-128k-tiny-random/f1be1a35d87fcfce94ce.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/9940f528b7629dd7232a.json b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/9940f528b7629dd7232a.json new file mode 100644 index 0000000000000000000000000000000000000000..b105c9a56dc150f827384aef66d816e62160d497 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/0_REGISTRY/0.4.6.dev3/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/9940f528b7629dd7232a.json @@ -0,0 +1,66 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.23.6484.0+3b612583", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev3", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bab0d30eec44764ae33a27f32e8338be27595f2d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6ab4fc3e43a5b074c16279846390ef1761b102e2e0a8eb8d363328e3bfff825 +size 1416185 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..eec76febdfd13e6624047477581381ee156ba065 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db060ba11803c20c018e9fc84605a11337673070c2f0df7dd2c7bab84fff7035 +size 2356224 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..735dbce3382b5ead33d397e82e2c29d876aa78b4 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_01dcbea3110aa91df0df+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233a2ef77264facbf9ae77440164653666e9f6f9e7be80efbe78fc41369971b6 +size 2430311 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4bc7f4cb7a67b31fa07aede74cc286f7691ada5f --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac5a7400c6702988d394c1de26fb344d8ff9896197ac27f3f0bc7ee852677000 +size 261545 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e3389bb486a4aefde8470a89dd5bcbacfaa48367 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d73fc0c34c8b0696742bcfe1d1a41ce603acd80b1289ebc47e67c9199c7e72e +size 308224 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ccea36df0b2cbb93c718e731504a642738b2ba25 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_051bce7ebf5bc407ccc8+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2caa81e9b00b1fe5d79d71629285ca5f288de88edbf2b02b69245266c7a7ea22 +size 319615 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ad79f94359a036ea16bec940ebf58f267a364f81 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61b4cc632454fa8c813df88c7a65c365bad7fd96563b17b1a0bf832f6e674697 +size 263125 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0c7d080b064ab37e0daf8a8167df25dba0bb8e1b --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:052f168314ae0d59cdbe205c0f7ecc4ce024f36d238b0f120b1338b2d469e5b4 +size 308224 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bb37edd8bf2f5d0e996d34fa35dcf109f645ebff --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_1069b5eb45ba83722e10+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1569fd5df90fc2b459f322c414c6b99a1292f5aaf8139f39f2515d5b7010e8c +size 316437 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_14914b1ba1e6c2ba64f8+519f203d/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_14914b1ba1e6c2ba64f8+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_14914b1ba1e6c2ba64f8+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_14914b1ba1e6c2ba64f8+519f203d/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_14914b1ba1e6c2ba64f8+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_14914b1ba1e6c2ba64f8+519f203d/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_14914b1ba1e6c2ba64f8+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..75b7a2442e8e59452a4c7e7334c1978ecaf8d69f --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_14914b1ba1e6c2ba64f8+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a719b3ce59ec5d1f5af841e93dc5afb4ed1cdeb31cff20309f38d320e1520acb +size 995766 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_14914b1ba1e6c2ba64f8+519f203d/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_14914b1ba1e6c2ba64f8+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d31c3b8fe480962f3d3a9325816861533db89751 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_14914b1ba1e6c2ba64f8+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:259b55e7cd0636fcc74eca5a80c46797a4ca2bba5a0ecfd7b63190c253cba8a2 +size 6349824 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_18b23db783736cb6d629+519f203d/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_18b23db783736cb6d629+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_18b23db783736cb6d629+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_18b23db783736cb6d629+519f203d/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_18b23db783736cb6d629+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_18b23db783736cb6d629+519f203d/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_18b23db783736cb6d629+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..db92fc3d4a9ff4828fce6bdbde9740424137b4ce --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_18b23db783736cb6d629+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2fc8afb60f32d77d964ccedc414f10a03fce4d461b67e90e557429192f13384 +size 2451486 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_18b23db783736cb6d629+519f203d/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_18b23db783736cb6d629+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9bf91f7a44616d595cce1c3511a98167466e16a8 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_18b23db783736cb6d629+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0825b145d113d71838a5826752775072098f786140ae646181873395cd91282 +size 3318784 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_1db3db714a4501ae527b+d1c00e0b/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_1db3db714a4501ae527b+d1c00e0b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..4c057daecdc0a5abf2a9f0b7e56c591ed15e347a --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_1db3db714a4501ae527b+d1c00e0b/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_1db3db714a4501ae527b+d1c00e0b/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_1db3db714a4501ae527b+d1c00e0b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_1db3db714a4501ae527b+d1c00e0b/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_1db3db714a4501ae527b+d1c00e0b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..44a8f1a1955e1dc3fa692487c8a28cd05041421a --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_1db3db714a4501ae527b+d1c00e0b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17fee0477c4baa0c42eef7c16dac790448774bd0ba3713b1f0b64649fc712234 +size 335515 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_1db3db714a4501ae527b+d1c00e0b/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_1db3db714a4501ae527b+d1c00e0b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b9cf631cfd9657c3187f69ddfb8bb0667ed2c9ee --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_1db3db714a4501ae527b+d1c00e0b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d900edecc9ab868a1804d3eced61ace4a64014c62643e4db095be43cafcd11df +size 871424 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_221529edd1b774c76e41+a0056ba1/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_221529edd1b774c76e41+a0056ba1/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..934f9328a30fbf53dfafb2041f17e5151c3b32be --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_221529edd1b774c76e41+a0056ba1/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--verbose=35", "--logfile=/tmp/nxdi_test_3ba5df95-497e-4bd0-8ef7-e73830a20a25/compiler_workdir/SoftmaxNoMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_221529edd1b774c76e41+a0056ba1/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_221529edd1b774c76e41+a0056ba1/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_221529edd1b774c76e41+a0056ba1/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_221529edd1b774c76e41+a0056ba1/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0af8a6578ff5c2235b99aa0341242867c9e3b572 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_221529edd1b774c76e41+a0056ba1/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b2e244a35461f768cddd069add3869c3ae97e218ab6d6c33373c079dc1a6815 +size 10883 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_221529edd1b774c76e41+a0056ba1/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_221529edd1b774c76e41+a0056ba1/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9f85ebd5990b1b11795920eecafbaf031f45b625 Binary files /dev/null and b/neuronxcc-2.23.6484.0+3b612583/MODULE_221529edd1b774c76e41+a0056ba1/model.neff differ diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_23d64b9e7e75ad3db5b2+6bdd387b/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_23d64b9e7e75ad3db5b2+6bdd387b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..bb214adc2a295e8091df23a9f988e14eb4363b7d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_23d64b9e7e75ad3db5b2+6bdd387b/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--verbose=35", "--logfile=/tmp/nxdi_test_c549975b-49a7-4e9c-9662-84276f3efee6/compiler_workdir/Llama3RotaryEmbedding/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_23d64b9e7e75ad3db5b2+6bdd387b/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_23d64b9e7e75ad3db5b2+6bdd387b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_23d64b9e7e75ad3db5b2+6bdd387b/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_23d64b9e7e75ad3db5b2+6bdd387b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dd20d41ac4f50b20b9c32ba7619252ea1dad3900 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_23d64b9e7e75ad3db5b2+6bdd387b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ae49712228977b1bb44bda93d4b210603c9ebaed0588070863a7dd658285f3c +size 6207 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_23d64b9e7e75ad3db5b2+6bdd387b/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_23d64b9e7e75ad3db5b2+6bdd387b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7572f1dbdb425b4cde79fbde5864b597d6b3dbed Binary files /dev/null and b/neuronxcc-2.23.6484.0+3b612583/MODULE_23d64b9e7e75ad3db5b2+6bdd387b/model.neff differ diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5e1501eccbe092eb15dfa9a76b55e033987c76a8 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--verbose=35", "--logfile=/tmp/nxdi_test_043e8ebd-686c-476d-b17d-732f07fc2773/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c344b26aab55b78924ffeabe31d3a6cca4d34fb8 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:151cb2e938b95628af70ac7f046cf6c4d7f53e37b60400c0e33b1053726bf938 +size 50869 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4091ef1eb3e3523db26051f2ef2cb5eedaa19f7a --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6445ba37a7b9e71a86c89fcb9c70c1f8fba1e7caceab402099b98a51f3928301 +size 318464 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f86194e2948ffb2bf444f39646e86d7be67e27ab --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_26af3373fd44ec171fc9+91442d24/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a42ff7eaed69d6a0a55aee4673ef2e929551f34dde40a1c18f5ccc3ed0039e0 +size 324148 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..11602f300e12e90d9bfe6c12bcf407450424f740 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--verbose=35", "--logfile=/tmp/nxdi_test_0f6b8747-3133-487b-9cf7-9f36abe9e1fd/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a73aeccfa01db22531549f84fd6b4a563727367f --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae8d23489aec5bdcdc78ddc6b67adb9b1d4bddba3e4013c47e84f51ee2c485c7 +size 115705 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..67e5e39b81e98dac4358238491573b3b0b32b0f0 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a0521412db9b484cd3eb6f87866ad0ee2d29abf129078c3357f525916f682f +size 400384 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..161855f447afa3be96afc6d18d9c8593c0b9deca --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_2a5e8c0e62e59afdfa5d+31f34a48/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d574299a4b5bd6b68c440232fa2484ab7fc8e59d5e1a9ed50ff646806a6c421f +size 406132 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_30b04eaa3828f95b6070+bdd54ed5/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_30b04eaa3828f95b6070+bdd54ed5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..f92729a701be4e3dbdea09abe339a0232ebcdff9 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_30b04eaa3828f95b6070+bdd54ed5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--verbose=35", "--logfile=/tmp/nxdi_test_2db70eca-4115-479b-98f3-dfc269af7cb1/compiler_workdir/SoftmaxNoMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_30b04eaa3828f95b6070+bdd54ed5/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_30b04eaa3828f95b6070+bdd54ed5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_30b04eaa3828f95b6070+bdd54ed5/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_30b04eaa3828f95b6070+bdd54ed5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a0a672ab1836f9d55bc9dd39d41b2b6eaa8b03e6 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_30b04eaa3828f95b6070+bdd54ed5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a56ce21160d3409b6f715e26de1e6a24d226a613df866bab4f6dff3175b5105 +size 10883 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_30b04eaa3828f95b6070+bdd54ed5/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_30b04eaa3828f95b6070+bdd54ed5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dc13f370545be148109d98e1c039ef4b8ef5dfe5 Binary files /dev/null and b/neuronxcc-2.23.6484.0+3b612583/MODULE_30b04eaa3828f95b6070+bdd54ed5/model.neff differ diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_30bb563379f04bb26c50+0788fa03/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_30bb563379f04bb26c50+0788fa03/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..c27e6c39a7a4c3b736e614d7725e8bfa29486eb1 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_30bb563379f04bb26c50+0788fa03/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/speculation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_30bb563379f04bb26c50+0788fa03/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_30bb563379f04bb26c50+0788fa03/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_30bb563379f04bb26c50+0788fa03/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_30bb563379f04bb26c50+0788fa03/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..14a72068f354d64527ca5830b0d3b98a12c0e1e8 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_30bb563379f04bb26c50+0788fa03/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6aed48490c1e7a2156bf15f419d12d667b802f71a63545305af9678c7930e9f +size 1530931 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_30bb563379f04bb26c50+0788fa03/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_30bb563379f04bb26c50+0788fa03/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9e4d64f4b08a34787758d0cfe42f1efdc860febe --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_30bb563379f04bb26c50+0788fa03/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:790418c4471d29bd58801950e17198abab4e0e1a0b256dca47b6e71b39071d72 +size 4312064 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f68e3a6e3bea9efdd75b2f0c0769e4e1b9c1570e --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96a46072a3e2ddae01c6e25bfaf951b96138dc6ebd038de21331b9bc8794c9f8 +size 225449 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7aad98fe31fb3b5f56def11b13d8a7058d60bc8e --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d79f05d70119224d301843d762579d2b814fa89d7ec837408e7a3ddfccc6fbc4 +size 236544 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4c2d6196705eb30b4a74983eb471a3c8b9cfabfd --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_3b55140cc73be3987155+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec7aab2ca071cba08e1bf9c55f78226c930c9888d498ae61a706fd1a77d87af +size 244755 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_4459752a3307588a8efc+519f203d/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_4459752a3307588a8efc+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_4459752a3307588a8efc+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_4459752a3307588a8efc+519f203d/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_4459752a3307588a8efc+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_4459752a3307588a8efc+519f203d/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_4459752a3307588a8efc+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c597320d9498bc41bee2e2e77ace8a4aa9d07709 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_4459752a3307588a8efc+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18a19881393a6adaf87658499115f6320507ec72ed18a7c5cd8c795e124356b9 +size 243998 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_4459752a3307588a8efc+519f203d/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_4459752a3307588a8efc+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..23923c013855168c0a6a041453f9a0d4bda6feb4 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_4459752a3307588a8efc+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85424eac8b0cc3e6e9355147b6bd9e7e82f0f2fdbc47750e0cd38e0254619ec8 +size 400384 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..4a2c6d0b18bdde36f52f4ed6e26438f684add83e --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..10923a7953901b7b3d9fac8c9d0f92d40604c2ee --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26b8961fd503e7d0f0e389445cb96e6df705701eddc49eb882195183ff4b96c1 +size 261958 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0c8af0ecbe46ab98137ec08845453a58bbe5e7ec --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0fc4ed99f0086e5ec82d6a2d0166822d5dd55ef9608699953e4c388a5d82d3c +size 410624 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b6c5e58596d566d92ca3229111218b5d3ccbdab1 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_51cf569eff8fef82de5c+e2a39c39/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a13d12ebbd3c4edfcf653a432aa00b37d1d598c3501d3247325ea0d3d7935959 +size 420185 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..465b8df85e3a7af91d4f65b6711a0f1b68981539 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8845349b5ee53cbe9c59aff4620cc8a40499114f59d02414736241df17ee3a41 +size 261037 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..83215c91620c0d92361517ad25ce10854f86066e --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd0764206e38309fc03a427bb8e2c991972cba46de488667bce662d7a448d008 +size 308224 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9a98a9221b1f73091499227bba290406f52bc489 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_559fbfb093c4b853b035+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05b34cf6db16c94e1b72e0e733c5b3f2f2ac449502742df2422cc3e3f15e72ac +size 319615 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e8d42f34d1bd9be754618b3ac2f8ae3973416a14 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:090441f3d581e2c9318e2dd4e961c24cf9372c577a6434cc04c7b651d638f75d +size 261545 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..eb180ae08a556dd9659b65d3b91a44393fba0092 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d38ec3b6ff038268acd980d333f4c6024b944451df35334e6818c51548694352 +size 308224 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3f8acbc522f56220eaa1099bce1c1e1c69552830 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_6499de88c282859e63f9+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faea4f645df72b4a3ff701091653a2bf196dff8163a6cd54c38e9dfdb66a69f5 +size 319615 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..be97d827f037f159aada97686e8f310f260bf03c --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66c369d6316fadb78cfde7724c95eeaf892a67fbf2af527141734c783bd0ade2 +size 261118 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6b55091a6d69182fa1eb0d0f68bd5638e03984d5 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9967c93cc1f0a5f594d0d0dabacb71d820ae424c8e9df0a1524d86ccb903cafa +size 308224 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2381c275497575c00e0a296c7c731e99cc7c1d06 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_684b764031e5f8c21d5d+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db6c27caaa86444e7582436fb282dd2145295f929aaaa2d75221815c00d3a2f0 +size 319615 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_69d2efed75dcc1c5fa31+519f203d/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_69d2efed75dcc1c5fa31+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_69d2efed75dcc1c5fa31+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_69d2efed75dcc1c5fa31+519f203d/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_69d2efed75dcc1c5fa31+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_69d2efed75dcc1c5fa31+519f203d/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_69d2efed75dcc1c5fa31+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..521ec1569eb156590e3d4c6e3ea153a77594c212 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_69d2efed75dcc1c5fa31+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:211157456a755084a56ffe143371b1f15c077471d424ad31642210bc94cd8980 +size 208798 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_69d2efed75dcc1c5fa31+519f203d/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_69d2efed75dcc1c5fa31+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..74310aa08e94384cf6bb4c3831da12f6d2656aaa --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_69d2efed75dcc1c5fa31+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3212af1f7a48c3c4d72d056ab2e137ec75414682cd4cb0961fb1348986bc2541 +size 287744 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_7604efeba5b41a480e9f+f3c148eb/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_7604efeba5b41a480e9f+f3c148eb/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..bb570153a0d0799552272716231ce63bf15a65e6 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_7604efeba5b41a480e9f+f3c148eb/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--verbose=35", "--logfile=/tmp/nxdi_test_091559f6-2d14-4cc2-ac1f-4079d4296aa5/compiler_workdir/SoftmaxWithMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_7604efeba5b41a480e9f+f3c148eb/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_7604efeba5b41a480e9f+f3c148eb/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_7604efeba5b41a480e9f+f3c148eb/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_7604efeba5b41a480e9f+f3c148eb/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8c9645cdfee84ad9eb5e9f2ed6704f5b85c79997 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_7604efeba5b41a480e9f+f3c148eb/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63abe54890424f6a4ef7387a732019b8954e2fffce4ec7723259968d4eb5ff98 +size 16223 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_7604efeba5b41a480e9f+f3c148eb/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_7604efeba5b41a480e9f+f3c148eb/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e611acb82b850cd373a842814df3a5c8622035c1 Binary files /dev/null and b/neuronxcc-2.23.6484.0+3b612583/MODULE_7604efeba5b41a480e9f+f3c148eb/model.neff differ diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a2c4ee126034e208bf949d556e80b3c5be38d8eb --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2fa9780251043d33afcfc82fd581547487fdd717173a0528c950b0ee9d9e38a +size 262189 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..47a15e89bf55c0d7cf7c2fe2d764aec0078477c8 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7de66f4b0cc185c313b4c9e4a26651d8c3a3c43e19710c95908e87c48896462 +size 308224 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2178cfaff9ee8159716349b710b51b9d1601d698 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_79d82bd89e1b223ffcc0+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dffc0d4a76bec6e0ee36774cd2a01a6c4e6e9cbd326565a99776d5f7d581f2c +size 319615 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..351e97a7b598be221d2d6154665e991b785f7ac0 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--verbose=35", "--logfile=/tmp/nxdi_test_fb0d4ded-3ec3-4e6b-9d26-6b6675b59614/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2bf5db80f6dd28e6b8f84ce52aa84c602f58de51 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5202458c61496f712d39bf6baaa64ebbdc4608a24c8703c37bd17f06029b803b +size 4231 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f5d32b2840977e91bf975f3a6b8e3e1bf43fb60b --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3afef7048f188cac32bcf3867efbf962660cfba3350bfa83fa050efa8d890180 +size 113664 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3e774b762c3f804a3efa1d1f3f667414c98f0226 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_7c4b6df574b6c3618da7+26edc17d/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9dc1cb465c361a2575028cd7dc87da52896ae4b6f79971e8f46b4cf497498b7 +size 114560 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_7d07320b9068d08650f9+519f203d/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_7d07320b9068d08650f9+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_7d07320b9068d08650f9+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_7d07320b9068d08650f9+519f203d/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_7d07320b9068d08650f9+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_7d07320b9068d08650f9+519f203d/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_7d07320b9068d08650f9+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fb1cfb2d951c44b87611e12038a5079c1a1db33d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_7d07320b9068d08650f9+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf853d1159abce1ef82895995bce5b04f29276431dd73bb2bbfd568e7765d898 +size 243805 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_7d07320b9068d08650f9+519f203d/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_7d07320b9068d08650f9+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..88254ccc20b6330175d044f304aa9df6b26cdac0 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_7d07320b9068d08650f9+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:443a074ebdce657c775c6ede47ec982480ae23ae4da02c0c2ec0be7b59fe357d +size 400384 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8b2b54ed5b62b947859275742ca7cf98fc1bf135 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--verbose=35", "--logfile=/tmp/nxdi_test_5d163499-5673-4822-912d-dc4ad73bff5f/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d3a7be2839b213a2e47edbc41d1d10882b752229 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58390d577c962834f6c4863587afa5421cbb8e32f1c7044aa6ce9d9ecca5f8d9 +size 46026 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6cbae706b1ca24a37438f0f9920deceef3e72e39 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f03dc2fd5d08ffc580a4d211a01a0e4de9b34eea12c3a167a7b41fc0ca448112 +size 2223104 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a75bb7ecf1bad30c7e4659b16d2a2dc99099339a --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_80411482c2074bd644d0+ad64ccab/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3756d5803116bab8fa1cd275f8dd6bb0b8d97c9c4dc6d6926762947e476cf3b4 +size 2226081 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..01b66a8fec5ab776b80922a8e66f2e40185ee4e3 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--verbose=35", "--logfile=/tmp/nxdi_test_897244e8-0a05-4e09-9cf8-08ee2648d6d7/compiler_workdir/NeuronLlamaMLP/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b2248eeef9dcebf11abc3965018985b0a5540214 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc3686142a082048846f2143b04060b34a7d48cd585b60298cd799ad4c4c85b3 +size 8469 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fd18a0935c3ada99d2a66dd4abeddfd5f5cc7784 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85a60cd91533317d307276da2d2818003478883ea624a1c8e3f28b3229e604d5 +size 144384 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3368d12ef82b19eb2ca9ef854efb89c34b4a820d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_882d3df2f85cd1bac046+cf7feb23/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91c77f74ad569909e012e9fc61f9afd41a8f52266b7cb6d0017a0cf2f312e3f6 +size 146462 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e97781e71dd2fef762ff5d8013203eb522212a94 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--verbose=35", "--logfile=/tmp/nxdi_test_a1a3a07e-b767-43d4-8222-e6cc450f7524/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2b4078c882b673b5620a4aa1d07569d01e605255 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7868279d724a0ecc90974f245f5aa79f963d3876cda19851b6f611cff1a5b61 +size 39099 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..16efd2367ca894d94c14c78baad2f7c22590906c --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5bc1055973040790be6ca6a730c7966b42f23336fa1f13b91392f19f7fd5c02 +size 287744 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f4bd135fcbc9b151e7f0c740c0260fff0faef7d8 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_98b49fb7066d92cd7a34+1b41560c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5038190e2bf75e61cae81bd773ed3b9ed6bfd6fee6327d88196156cd3876d56 +size 290568 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_99d1af5cf4e4d170ae11+519f203d/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_99d1af5cf4e4d170ae11+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_99d1af5cf4e4d170ae11+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_99d1af5cf4e4d170ae11+519f203d/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_99d1af5cf4e4d170ae11+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_99d1af5cf4e4d170ae11+519f203d/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_99d1af5cf4e4d170ae11+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6e1097eee37059f57221a07586677b2a16628e70 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_99d1af5cf4e4d170ae11+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95228a4e956d2b9d3ab37833bbaac800491b46da30f2a37ffe9ec73ad58f401 +size 243467 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_99d1af5cf4e4d170ae11+519f203d/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_99d1af5cf4e4d170ae11+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..449608422827507b875dad088f67dd199bc989a7 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_99d1af5cf4e4d170ae11+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:410493902ecba409fa1247c34ff3507364d699d735b3efc6a1987dc54c25379d +size 400384 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e28e4a3637aa7e0c96c56c30b4e73416fa25cf28 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d65070c0cb6fb18bb17fb8f0044aebc13d091cbb --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:424d5556d5182bc2e4ef50f24264aca65d527e1bd6221ddc3418b32edf4208ea +size 1447697 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fbad5c772c1cdcf212a197a9f0f5f1be4a9a8936 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1068c9923e0aeeece044b933f898f815297e03f12ed1edf5e9d6259c30c7916f +size 666624 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9f34d78d342506e8093d0d688c7dda6af482f19f --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_9a635ee77d57d928f306+4a27301b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93b2b9a4b399487f55a2c8c8f0e63c8266d6ef890987ebe8244175f3cb5099a7 +size 674420 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_9bc5ecc156503bbbf391+519f203d/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_9bc5ecc156503bbbf391+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_9bc5ecc156503bbbf391+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_9bc5ecc156503bbbf391+519f203d/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_9bc5ecc156503bbbf391+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_9bc5ecc156503bbbf391+519f203d/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_9bc5ecc156503bbbf391+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..96154900e844eafeaa14fcaa97c5d9ade4090f73 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_9bc5ecc156503bbbf391+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bab1d3b69867d58f4a5b7f089ef89c64009797f8001a4be2f76d41cab93dc553 +size 218727 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_9bc5ecc156503bbbf391+519f203d/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_9bc5ecc156503bbbf391+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9f254d2884167e6d6b84711ad02cdb0d8f62fe6e --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_9bc5ecc156503bbbf391+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3011715f77c21216178a79d8b3493e1361e34eb8501c955312e65ea8eed43ddb +size 359424 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..749175c39e3f21cfb62b20642c24f670b51dda52 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51b2947685e4e3fc4d520e47511950f71124d64d65af5f13c9237a0720f4b388 +size 1365272 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7119b6b492e786c5fcc83b7ac610d153fa856c81 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7b18ae0a6481b44a77af46dfc781c39aa2f63c826d8a8eedc539c39327844b7 +size 2182144 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f4a8db9c951d9701e287202bc783199cbc876158 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_9e1e2791bf3a8339e933+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f798694d3ac6189bad88f94b9ab1ed123c7083ee173d30c58c57fcd6f7190982 +size 2275060 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6c52192161ab20f7a585b668b22e837e509c5990 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5538e68dc0002b4aec4157c916d0dd9db9c4aea1053f2352fa6c3e0e29e83fb5 +size 1793659 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..67fa8745f86646aa6fdf6437c09c7d6a4fc62734 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a895930cb0a76e84d1e3048a8092fde1b724294857253ab5e26348007caba465 +size 1168384 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8e018da2a1d99ece52fde2a188f044354fde6be4 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_a28227665d3a91e65100+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eb04edbf833295a2f0fccfc752c6f37f54ee0bc3d1605e0f12ad584eb8269fc +size 1290773 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_a8bdd2678c80a638674c+e457fe88/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_a8bdd2678c80a638674c+e457fe88/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..23333f396509c6f3747252892313dc013437409a --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_a8bdd2678c80a638674c+e457fe88/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_a8bdd2678c80a638674c+e457fe88/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_a8bdd2678c80a638674c+e457fe88/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_a8bdd2678c80a638674c+e457fe88/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_a8bdd2678c80a638674c+e457fe88/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7dd1e8139183d14863a4db391b7cd659396e01bb --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_a8bdd2678c80a638674c+e457fe88/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d7e823691f2831158865178a72a620d29f308ba3addec67f15002564f796aab +size 1434430 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_a8bdd2678c80a638674c+e457fe88/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_a8bdd2678c80a638674c+e457fe88/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7371ede81a6e30686bf25b5d92d5fbc3651a06c0 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_a8bdd2678c80a638674c+e457fe88/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de80e1df3ab7ce85a72f47b8a9a2521ba56b0ba26e8eb821bf0bea62f357361b +size 758784 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5c28e8882b049fd5293067813bd3615cbe5130b6 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..572257676f1e1cea1e5b1205edd07c8a72a21b9b --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71e15160817d35711ad050664c7207054599b8d29feca8cddd5d4427cf3460e9 +size 289584 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6bb5aa1414149393d19ca1ce6e21c1b054fbb27f --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:984cdfbe72dce4e316c5c87f94da5bcb396b2c5348f9c10379d5c2fc971537a6 +size 297984 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..fc3b71f29af4e714d39cb991b4969d69a3b76471 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_adabd8090f20f2d654c6+ae2b3e2d/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3738f16c243f8b6eb2fce48fd6010f99253adb6bcd278f0c033068e42f3774e2 +size 309914 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_bc130a5a10e96fc8a3b6+519f203d/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_bc130a5a10e96fc8a3b6+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_bc130a5a10e96fc8a3b6+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_bc130a5a10e96fc8a3b6+519f203d/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_bc130a5a10e96fc8a3b6+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_bc130a5a10e96fc8a3b6+519f203d/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_bc130a5a10e96fc8a3b6+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..696c33eea91d0e963f6e90a99027035e345e44d7 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_bc130a5a10e96fc8a3b6+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05f74b09b470d9ad0b959a3e54071f448e2d0f72d37ae814c85cd47be3bcbf6b +size 217403 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_bc130a5a10e96fc8a3b6+519f203d/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_bc130a5a10e96fc8a3b6+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fa1676fa82b4d4166efa73195d0abcc4464a263f --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_bc130a5a10e96fc8a3b6+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68cf3849ac50f2428793e54b11401ce37981f2d5a642e48df0f909079f547ac9 +size 287744 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_bd421059e20aaead3083+83df32e9/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_bd421059e20aaead3083+83df32e9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dd18130c00edb914289c6a538313ced4d3bce46d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_bd421059e20aaead3083+83df32e9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--verbose=35", "--logfile=/tmp/nxdi_test_49479b4c-5374-47ce-bce2-45107a92c83b/compiler_workdir/SoftmaxWithMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_bd421059e20aaead3083+83df32e9/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_bd421059e20aaead3083+83df32e9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_bd421059e20aaead3083+83df32e9/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_bd421059e20aaead3083+83df32e9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..17bed66a626479d1590d770e3d917d027a452869 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_bd421059e20aaead3083+83df32e9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f197426bcab12e7ffc031e859a5a7c79c49e27bbbbc9b33f0b31580c68c8d48 +size 16223 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_bd421059e20aaead3083+83df32e9/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_bd421059e20aaead3083+83df32e9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..da5c741e1255c53e029bb5ec3a695e624080fb4e Binary files /dev/null and b/neuronxcc-2.23.6484.0+3b612583/MODULE_bd421059e20aaead3083+83df32e9/model.neff differ diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_beed7b6ee886171fa077+519f203d/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_beed7b6ee886171fa077+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_beed7b6ee886171fa077+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_beed7b6ee886171fa077+519f203d/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_beed7b6ee886171fa077+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_beed7b6ee886171fa077+519f203d/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_beed7b6ee886171fa077+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c0d65d1f35adce43eea1442bbc0636100fae94dc --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_beed7b6ee886171fa077+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19804d0ec8930de4bd5d9e29dbd88ddd168ff87374594f14559642367b4534c2 +size 1623076 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_beed7b6ee886171fa077+519f203d/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_beed7b6ee886171fa077+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7d6de84a15150c1584bcfdf4981638d12f47c085 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_beed7b6ee886171fa077+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5c77e6a1132621b5602674480c0f9bba11395b3a631962120873d3883391ecd +size 1987584 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b570f322721360a56788ee3848fd8cc9883ec1f8 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ca328bb5bddfef67b124ffa4362f6467d5667c8d209b8a14c33cba432ee518 +size 1059657 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ab6685c3c72457f9e5734ab787d1e0de3a33ce24 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4b0fefbe743c12368ffa967bb66a2d50828dd22999d731688377f268d762ef5 +size 605184 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bdeeb974e63e832f73d977823fadd378f0dc0ad9 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_c45d8e3cd36067a4dd7b+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d33b8294ebb4a706d78100f0eb4dcb9361b7fb7e2601eff80c0b30352d5e090b +size 624685 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e44536df70e104f4cacd923dbf6aeca5dead78c6 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d975d631a18191291d97ef7f380232dc51d85c214023dc4a6a349ab389c1ad25 +size 238477 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5effb4f028807701d05fb298ab5c95befce3f207 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab977a048ec9c0d6acc645859a39b8edebc840deef73f37f834cdb2da72f1356 +size 318464 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..68a64a27f58db0eb113a0a7210cfeeac3a9fd43c --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_ce3df00d70bddb9cd0ce+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37937e5dd81ae91e18e1f6de32b1b103d911acd39e6649768a565fb395423ddf +size 326677 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_d04fa963001e29808715+b7ba88c2/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_d04fa963001e29808715+b7ba88c2/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2df555fab561ac4f8280e900c15e09b0f0dc23d2 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_d04fa963001e29808715+b7ba88c2/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_d04fa963001e29808715+b7ba88c2/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_d04fa963001e29808715+b7ba88c2/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_d04fa963001e29808715+b7ba88c2/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_d04fa963001e29808715+b7ba88c2/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4d716b5d0049175d07f3fd4c6f87e3e11f7ad6e5 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_d04fa963001e29808715+b7ba88c2/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed564a345269901552555d49b7c85c0f0ca03da2acc78ef764ca0a5345f0dae1 +size 277856 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_d04fa963001e29808715+b7ba88c2/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_d04fa963001e29808715+b7ba88c2/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0e33e5d757bbc879046c367d87c1c90a7f640854 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_d04fa963001e29808715+b7ba88c2/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43af958750e7de7fdd2105b97d043749adcdc5f704a7a2e3b136aade065694cd +size 451584 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_d7f34bc5df8db9179a8a+519f203d/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_d7f34bc5df8db9179a8a+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_d7f34bc5df8db9179a8a+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_d7f34bc5df8db9179a8a+519f203d/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_d7f34bc5df8db9179a8a+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_d7f34bc5df8db9179a8a+519f203d/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_d7f34bc5df8db9179a8a+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..393e6ff01bb38626e0cfbf9dc044559ff1332901 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_d7f34bc5df8db9179a8a+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f2556e7a10f9205573b6d09245eccf722de9e8bacc6cbe9b5cca0bc60c03b3d +size 243627 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_d7f34bc5df8db9179a8a+519f203d/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_d7f34bc5df8db9179a8a+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d2f9e5a2c6ae41e8a82069d1ab49dcd4930c8c66 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_d7f34bc5df8db9179a8a+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b0bc13da5101681318956ca9cb49eb9f79fe1ac8e884614029bdcdd3db0039 +size 400384 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2dfe122256cc3c1e68e7f9d51d470b65837cc2fd --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1daa0e619127003942941e8505ed8dcd9984ee04ef5964328fd2428cb8afaad4 +size 232163 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b4ac5a48b3da5bc720943556c376034d479714fd --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f05f35cf036bf75291639f9ecb1deec8653a5b96fcfe8de5ef66959e4134fdb +size 277504 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a54cbbd6a1e00509a25d14038ce4fc028d9102d2 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_d9cb9c6c39826f37fc70+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffcbd4e66a789a47b62ad29757f2adf399b0dca70e1066d3a316a907e5af6a8c +size 285687 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..46db31d3c333a208f1a44ff933642390ee2b728d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..49cae45e24c6b659f587d969441f23719fc7fc84 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a20ee93b91d36eb4d9e116eca0832a776ce78e2a3dc0b41befbefd37f92b85 +size 2985560 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..17ee4116c29965b2a27cab2cf2272028c59ca3ee --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52ec2702c787c8c8cfe139bd4a486bbdc80708b39f19d3338b6b690d08351b2f +size 5182464 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/wrapped_neff.hlo b/neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6d2d3a1ed95bacdb6fc59f4238f97a8e214e2d04 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_e22805f26de61ae960e2+84337dd9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ab0af05432f1dd25f6676ca2fbd3723f9eac2d0d991ed8a97c6de78a07f7516 +size 5348988 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_e855926ccb3f126d7195+519f203d/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_e855926ccb3f126d7195+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_e855926ccb3f126d7195+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_e855926ccb3f126d7195+519f203d/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_e855926ccb3f126d7195+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_e855926ccb3f126d7195+519f203d/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_e855926ccb3f126d7195+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a88ee95f1cccfe8fd5fe229930015cb01e16cf6a --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_e855926ccb3f126d7195+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3646d98a61f499942820bf18a4bf88df93b1bd709eb1e86fb318ef3294a6eeb +size 1114118 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_e855926ccb3f126d7195+519f203d/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_e855926ccb3f126d7195+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..543486c5516383bb1ce4ead021141fc1937d48dd --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_e855926ccb3f126d7195+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:937e7d54d0d781ad887ce8f09e5193fe065e8373069e4838d35d08639b4c4972 +size 687104 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_ec2c37db809075a6d417+519f203d/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_ec2c37db809075a6d417+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_ec2c37db809075a6d417+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_ec2c37db809075a6d417+519f203d/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_ec2c37db809075a6d417+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_ec2c37db809075a6d417+519f203d/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_ec2c37db809075a6d417+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cba25c6de6335bc8cc6c86c770623c6cfb506f61 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_ec2c37db809075a6d417+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37c1b96c981598a817192ee07b0ce12460f56aea5835940bf61ebef6151770f +size 241222 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_ec2c37db809075a6d417+519f203d/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_ec2c37db809075a6d417+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..13b24720d53bb4b2adde1df836a0d49442d7b77f --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_ec2c37db809075a6d417+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c249300e9bd76687fa3ad003d3ae0ad632a08e045e7b7aa87988878f007c57e +size 318464 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_fd2fd500686b8882cd25+519f203d/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_fd2fd500686b8882cd25+519f203d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceda35df13d9f1ba719ce3eda3655a42e44fdba --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_fd2fd500686b8882cd25+519f203d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_fd2fd500686b8882cd25+519f203d/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_fd2fd500686b8882cd25+519f203d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_fd2fd500686b8882cd25+519f203d/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_fd2fd500686b8882cd25+519f203d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..346d15d4f13d98fdd7b7618d6a74b39238b76062 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_fd2fd500686b8882cd25+519f203d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ca9a07026e00d9327cb310709148f8ddfeb50b394d9d994521f0c11cea632e +size 244017 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_fd2fd500686b8882cd25+519f203d/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_fd2fd500686b8882cd25+519f203d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b02fa3a49b2eb348004048e92022d3bbb05c265b --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_fd2fd500686b8882cd25+519f203d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7a987e0fe06a125381ad8c7df574340b9e21d6041bc455d76ce15199c613518 +size 400384 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_ff117705b57d7b86f907+12dd9119/compile_flags.json b/neuronxcc-2.23.6484.0+3b612583/MODULE_ff117705b57d7b86f907+12dd9119/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..043d6e4074631e832c744d21e67006afb148e60d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_ff117705b57d7b86f907+12dd9119/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--verbose=35", "--logfile=/tmp/nxd_model/chunked_prefill/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_ff117705b57d7b86f907+12dd9119/model.done b/neuronxcc-2.23.6484.0+3b612583/MODULE_ff117705b57d7b86f907+12dd9119/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_ff117705b57d7b86f907+12dd9119/model.hlo_module.pb b/neuronxcc-2.23.6484.0+3b612583/MODULE_ff117705b57d7b86f907+12dd9119/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..33e36571b0750cb2526263c2693ef42e6997198d --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_ff117705b57d7b86f907+12dd9119/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ed157be0244a08030b2c97d5582cec695925ff9c4aea5246968c3665f1a7f58 +size 1818060 diff --git a/neuronxcc-2.23.6484.0+3b612583/MODULE_ff117705b57d7b86f907+12dd9119/model.neff b/neuronxcc-2.23.6484.0+3b612583/MODULE_ff117705b57d7b86f907+12dd9119/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..72274bb86348881b9b6c001fcfc88137ca81ce63 --- /dev/null +++ b/neuronxcc-2.23.6484.0+3b612583/MODULE_ff117705b57d7b86f907+12dd9119/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a20733620814ed71138fe8a0c7f6509a56e625bf762aec8e21223330601d356 +size 13057024