diff --git a/.gitattributes b/.gitattributes index bce021e34a8c6454becc7f1bb1991dec846e7b8a..b88a0719e6fc6a483ffe0e0bf0df69e8e4e07325 100644 --- a/.gitattributes +++ b/.gitattributes @@ -12095,3 +12095,18 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9260d832dabcf299e0e+877608f3/model.neff neuronxcc-2.21.18209.0+043b1bf7/MODULE_fccf2ddf2bf73205ff5e+be13b572/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_fe41e7d06c2497a6f69b+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_32890ce50588eb4847d2+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b9db1a378bb930adcac+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_95da496a4b6f989006dc+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_eecbe415d8ea39de1b02+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa25918a7328b487263f+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/meta-llama/Llama-3.1-8B/72b9effb918b468fab6b.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/meta-llama/Llama-3.1-8B/72b9effb918b468fab6b.json new file mode 100644 index 0000000000000000000000000000000000000000..97ea3f8a5e8ca96e1e266e03a608d4f84e39d691 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/meta-llama/Llama-3.1-8B/72b9effb918b468fab6b.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/meta-llama/Llama-3.2-1B-Instruct/c5642baef1ee43f63a47.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/meta-llama/Llama-3.2-1B-Instruct/c5642baef1ee43f63a47.json new file mode 100644 index 0000000000000000000000000000000000000000..8449d3c928d7e4c634d2331a90525ff34ec4bff3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/meta-llama/Llama-3.2-1B-Instruct/c5642baef1ee43f63a47.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", + "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 64, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/meta-llama/Llama-3.2-3B-Instruct/43e3a740fabdaa1eb172.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/meta-llama/Llama-3.2-3B-Instruct/43e3a740fabdaa1eb172.json new file mode 100644 index 0000000000000000000000000000000000000000..bd1feacbf1b0230cebb74887ed8dbf8180fcea33 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/meta-llama/Llama-3.2-3B-Instruct/43e3a740fabdaa1eb172.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.2-3B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-3.2-3B-Instruct", + "checkpoint_revision": "0cb88a4f764b7a12671c53f0838cd831a0843b95", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 4, + "max_batch_size": 64, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 4 + }, + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/meta-llama/Llama-3.3-70B-Instruct/63f364c92e814e7930a6.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/meta-llama/Llama-3.3-70B-Instruct/63f364c92e814e7930a6.json new file mode 100644 index 0000000000000000000000000000000000000000..bf6323cb57009dd089b14be8383915ce379fdeaf --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/meta-llama/Llama-3.3-70B-Instruct/63f364c92e814e7930a6.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.3-70B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 64, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-3.3-70B-Instruct", + "checkpoint_revision": "6f6073b423013f6a7d4d9f39144961bfbfbc386b", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 64, + "max_batch_size": 64, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 64 + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/meta-llama/Llama-3.3-70B-Instruct/a756be1ea3564e691eca.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/meta-llama/Llama-3.3-70B-Instruct/a756be1ea3564e691eca.json new file mode 100644 index 0000000000000000000000000000000000000000..83083384c81a12991c567c28e6a3dfe13181d985 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/meta-llama/Llama-3.3-70B-Instruct/a756be1ea3564e691eca.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.3-70B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-3.3-70B-Instruct", + "checkpoint_revision": "6f6073b423013f6a7d4d9f39144961bfbfbc386b", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 64, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 64 + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32890ce50588eb4847d2+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32890ce50588eb4847d2+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32890ce50588eb4847d2+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32890ce50588eb4847d2+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32890ce50588eb4847d2+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32890ce50588eb4847d2+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32890ce50588eb4847d2+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b4c49049990214f5cc9808bd64607c8f95f0bbf1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32890ce50588eb4847d2+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbdfef372f70f895a95a404326cbf6219ef292a024a0b5a5a945eaf779455740 +size 2046532 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32890ce50588eb4847d2+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32890ce50588eb4847d2+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2c5e21f864be149c5e416c160c1aa27d5f03e3c9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_32890ce50588eb4847d2+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eb056c8be8a0f4d90b775bbf339caca7df067d8cfd6b7d176e4ba445f5991e5 +size 37510144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b9db1a378bb930adcac+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b9db1a378bb930adcac+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b9db1a378bb930adcac+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b9db1a378bb930adcac+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b9db1a378bb930adcac+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b9db1a378bb930adcac+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b9db1a378bb930adcac+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..34ef05d43cd6429a0326ffb430ff552387d123cb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b9db1a378bb930adcac+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c5483f941aaabf3a5667625992fb6714b1caab83f35f8d1da9535d7ffa77459 +size 2109389 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b9db1a378bb930adcac+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b9db1a378bb930adcac+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..831fb5d18de6fd5c5a1e508a7b376a176b47e696 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4b9db1a378bb930adcac+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c10cb81eb03dec3f825b0083c2b867001a4000adf104b3905a03def3a775bbe5 +size 3421184 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_52d83050720e56e8e15a+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_52d83050720e56e8e15a+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_52d83050720e56e8e15a+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_52d83050720e56e8e15a+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_52d83050720e56e8e15a+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1f2f58476b88006bebedbf2c08e449a7fa80d33a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_52d83050720e56e8e15a+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b2f7b97006fd94793852f160d93db1b377aabab76815ddbb522e9c9f086014d +size 741435 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a7419f3015de4f1769d3c24a9ef118c9a76f6e30 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2c88bf14989b363b08830daf74eb7acc0e53f9dcb5f4ea628e1f1962ac46066 +size 1876821 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7786e3c46214c4e52326ae7fe08da3b03436c270 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f80d7dc4e8d61e380b0e6a0bd8d5e3067d68ad8d262a55554356760df02bb055 +size 5520384 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6a68e3d98e4033ed51c84fb47b2f565f6fc5a1a8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ab545f5598ab43c620+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1c053c7eaa5ed358465686b26ae8e886f3b42219121e94bccc287faaaeb0dd +size 5670933 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_95da496a4b6f989006dc+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_95da496a4b6f989006dc+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_95da496a4b6f989006dc+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_95da496a4b6f989006dc+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_95da496a4b6f989006dc+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_95da496a4b6f989006dc+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_95da496a4b6f989006dc+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7021b6e112cfafefa7e0b1b0f0a27e9734ae04e8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_95da496a4b6f989006dc+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8faaa97644c827524615640a03a52356ca3054ac6df0c56807f9d4aa19e91b74 +size 3584622 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_95da496a4b6f989006dc+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_95da496a4b6f989006dc+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..086943c9906408ca04959f8e438e97e9ba1f5fb0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_95da496a4b6f989006dc+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca3e6ee4b164d9a1b746665b414409121f331800248c901dd2dd265c339f438d +size 1956864 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..12aca1472bb21889cd018fb4ea418f732edcdd09 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6ff4972e80b66672ef8193cc583a12c08cd0e0466b560f4f71d9caedf0ca18d +size 6876289 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..170abd6f80e7f6d3afd6de6ef6af705dad6ef07d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd87ace9718b5629ff53dd2092db7be7d40816469b1eacd372ebfce033262cc3 +size 27864064 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f6c3a0f9afb3311a4d5ff93089b3bfa1c75a0a3e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9e579d7817773781502b+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8049dc0e844dfb4713f9c3b4b57c8cc3c6ba654aa48759cf7bc0fa5cfdf8ca31 +size 28233183 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8f1e7234f1771e42f7344fe901a4c2c9878a963a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0145ce2660d92ca8aa9599e549f382bb2c55573522ec15d06e7c4a1ab432ac67 +size 3038407 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dbcee99ffd9354b1e0997a41d08034628c6a17ba --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e1c4e379697787efe99a0f24df3548d855c928861a231d07a5b93b3f2da522b +size 10476544 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..de24f981387625ab780ca02dd15ee1e34a2bc640 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c5a63548165f9ad7b464+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65d6cd3d68487e0722807b213434674e85fb52dc05ceebce9bee5af3a0c3ef41 +size 10845663 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cecd5c50ffbf6031f3a62326f2b3ac51de9097a0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b030d629af2f45321359aea799c4192f0a08c005dbe74222f9811d92ee3de1d +size 9371669 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5a7eafe385febcb8e50dc11a9f130f23230788c2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0c074b574aef75c9053e282320ff55786074d682d6fe75ab6caf6f0fa502bec +size 13630464 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ffdbf773edefca52517be19cbc7d16cecf50747d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e460fe320b669663351a+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ccac76b00b3af6bd99f83d6e3d78fdf1730e4dffe10e12c04f04cfbd95cd2d +size 13705938 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eecbe415d8ea39de1b02+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eecbe415d8ea39de1b02+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eecbe415d8ea39de1b02+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eecbe415d8ea39de1b02+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eecbe415d8ea39de1b02+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eecbe415d8ea39de1b02+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eecbe415d8ea39de1b02+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..acaa5049f0e2d20da1c52dba1f4c478c07833a30 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eecbe415d8ea39de1b02+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e81e34a9796c8b9b54cbd36dc706fbb406dd7fba59a1f8ab7332d5f0f19a0b1 +size 1647835 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eecbe415d8ea39de1b02+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eecbe415d8ea39de1b02+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3707fcdc17cf6db510ff12807ba6b08a97e62227 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_eecbe415d8ea39de1b02+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deb61b358c0663a7ef5a2ffb35e941def521edb8c331fd99fc843b587f6db045 +size 18678784 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a2a6db89b0b712b77a5649328d44eb29ed2e50b6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7de9bea1e6440b7d93c838816785f05a7504f49d73e339712146a309ebb36f6 +size 9658026 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..226ade87ad102c30611d82876676b831ecbe8e3d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee95fdd918895d9198361d5afd72bb8d72afc49cbf16dd2c528c20448f44983c +size 22037504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e6ca8f6ea0ed70befcdd1af91e50996166bfd7e8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f5177664b154895397b4+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eae8a2659d3b90b0ae1b82a87f263e475b22b8643d7401f82597992c7699b2d +size 22168725 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa25918a7328b487263f+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa25918a7328b487263f+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa25918a7328b487263f+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa25918a7328b487263f+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa25918a7328b487263f+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa25918a7328b487263f+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa25918a7328b487263f+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..69d8856757611d67d0e00904101a453267490fb4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa25918a7328b487263f+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecb953523060f4899511cf80c76c6586f19ebccdfc939e8fe76fa8c42b7bf28b +size 3584622 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa25918a7328b487263f+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa25918a7328b487263f+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..74b1bd9896ec8baf8506bd0e60783f33ba6ccd5e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa25918a7328b487263f+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c442503f84e966daf6a332ef7980a774ae6cf7aae216dd86831e3cc917a6b3b +size 1956864