NeMo
English
nvidia
code
math
igitman committed on
Commit
50162d5
1 Parent(s): deba377

Add model files

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. nemo_model/740dedcfac234904a4c5ed5ceb07d686_tokenizer.model +3 -0
  3. nemo_model/8da13ec9455c435fa215279cba8b8214_tokenizer.model +3 -0
  4. nemo_model/model_config.yaml +122 -0
  5. nemo_model/model_weights/common.pt +3 -0
  6. nemo_model/model_weights/metadata.json +3 -0
  7. nemo_model/model_weights/model.decoder.final_layernorm.weight/.zarray +3 -0
  8. nemo_model/model_weights/model.decoder.final_layernorm.weight/0 +3 -0
  9. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_0_40.pt +3 -0
  10. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_10_40.pt +3 -0
  11. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_11_40.pt +3 -0
  12. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_12_40.pt +3 -0
  13. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_13_40.pt +3 -0
  14. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_14_40.pt +3 -0
  15. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_15_40.pt +3 -0
  16. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_16_40.pt +3 -0
  17. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_17_40.pt +3 -0
  18. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_18_40.pt +3 -0
  19. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_19_40.pt +3 -0
  20. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_1_40.pt +3 -0
  21. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_20_40.pt +3 -0
  22. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_21_40.pt +3 -0
  23. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_22_40.pt +3 -0
  24. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_23_40.pt +3 -0
  25. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_24_40.pt +3 -0
  26. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_25_40.pt +3 -0
  27. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_26_40.pt +3 -0
  28. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_27_40.pt +3 -0
  29. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_28_40.pt +3 -0
  30. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_29_40.pt +3 -0
  31. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_2_40.pt +3 -0
  32. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_30_40.pt +3 -0
  33. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_31_40.pt +3 -0
  34. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_32_40.pt +3 -0
  35. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_33_40.pt +3 -0
  36. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_34_40.pt +3 -0
  37. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_35_40.pt +3 -0
  38. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_36_40.pt +3 -0
  39. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_37_40.pt +3 -0
  40. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_38_40.pt +3 -0
  41. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_39_40.pt +3 -0
  42. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_3_40.pt +3 -0
  43. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_4_40.pt +3 -0
  44. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_5_40.pt +3 -0
  45. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_6_40.pt +3 -0
  46. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_7_40.pt +3 -0
  47. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_8_40.pt +3 -0
  48. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_9_40.pt +3 -0
  49. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/.zarray +3 -0
  50. nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/0.0 +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ nemo_model/model_weights/** filter=lfs diff=lfs merge=lfs -text
nemo_model/740dedcfac234904a4c5ed5ceb07d686_tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
nemo_model/8da13ec9455c435fa215279cba8b8214_tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
nemo_model/model_config.yaml ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mcore_gpt: true
2
+ micro_batch_size: 4
3
+ global_batch_size: 8
4
+ tensor_model_parallel_size: 1
5
+ pipeline_model_parallel_size: 1
6
+ virtual_pipeline_model_parallel_size: null
7
+ encoder_seq_length: 16384
8
+ max_position_embeddings: 16384
9
+ num_layers: 40
10
+ hidden_size: 5120
11
+ ffn_hidden_size: 13824
12
+ num_attention_heads: 40
13
+ init_method_std: 0.02
14
+ use_scaled_init_method: true
15
+ hidden_dropout: 0.0
16
+ attention_dropout: 0.0
17
+ ffn_dropout: 0.0
18
+ kv_channels: null
19
+ apply_query_key_layer_scaling: true
20
+ normalization: rmsnorm
21
+ layernorm_epsilon: 1.0e-05
22
+ do_layer_norm_weight_decay: false
23
+ make_vocab_size_divisible_by: 128
24
+ pre_process: true
25
+ post_process: true
26
+ persist_layer_norm: true
27
+ bias: false
28
+ activation: fast-swiglu
29
+ headscale: false
30
+ transformer_block_type: pre_ln
31
+ openai_gelu: false
32
+ normalize_attention_scores: true
33
+ position_embedding_type: rope
34
+ rotary_percentage: 1.0
35
+ attention_type: multihead
36
+ share_embeddings_and_output_weights: false
37
+ overlap_p2p_comm: false
38
+ batch_p2p_comm: true
39
+ num_query_groups: 40
40
+ tokenizer:
41
+ library: sentencepiece
42
+ type: null
43
+ model: nemo:8da13ec9455c435fa215279cba8b8214_tokenizer.model
44
+ vocab_file: null
45
+ merge_file: null
46
+ delimiter: null
47
+ sentencepiece_legacy: false
48
+ tokenizer_model: nemo:740dedcfac234904a4c5ed5ceb07d686_tokenizer.model
49
+ native_amp_init_scale: 4294967296
50
+ native_amp_growth_interval: 1000
51
+ hysteresis: 2
52
+ fp32_residual_connection: false
53
+ fp16_lm_cross_entropy: false
54
+ megatron_amp_O2: false
55
+ grad_allreduce_chunk_size_mb: 125
56
+ grad_div_ar_fusion: true
57
+ gradient_accumulation_fusion: false
58
+ bias_activation_fusion: false
59
+ bias_dropout_add_fusion: false
60
+ masked_softmax_fusion: true
61
+ get_attention_mask_from_fusion: true
62
+ seed: 1234
63
+ resume_from_checkpoint: null
64
+ use_cpu_initialization: false
65
+ onnx_safe: false
66
+ apex_transformer_log_level: 30
67
+ gradient_as_bucket_view: true
68
+ sync_batch_comm: false
69
+ activations_checkpoint_granularity: null
70
+ activations_checkpoint_method: null
71
+ activations_checkpoint_num_layers: null
72
+ num_micro_batches_with_partial_activation_checkpoints: null
73
+ activations_checkpoint_layers_per_pipeline: null
74
+ sequence_parallel: false
75
+ transformer_engine: true
76
+ fp8: false
77
+ fp8_e4m3: false
78
+ fp8_hybrid: false
79
+ fp8_margin: 0
80
+ fp8_interval: 1
81
+ fp8_amax_history_len: 1
82
+ fp8_amax_compute_algo: most_recent
83
+ reduce_amax: true
84
+ use_emha: false
85
+ data:
86
+ index_mapping_dir: null
87
+ data_impl: mmap
88
+ splits_string: 900,50,50
89
+ seq_length: 16384
90
+ skip_warmup: true
91
+ num_workers: 2
92
+ dataloader_type: single
93
+ reset_position_ids: false
94
+ reset_attention_mask: false
95
+ eod_mask_loss: false
96
+ validation_drop_last: true
97
+ no_seqlen_plus_one_input_tokens: false
98
+ pad_samples_to_global_batch_size: false
99
+ shuffle_documents: true
100
+ nsys_profile:
101
+ enabled: false
102
+ start_step: 10
103
+ end_step: 10
104
+ ranks:
105
+ - 0
106
+ gen_shape: false
107
+ optim:
108
+ name: fused_adam
109
+ lr: 0.0002
110
+ weight_decay: 0.01
111
+ betas:
112
+ - 0.9
113
+ - 0.98
114
+ sched:
115
+ name: CosineAnnealing
116
+ warmup_steps: 500
117
+ constant_steps: 50000
118
+ min_lr: 2.0e-05
119
+ precision: bf16
120
+ target: nemo.collections.nlp.models.language_modeling.megatron_gpt_model.MegatronGPTModel
121
+ nemo_version: 1.21.0
122
+ rotary_base: 1000000
nemo_model/model_weights/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f76a0eb3ecbf990b3c170880671cb68aca95f8190998b84aea67237d679ac15d
3
+ size 24013
nemo_model/model_weights/metadata.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f502ab47c9b1fe00615afb88074d49cf6f9b4c85f482734543160c5cb2f76af9
3
+ size 113
nemo_model/model_weights/model.decoder.final_layernorm.weight/.zarray ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12b714654004b7f6ba8848aeb7af681a4e5147e49a6d59c08a1d5268dc8dd7f5
3
+ size 207
nemo_model/model_weights/model.decoder.final_layernorm.weight/0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9dd801dacfcc73069c3beca7253986dd52df10bb31aaf17304f4be4424c238a
3
+ size 10240
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_0_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31c40dd0287f1a27458af09bb74b29153cac5444aa1bd2f4cce0d83294c6cc80
3
+ size 1836
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_10_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ced1aad5f43ff19604b3887d7f489cecd16310155b1515bda0f15d1a1a279ff5
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_11_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60c4e29a8324371cc5b87e502911ed070eea041c0e1bf965f9df21b1b0d9567a
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_12_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f13452b9d3aefa72c5b3778cf49ed98919489239f9608f06a3e08276b3c084f4
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_13_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e62fd54a0c84dda40ba327a03010ad974965f12b9e73a7f337449e1b0480ee2
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_14_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd8297dbbe8697c5c1af84e605e02903ce287d0db4c57003723cbe8e1d0216b0
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_15_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c2eb0a283068b8b53638406aa4aad7177e9e020b94998675fca6e772e9bc8ab
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_16_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b120e3c7ce9d6add97c76e06320a69b75d73965046300ba783513a52bf3dc59c
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_17_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46557afd4c478d8b95b004f2b98a8ae73ea48e2eeef0035b2c94edbf0b5e4da0
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_18_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b511ca88f8ca1a07ce92cbf8fce312bafca62e0c9cffbcc59c4b100aed67bc27
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_19_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c056d48650655c18add9a3fa57dc8cb49cff869d80439682a2d8bce099048867
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_1_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71d52f0bdf8a6a6dd2d9b98000e8d7edccb7aaf522ec3215b0ef13d7057f99cf
3
+ size 1836
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_20_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91a75c59b219bc3fe062de79103d7c81dc110cfb35630efe65128dc7442f0ea1
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_21_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad583e4785f3058325b56414dd384d14d7ae9ee26e71e225b33d70b6bea65249
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_22_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a167b3ab9467519e5172d0325288049acc57f8dedde9b3ff5744eee67d19cf0d
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_23_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ad3f50e0b8200294e0cd7df2744327fd5a1fb1c71a828bff372411194bead0b
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_24_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e28818a6e54608f2e37819ebcac34fe7875bb3991e74ccb63db0edff7c74e442
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_25_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:351c2700526ef617d44dbb51648b4944e2962ff557b7542450a27cc5a366150c
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_26_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99a49c7263b7e1910f1501ebfe94f22c66bf21f4b260db044201348a14f2ea0d
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_27_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:045c3b3a90c971f41db7e30ed0055a23cc8f754bc0e795e8c66712acdcc3c599
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_28_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ce179735c7fde2f2a68b2fb4a771f27cc07895e036f4d369129372775edd614
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_29_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b97cf35cc732b1d32ac528d6de7ab26a8d6eb6bc658936045bbb22e75d1518c9
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_2_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3046f1273a22949255fdf9e4122eca82022c620a497e9d7552da9b331076be6b
3
+ size 1836
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_30_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:487bae520c2c087535facd524d2c50bdee0f835f91493645ed520004b765002e
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_31_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ed3bec1cdb5d795d2e5c5670ae8ad6dced1f52b51b522f8f5e96ddc2350a3aa
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_32_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ea3d93c1a486aa715b926b15312d6ebb5d86105ad976ee7dca467db07d3c0dc
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_33_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:616737c0a2ccaba7edd3265ce1583b12086e0ff1a452322c4b0afd403f588887
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_34_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b88de03d76fe635a139ea4269c60f8b40be5d6269fe55b987722f7dca2ff34ac
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_35_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84ce38eeb065d81e732feb21df356657c4c88c1434ff090fbd686fc8070134d0
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_36_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d859d2add2b7ca45431a5176642e6adcac9b973dc0967473f9f4d542ecec0c1e
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_37_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f968f4f10dbdaa37dae351cf2ece23e13a199e3739cf047900d2483ae352077a
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_38_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2d3005c75a40d5527b7bb9408f95d7a8d0d88bad90e3d27366ad39a4a74d577
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_39_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8385d2e77de59998e506312227cc05538e16161aff68ce5530e4f167aad6f924
3
+ size 1840
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_3_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39f3d6a88545a6aa5a491913ac5af27a9c0aba2276062e6376c8be05309a7c80
3
+ size 1836
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_4_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dd23da6b46ad09b7fdc86505a0ba48be942322ff50d967d1c16bd33c56bc7a3
3
+ size 1836
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_5_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8772e4f8110d89ff0ac37552b3dac575065087e6b0a0f69c4a8054549c680e0d
3
+ size 1836
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_6_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c722b9c8e0679d87daa0b008640f08bdfccc537ab77a6a11cbf4b9e9498a523d
3
+ size 1836
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_7_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84c9e777d0f03a9203899cb8758943583231673128008c46746df755d1c1f322
3
+ size 1836
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_8_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8d12a059e6eda98124ae0f27507555a52837a3d0d3eebc0832c0f66b43bb40e
3
+ size 1836
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_9_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b01b5db9a41b2281b5568e51ec4c00a7698a5df72519da3031100e2867183979
3
+ size 1836
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/.zarray ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3916ec779470f49c2d4157b668b7ad1708b67fc2abd42bdad82a8f7e1ba84e7
3
+ size 230
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/0.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:209fe6bf34d3be1bb81511f0372ef79835aa0b39c1df70bd68c12c54a19980aa
3
+ size 10240