Add model files
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- nemo_model/740dedcfac234904a4c5ed5ceb07d686_tokenizer.model +3 -0
- nemo_model/8da13ec9455c435fa215279cba8b8214_tokenizer.model +3 -0
- nemo_model/model_config.yaml +122 -0
- nemo_model/model_weights/common.pt +3 -0
- nemo_model/model_weights/metadata.json +3 -0
- nemo_model/model_weights/model.decoder.final_layernorm.weight/.zarray +3 -0
- nemo_model/model_weights/model.decoder.final_layernorm.weight/0 +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_0_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_10_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_11_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_12_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_13_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_14_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_15_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_16_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_17_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_18_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_19_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_1_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_20_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_21_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_22_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_23_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_24_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_25_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_26_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_27_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_28_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_29_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_2_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_30_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_31_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_32_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_33_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_34_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_35_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_36_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_37_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_38_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_39_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_3_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_4_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_5_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_6_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_7_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_8_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_9_40.pt +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/.zarray +3 -0
- nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/0.0 +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
nemo_model/model_weights/** filter=lfs diff=lfs merge=lfs -text
|
nemo_model/740dedcfac234904a4c5ed5ceb07d686_tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
3 |
+
size 499723
|
nemo_model/8da13ec9455c435fa215279cba8b8214_tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
3 |
+
size 499723
|
nemo_model/model_config.yaml
ADDED
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
mcore_gpt: true
|
2 |
+
micro_batch_size: 4
|
3 |
+
global_batch_size: 8
|
4 |
+
tensor_model_parallel_size: 1
|
5 |
+
pipeline_model_parallel_size: 1
|
6 |
+
virtual_pipeline_model_parallel_size: null
|
7 |
+
encoder_seq_length: 16384
|
8 |
+
max_position_embeddings: 16384
|
9 |
+
num_layers: 40
|
10 |
+
hidden_size: 5120
|
11 |
+
ffn_hidden_size: 13824
|
12 |
+
num_attention_heads: 40
|
13 |
+
init_method_std: 0.02
|
14 |
+
use_scaled_init_method: true
|
15 |
+
hidden_dropout: 0.0
|
16 |
+
attention_dropout: 0.0
|
17 |
+
ffn_dropout: 0.0
|
18 |
+
kv_channels: null
|
19 |
+
apply_query_key_layer_scaling: true
|
20 |
+
normalization: rmsnorm
|
21 |
+
layernorm_epsilon: 1.0e-05
|
22 |
+
do_layer_norm_weight_decay: false
|
23 |
+
make_vocab_size_divisible_by: 128
|
24 |
+
pre_process: true
|
25 |
+
post_process: true
|
26 |
+
persist_layer_norm: true
|
27 |
+
bias: false
|
28 |
+
activation: fast-swiglu
|
29 |
+
headscale: false
|
30 |
+
transformer_block_type: pre_ln
|
31 |
+
openai_gelu: false
|
32 |
+
normalize_attention_scores: true
|
33 |
+
position_embedding_type: rope
|
34 |
+
rotary_percentage: 1.0
|
35 |
+
attention_type: multihead
|
36 |
+
share_embeddings_and_output_weights: false
|
37 |
+
overlap_p2p_comm: false
|
38 |
+
batch_p2p_comm: true
|
39 |
+
num_query_groups: 40
|
40 |
+
tokenizer:
|
41 |
+
library: sentencepiece
|
42 |
+
type: null
|
43 |
+
model: nemo:8da13ec9455c435fa215279cba8b8214_tokenizer.model
|
44 |
+
vocab_file: null
|
45 |
+
merge_file: null
|
46 |
+
delimiter: null
|
47 |
+
sentencepiece_legacy: false
|
48 |
+
tokenizer_model: nemo:740dedcfac234904a4c5ed5ceb07d686_tokenizer.model
|
49 |
+
native_amp_init_scale: 4294967296
|
50 |
+
native_amp_growth_interval: 1000
|
51 |
+
hysteresis: 2
|
52 |
+
fp32_residual_connection: false
|
53 |
+
fp16_lm_cross_entropy: false
|
54 |
+
megatron_amp_O2: false
|
55 |
+
grad_allreduce_chunk_size_mb: 125
|
56 |
+
grad_div_ar_fusion: true
|
57 |
+
gradient_accumulation_fusion: false
|
58 |
+
bias_activation_fusion: false
|
59 |
+
bias_dropout_add_fusion: false
|
60 |
+
masked_softmax_fusion: true
|
61 |
+
get_attention_mask_from_fusion: true
|
62 |
+
seed: 1234
|
63 |
+
resume_from_checkpoint: null
|
64 |
+
use_cpu_initialization: false
|
65 |
+
onnx_safe: false
|
66 |
+
apex_transformer_log_level: 30
|
67 |
+
gradient_as_bucket_view: true
|
68 |
+
sync_batch_comm: false
|
69 |
+
activations_checkpoint_granularity: null
|
70 |
+
activations_checkpoint_method: null
|
71 |
+
activations_checkpoint_num_layers: null
|
72 |
+
num_micro_batches_with_partial_activation_checkpoints: null
|
73 |
+
activations_checkpoint_layers_per_pipeline: null
|
74 |
+
sequence_parallel: false
|
75 |
+
transformer_engine: true
|
76 |
+
fp8: false
|
77 |
+
fp8_e4m3: false
|
78 |
+
fp8_hybrid: false
|
79 |
+
fp8_margin: 0
|
80 |
+
fp8_interval: 1
|
81 |
+
fp8_amax_history_len: 1
|
82 |
+
fp8_amax_compute_algo: most_recent
|
83 |
+
reduce_amax: true
|
84 |
+
use_emha: false
|
85 |
+
data:
|
86 |
+
index_mapping_dir: null
|
87 |
+
data_impl: mmap
|
88 |
+
splits_string: 900,50,50
|
89 |
+
seq_length: 16384
|
90 |
+
skip_warmup: true
|
91 |
+
num_workers: 2
|
92 |
+
dataloader_type: single
|
93 |
+
reset_position_ids: false
|
94 |
+
reset_attention_mask: false
|
95 |
+
eod_mask_loss: false
|
96 |
+
validation_drop_last: true
|
97 |
+
no_seqlen_plus_one_input_tokens: false
|
98 |
+
pad_samples_to_global_batch_size: false
|
99 |
+
shuffle_documents: true
|
100 |
+
nsys_profile:
|
101 |
+
enabled: false
|
102 |
+
start_step: 10
|
103 |
+
end_step: 10
|
104 |
+
ranks:
|
105 |
+
- 0
|
106 |
+
gen_shape: false
|
107 |
+
optim:
|
108 |
+
name: fused_adam
|
109 |
+
lr: 0.0002
|
110 |
+
weight_decay: 0.01
|
111 |
+
betas:
|
112 |
+
- 0.9
|
113 |
+
- 0.98
|
114 |
+
sched:
|
115 |
+
name: CosineAnnealing
|
116 |
+
warmup_steps: 500
|
117 |
+
constant_steps: 50000
|
118 |
+
min_lr: 2.0e-05
|
119 |
+
precision: bf16
|
120 |
+
target: nemo.collections.nlp.models.language_modeling.megatron_gpt_model.MegatronGPTModel
|
121 |
+
nemo_version: 1.21.0
|
122 |
+
rotary_base: 1000000
|
nemo_model/model_weights/common.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f76a0eb3ecbf990b3c170880671cb68aca95f8190998b84aea67237d679ac15d
|
3 |
+
size 24013
|
nemo_model/model_weights/metadata.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f502ab47c9b1fe00615afb88074d49cf6f9b4c85f482734543160c5cb2f76af9
|
3 |
+
size 113
|
nemo_model/model_weights/model.decoder.final_layernorm.weight/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12b714654004b7f6ba8848aeb7af681a4e5147e49a6d59c08a1d5268dc8dd7f5
|
3 |
+
size 207
|
nemo_model/model_weights/model.decoder.final_layernorm.weight/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9dd801dacfcc73069c3beca7253986dd52df10bb31aaf17304f4be4424c238a
|
3 |
+
size 10240
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_0_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31c40dd0287f1a27458af09bb74b29153cac5444aa1bd2f4cce0d83294c6cc80
|
3 |
+
size 1836
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_10_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ced1aad5f43ff19604b3887d7f489cecd16310155b1515bda0f15d1a1a279ff5
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_11_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60c4e29a8324371cc5b87e502911ed070eea041c0e1bf965f9df21b1b0d9567a
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_12_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f13452b9d3aefa72c5b3778cf49ed98919489239f9608f06a3e08276b3c084f4
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_13_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e62fd54a0c84dda40ba327a03010ad974965f12b9e73a7f337449e1b0480ee2
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_14_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd8297dbbe8697c5c1af84e605e02903ce287d0db4c57003723cbe8e1d0216b0
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_15_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c2eb0a283068b8b53638406aa4aad7177e9e020b94998675fca6e772e9bc8ab
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_16_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b120e3c7ce9d6add97c76e06320a69b75d73965046300ba783513a52bf3dc59c
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_17_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46557afd4c478d8b95b004f2b98a8ae73ea48e2eeef0035b2c94edbf0b5e4da0
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_18_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b511ca88f8ca1a07ce92cbf8fce312bafca62e0c9cffbcc59c4b100aed67bc27
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_19_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c056d48650655c18add9a3fa57dc8cb49cff869d80439682a2d8bce099048867
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_1_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71d52f0bdf8a6a6dd2d9b98000e8d7edccb7aaf522ec3215b0ef13d7057f99cf
|
3 |
+
size 1836
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_20_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91a75c59b219bc3fe062de79103d7c81dc110cfb35630efe65128dc7442f0ea1
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_21_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad583e4785f3058325b56414dd384d14d7ae9ee26e71e225b33d70b6bea65249
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_22_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a167b3ab9467519e5172d0325288049acc57f8dedde9b3ff5744eee67d19cf0d
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_23_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ad3f50e0b8200294e0cd7df2744327fd5a1fb1c71a828bff372411194bead0b
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_24_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e28818a6e54608f2e37819ebcac34fe7875bb3991e74ccb63db0edff7c74e442
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_25_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:351c2700526ef617d44dbb51648b4944e2962ff557b7542450a27cc5a366150c
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_26_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99a49c7263b7e1910f1501ebfe94f22c66bf21f4b260db044201348a14f2ea0d
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_27_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:045c3b3a90c971f41db7e30ed0055a23cc8f754bc0e795e8c66712acdcc3c599
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_28_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ce179735c7fde2f2a68b2fb4a771f27cc07895e036f4d369129372775edd614
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_29_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b97cf35cc732b1d32ac528d6de7ab26a8d6eb6bc658936045bbb22e75d1518c9
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_2_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3046f1273a22949255fdf9e4122eca82022c620a497e9d7552da9b331076be6b
|
3 |
+
size 1836
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_30_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:487bae520c2c087535facd524d2c50bdee0f835f91493645ed520004b765002e
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_31_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ed3bec1cdb5d795d2e5c5670ae8ad6dced1f52b51b522f8f5e96ddc2350a3aa
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_32_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ea3d93c1a486aa715b926b15312d6ebb5d86105ad976ee7dca467db07d3c0dc
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_33_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:616737c0a2ccaba7edd3265ce1583b12086e0ff1a452322c4b0afd403f588887
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_34_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b88de03d76fe635a139ea4269c60f8b40be5d6269fe55b987722f7dca2ff34ac
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_35_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84ce38eeb065d81e732feb21df356657c4c88c1434ff090fbd686fc8070134d0
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_36_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d859d2add2b7ca45431a5176642e6adcac9b973dc0967473f9f4d542ecec0c1e
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_37_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f968f4f10dbdaa37dae351cf2ece23e13a199e3739cf047900d2483ae352077a
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_38_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2d3005c75a40d5527b7bb9408f95d7a8d0d88bad90e3d27366ad39a4a74d577
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_39_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8385d2e77de59998e506312227cc05538e16161aff68ce5530e4f167aad6f924
|
3 |
+
size 1840
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_3_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39f3d6a88545a6aa5a491913ac5af27a9c0aba2276062e6376c8be05309a7c80
|
3 |
+
size 1836
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_4_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1dd23da6b46ad09b7fdc86505a0ba48be942322ff50d967d1c16bd33c56bc7a3
|
3 |
+
size 1836
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_5_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8772e4f8110d89ff0ac37552b3dac575065087e6b0a0f69c4a8054549c680e0d
|
3 |
+
size 1836
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_6_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c722b9c8e0679d87daa0b008640f08bdfccc537ab77a6a11cbf4b9e9498a523d
|
3 |
+
size 1836
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_7_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84c9e777d0f03a9203899cb8758943583231673128008c46746df755d1c1f322
|
3 |
+
size 1836
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_8_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8d12a059e6eda98124ae0f27507555a52837a3d0d3eebc0832c0f66b43bb40e
|
3 |
+
size 1836
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1._extra_state/shard_9_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b01b5db9a41b2281b5568e51ec4c00a7698a5df72519da3031100e2867183979
|
3 |
+
size 1836
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3916ec779470f49c2d4157b668b7ad1708b67fc2abd42bdad82a8f7e1ba84e7
|
3 |
+
size 230
|
nemo_model/model_weights/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/0.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:209fe6bf34d3be1bb81511f0372ef79835aa0b39c1df70bd68c12c54a19980aa
|
3 |
+
size 10240
|