add the 70k checkpoint
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- 70000/checkpoint_metadata.json +9 -0
- 70000/config.yaml +117 -0
- 70000/lr_scheduler/lr_scheduler.pt +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
70000/checkpoint_metadata.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dp": 8,
|
3 |
+
"metas": {
|
4 |
+
"consumed_train_samples": 35840000,
|
5 |
+
"last_train_step": 70000
|
6 |
+
},
|
7 |
+
"tp": 8,
|
8 |
+
"version": "1.2"
|
9 |
+
}
|
70000/config.yaml
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
checkpoints:
|
2 |
+
checkpoint_interval: 1000
|
3 |
+
checkpoints_path: /fsx/phuc/checkpoints/doremi/big-run-02/reference-2.8b-llama
|
4 |
+
checkpoints_path_is_shared_file_system: true
|
5 |
+
resume_checkpoint_path: null
|
6 |
+
save_initial_state: false
|
7 |
+
data:
|
8 |
+
dataset:
|
9 |
+
dataset_overwrite_cache: false
|
10 |
+
dataset_processing_num_proc_per_process: 1
|
11 |
+
hf_dataset_config_name: null
|
12 |
+
hf_dataset_or_datasets: /fsx/phuc/project_data/doremi/datasets/the_pile_raw/tokenized_data/train
|
13 |
+
hf_dataset_splits: train
|
14 |
+
text_column_name: text
|
15 |
+
num_loading_workers: 1
|
16 |
+
seed: 42
|
17 |
+
doremi:
|
18 |
+
domain_names:
|
19 |
+
- Pile-CC
|
20 |
+
- Github
|
21 |
+
- OpenWebText2
|
22 |
+
- StackExchange
|
23 |
+
- Wikipedia (en)
|
24 |
+
- PubMed Abstracts
|
25 |
+
- USPTO Backgrounds
|
26 |
+
- FreeLaw
|
27 |
+
- PubMed Central
|
28 |
+
- Enron Emails
|
29 |
+
- HackerNews
|
30 |
+
- NIH ExPorter
|
31 |
+
- Books3
|
32 |
+
- ArXiv
|
33 |
+
- DM Mathematics
|
34 |
+
- OpenSubtitles
|
35 |
+
- Gutenberg (PG-19)
|
36 |
+
- Ubuntu IRC
|
37 |
+
- BookCorpus2
|
38 |
+
- EuroParl
|
39 |
+
- YoutubeSubtitles
|
40 |
+
- PhilPapers
|
41 |
+
domain_weights: null
|
42 |
+
ref_model_checkpoint_path: null
|
43 |
+
ref_model_resume_checkpoint_path: null
|
44 |
+
general:
|
45 |
+
benchmark_csv_path: null
|
46 |
+
consumed_train_samples: 35840000
|
47 |
+
ignore_sanity_checks: true
|
48 |
+
project: nanotron
|
49 |
+
run: train_2.8b_llama_reference
|
50 |
+
seed: 42
|
51 |
+
step: 70000
|
52 |
+
logging:
|
53 |
+
iteration_step_info_interval: 1
|
54 |
+
log_level: info
|
55 |
+
log_level_replica: info
|
56 |
+
model:
|
57 |
+
ddp_bucket_cap_mb: 120
|
58 |
+
dtype: bfloat16
|
59 |
+
init_method:
|
60 |
+
std: 0.025
|
61 |
+
make_vocab_size_divisible_by: 1
|
62 |
+
model_config:
|
63 |
+
bos_token_id: 1
|
64 |
+
eos_token_id: 2
|
65 |
+
hidden_act: silu
|
66 |
+
hidden_size: 4096
|
67 |
+
initializer_range: 0.02
|
68 |
+
intermediate_size: 24576
|
69 |
+
is_llama_config: true
|
70 |
+
max_position_embeddings: 1024
|
71 |
+
num_attention_heads: 32
|
72 |
+
num_hidden_layers: 6
|
73 |
+
num_key_value_heads: 16
|
74 |
+
pad_token_id: null
|
75 |
+
pretraining_tp: 1
|
76 |
+
rms_norm_eps: 1.0e-05
|
77 |
+
rope_scaling: null
|
78 |
+
tie_word_embeddings: true
|
79 |
+
use_cache: true
|
80 |
+
vocab_size: 49152
|
81 |
+
optimizer:
|
82 |
+
accumulate_grad_in_fp32: true
|
83 |
+
adam_beta1: 0.9
|
84 |
+
adam_beta2: 0.95
|
85 |
+
adam_eps: 1.0e-08
|
86 |
+
clip_grad: 1.0
|
87 |
+
learning_rate_scheduler:
|
88 |
+
learning_rate: 0.0003
|
89 |
+
lr_decay_steps: 8
|
90 |
+
lr_decay_style: cosine
|
91 |
+
lr_warmup_steps: 2
|
92 |
+
lr_warmup_style: linear
|
93 |
+
min_decay_lr: 1.0e-05
|
94 |
+
torch_adam_is_fused: true
|
95 |
+
weight_decay: 0.01
|
96 |
+
zero_stage: 0
|
97 |
+
parallelism:
|
98 |
+
dp: 8
|
99 |
+
pp: 1
|
100 |
+
pp_engine: 1f1b
|
101 |
+
recompute_granularity: SELECTIVE
|
102 |
+
tp: 8
|
103 |
+
tp_linear_async_communication: true
|
104 |
+
tp_mode: REDUCE_SCATTER
|
105 |
+
profiler: null
|
106 |
+
tokenizer:
|
107 |
+
tokenizer_max_length: null
|
108 |
+
tokenizer_name_or_path: gpt2
|
109 |
+
tokenizer_revision: null
|
110 |
+
tokens:
|
111 |
+
batch_accumulation_per_replica: 1
|
112 |
+
limit_test_batches: 0
|
113 |
+
limit_val_batches: 1
|
114 |
+
micro_batch_size: 64
|
115 |
+
sequence_length: 1024
|
116 |
+
train_steps: 70000
|
117 |
+
val_check_interval: 2
|
70000/lr_scheduler/lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f44f89c4642a0011361e62b7717a7243492b6f41d8aa83936b9c4e75cdab7cf4
|
3 |
+
size 1012
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0abf19c86ca2f7cfaf21e332657ce4aa2bc48b39b2da96b6f4fbc378fb505688
|
3 |
+
size 4194536
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5fda12a56b61ffc1d0ecf3ff69da872e1dbe40c2b7754f33231762956f31b64
|
3 |
+
size 4194544
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d17c372d1d44a630949516686a3a2f534740d07e5df4ec0be616650702b601c
|
3 |
+
size 4194544
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7dfce8f93074595eb78584c368c83b2ade33d6e51cba6ab49c2fb6c19dd7690a
|
3 |
+
size 4194544
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2d9aebab109f77c446ff1e499dbd0de77d3021ff1b08a0839abf4a7a42afd0a
|
3 |
+
size 4194544
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40dcad668f886d37fff57396b032e68893dad2dcba8c06b70cc215018bc88457
|
3 |
+
size 4194544
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7216b15c25c20ef7982567eb827a5ab55ad2e182e103c2165f3ba094160f647
|
3 |
+
size 4194544
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1db77215815ef2e8f48b09a9d2c70878961cc6abceb2374b8e45dd128baa94bf
|
3 |
+
size 4194544
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b90f90ae9b7a6b5e5b405cf05f8fa38129123da008cbe2a946daaa337123562c
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59798fb4200a88427d3d180c86c8fc9c86d480e7cfff68ba8ce2ab81c925e3b7
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:356a6d4f5f0c79fc5fd25388898d7003d041fb2f4ea68ae258961ae19295d556
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be4980dcf2b1e901bb89bb493916c5b6feb4fd893628933ba9137939ad90a36d
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21a53e1670bc13f3cfbba60b81b8e743f5ffe9c33733d3e01675f86a8e0d5966
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a3d53157ad38bd236ba0a5df55c81500d581c62f529a724a1e5a5a9b2ab1d07
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:438373dc72a984ba6c2692b571a8f20a0ed066dde0dc35a7f4615fa987917a63
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3acadfaf19b33d24ca07555ac6b81825bc9ae087dacbe426ba43f2c6ebcdb200
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:774ad370a6b80a9cf09dd458cbe7b76161038158a897388f61c799d49eb181f1
|
3 |
+
size 8288
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3dcc62fa20a5df18c54a6b0a3ecd79b7b70bb8131e96443fd6125ead3173b00
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01b833cecf89636acebc108f7aafc5be7778d330cc9fc5cedd070f2f6a41181c
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01b7785b5ebc0c79acf9a2f6e3eddba90d3f853f7ddb713aba37e38c9b0cdbe0
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9dda633c265872a9b669c282b5fcc07d0cbb36791878174e27ccd0186535800
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb08aeecd28f8354315bfeac041db8ba48cf8c62b05736037574307c5521f261
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b96a0d464d05de9e6b1fd9bbc815f7916dce982ca134bd63b0c8e77473fa5278
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:612e384536e2baaf39b56dbe05c0033f1cff456ba86a3e97f9a1bcb999ed5f66
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2410ebb49b0c40efed0d6bdfdc431e18b80e220c8b462e850d884b395615372
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce9baec6cb9ef9ca5030ca644d43e432a8bdbf33dfa083eec0f3df4896d2eadf
|
3 |
+
size 50331944
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d532f1a432a703eabe9240a32842fb20382d68dfd25e42a5de2a3f265b1e467a
|
3 |
+
size 50331952
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:882868708d235ced2ef99dd6287d7550ad15e017602277a9b34f8f671cf33aa8
|
3 |
+
size 50331952
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:430ac7107dd22e67603f019646bcf0e9bac7718ccf23a6322fe36de5da7861c6
|
3 |
+
size 50331952
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2486a1e8fba32ac068f82fd5533070fd75fa86ef16c01b5ecc6f80f074fe0d23
|
3 |
+
size 50331952
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5529e52c6546f35f6ff7c06e8dc13dfc516daa6d2042f95410ab145d137da06
|
3 |
+
size 50331952
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:625a133fc1cb2a40398e66ac12b1eff18858a6bc31c915e5b815055ee7ab9a1f
|
3 |
+
size 50331952
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:edcd885402490c7d3e648a5f8c258ae56bc0fce4dc6ba2ebcf1bc9307f490a5f
|
3 |
+
size 50331952
|
70000/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84d18f18bc4ef6f53c78eb2fb30302e7ebf04201488a9371f42d6cf457e148e4
|
3 |
+
size 8288
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3703978be2a58536e1003809b8939732e238ed14efbe2df98d9dab3b215def3a
|
3 |
+
size 4194536
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98e2e6893d73e7598f9d359d7be94ac7ba80abca894e96097281fc6652c5b167
|
3 |
+
size 4194544
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d4ef5bc53e2c1360e82d506024449f29002e25d223810a5d5093a794d8b3295
|
3 |
+
size 4194544
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee42f765ae98cf907ed8408f24aed93f89135d43f2378fbf29a807f7c89bb24a
|
3 |
+
size 4194544
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b2377adcc2197810ca5ca6745cf28a917f9a1305cd3dd111b41643ca1d96342
|
3 |
+
size 4194544
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c84163236fb84904dcdcfd571923c051b8503cac08f860db756d37039590d95
|
3 |
+
size 4194544
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0181f25bf6752b326eb895c32bf875f9ec40719f282bac1a5b23258b458388a0
|
3 |
+
size 4194544
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0393ee44d4a634243e155c08817020e02ef6ab8a776d6814a5373ff91a9cf4f
|
3 |
+
size 4194544
|
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70c09c40d56b84709b1af4fa8e6ae3f7c33983410a92182bf7a6b1f598fccff3
|
3 |
+
size 8388960
|
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb2f1055fc26ba14e6432f25d0a50cef46625ee2e9363d779046e66eac6c142e
|
3 |
+
size 8388960
|
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2959d35e64183a1da7890043fbf62e5e24fe9111380921d807d4c165963294d8
|
3 |
+
size 8388960
|
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5028f5993b200c829b51a8ea5dca396fdb2847d1a9b25dcb6e72f6e961849989
|
3 |
+
size 8388960
|
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d76a9f4a64d9d6e6377d0aba28db56286cc8a294d7ddb84134b9b8cf45fcf6d8
|
3 |
+
size 8388960
|