upload the 100k checkpoint
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- 100000/checkpoint_metadata.json +9 -0
- 100000/config.yaml +117 -0
- 100000/lr_scheduler/lr_scheduler.pt +3 -0
- 100000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 100000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 100000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 100000/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 100000/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 100000/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 100000/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 100000/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 100000/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
- 100000/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 100000/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
- 100000/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
100000/checkpoint_metadata.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dp": 16,
|
3 |
+
"metas": {
|
4 |
+
"consumed_train_samples": 51200000,
|
5 |
+
"last_train_step": 100000
|
6 |
+
},
|
7 |
+
"tp": 2,
|
8 |
+
"version": "1.2"
|
9 |
+
}
|
100000/config.yaml
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
checkpoints:
|
2 |
+
checkpoint_interval: 1000
|
3 |
+
checkpoints_path: /fsx/phuc/checkpoints/doremi/big-run-02/refrence-280m-llama
|
4 |
+
checkpoints_path_is_shared_file_system: true
|
5 |
+
resume_checkpoint_path: null
|
6 |
+
save_initial_state: false
|
7 |
+
data:
|
8 |
+
dataset:
|
9 |
+
dataset_overwrite_cache: false
|
10 |
+
dataset_processing_num_proc_per_process: 1
|
11 |
+
hf_dataset_config_name: null
|
12 |
+
hf_dataset_or_datasets: /fsx/phuc/project_data/doremi/datasets/the_pile_raw/tokenized_data/train
|
13 |
+
hf_dataset_splits: train
|
14 |
+
text_column_name: text
|
15 |
+
num_loading_workers: 1
|
16 |
+
seed: 42
|
17 |
+
doremi:
|
18 |
+
domain_names:
|
19 |
+
- Pile-CC
|
20 |
+
- Github
|
21 |
+
- OpenWebText2
|
22 |
+
- StackExchange
|
23 |
+
- Wikipedia (en)
|
24 |
+
- PubMed Abstracts
|
25 |
+
- USPTO Backgrounds
|
26 |
+
- FreeLaw
|
27 |
+
- PubMed Central
|
28 |
+
- Enron Emails
|
29 |
+
- HackerNews
|
30 |
+
- NIH ExPorter
|
31 |
+
- Books3
|
32 |
+
- ArXiv
|
33 |
+
- DM Mathematics
|
34 |
+
- OpenSubtitles
|
35 |
+
- Gutenberg (PG-19)
|
36 |
+
- Ubuntu IRC
|
37 |
+
- BookCorpus2
|
38 |
+
- EuroParl
|
39 |
+
- YoutubeSubtitles
|
40 |
+
- PhilPapers
|
41 |
+
domain_weights: null
|
42 |
+
ref_model_checkpoint_path: null
|
43 |
+
ref_model_resume_checkpoint_path: null
|
44 |
+
general:
|
45 |
+
benchmark_csv_path: null
|
46 |
+
consumed_train_samples: 51200000
|
47 |
+
ignore_sanity_checks: true
|
48 |
+
project: doremi
|
49 |
+
run: train_280m_reference_model
|
50 |
+
seed: 42
|
51 |
+
step: 100000
|
52 |
+
logging:
|
53 |
+
iteration_step_info_interval: 1
|
54 |
+
log_level: info
|
55 |
+
log_level_replica: info
|
56 |
+
model:
|
57 |
+
ddp_bucket_cap_mb: 120
|
58 |
+
dtype: bfloat16
|
59 |
+
init_method:
|
60 |
+
std: 0.025
|
61 |
+
make_vocab_size_divisible_by: 1
|
62 |
+
model_config:
|
63 |
+
bos_token_id: 1
|
64 |
+
eos_token_id: 2
|
65 |
+
hidden_act: silu
|
66 |
+
hidden_size: 1024
|
67 |
+
initializer_range: 0.02
|
68 |
+
intermediate_size: 4096
|
69 |
+
is_llama_config: true
|
70 |
+
max_position_embeddings: 1024
|
71 |
+
num_attention_heads: 8
|
72 |
+
num_hidden_layers: 10
|
73 |
+
num_key_value_heads: 4
|
74 |
+
pad_token_id: null
|
75 |
+
pretraining_tp: 1
|
76 |
+
rms_norm_eps: 1.0e-05
|
77 |
+
rope_scaling: null
|
78 |
+
tie_word_embeddings: true
|
79 |
+
use_cache: true
|
80 |
+
vocab_size: 49152
|
81 |
+
optimizer:
|
82 |
+
accumulate_grad_in_fp32: true
|
83 |
+
adam_beta1: 0.9
|
84 |
+
adam_beta2: 0.95
|
85 |
+
adam_eps: 1.0e-08
|
86 |
+
clip_grad: 1.0
|
87 |
+
learning_rate_scheduler:
|
88 |
+
learning_rate: 0.0003
|
89 |
+
lr_decay_steps: 8
|
90 |
+
lr_decay_style: cosine
|
91 |
+
lr_warmup_steps: 2
|
92 |
+
lr_warmup_style: linear
|
93 |
+
min_decay_lr: 1.0e-05
|
94 |
+
torch_adam_is_fused: true
|
95 |
+
weight_decay: 0.01
|
96 |
+
zero_stage: 0
|
97 |
+
parallelism:
|
98 |
+
dp: 16
|
99 |
+
pp: 1
|
100 |
+
pp_engine: 1f1b
|
101 |
+
recompute_granularity: SELECTIVE
|
102 |
+
tp: 2
|
103 |
+
tp_linear_async_communication: true
|
104 |
+
tp_mode: REDUCE_SCATTER
|
105 |
+
profiler: null
|
106 |
+
tokenizer:
|
107 |
+
tokenizer_max_length: null
|
108 |
+
tokenizer_name_or_path: gpt2
|
109 |
+
tokenizer_revision: null
|
110 |
+
tokens:
|
111 |
+
batch_accumulation_per_replica: 1
|
112 |
+
limit_test_batches: 0
|
113 |
+
limit_val_batches: 0
|
114 |
+
micro_batch_size: 32
|
115 |
+
sequence_length: 1024
|
116 |
+
train_steps: 100000
|
117 |
+
val_check_interval: -1
|
100000/lr_scheduler/lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85fa1779af5a5a8a5651de2b0227a581618cc5844ec96b8b8cd497d32769b6a7
|
3 |
+
size 1012
|
100000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e56cfbc3153f1e497e3e8d949de9632b58c4a28aab9c2efa709058c0519397fe
|
3 |
+
size 1048808
|
100000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ef6bce8677b268be537cc138e5b63bc7cdeb96a6331161cb35e9a3384d5748d
|
3 |
+
size 1048816
|
100000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ad9892b59f2924c19a932ee1637085f530fdb6d80b8014bfc92153968256a09
|
3 |
+
size 2097504
|
100000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96397b06c214fad889d2e86f727e2cfe96ac414793c29754bcb4e4165ca2c47f
|
3 |
+
size 2097504
|
100000/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36221b994a324a2440a5cf5dd2f3a61b1607277ce9a2e8ff00d38e2f5a0cdc8b
|
3 |
+
size 2144
|
100000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d893387a5d627d50e4a14ecf5e3481b19b807748e99e0c7780392cb3f996f308
|
3 |
+
size 4194544
|
100000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2932241feb0bd850fe5199e3e30e1aa4f57abea9bd2ecb7b6cfb4f73b4a9482
|
3 |
+
size 4194544
|
100000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3801839e600f45a96f6facce0e6719419bdd899d5caaf7acef5044b22f14a677
|
3 |
+
size 8388904
|
100000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2477715f80bb9a92d5c872786b7ac7f21af270bc3e4f51e438a514e530ba33a
|
3 |
+
size 8388904
|
100000/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8fa0b601f22a1c25dd2a7fbdac15262cdc54e74c4206420150077da0061dd89b
|
3 |
+
size 2144
|
100000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f48a01feab25dc47db8c862450fa54bbf31d272383dd798a96afbacd863790ff
|
3 |
+
size 1048808
|
100000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af9b9160557c15012ba27bd63c8679131487a85927140174e05087761636e922
|
3 |
+
size 1048816
|
100000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11c134bf805be3dac2b09526809f3b23346a85b1d291d9683a5b4934c942bd45
|
3 |
+
size 2097504
|
100000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c50489b097403dc84dba1cfdacca387df048d1530d3721c8a0b1055f68bd264a
|
3 |
+
size 2097504
|
100000/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1bbeec91d938818d3a9d6241f45edf69a289d9a2f0ba1a3c63ba9da967c3fdd1
|
3 |
+
size 2144
|
100000/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d0cbbb269dbf76daea52818edf8879037bf32c45db3ed0bee34454d3f6a9aac
|
3 |
+
size 4194544
|
100000/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67ec85ca44cc044f5e708ca09126de92efb5a7118df164ca7285152da9776f4b
|
3 |
+
size 4194544
|
100000/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81f4d335a350af262155e4a68546f6e4bb19b6bbc0da36203421c5f881db4248
|
3 |
+
size 8388904
|
100000/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2af6b21f34b68dc30412ab76022072e082f487db0b5590c1c79ee1e9d5d1108a
|
3 |
+
size 8388904
|
100000/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d95c96f4d87b6d598dc6a539d7deddb016887f29f42f7f68f21846a797773359
|
3 |
+
size 2144
|
100000/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b2c500e3998ab99b48ebb6e71eedd7ebf29ea1bc4eda60e982991b4aa004395
|
3 |
+
size 1048808
|
100000/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9099dd1c06edd9ee936a67545bedb13a7bba16abc87b4e00ea7a11d8544ff590
|
3 |
+
size 1048816
|
100000/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e84acfc83ec3bb07bbf21f671bda9fe200e2eff6295df1ffd9481f90a8e2802
|
3 |
+
size 2097504
|
100000/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e51754b3a4a3968cb92f8335b2cf5e151a3727618654dfc04a4c96843c522bf
|
3 |
+
size 2097504
|
100000/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69c036ef6a8596262091570f48cd439c75b602d16d3af57594ccbf2f16b993e1
|
3 |
+
size 2144
|
100000/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66454f115d8208d76172bd631fe044233db4ea92f6c77e5db5261e83dbf8d77c
|
3 |
+
size 4194544
|
100000/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4fdfcfc03a59ffbe00655a114bee4db16a4ac9165fc70fb97f5394bbeefa930
|
3 |
+
size 4194544
|
100000/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:298042c0b9267e4314d35c1bced11b2cff7082d0aba2fa761eb7da0cee772590
|
3 |
+
size 8388904
|
100000/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ac0fff50f55e9a689d2bed2a45bf2329506cc80572078ad0a0913b73587be8f
|
3 |
+
size 8388904
|
100000/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f034dca4d8d213d8d001b87230ba5a8ecf329de881b63712a35e27f618909a0
|
3 |
+
size 2144
|
100000/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:739055056da66f30fbfa0e17b2f40a5022b16ce79b978e7fefed8995be5b54fa
|
3 |
+
size 1048808
|
100000/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8754fb91b13640ba81423fb3d18e44ecad6c09539c16f59e81717701b09a023
|
3 |
+
size 1048816
|
100000/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98cb957be6d10f6a3fb61d86b8a3e8c65d00158bad31076a930201cfcc8d21db
|
3 |
+
size 2097504
|
100000/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f4ddc32f94d55063bceadbb3eb8dbe88d4003fb3447dd371b64ea581d7f53fe
|
3 |
+
size 2097504
|
100000/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ae43e98223b7a4fab21af138e365176a2f75d2cf0c44ad1887d38df8708c240
|
3 |
+
size 2144
|
100000/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e638055de7a95192365e0dd53fe16d5f03e6fa59bb43c1c39c8520fe874d9c9e
|
3 |
+
size 4194544
|
100000/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f20eee6982e42ed281cf3f3f56351f546c406f28a3be490d7968a8c802a1a5a0
|
3 |
+
size 4194544
|
100000/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe5f67d47cb28dc52bcad19701fac889724ed15c8c37310f41e281dfcc720bab
|
3 |
+
size 8388904
|
100000/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb690fba63dd3b538152c0a30171372da25f9ae34237454a69f2f68cb9262bbe
|
3 |
+
size 8388904
|
100000/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37a9d5c0190aa437e46c169fb864f8c21e868447537a28590921373ea537b661
|
3 |
+
size 2144
|
100000/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06e1fad6e4215e23a15c454064b0df68637b43e7ec3f74cbdc703ce170ceab3f
|
3 |
+
size 1048808
|
100000/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:102841695f5630b0d0ab035ec42058aa20a67a28a4e44ab83303539e16b23439
|
3 |
+
size 1048816
|
100000/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dbba131e86d7f0cfb98c27cd8c1fd73b2183d18d2326d25ebd5756b46e8ab37a
|
3 |
+
size 2097504
|
100000/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37c6a6b084a70ac82b4b0bb035f5c1c4990ea4916f5dc1958247eb84cd945ef0
|
3 |
+
size 2097504
|
100000/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77de7ab0f88ff293d434dcc711cebaf136996b06c0b9afd9c3b5a94b8108a72f
|
3 |
+
size 2144
|
100000/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81500b3a2f1311a782db5b449c70007109947edecfd713dccd368d6eb3010655
|
3 |
+
size 4194544
|
100000/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ca7076e5227cd3067c18c5fa7a82e66a231d1a8195a2e1641d4e8d9e19ba9ef
|
3 |
+
size 4194544
|