neuralink committed on
Commit
1361151
1 Parent(s): 0a1724c

add the 70k checkpoint

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. 70000/checkpoint_metadata.json +9 -0
  2. 70000/config.yaml +139 -0
  3. 70000/lr_scheduler/lr_scheduler.pt +3 -0
  4. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
  5. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
  6. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
  7. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
  8. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
  9. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
  10. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
  11. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
  12. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
  13. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
  14. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
  15. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
  16. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
  17. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
  18. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
  19. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
  20. 70000/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors +3 -0
  21. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
  22. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
  23. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
  24. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
  25. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
  26. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
  27. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
  28. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
  29. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
  30. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
  31. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
  32. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
  33. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
  34. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
  35. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
  36. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
  37. 70000/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  38. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
  39. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
  40. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
  41. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
  42. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
  43. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
  44. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
  45. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
  46. 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
  47. 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
  48. 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
  49. 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
  50. 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
70000/checkpoint_metadata.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dp": 8,
3
+ "metas": {
4
+ "consumed_train_samples": 35840000,
5
+ "last_train_step": 70000
6
+ },
7
+ "tp": 8,
8
+ "version": "1.2"
9
+ }
70000/config.yaml ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoints:
2
+ checkpoint_interval: 5000
3
+ checkpoints_path: /fsx/phuc/checkpoints/doremi/big-run-02/reference-2.8b-llama-tuned-weights_with_100k_proxy
4
+ checkpoints_path_is_shared_file_system: true
5
+ resume_checkpoint_path: null
6
+ save_initial_state: false
7
+ data:
8
+ dataset:
9
+ dataset_overwrite_cache: false
10
+ dataset_processing_num_proc_per_process: 1
11
+ hf_dataset_config_name: null
12
+ hf_dataset_or_datasets: /fsx/phuc/project_data/doremi/datasets/the_pile_raw/tokenized_data/train
13
+ hf_dataset_splits: train
14
+ text_column_name: text
15
+ num_loading_workers: 1
16
+ seed: 42
17
+ doremi:
18
+ domain_names:
19
+ - Pile-CC
20
+ - Github
21
+ - OpenWebText2
22
+ - StackExchange
23
+ - Wikipedia (en)
24
+ - PubMed Abstracts
25
+ - USPTO Backgrounds
26
+ - FreeLaw
27
+ - PubMed Central
28
+ - Enron Emails
29
+ - HackerNews
30
+ - NIH ExPorter
31
+ - Books3
32
+ - ArXiv
33
+ - DM Mathematics
34
+ - OpenSubtitles
35
+ - Gutenberg (PG-19)
36
+ - Ubuntu IRC
37
+ - BookCorpus2
38
+ - EuroParl
39
+ - YoutubeSubtitles
40
+ - PhilPapers
41
+ domain_weights:
42
+ - 0.2333
43
+ - 0.07
44
+ - 0.1154
45
+ - 0.0528
46
+ - 0.0665
47
+ - 0.067
48
+ - 0.0366
49
+ - 0.0571
50
+ - 0.0451
51
+ - 0.0036
52
+ - 0.0087
53
+ - 0.0078
54
+ - 0.0708
55
+ - 0.0656
56
+ - 0.0034
57
+ - 0.0048
58
+ - 0.0222
59
+ - 0.0084
60
+ - 0.0038
61
+ - 0.0186
62
+ - 0.0149
63
+ - 0.0235
64
+ ref_model_checkpoint_path: null
65
+ ref_model_resume_checkpoint_path: null
66
+ general:
67
+ benchmark_csv_path: null
68
+ consumed_train_samples: 35840000
69
+ ignore_sanity_checks: true
70
+ project: nanotron
71
+ run: train_tuned_2.8b_model
72
+ seed: 42
73
+ step: 70000
74
+ logging:
75
+ iteration_step_info_interval: 1
76
+ log_level: info
77
+ log_level_replica: info
78
+ model:
79
+ ddp_bucket_cap_mb: 120
80
+ dtype: bfloat16
81
+ init_method:
82
+ std: 0.025
83
+ make_vocab_size_divisible_by: 1
84
+ model_config:
85
+ bos_token_id: 1
86
+ eos_token_id: 2
87
+ hidden_act: silu
88
+ hidden_size: 4096
89
+ initializer_range: 0.02
90
+ intermediate_size: 24576
91
+ is_llama_config: true
92
+ max_position_embeddings: 1024
93
+ num_attention_heads: 32
94
+ num_hidden_layers: 6
95
+ num_key_value_heads: 16
96
+ pad_token_id: null
97
+ pretraining_tp: 1
98
+ rms_norm_eps: 1.0e-05
99
+ rope_scaling: null
100
+ tie_word_embeddings: true
101
+ use_cache: true
102
+ vocab_size: 49152
103
+ optimizer:
104
+ accumulate_grad_in_fp32: true
105
+ adam_beta1: 0.9
106
+ adam_beta2: 0.95
107
+ adam_eps: 1.0e-08
108
+ clip_grad: 1.0
109
+ learning_rate_scheduler:
110
+ learning_rate: 0.0003
111
+ lr_decay_steps: 8
112
+ lr_decay_style: cosine
113
+ lr_warmup_steps: 2
114
+ lr_warmup_style: linear
115
+ min_decay_lr: 1.0e-05
116
+ torch_adam_is_fused: true
117
+ weight_decay: 0.01
118
+ zero_stage: 0
119
+ parallelism:
120
+ dp: 8
121
+ pp: 1
122
+ pp_engine: 1f1b
123
+ recompute_granularity: SELECTIVE
124
+ tp: 8
125
+ tp_linear_async_communication: true
126
+ tp_mode: REDUCE_SCATTER
127
+ profiler: null
128
+ tokenizer:
129
+ tokenizer_max_length: null
130
+ tokenizer_name_or_path: gpt2
131
+ tokenizer_revision: null
132
+ tokens:
133
+ batch_accumulation_per_replica: 1
134
+ limit_test_batches: 0
135
+ limit_val_batches: 8
136
+ micro_batch_size: 64
137
+ sequence_length: 1024
138
+ train_steps: 70000
139
+ val_check_interval: -1
70000/lr_scheduler/lr_scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f44f89c4642a0011361e62b7717a7243492b6f41d8aa83936b9c4e75cdab7cf4
3
+ size 1012
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b924abc2c694bab96dd718980d2fc41374bb6a98c2a2a7ef033ce2084a80d34a
3
+ size 4194536
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:befcaf368c5f7006f3f85a23ebe7ac2b8383f0e2dcb54f514485e450a803011a
3
+ size 4194544
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:634a0e7e3a8f6e98e7bfbd8bdb9e09663e6155177fe47a5b8dae9741aee93b56
3
+ size 4194544
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c42cbc6fda656a9efb9254ff1fb4b6aac1b1f53d9ffd08632df492a83e0578c7
3
+ size 4194544
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:571e457e7da01bab5afb26c12f45d31383b2b6b45d7930defd16c444b9d20705
3
+ size 4194544
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aec92ba7b1146b18237c31eba3ccd2704ce4c9b877e9e98310cf6b7c8c2daa6
3
+ size 4194544
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84491e45c7d80cd47f542836ee82aec17a0f332d0a0bde0a3785aa20b2fe2ca3
3
+ size 4194544
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d86ef426782938777a098a005c96184ad66e88b5559b32946096db74daeeec93
3
+ size 4194544
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5080436a11827555f2464b505bd293a4b5f6edefd3512b10581a67fa4fb7735
3
+ size 8388960
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9df3affd4284d543fe4ef9242de56e222a3bd61417b67b1c3437c2ddb471b0d
3
+ size 8388960
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4e1a2f0195db7bb0875718ad3aefc0e7fb36cc81d42e65c309aefee6d1b0d6b
3
+ size 8388960
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff413c42cbaa4def157792ac531370732e092e372cbccbb98f99138b48275c01
3
+ size 8388960
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f81d26513bf6ddd9a53ba9c7c925dfb53d47f36596426ff7402b1160a90a6e86
3
+ size 8388960
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0840250899782f7a0f5f246773ae560dc3bb0004d779dd30cdee0e133634da1
3
+ size 8388960
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d6e73a67f24e9dd5c03ffc7549fbdbc809a70a8f60d4636ca691a7fa1901efb
3
+ size 8388960
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:860fd9e1811888cf0dc8d38c1296edef0ae33fffe775a7f964dc7dd4f529db37
3
+ size 8388960
70000/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77aa2d98fa003f17cacd3e302f52690baf3dc7d1d51a1bcadc5c4b69b9765d8e
3
+ size 8288
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:388b1e2644d45b43de282701f8fd15b4bdc9657ccf6680d6021e511c66b57a5e
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:539857dae12a301c64f1cdcee5b9e755a3adf02b5f2e124e9dc3b2f9e248048e
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:536398da36df23920039973a73d140566043cc53ee2a0d914a8844372a64aff9
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d14cc8eddd75e465d397973a37b0782cb4904becec6aaf91a4367678873677c
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ce4f3f6650309f13f3e6d544de1e20e355a6f08806eef191cfd501c8ff2acbc
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a7fd52544d2ce19fd2ae3abd5446bf450e9dfee065f42e0f181afb2e5413a86
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7bfd2dcd8cf9b2c47812d070d52dfe7de4e44087e51ca1e1d579f2f5f334260
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64b15490e5736a45fc3703acb705c86fb53bf08913287d1dc7d64b223537e01a
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dcbe0259c3f13e64c37658cc5bed072e1b14d1c1fd7bd920a7737f5df08d69a
3
+ size 50331944
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12c42c59c00b0a680a2da7797fe742a81dc60b471dc8e98dc735110c7e5a2ad2
3
+ size 50331952
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0b40fd72f645c955728741df102ce92daa359c789b8bc35a7c009c49ce80396
3
+ size 50331952
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:347f3971a29037808915a992d49172b4f8aafe5142ed8517ef74cacab96238ca
3
+ size 50331952
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d72a03fe0dfbe9c614e25c2a339979ad70cdb71ab3d8292c146bccabf9e0afb6
3
+ size 50331952
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ec84771ce31f81f1ec995a024d49b497bba9e2024aeac5faadc8ffedf4458c7
3
+ size 50331952
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23f4fc83195f52847195a215dae1e010ca868000095092e64f69825ad050ef79
3
+ size 50331952
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c53af868894ef04ad4253fbf68c121f01599134ce660e637b8a5e3f883334413
3
+ size 50331952
70000/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cfef55c64df70d52293e020ab6059676279dee0eb6ec8b70021c2004340b1e8
3
+ size 8288
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6844d86e43c3eeeb2f837a178d03f1d4ee2000fca9030a908828fbd259198f8c
3
+ size 4194536
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf465add9144127313ccf1daed2830c00e80971ef8801b35f1a40c5432c800bd
3
+ size 4194544
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241fae3f5d988a9ed1547709a7bfbdb80adb33f62fc19465ebdb646996cc689e
3
+ size 4194544
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc7f9f5371026139f947dfbfb09a8f81bf04d0cf6851aa863edbd6846f240c29
3
+ size 4194544
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04d9cddd3c55dec8bf5d342e89a9704aa996387f3b7cf015152a7379498b3879
3
+ size 4194544
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:077701a3bda055d412b631689fd87725d2a46a5d3b2cbff7ef86d01c81374018
3
+ size 4194544
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7806c296fe37fe4b2e5f83a948a6a7dace14e112b3ed5493804f9cfc2438010
3
+ size 4194544
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dda24af303369e36c8aadf2f141f24e8286f63212cc4be01b58eeea199137347
3
+ size 4194544
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2ed20f3337c7ba5f80b3edc443d296071f232178daf676a330b0c420332f2f0
3
+ size 8388960
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0b8192b996f3fb5b193e441fabc579468aac681a98385d81b84bb1d0c48c57b
3
+ size 8388960
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88a8b9b3e76e271956e023d8d16968883902b927e1ea476ea0fe25a539b55ec5
3
+ size 8388960
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:806c1d4682253e4d0d7da1cbfe787dc50b60732f14f26aaefba8a5eddf088adf
3
+ size 8388960
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60b6c0c58e3321fe8f82ed19d2a1529e652949a61570ef3c931af659920291c8
3
+ size 8388960