neuralink committed on
Commit
4a65de7
1 Parent(s): bcf6fbf

add the 70k checkpoint

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. 70000/checkpoint_metadata.json +9 -0
  2. 70000/config.yaml +117 -0
  3. 70000/lr_scheduler/lr_scheduler.pt +3 -0
  4. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
  5. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
  6. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
  7. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
  8. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
  9. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
  10. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
  11. 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
  12. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
  13. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
  14. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
  15. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
  16. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
  17. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
  18. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
  19. 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
  20. 70000/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors +3 -0
  21. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
  22. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
  23. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
  24. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
  25. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
  26. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
  27. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
  28. 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
  29. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
  30. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
  31. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
  32. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
  33. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
  34. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
  35. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
  36. 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
  37. 70000/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  38. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
  39. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
  40. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
  41. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
  42. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
  43. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
  44. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
  45. 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
  46. 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
  47. 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
  48. 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
  49. 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
  50. 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
70000/checkpoint_metadata.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dp": 8,
3
+ "metas": {
4
+ "consumed_train_samples": 35840000,
5
+ "last_train_step": 70000
6
+ },
7
+ "tp": 8,
8
+ "version": "1.2"
9
+ }
70000/config.yaml ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoints:
2
+ checkpoint_interval: 1000
3
+ checkpoints_path: /fsx/phuc/checkpoints/doremi/big-run-02/reference-2.8b-llama
4
+ checkpoints_path_is_shared_file_system: true
5
+ resume_checkpoint_path: null
6
+ save_initial_state: false
7
+ data:
8
+ dataset:
9
+ dataset_overwrite_cache: false
10
+ dataset_processing_num_proc_per_process: 1
11
+ hf_dataset_config_name: null
12
+ hf_dataset_or_datasets: /fsx/phuc/project_data/doremi/datasets/the_pile_raw/tokenized_data/train
13
+ hf_dataset_splits: train
14
+ text_column_name: text
15
+ num_loading_workers: 1
16
+ seed: 42
17
+ doremi:
18
+ domain_names:
19
+ - Pile-CC
20
+ - Github
21
+ - OpenWebText2
22
+ - StackExchange
23
+ - Wikipedia (en)
24
+ - PubMed Abstracts
25
+ - USPTO Backgrounds
26
+ - FreeLaw
27
+ - PubMed Central
28
+ - Enron Emails
29
+ - HackerNews
30
+ - NIH ExPorter
31
+ - Books3
32
+ - ArXiv
33
+ - DM Mathematics
34
+ - OpenSubtitles
35
+ - Gutenberg (PG-19)
36
+ - Ubuntu IRC
37
+ - BookCorpus2
38
+ - EuroParl
39
+ - YoutubeSubtitles
40
+ - PhilPapers
41
+ domain_weights: null
42
+ ref_model_checkpoint_path: null
43
+ ref_model_resume_checkpoint_path: null
44
+ general:
45
+ benchmark_csv_path: null
46
+ consumed_train_samples: 35840000
47
+ ignore_sanity_checks: true
48
+ project: nanotron
49
+ run: train_2.8b_llama_reference
50
+ seed: 42
51
+ step: 70000
52
+ logging:
53
+ iteration_step_info_interval: 1
54
+ log_level: info
55
+ log_level_replica: info
56
+ model:
57
+ ddp_bucket_cap_mb: 120
58
+ dtype: bfloat16
59
+ init_method:
60
+ std: 0.025
61
+ make_vocab_size_divisible_by: 1
62
+ model_config:
63
+ bos_token_id: 1
64
+ eos_token_id: 2
65
+ hidden_act: silu
66
+ hidden_size: 4096
67
+ initializer_range: 0.02
68
+ intermediate_size: 24576
69
+ is_llama_config: true
70
+ max_position_embeddings: 1024
71
+ num_attention_heads: 32
72
+ num_hidden_layers: 6
73
+ num_key_value_heads: 16
74
+ pad_token_id: null
75
+ pretraining_tp: 1
76
+ rms_norm_eps: 1.0e-05
77
+ rope_scaling: null
78
+ tie_word_embeddings: true
79
+ use_cache: true
80
+ vocab_size: 49152
81
+ optimizer:
82
+ accumulate_grad_in_fp32: true
83
+ adam_beta1: 0.9
84
+ adam_beta2: 0.95
85
+ adam_eps: 1.0e-08
86
+ clip_grad: 1.0
87
+ learning_rate_scheduler:
88
+ learning_rate: 0.0003
89
+ lr_decay_steps: 8
90
+ lr_decay_style: cosine
91
+ lr_warmup_steps: 2
92
+ lr_warmup_style: linear
93
+ min_decay_lr: 1.0e-05
94
+ torch_adam_is_fused: true
95
+ weight_decay: 0.01
96
+ zero_stage: 0
97
+ parallelism:
98
+ dp: 8
99
+ pp: 1
100
+ pp_engine: 1f1b
101
+ recompute_granularity: SELECTIVE
102
+ tp: 8
103
+ tp_linear_async_communication: true
104
+ tp_mode: REDUCE_SCATTER
105
+ profiler: null
106
+ tokenizer:
107
+ tokenizer_max_length: null
108
+ tokenizer_name_or_path: gpt2
109
+ tokenizer_revision: null
110
+ tokens:
111
+ batch_accumulation_per_replica: 1
112
+ limit_test_batches: 0
113
+ limit_val_batches: 1
114
+ micro_batch_size: 64
115
+ sequence_length: 1024
116
+ train_steps: 70000
117
+ val_check_interval: 2
70000/lr_scheduler/lr_scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f44f89c4642a0011361e62b7717a7243492b6f41d8aa83936b9c4e75cdab7cf4
3
+ size 1012
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0abf19c86ca2f7cfaf21e332657ce4aa2bc48b39b2da96b6f4fbc378fb505688
3
+ size 4194536
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5fda12a56b61ffc1d0ecf3ff69da872e1dbe40c2b7754f33231762956f31b64
3
+ size 4194544
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d17c372d1d44a630949516686a3a2f534740d07e5df4ec0be616650702b601c
3
+ size 4194544
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dfce8f93074595eb78584c368c83b2ade33d6e51cba6ab49c2fb6c19dd7690a
3
+ size 4194544
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2d9aebab109f77c446ff1e499dbd0de77d3021ff1b08a0839abf4a7a42afd0a
3
+ size 4194544
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40dcad668f886d37fff57396b032e68893dad2dcba8c06b70cc215018bc88457
3
+ size 4194544
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7216b15c25c20ef7982567eb827a5ab55ad2e182e103c2165f3ba094160f647
3
+ size 4194544
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1db77215815ef2e8f48b09a9d2c70878961cc6abceb2374b8e45dd128baa94bf
3
+ size 4194544
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b90f90ae9b7a6b5e5b405cf05f8fa38129123da008cbe2a946daaa337123562c
3
+ size 8388960
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59798fb4200a88427d3d180c86c8fc9c86d480e7cfff68ba8ce2ab81c925e3b7
3
+ size 8388960
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:356a6d4f5f0c79fc5fd25388898d7003d041fb2f4ea68ae258961ae19295d556
3
+ size 8388960
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be4980dcf2b1e901bb89bb493916c5b6feb4fd893628933ba9137939ad90a36d
3
+ size 8388960
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21a53e1670bc13f3cfbba60b81b8e743f5ffe9c33733d3e01675f86a8e0d5966
3
+ size 8388960
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a3d53157ad38bd236ba0a5df55c81500d581c62f529a724a1e5a5a9b2ab1d07
3
+ size 8388960
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:438373dc72a984ba6c2692b571a8f20a0ed066dde0dc35a7f4615fa987917a63
3
+ size 8388960
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3acadfaf19b33d24ca07555ac6b81825bc9ae087dacbe426ba43f2c6ebcdb200
3
+ size 8388960
70000/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:774ad370a6b80a9cf09dd458cbe7b76161038158a897388f61c799d49eb181f1
3
+ size 8288
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3dcc62fa20a5df18c54a6b0a3ecd79b7b70bb8131e96443fd6125ead3173b00
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01b833cecf89636acebc108f7aafc5be7778d330cc9fc5cedd070f2f6a41181c
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01b7785b5ebc0c79acf9a2f6e3eddba90d3f853f7ddb713aba37e38c9b0cdbe0
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9dda633c265872a9b669c282b5fcc07d0cbb36791878174e27ccd0186535800
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb08aeecd28f8354315bfeac041db8ba48cf8c62b05736037574307c5521f261
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b96a0d464d05de9e6b1fd9bbc815f7916dce982ca134bd63b0c8e77473fa5278
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:612e384536e2baaf39b56dbe05c0033f1cff456ba86a3e97f9a1bcb999ed5f66
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2410ebb49b0c40efed0d6bdfdc431e18b80e220c8b462e850d884b395615372
3
+ size 25166064
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce9baec6cb9ef9ca5030ca644d43e432a8bdbf33dfa083eec0f3df4896d2eadf
3
+ size 50331944
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d532f1a432a703eabe9240a32842fb20382d68dfd25e42a5de2a3f265b1e467a
3
+ size 50331952
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:882868708d235ced2ef99dd6287d7550ad15e017602277a9b34f8f671cf33aa8
3
+ size 50331952
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:430ac7107dd22e67603f019646bcf0e9bac7718ccf23a6322fe36de5da7861c6
3
+ size 50331952
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2486a1e8fba32ac068f82fd5533070fd75fa86ef16c01b5ecc6f80f074fe0d23
3
+ size 50331952
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5529e52c6546f35f6ff7c06e8dc13dfc516daa6d2042f95410ab145d137da06
3
+ size 50331952
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:625a133fc1cb2a40398e66ac12b1eff18858a6bc31c915e5b815055ee7ab9a1f
3
+ size 50331952
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edcd885402490c7d3e648a5f8c258ae56bc0fce4dc6ba2ebcf1bc9307f490a5f
3
+ size 50331952
70000/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84d18f18bc4ef6f53c78eb2fb30302e7ebf04201488a9371f42d6cf457e148e4
3
+ size 8288
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3703978be2a58536e1003809b8939732e238ed14efbe2df98d9dab3b215def3a
3
+ size 4194536
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98e2e6893d73e7598f9d359d7be94ac7ba80abca894e96097281fc6652c5b167
3
+ size 4194544
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d4ef5bc53e2c1360e82d506024449f29002e25d223810a5d5093a794d8b3295
3
+ size 4194544
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee42f765ae98cf907ed8408f24aed93f89135d43f2378fbf29a807f7c89bb24a
3
+ size 4194544
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b2377adcc2197810ca5ca6745cf28a917f9a1305cd3dd111b41643ca1d96342
3
+ size 4194544
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c84163236fb84904dcdcfd571923c051b8503cac08f860db756d37039590d95
3
+ size 4194544
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0181f25bf6752b326eb895c32bf875f9ec40719f282bac1a5b23258b458388a0
3
+ size 4194544
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0393ee44d4a634243e155c08817020e02ef6ab8a776d6814a5373ff91a9cf4f
3
+ size 4194544
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70c09c40d56b84709b1af4fa8e6ae3f7c33983410a92182bf7a6b1f598fccff3
3
+ size 8388960
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb2f1055fc26ba14e6432f25d0a50cef46625ee2e9363d779046e66eac6c142e
3
+ size 8388960
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2959d35e64183a1da7890043fbf62e5e24fe9111380921d807d4c165963294d8
3
+ size 8388960
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5028f5993b200c829b51a8ea5dca396fdb2847d1a9b25dcb6e72f6e961849989
3
+ size 8388960
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d76a9f4a64d9d6e6377d0aba28db56286cc8a294d7ddb84134b9b8cf45fcf6d8
3
+ size 8388960