neuralink committed on
Commit
fc22efd
1 Parent(s): 388f34d

upload the 100k checkpoint

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. 100000/checkpoint_metadata.json +9 -0
  2. 100000/config.yaml +117 -0
  3. 100000/lr_scheduler/lr_scheduler.pt +3 -0
  4. 100000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  5. 100000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  6. 100000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  7. 100000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  8. 100000/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors +3 -0
  9. 100000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  10. 100000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  11. 100000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  12. 100000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  13. 100000/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  14. 100000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  15. 100000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  16. 100000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  17. 100000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  18. 100000/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors +3 -0
  19. 100000/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  20. 100000/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  21. 100000/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  22. 100000/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  23. 100000/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  24. 100000/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  25. 100000/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  26. 100000/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  27. 100000/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  28. 100000/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors +3 -0
  29. 100000/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  30. 100000/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  31. 100000/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  32. 100000/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  33. 100000/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  34. 100000/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  35. 100000/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  36. 100000/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  37. 100000/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  38. 100000/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors +3 -0
  39. 100000/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  40. 100000/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  41. 100000/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  42. 100000/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  43. 100000/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  44. 100000/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  45. 100000/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  46. 100000/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  47. 100000/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
  48. 100000/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors +3 -0
  49. 100000/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors +3 -0
  50. 100000/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors +3 -0
100000/checkpoint_metadata.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dp": 16,
3
+ "metas": {
4
+ "consumed_train_samples": 51200000,
5
+ "last_train_step": 100000
6
+ },
7
+ "tp": 2,
8
+ "version": "1.2"
9
+ }
100000/config.yaml ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoints:
2
+ checkpoint_interval: 1000
3
+ checkpoints_path: /fsx/phuc/checkpoints/doremi/big-run-02/refrence-280m-llama
4
+ checkpoints_path_is_shared_file_system: true
5
+ resume_checkpoint_path: null
6
+ save_initial_state: false
7
+ data:
8
+ dataset:
9
+ dataset_overwrite_cache: false
10
+ dataset_processing_num_proc_per_process: 1
11
+ hf_dataset_config_name: null
12
+ hf_dataset_or_datasets: /fsx/phuc/project_data/doremi/datasets/the_pile_raw/tokenized_data/train
13
+ hf_dataset_splits: train
14
+ text_column_name: text
15
+ num_loading_workers: 1
16
+ seed: 42
17
+ doremi:
18
+ domain_names:
19
+ - Pile-CC
20
+ - Github
21
+ - OpenWebText2
22
+ - StackExchange
23
+ - Wikipedia (en)
24
+ - PubMed Abstracts
25
+ - USPTO Backgrounds
26
+ - FreeLaw
27
+ - PubMed Central
28
+ - Enron Emails
29
+ - HackerNews
30
+ - NIH ExPorter
31
+ - Books3
32
+ - ArXiv
33
+ - DM Mathematics
34
+ - OpenSubtitles
35
+ - Gutenberg (PG-19)
36
+ - Ubuntu IRC
37
+ - BookCorpus2
38
+ - EuroParl
39
+ - YoutubeSubtitles
40
+ - PhilPapers
41
+ domain_weights: null
42
+ ref_model_checkpoint_path: null
43
+ ref_model_resume_checkpoint_path: null
44
+ general:
45
+ benchmark_csv_path: null
46
+ consumed_train_samples: 51200000
47
+ ignore_sanity_checks: true
48
+ project: doremi
49
+ run: train_280m_reference_model
50
+ seed: 42
51
+ step: 100000
52
+ logging:
53
+ iteration_step_info_interval: 1
54
+ log_level: info
55
+ log_level_replica: info
56
+ model:
57
+ ddp_bucket_cap_mb: 120
58
+ dtype: bfloat16
59
+ init_method:
60
+ std: 0.025
61
+ make_vocab_size_divisible_by: 1
62
+ model_config:
63
+ bos_token_id: 1
64
+ eos_token_id: 2
65
+ hidden_act: silu
66
+ hidden_size: 1024
67
+ initializer_range: 0.02
68
+ intermediate_size: 4096
69
+ is_llama_config: true
70
+ max_position_embeddings: 1024
71
+ num_attention_heads: 8
72
+ num_hidden_layers: 10
73
+ num_key_value_heads: 4
74
+ pad_token_id: null
75
+ pretraining_tp: 1
76
+ rms_norm_eps: 1.0e-05
77
+ rope_scaling: null
78
+ tie_word_embeddings: true
79
+ use_cache: true
80
+ vocab_size: 49152
81
+ optimizer:
82
+ accumulate_grad_in_fp32: true
83
+ adam_beta1: 0.9
84
+ adam_beta2: 0.95
85
+ adam_eps: 1.0e-08
86
+ clip_grad: 1.0
87
+ learning_rate_scheduler:
88
+ learning_rate: 0.0003
89
+ lr_decay_steps: 8
90
+ lr_decay_style: cosine
91
+ lr_warmup_steps: 2
92
+ lr_warmup_style: linear
93
+ min_decay_lr: 1.0e-05
94
+ torch_adam_is_fused: true
95
+ weight_decay: 0.01
96
+ zero_stage: 0
97
+ parallelism:
98
+ dp: 16
99
+ pp: 1
100
+ pp_engine: 1f1b
101
+ recompute_granularity: SELECTIVE
102
+ tp: 2
103
+ tp_linear_async_communication: true
104
+ tp_mode: REDUCE_SCATTER
105
+ profiler: null
106
+ tokenizer:
107
+ tokenizer_max_length: null
108
+ tokenizer_name_or_path: gpt2
109
+ tokenizer_revision: null
110
+ tokens:
111
+ batch_accumulation_per_replica: 1
112
+ limit_test_batches: 0
113
+ limit_val_batches: 0
114
+ micro_batch_size: 32
115
+ sequence_length: 1024
116
+ train_steps: 100000
117
+ val_check_interval: -1
100000/lr_scheduler/lr_scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85fa1779af5a5a8a5651de2b0227a581618cc5844ec96b8b8cd497d32769b6a7
3
+ size 1012
100000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e56cfbc3153f1e497e3e8d949de9632b58c4a28aab9c2efa709058c0519397fe
3
+ size 1048808
100000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ef6bce8677b268be537cc138e5b63bc7cdeb96a6331161cb35e9a3384d5748d
3
+ size 1048816
100000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ad9892b59f2924c19a932ee1637085f530fdb6d80b8014bfc92153968256a09
3
+ size 2097504
100000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96397b06c214fad889d2e86f727e2cfe96ac414793c29754bcb4e4165ca2c47f
3
+ size 2097504
100000/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36221b994a324a2440a5cf5dd2f3a61b1607277ce9a2e8ff00d38e2f5a0cdc8b
3
+ size 2144
100000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d893387a5d627d50e4a14ecf5e3481b19b807748e99e0c7780392cb3f996f308
3
+ size 4194544
100000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2932241feb0bd850fe5199e3e30e1aa4f57abea9bd2ecb7b6cfb4f73b4a9482
3
+ size 4194544
100000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3801839e600f45a96f6facce0e6719419bdd899d5caaf7acef5044b22f14a677
3
+ size 8388904
100000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2477715f80bb9a92d5c872786b7ac7f21af270bc3e4f51e438a514e530ba33a
3
+ size 8388904
100000/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fa0b601f22a1c25dd2a7fbdac15262cdc54e74c4206420150077da0061dd89b
3
+ size 2144
100000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f48a01feab25dc47db8c862450fa54bbf31d272383dd798a96afbacd863790ff
3
+ size 1048808
100000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af9b9160557c15012ba27bd63c8679131487a85927140174e05087761636e922
3
+ size 1048816
100000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11c134bf805be3dac2b09526809f3b23346a85b1d291d9683a5b4934c942bd45
3
+ size 2097504
100000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c50489b097403dc84dba1cfdacca387df048d1530d3721c8a0b1055f68bd264a
3
+ size 2097504
100000/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bbeec91d938818d3a9d6241f45edf69a289d9a2f0ba1a3c63ba9da967c3fdd1
3
+ size 2144
100000/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d0cbbb269dbf76daea52818edf8879037bf32c45db3ed0bee34454d3f6a9aac
3
+ size 4194544
100000/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67ec85ca44cc044f5e708ca09126de92efb5a7118df164ca7285152da9776f4b
3
+ size 4194544
100000/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81f4d335a350af262155e4a68546f6e4bb19b6bbc0da36203421c5f881db4248
3
+ size 8388904
100000/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2af6b21f34b68dc30412ab76022072e082f487db0b5590c1c79ee1e9d5d1108a
3
+ size 8388904
100000/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d95c96f4d87b6d598dc6a539d7deddb016887f29f42f7f68f21846a797773359
3
+ size 2144
100000/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b2c500e3998ab99b48ebb6e71eedd7ebf29ea1bc4eda60e982991b4aa004395
3
+ size 1048808
100000/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9099dd1c06edd9ee936a67545bedb13a7bba16abc87b4e00ea7a11d8544ff590
3
+ size 1048816
100000/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e84acfc83ec3bb07bbf21f671bda9fe200e2eff6295df1ffd9481f90a8e2802
3
+ size 2097504
100000/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e51754b3a4a3968cb92f8335b2cf5e151a3727618654dfc04a4c96843c522bf
3
+ size 2097504
100000/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69c036ef6a8596262091570f48cd439c75b602d16d3af57594ccbf2f16b993e1
3
+ size 2144
100000/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66454f115d8208d76172bd631fe044233db4ea92f6c77e5db5261e83dbf8d77c
3
+ size 4194544
100000/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4fdfcfc03a59ffbe00655a114bee4db16a4ac9165fc70fb97f5394bbeefa930
3
+ size 4194544
100000/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:298042c0b9267e4314d35c1bced11b2cff7082d0aba2fa761eb7da0cee772590
3
+ size 8388904
100000/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ac0fff50f55e9a689d2bed2a45bf2329506cc80572078ad0a0913b73587be8f
3
+ size 8388904
100000/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f034dca4d8d213d8d001b87230ba5a8ecf329de881b63712a35e27f618909a0
3
+ size 2144
100000/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:739055056da66f30fbfa0e17b2f40a5022b16ce79b978e7fefed8995be5b54fa
3
+ size 1048808
100000/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8754fb91b13640ba81423fb3d18e44ecad6c09539c16f59e81717701b09a023
3
+ size 1048816
100000/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98cb957be6d10f6a3fb61d86b8a3e8c65d00158bad31076a930201cfcc8d21db
3
+ size 2097504
100000/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f4ddc32f94d55063bceadbb3eb8dbe88d4003fb3447dd371b64ea581d7f53fe
3
+ size 2097504
100000/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ae43e98223b7a4fab21af138e365176a2f75d2cf0c44ad1887d38df8708c240
3
+ size 2144
100000/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e638055de7a95192365e0dd53fe16d5f03e6fa59bb43c1c39c8520fe874d9c9e
3
+ size 4194544
100000/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f20eee6982e42ed281cf3f3f56351f546c406f28a3be490d7968a8c802a1a5a0
3
+ size 4194544
100000/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe5f67d47cb28dc52bcad19701fac889724ed15c8c37310f41e281dfcc720bab
3
+ size 8388904
100000/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb690fba63dd3b538152c0a30171372da25f9ae34237454a69f2f68cb9262bbe
3
+ size 8388904
100000/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37a9d5c0190aa437e46c169fb864f8c21e868447537a28590921373ea537b661
3
+ size 2144
100000/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06e1fad6e4215e23a15c454064b0df68637b43e7ec3f74cbdc703ce170ceab3f
3
+ size 1048808
100000/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:102841695f5630b0d0ab035ec42058aa20a67a28a4e44ab83303539e16b23439
3
+ size 1048816
100000/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbba131e86d7f0cfb98c27cd8c1fd73b2183d18d2326d25ebd5756b46e8ab37a
3
+ size 2097504
100000/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37c6a6b084a70ac82b4b0bb035f5c1c4990ea4916f5dc1958247eb84cd945ef0
3
+ size 2097504
100000/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77de7ab0f88ff293d434dcc711cebaf136996b06c0b9afd9c3b5a94b8108a72f
3
+ size 2144
100000/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81500b3a2f1311a782db5b449c70007109947edecfd713dccd368d6eb3010655
3
+ size 4194544
100000/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ca7076e5227cd3067c18c5fa7a82e66a231d1a8195a2e1641d4e8d9e19ba9ef
3
+ size 4194544