stellaathena committed on
Commit
360abcb
1 Parent(s): 92c5072

Added model weights

Browse files
Files changed (31) hide show
  1. configs/medium.yml +85 -0
  2. configs/sampling.yml +27 -0
  3. global_step250000/layer_00-model_00-model_states.pt +3 -0
  4. global_step250000/layer_02-model_00-model_states.pt +3 -0
  5. global_step250000/layer_03-model_00-model_states.pt +3 -0
  6. global_step250000/layer_04-model_00-model_states.pt +3 -0
  7. global_step250000/layer_05-model_00-model_states.pt +3 -0
  8. global_step250000/layer_06-model_00-model_states.pt +3 -0
  9. global_step250000/layer_07-model_00-model_states.pt +3 -0
  10. global_step250000/layer_08-model_00-model_states.pt +3 -0
  11. global_step250000/layer_09-model_00-model_states.pt +3 -0
  12. global_step250000/layer_10-model_00-model_states.pt +3 -0
  13. global_step250000/layer_11-model_00-model_states.pt +3 -0
  14. global_step250000/layer_12-model_00-model_states.pt +3 -0
  15. global_step250000/layer_13-model_00-model_states.pt +3 -0
  16. global_step250000/layer_14-model_00-model_states.pt +3 -0
  17. global_step250000/layer_15-model_00-model_states.pt +3 -0
  18. global_step250000/layer_16-model_00-model_states.pt +3 -0
  19. global_step250000/layer_17-model_00-model_states.pt +3 -0
  20. global_step250000/layer_18-model_00-model_states.pt +3 -0
  21. global_step250000/layer_19-model_00-model_states.pt +3 -0
  22. global_step250000/layer_20-model_00-model_states.pt +3 -0
  23. global_step250000/layer_21-model_00-model_states.pt +3 -0
  24. global_step250000/layer_22-model_00-model_states.pt +3 -0
  25. global_step250000/layer_23-model_00-model_states.pt +3 -0
  26. global_step250000/layer_24-model_00-model_states.pt +3 -0
  27. global_step250000/layer_25-model_00-model_states.pt +3 -0
  28. global_step250000/layer_27-model_00-model_states.pt +3 -0
  29. global_step250000/layer_28-model_00-model_states.pt +3 -0
  30. global_step250000/mp_rank_00_model_states.pt +3 -0
  31. latest +1 -0
configs/medium.yml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GPT-2 pretraining setup
2
+ {
3
+ # parallelism settings ( you will want to change these based on your cluster setup, ideally scheduling pipeline stages
4
+ # across the node boundaries )
5
+ "pipe-parallel-size": 1,
6
+ "model-parallel-size": 1,
7
+
8
+ # model settings
9
+ "num-layers": 24,
10
+ "hidden-size": 1024,
11
+ "num-attention-heads": 16,
12
+ "seq-length": 2048,
13
+ "max-position-embeddings": 2048,
14
+ "norm": "layernorm",
15
+ "pos-emb": "rotary",
16
+ "no-weight-tying": true,
17
+
18
+ # these should provide some speedup but take a while to build; set to true if desired
19
+ "scaled-upper-triang-masked-softmax-fusion": false,
20
+ "bias-gelu-fusion": false,
21
+
22
+
23
+
24
+ # optimizer settings
25
+ "optimizer": {
26
+ "type": "Adam",
27
+ "params": {
28
+ "lr": 0.0003,
29
+ "betas": [0.9, 0.999],
30
+ "eps": 1.0e-8,
31
+ }
32
+ },
33
+ "zero_optimization": {
34
+ "stage": 1,
35
+ "allgather_partitions": True,
36
+ "allgather_bucket_size": 500000000,
37
+ "overlap_comm": True,
38
+ "reduce_scatter": True,
39
+ "reduce_bucket_size": 500000000,
40
+ "contiguous_gradients": True,
41
+ "cpu_offload": False
42
+ },
43
+ # batch / data settings
44
+ "train_micro_batch_size_per_gpu": 4,
45
+ "data-impl": "mmap",
46
+ "split": "949,50,1",
47
+
48
+ # activation checkpointing
49
+ "checkpoint-activations": true,
50
+ "checkpoint-num-layers": 1,
51
+ "partition-activations": true,
52
+ "synchronize-each-layer": true,
53
+
54
+ # regularization
55
+ "gradient_clipping": 1.0,
56
+ "weight-decay": 0,
57
+ "hidden-dropout": 0,
58
+ "attention-dropout": 0,
59
+
60
+ # precision settings
61
+ "fp16": {
62
+ "fp16": true,
63
+ "enabled": true,
64
+ "loss_scale": 0,
65
+ "loss_scale_window": 1000,
66
+ "hysteresis": 2,
67
+ "min_loss_scale": 1
68
+ },
69
+
70
+ # misc. training settings
71
+ "train-iters": 320000,
72
+ "lr-decay-iters": 320000,
73
+ "distributed-backend": "nccl",
74
+ "lr-decay-style": "cosine",
75
+ "warmup": 0.01,
76
+ "save-interval": 10000,
77
+ "eval-interval": 1000,
78
+ "eval-iters": 10,
79
+
80
+ # logging
81
+ "log-interval": 100,
82
+ "steps_per_print": 10,
83
+ "keep-last-n-checkpoints": 4,
84
+ "wall_clock_breakdown": true,
85
+ }
configs/sampling.yml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Data paths and options when using the EleutherAI cluster
2
+ {
3
+ "data-path": "/mnt/ssd-1/data/enron/enron_text_document",
4
+ # or for weighted datasets:
5
+ # "train-data-paths": ["/mnt/ssd-1/data/enron/enron_text_document", "/mnt/ssd-cluster/data/enron/enron_text_document"],
6
+ # "test-data-paths": ["/mnt/ssd-1/data/enron/enron_text_document", "/mnt/ssd-cluster/data/enron/enron_text_document"],
7
+ # "valid-data-paths": ["/mnt/ssd-1/data/enron/enron_text_document", "/mnt/ssd-cluster/data/enron/enron_text_document"],
8
+ # "train-data-weights": [1., 2.],
9
+ # "test-data-weights": [2., 1.],
10
+ # "valid-data-weights": [0.5, 0.4],
11
+
12
+ "vocab-file": "/mnt/ssd-1/data/gpt2-vocab.json",
13
+ "merge-file": "/mnt/ssd-1/data/gpt2-merges.txt",
14
+ "save": "/mnt/ssd-1/neox_checkpoints/dense_medium_checkpoints",
15
+ "load": "/mnt/ssd-1/neox_checkpoints/dense_medium_checkpoints",
16
+ "tensorboard-dir": "/mnt/ssd-1/tensorboard",
17
+ "log-dir": "/mnt/ssd-1/logs",
18
+ "wandb_team": "eleutherai",
19
+ "sample-input-file":"/home/mchorse/gpt-neox/samplefile.txt",
20
+ "sample-output-file":"/home/mchorse/gpt-neox/sampleoutput.txt",
21
+ "text-gen-type": "input-file",
22
+ "maximum_tokens": 256,
23
+ "temperature": 1.0,
24
+ "top_p": 0.0,
25
+ "top_k": 0,
26
+ "recompute": false,
27
+ }
global_step250000/layer_00-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bca85f6870b79e37cd5b6d2f57a2d3ea4fa830d44097ba8a14f243a8e1593b3
3
+ size 810025908
global_step250000/layer_02-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df797b578efa8eabf9de6e3a4cd5868e3787495854a7637264f7c94625fdeb5c
3
+ size 810671102
global_step250000/layer_03-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45f1ab9920495ce4893eb0f7d049c2ccef14a518136e3b9966d3e724dc307d88
3
+ size 810671102
global_step250000/layer_04-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d96d548266423cf2de6433fda3cf2b9ee2d26c9ac47172c87aac9e06943a102e
3
+ size 810671102
global_step250000/layer_05-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:104d2211b0b084d1aee3079bf0847e3758d1b48630460410909028d280b58b3e
3
+ size 810671102
global_step250000/layer_06-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cabcc4fa3268a0ab44d55f4fedaa6be78bd4051202b73b2805727c3aec125f69
3
+ size 810671102
global_step250000/layer_07-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebb7f6cb0f66a6b433c1e9a7b930f7752501a269f0f8cef062b66d2f210dc979
3
+ size 810671102
global_step250000/layer_08-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83ce8cef87435ad429ef72398e9286d24f27c352467c7b8ad89f0b4e103f9f92
3
+ size 810671102
global_step250000/layer_09-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7622c4a8bb123810133813d9643319fd933b58783fe0520167f7da8fc59ff04
3
+ size 810671102
global_step250000/layer_10-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12f2b5e20fafdb6ab8e3b52f4c70ed8f1d82e2fbe856cf1705aa7cec0965b56f
3
+ size 810671102
global_step250000/layer_11-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:352c33d8e2be3a8bc5de14c41ec4333a0cca697c51076d65a204f2b11179fcc8
3
+ size 810671102
global_step250000/layer_12-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59efb776df07eae9132eb99481e8e672f0844392f980f2ae97fa23bb014e66ef
3
+ size 810671102
global_step250000/layer_13-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5d4f9e51c5f3c9689ee0872554877a6dd1d5ea5747df3651ca8a9ff0348d26b
3
+ size 810671102
global_step250000/layer_14-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e9e60cc873e6b019ba30159d629d31a9f8197973aa4dad548a77e67793e2f43
3
+ size 810671102
global_step250000/layer_15-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39fff6ab89f61e6670b14b47220b11a2c9d038ec3d364eabc226f3180cd06119
3
+ size 810671102
global_step250000/layer_16-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65eb63736bb059d86d758b30d94902830f3dd05e45311a8cfdbfd5b58d9c5875
3
+ size 810671102
global_step250000/layer_17-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b437ce62b339aecaf12ffd57d60724900fbe2bb68c3b8dc16f4b4000924b19c
3
+ size 810671102
global_step250000/layer_18-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28c87474ee6f1d10b39683296596a5c89ca507086bba52e2ee5d82e0462c3446
3
+ size 810671102
global_step250000/layer_19-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:430bff2f98c76fd87cee1a5d2537c06e87a262820bc184709026bcb37ef27b21
3
+ size 810671102
global_step250000/layer_20-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58d60d621f58c3f16968590381700de79d08fca1e1ddcd9fd1cf98fff7ea9c95
3
+ size 810671102
global_step250000/layer_21-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3a1b44ba860988ed6c2d6e94d2cd42707a6db44d592419180fa9fe8fff2d293
3
+ size 810671102
global_step250000/layer_22-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0a1e79006a96715bbcc04440400d1ac9c5cbe74e990af815e4beb1575cc519d
3
+ size 810671102
global_step250000/layer_23-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d28caa97642b8db432ee04df21c779b823fc58bcced1016cfd40d5592b5e144b
3
+ size 810671102
global_step250000/layer_24-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cb405a3b4ddb343ddfdb13f01916974bc3ac22cde057ad61868afc5d50d7fe5
3
+ size 810671102
global_step250000/layer_25-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccf890e94fcf2cf20e04ae603591fe03e0139f5d51571ecf1da1868a6dda873b
3
+ size 810671102
global_step250000/layer_27-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af885dc8fdb4b1057b29c4de949722e9280061da4f7c106d9ca0a370e2aa896e
3
+ size 644020
global_step250000/layer_28-model_00-model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19854635784b991c7c08756d4d615d1703e918d11e80c25faea57cec7f713da2
3
+ size 810025844
global_step250000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0f6bc8e3befff26762c7966d090064e692f5c16d136bbb8c07dcd25e86074e5
3
+ size 4864026812
latest ADDED
@@ -0,0 +1 @@
 
1
+ global_step250000