22
Browse files- checkpoint-9000 +1 -0
- ds_config.json.not +50 -0
- ds_config.json.orig +50 -0
- pytorch_model.bin +1 -1
- run.sh +3 -3
- runs/Dec22_09-38-38_129-146-176-120/events.out.tfevents.1671701963.129-146-176-120.830862.0 +2 -2
- runs/Dec22_14-10-41_129-146-176-120/1671718288.5083408/events.out.tfevents.1671718288.129-146-176-120.835630.1 +3 -0
- runs/Dec22_14-10-41_129-146-176-120/events.out.tfevents.1671718288.129-146-176-120.835630.0 +3 -0
- runs/Dec22_14-15-43_129-146-176-120/1671718590.3176517/events.out.tfevents.1671718590.129-146-176-120.836086.1 +3 -0
- runs/Dec22_14-15-43_129-146-176-120/events.out.tfevents.1671718590.129-146-176-120.836086.0 +3 -0
- runs/Dec22_14-19-53_129-146-176-120/1671718840.647013/events.out.tfevents.1671718840.129-146-176-120.836864.1 +3 -0
- runs/Dec22_14-19-53_129-146-176-120/events.out.tfevents.1671718840.129-146-176-120.836864.0 +3 -0
- runs/Dec22_14-21-56_129-146-176-120/1671718963.776668/events.out.tfevents.1671718963.129-146-176-120.837337.1 +3 -0
- runs/Dec22_14-21-56_129-146-176-120/events.out.tfevents.1671718963.129-146-176-120.837337.0 +3 -0
- runs/Dec22_16-56-17_129-146-176-120/1671728230.4621089/events.out.tfevents.1671728230.129-146-176-120.840536.1 +3 -0
- runs/Dec22_16-56-17_129-146-176-120/events.out.tfevents.1671728230.129-146-176-120.840536.0 +3 -0
- runs/Dec22_17-04-43_129-146-176-120/1671728738.1289852/events.out.tfevents.1671728738.129-146-176-120.841203.1 +3 -0
- runs/Dec22_17-04-43_129-146-176-120/events.out.tfevents.1671728738.129-146-176-120.841203.0 +3 -0
- runs/Dec22_17-37-10_129-146-176-120/1671730676.7038546/events.out.tfevents.1671730676.129-146-176-120.841810.1 +3 -0
- runs/Dec22_17-37-10_129-146-176-120/events.out.tfevents.1671730676.129-146-176-120.841810.0 +3 -0
- training_args.bin +1 -1
checkpoint-9000
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
checkpoint-12000
|
ds_config.json.not
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"fp16": {
|
3 |
+
"enabled": "auto",
|
4 |
+
"loss_scale": 0,
|
5 |
+
"loss_scale_window": 1000,
|
6 |
+
"initial_scale_power": 16,
|
7 |
+
"hysteresis": 2,
|
8 |
+
"min_loss_scale": 1
|
9 |
+
},
|
10 |
+
|
11 |
+
"optimizer": {
|
12 |
+
"type": "AdamW",
|
13 |
+
"params": {
|
14 |
+
"lr": "auto",
|
15 |
+
"betas": "auto",
|
16 |
+
"eps": "auto",
|
17 |
+
"weight_decay": "auto"
|
18 |
+
}
|
19 |
+
},
|
20 |
+
|
21 |
+
"scheduler": {
|
22 |
+
"type": "OneCycle",
|
23 |
+
"params": {
|
24 |
+
"last_batch_iteration": -1,
|
25 |
+
"total_num_steps": "auto",
|
26 |
+
"warmup_min_lr": "auto",
|
27 |
+
"warmup_max_lr": "auto",
|
28 |
+
"warmup_num_steps": "auto"
|
29 |
+
}
|
30 |
+
},
|
31 |
+
|
32 |
+
"zero_optimization": {
|
33 |
+
"stage": 2,
|
34 |
+
"offload_optimizer": {
|
35 |
+
"device": "cpu",
|
36 |
+
"pin_memory": true
|
37 |
+
},
|
38 |
+
"allgather_partitions": true,
|
39 |
+
"allgather_bucket_size": 2e8,
|
40 |
+
"overlap_comm": true,
|
41 |
+
"reduce_scatter": true,
|
42 |
+
"reduce_bucket_size": 2e8,
|
43 |
+
"contiguous_gradients": true
|
44 |
+
},
|
45 |
+
|
46 |
+
"gradient_accumulation_steps": "auto",
|
47 |
+
"gradient_clipping": "auto",
|
48 |
+
"train_batch_size": "auto",
|
49 |
+
"train_micro_batch_size_per_gpu": "auto"
|
50 |
+
}
|
ds_config.json.orig
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"fp16": {
|
3 |
+
"enabled": "auto",
|
4 |
+
"loss_scale": 0,
|
5 |
+
"loss_scale_window": 1000,
|
6 |
+
"initial_scale_power": 16,
|
7 |
+
"hysteresis": 2,
|
8 |
+
"min_loss_scale": 1
|
9 |
+
},
|
10 |
+
|
11 |
+
"optimizer": {
|
12 |
+
"type": "AdamW",
|
13 |
+
"params": {
|
14 |
+
"lr": "auto",
|
15 |
+
"betas": "auto",
|
16 |
+
"eps": "auto",
|
17 |
+
"weight_decay": "auto"
|
18 |
+
}
|
19 |
+
},
|
20 |
+
|
21 |
+
"scheduler": {
|
22 |
+
"type": "WarmupDecayLR",
|
23 |
+
"params": {
|
24 |
+
"last_batch_iteration": -1,
|
25 |
+
"total_num_steps": "auto",
|
26 |
+
"warmup_min_lr": "auto",
|
27 |
+
"warmup_max_lr": "auto",
|
28 |
+
"warmup_num_steps": "auto"
|
29 |
+
}
|
30 |
+
},
|
31 |
+
|
32 |
+
"zero_optimization": {
|
33 |
+
"stage": 2,
|
34 |
+
"offload_optimizer": {
|
35 |
+
"device": "cpu",
|
36 |
+
"pin_memory": true
|
37 |
+
},
|
38 |
+
"allgather_partitions": true,
|
39 |
+
"allgather_bucket_size": 2e8,
|
40 |
+
"overlap_comm": true,
|
41 |
+
"reduce_scatter": true,
|
42 |
+
"reduce_bucket_size": 2e8,
|
43 |
+
"contiguous_gradients": true
|
44 |
+
},
|
45 |
+
|
46 |
+
"gradient_accumulation_steps": "auto",
|
47 |
+
"gradient_clipping": "auto",
|
48 |
+
"train_batch_size": "auto",
|
49 |
+
"train_micro_batch_size_per_gpu": "auto"
|
50 |
+
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1527847357
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da3cc32424000ff954bf49215879ed2e1a0d4eaab55388c0687d6ddcca9269e4
|
3 |
size 1527847357
|
run.sh
CHANGED
@@ -10,13 +10,13 @@ deepspeed run-ba.py \
|
|
10 |
--text_column_name="sentence" \
|
11 |
--streaming="False" \
|
12 |
--ignore_data_skip \
|
13 |
-
--resume_from_checkpoint="checkpoint-
|
14 |
--torch_compile="True" \
|
15 |
--torch_compile_mode="reduce-overhead" \
|
16 |
--torch_compile_mode="max-autotune" \
|
17 |
--logging_steps="25" \
|
18 |
-
--learning_rate="
|
19 |
-
--max_steps="
|
20 |
--output_dir="./" \
|
21 |
--per_device_train_batch_size="32" \
|
22 |
--gradient_accumulation_steps="1" \
|
|
|
10 |
--text_column_name="sentence" \
|
11 |
--streaming="False" \
|
12 |
--ignore_data_skip \
|
13 |
+
--resume_from_checkpoint="checkpoint-19000" \
|
14 |
--torch_compile="True" \
|
15 |
--torch_compile_mode="reduce-overhead" \
|
16 |
--torch_compile_mode="max-autotune" \
|
17 |
--logging_steps="25" \
|
18 |
+
--learning_rate="3e-6" \
|
19 |
+
--max_steps="19000" \
|
20 |
--output_dir="./" \
|
21 |
--per_device_train_batch_size="32" \
|
22 |
--gradient_accumulation_steps="1" \
|
runs/Dec22_09-38-38_129-146-176-120/events.out.tfevents.1671701963.129-146-176-120.830862.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a0ec69676229e7bc70c0d6e6c94e611da3c7a90c0c1fa9747a91d9e37a4d74c
|
3 |
+
size 32037
|
runs/Dec22_14-10-41_129-146-176-120/1671718288.5083408/events.out.tfevents.1671718288.129-146-176-120.835630.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f4f6f2a4beb435019093dbcd4c8abb32e696efe2dff30b9eaeb913c14416c12
|
3 |
+
size 5905
|
runs/Dec22_14-10-41_129-146-176-120/events.out.tfevents.1671718288.129-146-176-120.835630.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a513df02fbbacc5d41fbb60427a81d7c8c33d56bae2578bc969856641b3ea21a
|
3 |
+
size 4678
|
runs/Dec22_14-15-43_129-146-176-120/1671718590.3176517/events.out.tfevents.1671718590.129-146-176-120.836086.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:950a76771993ccc841ccffde75e5143e8b0cf5edc76fe70c4702c9b5ed6030ee
|
3 |
+
size 5905
|
runs/Dec22_14-15-43_129-146-176-120/events.out.tfevents.1671718590.129-146-176-120.836086.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f379df2d89232e9bb9dd50e2f178d9a94e83819c9f0f81ebe5b5ee6bf90219c1
|
3 |
+
size 4678
|
runs/Dec22_14-19-53_129-146-176-120/1671718840.647013/events.out.tfevents.1671718840.129-146-176-120.836864.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:828dc53a152bb05b8f428b4ab65624341c7d0f3116a9002121814941a0fcae10
|
3 |
+
size 5905
|
runs/Dec22_14-19-53_129-146-176-120/events.out.tfevents.1671718840.129-146-176-120.836864.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b685793e7fe2ed760f913188c41ae0f3610de9c5cb263d7f206ce162487bbb8c
|
3 |
+
size 4678
|
runs/Dec22_14-21-56_129-146-176-120/1671718963.776668/events.out.tfevents.1671718963.129-146-176-120.837337.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94e58cedb662a8b9a2a8c9ef5c515126c3d6102d10f858353515ddca18667967
|
3 |
+
size 5905
|
runs/Dec22_14-21-56_129-146-176-120/events.out.tfevents.1671718963.129-146-176-120.837337.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a15d31a27be449c45bd148756805bec717a045e31ff0090abde0520ac266e2bd
|
3 |
+
size 11042
|
runs/Dec22_16-56-17_129-146-176-120/1671728230.4621089/events.out.tfevents.1671728230.129-146-176-120.840536.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e86f74eabb164e217396adb3862926f83e2c8a446b10cb92fda92f0f9fc5532
|
3 |
+
size 5905
|
runs/Dec22_16-56-17_129-146-176-120/events.out.tfevents.1671728230.129-146-176-120.840536.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e264c48998d67d6ca5bca594f64f055d87f2a97102c8d89fd4b70a7c9db61850
|
3 |
+
size 4638
|
runs/Dec22_17-04-43_129-146-176-120/1671728738.1289852/events.out.tfevents.1671728738.129-146-176-120.841203.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce53ac1d02a6acf4997476350f245b82bee6f7f26233d548a4a3a5085713ae3a
|
3 |
+
size 5905
|
runs/Dec22_17-04-43_129-146-176-120/events.out.tfevents.1671728738.129-146-176-120.841203.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fe60c4ccd276569fd01d5fa92ffc2a944bec97d261b1125b67113834c9996b3
|
3 |
+
size 7357
|
runs/Dec22_17-37-10_129-146-176-120/1671730676.7038546/events.out.tfevents.1671730676.129-146-176-120.841810.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:895cbc5c6787d31e9e56c0417e517f2e43c1f63caea7e530091f1a1fe6482c68
|
3 |
+
size 5905
|
runs/Dec22_17-37-10_129-146-176-120/events.out.tfevents.1671730676.129-146-176-120.841810.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e0820296075c022e4d6cd6f4a652cbfcd2060afb34384706c6a0e277cbf55ba
|
3 |
+
size 4677
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4795
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdea1c10d1f1ed110f88766aa9ad66df2cba1add4701d0d9538b9250ae9331e7
|
3 |
size 4795
|