Commit
β’
dd5b800
1
Parent(s):
42d082a
Training in progress, step 1600
Browse files- checkpoint-1200/latest +0 -1
- {checkpoint-1200 β checkpoint-1600}/config.json +0 -0
- {checkpoint-1200 β checkpoint-1600}/generation_config.json +0 -0
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_4_mp_rank_00_model_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_5_mp_rank_00_model_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_6_mp_rank_00_model_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_7_mp_rank_00_model_states.pt +1 -1
- checkpoint-1600/latest +1 -0
- {checkpoint-1200 β checkpoint-1600}/model-00001-of-00002.safetensors +1 -1
- {checkpoint-1200 β checkpoint-1600}/model-00002-of-00002.safetensors +1 -1
- {checkpoint-1200 β checkpoint-1600}/model.safetensors.index.json +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_0.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_1.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_2.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_3.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_4.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_5.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_6.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_7.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/special_tokens_map.json +0 -0
- {checkpoint-1200 β checkpoint-1600}/tokenizer.json +0 -0
- {checkpoint-1200 β checkpoint-1600}/tokenizer.model +0 -0
- {checkpoint-1200 β checkpoint-1600}/tokenizer_config.json +0 -0
- {checkpoint-1200 β checkpoint-1600}/trainer_state.json +243 -3
- {checkpoint-1200 β checkpoint-1600}/training_args.bin +0 -0
- {checkpoint-1200 β checkpoint-1600}/zero_to_fp32.py +0 -0
- runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 +2 -2
checkpoint-1200/latest
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
global_step1200
|
|
|
|
{checkpoint-1200 β checkpoint-1600}/config.json
RENAMED
File without changes
|
{checkpoint-1200 β checkpoint-1600}/generation_config.json
RENAMED
File without changes
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 10107626487
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a340884351df9f228d3ba5317543b112e21edc4d1572228a4abd7118e419a6b
|
3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 10107626487
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27f75017ef2953071a44cde3f1155976fb76dde43b6395726f2683ee1ec2c250
|
3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 10107626487
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fdfad233cd4ea82dc4f02a8e5b074984bb6a29a18f6262838ec7b7f1630e0ac8
|
3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 10107626487
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5919bc8e571e808ff3b2bcb9ad7014597b2ee31e29993555c14491a33d11f095
|
3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 10107626487
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c354eb1ceeeb29fb3a5f70328aa4850898429dd56b0f49cfb1b15ac4000f975
|
3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 10107626487
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fead0347839e08434f2eed3165dad3e70691836ea6e9cf64ef56551331997bca
|
3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 10107626487
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8ffe9fbb7a126ec39112724337caecc4eae7d8e492d63490d8567816ba07929
|
3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 10107626487
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96e21de250ee92a08fe926befd4f33f363a7c9bd7ca8bbb6aef12bce9df04133
|
3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_0_mp_rank_00_model_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 168086
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91599707db75d7b03814882cbcbb8b854875d9e88102c0831463b1e42ed49ab1
|
3 |
size 168086
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_1_mp_rank_00_model_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 168086
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6831020de473339b10b00488798cd0193af1763454bfa118f482faf07d70a44d
|
3 |
size 168086
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_2_mp_rank_00_model_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 168086
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddb9abf6f8ac61756727e9fcc585fd5a1a70c1a20ca3eb82c79388ae9fe06779
|
3 |
size 168086
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_3_mp_rank_00_model_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 168086
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a24b271888c79b57824d2821f84a48dcb6381041a8b0c6c06463ebd491bb032d
|
3 |
size 168086
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_4_mp_rank_00_model_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 168086
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93af96588d797250ee8a47897e3e963aa391dabb815f6714e949496001e60f03
|
3 |
size 168086
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_5_mp_rank_00_model_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 168086
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:288e31b1bddda038ec9427bc5bb7b953b1d58f1e7c11dbbf18e74dc37512d377
|
3 |
size 168086
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_6_mp_rank_00_model_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 168086
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bab9e0ea1b5660b9e2221c0e11c6a20f979a0b1641b3514c45386900d1001699
|
3 |
size 168086
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_7_mp_rank_00_model_states.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 168086
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:122e850bb9d0ec838955d8d4d6eabbb7c4437e56754813e6645e6ab2a32d64c8
|
3 |
size 168086
|
checkpoint-1600/latest
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
global_step1600
|
{checkpoint-1200 β checkpoint-1600}/model-00001-of-00002.safetensors
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9976576392
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1bb8059dad24ddb21134712cb2d7c09e74afdb9c0ad8407d026104190bec0ae3
|
3 |
size 9976576392
|
{checkpoint-1200 β checkpoint-1600}/model-00002-of-00002.safetensors
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3500296504
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf47ea3281e028d797e91e4208dafdd29c9912946218b26b9fc740a42513cda2
|
3 |
size 3500296504
|
{checkpoint-1200 β checkpoint-1600}/model.safetensors.index.json
RENAMED
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_0.pth
RENAMED
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_1.pth
RENAMED
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_2.pth
RENAMED
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_3.pth
RENAMED
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_4.pth
RENAMED
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_5.pth
RENAMED
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_6.pth
RENAMED
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_7.pth
RENAMED
File without changes
|
{checkpoint-1200 β checkpoint-1600}/special_tokens_map.json
RENAMED
File without changes
|
{checkpoint-1200 β checkpoint-1600}/tokenizer.json
RENAMED
File without changes
|
{checkpoint-1200 β checkpoint-1600}/tokenizer.model
RENAMED
File without changes
|
{checkpoint-1200 β checkpoint-1600}/tokenizer_config.json
RENAMED
File without changes
|
{checkpoint-1200 β checkpoint-1600}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -726,11 +726,251 @@
|
|
726 |
"learning_rate": 0.0003,
|
727 |
"loss": 0.3855,
|
728 |
"step": 1200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
729 |
}
|
730 |
],
|
731 |
"max_steps": 3000,
|
732 |
"num_train_epochs": 9223372036854775807,
|
733 |
-
"total_flos":
|
734 |
"trial_name": null,
|
735 |
"trial_params": null
|
736 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.5333333333333333,
|
5 |
+
"global_step": 1600,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
726 |
"learning_rate": 0.0003,
|
727 |
"loss": 0.3855,
|
728 |
"step": 1200
|
729 |
+
},
|
730 |
+
{
|
731 |
+
"epoch": 0.4,
|
732 |
+
"learning_rate": 0.0003,
|
733 |
+
"loss": 0.3662,
|
734 |
+
"step": 1210
|
735 |
+
},
|
736 |
+
{
|
737 |
+
"epoch": 0.41,
|
738 |
+
"learning_rate": 0.0003,
|
739 |
+
"loss": 0.3727,
|
740 |
+
"step": 1220
|
741 |
+
},
|
742 |
+
{
|
743 |
+
"epoch": 0.41,
|
744 |
+
"learning_rate": 0.0003,
|
745 |
+
"loss": 0.3563,
|
746 |
+
"step": 1230
|
747 |
+
},
|
748 |
+
{
|
749 |
+
"epoch": 0.41,
|
750 |
+
"learning_rate": 0.0003,
|
751 |
+
"loss": 0.3297,
|
752 |
+
"step": 1240
|
753 |
+
},
|
754 |
+
{
|
755 |
+
"epoch": 0.42,
|
756 |
+
"learning_rate": 0.0003,
|
757 |
+
"loss": 0.314,
|
758 |
+
"step": 1250
|
759 |
+
},
|
760 |
+
{
|
761 |
+
"epoch": 0.42,
|
762 |
+
"learning_rate": 0.0003,
|
763 |
+
"loss": 0.2996,
|
764 |
+
"step": 1260
|
765 |
+
},
|
766 |
+
{
|
767 |
+
"epoch": 0.42,
|
768 |
+
"learning_rate": 0.0003,
|
769 |
+
"loss": 0.3,
|
770 |
+
"step": 1270
|
771 |
+
},
|
772 |
+
{
|
773 |
+
"epoch": 0.43,
|
774 |
+
"learning_rate": 0.0003,
|
775 |
+
"loss": 0.2773,
|
776 |
+
"step": 1280
|
777 |
+
},
|
778 |
+
{
|
779 |
+
"epoch": 0.43,
|
780 |
+
"learning_rate": 0.0003,
|
781 |
+
"loss": 0.2665,
|
782 |
+
"step": 1290
|
783 |
+
},
|
784 |
+
{
|
785 |
+
"epoch": 0.43,
|
786 |
+
"learning_rate": 0.0003,
|
787 |
+
"loss": 0.2646,
|
788 |
+
"step": 1300
|
789 |
+
},
|
790 |
+
{
|
791 |
+
"epoch": 0.44,
|
792 |
+
"learning_rate": 0.0003,
|
793 |
+
"loss": 0.2406,
|
794 |
+
"step": 1310
|
795 |
+
},
|
796 |
+
{
|
797 |
+
"epoch": 0.44,
|
798 |
+
"learning_rate": 0.0003,
|
799 |
+
"loss": 0.2206,
|
800 |
+
"step": 1320
|
801 |
+
},
|
802 |
+
{
|
803 |
+
"epoch": 0.44,
|
804 |
+
"learning_rate": 0.0003,
|
805 |
+
"loss": 0.2325,
|
806 |
+
"step": 1330
|
807 |
+
},
|
808 |
+
{
|
809 |
+
"epoch": 0.45,
|
810 |
+
"learning_rate": 0.0003,
|
811 |
+
"loss": 0.2152,
|
812 |
+
"step": 1340
|
813 |
+
},
|
814 |
+
{
|
815 |
+
"epoch": 0.45,
|
816 |
+
"learning_rate": 0.0003,
|
817 |
+
"loss": 0.2011,
|
818 |
+
"step": 1350
|
819 |
+
},
|
820 |
+
{
|
821 |
+
"epoch": 0.45,
|
822 |
+
"learning_rate": 0.0003,
|
823 |
+
"loss": 0.1953,
|
824 |
+
"step": 1360
|
825 |
+
},
|
826 |
+
{
|
827 |
+
"epoch": 0.46,
|
828 |
+
"learning_rate": 0.0003,
|
829 |
+
"loss": 0.1862,
|
830 |
+
"step": 1370
|
831 |
+
},
|
832 |
+
{
|
833 |
+
"epoch": 0.46,
|
834 |
+
"learning_rate": 0.0003,
|
835 |
+
"loss": 0.1814,
|
836 |
+
"step": 1380
|
837 |
+
},
|
838 |
+
{
|
839 |
+
"epoch": 0.46,
|
840 |
+
"learning_rate": 0.0003,
|
841 |
+
"loss": 0.1677,
|
842 |
+
"step": 1390
|
843 |
+
},
|
844 |
+
{
|
845 |
+
"epoch": 0.47,
|
846 |
+
"learning_rate": 0.0003,
|
847 |
+
"loss": 0.1657,
|
848 |
+
"step": 1400
|
849 |
+
},
|
850 |
+
{
|
851 |
+
"epoch": 0.47,
|
852 |
+
"learning_rate": 0.0003,
|
853 |
+
"loss": 0.155,
|
854 |
+
"step": 1410
|
855 |
+
},
|
856 |
+
{
|
857 |
+
"epoch": 0.47,
|
858 |
+
"learning_rate": 0.0003,
|
859 |
+
"loss": 0.1517,
|
860 |
+
"step": 1420
|
861 |
+
},
|
862 |
+
{
|
863 |
+
"epoch": 0.48,
|
864 |
+
"learning_rate": 0.0003,
|
865 |
+
"loss": 0.1481,
|
866 |
+
"step": 1430
|
867 |
+
},
|
868 |
+
{
|
869 |
+
"epoch": 0.48,
|
870 |
+
"learning_rate": 0.0003,
|
871 |
+
"loss": 0.1396,
|
872 |
+
"step": 1440
|
873 |
+
},
|
874 |
+
{
|
875 |
+
"epoch": 0.48,
|
876 |
+
"learning_rate": 0.0003,
|
877 |
+
"loss": 0.1301,
|
878 |
+
"step": 1450
|
879 |
+
},
|
880 |
+
{
|
881 |
+
"epoch": 0.49,
|
882 |
+
"learning_rate": 0.0003,
|
883 |
+
"loss": 0.1274,
|
884 |
+
"step": 1460
|
885 |
+
},
|
886 |
+
{
|
887 |
+
"epoch": 0.49,
|
888 |
+
"learning_rate": 0.0003,
|
889 |
+
"loss": 0.1244,
|
890 |
+
"step": 1470
|
891 |
+
},
|
892 |
+
{
|
893 |
+
"epoch": 0.49,
|
894 |
+
"learning_rate": 0.0003,
|
895 |
+
"loss": 0.1172,
|
896 |
+
"step": 1480
|
897 |
+
},
|
898 |
+
{
|
899 |
+
"epoch": 0.5,
|
900 |
+
"learning_rate": 0.0003,
|
901 |
+
"loss": 0.1177,
|
902 |
+
"step": 1490
|
903 |
+
},
|
904 |
+
{
|
905 |
+
"epoch": 0.5,
|
906 |
+
"learning_rate": 0.0003,
|
907 |
+
"loss": 0.1118,
|
908 |
+
"step": 1500
|
909 |
+
},
|
910 |
+
{
|
911 |
+
"epoch": 0.5,
|
912 |
+
"learning_rate": 0.0003,
|
913 |
+
"loss": 0.113,
|
914 |
+
"step": 1510
|
915 |
+
},
|
916 |
+
{
|
917 |
+
"epoch": 0.51,
|
918 |
+
"learning_rate": 0.0003,
|
919 |
+
"loss": 0.3687,
|
920 |
+
"step": 1520
|
921 |
+
},
|
922 |
+
{
|
923 |
+
"epoch": 0.51,
|
924 |
+
"learning_rate": 0.0003,
|
925 |
+
"loss": 0.1801,
|
926 |
+
"step": 1530
|
927 |
+
},
|
928 |
+
{
|
929 |
+
"epoch": 0.51,
|
930 |
+
"learning_rate": 0.0003,
|
931 |
+
"loss": 0.1534,
|
932 |
+
"step": 1540
|
933 |
+
},
|
934 |
+
{
|
935 |
+
"epoch": 0.52,
|
936 |
+
"learning_rate": 0.0003,
|
937 |
+
"loss": 0.1248,
|
938 |
+
"step": 1550
|
939 |
+
},
|
940 |
+
{
|
941 |
+
"epoch": 0.52,
|
942 |
+
"learning_rate": 0.0003,
|
943 |
+
"loss": 0.1091,
|
944 |
+
"step": 1560
|
945 |
+
},
|
946 |
+
{
|
947 |
+
"epoch": 0.52,
|
948 |
+
"learning_rate": 0.0003,
|
949 |
+
"loss": 0.0965,
|
950 |
+
"step": 1570
|
951 |
+
},
|
952 |
+
{
|
953 |
+
"epoch": 0.53,
|
954 |
+
"learning_rate": 0.0003,
|
955 |
+
"loss": 0.098,
|
956 |
+
"step": 1580
|
957 |
+
},
|
958 |
+
{
|
959 |
+
"epoch": 0.53,
|
960 |
+
"learning_rate": 0.0003,
|
961 |
+
"loss": 0.097,
|
962 |
+
"step": 1590
|
963 |
+
},
|
964 |
+
{
|
965 |
+
"epoch": 0.53,
|
966 |
+
"learning_rate": 0.0003,
|
967 |
+
"loss": 0.0885,
|
968 |
+
"step": 1600
|
969 |
}
|
970 |
],
|
971 |
"max_steps": 3000,
|
972 |
"num_train_epochs": 9223372036854775807,
|
973 |
+
"total_flos": 670014898176000.0,
|
974 |
"trial_name": null,
|
975 |
"trial_params": null
|
976 |
}
|
{checkpoint-1200 β checkpoint-1600}/training_args.bin
RENAMED
File without changes
|
{checkpoint-1200 β checkpoint-1600}/zero_to_fp32.py
RENAMED
File without changes
|
runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:677d38e5d0f2898fe52eabe578b18234ab0985e274317f79127ce656648b82b9
|
3 |
+
size 29271
|