diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt index a05db8f40eb682bd9f5b40e6e3d9e97393fea1d9..4d34cd5deea4ee5a5b13328aca44db6567030b26 100644 --- a/checkpoint-1000/optimizer.pt +++ b/checkpoint-1000/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc82743274985f20435c1a3f07714d1d9c5d49fd3f939e47d1db0139e53c0160 +oid sha256:40044f58ba6e4ad02a3afa257490cb1756f235f72ce0587d739f1457a50fd940 size 995605189 diff --git a/checkpoint-1000/pytorch_model.bin b/checkpoint-1000/pytorch_model.bin index 5cc606105d8f4897790c93c8ca61367869cb8c43..137143cd9e7b1f768aab4ee60d6f1f96dc5a8d50 100644 --- a/checkpoint-1000/pytorch_model.bin +++ b/checkpoint-1000/pytorch_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b27730d57f0da00683c2b12bd176295a9dbeb119e402e80445699aa0caac51e8 +oid sha256:577150ca55e27834c0a07cb22cf121e9dd81fff56de0a9eaf1751f8d42cc9931 size 497807197 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth index dccaa4032dc44ada0ca7c72db274d2b579fe7367..b8a8d616f57a393cd3130b9be8c8218978f537ac 100644 --- a/checkpoint-1000/rng_state.pth +++ b/checkpoint-1000/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73543bf529463744a086bed7aa7a2369d7afaf931eb0765f938b912a54e0182f +oid sha256:a4e3efb199f5967e04c2f29974f5a4b60568298ec35d18c73cb2af6348f20a84 size 14575 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt index 30a266fb6c00d61a6df9b19fbee4308c1e07dee6..4d25bcee5f253037286c53234d36d6d5713c86f4 100644 --- a/checkpoint-1000/scheduler.pt +++ b/checkpoint-1000/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3003d262ae7e5e74c25925ba898d88b2fd6386924a71b1f0c91fba5b532b78f2 +oid sha256:3d3caf872438b5f86b8ac9cfbc93cd607ede756cfd2b6545eed4877e4009dac2 size 627 diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json index 2f804f2509baab7f741f1be6b42a74ef7326ac61..d6dcc410a7341f1c14840e24401c30d312edd929 100644 --- a/checkpoint-1000/trainer_state.json +++ b/checkpoint-1000/trainer_state.json @@ -1,28 +1,28 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 38.46153846153846, + "epoch": 12.048192771084338, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 19.23, - "learning_rate": 3.0769230769230774e-05, - "loss": 1.3827, + "epoch": 6.02, + "learning_rate": 4.3975903614457834e-05, + "loss": 1.4659, "step": 500 }, { - "epoch": 38.46, - "learning_rate": 1.153846153846154e-05, - "loss": 0.5306, + "epoch": 12.05, + "learning_rate": 3.7951807228915666e-05, + "loss": 0.9332, "step": 1000 } ], - "max_steps": 1300, + "max_steps": 4150, "num_train_epochs": 50, - "total_flos": 512654966784000.0, + "total_flos": 517096931328000.0, "trial_name": null, "trial_params": null } diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin index 386ba1bc9e20be24282a24e62e19f3a471e2bded..a0e784c698917821554b9d758fb306a05972187a 100644 --- a/checkpoint-1000/training_args.bin +++ b/checkpoint-1000/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:daae856c1bd075ece296b326f62ba6652364b07db8daef89d6f2fbf3bd2bd41c +oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db size 3963 diff --git a/checkpoint-1500/config.json b/checkpoint-1500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8305e9d38b1c596c7a37540423b631c2997bdcc9 --- /dev/null +++ b/checkpoint-1500/config.json @@ -0,0 +1,39 @@ +{ + "_name_or_path": "gpt2", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 50 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.31.0", + "use_cache": true, + "vocab_size": 50257 +} diff --git a/checkpoint-1500/generation_config.json b/checkpoint-1500/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b0a63041a006997073506e0cabadc754b24f557 --- /dev/null +++ b/checkpoint-1500/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.31.0" +} diff --git a/checkpoint-1500/optimizer.pt b/checkpoint-1500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cffcbc102ab11682cc6542e81155a4ed4b752305 --- /dev/null +++ b/checkpoint-1500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3508daf547ea7f7639a4fb24571a9b8f970238f6f697f19b0f1db2b9a98b29c6 +size 995605189 diff --git a/checkpoint-1500/pytorch_model.bin b/checkpoint-1500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb77f1a7275634181814d1af284285e60b76a01a --- /dev/null +++ b/checkpoint-1500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06afd4a9f36e6f35427e7af9c5cccb7a9c72446582d5b8fbaa051f797663e154 +size 497807197 diff --git a/checkpoint-1500/rng_state.pth b/checkpoint-1500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1d4acbf1f7d1d9f78a92ed95e1490b688fcaa112 --- /dev/null +++ b/checkpoint-1500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30a541a95ca8532bf556cfdb7f8a3e1e420b0e7690ccb46c9bbe6564b3afaacb +size 14575 diff --git a/checkpoint-1500/scheduler.pt b/checkpoint-1500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..875beb8bad416c8316604f0743b64de08f5bd4b9 --- /dev/null +++ b/checkpoint-1500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6b7284950f4953612836af1f56ea96facef57d66abf579d705f9e90f0c98a46 +size 627 diff --git a/checkpoint-1500/trainer_state.json b/checkpoint-1500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..de23a6fc6e12f403aa36256a8aef562af43ad260 --- /dev/null +++ b/checkpoint-1500/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 18.072289156626507, + "global_step": 1500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 6.02, + "learning_rate": 4.3975903614457834e-05, + "loss": 1.4659, + "step": 500 + }, + { + "epoch": 12.05, + "learning_rate": 3.7951807228915666e-05, + "loss": 0.9332, + "step": 1000 + }, + { + "epoch": 18.07, + "learning_rate": 3.192771084337349e-05, + "loss": 0.6382, + "step": 1500 + } + ], + "max_steps": 4150, + "num_train_epochs": 50, + "total_flos": 775645396992000.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1500/training_args.bin b/checkpoint-1500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0e784c698917821554b9d758fb306a05972187a --- /dev/null +++ b/checkpoint-1500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db +size 3963 diff --git a/checkpoint-2000/config.json b/checkpoint-2000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8305e9d38b1c596c7a37540423b631c2997bdcc9 --- /dev/null +++ b/checkpoint-2000/config.json @@ -0,0 +1,39 @@ +{ + "_name_or_path": "gpt2", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 50 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.31.0", + "use_cache": true, + "vocab_size": 50257 +} diff --git a/checkpoint-2000/generation_config.json b/checkpoint-2000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b0a63041a006997073506e0cabadc754b24f557 --- /dev/null +++ b/checkpoint-2000/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.31.0" +} diff --git a/checkpoint-2000/optimizer.pt b/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d336baac104dec870e9c8bc91acb664fffd225d7 --- /dev/null +++ b/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88fb77e14f3f2bbf6601731efc849309e9ede701eed2cb0ef54a8294251af3cf +size 995605189 diff --git a/checkpoint-2000/pytorch_model.bin b/checkpoint-2000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a7d6a1606f5f6e9bfb1d556288ab9c9c8c729bd --- /dev/null +++ b/checkpoint-2000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23beb0a89f5348bb7af9be60296215311b3367fa345499c5ee0330d5ffd42a38 +size 497807197 diff --git a/checkpoint-2000/rng_state.pth b/checkpoint-2000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d35410336bf10046f420b95184199820c7257a06 --- /dev/null +++ b/checkpoint-2000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:381ebe3e35df681c5618b3ab0df63f45938780152fc94545f3764f6b0192fcf7 +size 14575 diff --git a/checkpoint-2000/scheduler.pt b/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..381cfcce13f8e6c9b726eb5f99da0b9af9f14398 --- /dev/null +++ b/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57b43099f81ffb6a9befdfae023d6369a7bd91bc547e0dc46fa50d4e069365d7 +size 627 diff --git a/checkpoint-2000/trainer_state.json b/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..acfa147d0e75457cb8c5d4e1739336cda37ae6c4 --- /dev/null +++ b/checkpoint-2000/trainer_state.json @@ -0,0 +1,40 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 24.096385542168676, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 6.02, + "learning_rate": 4.3975903614457834e-05, + "loss": 1.4659, + "step": 500 + }, + { + "epoch": 12.05, + "learning_rate": 3.7951807228915666e-05, + "loss": 0.9332, + "step": 1000 + }, + { + "epoch": 18.07, + "learning_rate": 3.192771084337349e-05, + "loss": 0.6382, + "step": 1500 + }, + { + "epoch": 24.1, + "learning_rate": 2.5903614457831325e-05, + "loss": 0.4494, + "step": 2000 + } + ], + "max_steps": 4150, + "num_train_epochs": 50, + "total_flos": 1034193862656000.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2000/training_args.bin b/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0e784c698917821554b9d758fb306a05972187a --- /dev/null +++ b/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db +size 3963 diff --git a/checkpoint-2500/config.json b/checkpoint-2500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8305e9d38b1c596c7a37540423b631c2997bdcc9 --- /dev/null +++ b/checkpoint-2500/config.json @@ -0,0 +1,39 @@ +{ + "_name_or_path": "gpt2", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 50 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.31.0", + "use_cache": true, + "vocab_size": 50257 +} diff --git a/checkpoint-2500/generation_config.json b/checkpoint-2500/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b0a63041a006997073506e0cabadc754b24f557 --- /dev/null +++ b/checkpoint-2500/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.31.0" +} diff --git a/checkpoint-2500/optimizer.pt b/checkpoint-2500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ccbd22c55beadf0cdb3c46862e3975c22cc0f63c --- /dev/null +++ b/checkpoint-2500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5371a17ee239fec7bf7d0c23e3435e00e76efc4d94b9d9941c8e57eea070784 +size 995605189 diff --git a/checkpoint-2500/pytorch_model.bin b/checkpoint-2500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7b425210f49f392dc0a8c8c8e46327ffa873e85a --- /dev/null +++ b/checkpoint-2500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6abe995220e33a09e648c18d269c6d2ea69a77f7640fa5982efb6374fb9d983e +size 497807197 diff --git a/checkpoint-2500/rng_state.pth b/checkpoint-2500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d7586782f74904f7a9d8b30b6e350805ae6857b3 --- /dev/null +++ b/checkpoint-2500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1520384ba472ff2f026dd4acfdd4ae2a72711e694cec5d27670d8b5709ac0c18 +size 14575 diff --git a/checkpoint-2500/scheduler.pt b/checkpoint-2500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ebfe010ec4b02c0f4bf0e9492ca8fd9560ae994 --- /dev/null +++ b/checkpoint-2500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41014b3add9f861ce35d7c779356e164a56dc9ae7109e3d6ef3b1c10ef722681 +size 627 diff --git a/checkpoint-2500/trainer_state.json b/checkpoint-2500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5557d1933aa04ceba40aff0927ca40eb07e5b9dc --- /dev/null +++ b/checkpoint-2500/trainer_state.json @@ -0,0 +1,46 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 30.120481927710845, + "global_step": 2500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 6.02, + "learning_rate": 4.3975903614457834e-05, + "loss": 1.4659, + "step": 500 + }, + { + "epoch": 12.05, + "learning_rate": 3.7951807228915666e-05, + "loss": 0.9332, + "step": 1000 + }, + { + "epoch": 18.07, + "learning_rate": 3.192771084337349e-05, + "loss": 0.6382, + "step": 1500 + }, + { + "epoch": 24.1, + "learning_rate": 2.5903614457831325e-05, + "loss": 0.4494, + "step": 2000 + }, + { + "epoch": 30.12, + "learning_rate": 1.9879518072289157e-05, + "loss": 0.3313, + "step": 2500 + } + ], + "max_steps": 4150, + "num_train_epochs": 50, + "total_flos": 1292742328320000.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2500/training_args.bin b/checkpoint-2500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0e784c698917821554b9d758fb306a05972187a --- /dev/null +++ b/checkpoint-2500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db +size 3963 diff --git a/checkpoint-3000/config.json b/checkpoint-3000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8305e9d38b1c596c7a37540423b631c2997bdcc9 --- /dev/null +++ b/checkpoint-3000/config.json @@ -0,0 +1,39 @@ +{ + "_name_or_path": "gpt2", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 50 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.31.0", + "use_cache": true, + "vocab_size": 50257 +} diff --git a/checkpoint-3000/generation_config.json b/checkpoint-3000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b0a63041a006997073506e0cabadc754b24f557 --- /dev/null +++ b/checkpoint-3000/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.31.0" +} diff --git a/checkpoint-3000/optimizer.pt b/checkpoint-3000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3551d237ff38a3ef1834774a82cfe64a0a87b28 --- /dev/null +++ b/checkpoint-3000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c153ba14052e884699ec52bbc6e9c5e3f366f2771c359a578bbac8b0756ec9d5 +size 995605189 diff --git a/checkpoint-3000/pytorch_model.bin b/checkpoint-3000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..659ab95de224faa5612a2afa7031170e089e1361 --- /dev/null +++ b/checkpoint-3000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e36db49ead0cdc668493714e88b32ab7879164341c222fcea17765bbe3314b5a +size 497807197 diff --git a/checkpoint-3000/rng_state.pth b/checkpoint-3000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9fa12a146b98139818acec27a7614d9ce122be0c --- /dev/null +++ b/checkpoint-3000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:994bb7e6532a4e2b19f13f3ecc9614a59a21889ab7c83b77a91af4f71aadc1e4 +size 14575 diff --git a/checkpoint-3000/scheduler.pt b/checkpoint-3000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1cb06ee0393dfde901c22a473bdaef9812d9fbbf --- /dev/null +++ b/checkpoint-3000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6a91de98682a6a12555212398d5b531617c6d2235bbf2b3b89a4995df8dd915 +size 627 diff --git a/checkpoint-3000/trainer_state.json b/checkpoint-3000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4706504660ea7b5973697c72a56e5845bb0c6529 --- /dev/null +++ b/checkpoint-3000/trainer_state.json @@ -0,0 +1,52 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 36.144578313253014, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 6.02, + "learning_rate": 4.3975903614457834e-05, + "loss": 1.4659, + "step": 500 + }, + { + "epoch": 12.05, + "learning_rate": 3.7951807228915666e-05, + "loss": 0.9332, + "step": 1000 + }, + { + "epoch": 18.07, + "learning_rate": 3.192771084337349e-05, + "loss": 0.6382, + "step": 1500 + }, + { + "epoch": 24.1, + "learning_rate": 2.5903614457831325e-05, + "loss": 0.4494, + "step": 2000 + }, + { + "epoch": 30.12, + "learning_rate": 1.9879518072289157e-05, + "loss": 0.3313, + "step": 2500 + }, + { + "epoch": 36.14, + "learning_rate": 1.3855421686746989e-05, + "loss": 0.2613, + "step": 3000 + } + ], + "max_steps": 4150, + "num_train_epochs": 50, + "total_flos": 1551290793984000.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3000/training_args.bin b/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0e784c698917821554b9d758fb306a05972187a --- /dev/null +++ b/checkpoint-3000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db +size 3963 diff --git a/checkpoint-3500/config.json b/checkpoint-3500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8305e9d38b1c596c7a37540423b631c2997bdcc9 --- /dev/null +++ b/checkpoint-3500/config.json @@ -0,0 +1,39 @@ +{ + "_name_or_path": "gpt2", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 50 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.31.0", + "use_cache": true, + "vocab_size": 50257 +} diff --git a/checkpoint-3500/generation_config.json b/checkpoint-3500/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b0a63041a006997073506e0cabadc754b24f557 --- /dev/null +++ b/checkpoint-3500/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.31.0" +} diff --git a/checkpoint-3500/optimizer.pt b/checkpoint-3500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7011f5d0b15d66d226c43570f540e9f28c9ed12c --- /dev/null +++ b/checkpoint-3500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd0961db178fe69dde82e9633924bd5dade4b681580882d9c35d8435851a286 +size 995605189 diff --git a/checkpoint-3500/pytorch_model.bin b/checkpoint-3500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..36e15ac4ad2e1e5037c3b39392071e8c1b5bcc01 --- /dev/null +++ b/checkpoint-3500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:643b628f35d0674f567aa2c9dd03f36ac6a019641f121e2c43bf6415a1d9d402 +size 497807197 diff --git a/checkpoint-3500/rng_state.pth b/checkpoint-3500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e5cfe1c98bf8bf258ac8d132204433559b3ea74f --- /dev/null +++ b/checkpoint-3500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b98c434920f0409371f6e5b00eef9d1d39333856c10622197b656c0c127a5d69 +size 14575 diff --git a/checkpoint-3500/scheduler.pt b/checkpoint-3500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d4f60a3389920265ca447e5f86600a11b43df83 --- /dev/null +++ b/checkpoint-3500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cda70456e3fe4f42776fd6dba170aa5984245afa17b35b13908f10fcf7043751 +size 627 diff --git a/checkpoint-3500/trainer_state.json b/checkpoint-3500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0e4322017a57462647554b6241c56f140f363b91 --- /dev/null +++ b/checkpoint-3500/trainer_state.json @@ -0,0 +1,58 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 42.16867469879518, + "global_step": 3500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 6.02, + "learning_rate": 4.3975903614457834e-05, + "loss": 1.4659, + "step": 500 + }, + { + "epoch": 12.05, + "learning_rate": 3.7951807228915666e-05, + "loss": 0.9332, + "step": 1000 + }, + { + "epoch": 18.07, + "learning_rate": 3.192771084337349e-05, + "loss": 0.6382, + "step": 1500 + }, + { + "epoch": 24.1, + "learning_rate": 2.5903614457831325e-05, + "loss": 0.4494, + "step": 2000 + }, + { + "epoch": 30.12, + "learning_rate": 1.9879518072289157e-05, + "loss": 0.3313, + "step": 2500 + }, + { + "epoch": 36.14, + "learning_rate": 1.3855421686746989e-05, + "loss": 0.2613, + "step": 3000 + }, + { + "epoch": 42.17, + "learning_rate": 7.83132530120482e-06, + "loss": 0.2205, + "step": 3500 + } + ], + "max_steps": 4150, + "num_train_epochs": 50, + "total_flos": 1809839259648000.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3500/training_args.bin b/checkpoint-3500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0e784c698917821554b9d758fb306a05972187a --- /dev/null +++ b/checkpoint-3500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db +size 3963 diff --git a/checkpoint-4000/config.json b/checkpoint-4000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8305e9d38b1c596c7a37540423b631c2997bdcc9 --- /dev/null +++ b/checkpoint-4000/config.json @@ -0,0 +1,39 @@ +{ + "_name_or_path": "gpt2", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 50 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.31.0", + "use_cache": true, + "vocab_size": 50257 +} diff --git a/checkpoint-4000/generation_config.json b/checkpoint-4000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b0a63041a006997073506e0cabadc754b24f557 --- /dev/null +++ b/checkpoint-4000/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.31.0" +} diff --git a/checkpoint-4000/optimizer.pt b/checkpoint-4000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d42e6f47aaaad733b2ae76a467a3eb2835b2c06 --- /dev/null +++ b/checkpoint-4000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf56ca92472f163445ae9898ef949dee7d3e92d8d182756ca58588c86eb2748c +size 995605189 diff --git a/checkpoint-4000/pytorch_model.bin b/checkpoint-4000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b190c7f11caf0b0e124f51b0ad2fd9ccf175fc60 --- /dev/null +++ b/checkpoint-4000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eb251db276be3b1662a562bdc62ef443eac6d0cfb52e5ae30f26ea72447b795 +size 497807197 diff --git a/checkpoint-4000/rng_state.pth b/checkpoint-4000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a9cb0b74e1846f619b1029bbdd6f0fd019a33330 --- /dev/null +++ b/checkpoint-4000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac94f6bd012683dbb0fb2ca37a3cf8be38e7dd3c6a576b630d1fe9898440e876 +size 14575 diff --git a/checkpoint-4000/scheduler.pt b/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..25ac85dc34a25688ea7f216d7b4328eea04dae16 --- /dev/null +++ b/checkpoint-4000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b41ef5924f8ed320dbbc0797f0544354b5ff66153b45fc93fdbf91a5a60394 +size 627 diff --git a/checkpoint-4000/trainer_state.json b/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..73287a3d35bc99c90284056ddcabb2916d953e62 --- /dev/null +++ b/checkpoint-4000/trainer_state.json @@ -0,0 +1,64 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 48.19277108433735, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 6.02, + "learning_rate": 4.3975903614457834e-05, + "loss": 1.4659, + "step": 500 + }, + { + "epoch": 12.05, + "learning_rate": 3.7951807228915666e-05, + "loss": 0.9332, + "step": 1000 + }, + { + "epoch": 18.07, + "learning_rate": 3.192771084337349e-05, + "loss": 0.6382, + "step": 1500 + }, + { + "epoch": 24.1, + "learning_rate": 2.5903614457831325e-05, + "loss": 0.4494, + "step": 2000 + }, + { + "epoch": 30.12, + "learning_rate": 1.9879518072289157e-05, + "loss": 0.3313, + "step": 2500 + }, + { + "epoch": 36.14, + "learning_rate": 1.3855421686746989e-05, + "loss": 0.2613, + "step": 3000 + }, + { + "epoch": 42.17, + "learning_rate": 7.83132530120482e-06, + "loss": 0.2205, + "step": 3500 + }, + { + "epoch": 48.19, + "learning_rate": 1.8072289156626506e-06, + "loss": 0.1976, + "step": 4000 + } + ], + "max_steps": 4150, + "num_train_epochs": 50, + "total_flos": 2068387725312000.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4000/training_args.bin b/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0e784c698917821554b9d758fb306a05972187a --- /dev/null +++ b/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db +size 3963 diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt index 68f3f6c35b800e5af1fff0f957ae33549aa3e400..a00d2a0603e6233508269b036de87c58a1798d77 100644 --- a/checkpoint-500/optimizer.pt +++ b/checkpoint-500/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c82f75a691a634fb03262de48088234fe468bcef1d375cbd555b32b6dd3f077b +oid sha256:d5905ae0cf1d1a55a86815dffe7e73127e24d527d816a9871ce85a3de39b3732 size 995605189 diff --git a/checkpoint-500/pytorch_model.bin b/checkpoint-500/pytorch_model.bin index d846735ecabffffc8bc389b12ad4496f90990207..5db6b68f34847b8ea1897afea3791a73ad04e2cd 100644 --- a/checkpoint-500/pytorch_model.bin +++ b/checkpoint-500/pytorch_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6ad6544332d9fe6203f222c3e4e94019dab223f0b43408b7276741c5b1aa1715 +oid sha256:5e915660340697b4970bf58106b18936b77cc7a8a29a061be29dcdde97d81edf size 497807197 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth index fbf051c336c381662b8c2c2bdb833bec0a4c5e4e..fd6f3cfa92b4972a98c7ed0f0a9c3f5b6fd24226 100644 --- a/checkpoint-500/rng_state.pth +++ b/checkpoint-500/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b3a59d1d08ae42df3daceaf2d34d0a81b9f7d6f0a3b0cb10f37b12ab32eac0a0 +oid sha256:8a36a87d9e779b31eca87cbe0b3d161fc1766c3efa797e3df3cca1465350f3a7 size 14575 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt index ec92148000dbac2be77546762e3fefbfc079b7df..f155592c229bdc3e14aa0422de501ec450d57ddc 100644 --- a/checkpoint-500/scheduler.pt +++ b/checkpoint-500/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a16f923459df7bd6011df400a739fc0b8b51a1e3f6317c9cbd22b75b73d74426 +oid sha256:75fc81b90a080d688207e14ef34c4907998d9431265498d879366a613550e9b8 size 627 diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json index 8f89153ad44c9ca93ee497b4efa99e8ae8ae5e77..64bcef0a1e17872941a301eb8ad1d3a79831468b 100644 --- a/checkpoint-500/trainer_state.json +++ b/checkpoint-500/trainer_state.json @@ -1,22 +1,22 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 19.23076923076923, + "epoch": 6.024096385542169, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 19.23, - "learning_rate": 3.0769230769230774e-05, - "loss": 1.3827, + "epoch": 6.02, + "learning_rate": 4.3975903614457834e-05, + "loss": 1.4659, "step": 500 } ], - "max_steps": 1300, + "max_steps": 4150, "num_train_epochs": 50, - "total_flos": 256327483392000.0, + "total_flos": 258548465664000.0, "trial_name": null, "trial_params": null } diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin index 386ba1bc9e20be24282a24e62e19f3a471e2bded..a0e784c698917821554b9d758fb306a05972187a 100644 --- a/checkpoint-500/training_args.bin +++ b/checkpoint-500/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:daae856c1bd075ece296b326f62ba6652364b07db8daef89d6f2fbf3bd2bd41c +oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db size 3963 diff --git a/pytorch_model.bin b/pytorch_model.bin index 76c72d56f35d03a105437ad280fcf798d2bd4b5d..2a7558e898f2440d48522a5fefda443d3ec8adfd 100644 --- a/pytorch_model.bin +++ b/pytorch_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c92ad905cd6d440873c9a6c3a725d3890373b3185af5d49397cf866f34def7f4 +oid sha256:11533bf97b08eaa7ffaaca93fdf37c86c4b92ec9a5a96a06e2c436794deac997 size 497807197 diff --git a/training_args.bin b/training_args.bin index 386ba1bc9e20be24282a24e62e19f3a471e2bded..a0e784c698917821554b9d758fb306a05972187a 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:daae856c1bd075ece296b326f62ba6652364b07db8daef89d6f2fbf3bd2bd41c +oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db size 3963