MiriFur commited on
Commit
87043d2
·
1 Parent(s): 71901a2

Upload 72 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. checkpoint-1000/optimizer.pt +1 -1
  2. checkpoint-1000/pytorch_model.bin +1 -1
  3. checkpoint-1000/rng_state.pth +1 -1
  4. checkpoint-1000/scheduler.pt +1 -1
  5. checkpoint-1000/trainer_state.json +9 -9
  6. checkpoint-1000/training_args.bin +1 -1
  7. checkpoint-1500/config.json +39 -0
  8. checkpoint-1500/generation_config.json +6 -0
  9. checkpoint-1500/optimizer.pt +3 -0
  10. checkpoint-1500/pytorch_model.bin +3 -0
  11. checkpoint-1500/rng_state.pth +3 -0
  12. checkpoint-1500/scheduler.pt +3 -0
  13. checkpoint-1500/trainer_state.json +34 -0
  14. checkpoint-1500/training_args.bin +3 -0
  15. checkpoint-2000/config.json +39 -0
  16. checkpoint-2000/generation_config.json +6 -0
  17. checkpoint-2000/optimizer.pt +3 -0
  18. checkpoint-2000/pytorch_model.bin +3 -0
  19. checkpoint-2000/rng_state.pth +3 -0
  20. checkpoint-2000/scheduler.pt +3 -0
  21. checkpoint-2000/trainer_state.json +40 -0
  22. checkpoint-2000/training_args.bin +3 -0
  23. checkpoint-2500/config.json +39 -0
  24. checkpoint-2500/generation_config.json +6 -0
  25. checkpoint-2500/optimizer.pt +3 -0
  26. checkpoint-2500/pytorch_model.bin +3 -0
  27. checkpoint-2500/rng_state.pth +3 -0
  28. checkpoint-2500/scheduler.pt +3 -0
  29. checkpoint-2500/trainer_state.json +46 -0
  30. checkpoint-2500/training_args.bin +3 -0
  31. checkpoint-3000/config.json +39 -0
  32. checkpoint-3000/generation_config.json +6 -0
  33. checkpoint-3000/optimizer.pt +3 -0
  34. checkpoint-3000/pytorch_model.bin +3 -0
  35. checkpoint-3000/rng_state.pth +3 -0
  36. checkpoint-3000/scheduler.pt +3 -0
  37. checkpoint-3000/trainer_state.json +52 -0
  38. checkpoint-3000/training_args.bin +3 -0
  39. checkpoint-3500/config.json +39 -0
  40. checkpoint-3500/generation_config.json +6 -0
  41. checkpoint-3500/optimizer.pt +3 -0
  42. checkpoint-3500/pytorch_model.bin +3 -0
  43. checkpoint-3500/rng_state.pth +3 -0
  44. checkpoint-3500/scheduler.pt +3 -0
  45. checkpoint-3500/trainer_state.json +58 -0
  46. checkpoint-3500/training_args.bin +3 -0
  47. checkpoint-4000/config.json +39 -0
  48. checkpoint-4000/generation_config.json +6 -0
  49. checkpoint-4000/optimizer.pt +3 -0
  50. checkpoint-4000/pytorch_model.bin +3 -0
checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc82743274985f20435c1a3f07714d1d9c5d49fd3f939e47d1db0139e53c0160
3
  size 995605189
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40044f58ba6e4ad02a3afa257490cb1756f235f72ce0587d739f1457a50fd940
3
  size 995605189
checkpoint-1000/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b27730d57f0da00683c2b12bd176295a9dbeb119e402e80445699aa0caac51e8
3
  size 497807197
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:577150ca55e27834c0a07cb22cf121e9dd81fff56de0a9eaf1751f8d42cc9931
3
  size 497807197
checkpoint-1000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73543bf529463744a086bed7aa7a2369d7afaf931eb0765f938b912a54e0182f
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4e3efb199f5967e04c2f29974f5a4b60568298ec35d18c73cb2af6348f20a84
3
  size 14575
checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3003d262ae7e5e74c25925ba898d88b2fd6386924a71b1f0c91fba5b532b78f2
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d3caf872438b5f86b8ac9cfbc93cd607ede756cfd2b6545eed4877e4009dac2
3
  size 627
checkpoint-1000/trainer_state.json CHANGED
@@ -1,28 +1,28 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 38.46153846153846,
5
  "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 19.23,
12
- "learning_rate": 3.0769230769230774e-05,
13
- "loss": 1.3827,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 38.46,
18
- "learning_rate": 1.153846153846154e-05,
19
- "loss": 0.5306,
20
  "step": 1000
21
  }
22
  ],
23
- "max_steps": 1300,
24
  "num_train_epochs": 50,
25
- "total_flos": 512654966784000.0,
26
  "trial_name": null,
27
  "trial_params": null
28
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.048192771084338,
5
  "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 6.02,
12
+ "learning_rate": 4.3975903614457834e-05,
13
+ "loss": 1.4659,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 12.05,
18
+ "learning_rate": 3.7951807228915666e-05,
19
+ "loss": 0.9332,
20
  "step": 1000
21
  }
22
  ],
23
+ "max_steps": 4150,
24
  "num_train_epochs": 50,
25
+ "total_flos": 517096931328000.0,
26
  "trial_name": null,
27
  "trial_params": null
28
  }
checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:daae856c1bd075ece296b326f62ba6652364b07db8daef89d6f2fbf3bd2bd41c
3
  size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db
3
  size 3963
checkpoint-1500/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.31.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
checkpoint-1500/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.31.0"
6
+ }
checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3508daf547ea7f7639a4fb24571a9b8f970238f6f697f19b0f1db2b9a98b29c6
3
+ size 995605189
checkpoint-1500/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06afd4a9f36e6f35427e7af9c5cccb7a9c72446582d5b8fbaa051f797663e154
3
+ size 497807197
checkpoint-1500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30a541a95ca8532bf556cfdb7f8a3e1e420b0e7690ccb46c9bbe6564b3afaacb
3
+ size 14575
checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6b7284950f4953612836af1f56ea96facef57d66abf579d705f9e90f0c98a46
3
+ size 627
checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 18.072289156626507,
5
+ "global_step": 1500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 6.02,
12
+ "learning_rate": 4.3975903614457834e-05,
13
+ "loss": 1.4659,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 12.05,
18
+ "learning_rate": 3.7951807228915666e-05,
19
+ "loss": 0.9332,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 18.07,
24
+ "learning_rate": 3.192771084337349e-05,
25
+ "loss": 0.6382,
26
+ "step": 1500
27
+ }
28
+ ],
29
+ "max_steps": 4150,
30
+ "num_train_epochs": 50,
31
+ "total_flos": 775645396992000.0,
32
+ "trial_name": null,
33
+ "trial_params": null
34
+ }
checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db
3
+ size 3963
checkpoint-2000/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.31.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
checkpoint-2000/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.31.0"
6
+ }
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88fb77e14f3f2bbf6601731efc849309e9ede701eed2cb0ef54a8294251af3cf
3
+ size 995605189
checkpoint-2000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23beb0a89f5348bb7af9be60296215311b3367fa345499c5ee0330d5ffd42a38
3
+ size 497807197
checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:381ebe3e35df681c5618b3ab0df63f45938780152fc94545f3764f6b0192fcf7
3
+ size 14575
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57b43099f81ffb6a9befdfae023d6369a7bd91bc547e0dc46fa50d4e069365d7
3
+ size 627
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 24.096385542168676,
5
+ "global_step": 2000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 6.02,
12
+ "learning_rate": 4.3975903614457834e-05,
13
+ "loss": 1.4659,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 12.05,
18
+ "learning_rate": 3.7951807228915666e-05,
19
+ "loss": 0.9332,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 18.07,
24
+ "learning_rate": 3.192771084337349e-05,
25
+ "loss": 0.6382,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 24.1,
30
+ "learning_rate": 2.5903614457831325e-05,
31
+ "loss": 0.4494,
32
+ "step": 2000
33
+ }
34
+ ],
35
+ "max_steps": 4150,
36
+ "num_train_epochs": 50,
37
+ "total_flos": 1034193862656000.0,
38
+ "trial_name": null,
39
+ "trial_params": null
40
+ }
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db
3
+ size 3963
checkpoint-2500/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.31.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
checkpoint-2500/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.31.0"
6
+ }
checkpoint-2500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5371a17ee239fec7bf7d0c23e3435e00e76efc4d94b9d9941c8e57eea070784
3
+ size 995605189
checkpoint-2500/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6abe995220e33a09e648c18d269c6d2ea69a77f7640fa5982efb6374fb9d983e
3
+ size 497807197
checkpoint-2500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1520384ba472ff2f026dd4acfdd4ae2a72711e694cec5d27670d8b5709ac0c18
3
+ size 14575
checkpoint-2500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41014b3add9f861ce35d7c779356e164a56dc9ae7109e3d6ef3b1c10ef722681
3
+ size 627
checkpoint-2500/trainer_state.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 30.120481927710845,
5
+ "global_step": 2500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 6.02,
12
+ "learning_rate": 4.3975903614457834e-05,
13
+ "loss": 1.4659,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 12.05,
18
+ "learning_rate": 3.7951807228915666e-05,
19
+ "loss": 0.9332,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 18.07,
24
+ "learning_rate": 3.192771084337349e-05,
25
+ "loss": 0.6382,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 24.1,
30
+ "learning_rate": 2.5903614457831325e-05,
31
+ "loss": 0.4494,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 30.12,
36
+ "learning_rate": 1.9879518072289157e-05,
37
+ "loss": 0.3313,
38
+ "step": 2500
39
+ }
40
+ ],
41
+ "max_steps": 4150,
42
+ "num_train_epochs": 50,
43
+ "total_flos": 1292742328320000.0,
44
+ "trial_name": null,
45
+ "trial_params": null
46
+ }
checkpoint-2500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db
3
+ size 3963
checkpoint-3000/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.31.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
checkpoint-3000/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.31.0"
6
+ }
checkpoint-3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c153ba14052e884699ec52bbc6e9c5e3f366f2771c359a578bbac8b0756ec9d5
3
+ size 995605189
checkpoint-3000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e36db49ead0cdc668493714e88b32ab7879164341c222fcea17765bbe3314b5a
3
+ size 497807197
checkpoint-3000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:994bb7e6532a4e2b19f13f3ecc9614a59a21889ab7c83b77a91af4f71aadc1e4
3
+ size 14575
checkpoint-3000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6a91de98682a6a12555212398d5b531617c6d2235bbf2b3b89a4995df8dd915
3
+ size 627
checkpoint-3000/trainer_state.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 36.144578313253014,
5
+ "global_step": 3000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 6.02,
12
+ "learning_rate": 4.3975903614457834e-05,
13
+ "loss": 1.4659,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 12.05,
18
+ "learning_rate": 3.7951807228915666e-05,
19
+ "loss": 0.9332,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 18.07,
24
+ "learning_rate": 3.192771084337349e-05,
25
+ "loss": 0.6382,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 24.1,
30
+ "learning_rate": 2.5903614457831325e-05,
31
+ "loss": 0.4494,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 30.12,
36
+ "learning_rate": 1.9879518072289157e-05,
37
+ "loss": 0.3313,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 36.14,
42
+ "learning_rate": 1.3855421686746989e-05,
43
+ "loss": 0.2613,
44
+ "step": 3000
45
+ }
46
+ ],
47
+ "max_steps": 4150,
48
+ "num_train_epochs": 50,
49
+ "total_flos": 1551290793984000.0,
50
+ "trial_name": null,
51
+ "trial_params": null
52
+ }
checkpoint-3000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db
3
+ size 3963
checkpoint-3500/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.31.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
checkpoint-3500/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.31.0"
6
+ }
checkpoint-3500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fd0961db178fe69dde82e9633924bd5dade4b681580882d9c35d8435851a286
3
+ size 995605189
checkpoint-3500/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:643b628f35d0674f567aa2c9dd03f36ac6a019641f121e2c43bf6415a1d9d402
3
+ size 497807197
checkpoint-3500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b98c434920f0409371f6e5b00eef9d1d39333856c10622197b656c0c127a5d69
3
+ size 14575
checkpoint-3500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cda70456e3fe4f42776fd6dba170aa5984245afa17b35b13908f10fcf7043751
3
+ size 627
checkpoint-3500/trainer_state.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 42.16867469879518,
5
+ "global_step": 3500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 6.02,
12
+ "learning_rate": 4.3975903614457834e-05,
13
+ "loss": 1.4659,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 12.05,
18
+ "learning_rate": 3.7951807228915666e-05,
19
+ "loss": 0.9332,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 18.07,
24
+ "learning_rate": 3.192771084337349e-05,
25
+ "loss": 0.6382,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 24.1,
30
+ "learning_rate": 2.5903614457831325e-05,
31
+ "loss": 0.4494,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 30.12,
36
+ "learning_rate": 1.9879518072289157e-05,
37
+ "loss": 0.3313,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 36.14,
42
+ "learning_rate": 1.3855421686746989e-05,
43
+ "loss": 0.2613,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 42.17,
48
+ "learning_rate": 7.83132530120482e-06,
49
+ "loss": 0.2205,
50
+ "step": 3500
51
+ }
52
+ ],
53
+ "max_steps": 4150,
54
+ "num_train_epochs": 50,
55
+ "total_flos": 1809839259648000.0,
56
+ "trial_name": null,
57
+ "trial_params": null
58
+ }
checkpoint-3500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db
3
+ size 3963
checkpoint-4000/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.31.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
checkpoint-4000/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.31.0"
6
+ }
checkpoint-4000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf56ca92472f163445ae9898ef949dee7d3e92d8d182756ca58588c86eb2748c
3
+ size 995605189
checkpoint-4000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0eb251db276be3b1662a562bdc62ef443eac6d0cfb52e5ae30f26ea72447b795
3
+ size 497807197