imda-lseokmin committed on
Commit 717255d
1 Parent(s): 8b4cc20

Upload 159 files


Updated with removed input and responses

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. README.md +3 -3
  2. all_results.json +12 -12
  3. checkpoint-1000/config.json +39 -0
  4. checkpoint-1000/generation_config.json +6 -0
  5. checkpoint-1000/merges.txt +0 -0
  6. checkpoint-1000/model.safetensors +3 -0
  7. checkpoint-1000/optimizer.pt +3 -0
  8. checkpoint-1000/rng_state.pth +3 -0
  9. checkpoint-1000/scheduler.pt +3 -0
  10. checkpoint-1000/special_tokens_map.json +5 -0
  11. checkpoint-1000/tokenizer.json +0 -0
  12. checkpoint-1000/tokenizer_config.json +19 -0
  13. checkpoint-1000/trainer_state.json +35 -0
  14. checkpoint-1000/training_args.bin +3 -0
  15. checkpoint-1000/vocab.json +0 -0
  16. checkpoint-1500/config.json +39 -0
  17. checkpoint-1500/generation_config.json +6 -0
  18. checkpoint-1500/merges.txt +0 -0
  19. checkpoint-1500/model.safetensors +3 -0
  20. checkpoint-1500/optimizer.pt +3 -0
  21. checkpoint-1500/rng_state.pth +3 -0
  22. checkpoint-1500/scheduler.pt +3 -0
  23. checkpoint-1500/special_tokens_map.json +5 -0
  24. checkpoint-1500/tokenizer.json +0 -0
  25. checkpoint-1500/tokenizer_config.json +19 -0
  26. checkpoint-1500/trainer_state.json +42 -0
  27. checkpoint-1500/training_args.bin +3 -0
  28. checkpoint-1500/vocab.json +0 -0
  29. checkpoint-2000/config.json +39 -0
  30. checkpoint-2000/generation_config.json +6 -0
  31. checkpoint-2000/merges.txt +0 -0
  32. checkpoint-2000/model.safetensors +3 -0
  33. checkpoint-2000/optimizer.pt +3 -0
  34. checkpoint-2000/rng_state.pth +3 -0
  35. checkpoint-2000/scheduler.pt +3 -0
  36. checkpoint-2000/special_tokens_map.json +5 -0
  37. checkpoint-2000/tokenizer.json +0 -0
  38. checkpoint-2000/tokenizer_config.json +19 -0
  39. checkpoint-2000/trainer_state.json +49 -0
  40. checkpoint-2000/training_args.bin +3 -0
  41. checkpoint-2000/vocab.json +0 -0
  42. checkpoint-2500/config.json +39 -0
  43. checkpoint-2500/generation_config.json +6 -0
  44. checkpoint-2500/merges.txt +0 -0
  45. checkpoint-2500/model.safetensors +3 -0
  46. checkpoint-2500/optimizer.pt +3 -0
  47. checkpoint-2500/rng_state.pth +3 -0
  48. checkpoint-2500/scheduler.pt +3 -0
  49. checkpoint-2500/special_tokens_map.json +5 -0
  50. checkpoint-2500/tokenizer.json +0 -0
README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
- - Loss: 3.3880
- - Accuracy: 0.4552
+ - Loss: 2.5745
+ - Accuracy: 0.4842
 
 ## Model description
 
@@ -43,7 +43,7 @@ The following hyperparameters were used during training:
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
- - num_epochs: 40.0
+ - num_epochs: 20.0
 
 ### Training results
 
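The updated card reports the new run's evaluation loss and accuracy. As a minimal usage sketch (not part of this commit), the checkpoint can be loaded with the standard transformers API; the repository path below is a placeholder, not the actual repo ID:

```python
# Minimal loading sketch (assumes transformers >= 4.40.1, matching config.json).
# "path/to/this/repo" is a placeholder for the actual repository ID or a local clone.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "path/to/this/repo"  # placeholder
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

inputs = tokenizer("Hello", return_tensors="pt")
# do_sample=True and max_length=50 mirror task_specific_params in config.json
outputs = model.generate(**inputs, do_sample=True, max_length=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```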
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
 {
-    "epoch": 40.0,
-    "eval_accuracy": 0.4551971326164875,
-    "eval_loss": 3.3879826068878174,
-    "eval_runtime": 0.6202,
+    "epoch": 20.0,
+    "eval_accuracy": 0.4841968067774519,
+    "eval_loss": 2.5744614601135254,
+    "eval_runtime": 0.6172,
     "eval_samples": 9,
-    "eval_samples_per_second": 14.512,
-    "eval_steps_per_second": 8.062,
-    "perplexity": 29.606164710375456,
-    "total_flos": 2884664033280000.0,
-    "train_loss": 1.075979393115942,
-    "train_runtime": 1098.0309,
+    "eval_samples_per_second": 14.581,
+    "eval_steps_per_second": 8.101,
+    "perplexity": 13.124247324302939,
+    "total_flos": 1442332016640000.0,
+    "train_loss": 1.6038585939269134,
+    "train_runtime": 546.7087,
     "train_samples": 138,
-    "train_samples_per_second": 5.027,
-    "train_steps_per_second": 2.514
+    "train_samples_per_second": 5.048,
+    "train_steps_per_second": 2.524
 }
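The "perplexity" entries are simply the exponential of the evaluation loss; a quick check against both the removed and the added values:

```python
import math

# perplexity = exp(eval_loss) for both the old and the new evaluation results
print(math.exp(3.3879826068878174))  # ~29.6062, matches the removed "perplexity"
print(math.exp(2.5744614601135254))  # ~13.1242, matches the new "perplexity"
```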
checkpoint-1000/config.json ADDED
@@ -0,0 +1,39 @@
+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}
checkpoint-1000/generation_config.json ADDED
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.40.1"
+}
checkpoint-1000/merges.txt ADDED
The diff for this file is too large to render. See raw diff

checkpoint-1000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0a3045c4618f75a91e43e2b4079b4f70ddfea71b549212fe00c8bc207fce25d
+size 497774208
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5737a324483690f9aadd907a2e48f602e54024c812e42d2a6c0c28878d67400
+size 995642298
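The two Git LFS pointers above track the float32 weights (model.safetensors) and the Adam optimizer state (optimizer.pt). As a rough, assumption-based sanity check, both sizes line up with GPT-2 small's parameter count derived from the config.json values; the small remainders are file headers and bookkeeping:

```python
# Rough sanity check: the safetensors size (~497.8 MB) and optimizer.pt size
# (~995.6 MB) match GPT-2 small stored in float32, since Adam keeps two float32
# moment buffers per parameter. Values below come from config.json.
n_embd, n_layer, n_positions, vocab_size = 768, 12, 1024, 50257

embeddings = vocab_size * n_embd + n_positions * n_embd           # wte + wpe
per_block = (
    2 * 2 * n_embd                        # ln_1 + ln_2 (weight + bias each)
    + (n_embd * 3 * n_embd + 3 * n_embd)  # attention c_attn
    + (n_embd * n_embd + n_embd)          # attention c_proj
    + (n_embd * 4 * n_embd + 4 * n_embd)  # mlp c_fc
    + (4 * n_embd * n_embd + n_embd)      # mlp c_proj
)
total_params = embeddings + n_layer * per_block + 2 * n_embd       # + final layer norm
print(total_params)          # 124,439,808 parameters
print(total_params * 4)      # ~497,759,232 bytes, close to the 497,774,208-byte weights file
print(2 * total_params * 4)  # ~995.5 MB of Adam moment state, close to optimizer.pt's size
```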
checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fad09794a0db580c98cf06d0ebd5b66e92d3f86f4f7bd4a728e24f1a05467f0c
+size 14244
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e4ac6e2694cbb9ef21cdf8af6f8d76becd952779eb03e00001aef172a063804
+size 1064
checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}
checkpoint-1000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff

checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,35 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 14.492753623188406,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 7.246376811594203,
+      "grad_norm": 4.941216468811035,
+      "learning_rate": 3.188405797101449e-05,
+      "loss": 1.9657,
+      "step": 500
+    },
+    {
+      "epoch": 14.492753623188406,
+      "grad_norm": 4.604403495788574,
+      "learning_rate": 1.3768115942028985e-05,
+      "loss": 1.4853,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1380,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 500,
+  "total_flos": 1045168128000000.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
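The learning rates logged in this trainer_state.json are consistent with the linear scheduler named in the README, assuming the Trainer's default initial learning rate of 5e-05 (the actual value is not visible in this diff); the step counts also follow from 138 training samples at batch size 2:

```python
# Sanity check of trainer_state.json at checkpoint-1000. The 5e-05 initial
# learning rate is an assumed Trainer default, not a value shown in this commit.
train_samples, per_device_batch = 138, 2           # from all_results.json / trainer_state.json
steps_per_epoch = train_samples // per_device_batch
max_steps = steps_per_epoch * 20                   # num_train_epochs = 20
assert (steps_per_epoch, max_steps) == (69, 1380)  # matches "max_steps": 1380

initial_lr = 5e-05                                  # assumed default
for step in (500, 1000):
    lr = initial_lr * (1 - step / max_steps)        # linear decay to zero
    print(step, lr)  # 500 -> 3.188405797101449e-05, 1000 -> 1.3768115942028985e-05
```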
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d74f83bec7ea349d722f90da656054543e87196b485a7f973f3b2bf816599956
+size 4984
checkpoint-1000/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1500/config.json ADDED
@@ -0,0 +1,39 @@
+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}
checkpoint-1500/generation_config.json ADDED
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.40.1"
+}
checkpoint-1500/merges.txt ADDED
The diff for this file is too large to render. See raw diff

checkpoint-1500/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9465c3d5498a14eff4afc1070fc554233f943e6eba2aabfc88b7074c1546d081
+size 497774208
checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c260c3ea63415ce78c5ae5ffd403f9bb6e66bdd2d545964a6113f9907db3557f
+size 995642298
checkpoint-1500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a5d0dc6dc976ddb0996d36c9a11fc601fadb39a823112e10d55371b69bb60c8
+size 14244
checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63d73ead489afb0c384edd6841da8cd61a5e9ac39008a9decfd3938022882349
+size 1064
checkpoint-1500/special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}
checkpoint-1500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff

checkpoint-1500/tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}
checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,42 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 26.31578947368421,
+  "eval_steps": 500,
+  "global_step": 1500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 8.771929824561404,
+      "grad_norm": 5.547207832336426,
+      "learning_rate": 3.9035087719298244e-05,
+      "loss": 2.3503,
+      "step": 500
+    },
+    {
+      "epoch": 17.54385964912281,
+      "grad_norm": 4.723608493804932,
+      "learning_rate": 2.8070175438596492e-05,
+      "loss": 1.6049,
+      "step": 1000
+    },
+    {
+      "epoch": 26.31578947368421,
+      "grad_norm": 3.2716429233551025,
+      "learning_rate": 1.7105263157894737e-05,
+      "loss": 1.1304,
+      "step": 1500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 40,
+  "save_steps": 500,
+  "total_flos": 1554165006336000.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13b85d5955db3439393fdb946be63666175d55f4cd4ea26dedcc342dbcf5fbf3
+size 4984
checkpoint-1500/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/config.json ADDED
@@ -0,0 +1,39 @@
+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}
checkpoint-2000/generation_config.json ADDED
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.40.1"
+}
checkpoint-2000/merges.txt ADDED
The diff for this file is too large to render. See raw diff

checkpoint-2000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd7e17c6af9f22cfa4ca7a3119ecd8bd5541e2d28313decc3f3afe309eb5261e
+size 497774208
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f82ecb38a4f52683fa6de7cfdee42793f9bf75dd744b4ab6a417438c6a5a4a07
+size 995642298
checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a068e9171b43c381b4d97aa1db231465651c89decc2318271a0ef4e4f10c63e9
+size 14244
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76ee29c7de4f93a78f39f2e05dadcee1556b60097ebe2743779eed6c51359032
+size 1064
checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}
checkpoint-2000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff

checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,49 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 35.08771929824562,
+  "eval_steps": 500,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 8.771929824561404,
+      "grad_norm": 5.547207832336426,
+      "learning_rate": 3.9035087719298244e-05,
+      "loss": 2.3503,
+      "step": 500
+    },
+    {
+      "epoch": 17.54385964912281,
+      "grad_norm": 4.723608493804932,
+      "learning_rate": 2.8070175438596492e-05,
+      "loss": 1.6049,
+      "step": 1000
+    },
+    {
+      "epoch": 26.31578947368421,
+      "grad_norm": 3.2716429233551025,
+      "learning_rate": 1.7105263157894737e-05,
+      "loss": 1.1304,
+      "step": 1500
+    },
+    {
+      "epoch": 35.08771929824562,
+      "grad_norm": 3.1184732913970947,
+      "learning_rate": 6.140350877192982e-06,
+      "loss": 0.8743,
+      "step": 2000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 40,
+  "save_steps": 500,
+  "total_flos": 2072045813760000.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13b85d5955db3439393fdb946be63666175d55f4cd4ea26dedcc342dbcf5fbf3
+size 4984
checkpoint-2000/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2500/config.json ADDED
@@ -0,0 +1,39 @@
+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}
checkpoint-2500/generation_config.json ADDED
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.40.1"
+}
checkpoint-2500/merges.txt ADDED
The diff for this file is too large to render. See raw diff

checkpoint-2500/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1f8b8fcba4ccf815d73ea20b625c2f4e5815fa47ef8064cf23ea3087a38b496
+size 497774208
checkpoint-2500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ef47719544b224a26bf342e7ed9be404d3369ec46e3b5db4c0075336fcba9cd
+size 995642298
checkpoint-2500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a5aac99db29c70e1aaad7d1c648f0ec39b61bb7344d6cd77050012b5653176f
+size 14244
checkpoint-2500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da29dc3e59527e7abe6c76b3ac8afc3b60d6f976044939cf19f47b9a0849a321
+size 1064
checkpoint-2500/special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}
checkpoint-2500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff