MiriFur committed on Aug 6, 2023

Commit

87043d2

1 Parent(s): 71901a2

Upload 72 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

checkpoint-1000/optimizer.pt +1 -1
checkpoint-1000/pytorch_model.bin +1 -1
checkpoint-1000/rng_state.pth +1 -1
checkpoint-1000/scheduler.pt +1 -1
checkpoint-1000/trainer_state.json +9 -9
checkpoint-1000/training_args.bin +1 -1
checkpoint-1500/config.json +39 -0
checkpoint-1500/generation_config.json +6 -0
checkpoint-1500/optimizer.pt +3 -0
checkpoint-1500/pytorch_model.bin +3 -0
checkpoint-1500/rng_state.pth +3 -0
checkpoint-1500/scheduler.pt +3 -0
checkpoint-1500/trainer_state.json +34 -0
checkpoint-1500/training_args.bin +3 -0
checkpoint-2000/config.json +39 -0
checkpoint-2000/generation_config.json +6 -0
checkpoint-2000/optimizer.pt +3 -0
checkpoint-2000/pytorch_model.bin +3 -0
checkpoint-2000/rng_state.pth +3 -0
checkpoint-2000/scheduler.pt +3 -0
checkpoint-2000/trainer_state.json +40 -0
checkpoint-2000/training_args.bin +3 -0
checkpoint-2500/config.json +39 -0
checkpoint-2500/generation_config.json +6 -0
checkpoint-2500/optimizer.pt +3 -0
checkpoint-2500/pytorch_model.bin +3 -0
checkpoint-2500/rng_state.pth +3 -0
checkpoint-2500/scheduler.pt +3 -0
checkpoint-2500/trainer_state.json +46 -0
checkpoint-2500/training_args.bin +3 -0
checkpoint-3000/config.json +39 -0
checkpoint-3000/generation_config.json +6 -0
checkpoint-3000/optimizer.pt +3 -0
checkpoint-3000/pytorch_model.bin +3 -0
checkpoint-3000/rng_state.pth +3 -0
checkpoint-3000/scheduler.pt +3 -0
checkpoint-3000/trainer_state.json +52 -0
checkpoint-3000/training_args.bin +3 -0
checkpoint-3500/config.json +39 -0
checkpoint-3500/generation_config.json +6 -0
checkpoint-3500/optimizer.pt +3 -0
checkpoint-3500/pytorch_model.bin +3 -0
checkpoint-3500/rng_state.pth +3 -0
checkpoint-3500/scheduler.pt +3 -0
checkpoint-3500/trainer_state.json +58 -0
checkpoint-3500/training_args.bin +3 -0
checkpoint-4000/config.json +39 -0
checkpoint-4000/generation_config.json +6 -0
checkpoint-4000/optimizer.pt +3 -0
checkpoint-4000/pytorch_model.bin +3 -0

checkpoint-1000/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc82743274985f20435c1a3f07714d1d9c5d49fd3f939e47d1db0139e53c0160
 size 995605189

 version https://git-lfs.github.com/spec/v1
+oid sha256:40044f58ba6e4ad02a3afa257490cb1756f235f72ce0587d739f1457a50fd940
 size 995605189

checkpoint-1000/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b27730d57f0da00683c2b12bd176295a9dbeb119e402e80445699aa0caac51e8
 size 497807197

 version https://git-lfs.github.com/spec/v1
+oid sha256:577150ca55e27834c0a07cb22cf121e9dd81fff56de0a9eaf1751f8d42cc9931
 size 497807197

checkpoint-1000/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:73543bf529463744a086bed7aa7a2369d7afaf931eb0765f938b912a54e0182f
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4e3efb199f5967e04c2f29974f5a4b60568298ec35d18c73cb2af6348f20a84
 size 14575

checkpoint-1000/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3003d262ae7e5e74c25925ba898d88b2fd6386924a71b1f0c91fba5b532b78f2
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d3caf872438b5f86b8ac9cfbc93cd607ede756cfd2b6545eed4877e4009dac2
 size 627

checkpoint-1000/trainer_state.json CHANGED Viewed

@@ -1,28 +1,28 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 38.46153846153846,
   "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 19.23,
-      "learning_rate": 3.0769230769230774e-05,
-      "loss": 1.3827,
       "step": 500
     },
     {
-      "epoch": 38.46,
-      "learning_rate": 1.153846153846154e-05,
-      "loss": 0.5306,
       "step": 1000
     }
   ],
-  "max_steps": 1300,
   "num_train_epochs": 50,
-  "total_flos": 512654966784000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 12.048192771084338,
   "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 6.02,
+      "learning_rate": 4.3975903614457834e-05,
+      "loss": 1.4659,
       "step": 500
     },
     {
+      "epoch": 12.05,
+      "learning_rate": 3.7951807228915666e-05,
+      "loss": 0.9332,
       "step": 1000
     }
   ],
+  "max_steps": 4150,
   "num_train_epochs": 50,
+  "total_flos": 517096931328000.0,
   "trial_name": null,
   "trial_params": null
 }

checkpoint-1000/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:daae856c1bd075ece296b326f62ba6652364b07db8daef89d6f2fbf3bd2bd41c
 size 3963

 version https://git-lfs.github.com/spec/v1
+oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db
 size 3963

checkpoint-1500/config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.31.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-1500/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.31.0"
+}

checkpoint-1500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3508daf547ea7f7639a4fb24571a9b8f970238f6f697f19b0f1db2b9a98b29c6
+size 995605189

checkpoint-1500/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06afd4a9f36e6f35427e7af9c5cccb7a9c72446582d5b8fbaa051f797663e154
+size 497807197

checkpoint-1500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:30a541a95ca8532bf556cfdb7f8a3e1e420b0e7690ccb46c9bbe6564b3afaacb
+size 14575

checkpoint-1500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c6b7284950f4953612836af1f56ea96facef57d66abf579d705f9e90f0c98a46
+size 627

checkpoint-1500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 18.072289156626507,
+  "global_step": 1500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 6.02,
+      "learning_rate": 4.3975903614457834e-05,
+      "loss": 1.4659,
+      "step": 500
+    },
+    {
+      "epoch": 12.05,
+      "learning_rate": 3.7951807228915666e-05,
+      "loss": 0.9332,
+      "step": 1000
+    },
+    {
+      "epoch": 18.07,
+      "learning_rate": 3.192771084337349e-05,
+      "loss": 0.6382,
+      "step": 1500
+    }
+  ],
+  "max_steps": 4150,
+  "num_train_epochs": 50,
+  "total_flos": 775645396992000.0,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db
+size 3963

checkpoint-2000/config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.31.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-2000/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.31.0"
+}

checkpoint-2000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88fb77e14f3f2bbf6601731efc849309e9ede701eed2cb0ef54a8294251af3cf
+size 995605189

checkpoint-2000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:23beb0a89f5348bb7af9be60296215311b3367fa345499c5ee0330d5ffd42a38
+size 497807197

checkpoint-2000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:381ebe3e35df681c5618b3ab0df63f45938780152fc94545f3764f6b0192fcf7
+size 14575

checkpoint-2000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57b43099f81ffb6a9befdfae023d6369a7bd91bc547e0dc46fa50d4e069365d7
+size 627

checkpoint-2000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 24.096385542168676,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 6.02,
+      "learning_rate": 4.3975903614457834e-05,
+      "loss": 1.4659,
+      "step": 500
+    },
+    {
+      "epoch": 12.05,
+      "learning_rate": 3.7951807228915666e-05,
+      "loss": 0.9332,
+      "step": 1000
+    },
+    {
+      "epoch": 18.07,
+      "learning_rate": 3.192771084337349e-05,
+      "loss": 0.6382,
+      "step": 1500
+    },
+    {
+      "epoch": 24.1,
+      "learning_rate": 2.5903614457831325e-05,
+      "loss": 0.4494,
+      "step": 2000
+    }
+  ],
+  "max_steps": 4150,
+  "num_train_epochs": 50,
+  "total_flos": 1034193862656000.0,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db
+size 3963

checkpoint-2500/config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.31.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-2500/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.31.0"
+}

checkpoint-2500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a5371a17ee239fec7bf7d0c23e3435e00e76efc4d94b9d9941c8e57eea070784
+size 995605189

checkpoint-2500/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6abe995220e33a09e648c18d269c6d2ea69a77f7640fa5982efb6374fb9d983e
+size 497807197

checkpoint-2500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1520384ba472ff2f026dd4acfdd4ae2a72711e694cec5d27670d8b5709ac0c18
+size 14575

checkpoint-2500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:41014b3add9f861ce35d7c779356e164a56dc9ae7109e3d6ef3b1c10ef722681
+size 627

checkpoint-2500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 30.120481927710845,
+  "global_step": 2500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 6.02,
+      "learning_rate": 4.3975903614457834e-05,
+      "loss": 1.4659,
+      "step": 500
+    },
+    {
+      "epoch": 12.05,
+      "learning_rate": 3.7951807228915666e-05,
+      "loss": 0.9332,
+      "step": 1000
+    },
+    {
+      "epoch": 18.07,
+      "learning_rate": 3.192771084337349e-05,
+      "loss": 0.6382,
+      "step": 1500
+    },
+    {
+      "epoch": 24.1,
+      "learning_rate": 2.5903614457831325e-05,
+      "loss": 0.4494,
+      "step": 2000
+    },
+    {
+      "epoch": 30.12,
+      "learning_rate": 1.9879518072289157e-05,
+      "loss": 0.3313,
+      "step": 2500
+    }
+  ],
+  "max_steps": 4150,
+  "num_train_epochs": 50,
+  "total_flos": 1292742328320000.0,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db
+size 3963

checkpoint-3000/config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.31.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-3000/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.31.0"
+}

checkpoint-3000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c153ba14052e884699ec52bbc6e9c5e3f366f2771c359a578bbac8b0756ec9d5
+size 995605189

checkpoint-3000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e36db49ead0cdc668493714e88b32ab7879164341c222fcea17765bbe3314b5a
+size 497807197

checkpoint-3000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:994bb7e6532a4e2b19f13f3ecc9614a59a21889ab7c83b77a91af4f71aadc1e4
+size 14575

checkpoint-3000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e6a91de98682a6a12555212398d5b531617c6d2235bbf2b3b89a4995df8dd915
+size 627

checkpoint-3000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 36.144578313253014,
+  "global_step": 3000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 6.02,
+      "learning_rate": 4.3975903614457834e-05,
+      "loss": 1.4659,
+      "step": 500
+    },
+    {
+      "epoch": 12.05,
+      "learning_rate": 3.7951807228915666e-05,
+      "loss": 0.9332,
+      "step": 1000
+    },
+    {
+      "epoch": 18.07,
+      "learning_rate": 3.192771084337349e-05,
+      "loss": 0.6382,
+      "step": 1500
+    },
+    {
+      "epoch": 24.1,
+      "learning_rate": 2.5903614457831325e-05,
+      "loss": 0.4494,
+      "step": 2000
+    },
+    {
+      "epoch": 30.12,
+      "learning_rate": 1.9879518072289157e-05,
+      "loss": 0.3313,
+      "step": 2500
+    },
+    {
+      "epoch": 36.14,
+      "learning_rate": 1.3855421686746989e-05,
+      "loss": 0.2613,
+      "step": 3000
+    }
+  ],
+  "max_steps": 4150,
+  "num_train_epochs": 50,
+  "total_flos": 1551290793984000.0,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-3000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db
+size 3963

checkpoint-3500/config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.31.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-3500/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.31.0"
+}

checkpoint-3500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3fd0961db178fe69dde82e9633924bd5dade4b681580882d9c35d8435851a286
+size 995605189

checkpoint-3500/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:643b628f35d0674f567aa2c9dd03f36ac6a019641f121e2c43bf6415a1d9d402
+size 497807197

checkpoint-3500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b98c434920f0409371f6e5b00eef9d1d39333856c10622197b656c0c127a5d69
+size 14575

checkpoint-3500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cda70456e3fe4f42776fd6dba170aa5984245afa17b35b13908f10fcf7043751
+size 627

checkpoint-3500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 42.16867469879518,
+  "global_step": 3500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 6.02,
+      "learning_rate": 4.3975903614457834e-05,
+      "loss": 1.4659,
+      "step": 500
+    },
+    {
+      "epoch": 12.05,
+      "learning_rate": 3.7951807228915666e-05,
+      "loss": 0.9332,
+      "step": 1000
+    },
+    {
+      "epoch": 18.07,
+      "learning_rate": 3.192771084337349e-05,
+      "loss": 0.6382,
+      "step": 1500
+    },
+    {
+      "epoch": 24.1,
+      "learning_rate": 2.5903614457831325e-05,
+      "loss": 0.4494,
+      "step": 2000
+    },
+    {
+      "epoch": 30.12,
+      "learning_rate": 1.9879518072289157e-05,
+      "loss": 0.3313,
+      "step": 2500
+    },
+    {
+      "epoch": 36.14,
+      "learning_rate": 1.3855421686746989e-05,
+      "loss": 0.2613,
+      "step": 3000
+    },
+    {
+      "epoch": 42.17,
+      "learning_rate": 7.83132530120482e-06,
+      "loss": 0.2205,
+      "step": 3500
+    }
+  ],
+  "max_steps": 4150,
+  "num_train_epochs": 50,
+  "total_flos": 1809839259648000.0,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-3500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c714b36e7d24074058337bd366704620d82d3d6e9955157f5112b544294492db
+size 3963

checkpoint-4000/config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.31.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-4000/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.31.0"
+}

checkpoint-4000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf56ca92472f163445ae9898ef949dee7d3e92d8d182756ca58588c86eb2748c
+size 995605189

checkpoint-4000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0eb251db276be3b1662a562bdc62ef443eac6d0cfb52e5ae30f26ea72447b795
+size 497807197