new model version
- checkpoint +0 -3
- config.json +14 -2
- counter +0 -1
- encoder.json +0 -0
- merges.txt +1 -1
- pytorch_model.bin +2 -2
- tokenizer.json +0 -0
- tokenizer_config.json +10 -0
- trainer_state.json +22 -0
- vocab.bpe +0 -0
- vocab.json +0 -0
checkpoint
DELETED
@@ -1,3 +0,0 @@
-model_checkpoint_path: "model-1809"
-all_model_checkpoint_paths: "model-1000"
-all_model_checkpoint_paths: "model-1809"
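The deleted `checkpoint` file is the manifest that TensorFlow writes alongside its checkpoints, consistent with this commit swapping an original TensorFlow layout for a `transformers` PyTorch one. A minimal sketch of how such a manifest is normally parsed (the directory path here is an assumption):

```python
import tensorflow as tf

# Parse a TF CheckpointState manifest like the deleted `checkpoint` file.
# "." is a placeholder for wherever the old TF checkpoint directory lived.
state = tf.train.get_checkpoint_state(".")
if state is not None:
    print(state.model_checkpoint_path)       # "model-1809"
    print(state.all_model_checkpoint_paths)  # ["model-1000", "model-1809"]
```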
config.json
CHANGED
@@ -1,5 +1,9 @@
 {
+  "_name_or_path": "gpt2-medium",
   "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
   "attn_pdrop": 0.1,
   "bos_token_id": 50256,
   "embd_pdrop": 0.1,
@@ -13,7 +17,8 @@
   "n_inner": null,
   "n_layer": 24,
   "n_positions": 1024,
-  "
+  "n_special": 0,
+  "predict_special_tokens": true,
   "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,
   "scale_attn_by_inverse_layer_idx": false,
@@ -23,7 +28,14 @@
   "summary_proj_to_labels": true,
   "summary_type": "cls_index",
   "summary_use_proj": true,
-  "
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.23.0.dev0",
   "use_cache": true,
   "vocab_size": 50257
 }
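The added `task_specific_params.text-generation` block supplies per-task generation defaults that `transformers` pipelines read from the config. A minimal sketch of the effect, with `your-namespace/your-model` standing in for this repo's actual id:

```python
from transformers import pipeline

# With task_specific_params["text-generation"] in config.json, the pipeline
# picks up do_sample=True and max_length=50 as generation defaults.
generator = pipeline("text-generation", model="your-namespace/your-model")
print(generator("The quick brown fox")[0]["generated_text"])
```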
counter
DELETED
@@ -1 +0,0 @@
-1809
encoder.json
DELETED
The diff for this file is too large to render.
merges.txt
CHANGED
@@ -1,4 +1,4 @@
-#version: 0.2
+#version: 0.2 - Trained by `huggingface/tokenizers`
 Ġ t
 Ġ a
 h e
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:8b771e607deb7759c82cdf3518edc6159296409048d879a8577c28311631bcd9
+size 1444566873
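Both sides of this diff are Git LFS pointer files: the repo tracks only a hash and byte size while the roughly 1.44 GB of weights live in LFS storage. A sketch of verifying a downloaded `pytorch_model.bin` against the new pointer (the local path is an assumption):

```python
import hashlib

def lfs_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash the file in 1 MiB chunks so the 1.4 GB binary never sits fully in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "8b771e607deb7759c82cdf3518edc6159296409048d879a8577c28311631bcd9"
assert lfs_sha256("pytorch_model.bin") == expected
```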
tokenizer.json
CHANGED
The diff for this file is too large to render.
tokenizer_config.json
ADDED
@@ -0,0 +1,10 @@
+{
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "name_or_path": "gpt2-medium",
+  "special_tokens_map_file": null,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}
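GPT-2 has no dedicated pad or unk tokens, so this new config maps `bos_token`, `eos_token`, and `unk_token` all to `<|endoftext|>` and caps inputs at the model's 1024-position window. A quick check of how `transformers` reads it back (`your-namespace/your-model` is again a placeholder for this repo's id):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-namespace/your-model")  # placeholder id
assert tok.bos_token == tok.eos_token == tok.unk_token == "<|endoftext|>"
assert tok.model_max_length == 1024
```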
trainer_state.json
ADDED
@@ -0,0 +1,22 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.05319714863283328,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05,
+      "learning_rate": 4.9473348228534955e-05,
+      "loss": 2.8884,
+      "step": 500
+    }
+  ],
+  "max_steps": 46995,
+  "num_train_epochs": 5,
+  "total_flos": 928700694528000.0,
+  "trial_name": null,
+  "trial_params": null
+}
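The fractional `epoch` is simply the step counter scaled by steps per epoch: `max_steps` of 46995 over `num_train_epochs` of 5 gives 9399 steps per epoch, and 500 / 9399 reproduces the recorded value exactly:

```python
steps_per_epoch = 46995 / 5    # max_steps / num_train_epochs = 9399.0
epoch = 500 / steps_per_epoch  # global_step / steps_per_epoch
print(epoch)                   # 0.05319714863283328
```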
vocab.bpe
DELETED
The diff for this file is too large to render.
vocab.json
CHANGED
The diff for this file is too large to render.