End of training

Browse files

Files changed (12) hide show

README.md +49 -51
config.json +18 -23
generation_config.json +1 -0
model.safetensors +2 -2
runs/Mar07_16-27-25_ab0fb96f36d9/events.out.tfevents.1709828849.ab0fb96f36d9.391.0 +3 -0
runs/Mar07_16-31-01_ab0fb96f36d9/events.out.tfevents.1709829065.ab0fb96f36d9.391.1 +3 -0
runs/Mar07_16-32-29_ab0fb96f36d9/events.out.tfevents.1709829154.ab0fb96f36d9.391.2 +3 -0
runs/Mar07_16-32-29_ab0fb96f36d9/events.out.tfevents.1709829438.ab0fb96f36d9.391.3 +3 -0
runs/Mar07_16-37-22_ab0fb96f36d9/events.out.tfevents.1709829449.ab0fb96f36d9.391.4 +3 -0
runs/Mar07_16-38-38_ab0fb96f36d9/events.out.tfevents.1709829525.ab0fb96f36d9.391.5 +3 -0
tokenizer_config.json +5 -1
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,6 +1,4 @@
 ---
-license: mit
-base_model: gpt2
 tags:
 - generated_from_trainer
 model-index:
@@ -13,9 +11,9 @@ should probably proofread and complete it, then remove this comment. -->
 # 130000
-This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 5.9987
 ## Model description
@@ -49,53 +47,53 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| No log        | 0.92  | 3    | 7.0396          |
-| No log        | 1.85  | 6    | 6.5398          |
-| No log        | 2.77  | 9    | 6.3337          |
-| 6.6916        | 4.0   | 13   | 6.3694          |
-| 6.6916        | 4.92  | 16   | 6.2945          |
-| 6.6916        | 5.85  | 19   | 6.3184          |
-| 6.1092        | 6.77  | 22   | 6.3726          |
-| 6.1092        | 8.0   | 26   | 6.2948          |
-| 6.1092        | 8.92  | 29   | 6.3374          |
-| 6.5151        | 9.85  | 32   | 6.3641          |
-| 6.5151        | 10.77 | 35   | 6.2335          |
-| 6.5151        | 12.0  | 39   | 6.1965          |
-| 5.998         | 12.92 | 42   | 6.0595          |
-| 5.998         | 13.85 | 45   | 6.0374          |
-| 5.998         | 14.77 | 48   | 6.0562          |
-| 5.6623        | 16.0  | 52   | 6.0128          |
-| 5.6623        | 16.92 | 55   | 5.9999          |
-| 5.6623        | 17.85 | 58   | 6.0008          |
-| 5.611         | 18.77 | 61   | 5.9992          |
-| 5.611         | 20.0  | 65   | 6.0017          |
-| 5.611         | 20.92 | 68   | 6.0005          |
-| 5.5519        | 21.85 | 71   | 5.9962          |
-| 5.5519        | 22.77 | 74   | 5.9964          |
-| 5.5519        | 24.0  | 78   | 5.9975          |
-| 5.5841        | 24.92 | 81   | 5.9974          |
-| 5.5841        | 25.85 | 84   | 6.0000          |
-| 5.5841        | 26.77 | 87   | 6.0019          |
-| 5.5582        | 28.0  | 91   | 6.0014          |
-| 5.5582        | 28.92 | 94   | 6.0016          |
-| 5.5582        | 29.85 | 97   | 5.9987          |
-| 5.591         | 30.77 | 100  | 5.9992          |
-| 5.591         | 32.0  | 104  | 5.9986          |
-| 5.591         | 32.92 | 107  | 5.9982          |
-| 5.5638        | 33.85 | 110  | 5.9983          |
-| 5.5638        | 34.77 | 113  | 5.9987          |
-| 5.5638        | 36.0  | 117  | 5.9989          |
-| 5.5683        | 36.92 | 120  | 5.9992          |
-| 5.5683        | 37.85 | 123  | 5.9995          |
-| 5.5683        | 38.77 | 126  | 5.9991          |
-| 5.5628        | 40.0  | 130  | 5.9992          |
-| 5.5628        | 40.92 | 133  | 5.9992          |
-| 5.5628        | 41.85 | 136  | 5.9991          |
-| 5.5628        | 42.77 | 139  | 5.9989          |
-| 5.5683        | 44.0  | 143  | 5.9987          |
-| 5.5683        | 44.92 | 146  | 5.9987          |
-| 5.5683        | 45.85 | 149  | 5.9987          |
-| 5.5534        | 46.15 | 150  | 5.9987          |
 ### Framework versions

 ---
 tags:
 - generated_from_trainer
 model-index:
 # 130000
+This model was trained from scratch on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 6.0491
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| No log        | 0.92  | 3    | 6.2222          |
+| No log        | 1.85  | 6    | 6.2146          |
+| No log        | 2.77  | 9    | 6.2032          |
+| 5.9665        | 4.0   | 13   | 6.1877          |
+| 5.9665        | 4.92  | 16   | 6.1734          |
+| 5.9665        | 5.85  | 19   | 6.1620          |
+| 5.8921        | 6.77  | 22   | 6.1539          |
+| 5.8921        | 8.0   | 26   | 6.1426          |
+| 5.8921        | 8.92  | 29   | 6.1335          |
+| 5.8324        | 9.85  | 32   | 6.1277          |
+| 5.8324        | 10.77 | 35   | 6.1178          |
+| 5.8324        | 12.0  | 39   | 6.1105          |
+| 5.8012        | 12.92 | 42   | 6.1059          |
+| 5.8012        | 13.85 | 45   | 6.0992          |
+| 5.8012        | 14.77 | 48   | 6.0959          |
+| 5.7449        | 16.0  | 52   | 6.0910          |
+| 5.7449        | 16.92 | 55   | 6.0859          |
+| 5.7449        | 17.85 | 58   | 6.0819          |
+| 5.7303        | 18.77 | 61   | 6.0767          |
+| 5.7303        | 20.0  | 65   | 6.0734          |
+| 5.7303        | 20.92 | 68   | 6.0721          |
+| 5.6687        | 21.85 | 71   | 6.0694          |
+| 5.6687        | 22.77 | 74   | 6.0658          |
+| 5.6687        | 24.0  | 78   | 6.0628          |
+| 5.6839        | 24.92 | 81   | 6.0627          |
+| 5.6839        | 25.85 | 84   | 6.0600          |
+| 5.6839        | 26.77 | 87   | 6.0586          |
+| 5.6499        | 28.0  | 91   | 6.0572          |
+| 5.6499        | 28.92 | 94   | 6.0558          |
+| 5.6499        | 29.85 | 97   | 6.0555          |
+| 5.6703        | 30.77 | 100  | 6.0545          |
+| 5.6703        | 32.0  | 104  | 6.0533          |
+| 5.6703        | 32.92 | 107  | 6.0520          |
+| 5.6404        | 33.85 | 110  | 6.0518          |
+| 5.6404        | 34.77 | 113  | 6.0511          |
+| 5.6404        | 36.0  | 117  | 6.0509          |
+| 5.6414        | 36.92 | 120  | 6.0504          |
+| 5.6414        | 37.85 | 123  | 6.0498          |
+| 5.6414        | 38.77 | 126  | 6.0498          |
+| 5.6347        | 40.0  | 130  | 6.0496          |
+| 5.6347        | 40.92 | 133  | 6.0493          |
+| 5.6347        | 41.85 | 136  | 6.0491          |
+| 5.6347        | 42.77 | 139  | 6.0491          |
+| 5.638         | 44.0  | 143  | 6.0491          |
+| 5.638         | 44.92 | 146  | 6.0491          |
+| 5.638         | 45.85 | 149  | 6.0491          |
+| 5.6249        | 46.15 | 150  | 6.0491          |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,39 +1,34 @@
 {
-  "_name_or_path": "gpt2",
   "activation_function": "gelu_new",
   "architectures": [
-    "GPT2LMHeadModel"
   ],
-  "attn_pdrop": 0.1,
   "bos_token_id": 0,
-  "embd_pdrop": 0.1,
   "eos_token_id": 0,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
-  "model_type": "gpt2",
   "n_ctx": 512,
-  "n_embd": 768,
-  "n_head": 12,
   "n_inner": null,
-  "n_layer": 12,
-  "n_positions": 1024,
-  "reorder_and_upcast_attn": false,
-  "resid_pdrop": 0.1,
-  "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
-  "summary_activation": null,
-  "summary_first_dropout": 0.1,
-  "summary_proj_to_labels": true,
-  "summary_type": "cls_index",
-  "summary_use_proj": true,
-  "task_specific_params": {
-    "text-generation": {
-      "do_sample": true,
-      "max_length": 50
-    }
-  },
   "torch_dtype": "float32",
   "transformers_version": "4.38.2",
   "use_cache": true,
   "vocab_size": 3000
 }

 {
   "activation_function": "gelu_new",
   "architectures": [
+    "GPTJForCausalLM"
   ],
+  "attention_probs_dropout_prob": 0.0,
+  "attn_pdrop": 0.0,
   "bos_token_id": 0,
+  "embd_pdrop": 0.0,
   "eos_token_id": 0,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.0,
   "initializer_range": 0.02,
+  "intermediate_size": 37,
   "layer_norm_epsilon": 1e-05,
+  "model_type": "gptj",
   "n_ctx": 512,
+  "n_embd": 32,
+  "n_head": 4,
   "n_inner": null,
+  "n_layer": 5,
+  "n_positions": 512,
+  "pad_token_id": 98,
+  "resid_pdrop": 0.0,
+  "rotary_dim": 4,
   "scale_attn_weights": true,
+  "tie_word_embeddings": false,
   "torch_dtype": "float32",
   "transformers_version": "4.38.2",
+  "type_vocab_size": 16,
   "use_cache": true,
   "vocab_size": 3000
 }

generation_config.json CHANGED Viewed

@@ -2,5 +2,6 @@
   "_from_model_config": true,
   "bos_token_id": 0,
   "eos_token_id": 0,
   "transformers_version": "4.38.2"
 }

   "_from_model_config": true,
   "bos_token_id": 0,
   "eos_token_id": 0,
+  "pad_token_id": 98,
   "transformers_version": "4.38.2"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:351b48eaff23f55746de242ba034442136e0a6352690b504c07a9fb3393b3097
-size 352600704

 version https://git-lfs.github.com/spec/v1
+oid sha256:6bf418c8888f3a04e41616a29ade978ed5a474ebb0d5b69f747abf61958af2ba
+size 1035784

runs/Mar07_16-27-25_ab0fb96f36d9/events.out.tfevents.1709828849.ab0fb96f36d9.391.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28253ec6184cc4d03d3747f1cd9e1c9f30b2f1a2c686623ba7a4714ccf203732
+size 4657

runs/Mar07_16-31-01_ab0fb96f36d9/events.out.tfevents.1709829065.ab0fb96f36d9.391.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e3994568be614943a8edf3e14794d450552093fc46ab84ebbd26e7ce11c239c
+size 4598

runs/Mar07_16-32-29_ab0fb96f36d9/events.out.tfevents.1709829154.ab0fb96f36d9.391.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d167a6bba0084ca78afd9a6f0ae796b1fb605d091c0461f1be8cd549c746df8
+size 20611

runs/Mar07_16-32-29_ab0fb96f36d9/events.out.tfevents.1709829438.ab0fb96f36d9.391.3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:843ad9f8ee5a0bf7b95773392ad29b4b35ca177b69470eade92c7cbcc779fe1e
+size 4598

runs/Mar07_16-37-22_ab0fb96f36d9/events.out.tfevents.1709829449.ab0fb96f36d9.391.4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0af42242235272c01af969c149f4988b176900a0f5ae2d314f48772a6fc1ad19
+size 20612

runs/Mar07_16-38-38_ab0fb96f36d9/events.out.tfevents.1709829525.ab0fb96f36d9.391.5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7af6ae358089ef8fc638591dc53d38320e19cb425e393917f76e6dc91ccaa2a1
+size 20611

tokenizer_config.json CHANGED Viewed

@@ -12,7 +12,11 @@
   "bos_token": "<|endoftext|>",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<|endoftext|>",
-  "tokenizer_class": "PreTrainedTokenizerFast"
 }

   "bos_token": "<|endoftext|>",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
+  "max_length": 512,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<|endoftext|>",
+  "stride": 0,
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:271ba549ef8c7caf4fb91fe382f5864e4ddb623c2ad4de01dfdcd7b38cda4ee7
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f698f3bfd884129510dd5f8a5793bf0f5d0a85c3f763e753ed43c97e4e97d78
 size 4920