End of training

Browse files

Files changed (8) hide show

README.md +11 -11
config.json +4 -4
generation_config.json +2 -2
model.safetensors +2 -2
runs/Feb28_16-17-19_3897ec21fae5/events.out.tfevents.1709137040.3897ec21fae5.58973.0 +3 -0
tokenizer.json +65 -116
tokenizer_config.json +3 -27
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.7400
 ## Model description
@@ -44,16 +44,16 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 2.966         | 1.0   | 6    | 2.2765          |
-| 2.0606        | 2.0   | 12   | 1.7855          |
-| 1.6303        | 3.0   | 18   | 1.4249          |
-| 1.2968        | 4.0   | 24   | 1.1742          |
-| 1.1342        | 5.0   | 30   | 1.0421          |
-| 0.9916        | 6.0   | 36   | 0.9159          |
-| 0.8864        | 7.0   | 42   | 0.8410          |
-| 0.8119        | 8.0   | 48   | 0.7857          |
-| 0.7691        | 9.0   | 54   | 0.7509          |
-| 0.7515        | 10.0  | 60   | 0.7400          |
 ### Framework versions

 This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.8293
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 3.1137        | 1.0   | 5    | 2.4084          |
+| 2.2183        | 2.0   | 10   | 1.9525          |
+| 1.8245        | 3.0   | 15   | 1.6233          |
+| 1.4964        | 4.0   | 20   | 1.3035          |
+| 1.2386        | 5.0   | 25   | 1.1278          |
+| 1.1028        | 6.0   | 30   | 1.0257          |
+| 1.0138        | 7.0   | 35   | 0.9629          |
+| 0.9489        | 8.0   | 40   | 0.8972          |
+| 0.8914        | 9.0   | 45   | 0.8501          |
+| 0.8541        | 10.0  | 50   | 0.8293          |
 ### Framework versions

config.json CHANGED Viewed

@@ -78,9 +78,9 @@
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
-    "vocab_size": 56
   },
-  "decoder_start_token_id": 1,
   "encoder": {
     "_name_or_path": "",
     "add_cross_attention": false,
@@ -157,9 +157,9 @@
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
-    "vocab_size": 56
   },
-  "eos_token_id": 6,
   "is_encoder_decoder": true,
   "model_type": "encoder-decoder",
   "pad_token_id": 3,

     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
+    "vocab_size": 53
   },
+  "decoder_start_token_id": 2,
   "encoder": {
     "_name_or_path": "",
     "add_cross_attention": false,
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
+    "vocab_size": 53
   },
+  "eos_token_id": 0,
   "is_encoder_decoder": true,
   "model_type": "encoder-decoder",
   "pad_token_id": 3,

generation_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "_from_model_config": true,
-  "decoder_start_token_id": 1,
-  "eos_token_id": 6,
   "pad_token_id": 3,
   "transformers_version": "4.37.2"
 }

 {
   "_from_model_config": true,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 0,
   "pad_token_id": 3,
   "transformers_version": "4.37.2"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52242b7293996d19398d1cb5904e713f5bce9bdcddfa61064c3e552d21d77ae3
-size 31213760

 version https://git-lfs.github.com/spec/v1
+oid sha256:81fc7b57152471f3a7e91ddb5748cb31d364cdf1231993b9b2e9a2e4935eacc2
+size 31207604

runs/Feb28_16-17-19_3897ec21fae5/events.out.tfevents.1709137040.3897ec21fae5.58973.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:05368d123af3b649195433823c9c8dff918427372036ad24af5306e61e5c1043
+size 12864

tokenizer.json CHANGED Viewed

@@ -5,7 +5,7 @@
   "added_tokens": [
     {
       "id": 0,
-      "content": "[UNK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
@@ -14,7 +14,7 @@
     },
     {
       "id": 1,
-      "content": "[CLS]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
@@ -23,7 +23,7 @@
     },
     {
       "id": 2,
-      "content": "[SEP]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
@@ -38,33 +38,6 @@
       "rstrip": false,
       "normalized": false,
       "special": true
-    },
-    {
-      "id": 4,
-      "content": "[MASK]",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
-    },
-    {
-      "id": 5,
-      "content": "[BOS]",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
-    },
-    {
-      "id": 6,
-      "content": "[EOS]",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
     }
   ],
   "normalizer": null,
@@ -82,7 +55,7 @@
       },
       {
         "SpecialToken": {
-          "id": "[CLS]",
           "type_id": 0
         }
       }
@@ -94,39 +67,18 @@
           "type_id": 0
         }
       },
-      {
-        "SpecialToken": {
-          "id": "[CLS]",
-          "type_id": 0
-        }
-      },
       {
         "Sequence": {
           "id": "B",
           "type_id": 1
         }
-      },
-      {
-        "SpecialToken": {
-          "id": "[EOS]",
-          "type_id": 1
-        }
       }
     ],
     "special_tokens": {
-      "[CLS]": {
-        "id": "[CLS]",
-        "ids": [
-          0
-        ],
-        "tokens": [
-          "[CLS]"
-        ]
-      },
       "[EOS]": {
         "id": "[EOS]",
         "ids": [
-          1
         ],
         "tokens": [
           "[EOS]"
@@ -144,80 +96,77 @@
     "fuse_unk": false,
     "byte_fallback": false,
     "vocab": {
-      "[UNK]": 0,
-      "[CLS]": 1,
-      "[SEP]": 2,
       "[PAD]": 3,
-      "[MASK]": 4,
-      "[BOS]": 5,
-      "[EOS]": 6,
-      "+": 7,
-      "-": 8,
-      "0": 9,
-      "1": 10,
-      "2": 11,
-      "3": 12,
-      "4": 13,
-      "5": 14,
-      "6": 15,
-      "7": 16,
-      "8": 17,
-      "9": 18,
-      "=": 19,
-      "99": 20,
-      "10": 21,
-      "98": 22,
-      "11": 23,
-      "97": 24,
-      "12": 25,
-      "96": 26,
-      "13": 27,
-      "95": 28,
-      "14": 29,
-      "94": 30,
-      "15": 31,
-      "93": 32,
-      "16": 33,
-      "92": 34,
-      "17": 35,
-      "91": 36,
-      "18": 37,
-      "19": 38,
-      "90": 39,
-      "20": 40,
-      "89": 41,
-      "21": 42,
-      "88": 43,
-      "22": 44,
-      "87": 45,
-      "23": 46,
-      "86": 47,
-      "85": 48,
-      "24": 49,
-      "25": 50,
-      "84": 51,
-      "26": 52,
-      "83": 53,
-      "27": 54,
-      "82": 55
     },
     "merges": [
-      "9 9",
       "1 0",
-      "9 8",
       "1 1",
-      "9 7",
       "1 2",
-      "9 6",
       "1 3",
-      "9 5",
       "1 4",
       "9 4",
       "1 5",
       "9 3",
       "1 6",
-      "9 2",
       "1 7",
       "9 1",
       "1 8",
       "1 9",
@@ -228,10 +177,10 @@
       "8 8",
       "2 2",
       "8 7",
-      "2 3",
       "8 6",
-      "8 5",
       "2 4",
       "2 5",
       "8 4",
       "2 6",

   "added_tokens": [
     {
       "id": 0,
+      "content": "[EOS]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
     },
     {
       "id": 1,
+      "content": "[UNK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
     },
     {
       "id": 2,
+      "content": "[CLS]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
       "rstrip": false,
       "normalized": false,
       "special": true
     }
   ],
   "normalizer": null,
       },
       {
         "SpecialToken": {
+          "id": "[EOS]",
           "type_id": 0
         }
       }
           "type_id": 0
         }
       },
       {
         "Sequence": {
           "id": "B",
           "type_id": 1
         }
       }
     ],
     "special_tokens": {
       "[EOS]": {
         "id": "[EOS]",
         "ids": [
+          0
         ],
         "tokens": [
           "[EOS]"
     "fuse_unk": false,
     "byte_fallback": false,
     "vocab": {
+      "[EOS]": 0,
+      "[UNK]": 1,
+      "[CLS]": 2,
       "[PAD]": 3,
+      "+": 4,
+      "-": 5,
+      "0": 6,
+      "1": 7,
+      "2": 8,
+      "3": 9,
+      "4": 10,
+      "5": 11,
+      "6": 12,
+      "7": 13,
+      "8": 14,
+      "9": 15,
+      "=": 16,
+      "10": 17,
+      "99": 18,
+      "11": 19,
+      "98": 20,
+      "12": 21,
+      "97": 22,
+      "13": 23,
+      "96": 24,
+      "14": 25,
+      "95": 26,
+      "94": 27,
+      "15": 28,
+      "93": 29,
+      "16": 30,
+      "17": 31,
+      "92": 32,
+      "91": 33,
+      "18": 34,
+      "19": 35,
+      "90": 36,
+      "20": 37,
+      "89": 38,
+      "21": 39,
+      "88": 40,
+      "22": 41,
+      "87": 42,
+      "86": 43,
+      "23": 44,
+      "24": 45,
+      "85": 46,
+      "25": 47,
+      "84": 48,
+      "26": 49,
+      "83": 50,
+      "27": 51,
+      "82": 52
     },
     "merges": [
       "1 0",
+      "9 9",
       "1 1",
+      "9 8",
       "1 2",
+      "9 7",
       "1 3",
+      "9 6",
       "1 4",
+      "9 5",
       "9 4",
       "1 5",
       "9 3",
       "1 6",
       "1 7",
+      "9 2",
       "9 1",
       "1 8",
       "1 9",
       "8 8",
       "2 2",
       "8 7",
       "8 6",
+      "2 3",
       "2 4",
+      "8 5",
       "2 5",
       "8 4",
       "2 6",

tokenizer_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "added_tokens_decoder": {
     "0": {
-      "content": "[UNK]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -9,7 +9,7 @@
       "special": true
     },
     "1": {
-      "content": "[CLS]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -17,7 +17,7 @@
       "special": true
     },
     "2": {
-      "content": "[SEP]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -31,30 +31,6 @@
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "4": {
-      "content": "[MASK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "5": {
-      "content": "[BOS]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "6": {
-      "content": "[EOS]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
   "clean_up_tokenization_spaces": true,

 {
   "added_tokens_decoder": {
     "0": {
+      "content": "[EOS]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "special": true
     },
     "1": {
+      "content": "[UNK]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "special": true
     },
     "2": {
+      "content": "[CLS]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "clean_up_tokenization_spaces": true,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e54a33d2f290f85e42924b5f98306a6bedf48ced4afa93e90600479ea6cfd23f
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:0e46815b331bc7909d4a5070fdf40bbc45cf7721c8d9b964c3fe72a560bdecce
 size 4920