End of training

Browse files

Files changed (6) hide show

README.md +41 -41
config.json +2 -2
model.safetensors +2 -2
runs/Mar04_10-27-31_987e769f7287/events.out.tfevents.1709548051.987e769f7287.502.2 +3 -0
tokenizer.json +125 -37
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1417
 ## Model description
@@ -45,46 +45,46 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 2.9571        | 1.0   | 6    | 2.2956          |
-| 2.0465        | 2.0   | 12   | 1.7293          |
-| 1.5363        | 3.0   | 18   | 1.2901          |
-| 1.2139        | 4.0   | 24   | 1.0866          |
-| 1.0369        | 5.0   | 30   | 0.9429          |
-| 0.9144        | 6.0   | 36   | 0.8284          |
-| 0.8202        | 7.0   | 42   | 0.7701          |
-| 0.7591        | 8.0   | 48   | 0.7184          |
-| 0.7075        | 9.0   | 54   | 0.6474          |
-| 0.6481        | 10.0  | 60   | 0.6074          |
-| 0.611         | 11.0  | 66   | 0.5786          |
-| 0.5714        | 12.0  | 72   | 0.5192          |
-| 0.5412        | 13.0  | 78   | 0.4924          |
-| 0.5092        | 14.0  | 84   | 0.5077          |
-| 0.5118        | 15.0  | 90   | 0.4989          |
-| 0.4982        | 16.0  | 96   | 0.4349          |
-| 0.4512        | 17.0  | 102  | 0.4090          |
-| 0.4176        | 18.0  | 108  | 0.3868          |
-| 0.395         | 19.0  | 114  | 0.3586          |
-| 0.3754        | 20.0  | 120  | 0.3666          |
-| 0.3561        | 21.0  | 126  | 0.3182          |
-| 0.3284        | 22.0  | 132  | 0.2906          |
-| 0.3123        | 23.0  | 138  | 0.2981          |
-| 0.3026        | 24.0  | 144  | 0.2740          |
-| 0.2809        | 25.0  | 150  | 0.2448          |
-| 0.2669        | 26.0  | 156  | 0.2321          |
-| 0.257         | 27.0  | 162  | 0.2179          |
-| 0.2349        | 28.0  | 168  | 0.2110          |
-| 0.2205        | 29.0  | 174  | 0.1969          |
-| 0.2142        | 30.0  | 180  | 0.1894          |
-| 0.2116        | 31.0  | 186  | 0.1819          |
-| 0.195         | 32.0  | 192  | 0.1747          |
-| 0.1884        | 33.0  | 198  | 0.1668          |
-| 0.1819        | 34.0  | 204  | 0.1615          |
-| 0.1741        | 35.0  | 210  | 0.1541          |
-| 0.172         | 36.0  | 216  | 0.1524          |
-| 0.1726        | 37.0  | 222  | 0.1471          |
-| 0.1555        | 38.0  | 228  | 0.1458          |
-| 0.1606        | 39.0  | 234  | 0.1430          |
-| 0.1604        | 40.0  | 240  | 0.1417          |
 ### Framework versions

 This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.1171
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 2.9048        | 1.0   | 12   | 2.1270          |
+| 1.9316        | 2.0   | 24   | 1.6532          |
+| 1.4738        | 3.0   | 36   | 1.2096          |
+| 0.9811        | 4.0   | 48   | 0.6866          |
+| 0.6247        | 5.0   | 60   | 0.5666          |
+| 0.5433        | 6.0   | 72   | 0.5106          |
+| 0.4772        | 7.0   | 84   | 0.4398          |
+| 0.4271        | 8.0   | 96   | 0.3991          |
+| 0.3906        | 9.0   | 108  | 0.3674          |
+| 0.3565        | 10.0  | 120  | 0.3397          |
+| 0.3301        | 11.0  | 132  | 0.2913          |
+| 0.2869        | 12.0  | 144  | 0.2633          |
+| 0.2616        | 13.0  | 156  | 0.2313          |
+| 0.2375        | 14.0  | 168  | 0.2168          |
+| 0.2218        | 15.0  | 180  | 0.1979          |
+| 0.2127        | 16.0  | 192  | 0.1937          |
+| 0.2008        | 17.0  | 204  | 0.1870          |
+| 0.1933        | 18.0  | 216  | 0.1886          |
+| 0.1857        | 19.0  | 228  | 0.1726          |
+| 0.1801        | 20.0  | 240  | 0.1682          |
+| 0.1722        | 21.0  | 252  | 0.1655          |
+| 0.168         | 22.0  | 264  | 0.1604          |
+| 0.1644        | 23.0  | 276  | 0.1530          |
+| 0.1644        | 24.0  | 288  | 0.1574          |
+| 0.1582        | 25.0  | 300  | 0.1477          |
+| 0.1551        | 26.0  | 312  | 0.1460          |
+| 0.1523        | 27.0  | 324  | 0.1458          |
+| 0.1471        | 28.0  | 336  | 0.1365          |
+| 0.1463        | 29.0  | 348  | 0.1385          |
+| 0.1393        | 30.0  | 360  | 0.1364          |
+| 0.1355        | 31.0  | 372  | 0.1324          |
+| 0.134         | 32.0  | 384  | 0.1309          |
+| 0.1315        | 33.0  | 396  | 0.1274          |
+| 0.1317        | 34.0  | 408  | 0.1243          |
+| 0.1266        | 35.0  | 420  | 0.1223          |
+| 0.1248        | 36.0  | 432  | 0.1206          |
+| 0.1232        | 37.0  | 444  | 0.1211          |
+| 0.1217        | 38.0  | 456  | 0.1178          |
+| 0.1208        | 39.0  | 468  | 0.1166          |
+| 0.1208        | 40.0  | 480  | 0.1171          |
 ### Framework versions

config.json CHANGED Viewed

@@ -78,7 +78,7 @@
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
-    "vocab_size": 53
   },
   "decoder_start_token_id": 2,
   "encoder": {
@@ -157,7 +157,7 @@
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
-    "vocab_size": 53
   },
   "eos_token_id": 0,
   "is_encoder_decoder": true,

     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
+    "vocab_size": 97
   },
   "decoder_start_token_id": 2,
   "encoder": {
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
+    "vocab_size": 97
   },
   "eos_token_id": 0,
   "is_encoder_decoder": true,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20adbf651b3249c3a50b9a2faa1eb49f74f3190f9ebed2ea62b13f2ddd8ecad3
-size 31207604

 version https://git-lfs.github.com/spec/v1
+oid sha256:048a14fe8adaf7264d94494209e53ae7ace0da5c1c197569cc31de9d06f9ebe3
+size 31297892

runs/Mar04_10-27-31_987e769f7287/events.out.tfevents.1709548051.987e769f7287.502.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3032a3709945b0fa85d302f32a8c3f768db41067ff6575d4a28cab2666f70ce3
+size 28095

tokenizer.json CHANGED Viewed

@@ -114,79 +114,167 @@
       "9": 15,
       "=": 16,
       "10": 17,
-      "99": 18,
-      "98": 19,
       "11": 20,
       "97": 21,
       "12": 22,
-      "96": 23,
-      "13": 24,
-      "95": 25,
-      "14": 26,
-      "94": 27,
       "15": 28,
-      "93": 29,
-      "16": 30,
-      "17": 31,
-      "92": 32,
       "18": 33,
       "91": 34,
-      "90": 35,
-      "19": 36,
-      "20": 37,
-      "89": 38,
-      "21": 39,
-      "88": 40,
-      "87": 41,
-      "22": 42,
-      "23": 43,
-      "86": 44,
       "85": 45,
       "24": 46,
-      "25": 47,
-      "84": 48,
-      "83": 49,
-      "26": 50,
       "27": 51,
-      "82": 52
     },
     "merges": [
       "1 0",
-      "9 9",
       "9 8",
       "1 1",
       "9 7",
       "1 2",
-      "9 6",
       "1 3",
-      "9 5",
       "1 4",
       "9 4",
       "1 5",
-      "9 3",
       "1 6",
-      "1 7",
       "9 2",
       "1 8",
       "9 1",
       "9 0",
       "1 9",
       "2 0",
-      "8 9",
-      "2 1",
       "8 8",
       "8 7",
       "2 2",
-      "2 3",
       "8 6",
       "8 5",
       "2 4",
-      "2 5",
       "8 4",
-      "8 3",
       "2 6",
       "2 7",
-      "8 2"
     ]
   }
 }

       "9": 15,
       "=": 16,
       "10": 17,
+      "98": 18,
+      "99": 19,
       "11": 20,
       "97": 21,
       "12": 22,
+      "13": 23,
+      "96": 24,
+      "14": 25,
+      "94": 26,
+      "95": 27,
       "15": 28,
+      "16": 29,
+      "93": 30,
+      "92": 31,
+      "17": 32,
       "18": 33,
       "91": 34,
+      "89": 35,
+      "90": 36,
+      "19": 37,
+      "20": 38,
+      "88": 39,
+      "87": 40,
+      "22": 41,
+      "21": 42,
+      "86": 43,
+      "23": 44,
       "85": 45,
       "24": 46,
+      "84": 47,
+      "25": 48,
+      "26": 49,
+      "83": 50,
       "27": 51,
+      "82": 52,
+      "28": 53,
+      "81": 54,
+      "29": 55,
+      "30": 56,
+      "80": 57,
+      "31": 58,
+      "77": 59,
+      "32": 60,
+      "79": 61,
+      "78": 62,
+      "76": 63,
+      "37": 64,
+      "34": 65,
+      "33": 66,
+      "36": 67,
+      "75": 68,
+      "35": 69,
+      "38": 70,
+      "72": 71,
+      "73": 72,
+      "70": 73,
+      "39": 74,
+      "41": 75,
+      "71": 76,
+      "74": 77,
+      "69": 78,
+      "40": 79,
+      "42": 80,
+      "66": 81,
+      "46": 82,
+      "67": 83,
+      "43": 84,
+      "44": 85,
+      "47": 86,
+      "61": 87,
+      "64": 88,
+      "65": 89,
+      "68": 90,
+      "45": 91,
+      "49": 92,
+      "51": 93,
+      "52": 94,
+      "53": 95,
+      "54": 96
     },
     "merges": [
       "1 0",
       "9 8",
+      "9 9",
       "1 1",
       "9 7",
       "1 2",
       "1 3",
+      "9 6",
       "1 4",
       "9 4",
+      "9 5",
       "1 5",
       "1 6",
+      "9 3",
       "9 2",
+      "1 7",
       "1 8",
       "9 1",
+      "8 9",
       "9 0",
       "1 9",
       "2 0",
       "8 8",
       "8 7",
       "2 2",
+      "2 1",
       "8 6",
+      "2 3",
       "8 5",
       "2 4",
       "8 4",
+      "2 5",
       "2 6",
+      "8 3",
       "2 7",
+      "8 2",
+      "2 8",
+      "8 1",
+      "2 9",
+      "3 0",
+      "8 0",
+      "3 1",
+      "7 7",
+      "3 2",
+      "7 9",
+      "7 8",
+      "7 6",
+      "3 7",
+      "3 4",
+      "3 3",
+      "3 6",
+      "7 5",
+      "3 5",
+      "3 8",
+      "7 2",
+      "7 3",
+      "7 0",
+      "3 9",
+      "4 1",
+      "7 1",
+      "7 4",
+      "6 9",
+      "4 0",
+      "4 2",
+      "6 6",
+      "4 6",
+      "6 7",
+      "4 3",
+      "4 4",
+      "4 7",
+      "6 1",
+      "6 4",
+      "6 5",
+      "6 8",
+      "4 5",
+      "4 9",
+      "5 1",
+      "5 2",
+      "5 3",
+      "5 4"
     ]
   }
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4845741ab1a3479bc072df5105c0b7150f8aee2c76da05c97488950d259a6c8a
 size 5112

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e4516adef82531e49bb6ecb5fc5acf436a78563c1953ab7de5740399eef4824
 size 5112