ludziej committed
Commit 41ae318
1 Parent(s): 1c83c91

End of training

README.md CHANGED
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->

This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
It achieves the following results on the evaluation set:
- - Loss: 1.0813
+ - Loss: 0.8456

## Model description

@@ -44,56 +44,56 @@ The following hyperparameters were used during training:

| Training Loss | Epoch | Step | Validation Loss |
|:-------------:|:-----:|:----:|:---------------:|
- | 3.8078 | 1.0 | 6 | 3.6115 |
- | 3.5402 | 2.0 | 12 | 3.4403 |
- | 3.3905 | 3.0 | 18 | 3.3023 |
- | 3.2601 | 4.0 | 24 | 3.1757 |
- | 3.1298 | 5.0 | 30 | 3.0465 |
- | 2.9919 | 6.0 | 36 | 2.9159 |
- | 2.8647 | 7.0 | 42 | 2.7868 |
- | 2.7503 | 8.0 | 48 | 2.6616 |
- | 2.6207 | 9.0 | 54 | 2.5386 |
- | 2.4973 | 10.0 | 60 | 2.4256 |
- | 2.3944 | 11.0 | 66 | 2.3203 |
- | 2.2924 | 12.0 | 72 | 2.2263 |
- | 2.2061 | 13.0 | 78 | 2.1487 |
- | 2.117 | 14.0 | 84 | 2.0624 |
- | 2.044 | 15.0 | 90 | 1.9910 |
- | 1.9718 | 16.0 | 96 | 1.9239 |
- | 1.9093 | 17.0 | 102 | 1.8786 |
- | 1.8542 | 18.0 | 108 | 1.8129 |
- | 1.8085 | 19.0 | 114 | 1.7692 |
- | 1.7653 | 20.0 | 120 | 1.7316 |
- | 1.7103 | 21.0 | 126 | 1.6790 |
- | 1.6757 | 22.0 | 132 | 1.6199 |
- | 1.6089 | 23.0 | 138 | 1.5592 |
- | 1.5391 | 24.0 | 144 | 1.5067 |
- | 1.4987 | 25.0 | 150 | 1.4640 |
- | 1.4535 | 26.0 | 156 | 1.4296 |
- | 1.4285 | 27.0 | 162 | 1.3858 |
- | 1.3828 | 28.0 | 168 | 1.3493 |
- | 1.3468 | 29.0 | 174 | 1.3184 |
- | 1.3265 | 30.0 | 180 | 1.2910 |
- | 1.2953 | 31.0 | 186 | 1.2636 |
- | 1.2804 | 32.0 | 192 | 1.2402 |
- | 1.2522 | 33.0 | 198 | 1.2223 |
- | 1.2375 | 34.0 | 204 | 1.2094 |
- | 1.219 | 35.0 | 210 | 1.1914 |
- | 1.2133 | 36.0 | 216 | 1.1762 |
- | 1.19 | 37.0 | 222 | 1.1606 |
- | 1.1839 | 38.0 | 228 | 1.1532 |
- | 1.1737 | 39.0 | 234 | 1.1380 |
- | 1.1635 | 40.0 | 240 | 1.1267 |
- | 1.1496 | 41.0 | 246 | 1.1219 |
- | 1.1514 | 42.0 | 252 | 1.1104 |
- | 1.1285 | 43.0 | 258 | 1.1056 |
- | 1.1367 | 44.0 | 264 | 1.0976 |
- | 1.1232 | 45.0 | 270 | 1.0949 |
- | 1.1185 | 46.0 | 276 | 1.0896 |
- | 1.1155 | 47.0 | 282 | 1.0836 |
- | 1.1053 | 48.0 | 288 | 1.0834 |
- | 1.1071 | 49.0 | 294 | 1.0823 |
- | 1.1132 | 50.0 | 300 | 1.0813 |
+ | 3.5829 | 1.0 | 6 | 3.1864 |
+ | 3.0602 | 2.0 | 12 | 2.8642 |
+ | 2.7987 | 3.0 | 18 | 2.6521 |
+ | 2.6074 | 4.0 | 24 | 2.5190 |
+ | 2.5114 | 5.0 | 30 | 2.4076 |
+ | 2.3968 | 6.0 | 36 | 2.3019 |
+ | 2.2612 | 7.0 | 42 | 2.2065 |
+ | 2.1937 | 8.0 | 48 | 2.1258 |
+ | 2.1043 | 9.0 | 54 | 2.0391 |
+ | 2.0193 | 10.0 | 60 | 1.9546 |
+ | 1.9518 | 11.0 | 66 | 1.8654 |
+ | 1.8505 | 12.0 | 72 | 1.7672 |
+ | 1.7482 | 13.0 | 78 | 1.6792 |
+ | 1.695 | 14.0 | 84 | 1.6032 |
+ | 1.6015 | 15.0 | 90 | 1.5389 |
+ | 1.5568 | 16.0 | 96 | 1.4837 |
+ | 1.4888 | 17.0 | 102 | 1.4349 |
+ | 1.4429 | 18.0 | 108 | 1.3923 |
+ | 1.392 | 19.0 | 114 | 1.3534 |
+ | 1.3448 | 20.0 | 120 | 1.3181 |
+ | 1.306 | 21.0 | 126 | 1.2881 |
+ | 1.2845 | 22.0 | 132 | 1.2568 |
+ | 1.2813 | 23.0 | 138 | 1.2262 |
+ | 1.2479 | 24.0 | 144 | 1.1989 |
+ | 1.2164 | 25.0 | 150 | 1.1719 |
+ | 1.2116 | 26.0 | 156 | 1.1514 |
+ | 1.1514 | 27.0 | 162 | 1.1247 |
+ | 1.1571 | 28.0 | 168 | 1.1012 |
+ | 1.1334 | 29.0 | 174 | 1.0781 |
+ | 1.1193 | 30.0 | 180 | 1.0559 |
+ | 1.0721 | 31.0 | 186 | 1.0307 |
+ | 1.041 | 32.0 | 192 | 1.0095 |
+ | 1.02 | 33.0 | 198 | 0.9873 |
+ | 1.0145 | 34.0 | 204 | 0.9694 |
+ | 0.9817 | 35.0 | 210 | 0.9526 |
+ | 0.9905 | 36.0 | 216 | 0.9391 |
+ | 0.9691 | 37.0 | 222 | 0.9243 |
+ | 0.9294 | 38.0 | 228 | 0.9117 |
+ | 0.9265 | 39.0 | 234 | 0.8995 |
+ | 0.9165 | 40.0 | 240 | 0.8897 |
+ | 0.9059 | 41.0 | 246 | 0.8805 |
+ | 0.8916 | 42.0 | 252 | 0.8727 |
+ | 0.8632 | 43.0 | 258 | 0.8659 |
+ | 0.8767 | 44.0 | 264 | 0.8608 |
+ | 0.8878 | 45.0 | 270 | 0.8564 |
+ | 0.8914 | 46.0 | 276 | 0.8522 |
+ | 0.8632 | 47.0 | 282 | 0.8491 |
+ | 0.8638 | 48.0 | 288 | 0.8469 |
+ | 0.843 | 49.0 | 294 | 0.8460 |
+ | 0.8565 | 50.0 | 300 | 0.8456 |


### Framework versions
config.json CHANGED
@@ -78,7 +78,7 @@
  "typical_p": 1.0,
  "use_bfloat16": false,
  "use_cache": true,
-  "vocab_size": 52
+  "vocab_size": 53
 },
 "decoder_start_token_id": 2,
 "encoder": {
@@ -157,7 +157,7 @@
  "typical_p": 1.0,
  "use_bfloat16": false,
  "use_cache": true,
-  "vocab_size": 52
+  "vocab_size": 53
 },
 "eos_token_id": 0,
 "is_encoder_decoder": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:da4f203a50465bbaf1babcff0cb9459252480321b46aa69609402a6d6f466c22
- size 31205552
+ oid sha256:b103a0dbe09a82ff9b450dbdf4929fdc609afd639929df124571985170d7e93d
+ size 31207604
runs/Feb28_18-32-24_3897ec21fae5/events.out.tfevents.1709145145.3897ec21fae5.91861.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:364def6cd24d7f154ab304ac352dcb5e71382b3d87842e0e684fd36a16f2c138
+ size 29928
tokenizer.json CHANGED
@@ -112,78 +112,79 @@
"7": 13,
"8": 14,
"9": 15,
- "10": 16,
+ "=": 16,
"99": 17,
- "98": 18,
+ "10": 18,
"11": 19,
- "97": 20,
+ "98": 20,
"12": 21,
- "96": 22,
+ "97": 22,
"13": 23,
- "95": 24,
- "14": 25,
- "15": 26,
+ "96": 24,
+ "95": 25,
+ "14": 26,
"94": 27,
- "93": 28,
- "16": 29,
- "17": 30,
- "92": 31,
- "18": 32,
+ "15": 28,
+ "93": 29,
+ "16": 30,
+ "17": 31,
+ "92": 32,
"91": 33,
- "90": 34,
+ "18": 34,
"19": 35,
- "20": 36,
- "89": 37,
- "21": 38,
- "88": 39,
- "87": 40,
+ "90": 36,
+ "20": 37,
+ "89": 38,
+ "21": 39,
+ "88": 40,
"22": 41,
- "23": 42,
+ "87": 42,
"86": 43,
- "85": 44,
+ "23": 44,
"24": 45,
- "25": 46,
+ "85": 46,
"84": 47,
- "83": 48,
+ "25": 48,
"26": 49,
- "27": 50,
- "82": 51
+ "83": 50,
+ "27": 51,
+ "82": 52
},
"merges": [
- "1 0",
"9 9",
- "9 8",
+ "1 0",
"1 1",
- "9 7",
+ "9 8",
"1 2",
- "9 6",
+ "9 7",
"1 3",
+ "9 6",
"9 5",
"1 4",
- "1 5",
"9 4",
+ "1 5",
"9 3",
"1 6",
"1 7",
"9 2",
- "1 8",
"9 1",
- "9 0",
+ "1 8",
"1 9",
+ "9 0",
"2 0",
"8 9",
"2 1",
"8 8",
- "8 7",
"2 2",
- "2 3",
+ "8 7",
"8 6",
- "8 5",
+ "2 3",
"2 4",
- "2 5",
+ "8 5",
"8 4",
- "8 3",
+ "2 5",
"2 6",
+ "8 3",
"2 7",
"8 2"
]
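The tokenizer change gives "=" its own id (16), shifts the ids of the multi-digit tokens accordingly, and reorders the BPE merge ranks to match. A minimal sketch for inspecting the updated file with the `tokenizers` library, assuming tokenizer.json from this commit is available locally:

```python
# Minimal inspection sketch; the file path assumes a local copy of this commit.
from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")
print(tok.token_to_id("="))   # 16 in this commit's vocabulary
print(tok.get_vocab_size())   # 53, matching the vocab_size bump in config.json
```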
training_args.bin CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:d67f255dd0132ad0e26dff40d45f3b9dccbb9a1d04a4e4d270f7a3e6fa02c0ec
+ oid sha256:7ee85665cc15713b5bd783ba232635c4e5e7bf8fde3048c9b9cdd845325cdd67
size 4920