Model save

Browse files

Files changed (7) hide show

README.md +24 -24
config.json +1 -1
model.safetensors +1 -1
special_tokens_map.json +4 -28
tokenizer.json +0 -0
training_args.bin +1 -1
vocab.json +0 -0

README.md CHANGED Viewed

@@ -5,19 +5,19 @@ tags:
 metrics:
 - accuracy
 model-index:
-- name: babylm-unablated_seed-42_1e-3
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# babylm-unablated_seed-42_1e-3
 This model was trained from scratch on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 3.0136
-- Accuracy: 0.4207
 ## Model description
@@ -52,26 +52,26 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch   | Step  | Validation Loss | Accuracy |
 |:-------------:|:-------:|:-----:|:---------------:|:--------:|
-| 6.1723        | 0.9999  | 1788  | 4.2504          | 0.3067   |
-| 4.0454        | 1.9999  | 3576  | 3.7300          | 0.3471   |
-| 3.6164        | 2.9998  | 5364  | 3.4747          | 0.3703   |
-| 3.394         | 3.9997  | 7152  | 3.3376          | 0.3834   |
-| 3.3055        | 4.9997  | 8940  | 3.2589          | 0.3908   |
-| 3.2018        | 5.9996  | 10728 | 3.2086          | 0.3957   |
-| 3.1366        | 6.9995  | 12516 | 3.1760          | 0.3991   |
-| 3.093         | 8.0     | 14305 | 3.1520          | 0.4016   |
-| 3.0613        | 8.9999  | 16093 | 3.1356          | 0.4037   |
-| 3.0161        | 9.9999  | 17881 | 3.1245          | 0.4046   |
-| 2.9952        | 10.9998 | 19669 | 3.1162          | 0.4061   |
-| 2.9865        | 11.9997 | 21457 | 3.1086          | 0.4065   |
-| 2.9786        | 12.9997 | 23245 | 3.1062          | 0.4073   |
-| 2.9743        | 13.9996 | 25033 | 3.1010          | 0.4076   |
-| 2.9328        | 14.9995 | 26821 | 3.0967          | 0.4084   |
-| 2.9369        | 16.0    | 28610 | 3.0958          | 0.4086   |
-| 2.9408        | 16.9999 | 30398 | 3.0929          | 0.4088   |
-| 2.9445        | 17.9999 | 32186 | 3.0917          | 0.4094   |
-| 2.8906        | 18.9998 | 33974 | 3.0351          | 0.4163   |
-| 2.7359        | 19.9986 | 35760 | 3.0136          | 0.4207   |
 ### Framework versions

 metrics:
 - accuracy
 model-index:
+- name: babylm-default_seed-42_1e-3
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# babylm-default_seed-42_1e-3
 This model was trained from scratch on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 3.0140
+- Accuracy: 0.4206
 ## Model description
 | Training Loss | Epoch   | Step  | Validation Loss | Accuracy |
 |:-------------:|:-------:|:-----:|:---------------:|:--------:|
+| 6.1739        | 0.9998  | 1788  | 4.2510          | 0.3061   |
+| 4.046         | 1.9996  | 3576  | 3.7290          | 0.3476   |
+| 3.6189        | 2.9999  | 5365  | 3.4764          | 0.3702   |
+| 3.3937        | 3.9997  | 7153  | 3.3392          | 0.3835   |
+| 3.31          | 4.9995  | 8941  | 3.2583          | 0.3910   |
+| 3.2013        | 5.9999  | 10730 | 3.2094          | 0.3957   |
+| 3.137         | 6.9997  | 12518 | 3.1786          | 0.3994   |
+| 3.093         | 8.0     | 14307 | 3.1544          | 0.4016   |
+| 3.0609        | 8.9998  | 16095 | 3.1376          | 0.4034   |
+| 3.0177        | 9.9996  | 17883 | 3.1239          | 0.4050   |
+| 2.996         | 10.9999 | 19672 | 3.1167          | 0.4059   |
+| 2.9871        | 11.9997 | 21460 | 3.1099          | 0.4064   |
+| 2.9784        | 12.9995 | 23248 | 3.1047          | 0.4073   |
+| 2.9731        | 13.9999 | 25037 | 3.1005          | 0.4079   |
+| 2.9327        | 14.9997 | 26825 | 3.0990          | 0.4084   |
+| 2.9351        | 16.0    | 28614 | 3.0970          | 0.4088   |
+| 2.9407        | 16.9998 | 30402 | 3.0905          | 0.4092   |
+| 2.9456        | 17.9996 | 32190 | 3.0857          | 0.4099   |
+| 2.8908        | 18.9999 | 33979 | 3.0347          | 0.4161   |
+| 2.7363        | 19.9958 | 35760 | 3.0140          | 0.4206   |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "models/babylm-unablated_seed-42_1e-3",
   "_remove_final_layer_norm": false,
   "activation_dropout": 0.0,
   "activation_function": "relu",

 {
+  "_name_or_path": "models/babylm-default_seed-42_1e-3",
   "_remove_final_layer_norm": false,
   "activation_dropout": 0.0,
   "activation_function": "relu",

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:18c4a883223cd42c0cb8a7322fdd660893099fa47fd031f308605de96818f353
 size 441702288

 version https://git-lfs.github.com/spec/v1
+oid sha256:1ce1722b06ce63c914a6e124cc1dd52d9113eb800971b6c28d8ebe3185903636
 size 441702288

special_tokens_map.json CHANGED Viewed

@@ -1,30 +1,6 @@
 {
-  "bos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "<pad>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
 }

 {
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<pad>",
+  "unk_token": "<|endoftext|>"
 }

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:94ea9885287048c764a4765857fc29d5a67c1621d5e8da170add10a216712460
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:d759a97af8f8203454aec72aaf5e3c092f55cc7f36258b952920190608fd0a6c
 size 5304

vocab.json CHANGED Viewed

The diff for this file is too large to render. See raw diff