qing-yao committed on
Commit
f3a7dfe
·
verified ·
1 Parent(s): f6afe72

Model save

Browse files
Files changed (7) hide show
  1. README.md +24 -24
  2. config.json +1 -1
  3. model.safetensors +1 -1
  4. special_tokens_map.json +4 -28
  5. tokenizer.json +0 -0
  6. training_args.bin +1 -1
  7. vocab.json +0 -0
README.md CHANGED
@@ -5,19 +5,19 @@ tags:
5
  metrics:
6
  - accuracy
7
  model-index:
8
- - name: babylm-unablated_seed-42_1e-3
9
  results: []
10
  ---
11
 
12
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
  should probably proofread and complete it, then remove this comment. -->
14
 
15
- # babylm-unablated_seed-42_1e-3
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 3.0136
20
- - Accuracy: 0.4207
21
 
22
  ## Model description
23
 
@@ -52,26 +52,26 @@ The following hyperparameters were used during training:
52
 
53
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
54
  |:-------------:|:-------:|:-----:|:---------------:|:--------:|
55
- | 6.1723 | 0.9999 | 1788 | 4.2504 | 0.3067 |
56
- | 4.0454 | 1.9999 | 3576 | 3.7300 | 0.3471 |
57
- | 3.6164 | 2.9998 | 5364 | 3.4747 | 0.3703 |
58
- | 3.394 | 3.9997 | 7152 | 3.3376 | 0.3834 |
59
- | 3.3055 | 4.9997 | 8940 | 3.2589 | 0.3908 |
60
- | 3.2018 | 5.9996 | 10728 | 3.2086 | 0.3957 |
61
- | 3.1366 | 6.9995 | 12516 | 3.1760 | 0.3991 |
62
- | 3.093 | 8.0 | 14305 | 3.1520 | 0.4016 |
63
- | 3.0613 | 8.9999 | 16093 | 3.1356 | 0.4037 |
64
- | 3.0161 | 9.9999 | 17881 | 3.1245 | 0.4046 |
65
- | 2.9952 | 10.9998 | 19669 | 3.1162 | 0.4061 |
66
- | 2.9865 | 11.9997 | 21457 | 3.1086 | 0.4065 |
67
- | 2.9786 | 12.9997 | 23245 | 3.1062 | 0.4073 |
68
- | 2.9743 | 13.9996 | 25033 | 3.1010 | 0.4076 |
69
- | 2.9328 | 14.9995 | 26821 | 3.0967 | 0.4084 |
70
- | 2.9369 | 16.0 | 28610 | 3.0958 | 0.4086 |
71
- | 2.9408 | 16.9999 | 30398 | 3.0929 | 0.4088 |
72
- | 2.9445 | 17.9999 | 32186 | 3.0917 | 0.4094 |
73
- | 2.8906 | 18.9998 | 33974 | 3.0351 | 0.4163 |
74
- | 2.7359 | 19.9986 | 35760 | 3.0136 | 0.4207 |
75
 
76
 
77
  ### Framework versions
 
5
  metrics:
6
  - accuracy
7
  model-index:
8
+ - name: babylm-default_seed-42_1e-3
9
  results: []
10
  ---
11
 
12
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
  should probably proofread and complete it, then remove this comment. -->
14
 
15
+ # babylm-default_seed-42_1e-3
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 3.0140
20
+ - Accuracy: 0.4206
21
 
22
  ## Model description
23
 
 
52
 
53
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
54
  |:-------------:|:-------:|:-----:|:---------------:|:--------:|
55
+ | 6.1739 | 0.9998 | 1788 | 4.2510 | 0.3061 |
56
+ | 4.046 | 1.9996 | 3576 | 3.7290 | 0.3476 |
57
+ | 3.6189 | 2.9999 | 5365 | 3.4764 | 0.3702 |
58
+ | 3.3937 | 3.9997 | 7153 | 3.3392 | 0.3835 |
59
+ | 3.31 | 4.9995 | 8941 | 3.2583 | 0.3910 |
60
+ | 3.2013 | 5.9999 | 10730 | 3.2094 | 0.3957 |
61
+ | 3.137 | 6.9997 | 12518 | 3.1786 | 0.3994 |
62
+ | 3.093 | 8.0 | 14307 | 3.1544 | 0.4016 |
63
+ | 3.0609 | 8.9998 | 16095 | 3.1376 | 0.4034 |
64
+ | 3.0177 | 9.9996 | 17883 | 3.1239 | 0.4050 |
65
+ | 2.996 | 10.9999 | 19672 | 3.1167 | 0.4059 |
66
+ | 2.9871 | 11.9997 | 21460 | 3.1099 | 0.4064 |
67
+ | 2.9784 | 12.9995 | 23248 | 3.1047 | 0.4073 |
68
+ | 2.9731 | 13.9999 | 25037 | 3.1005 | 0.4079 |
69
+ | 2.9327 | 14.9997 | 26825 | 3.0990 | 0.4084 |
70
+ | 2.9351 | 16.0 | 28614 | 3.0970 | 0.4088 |
71
+ | 2.9407 | 16.9998 | 30402 | 3.0905 | 0.4092 |
72
+ | 2.9456 | 17.9996 | 32190 | 3.0857 | 0.4099 |
73
+ | 2.8908 | 18.9999 | 33979 | 3.0347 | 0.4161 |
74
+ | 2.7363 | 19.9958 | 35760 | 3.0140 | 0.4206 |
75
 
76
 
77
  ### Framework versions
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "models/babylm-unablated_seed-42_1e-3",
3
  "_remove_final_layer_norm": false,
4
  "activation_dropout": 0.0,
5
  "activation_function": "relu",
 
1
  {
2
+ "_name_or_path": "models/babylm-default_seed-42_1e-3",
3
  "_remove_final_layer_norm": false,
4
  "activation_dropout": 0.0,
5
  "activation_function": "relu",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18c4a883223cd42c0cb8a7322fdd660893099fa47fd031f308605de96818f353
3
  size 441702288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ce1722b06ce63c914a6e124cc1dd52d9113eb800971b6c28d8ebe3185903636
3
  size 441702288
special_tokens_map.json CHANGED
@@ -1,30 +1,6 @@
1
  {
2
- "bos_token": {
3
- "content": "<|endoftext|>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "<|endoftext|>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "pad_token": {
17
- "content": "<pad>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "unk_token": {
24
- "content": "<|endoftext|>",
25
- "lstrip": false,
26
- "normalized": false,
27
- "rstrip": false,
28
- "single_word": false
29
- }
30
  }
 
1
  {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<pad>",
5
+ "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94ea9885287048c764a4765857fc29d5a67c1621d5e8da170add10a216712460
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d759a97af8f8203454aec72aaf5e3c092f55cc7f36258b952920190608fd0a6c
3
  size 5304
vocab.json CHANGED
The diff for this file is too large to render. See raw diff