End of training
Browse files
- README.md +69 -29
- config.json +2 -2
- generation_config.json +1 -1
- merges.txt +0 -0
- model.safetensors +2 -2
- runs/May27_20-03-52_a770b5fedbe1/events.out.tfevents.1716840235.a770b5fedbe1.872.0 +3 -0
- runs/May27_20-03-52_a770b5fedbe1/events.out.tfevents.1716844080.a770b5fedbe1.872.1 +3 -0
- tokenizer.json +0 -0
- training_args.bin +2 -2
- vocab.json +0 -0
README.md
CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
|
|
15 |
|
16 |
This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on the None dataset.
|
17 |
It achieves the following results on the evaluation set:
|
18 |
-
- Loss:
|
19 |
|
20 |
## Model description
|
21 |
|
@@ -43,40 +43,80 @@ The following hyperparameters were used during training:
|
|
43 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
44 |
- lr_scheduler_type: cosine
|
45 |
- lr_scheduler_warmup_steps: 10
|
46 |
-
- num_epochs:
|
47 |
|
48 |
### Training results
|
49 |
|
50 |
-
| Training Loss | Epoch
|
51 |
-
|
52 |
-
|
|
53 |
-
|
|
54 |
-
|
|
55 |
-
| 6.
|
56 |
-
|
|
57 |
-
|
|
58 |
-
|
|
59 |
-
|
|
60 |
-
|
|
61 |
-
|
|
62 |
-
|
|
63 |
-
|
|
64 |
-
|
|
65 |
-
|
|
66 |
-
| 5.
|
67 |
-
| 5.
|
68 |
-
|
|
69 |
-
|
|
70 |
-
|
|
71 |
-
|
|
72 |
-
|
|
73 |
-
|
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
|
77 |
### Framework versions
|
78 |
|
79 |
-
- Transformers 4.
|
80 |
-
- Pytorch 2.
|
81 |
- Datasets 2.19.1
|
82 |
- Tokenizers 0.19.1
|
|
|
15 |
|
16 |
This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on the None dataset.
|
17 |
It achieves the following results on the evaluation set:
|
18 |
+
- Loss: 3.7688
|
19 |
|
20 |
## Model description
|
21 |
|
|
|
43 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
44 |
- lr_scheduler_type: cosine
|
45 |
- lr_scheduler_warmup_steps: 10
|
46 |
+
- num_epochs: 15
|
47 |
|
48 |
### Training results
|
49 |
|
50 |
+
| Training Loss | Epoch | Step | Validation Loss |
|
51 |
+
|:-------------:|:-------:|:----:|:---------------:|
|
52 |
+
| 6.8679 | 0.2310 | 8 | 6.6211 |
|
53 |
+
| 6.3894 | 0.4621 | 16 | 6.3666 |
|
54 |
+
| 6.2641 | 0.6931 | 24 | 6.2481 |
|
55 |
+
| 6.1285 | 0.9242 | 32 | 6.0829 |
|
56 |
+
| 5.9436 | 1.1552 | 40 | 5.8900 |
|
57 |
+
| 5.8073 | 1.3863 | 48 | 5.7490 |
|
58 |
+
| 5.7164 | 1.6173 | 56 | 5.6617 |
|
59 |
+
| 5.6019 | 1.8484 | 64 | 5.5778 |
|
60 |
+
| 5.5427 | 2.0794 | 72 | 5.4886 |
|
61 |
+
| 5.454 | 2.3105 | 80 | 5.3954 |
|
62 |
+
| 5.3546 | 2.5415 | 88 | 5.3066 |
|
63 |
+
| 5.3014 | 2.7726 | 96 | 5.2124 |
|
64 |
+
| 5.2448 | 3.0036 | 104 | 5.1365 |
|
65 |
+
| 5.1185 | 3.2347 | 112 | 5.0765 |
|
66 |
+
| 5.0938 | 3.4657 | 120 | 5.0071 |
|
67 |
+
| 5.0347 | 3.6968 | 128 | 4.9339 |
|
68 |
+
| 4.9681 | 3.9278 | 136 | 4.8552 |
|
69 |
+
| 4.8323 | 4.1588 | 144 | 4.7821 |
|
70 |
+
| 4.7912 | 4.3899 | 152 | 4.7215 |
|
71 |
+
| 4.7225 | 4.6209 | 160 | 4.6431 |
|
72 |
+
| 4.6433 | 4.8520 | 168 | 4.5701 |
|
73 |
+
| 4.5309 | 5.0830 | 176 | 4.5002 |
|
74 |
+
| 4.4506 | 5.3141 | 184 | 4.4442 |
|
75 |
+
| 4.4097 | 5.5451 | 192 | 4.3820 |
|
76 |
+
| 4.3871 | 5.7762 | 200 | 4.3290 |
|
77 |
+
| 4.3345 | 6.0072 | 208 | 4.2869 |
|
78 |
+
| 4.2004 | 6.2383 | 216 | 4.2412 |
|
79 |
+
| 4.1716 | 6.4693 | 224 | 4.1978 |
|
80 |
+
| 4.1536 | 6.7004 | 232 | 4.1607 |
|
81 |
+
| 4.0975 | 6.9314 | 240 | 4.1294 |
|
82 |
+
| 3.9743 | 7.1625 | 248 | 4.1014 |
|
83 |
+
| 3.922 | 7.3935 | 256 | 4.0654 |
|
84 |
+
| 3.939 | 7.6245 | 264 | 4.0378 |
|
85 |
+
| 3.9208 | 7.8556 | 272 | 4.0102 |
|
86 |
+
| 3.8083 | 8.0866 | 280 | 3.9812 |
|
87 |
+
| 3.7611 | 8.3177 | 288 | 3.9630 |
|
88 |
+
| 3.7668 | 8.5487 | 296 | 3.9407 |
|
89 |
+
| 3.7285 | 8.7798 | 304 | 3.9183 |
|
90 |
+
| 3.6996 | 9.0108 | 312 | 3.8958 |
|
91 |
+
| 3.5754 | 9.2419 | 320 | 3.8825 |
|
92 |
+
| 3.5708 | 9.4729 | 328 | 3.8702 |
|
93 |
+
| 3.5607 | 9.7040 | 336 | 3.8510 |
|
94 |
+
| 3.5688 | 9.9350 | 344 | 3.8387 |
|
95 |
+
| 3.4188 | 10.1661 | 352 | 3.8350 |
|
96 |
+
| 3.432 | 10.3971 | 360 | 3.8261 |
|
97 |
+
| 3.4236 | 10.6282 | 368 | 3.8131 |
|
98 |
+
| 3.3985 | 10.8592 | 376 | 3.8026 |
|
99 |
+
| 3.306 | 11.0903 | 384 | 3.7934 |
|
100 |
+
| 3.3196 | 11.3213 | 392 | 3.7919 |
|
101 |
+
| 3.3031 | 11.5523 | 400 | 3.7908 |
|
102 |
+
| 3.2851 | 11.7834 | 408 | 3.7817 |
|
103 |
+
| 3.2703 | 12.0144 | 416 | 3.7789 |
|
104 |
+
| 3.2132 | 12.2455 | 424 | 3.7818 |
|
105 |
+
| 3.1829 | 12.4765 | 432 | 3.7778 |
|
106 |
+
| 3.1968 | 12.7076 | 440 | 3.7749 |
|
107 |
+
| 3.2206 | 12.9386 | 448 | 3.7711 |
|
108 |
+
| 3.1521 | 13.1697 | 456 | 3.7694 |
|
109 |
+
| 3.1412 | 13.4007 | 464 | 3.7700 |
|
110 |
+
| 3.1415 | 13.6318 | 472 | 3.7709 |
|
111 |
+
| 3.1402 | 13.8628 | 480 | 3.7694 |
|
112 |
+
| 3.129 | 14.0939 | 488 | 3.7689 |
|
113 |
+
| 3.1221 | 14.3249 | 496 | 3.7687 |
|
114 |
+
| 3.1576 | 14.5560 | 504 | 3.7688 |
|
115 |
|
116 |
|
117 |
### Framework versions
|
118 |
|
119 |
+
- Transformers 4.41.0
|
120 |
+
- Pytorch 2.3.0+cu121
|
121 |
- Datasets 2.19.1
|
122 |
- Tokenizers 0.19.1
|
config.json
CHANGED
@@ -33,7 +33,7 @@
|
|
33 |
}
|
34 |
},
|
35 |
"torch_dtype": "float32",
|
36 |
-
"transformers_version": "4.
|
37 |
"use_cache": true,
|
38 |
-
"vocab_size":
|
39 |
}
|
|
|
33 |
}
|
34 |
},
|
35 |
"torch_dtype": "float32",
|
36 |
+
"transformers_version": "4.41.0",
|
37 |
"use_cache": true,
|
38 |
+
"vocab_size": 2500
|
39 |
}
|
generation_config.json
CHANGED
@@ -2,5 +2,5 @@
|
|
2 |
"_from_model_config": true,
|
3 |
"bos_token_id": 0,
|
4 |
"eos_token_id": 0,
|
5 |
-
"transformers_version": "4.
|
6 |
}
|
|
|
2 |
"_from_model_config": true,
|
3 |
"bos_token_id": 0,
|
4 |
"eos_token_id": 0,
|
5 |
+
"transformers_version": "4.41.0"
|
6 |
}
|
merges.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ace8794fd5e49d9fb2cd6dea87f4b70acf62d85f88a22e0d4470c31d2f48acb
|
3 |
+
size 351064704
|
runs/May27_20-03-52_a770b5fedbe1/events.out.tfevents.1716840235.a770b5fedbe1.872.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6884833a55dee994effe5ca7f2e733847bb98a27b2e42e08b160091da67ddf3
|
3 |
+
size 49042
|
runs/May27_20-03-52_a770b5fedbe1/events.out.tfevents.1716844080.a770b5fedbe1.872.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a0dae89ac562a95604707be938196471a8e80505de8b64385449174cd13bd39
|
3 |
+
size 359
|
tokenizer.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da9718ad8627a3d80d82421ecf1e84eee7058e348eae4b3d45ce8175e85a60fb
|
3 |
+
size 5048
|
vocab.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|