martimfasantos committed
Commit c84fc05
Parent(s): e0a6f20

Model save

Files changed:
- README.md +186 -0
- all_results.json +9 -0
- generation_config.json +7 -0
- model.safetensors +1 -1
- runs/Jul03_20-28-56_poseidon/events.out.tfevents.1720039159.poseidon.1491106.0 +2 -2
- train_results.json +9 -0
- trainer_state.json +0 -0
README.md
ADDED
@@ -0,0 +1,186 @@
+---
+license: apache-2.0
+base_model: martimfasantos/tinyllama-1.1b-sum-sft-full_old
+tags:
+- trl
+- dpo
+- generated_from_trainer
+model-index:
+- name: tinyllama-1.1b-sum-simpo
+  results: []
+---
+
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+
+# tinyllama-1.1b-sum-simpo
+
+This model is a fine-tuned version of [martimfasantos/tinyllama-1.1b-sum-sft-full_old](https://huggingface.co/martimfasantos/tinyllama-1.1b-sum-sft-full_old) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.6162
+- Rewards/chosen: -3.6947
+- Rewards/rejected: -4.0557
+- Rewards/accuracies: 0.6224
+- Rewards/margins: 0.3609
+- Logps/rejected: -2.0278
+- Logps/chosen: -1.8474
+- Logits/rejected: -3.4422
+- Logits/chosen: -3.4458
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 5e-08
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- distributed_type: multi-GPU
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 16
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 2
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+|:-------------:|:------:|:-----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 1.6606 | 0.0172 | 100 | 1.6732 | -2.9619 | -3.2164 | 0.5960 | 0.2545 | -1.6082 | -1.4810 | -3.6346 | -3.6387 |
+| 1.6959 | 0.0345 | 200 | 1.6729 | -2.9615 | -3.2164 | 0.5960 | 0.2549 | -1.6082 | -1.4807 | -3.6375 | -3.6416 |
+| 1.6134 | 0.0517 | 300 | 1.6732 | -2.9617 | -3.2161 | 0.5955 | 0.2544 | -1.6081 | -1.4808 | -3.6490 | -3.6530 |
+| 1.6596 | 0.0689 | 400 | 1.6733 | -2.9622 | -3.2167 | 0.5946 | 0.2546 | -1.6084 | -1.4811 | -3.6353 | -3.6394 |
+| 1.7383 | 0.0861 | 500 | 1.6731 | -2.9622 | -3.2170 | 0.5967 | 0.2548 | -1.6085 | -1.4811 | -3.6413 | -3.6454 |
+| 1.5371 | 0.1034 | 600 | 1.6726 | -2.9618 | -3.2172 | 0.5957 | 0.2554 | -1.6086 | -1.4809 | -3.6255 | -3.6296 |
+| 1.8026 | 0.1206 | 700 | 1.6729 | -2.9621 | -3.2171 | 0.5950 | 0.2550 | -1.6086 | -1.4810 | -3.6350 | -3.6391 |
+| 1.6605 | 0.1378 | 800 | 1.6727 | -2.9614 | -3.2169 | 0.5948 | 0.2555 | -1.6085 | -1.4807 | -3.6417 | -3.6457 |
+| 1.8262 | 0.1551 | 900 | 1.6722 | -2.9616 | -3.2174 | 0.5957 | 0.2558 | -1.6087 | -1.4808 | -3.6484 | -3.6525 |
+| 1.8332 | 0.1723 | 1000 | 1.6719 | -2.9613 | -3.2174 | 0.5957 | 0.2561 | -1.6087 | -1.4806 | -3.6351 | -3.6392 |
+| 1.6755 | 0.1895 | 1100 | 1.6724 | -2.9614 | -3.2170 | 0.5943 | 0.2556 | -1.6085 | -1.4807 | -3.6276 | -3.6317 |
+| 1.7075 | 0.2068 | 1200 | 1.6712 | -2.9611 | -3.2182 | 0.5955 | 0.2571 | -1.6091 | -1.4806 | -3.6330 | -3.6370 |
+| 1.6355 | 0.2240 | 1300 | 1.6709 | -2.9606 | -3.2180 | 0.5957 | 0.2574 | -1.6090 | -1.4803 | -3.6280 | -3.6321 |
+| 1.7433 | 0.2412 | 1400 | 1.6706 | -2.9613 | -3.2194 | 0.5962 | 0.2581 | -1.6097 | -1.4807 | -3.6354 | -3.6394 |
+| 1.737 | 0.2584 | 1500 | 1.6698 | -2.9613 | -3.2204 | 0.5943 | 0.2590 | -1.6102 | -1.4807 | -3.6363 | -3.6403 |
+| 1.66 | 0.2757 | 1600 | 1.6691 | -2.9621 | -3.2220 | 0.5985 | 0.2599 | -1.6110 | -1.4811 | -3.6325 | -3.6365 |
+| 1.6776 | 0.2929 | 1700 | 1.6685 | -2.9630 | -3.2239 | 0.5934 | 0.2609 | -1.6120 | -1.4815 | -3.6317 | -3.6358 |
+| 1.6114 | 0.3101 | 1800 | 1.6682 | -2.9632 | -3.2243 | 0.5957 | 0.2611 | -1.6121 | -1.4816 | -3.6194 | -3.6235 |
+| 1.8167 | 0.3274 | 1900 | 1.6675 | -2.9655 | -3.2278 | 0.5964 | 0.2623 | -1.6139 | -1.4828 | -3.6257 | -3.6297 |
+| 1.6553 | 0.3446 | 2000 | 1.6667 | -2.9673 | -3.2305 | 0.5980 | 0.2632 | -1.6152 | -1.4836 | -3.6228 | -3.6268 |
+| 1.7986 | 0.3618 | 2100 | 1.6660 | -2.9692 | -3.2336 | 0.6022 | 0.2644 | -1.6168 | -1.4846 | -3.6199 | -3.6239 |
+| 1.5912 | 0.3790 | 2200 | 1.6651 | -2.9708 | -3.2362 | 0.6011 | 0.2654 | -1.6181 | -1.4854 | -3.6074 | -3.6114 |
+| 1.6622 | 0.3963 | 2300 | 1.6637 | -2.9746 | -3.2423 | 0.6043 | 0.2677 | -1.6211 | -1.4873 | -3.5982 | -3.6022 |
+| 1.519 | 0.4135 | 2400 | 1.6631 | -2.9790 | -3.2473 | 0.6053 | 0.2684 | -1.6237 | -1.4895 | -3.6104 | -3.6143 |
+| 1.7422 | 0.4307 | 2500 | 1.6621 | -2.9815 | -3.2515 | 0.6050 | 0.2700 | -1.6258 | -1.4908 | -3.5967 | -3.6007 |
+| 1.7132 | 0.4480 | 2600 | 1.6615 | -2.9862 | -3.2569 | 0.6078 | 0.2707 | -1.6284 | -1.4931 | -3.5912 | -3.5951 |
+| 1.5902 | 0.4652 | 2700 | 1.6600 | -2.9924 | -3.2651 | 0.6104 | 0.2727 | -1.6325 | -1.4962 | -3.5913 | -3.5953 |
+| 1.7921 | 0.4824 | 2800 | 1.6595 | -2.9980 | -3.2719 | 0.6073 | 0.2738 | -1.6359 | -1.4990 | -3.5875 | -3.5915 |
+| 1.8097 | 0.4997 | 2900 | 1.6581 | -3.0045 | -3.2802 | 0.6094 | 0.2756 | -1.6401 | -1.5023 | -3.5979 | -3.6018 |
+| 1.7279 | 0.5169 | 3000 | 1.6570 | -3.0112 | -3.2886 | 0.6129 | 0.2774 | -1.6443 | -1.5056 | -3.5989 | -3.6028 |
+| 1.6241 | 0.5341 | 3100 | 1.6565 | -3.0162 | -3.2940 | 0.6143 | 0.2778 | -1.6470 | -1.5081 | -3.5841 | -3.5880 |
+| 1.7948 | 0.5513 | 3200 | 1.6549 | -3.0240 | -3.3044 | 0.6152 | 0.2805 | -1.6522 | -1.5120 | -3.5757 | -3.5796 |
+| 1.7049 | 0.5686 | 3300 | 1.6538 | -3.0338 | -3.3161 | 0.6155 | 0.2823 | -1.6580 | -1.5169 | -3.5802 | -3.5841 |
+| 1.543 | 0.5858 | 3400 | 1.6527 | -3.0475 | -3.3317 | 0.6150 | 0.2841 | -1.6658 | -1.5238 | -3.5716 | -3.5756 |
+| 1.6486 | 0.6030 | 3500 | 1.6511 | -3.0585 | -3.3457 | 0.6145 | 0.2872 | -1.6729 | -1.5293 | -3.5665 | -3.5704 |
+| 1.7359 | 0.6203 | 3600 | 1.6500 | -3.0714 | -3.3604 | 0.6129 | 0.2890 | -1.6802 | -1.5357 | -3.5795 | -3.5833 |
+| 1.4877 | 0.6375 | 3700 | 1.6489 | -3.0839 | -3.3749 | 0.6118 | 0.2910 | -1.6874 | -1.5420 | -3.5742 | -3.5780 |
+| 1.7414 | 0.6547 | 3800 | 1.6484 | -3.1014 | -3.3936 | 0.6057 | 0.2922 | -1.6968 | -1.5507 | -3.5619 | -3.5658 |
+| 1.5137 | 0.6720 | 3900 | 1.6468 | -3.1177 | -3.4127 | 0.6090 | 0.2951 | -1.7064 | -1.5588 | -3.5568 | -3.5607 |
+| 1.6939 | 0.6892 | 4000 | 1.6458 | -3.1401 | -3.4372 | 0.6097 | 0.2971 | -1.7186 | -1.5701 | -3.5542 | -3.5580 |
+| 1.5735 | 0.7064 | 4100 | 1.6445 | -3.1583 | -3.4584 | 0.6101 | 0.3001 | -1.7292 | -1.5791 | -3.5624 | -3.5662 |
+| 1.736 | 0.7236 | 4200 | 1.6430 | -3.1761 | -3.4787 | 0.6122 | 0.3026 | -1.7393 | -1.5880 | -3.5468 | -3.5506 |
+| 1.6289 | 0.7409 | 4300 | 1.6422 | -3.1922 | -3.4967 | 0.6115 | 0.3045 | -1.7484 | -1.5961 | -3.5486 | -3.5524 |
+| 1.6779 | 0.7581 | 4400 | 1.6411 | -3.2123 | -3.5189 | 0.6145 | 0.3066 | -1.7594 | -1.6061 | -3.5474 | -3.5512 |
+| 1.7728 | 0.7753 | 4500 | 1.6399 | -3.2384 | -3.5475 | 0.6159 | 0.3092 | -1.7738 | -1.6192 | -3.5240 | -3.5279 |
+| 1.5063 | 0.7926 | 4600 | 1.6391 | -3.2601 | -3.5713 | 0.6164 | 0.3112 | -1.7856 | -1.6300 | -3.5373 | -3.5411 |
+| 1.5586 | 0.8098 | 4700 | 1.6376 | -3.2857 | -3.5998 | 0.6199 | 0.3141 | -1.7999 | -1.6429 | -3.5355 | -3.5393 |
+| 1.6914 | 0.8270 | 4800 | 1.6363 | -3.3116 | -3.6282 | 0.6194 | 0.3165 | -1.8141 | -1.6558 | -3.5299 | -3.5337 |
+| 1.5487 | 0.8442 | 4900 | 1.6350 | -3.3397 | -3.6586 | 0.6213 | 0.3189 | -1.8293 | -1.6698 | -3.5291 | -3.5329 |
+| 1.7545 | 0.8615 | 5000 | 1.6343 | -3.3499 | -3.6700 | 0.6208 | 0.3201 | -1.8350 | -1.6750 | -3.5220 | -3.5258 |
+| 1.5632 | 0.8787 | 5100 | 1.6322 | -3.3812 | -3.7050 | 0.6224 | 0.3237 | -1.8525 | -1.6906 | -3.5276 | -3.5313 |
+| 1.5213 | 0.8959 | 5200 | 1.6313 | -3.4086 | -3.7337 | 0.6224 | 0.3252 | -1.8669 | -1.7043 | -3.5161 | -3.5199 |
+| 1.5913 | 0.9132 | 5300 | 1.6305 | -3.4261 | -3.7528 | 0.6213 | 0.3267 | -1.8764 | -1.7130 | -3.5238 | -3.5275 |
+| 1.4784 | 0.9304 | 5400 | 1.6297 | -3.4434 | -3.7719 | 0.6222 | 0.3285 | -1.8860 | -1.7217 | -3.5166 | -3.5204 |
+| 1.5188 | 0.9476 | 5500 | 1.6296 | -3.4490 | -3.7777 | 0.6217 | 0.3287 | -1.8889 | -1.7245 | -3.5046 | -3.5084 |
+| 1.6448 | 0.9649 | 5600 | 1.6285 | -3.4536 | -3.7840 | 0.6220 | 0.3305 | -1.8920 | -1.7268 | -3.5079 | -3.5116 |
+| 1.6912 | 0.9821 | 5700 | 1.6277 | -3.4641 | -3.7963 | 0.6217 | 0.3323 | -1.8982 | -1.7320 | -3.5114 | -3.5152 |
+| 1.4687 | 0.9993 | 5800 | 1.6266 | -3.4815 | -3.8161 | 0.6220 | 0.3347 | -1.9081 | -1.7407 | -3.4963 | -3.5001 |
+| 1.5634 | 1.0165 | 5900 | 1.6262 | -3.4888 | -3.8243 | 0.6231 | 0.3355 | -1.9122 | -1.7444 | -3.4942 | -3.4980 |
+| 1.5389 | 1.0338 | 6000 | 1.6258 | -3.4952 | -3.8314 | 0.6234 | 0.3362 | -1.9157 | -1.7476 | -3.5035 | -3.5072 |
+| 1.7463 | 1.0510 | 6100 | 1.6253 | -3.5082 | -3.8453 | 0.6224 | 0.3371 | -1.9227 | -1.7541 | -3.4929 | -3.4967 |
+| 1.5582 | 1.0682 | 6200 | 1.6248 | -3.5222 | -3.8605 | 0.6231 | 0.3383 | -1.9303 | -1.7611 | -3.4927 | -3.4964 |
+| 1.556 | 1.0855 | 6300 | 1.6239 | -3.5306 | -3.8706 | 0.6234 | 0.3400 | -1.9353 | -1.7653 | -3.5044 | -3.5080 |
+| 1.6222 | 1.1027 | 6400 | 1.6236 | -3.5370 | -3.8779 | 0.6241 | 0.3409 | -1.9390 | -1.7685 | -3.4886 | -3.4923 |
+| 1.807 | 1.1199 | 6500 | 1.6233 | -3.5464 | -3.8884 | 0.6234 | 0.3420 | -1.9442 | -1.7732 | -3.4818 | -3.4855 |
+| 1.746 | 1.1371 | 6600 | 1.6231 | -3.5530 | -3.8958 | 0.6245 | 0.3428 | -1.9479 | -1.7765 | -3.4855 | -3.4892 |
+| 1.5871 | 1.1544 | 6700 | 1.6229 | -3.5545 | -3.8978 | 0.6217 | 0.3433 | -1.9489 | -1.7773 | -3.4817 | -3.4854 |
+| 1.5459 | 1.1716 | 6800 | 1.6222 | -3.5671 | -3.9117 | 0.6224 | 0.3446 | -1.9559 | -1.7836 | -3.4770 | -3.4807 |
+| 1.5606 | 1.1888 | 6900 | 1.6225 | -3.5758 | -3.9205 | 0.6236 | 0.3447 | -1.9603 | -1.7879 | -3.4844 | -3.4880 |
+| 1.5876 | 1.2061 | 7000 | 1.6218 | -3.5845 | -3.9305 | 0.6217 | 0.3461 | -1.9653 | -1.7922 | -3.4732 | -3.4769 |
+| 1.6316 | 1.2233 | 7100 | 1.6215 | -3.5825 | -3.9294 | 0.6222 | 0.3469 | -1.9647 | -1.7912 | -3.4741 | -3.4778 |
+| 1.5 | 1.2405 | 7200 | 1.6215 | -3.5921 | -3.9396 | 0.6197 | 0.3474 | -1.9698 | -1.7961 | -3.4708 | -3.4745 |
+| 1.5617 | 1.2578 | 7300 | 1.6209 | -3.6028 | -3.9514 | 0.6204 | 0.3486 | -1.9757 | -1.8014 | -3.4715 | -3.4752 |
+| 1.5496 | 1.2750 | 7400 | 1.6206 | -3.6103 | -3.9596 | 0.6241 | 0.3493 | -1.9798 | -1.8051 | -3.4777 | -3.4813 |
+| 1.5583 | 1.2922 | 7500 | 1.6202 | -3.6155 | -3.9654 | 0.6229 | 0.3499 | -1.9827 | -1.8078 | -3.4664 | -3.4700 |
+| 1.5182 | 1.3094 | 7600 | 1.6203 | -3.6189 | -3.9689 | 0.6222 | 0.3501 | -1.9845 | -1.8094 | -3.4741 | -3.4777 |
+| 1.5097 | 1.3267 | 7700 | 1.6201 | -3.6266 | -3.9775 | 0.6222 | 0.3509 | -1.9888 | -1.8133 | -3.4660 | -3.4696 |
+| 1.5902 | 1.3439 | 7800 | 1.6199 | -3.6302 | -3.9819 | 0.6227 | 0.3517 | -1.9909 | -1.8151 | -3.4639 | -3.4676 |
+| 1.633 | 1.3611 | 7900 | 1.6197 | -3.6336 | -3.9856 | 0.6213 | 0.3521 | -1.9928 | -1.8168 | -3.4621 | -3.4657 |
+| 1.7 | 1.3784 | 8000 | 1.6187 | -3.6382 | -3.9919 | 0.6217 | 0.3537 | -1.9960 | -1.8191 | -3.4637 | -3.4673 |
+| 1.5817 | 1.3956 | 8100 | 1.6185 | -3.6409 | -3.9947 | 0.6215 | 0.3538 | -1.9974 | -1.8205 | -3.4661 | -3.4698 |
+| 1.4193 | 1.4128 | 8200 | 1.6182 | -3.6471 | -4.0019 | 0.6213 | 0.3547 | -2.0009 | -1.8236 | -3.4612 | -3.4649 |
+| 1.6206 | 1.4300 | 8300 | 1.6177 | -3.6539 | -4.0098 | 0.6227 | 0.3559 | -2.0049 | -1.8269 | -3.4538 | -3.4575 |
+| 1.5122 | 1.4473 | 8400 | 1.6177 | -3.6592 | -4.0154 | 0.6222 | 0.3562 | -2.0077 | -1.8296 | -3.4662 | -3.4698 |
+| 1.5508 | 1.4645 | 8500 | 1.6178 | -3.6595 | -4.0151 | 0.6211 | 0.3556 | -2.0075 | -1.8297 | -3.4624 | -3.4660 |
+| 1.5254 | 1.4817 | 8600 | 1.6180 | -3.6624 | -4.0184 | 0.6217 | 0.3560 | -2.0092 | -1.8312 | -3.4561 | -3.4597 |
+| 1.5461 | 1.4990 | 8700 | 1.6176 | -3.6670 | -4.0237 | 0.6213 | 0.3567 | -2.0119 | -1.8335 | -3.4528 | -3.4564 |
+| 1.4625 | 1.5162 | 8800 | 1.6176 | -3.6695 | -4.0267 | 0.6217 | 0.3572 | -2.0133 | -1.8347 | -3.4523 | -3.4560 |
+| 1.5134 | 1.5334 | 8900 | 1.6171 | -3.6762 | -4.0342 | 0.6208 | 0.3580 | -2.0171 | -1.8381 | -3.4520 | -3.4557 |
+| 1.5758 | 1.5507 | 9000 | 1.6168 | -3.6792 | -4.0379 | 0.6224 | 0.3587 | -2.0189 | -1.8396 | -3.4560 | -3.4597 |
+| 1.6555 | 1.5679 | 9100 | 1.6169 | -3.6817 | -4.0402 | 0.6227 | 0.3585 | -2.0201 | -1.8408 | -3.4477 | -3.4514 |
+| 1.5434 | 1.5851 | 9200 | 1.6171 | -3.6837 | -4.0421 | 0.6224 | 0.3584 | -2.0211 | -1.8419 | -3.4539 | -3.4575 |
+| 1.6069 | 1.6023 | 9300 | 1.6168 | -3.6835 | -4.0426 | 0.6222 | 0.3592 | -2.0213 | -1.8417 | -3.4494 | -3.4530 |
+| 1.5762 | 1.6196 | 9400 | 1.6165 | -3.6858 | -4.0456 | 0.6227 | 0.3598 | -2.0228 | -1.8429 | -3.4443 | -3.4479 |
+| 1.5365 | 1.6368 | 9500 | 1.6166 | -3.6864 | -4.0459 | 0.6213 | 0.3595 | -2.0229 | -1.8432 | -3.4545 | -3.4581 |
+| 1.5801 | 1.6540 | 9600 | 1.6168 | -3.6866 | -4.0459 | 0.6217 | 0.3593 | -2.0229 | -1.8433 | -3.4497 | -3.4533 |
+| 1.4796 | 1.6713 | 9700 | 1.6170 | -3.6875 | -4.0464 | 0.6224 | 0.3588 | -2.0232 | -1.8438 | -3.4553 | -3.4589 |
+| 1.384 | 1.6885 | 9800 | 1.6169 | -3.6887 | -4.0481 | 0.6229 | 0.3593 | -2.0240 | -1.8444 | -3.4448 | -3.4484 |
+| 1.4182 | 1.7057 | 9900 | 1.6171 | -3.6889 | -4.0481 | 0.6217 | 0.3592 | -2.0241 | -1.8445 | -3.4585 | -3.4620 |
+| 1.4467 | 1.7229 | 10000 | 1.6163 | -3.6896 | -4.0499 | 0.6220 | 0.3602 | -2.0249 | -1.8448 | -3.4488 | -3.4525 |
+| 1.5786 | 1.7402 | 10100 | 1.6163 | -3.6910 | -4.0511 | 0.6220 | 0.3601 | -2.0256 | -1.8455 | -3.4456 | -3.4492 |
+| 1.5566 | 1.7574 | 10200 | 1.6168 | -3.6935 | -4.0531 | 0.6215 | 0.3596 | -2.0265 | -1.8468 | -3.4598 | -3.4634 |
+| 1.6336 | 1.7746 | 10300 | 1.6165 | -3.6933 | -4.0538 | 0.6220 | 0.3604 | -2.0269 | -1.8467 | -3.4436 | -3.4473 |
+| 1.3869 | 1.7919 | 10400 | 1.6163 | -3.6937 | -4.0540 | 0.6217 | 0.3603 | -2.0270 | -1.8468 | -3.4476 | -3.4513 |
+| 1.5501 | 1.8091 | 10500 | 1.6168 | -3.6947 | -4.0545 | 0.6231 | 0.3598 | -2.0273 | -1.8473 | -3.4492 | -3.4528 |
+| 1.6115 | 1.8263 | 10600 | 1.6158 | -3.6930 | -4.0539 | 0.6213 | 0.3609 | -2.0270 | -1.8465 | -3.4452 | -3.4489 |
+| 1.5153 | 1.8436 | 10700 | 1.6167 | -3.6940 | -4.0542 | 0.6213 | 0.3602 | -2.0271 | -1.8470 | -3.4421 | -3.4457 |
+| 1.4252 | 1.8608 | 10800 | 1.6158 | -3.6932 | -4.0544 | 0.6215 | 0.3612 | -2.0272 | -1.8466 | -3.4450 | -3.4486 |
+| 1.5627 | 1.8780 | 10900 | 1.6163 | -3.6941 | -4.0546 | 0.6224 | 0.3604 | -2.0273 | -1.8471 | -3.4576 | -3.4612 |
+| 1.6357 | 1.8952 | 11000 | 1.6164 | -3.6944 | -4.0550 | 0.6224 | 0.3605 | -2.0275 | -1.8472 | -3.4500 | -3.4537 |
+| 1.5558 | 1.9125 | 11100 | 1.6165 | -3.6941 | -4.0540 | 0.6213 | 0.3599 | -2.0270 | -1.8471 | -3.4450 | -3.4486 |
+| 1.4591 | 1.9297 | 11200 | 1.6165 | -3.6936 | -4.0539 | 0.6227 | 0.3602 | -2.0269 | -1.8468 | -3.4470 | -3.4506 |
+| 1.6996 | 1.9469 | 11300 | 1.6165 | -3.6938 | -4.0538 | 0.6220 | 0.3600 | -2.0269 | -1.8469 | -3.4578 | -3.4614 |
+| 1.5312 | 1.9642 | 11400 | 1.6169 | -3.6941 | -4.0537 | 0.6211 | 0.3596 | -2.0268 | -1.8471 | -3.4501 | -3.4537 |
+| 1.6372 | 1.9814 | 11500 | 1.6166 | -3.6950 | -4.0549 | 0.6222 | 0.3598 | -2.0274 | -1.8475 | -3.4421 | -3.4458 |
+| 1.4693 | 1.9986 | 11600 | 1.6162 | -3.6947 | -4.0557 | 0.6224 | 0.3609 | -2.0278 | -1.8474 | -3.4422 | -3.4458 |
+
+
+### Framework versions
+
+- Transformers 4.41.2
+- Pytorch 2.1.2
+- Datasets 2.20.0
+- Tokenizers 0.19.1
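The commit does not include the training script itself, only its outputs. For orientation, below is a minimal sketch of how a SimPO-style run with the card's hyperparameters could look in trl, which exposes the SimPO loss through `CPOTrainer` with `loss_type="simpo"`. The dataset id, the use of `CPOTrainer`, and the SimPO-specific `beta`/`gamma` values are assumptions, not part of this commit.

```python
# Hypothetical reproduction sketch -- NOT the author's actual script.
# Assumes trl's CPOTrainer (SimPO loss via loss_type="simpo"); the
# preference dataset is unknown, so "some/preference-dataset" is a
# placeholder.
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import CPOConfig, CPOTrainer

base = "martimfasantos/tinyllama-1.1b-sum-sft-full_old"
model = AutoModelForCausalLM.from_pretrained(base)
tokenizer = AutoTokenizer.from_pretrained(base)

# Hyperparameters copied from the card; SimPO beta/gamma are not
# recorded there, so trl's defaults are left in place.
args = CPOConfig(
    output_dir="tinyllama-1.1b-sum-simpo",
    loss_type="simpo",
    cpo_alpha=0.0,                  # pure SimPO, no behavior-cloning term
    learning_rate=5e-8,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,  # effective batch size 16
    num_train_epochs=2,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    seed=42,
)

# Expects "prompt", "chosen" and "rejected" columns.
dataset = load_dataset("some/preference-dataset", split="train")

trainer = CPOTrainer(model=model, args=args, train_dataset=dataset,
                     tokenizer=tokenizer)
trainer.train()
```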
all_results.json
ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 2.0,
+    "total_flos": 0.0,
+    "train_loss": 1.6182483464356212,
+    "train_runtime": 54021.947,
+    "train_samples": 92858,
+    "train_samples_per_second": 3.438,
+    "train_steps_per_second": 0.215
+}
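The reported throughput is internally consistent: 92,858 samples × 2 epochs ÷ 54,021.947 s ≈ 3.438 samples/s, and 3.438 ÷ 16 (the card's total_train_batch_size) ≈ 0.215 steps/s. A quick check:

```python
# Verify that the reported throughput follows from the other fields.
import json

with open("all_results.json") as f:
    stats = json.load(f)

samples_per_s = stats["train_samples"] * stats["epoch"] / stats["train_runtime"]
steps_per_s = samples_per_s / 16  # total_train_batch_size from the card

print(f"{samples_per_s:.3f}")  # 3.438 -> matches train_samples_per_second
print(f"{steps_per_s:.3f}")    # 0.215 -> matches train_steps_per_second
```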
generation_config.json
ADDED
@@ -0,0 +1,7 @@
+{
+    "bos_token_id": 1,
+    "eos_token_id": 2,
+    "max_length": 2048,
+    "pad_token_id": 0,
+    "transformers_version": "4.41.2"
+}
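These defaults (BOS id 1, EOS id 2, pad id 0, max_length 2048, matching the TinyLlama base) are picked up automatically by `generate`. A minimal inference sketch; the TL;DR-style prompt is an assumption, since the card documents no prompt format:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "martimfasantos/tinyllama-1.1b-sum-simpo"  # this repo
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo)

# Hypothetical summarization prompt; the actual format used in
# training is not documented in the card.
prompt = "Summarize the following post:\n\nPOST TEXT HERE\n\nTL;DR:"
inputs = tokenizer(prompt, return_tensors="pt")

# bos/eos/pad ids come from generation_config.json automatically.
out = model.generate(**inputs, max_new_tokens=64)
new_tokens = out[0][inputs["input_ids"].shape[1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True))
```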
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:45772ff1f74fd9f9902dea120752923cfff600e5dff6c3bdc555b9ce83992c90
 size 2200119864
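The weights are tracked with Git LFS, so the diff above only swaps the pointer's checksum; the 2.2 GB payload lives in LFS storage. A downloaded file can be checked against the new pointer like this:

```python
# Verify a downloaded model.safetensors against the LFS pointer's SHA-256.
import hashlib

h = hashlib.sha256()
with open("model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        h.update(chunk)

# Should print 45772ff1...92c90 for this revision.
print(h.hexdigest())
```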
runs/Jul03_20-28-56_poseidon/events.out.tfevents.1720039159.poseidon.1491106.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a2e622ce1d7414bf4e9d33083e6fde33901c83c7126389cd391f2cde0ea31665
+size 889880
train_results.json
ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 2.0,
+    "total_flos": 0.0,
+    "train_loss": 1.6182483464356212,
+    "train_runtime": 54021.947,
+    "train_samples": 92858,
+    "train_samples_per_second": 3.438,
+    "train_steps_per_second": 0.215
+}
trainer_state.json
ADDED
The diff for this file is too large to render.
See raw diff