happii committed on
Commit
1613883
1 Parent(s): d37af92

Model save

Browse files
README.md CHANGED
@@ -15,15 +15,15 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model was trained from scratch on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.5152
19
- - Rewards/chosen: -1.1647
20
- - Rewards/rejected: -2.0467
21
- - Rewards/accuracies: 0.7599
22
- - Rewards/margins: 0.8821
23
- - Logps/rejected: -470.8255
24
- - Logps/chosen: -408.1943
25
- - Logits/rejected: 1.9026
26
- - Logits/chosen: 1.3480
27
 
28
  ## Model description
29
 
@@ -60,15 +60,15 @@ The following hyperparameters were used during training:
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
- | 0.6509 | 0.1047 | 100 | 0.6507 | -0.0317 | -0.1403 | 0.6865 | 0.1086 | -280.1782 | -294.8975 | -2.3950 | -2.4570 |
64
- | 0.5842 | 0.2093 | 200 | 0.5869 | -0.5628 | -1.0165 | 0.7183 | 0.4537 | -367.8032 | -348.0123 | -1.8883 | -1.9913 |
65
- | 0.5518 | 0.3140 | 300 | 0.5573 | -0.7616 | -1.3682 | 0.7421 | 0.6066 | -402.9727 | -367.8922 | -1.4401 | -1.5902 |
66
- | 0.5596 | 0.4186 | 400 | 0.5451 | -0.5483 | -1.1855 | 0.7579 | 0.6373 | -384.7072 | -346.5541 | -1.3655 | -1.5387 |
67
- | 0.5352 | 0.5233 | 500 | 0.5309 | -0.9240 | -1.6733 | 0.7440 | 0.7493 | -433.4807 | -384.1307 | -0.0937 | -0.4008 |
68
- | 0.4998 | 0.6279 | 600 | 0.5225 | -1.0419 | -1.8718 | 0.7401 | 0.8299 | -453.3297 | -395.9146 | 1.1206 | 0.6984 |
69
- | 0.4936 | 0.7326 | 700 | 0.5196 | -1.2088 | -2.0934 | 0.7540 | 0.8846 | -475.4923 | -412.6063 | 1.9631 | 1.4184 |
70
- | 0.5125 | 0.8373 | 800 | 0.5171 | -1.1926 | -2.0718 | 0.7659 | 0.8791 | -473.3307 | -410.9924 | 1.8737 | 1.3314 |
71
- | 0.4976 | 0.9419 | 900 | 0.5152 | -1.1647 | -2.0467 | 0.7599 | 0.8821 | -470.8255 | -408.1943 | 1.9026 | 1.3480 |
72
 
73
 
74
  ### Framework versions
 
15
 
16
  This model was trained from scratch on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.5082
19
+ - Rewards/chosen: -1.1578
20
+ - Rewards/rejected: -2.0459
21
+ - Rewards/accuracies: 0.7639
22
+ - Rewards/margins: 0.8881
23
+ - Logps/rejected: -470.7423
24
+ - Logps/chosen: -407.5118
25
+ - Logits/rejected: 3.4043
26
+ - Logits/chosen: 2.7671
27
 
28
  ## Model description
29
 
 
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
+ | 0.652 | 0.1047 | 100 | 0.6516 | -0.0197 | -0.1250 | 0.6905 | 0.1053 | -278.6530 | -293.7002 | -2.3910 | -2.4541 |
64
+ | 0.5817 | 0.2093 | 200 | 0.5833 | -0.8527 | -1.3403 | 0.7123 | 0.4876 | -400.1837 | -376.9992 | -1.5444 | -1.6837 |
65
+ | 0.5434 | 0.3140 | 300 | 0.5530 | -0.9620 | -1.6381 | 0.7460 | 0.6761 | -429.9622 | -387.9330 | -0.5465 | -0.7917 |
66
+ | 0.5601 | 0.4186 | 400 | 0.5357 | -0.8421 | -1.5059 | 0.7440 | 0.6638 | -416.7414 | -375.9344 | 1.0675 | 0.6506 |
67
+ | 0.523 | 0.5233 | 500 | 0.5214 | -1.0264 | -1.8394 | 0.7599 | 0.8130 | -450.0945 | -394.3706 | 2.7809 | 2.2498 |
68
+ | 0.4939 | 0.6279 | 600 | 0.5188 | -1.2174 | -2.0583 | 0.7599 | 0.8409 | -471.9797 | -413.4645 | 2.9773 | 2.3838 |
69
+ | 0.4934 | 0.7326 | 700 | 0.5118 | -1.2353 | -2.1356 | 0.7698 | 0.9003 | -479.7107 | -415.2548 | 3.3093 | 2.6735 |
70
+ | 0.4975 | 0.8373 | 800 | 0.5096 | -1.1525 | -2.0253 | 0.7679 | 0.8729 | -468.6864 | -406.9773 | 3.3466 | 2.7191 |
71
+ | 0.4913 | 0.9419 | 900 | 0.5082 | -1.1578 | -2.0459 | 0.7639 | 0.8881 | -470.7423 | -407.5118 | 3.4043 | 2.7671 |
72
 
73
 
74
  ### Framework versions
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
- "train_loss": 0.5508741353818883,
5
- "train_runtime": 17482.7224,
6
  "train_samples": 61134,
7
- "train_samples_per_second": 3.497,
8
- "train_steps_per_second": 0.055
9
  }
 
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.546376849968396,
5
+ "train_runtime": 17587.6764,
6
  "train_samples": 61134,
7
+ "train_samples_per_second": 3.476,
8
+ "train_steps_per_second": 0.054
9
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57c0b665536374df8d97dec578ffef75e2cb9f9d567136165b4105f2a912e10e
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d200a77dccf726f41ed8cf92716de458b39171937491ae87a7b744fb8a87cd5
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04bb2bbcf195b1b2d0a0c3986e6922028f8e45a9d86eaa99f739a4f59caa33ee
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a8a602d8ee31ff3d4aa54dbde4bf615ce6be798331993dfbb13ef6df327f7fe
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ee9c6d3d3822933b345771cf18bfa1f8b0b225b386642f0efd6b2b12ad53392
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:168ead9eb319b2d5b5fc7d0647e965f8cd2c8e73b6f479d3bf1b8f654f59aa37
3
  size 4540516344
runs/May14_01-12-20_ubuntu/events.out.tfevents.1715649577.ubuntu.370462.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:331ed0ff85668826a0a8951510c7bce3c0ccbf3bfc0b392d71bfa4935c6c17d8
3
- size 73964
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1743ef675a4704f73f0b63960df1c87ab3a0913f85b1332350ef40d2caddd564
3
+ size 77758
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
- "train_loss": 0.5508741353818883,
5
- "train_runtime": 17482.7224,
6
  "train_samples": 61134,
7
- "train_samples_per_second": 3.497,
8
- "train_steps_per_second": 0.055
9
  }
 
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.546376849968396,
5
+ "train_runtime": 17587.6764,
6
  "train_samples": 61134,
7
+ "train_samples_per_second": 3.476,
8
+ "train_steps_per_second": 0.054
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff