happii committed on
Commit
73b1e51
1 Parent(s): c476ca2

Model save

Browse files
README.md CHANGED
@@ -15,15 +15,15 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model was trained from scratch on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.5130
19
- - Rewards/chosen: -1.1164
20
- - Rewards/rejected: -2.0177
21
- - Rewards/accuracies: 0.7679
22
- - Rewards/margins: 0.9013
23
- - Logps/rejected: -467.9212
24
- - Logps/chosen: -403.3655
25
- - Logits/rejected: 2.9275
26
- - Logits/chosen: 2.3129
27
 
28
  ## Model description
29
 
@@ -60,15 +60,15 @@ The following hyperparameters were used during training:
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
- | 0.6527 | 0.1047 | 100 | 0.6527 | -0.0179 | -0.1219 | 0.7004 | 0.1039 | -278.3384 | -293.5221 | -2.3907 | -2.4541 |
64
- | 0.5965 | 0.2093 | 200 | 0.5949 | -0.2807 | -0.6672 | 0.7103 | 0.3865 | -332.8751 | -319.8004 | -1.7023 | -1.8206 |
65
- | 0.5658 | 0.3140 | 300 | 0.5555 | -0.7667 | -1.4014 | 0.7659 | 0.6347 | -406.2905 | -368.3990 | -1.3520 | -1.5126 |
66
- | 0.5589 | 0.4186 | 400 | 0.5487 | -0.5384 | -1.1710 | 0.7440 | 0.6326 | -383.2521 | -345.5683 | -1.1235 | -1.3051 |
67
- | 0.5239 | 0.5233 | 500 | 0.5286 | -0.9972 | -1.8237 | 0.7560 | 0.8264 | -448.5180 | -391.4498 | 1.9714 | 1.5219 |
68
- | 0.4866 | 0.6279 | 600 | 0.5203 | -1.0253 | -1.8795 | 0.7520 | 0.8543 | -454.1069 | -394.2553 | 2.1912 | 1.6721 |
69
- | 0.4883 | 0.7326 | 700 | 0.5170 | -1.0794 | -1.9670 | 0.7718 | 0.8876 | -462.8506 | -399.6694 | 2.4789 | 1.8789 |
70
- | 0.5062 | 0.8373 | 800 | 0.5139 | -1.1688 | -2.0662 | 0.7679 | 0.8974 | -472.7746 | -408.6074 | 2.9371 | 2.3343 |
71
- | 0.4881 | 0.9419 | 900 | 0.5130 | -1.1164 | -2.0177 | 0.7679 | 0.9013 | -467.9212 | -403.3655 | 2.9275 | 2.3129 |
72
 
73
 
74
  ### Framework versions
 
15
 
16
  This model was trained from scratch on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.5152
19
+ - Rewards/chosen: -1.1647
20
+ - Rewards/rejected: -2.0467
21
+ - Rewards/accuracies: 0.7599
22
+ - Rewards/margins: 0.8821
23
+ - Logps/rejected: -470.8255
24
+ - Logps/chosen: -408.1943
25
+ - Logits/rejected: 1.9026
26
+ - Logits/chosen: 1.3480
27
 
28
  ## Model description
29
 
 
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
+ | 0.6509 | 0.1047 | 100 | 0.6507 | -0.0317 | -0.1403 | 0.6865 | 0.1086 | -280.1782 | -294.8975 | -2.3950 | -2.4570 |
64
+ | 0.5842 | 0.2093 | 200 | 0.5869 | -0.5628 | -1.0165 | 0.7183 | 0.4537 | -367.8032 | -348.0123 | -1.8883 | -1.9913 |
65
+ | 0.5518 | 0.3140 | 300 | 0.5573 | -0.7616 | -1.3682 | 0.7421 | 0.6066 | -402.9727 | -367.8922 | -1.4401 | -1.5902 |
66
+ | 0.5596 | 0.4186 | 400 | 0.5451 | -0.5483 | -1.1855 | 0.7579 | 0.6373 | -384.7072 | -346.5541 | -1.3655 | -1.5387 |
67
+ | 0.5352 | 0.5233 | 500 | 0.5309 | -0.9240 | -1.6733 | 0.7440 | 0.7493 | -433.4807 | -384.1307 | -0.0937 | -0.4008 |
68
+ | 0.4998 | 0.6279 | 600 | 0.5225 | -1.0419 | -1.8718 | 0.7401 | 0.8299 | -453.3297 | -395.9146 | 1.1206 | 0.6984 |
69
+ | 0.4936 | 0.7326 | 700 | 0.5196 | -1.2088 | -2.0934 | 0.7540 | 0.8846 | -475.4923 | -412.6063 | 1.9631 | 1.4184 |
70
+ | 0.5125 | 0.8373 | 800 | 0.5171 | -1.1926 | -2.0718 | 0.7659 | 0.8791 | -473.3307 | -410.9924 | 1.8737 | 1.3314 |
71
+ | 0.4976 | 0.9419 | 900 | 0.5152 | -1.1647 | -2.0467 | 0.7599 | 0.8821 | -470.8255 | -408.1943 | 1.9026 | 1.3480 |
72
 
73
 
74
  ### Framework versions
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
- "train_loss": 0.5488577563101085,
5
- "train_runtime": 17633.9629,
6
  "train_samples": 61134,
7
- "train_samples_per_second": 3.467,
8
- "train_steps_per_second": 0.054
9
  }
 
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.5508741353818883,
5
+ "train_runtime": 17482.7224,
6
  "train_samples": 61134,
7
+ "train_samples_per_second": 3.497,
8
+ "train_steps_per_second": 0.055
9
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1576c535b0fb5bd22bcc4c79de501f22d38f1f7fa4d6dc93d58f1ae78bba7b10
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57c0b665536374df8d97dec578ffef75e2cb9f9d567136165b4105f2a912e10e
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7c90d8a0cf3f814bd61563b9a40e4a35f26b7b18d4d26a83ff60dad9bc91d61
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04bb2bbcf195b1b2d0a0c3986e6922028f8e45a9d86eaa99f739a4f59caa33ee
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ef11bc6688dc6858fe9b88faab965db852fd6422c00671729bf2c1e5c96bb59
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ee9c6d3d3822933b345771cf18bfa1f8b0b225b386642f0efd6b2b12ad53392
3
  size 4540516344
runs/May13_18-55-17_ubuntu/events.out.tfevents.1715626955.ubuntu.338424.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5d6d87aa9054bb0a2cba0fb96fe0931eee0d479e8a6ccbfacf93c24d3338539
3
- size 73964
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a88cedc8a0647d40a599033e9ae6016890ba163b9ed43292f0aa3becd5a8c850
3
+ size 77758
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
- "train_loss": 0.5488577563101085,
5
- "train_runtime": 17633.9629,
6
  "train_samples": 61134,
7
- "train_samples_per_second": 3.467,
8
- "train_steps_per_second": 0.054
9
  }
 
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.5508741353818883,
5
+ "train_runtime": 17482.7224,
6
  "train_samples": 61134,
7
+ "train_samples_per_second": 3.497,
8
+ "train_steps_per_second": 0.055
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff