AmberYifan commited on
Commit
465603a
1 Parent(s): 89a8682

Model save

Browse files
README.md CHANGED
@@ -15,15 +15,15 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [Qwen/Qwen2.5-7B](https://huggingface.co/Qwen/Qwen2.5-7B) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.1366
19
- - Rewards/real: 7.8246
20
- - Rewards/generated: -2.3247
21
  - Rewards/accuracies: 1.0
22
- - Rewards/margins: 10.1492
23
- - Logps/generated: -263.3269
24
- - Logps/real: -127.7119
25
- - Logits/generated: -0.9835
26
- - Logits/real: -0.6921
27
 
28
  ## Model description
29
 
@@ -60,7 +60,7 @@ The following hyperparameters were used during training:
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Rewards/real | Rewards/generated | Rewards/accuracies | Rewards/margins | Logps/generated | Logps/real | Logits/generated | Logits/real |
62
  |:-------------:|:------:|:----:|:---------------:|:------------:|:-----------------:|:------------------:|:---------------:|:---------------:|:----------:|:----------------:|:-----------:|
63
- | 0.1463 | 0.6410 | 50 | 0.1366 | 7.8246 | -2.3247 | 1.0 | 10.1492 | -263.3269 | -127.7119 | -0.9835 | -0.6921 |
64
 
65
 
66
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [Qwen/Qwen2.5-7B](https://huggingface.co/Qwen/Qwen2.5-7B) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.1369
19
+ - Rewards/real: 7.7762
20
+ - Rewards/generated: -2.5293
21
  - Rewards/accuracies: 1.0
22
+ - Rewards/margins: 10.3055
23
+ - Logps/generated: -265.3735
24
+ - Logps/real: -128.1957
25
+ - Logits/generated: -0.9822
26
+ - Logits/real: -0.7243
27
 
28
  ## Model description
29
 
 
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Rewards/real | Rewards/generated | Rewards/accuracies | Rewards/margins | Logps/generated | Logps/real | Logits/generated | Logits/real |
62
  |:-------------:|:------:|:----:|:---------------:|:------------:|:-----------------:|:------------------:|:---------------:|:---------------:|:----------:|:----------------:|:-----------:|
63
+ | 0.1463 | 0.6410 | 50 | 0.1369 | 7.7762 | -2.5293 | 1.0 | 10.3055 | -265.3735 | -128.1957 | -0.9822 | -0.7243 |
64
 
65
 
66
  ### Framework versions
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.2174399999471811,
5
- "train_runtime": 1268.5918,
6
  "train_samples": 2484,
7
- "train_samples_per_second": 1.958,
8
- "train_steps_per_second": 0.061
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.21996817527673182,
5
+ "train_runtime": 1475.9075,
6
  "train_samples": 2484,
7
+ "train_samples_per_second": 1.683,
8
+ "train_steps_per_second": 0.053
9
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1f8fe80a90ed968b51cf2b2acdf8e1374d7a5083a3b2d825cbd4b7df31ce9bd
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba4621422fcaa408b43c7e596d25660518be4fe007afc8a9f25eca1b67638b7a
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f2e1cfcef2525458b91722ea4094b7b9cb732b90fb8f764139d7857db8725ee
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73baa311f2049027e0467a484b08689e8f299c29325caf8b332cff245e9a3795
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c12526a5353c6624fcbc0f507bedafa96f01e46949884d705cf4466838fbf3c
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c3ef277ef8ac9dfe10c2ed2e7f4f7311f5f0b0d4fef8f8322a168309c89c6c6
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e54b3e79abf32e06bf2cb5b5a5e8445e07215071711fa58308d71c1c0ccee30
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11bffdf067690c282a41c2df69394f808572d595cca1b5cb5c1015eaec2f9139
3
  size 1089994880
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.2174399999471811,
5
- "train_runtime": 1268.5918,
6
  "train_samples": 2484,
7
- "train_samples_per_second": 1.958,
8
- "train_steps_per_second": 0.061
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.21996817527673182,
5
+ "train_runtime": 1475.9075,
6
  "train_samples": 2484,
7
+ "train_samples_per_second": 1.683,
8
+ "train_steps_per_second": 0.053
9
  }
trainer_state.json CHANGED
@@ -10,7 +10,7 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.01282051282051282,
13
- "grad_norm": 169.85406133026603,
14
  "learning_rate": 6.25e-08,
15
  "logits/generated": -1.1294811964035034,
16
  "logits/real": -0.7686214447021484,
@@ -25,133 +25,133 @@
25
  },
26
  {
27
  "epoch": 0.1282051282051282,
28
- "grad_norm": 63.23414694225829,
29
  "learning_rate": 4.857142857142857e-07,
30
- "logits/generated": -0.9032017588615417,
31
- "logits/real": -0.9559243321418762,
32
- "logps/generated": -279.2696838378906,
33
- "logps/real": -224.7206268310547,
34
- "loss": 0.6755,
35
- "rewards/accuracies": 0.8333333134651184,
36
- "rewards/generated": -0.39545938372612,
37
- "rewards/margins": 0.8560593724250793,
38
- "rewards/real": 0.4606000483036041,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.2564102564102564,
43
- "grad_norm": 4.693240657102186,
44
  "learning_rate": 4.142857142857143e-07,
45
- "logits/generated": -0.951930820941925,
46
- "logits/real": -0.8856027722358704,
47
- "logps/generated": -284.35430908203125,
48
- "logps/real": -156.9384002685547,
49
- "loss": 0.1771,
50
  "rewards/accuracies": 1.0,
51
- "rewards/generated": -1.183829426765442,
52
- "rewards/margins": 5.8197126388549805,
53
- "rewards/real": 4.635883808135986,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.38461538461538464,
58
- "grad_norm": 1.1912756226499355,
59
  "learning_rate": 3.4285714285714286e-07,
60
- "logits/generated": -0.5055528283119202,
61
- "logits/real": -0.7188040018081665,
62
- "logps/generated": -301.6423645019531,
63
- "logps/real": -140.4794921875,
64
- "loss": 0.1408,
65
  "rewards/accuracies": 1.0,
66
- "rewards/generated": -2.185486316680908,
67
- "rewards/margins": 9.369997024536133,
68
- "rewards/real": 7.184512138366699,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.5128205128205128,
73
- "grad_norm": 1.2464411282623236,
74
  "learning_rate": 2.714285714285714e-07,
75
- "logits/generated": -0.5057858228683472,
76
- "logits/real": -0.5780868530273438,
77
- "logps/generated": -299.8144226074219,
78
- "logps/real": -154.14199829101562,
79
- "loss": 0.1457,
80
  "rewards/accuracies": 1.0,
81
- "rewards/generated": -2.8365063667297363,
82
- "rewards/margins": 10.34560489654541,
83
- "rewards/real": 7.509098052978516,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.6410256410256411,
88
- "grad_norm": 1.0548152399288286,
89
  "learning_rate": 2e-07,
90
- "logits/generated": -0.38232049345970154,
91
- "logits/real": -0.6496118307113647,
92
- "logps/generated": -303.3151550292969,
93
- "logps/real": -143.59616088867188,
94
  "loss": 0.1463,
95
  "rewards/accuracies": 1.0,
96
- "rewards/generated": -2.8232903480529785,
97
- "rewards/margins": 10.742189407348633,
98
- "rewards/real": 7.9188995361328125,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.6410256410256411,
103
- "eval_logits/generated": -0.9835317730903625,
104
- "eval_logits/real": -0.6921402812004089,
105
- "eval_logps/generated": -263.3268737792969,
106
- "eval_logps/real": -127.7119369506836,
107
- "eval_loss": 0.13658176362514496,
108
  "eval_rewards/accuracies": 1.0,
109
- "eval_rewards/generated": -2.3246724605560303,
110
- "eval_rewards/margins": 10.149224281311035,
111
- "eval_rewards/real": 7.824552059173584,
112
- "eval_runtime": 10.8908,
113
- "eval_samples_per_second": 4.591,
114
- "eval_steps_per_second": 0.367,
115
  "step": 50
116
  },
117
  {
118
  "epoch": 0.7692307692307693,
119
- "grad_norm": 1.1141881809347227,
120
  "learning_rate": 1.2857142857142855e-07,
121
- "logits/generated": -0.503508448600769,
122
- "logits/real": -0.6755964159965515,
123
- "logps/generated": -294.5934143066406,
124
- "logps/real": -133.47311401367188,
125
- "loss": 0.1358,
126
  "rewards/accuracies": 1.0,
127
- "rewards/generated": -3.0683863162994385,
128
- "rewards/margins": 10.883561134338379,
129
- "rewards/real": 7.815173149108887,
130
  "step": 60
131
  },
132
  {
133
  "epoch": 0.8974358974358975,
134
- "grad_norm": 1.0596728323269835,
135
  "learning_rate": 5.714285714285714e-08,
136
- "logits/generated": -0.6190778017044067,
137
- "logits/real": -0.6327217221260071,
138
- "logps/generated": -289.4383850097656,
139
- "logps/real": -148.02481079101562,
140
- "loss": 0.145,
141
  "rewards/accuracies": 1.0,
142
- "rewards/generated": -2.6634089946746826,
143
- "rewards/margins": 10.635811805725098,
144
- "rewards/real": 7.972402095794678,
145
  "step": 70
146
  },
147
  {
148
  "epoch": 1.0,
149
  "step": 78,
150
  "total_flos": 0.0,
151
- "train_loss": 0.2174399999471811,
152
- "train_runtime": 1268.5918,
153
- "train_samples_per_second": 1.958,
154
- "train_steps_per_second": 0.061
155
  }
156
  ],
157
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.01282051282051282,
13
+ "grad_norm": 169.86816716315738,
14
  "learning_rate": 6.25e-08,
15
  "logits/generated": -1.1294811964035034,
16
  "logits/real": -0.7686214447021484,
 
25
  },
26
  {
27
  "epoch": 0.1282051282051282,
28
+ "grad_norm": 81.55279578883227,
29
  "learning_rate": 4.857142857142857e-07,
30
+ "logits/generated": -0.9103206992149353,
31
+ "logits/real": -0.9607930779457092,
32
+ "logps/generated": -278.6439208984375,
33
+ "logps/real": -225.61936950683594,
34
+ "loss": 0.6932,
35
+ "rewards/accuracies": 0.8472222089767456,
36
+ "rewards/generated": -0.3328787088394165,
37
+ "rewards/margins": 0.7036054730415344,
38
+ "rewards/real": 0.3707267642021179,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.2564102564102564,
43
+ "grad_norm": 7.080598136148484,
44
  "learning_rate": 4.142857142857143e-07,
45
+ "logits/generated": -0.9712547063827515,
46
+ "logits/real": -0.9375473260879517,
47
+ "logps/generated": -284.4731140136719,
48
+ "logps/real": -156.74594116210938,
49
+ "loss": 0.1789,
50
  "rewards/accuracies": 1.0,
51
+ "rewards/generated": -1.1957100629806519,
52
+ "rewards/margins": 5.850839138031006,
53
+ "rewards/real": 4.655129432678223,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.38461538461538464,
58
+ "grad_norm": 1.0725410774049144,
59
  "learning_rate": 3.4285714285714286e-07,
60
+ "logits/generated": -0.5535954833030701,
61
+ "logits/real": -0.7542158365249634,
62
+ "logps/generated": -300.3318786621094,
63
+ "logps/real": -141.39231872558594,
64
+ "loss": 0.1418,
65
  "rewards/accuracies": 1.0,
66
+ "rewards/generated": -2.0544393062591553,
67
+ "rewards/margins": 9.147669792175293,
68
+ "rewards/real": 7.093230247497559,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.5128205128205128,
73
+ "grad_norm": 1.1609685873740068,
74
  "learning_rate": 2.714285714285714e-07,
75
+ "logits/generated": -0.5164953470230103,
76
+ "logits/real": -0.5888045430183411,
77
+ "logps/generated": -299.4565734863281,
78
+ "logps/real": -154.54141235351562,
79
+ "loss": 0.1462,
80
  "rewards/accuracies": 1.0,
81
+ "rewards/generated": -2.800717353820801,
82
+ "rewards/margins": 10.269874572753906,
83
+ "rewards/real": 7.469156742095947,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.6410256410256411,
88
+ "grad_norm": 1.1798404000846567,
89
  "learning_rate": 2e-07,
90
+ "logits/generated": -0.40408602356910706,
91
+ "logits/real": -0.677122950553894,
92
+ "logps/generated": -303.20843505859375,
93
+ "logps/real": -143.86439514160156,
94
  "loss": 0.1463,
95
  "rewards/accuracies": 1.0,
96
+ "rewards/generated": -2.8126168251037598,
97
+ "rewards/margins": 10.704693794250488,
98
+ "rewards/real": 7.8920769691467285,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.6410256410256411,
103
+ "eval_logits/generated": -0.9821963906288147,
104
+ "eval_logits/real": -0.7243232131004333,
105
+ "eval_logps/generated": -265.37347412109375,
106
+ "eval_logps/real": -128.19570922851562,
107
+ "eval_loss": 0.13692454993724823,
108
  "eval_rewards/accuracies": 1.0,
109
+ "eval_rewards/generated": -2.5293374061584473,
110
+ "eval_rewards/margins": 10.305512428283691,
111
+ "eval_rewards/real": 7.776175022125244,
112
+ "eval_runtime": 15.6793,
113
+ "eval_samples_per_second": 3.189,
114
+ "eval_steps_per_second": 0.255,
115
  "step": 50
116
  },
117
  {
118
  "epoch": 0.7692307692307693,
119
+ "grad_norm": 1.1860788753919929,
120
  "learning_rate": 1.2857142857142855e-07,
121
+ "logits/generated": -0.5171874165534973,
122
+ "logits/real": -0.6936507225036621,
123
+ "logps/generated": -294.03045654296875,
124
+ "logps/real": -133.57363891601562,
125
+ "loss": 0.136,
126
  "rewards/accuracies": 1.0,
127
+ "rewards/generated": -3.012089252471924,
128
+ "rewards/margins": 10.81721019744873,
129
+ "rewards/real": 7.805120944976807,
130
  "step": 60
131
  },
132
  {
133
  "epoch": 0.8974358974358975,
134
+ "grad_norm": 1.1142769804706112,
135
  "learning_rate": 5.714285714285714e-08,
136
+ "logits/generated": -0.6302377581596375,
137
+ "logits/real": -0.6565964818000793,
138
+ "logps/generated": -289.1152038574219,
139
+ "logps/real": -148.23373413085938,
140
+ "loss": 0.1452,
141
  "rewards/accuracies": 1.0,
142
+ "rewards/generated": -2.631093740463257,
143
+ "rewards/margins": 10.58260440826416,
144
+ "rewards/real": 7.951510429382324,
145
  "step": 70
146
  },
147
  {
148
  "epoch": 1.0,
149
  "step": 78,
150
  "total_flos": 0.0,
151
+ "train_loss": 0.21996817527673182,
152
+ "train_runtime": 1475.9075,
153
+ "train_samples_per_second": 1.683,
154
+ "train_steps_per_second": 0.053
155
  }
156
  ],
157
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2c5f648b014f6c1acc157d81fa5f5fb4cf76525e7160b32b8fc67294a1a9a31
3
  size 6392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d54fba7b1ccc66f5d916428efb0a621e64059933f1084988291ad80aa0201e7d
3
  size 6392