kaifanli commited on
Commit
fd7f69b
1 Parent(s): 40bcf1a

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,18 +1,28 @@
1
  {
2
- "epoch": 9.0,
3
- "eval_gen_len": 294.2,
4
- "eval_loss": 2.3246188163757324,
5
- "eval_rouge1": 24.0821,
6
- "eval_rouge2": 4.2604,
7
- "eval_rougeL": 12.4489,
8
- "eval_rougeLsum": 19.152,
9
- "eval_runtime": 24.5774,
10
- "eval_samples": 10,
11
- "eval_samples_per_second": 0.407,
12
- "eval_steps_per_second": 0.407,
13
- "train_loss": 2.465799797905816,
14
- "train_runtime": 251.0487,
15
- "train_samples": 10,
16
- "train_samples_per_second": 0.398,
17
- "train_steps_per_second": 0.398
 
 
 
 
 
 
 
 
 
 
18
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "eval_gen_len": 52.15822784810127,
4
+ "eval_loss": 2.7362189292907715,
5
+ "eval_rouge1": 11.716,
6
+ "eval_rouge2": 1.7738,
7
+ "eval_rougeL": 7.7212,
8
+ "eval_rougeLsum": 10.0982,
9
+ "eval_runtime": 334.5899,
10
+ "eval_samples": 632,
11
+ "eval_samples_per_second": 1.889,
12
+ "eval_steps_per_second": 1.889,
13
+ "predict_gen_len": 56.19065420560748,
14
+ "predict_loss": 2.9006059169769287,
15
+ "predict_rouge1": 15.964,
16
+ "predict_rouge2": 2.625,
17
+ "predict_rougeL": 10.5843,
18
+ "predict_rougeLsum": 13.4933,
19
+ "predict_runtime": 286.0012,
20
+ "predict_samples": 535,
21
+ "predict_samples_per_second": 1.871,
22
+ "predict_steps_per_second": 1.871,
23
+ "train_loss": 0.29059598513036994,
24
+ "train_runtime": 2646.0496,
25
+ "train_samples": 4332,
26
+ "train_samples_per_second": 8.186,
27
+ "train_steps_per_second": 8.186
28
  }
eval_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 9.0,
3
- "eval_gen_len": 294.2,
4
- "eval_loss": 2.3246188163757324,
5
- "eval_rouge1": 24.0821,
6
- "eval_rouge2": 4.2604,
7
- "eval_rougeL": 12.4489,
8
- "eval_rougeLsum": 19.152,
9
- "eval_runtime": 24.5774,
10
- "eval_samples": 10,
11
- "eval_samples_per_second": 0.407,
12
- "eval_steps_per_second": 0.407
13
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "eval_gen_len": 52.15822784810127,
4
+ "eval_loss": 2.7362189292907715,
5
+ "eval_rouge1": 11.716,
6
+ "eval_rouge2": 1.7738,
7
+ "eval_rougeL": 7.7212,
8
+ "eval_rougeLsum": 10.0982,
9
+ "eval_runtime": 334.5899,
10
+ "eval_samples": 632,
11
+ "eval_samples_per_second": 1.889,
12
+ "eval_steps_per_second": 1.889
13
  }
generated_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
predict_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_gen_len": 56.19065420560748,
3
+ "predict_loss": 2.9006059169769287,
4
+ "predict_rouge1": 15.964,
5
+ "predict_rouge2": 2.625,
6
+ "predict_rougeL": 10.5843,
7
+ "predict_rougeLsum": 13.4933,
8
+ "predict_runtime": 286.0012,
9
+ "predict_samples": 535,
10
+ "predict_samples_per_second": 1.871,
11
+ "predict_steps_per_second": 1.871
12
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2450fefe8d8e94363f116d7a43c8ac624dc7e942afc049c9ec6a9b19c3862eb0
3
  size 501807853
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c425cf9e4c3e2fd9195d6383648d5a87621a02b533e94839dda64a780b60cd14
3
  size 501807853
runs/Mar20_21-00-25_benihi/events.out.tfevents.1710939143.benihi ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:684a2b5f534fcaa6b48641a4561685287bb4101d795d7ce8c12a5215c785bb52
3
+ size 575
runs/Mar20_22-04-51_benihi/events.out.tfevents.1710939902.benihi ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb64818bd89d12ad01ab9195f7dda71db9cc3eb353c0275535837a9699a1b4a0
3
+ size 7433
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.0,
3
- "train_loss": 2.465799797905816,
4
- "train_runtime": 251.0487,
5
- "train_samples": 10,
6
- "train_samples_per_second": 0.398,
7
- "train_steps_per_second": 0.398
8
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "train_loss": 0.29059598513036994,
4
+ "train_runtime": 2646.0496,
5
+ "train_samples": 4332,
6
+ "train_samples_per_second": 8.186,
7
+ "train_steps_per_second": 8.186
8
  }
trainer_state.json CHANGED
@@ -1,147 +1,281 @@
1
  {
2
- "best_metric": 24.0821,
3
- "best_model_checkpoint": "saved/tobyoki-pairwise/bart-base-japanese/BaseModel/checkpoint-60",
4
- "epoch": 9.0,
5
- "eval_steps": 500,
6
- "global_step": 90,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {
12
  "epoch": 1.0,
13
- "eval_gen_len": 300.0,
14
- "eval_loss": 2.7064690589904785,
15
- "eval_rouge1": 17.4135,
16
- "eval_rouge2": 1.8293,
17
- "eval_rougeL": 10.3284,
18
- "eval_rougeLsum": 13.7135,
19
- "eval_runtime": 24.7791,
20
- "eval_samples_per_second": 0.404,
21
- "eval_steps_per_second": 0.404,
22
- "step": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  },
24
  {
25
  "epoch": 2.0,
26
- "eval_gen_len": 300.0,
27
- "eval_loss": 2.4982924461364746,
28
- "eval_rouge1": 15.5531,
29
- "eval_rouge2": 1.4496,
30
- "eval_rougeL": 9.0138,
31
- "eval_rougeLsum": 12.8628,
32
- "eval_runtime": 24.9094,
33
- "eval_samples_per_second": 0.401,
34
- "eval_steps_per_second": 0.401,
35
- "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  },
37
  {
38
  "epoch": 3.0,
39
- "eval_gen_len": 300.0,
40
- "eval_loss": 2.418119192123413,
41
- "eval_rouge1": 17.7832,
42
- "eval_rouge2": 2.7959,
43
- "eval_rougeL": 9.969,
44
- "eval_rougeLsum": 13.5642,
45
- "eval_runtime": 25.6031,
46
- "eval_samples_per_second": 0.391,
47
- "eval_steps_per_second": 0.391,
48
- "step": 30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_gen_len": 300.0,
53
- "eval_loss": 2.3930418491363525,
54
- "eval_rouge1": 20.0103,
55
- "eval_rouge2": 2.9976,
56
- "eval_rougeL": 11.0143,
57
- "eval_rougeLsum": 15.4415,
58
- "eval_runtime": 24.7517,
59
- "eval_samples_per_second": 0.404,
60
- "eval_steps_per_second": 0.404,
61
- "step": 40
62
- },
63
- {
64
- "epoch": 5.0,
65
- "eval_gen_len": 300.0,
66
- "eval_loss": 2.332805871963501,
67
- "eval_rouge1": 20.5428,
68
- "eval_rouge2": 3.6354,
69
- "eval_rougeL": 11.3166,
70
- "eval_rougeLsum": 16.0252,
71
- "eval_runtime": 25.4485,
72
- "eval_samples_per_second": 0.393,
73
- "eval_steps_per_second": 0.393,
74
- "step": 50
75
- },
76
- {
77
- "epoch": 6.0,
78
- "eval_gen_len": 294.2,
79
- "eval_loss": 2.3246188163757324,
80
- "eval_rouge1": 24.0821,
81
- "eval_rouge2": 4.2604,
82
- "eval_rougeL": 12.4489,
83
- "eval_rougeLsum": 19.152,
84
- "eval_runtime": 25.6727,
85
- "eval_samples_per_second": 0.39,
86
- "eval_steps_per_second": 0.39,
87
- "step": 60
88
- },
89
- {
90
- "epoch": 7.0,
91
- "eval_gen_len": 300.0,
92
- "eval_loss": 2.3076653480529785,
93
- "eval_rouge1": 20.4387,
94
- "eval_rouge2": 3.8043,
95
- "eval_rougeL": 11.4571,
96
- "eval_rougeLsum": 16.7923,
97
- "eval_runtime": 24.5822,
98
- "eval_samples_per_second": 0.407,
99
- "eval_steps_per_second": 0.407,
100
- "step": 70
101
- },
102
- {
103
- "epoch": 8.0,
104
- "eval_gen_len": 300.0,
105
- "eval_loss": 2.305992603302002,
106
- "eval_rouge1": 22.0434,
107
- "eval_rouge2": 4.3648,
108
- "eval_rougeL": 12.386,
109
- "eval_rougeLsum": 18.3828,
110
- "eval_runtime": 25.0442,
111
- "eval_samples_per_second": 0.399,
112
- "eval_steps_per_second": 0.399,
113
- "step": 80
114
- },
115
- {
116
- "epoch": 9.0,
117
- "eval_gen_len": 289.1,
118
- "eval_loss": 2.302980661392212,
119
- "eval_rouge1": 20.3362,
120
- "eval_rouge2": 3.6623,
121
- "eval_rougeL": 10.9234,
122
- "eval_rougeLsum": 16.1471,
123
- "eval_runtime": 25.3281,
124
- "eval_samples_per_second": 0.395,
125
- "eval_steps_per_second": 0.395,
126
- "step": 90
127
- },
128
- {
129
- "epoch": 9.0,
130
- "step": 90,
131
- "total_flos": 18151534964736.0,
132
- "train_loss": 2.465799797905816,
133
- "train_runtime": 251.0487,
134
- "train_samples_per_second": 0.398,
135
- "train_steps_per_second": 0.398
136
  }
137
  ],
138
- "logging_steps": 500,
139
- "max_steps": 100,
140
- "num_input_tokens_seen": 0,
141
- "num_train_epochs": 10,
142
- "save_steps": 500,
143
- "total_flos": 18151534964736.0,
144
- "train_batch_size": 1,
145
  "trial_name": null,
146
  "trial_params": null
147
  }
 
1
  {
2
+ "best_metric": 11.716,
3
+ "best_model_checkpoint": "saved/tobyoki-pairwise/bart-base-japanese/BaseModel/checkpoint-4332",
4
+ "epoch": 4.0,
5
+ "global_step": 17328,
 
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 0.12,
12
+ "learning_rate": 4.8845798707294554e-05,
13
+ "loss": 2.1617,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.23,
18
+ "learning_rate": 4.7691597414589107e-05,
19
+ "loss": 1.346,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.35,
24
+ "learning_rate": 4.653739612188366e-05,
25
+ "loss": 0.96,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.46,
30
+ "learning_rate": 4.538319482917821e-05,
31
+ "loss": 0.7184,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.58,
36
+ "learning_rate": 4.422899353647276e-05,
37
+ "loss": 0.5936,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.69,
42
+ "learning_rate": 4.3074792243767315e-05,
43
+ "loss": 0.4745,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.81,
48
+ "learning_rate": 4.192059095106187e-05,
49
+ "loss": 0.389,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.92,
54
+ "learning_rate": 4.076638965835642e-05,
55
+ "loss": 0.298,
56
+ "step": 4000
57
+ },
58
  {
59
  "epoch": 1.0,
60
+ "eval_gen_len": 52.15822784810127,
61
+ "eval_loss": 2.7362189292907715,
62
+ "eval_rouge1": 11.716,
63
+ "eval_rouge2": 1.7738,
64
+ "eval_rougeL": 7.7212,
65
+ "eval_rougeLsum": 10.0982,
66
+ "eval_runtime": 282.218,
67
+ "eval_samples_per_second": 2.239,
68
+ "eval_steps_per_second": 2.239,
69
+ "step": 4332
70
+ },
71
+ {
72
+ "epoch": 1.04,
73
+ "learning_rate": 3.961218836565097e-05,
74
+ "loss": 0.2931,
75
+ "step": 4500
76
+ },
77
+ {
78
+ "epoch": 1.15,
79
+ "learning_rate": 3.845798707294552e-05,
80
+ "loss": 0.27,
81
+ "step": 5000
82
+ },
83
+ {
84
+ "epoch": 1.27,
85
+ "learning_rate": 3.7303785780240075e-05,
86
+ "loss": 0.2255,
87
+ "step": 5500
88
+ },
89
+ {
90
+ "epoch": 1.39,
91
+ "learning_rate": 3.614958448753463e-05,
92
+ "loss": 0.1818,
93
+ "step": 6000
94
+ },
95
+ {
96
+ "epoch": 1.5,
97
+ "learning_rate": 3.499538319482918e-05,
98
+ "loss": 0.1736,
99
+ "step": 6500
100
+ },
101
+ {
102
+ "epoch": 1.62,
103
+ "learning_rate": 3.384118190212373e-05,
104
+ "loss": 0.1894,
105
+ "step": 7000
106
+ },
107
+ {
108
+ "epoch": 1.73,
109
+ "learning_rate": 3.2686980609418284e-05,
110
+ "loss": 0.1642,
111
+ "step": 7500
112
+ },
113
+ {
114
+ "epoch": 1.85,
115
+ "learning_rate": 3.1532779316712836e-05,
116
+ "loss": 0.1262,
117
+ "step": 8000
118
+ },
119
+ {
120
+ "epoch": 1.96,
121
+ "learning_rate": 3.0378578024007388e-05,
122
+ "loss": 0.1596,
123
+ "step": 8500
124
  },
125
  {
126
  "epoch": 2.0,
127
+ "eval_gen_len": 42.75791139240506,
128
+ "eval_loss": 3.1079843044281006,
129
+ "eval_rouge1": 11.6339,
130
+ "eval_rouge2": 1.9589,
131
+ "eval_rougeL": 8.2226,
132
+ "eval_rougeLsum": 9.5881,
133
+ "eval_runtime": 452.5754,
134
+ "eval_samples_per_second": 1.396,
135
+ "eval_steps_per_second": 1.396,
136
+ "step": 8664
137
+ },
138
+ {
139
+ "epoch": 2.08,
140
+ "learning_rate": 2.922437673130194e-05,
141
+ "loss": 0.1249,
142
+ "step": 9000
143
+ },
144
+ {
145
+ "epoch": 2.19,
146
+ "learning_rate": 2.8070175438596492e-05,
147
+ "loss": 0.1056,
148
+ "step": 9500
149
+ },
150
+ {
151
+ "epoch": 2.31,
152
+ "learning_rate": 2.6915974145891044e-05,
153
+ "loss": 0.0981,
154
+ "step": 10000
155
+ },
156
+ {
157
+ "epoch": 2.42,
158
+ "learning_rate": 2.5761772853185596e-05,
159
+ "loss": 0.1025,
160
+ "step": 10500
161
+ },
162
+ {
163
+ "epoch": 2.54,
164
+ "learning_rate": 2.460757156048015e-05,
165
+ "loss": 0.0777,
166
+ "step": 11000
167
+ },
168
+ {
169
+ "epoch": 2.65,
170
+ "learning_rate": 2.3453370267774704e-05,
171
+ "loss": 0.0849,
172
+ "step": 11500
173
+ },
174
+ {
175
+ "epoch": 2.77,
176
+ "learning_rate": 2.2299168975069256e-05,
177
+ "loss": 0.095,
178
+ "step": 12000
179
+ },
180
+ {
181
+ "epoch": 2.89,
182
+ "learning_rate": 2.1144967682363804e-05,
183
+ "loss": 0.0805,
184
+ "step": 12500
185
  },
186
  {
187
  "epoch": 3.0,
188
+ "eval_gen_len": 43.24683544303797,
189
+ "eval_loss": 3.2877912521362305,
190
+ "eval_rouge1": 11.3717,
191
+ "eval_rouge2": 2.0083,
192
+ "eval_rougeL": 7.8703,
193
+ "eval_rougeLsum": 9.6383,
194
+ "eval_runtime": 236.6502,
195
+ "eval_samples_per_second": 2.671,
196
+ "eval_steps_per_second": 2.671,
197
+ "step": 12996
198
+ },
199
+ {
200
+ "epoch": 3.0,
201
+ "learning_rate": 1.9990766389658356e-05,
202
+ "loss": 0.081,
203
+ "step": 13000
204
+ },
205
+ {
206
+ "epoch": 3.12,
207
+ "learning_rate": 1.883656509695291e-05,
208
+ "loss": 0.0562,
209
+ "step": 13500
210
+ },
211
+ {
212
+ "epoch": 3.23,
213
+ "learning_rate": 1.768236380424746e-05,
214
+ "loss": 0.0659,
215
+ "step": 14000
216
+ },
217
+ {
218
+ "epoch": 3.35,
219
+ "learning_rate": 1.6528162511542013e-05,
220
+ "loss": 0.0641,
221
+ "step": 14500
222
+ },
223
+ {
224
+ "epoch": 3.46,
225
+ "learning_rate": 1.5373961218836565e-05,
226
+ "loss": 0.0621,
227
+ "step": 15000
228
+ },
229
+ {
230
+ "epoch": 3.58,
231
+ "learning_rate": 1.4219759926131118e-05,
232
+ "loss": 0.0495,
233
+ "step": 15500
234
+ },
235
+ {
236
+ "epoch": 3.69,
237
+ "learning_rate": 1.306555863342567e-05,
238
+ "loss": 0.052,
239
+ "step": 16000
240
+ },
241
+ {
242
+ "epoch": 3.81,
243
+ "learning_rate": 1.1911357340720223e-05,
244
+ "loss": 0.0624,
245
+ "step": 16500
246
+ },
247
+ {
248
+ "epoch": 3.92,
249
+ "learning_rate": 1.0757156048014775e-05,
250
+ "loss": 0.0576,
251
+ "step": 17000
252
+ },
253
+ {
254
+ "epoch": 4.0,
255
+ "eval_gen_len": 43.36867088607595,
256
+ "eval_loss": 3.3865511417388916,
257
+ "eval_rouge1": 11.5579,
258
+ "eval_rouge2": 2.3617,
259
+ "eval_rougeL": 8.299,
260
+ "eval_rougeLsum": 9.8507,
261
+ "eval_runtime": 456.2669,
262
+ "eval_samples_per_second": 1.385,
263
+ "eval_steps_per_second": 1.385,
264
+ "step": 17328
265
  },
266
  {
267
  "epoch": 4.0,
268
+ "step": 17328,
269
+ "total_flos": 2855176963080192.0,
270
+ "train_loss": 0.29059598513036994,
271
+ "train_runtime": 2646.0496,
272
+ "train_samples_per_second": 8.186,
273
+ "train_steps_per_second": 8.186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  }
275
  ],
276
+ "max_steps": 21660,
277
+ "num_train_epochs": 5,
278
+ "total_flos": 2855176963080192.0,
 
 
 
 
279
  "trial_name": null,
280
  "trial_params": null
281
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a07525eebef2d5afef7b265bb86819fb9995d1565b524cace57a62eecde1b408
3
  size 4411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dacf9007d2268ced50160573cc2957c600341983cef3d21a43c082be0f9423f8
3
  size 4411