kaifanli committed on
Commit
06006f8
1 Parent(s): 6004f56

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,28 +1,28 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_gen_len": 95.02447552447552,
4
- "eval_loss": 3.2694220542907715,
5
- "eval_rouge1": 18.7407,
6
- "eval_rouge2": 3.1211,
7
- "eval_rougeL": 10.9379,
8
- "eval_rougeLsum": 15.8203,
9
- "eval_runtime": 317.8178,
10
- "eval_samples": 286,
11
- "eval_samples_per_second": 0.9,
12
- "eval_steps_per_second": 0.9,
13
- "predict_gen_len": 86.58898305084746,
14
- "predict_loss": 3.0853381156921387,
15
- "predict_rouge1": 18.1172,
16
- "predict_rouge2": 3.4127,
17
- "predict_rougeL": 11.2062,
18
- "predict_rougeLsum": 12.5441,
19
- "predict_runtime": 262.8252,
20
- "predict_samples": 236,
21
- "predict_samples_per_second": 0.898,
22
- "predict_steps_per_second": 0.898,
23
- "train_loss": 0.4568321267587167,
24
- "train_runtime": 2611.1815,
25
- "train_samples": 2025,
26
- "train_samples_per_second": 3.878,
27
- "train_steps_per_second": 3.878
28
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_gen_len": 63.7625,
4
+ "eval_loss": 2.0723605155944824,
5
+ "eval_rouge1": 15.5769,
6
+ "eval_rouge2": 3.3042,
7
+ "eval_rougeL": 11.0176,
8
+ "eval_rougeLsum": 12.8107,
9
+ "eval_runtime": 78.8993,
10
+ "eval_samples": 80,
11
+ "eval_samples_per_second": 1.014,
12
+ "eval_steps_per_second": 1.014,
13
+ "predict_gen_len": 58.329787234042556,
14
+ "predict_loss": 2.241582155227661,
15
+ "predict_rouge1": 20.5805,
16
+ "predict_rouge2": 6.166,
17
+ "predict_rougeL": 14.2848,
18
+ "predict_rougeLsum": 15.2919,
19
+ "predict_runtime": 90.5083,
20
+ "predict_samples": 94,
21
+ "predict_samples_per_second": 1.039,
22
+ "predict_steps_per_second": 1.039,
23
+ "train_loss": 1.0774688316355856,
24
+ "train_runtime": 820.1414,
25
+ "train_samples": 717,
26
+ "train_samples_per_second": 4.371,
27
+ "train_steps_per_second": 4.371
28
  }
eval_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_gen_len": 95.02447552447552,
4
- "eval_loss": 3.2694220542907715,
5
- "eval_rouge1": 18.7407,
6
- "eval_rouge2": 3.1211,
7
- "eval_rougeL": 10.9379,
8
- "eval_rougeLsum": 15.8203,
9
- "eval_runtime": 317.8178,
10
- "eval_samples": 286,
11
- "eval_samples_per_second": 0.9,
12
- "eval_steps_per_second": 0.9
13
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_gen_len": 63.7625,
4
+ "eval_loss": 2.0723605155944824,
5
+ "eval_rouge1": 15.5769,
6
+ "eval_rouge2": 3.3042,
7
+ "eval_rougeL": 11.0176,
8
+ "eval_rougeLsum": 12.8107,
9
+ "eval_runtime": 78.8993,
10
+ "eval_samples": 80,
11
+ "eval_samples_per_second": 1.014,
12
+ "eval_steps_per_second": 1.014
13
  }
generated_predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
predict_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "predict_gen_len": 86.58898305084746,
3
- "predict_loss": 3.0853381156921387,
4
- "predict_rouge1": 18.1172,
5
- "predict_rouge2": 3.4127,
6
- "predict_rougeL": 11.2062,
7
- "predict_rougeLsum": 12.5441,
8
- "predict_runtime": 262.8252,
9
- "predict_samples": 236,
10
- "predict_samples_per_second": 0.898,
11
- "predict_steps_per_second": 0.898
12
  }
 
1
  {
2
+ "predict_gen_len": 58.329787234042556,
3
+ "predict_loss": 2.241582155227661,
4
+ "predict_rouge1": 20.5805,
5
+ "predict_rouge2": 6.166,
6
+ "predict_rougeL": 14.2848,
7
+ "predict_rougeLsum": 15.2919,
8
+ "predict_runtime": 90.5083,
9
+ "predict_samples": 94,
10
+ "predict_samples_per_second": 1.039,
11
+ "predict_steps_per_second": 1.039
12
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c25dc574abdfee75a7efeaf87fc36fa4c0d438145f9d28fad5ab241b3d452a81
3
  size 501807853
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19dbc6c5a6017e5db63b49510291f078d1639718550d47be1687a31bf30a05af
3
  size 501807853
runs/Mar26_18-34-27_kogecha/events.out.tfevents.1711446687.kogecha ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a3139ec298ed4a872dfea73e55876be15c25d282143607cd2478f9e7e121ecd
3
+ size 565
runs/Mar26_19-07-17_kogecha/events.out.tfevents.1711447649.kogecha ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b968ba2687d93eecb051e7ade0a201cda659c0742805148d4e08b52f12a14971
3
+ size 6380
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "train_loss": 0.4568321267587167,
4
- "train_runtime": 2611.1815,
5
- "train_samples": 2025,
6
- "train_samples_per_second": 3.878,
7
- "train_steps_per_second": 3.878
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "train_loss": 1.0774688316355856,
4
+ "train_runtime": 820.1414,
5
+ "train_samples": 717,
6
+ "train_samples_per_second": 4.371,
7
+ "train_steps_per_second": 4.371
8
  }
trainer_state.json CHANGED
@@ -1,210 +1,132 @@
1
  {
2
- "best_metric": 18.7407,
3
- "best_model_checkpoint": "saved/tobyoki-pairwise-wo_space/bart-base-japanese/BaseModel/checkpoint-10125",
4
  "epoch": 5.0,
5
- "global_step": 10125,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.25,
12
- "learning_rate": 4.7530864197530866e-05,
13
- "loss": 2.1397,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 0.49,
18
- "learning_rate": 4.506172839506173e-05,
19
- "loss": 1.3468,
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  "step": 1000
21
  },
22
  {
23
- "epoch": 0.74,
24
- "learning_rate": 4.259259259259259e-05,
25
- "loss": 0.9786,
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  "step": 1500
27
  },
28
  {
29
- "epoch": 0.99,
30
- "learning_rate": 4.012345679012346e-05,
31
- "loss": 0.7833,
32
  "step": 2000
33
  },
34
  {
35
- "epoch": 1.0,
36
- "eval_gen_len": 65.37762237762237,
37
- "eval_loss": 2.5750701427459717,
38
- "eval_rouge1": 16.3343,
39
- "eval_rouge2": 1.2888,
40
- "eval_rougeL": 11.0128,
41
- "eval_rougeLsum": 15.2802,
42
- "eval_runtime": 267.3657,
43
- "eval_samples_per_second": 1.07,
44
- "eval_steps_per_second": 1.07,
45
- "step": 2025
46
- },
47
- {
48
- "epoch": 1.23,
49
- "learning_rate": 3.7654320987654326e-05,
50
- "loss": 0.5998,
51
  "step": 2500
52
  },
53
  {
54
- "epoch": 1.48,
55
- "learning_rate": 3.518518518518519e-05,
56
- "loss": 0.467,
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  "step": 3000
58
  },
59
  {
60
- "epoch": 1.73,
61
- "learning_rate": 3.271604938271605e-05,
62
- "loss": 0.4008,
63
  "step": 3500
64
  },
65
- {
66
- "epoch": 1.98,
67
- "learning_rate": 3.0246913580246916e-05,
68
- "loss": 0.3308,
69
- "step": 4000
70
- },
71
- {
72
- "epoch": 2.0,
73
- "eval_gen_len": 91.65034965034965,
74
- "eval_loss": 2.9422976970672607,
75
- "eval_rouge1": 17.9514,
76
- "eval_rouge2": 2.8091,
77
- "eval_rougeL": 10.9133,
78
- "eval_rougeLsum": 15.4068,
79
- "eval_runtime": 356.3388,
80
- "eval_samples_per_second": 0.803,
81
- "eval_steps_per_second": 0.803,
82
- "step": 4050
83
- },
84
- {
85
- "epoch": 2.22,
86
- "learning_rate": 2.777777777777778e-05,
87
- "loss": 0.2778,
88
- "step": 4500
89
- },
90
- {
91
- "epoch": 2.47,
92
- "learning_rate": 2.5308641975308646e-05,
93
- "loss": 0.2482,
94
- "step": 5000
95
- },
96
- {
97
- "epoch": 2.72,
98
- "learning_rate": 2.2839506172839506e-05,
99
- "loss": 0.2349,
100
- "step": 5500
101
- },
102
- {
103
- "epoch": 2.96,
104
- "learning_rate": 2.037037037037037e-05,
105
- "loss": 0.2302,
106
- "step": 6000
107
- },
108
- {
109
- "epoch": 3.0,
110
- "eval_gen_len": 77.1993006993007,
111
- "eval_loss": 3.0624501705169678,
112
- "eval_rouge1": 16.1453,
113
- "eval_rouge2": 3.0026,
114
- "eval_rougeL": 10.272,
115
- "eval_rougeLsum": 14.0716,
116
- "eval_runtime": 279.7235,
117
- "eval_samples_per_second": 1.022,
118
- "eval_steps_per_second": 1.022,
119
- "step": 6075
120
- },
121
- {
122
- "epoch": 3.21,
123
- "learning_rate": 1.7901234567901236e-05,
124
- "loss": 0.1927,
125
- "step": 6500
126
- },
127
- {
128
- "epoch": 3.46,
129
- "learning_rate": 1.54320987654321e-05,
130
- "loss": 0.1607,
131
- "step": 7000
132
- },
133
- {
134
- "epoch": 3.7,
135
- "learning_rate": 1.2962962962962962e-05,
136
- "loss": 0.1621,
137
- "step": 7500
138
- },
139
- {
140
- "epoch": 3.95,
141
- "learning_rate": 1.0493827160493827e-05,
142
- "loss": 0.1576,
143
- "step": 8000
144
- },
145
- {
146
- "epoch": 4.0,
147
- "eval_gen_len": 88.3986013986014,
148
- "eval_loss": 3.2307794094085693,
149
- "eval_rouge1": 17.8409,
150
- "eval_rouge2": 2.9937,
151
- "eval_rougeL": 10.8765,
152
- "eval_rougeLsum": 15.6203,
153
- "eval_runtime": 323.891,
154
- "eval_samples_per_second": 0.883,
155
- "eval_steps_per_second": 0.883,
156
- "step": 8100
157
- },
158
- {
159
- "epoch": 4.2,
160
- "learning_rate": 8.02469135802469e-06,
161
- "loss": 0.1289,
162
- "step": 8500
163
- },
164
- {
165
- "epoch": 4.44,
166
- "learning_rate": 5.555555555555556e-06,
167
- "loss": 0.1385,
168
- "step": 9000
169
- },
170
- {
171
- "epoch": 4.69,
172
- "learning_rate": 3.0864197530864196e-06,
173
- "loss": 0.1315,
174
- "step": 9500
175
- },
176
- {
177
- "epoch": 4.94,
178
- "learning_rate": 6.17283950617284e-07,
179
- "loss": 0.1055,
180
- "step": 10000
181
- },
182
  {
183
  "epoch": 5.0,
184
- "eval_gen_len": 95.02447552447552,
185
- "eval_loss": 3.2694220542907715,
186
- "eval_rouge1": 18.7407,
187
- "eval_rouge2": 3.1211,
188
- "eval_rougeL": 10.9379,
189
- "eval_rougeLsum": 15.8203,
190
- "eval_runtime": 317.0639,
191
- "eval_samples_per_second": 0.902,
192
- "eval_steps_per_second": 0.902,
193
- "step": 10125
194
  },
195
  {
196
  "epoch": 5.0,
197
- "step": 10125,
198
- "total_flos": 1931007009669120.0,
199
- "train_loss": 0.4568321267587167,
200
- "train_runtime": 2611.1815,
201
- "train_samples_per_second": 3.878,
202
- "train_steps_per_second": 3.878
203
  }
204
  ],
205
- "max_steps": 10125,
206
  "num_train_epochs": 5,
207
- "total_flos": 1931007009669120.0,
208
  "trial_name": null,
209
  "trial_params": null
210
  }
 
1
  {
2
+ "best_metric": 15.5769,
3
+ "best_model_checkpoint": "saved/tobyoki-pairwise-wo_space/bart-base-japanese/BaseModel/checkpoint-2868",
4
  "epoch": 5.0,
5
+ "global_step": 3585,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.7,
12
+ "learning_rate": 4.302649930264993e-05,
13
+ "loss": 2.1701,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 1.0,
18
+ "eval_gen_len": 47.7375,
19
+ "eval_loss": 1.9507354497909546,
20
+ "eval_rouge1": 12.6467,
21
+ "eval_rouge2": 2.901,
22
+ "eval_rougeL": 10.0035,
23
+ "eval_rougeLsum": 11.1471,
24
+ "eval_runtime": 49.6131,
25
+ "eval_samples_per_second": 1.612,
26
+ "eval_steps_per_second": 1.612,
27
+ "step": 717
28
+ },
29
+ {
30
+ "epoch": 1.39,
31
+ "learning_rate": 3.6052998605299864e-05,
32
+ "loss": 1.4042,
33
  "step": 1000
34
  },
35
  {
36
+ "epoch": 2.0,
37
+ "eval_gen_len": 21.1375,
38
+ "eval_loss": 1.9519145488739014,
39
+ "eval_rouge1": 11.9515,
40
+ "eval_rouge2": 3.096,
41
+ "eval_rougeL": 10.2259,
42
+ "eval_rougeLsum": 10.8478,
43
+ "eval_runtime": 36.7295,
44
+ "eval_samples_per_second": 2.178,
45
+ "eval_steps_per_second": 2.178,
46
+ "step": 1434
47
+ },
48
+ {
49
+ "epoch": 2.09,
50
+ "learning_rate": 2.9079497907949792e-05,
51
+ "loss": 1.1417,
52
  "step": 1500
53
  },
54
  {
55
+ "epoch": 2.79,
56
+ "learning_rate": 2.2105997210599723e-05,
57
+ "loss": 0.8952,
58
  "step": 2000
59
  },
60
  {
61
+ "epoch": 3.0,
62
+ "eval_gen_len": 76.35,
63
+ "eval_loss": 2.0322935581207275,
64
+ "eval_rouge1": 15.5721,
65
+ "eval_rouge2": 3.5875,
66
+ "eval_rougeL": 10.6382,
67
+ "eval_rougeLsum": 12.9346,
68
+ "eval_runtime": 127.0578,
69
+ "eval_samples_per_second": 0.63,
70
+ "eval_steps_per_second": 0.63,
71
+ "step": 2151
72
+ },
73
+ {
74
+ "epoch": 3.49,
75
+ "learning_rate": 1.5132496513249652e-05,
76
+ "loss": 0.7489,
77
  "step": 2500
78
  },
79
  {
80
+ "epoch": 4.0,
81
+ "eval_gen_len": 63.7625,
82
+ "eval_loss": 2.0723605155944824,
83
+ "eval_rouge1": 15.5769,
84
+ "eval_rouge2": 3.3042,
85
+ "eval_rougeL": 11.0176,
86
+ "eval_rougeLsum": 12.8107,
87
+ "eval_runtime": 79.609,
88
+ "eval_samples_per_second": 1.005,
89
+ "eval_steps_per_second": 1.005,
90
+ "step": 2868
91
+ },
92
+ {
93
+ "epoch": 4.18,
94
+ "learning_rate": 8.158995815899583e-06,
95
+ "loss": 0.6757,
96
  "step": 3000
97
  },
98
  {
99
+ "epoch": 4.88,
100
+ "learning_rate": 1.185495118549512e-06,
101
+ "loss": 0.5941,
102
  "step": 3500
103
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  {
105
  "epoch": 5.0,
106
+ "eval_gen_len": 66.075,
107
+ "eval_loss": 2.0849809646606445,
108
+ "eval_rouge1": 15.3454,
109
+ "eval_rouge2": 2.9489,
110
+ "eval_rougeL": 10.7691,
111
+ "eval_rougeLsum": 12.7028,
112
+ "eval_runtime": 76.3627,
113
+ "eval_samples_per_second": 1.048,
114
+ "eval_steps_per_second": 1.048,
115
+ "step": 3585
116
  },
117
  {
118
  "epoch": 5.0,
119
+ "step": 3585,
120
+ "total_flos": 682678854881280.0,
121
+ "train_loss": 1.0774688316355856,
122
+ "train_runtime": 820.1414,
123
+ "train_samples_per_second": 4.371,
124
+ "train_steps_per_second": 4.371
125
  }
126
  ],
127
+ "max_steps": 3585,
128
  "num_train_epochs": 5,
129
+ "total_flos": 682678854881280.0,
130
  "trial_name": null,
131
  "trial_params": null
132
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37216331b5413f75221586ed2e7515ae60dd4e50ea9fbc42566855fffcb6168d
3
  size 4475
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6030a0c82b601891bd894a96708a17ba86168c75bb195657482dcd66f486273
3
  size 4475