Weichuan commited on
Commit
9b43323
1 Parent(s): 0da036d

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +232 -0
trainer_state.json ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 20.0,
5
+ "global_step": 3200,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_bleu": 0.060808744840238495,
13
+ "eval_loss": 2.7135698795318604,
14
+ "eval_runtime": 315.8806,
15
+ "eval_samples_per_second": 3.603,
16
+ "eval_steps_per_second": 0.114,
17
+ "step": 160
18
+ },
19
+ {
20
+ "epoch": 2.0,
21
+ "eval_bleu": 0.23820890608280518,
22
+ "eval_loss": 1.7017812728881836,
23
+ "eval_runtime": 199.1113,
24
+ "eval_samples_per_second": 5.715,
25
+ "eval_steps_per_second": 0.181,
26
+ "step": 320
27
+ },
28
+ {
29
+ "epoch": 3.0,
30
+ "eval_bleu": 0.2333157930860921,
31
+ "eval_loss": 1.7553855180740356,
32
+ "eval_runtime": 183.4244,
33
+ "eval_samples_per_second": 6.204,
34
+ "eval_steps_per_second": 0.196,
35
+ "step": 480
36
+ },
37
+ {
38
+ "epoch": 3.12,
39
+ "learning_rate": 1.69125e-05,
40
+ "loss": 2.1899,
41
+ "step": 500
42
+ },
43
+ {
44
+ "epoch": 4.0,
45
+ "eval_bleu": 0.24403815580286167,
46
+ "eval_loss": 1.729722499847412,
47
+ "eval_runtime": 199.443,
48
+ "eval_samples_per_second": 5.706,
49
+ "eval_steps_per_second": 0.181,
50
+ "step": 640
51
+ },
52
+ {
53
+ "epoch": 5.0,
54
+ "eval_bleu": 0.24248370965297772,
55
+ "eval_loss": 1.7367736101150513,
56
+ "eval_runtime": 195.0886,
57
+ "eval_samples_per_second": 5.833,
58
+ "eval_steps_per_second": 0.185,
59
+ "step": 800
60
+ },
61
+ {
62
+ "epoch": 6.0,
63
+ "eval_bleu": 0.24708175633139415,
64
+ "eval_loss": 1.7663674354553223,
65
+ "eval_runtime": 190.8193,
66
+ "eval_samples_per_second": 5.964,
67
+ "eval_steps_per_second": 0.189,
68
+ "step": 960
69
+ },
70
+ {
71
+ "epoch": 6.25,
72
+ "learning_rate": 1.37875e-05,
73
+ "loss": 1.1629,
74
+ "step": 1000
75
+ },
76
+ {
77
+ "epoch": 7.0,
78
+ "eval_bleu": 0.24734088993827677,
79
+ "eval_loss": 1.8130639791488647,
80
+ "eval_runtime": 185.235,
81
+ "eval_samples_per_second": 6.144,
82
+ "eval_steps_per_second": 0.194,
83
+ "step": 1120
84
+ },
85
+ {
86
+ "epoch": 8.0,
87
+ "eval_bleu": 0.25035574867022287,
88
+ "eval_loss": 1.860228419303894,
89
+ "eval_runtime": 191.0903,
90
+ "eval_samples_per_second": 5.955,
91
+ "eval_steps_per_second": 0.188,
92
+ "step": 1280
93
+ },
94
+ {
95
+ "epoch": 9.0,
96
+ "eval_bleu": 0.2550337582147487,
97
+ "eval_loss": 1.9042994976043701,
98
+ "eval_runtime": 192.163,
99
+ "eval_samples_per_second": 5.922,
100
+ "eval_steps_per_second": 0.187,
101
+ "step": 1440
102
+ },
103
+ {
104
+ "epoch": 9.38,
105
+ "learning_rate": 1.06625e-05,
106
+ "loss": 0.807,
107
+ "step": 1500
108
+ },
109
+ {
110
+ "epoch": 10.0,
111
+ "eval_bleu": 0.255168022317063,
112
+ "eval_loss": 1.9512995481491089,
113
+ "eval_runtime": 193.6447,
114
+ "eval_samples_per_second": 5.877,
115
+ "eval_steps_per_second": 0.186,
116
+ "step": 1600
117
+ },
118
+ {
119
+ "epoch": 11.0,
120
+ "eval_bleu": 0.2583105775089707,
121
+ "eval_loss": 2.0014865398406982,
122
+ "eval_runtime": 194.2032,
123
+ "eval_samples_per_second": 5.86,
124
+ "eval_steps_per_second": 0.185,
125
+ "step": 1760
126
+ },
127
+ {
128
+ "epoch": 12.0,
129
+ "eval_bleu": 0.2611697071205468,
130
+ "eval_loss": 2.036052942276001,
131
+ "eval_runtime": 190.4985,
132
+ "eval_samples_per_second": 5.974,
133
+ "eval_steps_per_second": 0.189,
134
+ "step": 1920
135
+ },
136
+ {
137
+ "epoch": 12.5,
138
+ "learning_rate": 7.537500000000001e-06,
139
+ "loss": 0.5977,
140
+ "step": 2000
141
+ },
142
+ {
143
+ "epoch": 13.0,
144
+ "eval_bleu": 0.2624028465673082,
145
+ "eval_loss": 2.0794923305511475,
146
+ "eval_runtime": 192.5774,
147
+ "eval_samples_per_second": 5.909,
148
+ "eval_steps_per_second": 0.187,
149
+ "step": 2080
150
+ },
151
+ {
152
+ "epoch": 14.0,
153
+ "eval_bleu": 0.26034097889106955,
154
+ "eval_loss": 2.1036157608032227,
155
+ "eval_runtime": 198.097,
156
+ "eval_samples_per_second": 5.745,
157
+ "eval_steps_per_second": 0.182,
158
+ "step": 2240
159
+ },
160
+ {
161
+ "epoch": 15.0,
162
+ "eval_bleu": 0.264538215714405,
163
+ "eval_loss": 2.1185383796691895,
164
+ "eval_runtime": 189.6413,
165
+ "eval_samples_per_second": 6.001,
166
+ "eval_steps_per_second": 0.19,
167
+ "step": 2400
168
+ },
169
+ {
170
+ "epoch": 15.62,
171
+ "learning_rate": 4.4125000000000005e-06,
172
+ "loss": 0.4697,
173
+ "step": 2500
174
+ },
175
+ {
176
+ "epoch": 16.0,
177
+ "eval_bleu": 0.2666872542669057,
178
+ "eval_loss": 2.1361563205718994,
179
+ "eval_runtime": 189.4756,
180
+ "eval_samples_per_second": 6.006,
181
+ "eval_steps_per_second": 0.19,
182
+ "step": 2560
183
+ },
184
+ {
185
+ "epoch": 17.0,
186
+ "eval_bleu": 0.2652516887552325,
187
+ "eval_loss": 2.162111520767212,
188
+ "eval_runtime": 193.0939,
189
+ "eval_samples_per_second": 5.894,
190
+ "eval_steps_per_second": 0.186,
191
+ "step": 2720
192
+ },
193
+ {
194
+ "epoch": 18.0,
195
+ "eval_bleu": 0.2673360550776601,
196
+ "eval_loss": 2.163081169128418,
197
+ "eval_runtime": 190.9327,
198
+ "eval_samples_per_second": 5.96,
199
+ "eval_steps_per_second": 0.189,
200
+ "step": 2880
201
+ },
202
+ {
203
+ "epoch": 18.75,
204
+ "learning_rate": 1.2875000000000002e-06,
205
+ "loss": 0.4032,
206
+ "step": 3000
207
+ },
208
+ {
209
+ "epoch": 19.0,
210
+ "eval_bleu": 0.2660614156233256,
211
+ "eval_loss": 2.1683239936828613,
212
+ "eval_runtime": 190.2616,
213
+ "eval_samples_per_second": 5.981,
214
+ "eval_steps_per_second": 0.189,
215
+ "step": 3040
216
+ },
217
+ {
218
+ "epoch": 20.0,
219
+ "eval_bleu": 0.2674984140410235,
220
+ "eval_loss": 2.173663377761841,
221
+ "eval_runtime": 189.5283,
222
+ "eval_samples_per_second": 6.004,
223
+ "eval_steps_per_second": 0.19,
224
+ "step": 3200
225
+ }
226
+ ],
227
+ "max_steps": 3200,
228
+ "num_train_epochs": 20,
229
+ "total_flos": 5.530638338624717e+16,
230
+ "trial_name": null,
231
+ "trial_params": null
232
+ }