AlekseyKorshuk commited on
Commit
5f65f7f
·
1 Parent(s): 8b94b74

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +10 -10
  2. eval_results.json +6 -6
  3. train_results.json +4 -4
  4. trainer_state.json +151 -58
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_accuracy": 0.8760281609284458,
4
- "eval_loss": 0.6851363778114319,
5
- "eval_runtime": 15.1077,
6
  "eval_samples": 11583,
7
- "eval_samples_per_second": 766.695,
8
- "eval_steps_per_second": 23.961,
9
- "perplexity": 1.9840423964473037,
10
- "train_loss": 0.7258401500793972,
11
- "train_runtime": 612.7363,
12
  "train_samples": 220074,
13
- "train_samples_per_second": 359.166,
14
- "train_steps_per_second": 11.225
15
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_accuracy": 0.8795507387461411,
4
+ "eval_loss": 0.6747884154319763,
5
+ "eval_runtime": 14.946,
6
  "eval_samples": 11583,
7
+ "eval_samples_per_second": 774.992,
8
+ "eval_steps_per_second": 6.089,
9
+ "perplexity": 1.963617460860885,
10
+ "train_loss": 0.730066760750704,
11
+ "train_runtime": 657.3049,
12
  "train_samples": 220074,
13
+ "train_samples_per_second": 334.813,
14
+ "train_steps_per_second": 2.617
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_accuracy": 0.8760281609284458,
4
- "eval_loss": 0.6851363778114319,
5
- "eval_runtime": 15.1077,
6
  "eval_samples": 11583,
7
- "eval_samples_per_second": 766.695,
8
- "eval_steps_per_second": 23.961,
9
- "perplexity": 1.9840423964473037
10
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_accuracy": 0.8795507387461411,
4
+ "eval_loss": 0.6747884154319763,
5
+ "eval_runtime": 14.946,
6
  "eval_samples": 11583,
7
+ "eval_samples_per_second": 774.992,
8
+ "eval_steps_per_second": 6.089,
9
+ "perplexity": 1.963617460860885
10
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.7258401500793972,
4
- "train_runtime": 612.7363,
5
  "train_samples": 220074,
6
- "train_samples_per_second": 359.166,
7
- "train_steps_per_second": 11.225
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.730066760750704,
4
+ "train_runtime": 657.3049,
5
  "train_samples": 220074,
6
+ "train_samples_per_second": 334.813,
7
+ "train_steps_per_second": 2.617
8
  }
trainer_state.json CHANGED
@@ -2,102 +2,195 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
- "global_step": 6878,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.07,
12
- "learning_rate": 4.636522244838616e-05,
13
- "loss": 0.8273,
14
- "step": 500
 
 
 
15
  },
16
  {
17
- "epoch": 0.15,
18
- "learning_rate": 4.273044489677232e-05,
19
- "loss": 0.7505,
20
- "step": 1000
 
 
 
21
  },
22
  {
23
- "epoch": 0.22,
24
- "learning_rate": 3.9095667345158474e-05,
25
- "loss": 0.7387,
26
- "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
27
  },
28
  {
29
  "epoch": 0.29,
30
- "learning_rate": 3.546088979354464e-05,
31
- "loss": 0.7288,
32
- "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  },
34
  {
35
- "epoch": 0.36,
36
- "learning_rate": 3.1826112241930793e-05,
37
- "loss": 0.7288,
38
- "step": 2500
 
 
 
39
  },
40
  {
41
- "epoch": 0.44,
42
- "learning_rate": 2.8191334690316956e-05,
43
- "loss": 0.7138,
44
- "step": 3000
 
 
 
45
  },
46
  {
47
- "epoch": 0.51,
48
- "learning_rate": 2.4556557138703113e-05,
49
- "loss": 0.7155,
50
- "step": 3500
51
  },
52
  {
53
  "epoch": 0.58,
54
- "learning_rate": 2.0921779587089272e-05,
55
- "loss": 0.7102,
56
- "step": 4000
 
 
 
57
  },
58
  {
59
- "epoch": 0.65,
60
- "learning_rate": 1.728700203547543e-05,
61
- "loss": 0.7133,
62
- "step": 4500
 
 
 
63
  },
64
  {
65
- "epoch": 0.73,
66
- "learning_rate": 1.365222448386159e-05,
67
- "loss": 0.7119,
68
- "step": 5000
 
 
 
69
  },
70
  {
71
- "epoch": 0.8,
72
- "learning_rate": 1.0017446932247747e-05,
73
- "loss": 0.7091,
74
- "step": 5500
 
 
 
 
 
 
 
 
 
 
 
 
75
  },
76
  {
77
  "epoch": 0.87,
78
- "learning_rate": 6.382669380633906e-06,
79
- "loss": 0.7014,
80
- "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  },
82
  {
83
- "epoch": 0.95,
84
- "learning_rate": 2.747891829020064e-06,
85
- "loss": 0.7087,
86
- "step": 6500
 
 
 
87
  },
88
  {
89
  "epoch": 1.0,
90
- "step": 6878,
91
- "total_flos": 1.3252899316432896e+16,
92
- "train_loss": 0.7258401500793972,
93
- "train_runtime": 612.7363,
94
- "train_samples_per_second": 359.166,
95
- "train_steps_per_second": 11.225
96
  }
97
  ],
98
- "max_steps": 6878,
99
  "num_train_epochs": 1,
100
- "total_flos": 1.3252899316432896e+16,
101
  "trial_name": null,
102
  "trial_params": null
103
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
+ "global_step": 1720,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.06,
12
+ "eval_accuracy": 0.8732394680670543,
13
+ "eval_loss": 0.7285259366035461,
14
+ "eval_runtime": 14.7926,
15
+ "eval_samples_per_second": 783.026,
16
+ "eval_steps_per_second": 6.152,
17
+ "step": 100
18
  },
19
  {
20
+ "epoch": 0.12,
21
+ "eval_accuracy": 0.8747227655273633,
22
+ "eval_loss": 0.7140512466430664,
23
+ "eval_runtime": 14.7732,
24
+ "eval_samples_per_second": 784.057,
25
+ "eval_steps_per_second": 6.16,
26
+ "step": 200
27
  },
28
  {
29
+ "epoch": 0.17,
30
+ "eval_accuracy": 0.8756672233683728,
31
+ "eval_loss": 0.7055667638778687,
32
+ "eval_runtime": 14.8094,
33
+ "eval_samples_per_second": 782.138,
34
+ "eval_steps_per_second": 6.145,
35
+ "step": 300
36
+ },
37
+ {
38
+ "epoch": 0.23,
39
+ "eval_accuracy": 0.8763824510950948,
40
+ "eval_loss": 0.6991910934448242,
41
+ "eval_runtime": 14.8106,
42
+ "eval_samples_per_second": 782.075,
43
+ "eval_steps_per_second": 6.144,
44
+ "step": 400
45
  },
46
  {
47
  "epoch": 0.29,
48
+ "learning_rate": 3.5465116279069774e-05,
49
+ "loss": 0.7907,
50
+ "step": 500
51
+ },
52
+ {
53
+ "epoch": 0.29,
54
+ "eval_accuracy": 0.8771028885971415,
55
+ "eval_loss": 0.6942312121391296,
56
+ "eval_runtime": 14.8425,
57
+ "eval_samples_per_second": 780.393,
58
+ "eval_steps_per_second": 6.131,
59
+ "step": 500
60
+ },
61
+ {
62
+ "epoch": 0.35,
63
+ "eval_accuracy": 0.877665544332211,
64
+ "eval_loss": 0.6905708909034729,
65
+ "eval_runtime": 14.8552,
66
+ "eval_samples_per_second": 779.725,
67
+ "eval_steps_per_second": 6.126,
68
+ "step": 600
69
+ },
70
+ {
71
+ "epoch": 0.41,
72
+ "eval_accuracy": 0.8779126365333262,
73
+ "eval_loss": 0.6872847676277161,
74
+ "eval_runtime": 14.7951,
75
+ "eval_samples_per_second": 782.894,
76
+ "eval_steps_per_second": 6.151,
77
+ "step": 700
78
  },
79
  {
80
+ "epoch": 0.47,
81
+ "eval_accuracy": 0.8782155477557776,
82
+ "eval_loss": 0.6847825646400452,
83
+ "eval_runtime": 13.7809,
84
+ "eval_samples_per_second": 840.512,
85
+ "eval_steps_per_second": 6.603,
86
+ "step": 800
87
  },
88
  {
89
+ "epoch": 0.52,
90
+ "eval_accuracy": 0.8785906515791573,
91
+ "eval_loss": 0.6830095648765564,
92
+ "eval_runtime": 14.7951,
93
+ "eval_samples_per_second": 782.896,
94
+ "eval_steps_per_second": 6.151,
95
+ "step": 900
96
  },
97
  {
98
+ "epoch": 0.58,
99
+ "learning_rate": 2.0930232558139536e-05,
100
+ "loss": 0.7105,
101
+ "step": 1000
102
  },
103
  {
104
  "epoch": 0.58,
105
+ "eval_accuracy": 0.8788102063964133,
106
+ "eval_loss": 0.6808722019195557,
107
+ "eval_runtime": 14.8083,
108
+ "eval_samples_per_second": 782.197,
109
+ "eval_steps_per_second": 6.145,
110
+ "step": 1000
111
  },
112
  {
113
+ "epoch": 0.64,
114
+ "eval_accuracy": 0.878962034134448,
115
+ "eval_loss": 0.6793943643569946,
116
+ "eval_runtime": 14.7885,
117
+ "eval_samples_per_second": 783.243,
118
+ "eval_steps_per_second": 6.153,
119
+ "step": 1100
120
  },
121
  {
122
+ "epoch": 0.7,
123
+ "eval_accuracy": 0.8791533073142268,
124
+ "eval_loss": 0.6780144572257996,
125
+ "eval_runtime": 13.7848,
126
+ "eval_samples_per_second": 840.275,
127
+ "eval_steps_per_second": 6.601,
128
+ "step": 1200
129
  },
130
  {
131
+ "epoch": 0.76,
132
+ "eval_accuracy": 0.8792887614726695,
133
+ "eval_loss": 0.6770240068435669,
134
+ "eval_runtime": 14.7863,
135
+ "eval_samples_per_second": 783.358,
136
+ "eval_steps_per_second": 6.154,
137
+ "step": 1300
138
+ },
139
+ {
140
+ "epoch": 0.81,
141
+ "eval_accuracy": 0.8794212386166409,
142
+ "eval_loss": 0.6760326027870178,
143
+ "eval_runtime": 14.785,
144
+ "eval_samples_per_second": 783.432,
145
+ "eval_steps_per_second": 6.155,
146
+ "step": 1400
147
  },
148
  {
149
  "epoch": 0.87,
150
+ "learning_rate": 6.395348837209303e-06,
151
+ "loss": 0.7034,
152
+ "step": 1500
153
+ },
154
+ {
155
+ "epoch": 0.87,
156
+ "eval_accuracy": 0.8794353794353794,
157
+ "eval_loss": 0.6754602789878845,
158
+ "eval_runtime": 14.7978,
159
+ "eval_samples_per_second": 782.753,
160
+ "eval_steps_per_second": 6.15,
161
+ "step": 1500
162
+ },
163
+ {
164
+ "epoch": 0.93,
165
+ "eval_accuracy": 0.8795403191954916,
166
+ "eval_loss": 0.6750109195709229,
167
+ "eval_runtime": 14.7942,
168
+ "eval_samples_per_second": 782.943,
169
+ "eval_steps_per_second": 6.151,
170
+ "step": 1600
171
  },
172
  {
173
+ "epoch": 0.99,
174
+ "eval_accuracy": 0.8795477617316698,
175
+ "eval_loss": 0.6747931838035583,
176
+ "eval_runtime": 14.8037,
177
+ "eval_samples_per_second": 782.441,
178
+ "eval_steps_per_second": 6.147,
179
+ "step": 1700
180
  },
181
  {
182
  "epoch": 1.0,
183
+ "step": 1720,
184
+ "total_flos": 1.3140586257186816e+16,
185
+ "train_loss": 0.730066760750704,
186
+ "train_runtime": 657.3049,
187
+ "train_samples_per_second": 334.813,
188
+ "train_steps_per_second": 2.617
189
  }
190
  ],
191
+ "max_steps": 1720,
192
  "num_train_epochs": 1,
193
+ "total_flos": 1.3140586257186816e+16,
194
  "trial_name": null,
195
  "trial_params": null
196
  }