mp-02 committed on
Commit
e3ee200
1 Parent(s): 6297883

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 16.0,
3
- "eval_accuracy": 0.8368167202572347,
4
- "eval_f1": 0.8891074502089993,
5
- "eval_loss": 0.6541090607643127,
6
- "eval_precision": 0.8746976294146106,
7
- "eval_recall": 0.904,
8
- "eval_runtime": 3.562,
9
  "eval_samples": 54,
10
- "eval_samples_per_second": 15.16,
11
- "eval_steps_per_second": 1.123,
12
  "predict_accuracy": 0.8218373936014088,
13
  "predict_f1": 0.9038133181559477,
14
  "predict_loss": 0.6888472437858582,
@@ -17,9 +17,9 @@
17
  "predict_runtime": 2.0596,
18
  "predict_samples_per_second": 8.74,
19
  "predict_steps_per_second": 0.971,
20
- "train_loss": 0.43604583740234376,
21
- "train_runtime": 307.9087,
22
  "train_samples": 150,
23
- "train_samples_per_second": 7.795,
24
- "train_steps_per_second": 1.299
25
  }
 
1
  {
2
+ "epoch": 26.32,
3
+ "eval_accuracy": 0.8407211759301791,
4
+ "eval_f1": 0.9107187266849044,
5
+ "eval_loss": 0.8859658241271973,
6
+ "eval_precision": 0.9059871350816427,
7
+ "eval_recall": 0.9155,
8
+ "eval_runtime": 3.6172,
9
  "eval_samples": 54,
10
+ "eval_samples_per_second": 14.929,
11
+ "eval_steps_per_second": 1.106,
12
  "predict_accuracy": 0.8218373936014088,
13
  "predict_f1": 0.9038133181559477,
14
  "predict_loss": 0.6888472437858582,
 
17
  "predict_runtime": 2.0596,
18
  "predict_samples_per_second": 8.74,
19
  "predict_steps_per_second": 0.971,
20
+ "train_loss": 0.23488603591918944,
21
+ "train_runtime": 622.7344,
22
  "train_samples": 150,
23
+ "train_samples_per_second": 6.423,
24
+ "train_steps_per_second": 1.606
25
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 16.0,
3
- "eval_accuracy": 0.8368167202572347,
4
- "eval_f1": 0.8891074502089993,
5
- "eval_loss": 0.6541090607643127,
6
- "eval_precision": 0.8746976294146106,
7
- "eval_recall": 0.904,
8
- "eval_runtime": 3.562,
9
  "eval_samples": 54,
10
- "eval_samples_per_second": 15.16,
11
- "eval_steps_per_second": 1.123
12
  }
 
1
  {
2
+ "epoch": 26.32,
3
+ "eval_accuracy": 0.8407211759301791,
4
+ "eval_f1": 0.9107187266849044,
5
+ "eval_loss": 0.8859658241271973,
6
+ "eval_precision": 0.9059871350816427,
7
+ "eval_recall": 0.9155,
8
+ "eval_runtime": 3.6172,
9
  "eval_samples": 54,
10
+ "eval_samples_per_second": 14.929,
11
+ "eval_steps_per_second": 1.106
12
  }
runs/Aug24_19-33-30_bernini/events.out.tfevents.1724520821.bernini.27584.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ac730116a233cf6fc8abc046bc02614e8d3a9c01c1e8ca99c117dc08602b9ea
3
- size 9111
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a68deafa55d8eed676e7f062d3f1cc33b1f4abcda0c9306ce87574dac1f4ca5e
3
+ size 9465
runs/Aug24_19-33-30_bernini/events.out.tfevents.1724521452.bernini.27584.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e230662cdece234002471f217f400afc9e5edf6815c5827b538e0860a40f05e9
3
+ size 512
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 16.0,
3
- "train_loss": 0.43604583740234376,
4
- "train_runtime": 307.9087,
5
  "train_samples": 150,
6
- "train_samples_per_second": 7.795,
7
- "train_steps_per_second": 1.299
8
  }
 
1
  {
2
+ "epoch": 26.32,
3
+ "train_loss": 0.23488603591918944,
4
+ "train_runtime": 622.7344,
5
  "train_samples": 150,
6
+ "train_samples_per_second": 6.423,
7
+ "train_steps_per_second": 1.606
8
  }
trainer_state.json CHANGED
@@ -1,217 +1,157 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 16.0,
5
- "global_step": 400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.0,
12
- "eval_accuracy": 0.6092099219108865,
13
- "eval_f1": 0.43809958885335776,
14
- "eval_loss": 1.2830967903137207,
15
- "eval_precision": 0.40328006728343146,
16
- "eval_recall": 0.4795,
17
- "eval_runtime": 3.5046,
18
- "eval_samples_per_second": 15.408,
19
- "eval_steps_per_second": 1.141,
20
- "step": 25
21
- },
22
- {
23
- "epoch": 2.0,
24
- "eval_accuracy": 0.7748047772163528,
25
- "eval_f1": 0.7586042065009561,
26
- "eval_loss": 0.8177912831306458,
27
- "eval_precision": 0.7266483516483516,
28
- "eval_recall": 0.7935,
29
- "eval_runtime": 3.5776,
30
- "eval_samples_per_second": 15.094,
31
- "eval_steps_per_second": 1.118,
32
- "step": 50
33
- },
34
- {
35
- "epoch": 3.0,
36
- "eval_accuracy": 0.7990353697749196,
37
- "eval_f1": 0.814344962185899,
38
- "eval_loss": 0.6842699646949768,
39
- "eval_precision": 0.7951405431157694,
40
- "eval_recall": 0.8345,
41
- "eval_runtime": 3.5751,
42
- "eval_samples_per_second": 15.104,
43
- "eval_steps_per_second": 1.119,
44
- "step": 75
45
- },
46
- {
47
- "epoch": 4.0,
48
- "eval_accuracy": 0.8161460725769407,
49
- "eval_f1": 0.8306801736613604,
50
- "eval_loss": 0.6316555142402649,
51
- "eval_precision": 0.8024231127679403,
52
- "eval_recall": 0.861,
53
- "eval_runtime": 3.6093,
54
- "eval_samples_per_second": 14.962,
55
- "eval_steps_per_second": 1.108,
56
  "step": 100
57
  },
58
  {
59
- "epoch": 5.0,
60
- "eval_accuracy": 0.8233807992650436,
61
- "eval_f1": 0.8600191754554171,
62
- "eval_loss": 0.5964206457138062,
63
- "eval_precision": 0.8259668508287292,
64
- "eval_recall": 0.897,
65
- "eval_runtime": 3.5003,
66
- "eval_samples_per_second": 15.427,
67
- "eval_steps_per_second": 1.143,
68
- "step": 125
69
- },
70
- {
71
- "epoch": 6.0,
72
- "eval_accuracy": 0.8207395498392283,
73
- "eval_f1": 0.8444552293132735,
74
- "eval_loss": 0.6049793362617493,
75
- "eval_precision": 0.8203677510608204,
76
- "eval_recall": 0.87,
77
- "eval_runtime": 3.5639,
78
- "eval_samples_per_second": 15.152,
79
- "eval_steps_per_second": 1.122,
80
- "step": 150
81
- },
82
- {
83
- "epoch": 7.0,
84
- "eval_accuracy": 0.8168350941662839,
85
- "eval_f1": 0.8474740149867052,
86
- "eval_loss": 0.628131091594696,
87
- "eval_precision": 0.8203088441740758,
88
- "eval_recall": 0.8765,
89
- "eval_runtime": 3.5404,
90
- "eval_samples_per_second": 15.252,
91
- "eval_steps_per_second": 1.13,
92
- "step": 175
93
- },
94
- {
95
- "epoch": 8.0,
96
- "eval_accuracy": 0.8234956361966008,
97
- "eval_f1": 0.8708504967288586,
98
- "eval_loss": 0.6227801442146301,
99
- "eval_precision": 0.844851904090268,
100
- "eval_recall": 0.8985,
101
- "eval_runtime": 3.5716,
102
- "eval_samples_per_second": 15.119,
103
- "eval_steps_per_second": 1.12,
104
- "step": 200
105
- },
106
- {
107
- "epoch": 9.0,
108
  "eval_accuracy": 0.826596233348645,
109
- "eval_f1": 0.8566561207106351,
110
- "eval_loss": 0.621345579624176,
111
- "eval_precision": 0.8345187292555714,
112
- "eval_recall": 0.88,
113
- "eval_runtime": 3.5239,
114
- "eval_samples_per_second": 15.324,
115
- "eval_steps_per_second": 1.135,
116
- "step": 225
117
- },
118
- {
119
- "epoch": 10.0,
120
- "eval_accuracy": 0.8356683509416628,
121
- "eval_f1": 0.8702401164200824,
122
- "eval_loss": 0.6172508597373962,
123
- "eval_precision": 0.8450306170513424,
124
- "eval_recall": 0.897,
125
- "eval_runtime": 3.508,
126
- "eval_samples_per_second": 15.394,
127
- "eval_steps_per_second": 1.14,
128
- "step": 250
129
- },
130
- {
131
- "epoch": 11.0,
132
- "eval_accuracy": 0.8299265043638034,
133
- "eval_f1": 0.8633826741082262,
134
- "eval_loss": 0.6476383209228516,
135
- "eval_precision": 0.8387553041018387,
136
- "eval_recall": 0.8895,
137
- "eval_runtime": 3.4928,
138
- "eval_samples_per_second": 15.46,
139
- "eval_steps_per_second": 1.145,
140
- "step": 275
141
  },
142
  {
143
- "epoch": 12.0,
144
- "eval_accuracy": 0.838194763435921,
145
- "eval_f1": 0.8761018609206659,
146
- "eval_loss": 0.6358577609062195,
147
- "eval_precision": 0.8584452975047985,
148
- "eval_recall": 0.8945,
149
- "eval_runtime": 3.4999,
150
- "eval_samples_per_second": 15.429,
151
- "eval_steps_per_second": 1.143,
152
  "step": 300
153
  },
154
  {
155
- "epoch": 13.0,
156
- "eval_accuracy": 0.8394579696830501,
157
- "eval_f1": 0.8911815278801277,
158
- "eval_loss": 0.6469025611877441,
159
- "eval_precision": 0.8759053597295993,
160
- "eval_recall": 0.907,
161
- "eval_runtime": 3.5529,
162
- "eval_samples_per_second": 15.199,
163
- "eval_steps_per_second": 1.126,
164
- "step": 325
165
  },
166
  {
167
- "epoch": 14.0,
168
- "eval_accuracy": 0.8372760679834634,
169
- "eval_f1": 0.8879606879606879,
170
- "eval_loss": 0.651043176651001,
171
- "eval_precision": 0.8729468599033816,
172
- "eval_recall": 0.9035,
173
- "eval_runtime": 3.5289,
174
- "eval_samples_per_second": 15.302,
175
- "eval_steps_per_second": 1.133,
176
- "step": 350
177
  },
178
  {
179
- "epoch": 15.0,
180
- "eval_accuracy": 0.8354386770785485,
181
- "eval_f1": 0.8834476003917728,
182
- "eval_loss": 0.6554981470108032,
183
- "eval_precision": 0.8656429942418427,
184
- "eval_recall": 0.902,
185
- "eval_runtime": 3.5307,
186
- "eval_samples_per_second": 15.294,
187
- "eval_steps_per_second": 1.133,
188
- "step": 375
189
  },
190
  {
191
- "epoch": 16.0,
192
- "eval_accuracy": 0.8368167202572347,
193
- "eval_f1": 0.8891074502089993,
194
- "eval_loss": 0.6541090607643127,
195
- "eval_precision": 0.8746976294146106,
196
  "eval_recall": 0.904,
197
- "eval_runtime": 3.5967,
198
- "eval_samples_per_second": 15.014,
199
- "eval_steps_per_second": 1.112,
200
- "step": 400
201
  },
202
  {
203
- "epoch": 16.0,
204
- "step": 400,
205
- "total_flos": 632652823756800.0,
206
- "train_loss": 0.43604583740234376,
207
- "train_runtime": 307.9087,
208
- "train_samples_per_second": 7.795,
209
- "train_steps_per_second": 1.299
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  }
211
  ],
212
- "max_steps": 400,
213
- "num_train_epochs": 16,
214
- "total_flos": 632652823756800.0,
215
  "trial_name": null,
216
  "trial_params": null
217
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 26.31578947368421,
5
+ "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 2.63,
12
+ "eval_accuracy": 0.798690858980248,
13
+ "eval_f1": 0.8287769784172662,
14
+ "eval_loss": 0.6110699772834778,
15
+ "eval_precision": 0.7963133640552995,
16
+ "eval_recall": 0.864,
17
+ "eval_runtime": 3.5435,
18
+ "eval_samples_per_second": 15.239,
19
+ "eval_steps_per_second": 1.129,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  "step": 100
21
  },
22
  {
23
+ "epoch": 5.26,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  "eval_accuracy": 0.826596233348645,
25
+ "eval_f1": 0.8665358194308145,
26
+ "eval_loss": 0.5860626697540283,
27
+ "eval_precision": 0.850674373795761,
28
+ "eval_recall": 0.883,
29
+ "eval_runtime": 3.5697,
30
+ "eval_samples_per_second": 15.127,
31
+ "eval_steps_per_second": 1.121,
32
+ "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  },
34
  {
35
+ "epoch": 7.89,
36
+ "eval_accuracy": 0.8425585668350941,
37
+ "eval_f1": 0.8826268071551091,
38
+ "eval_loss": 0.5855879783630371,
39
+ "eval_precision": 0.865449303219606,
40
+ "eval_recall": 0.9005,
41
+ "eval_runtime": 3.5797,
42
+ "eval_samples_per_second": 15.085,
43
+ "eval_steps_per_second": 1.117,
44
  "step": 300
45
  },
46
  {
47
+ "epoch": 10.53,
48
+ "eval_accuracy": 0.8426734037666513,
49
+ "eval_f1": 0.8897131552917903,
50
+ "eval_loss": 0.6501617431640625,
51
+ "eval_precision": 0.8801369863013698,
52
+ "eval_recall": 0.8995,
53
+ "eval_runtime": 3.5894,
54
+ "eval_samples_per_second": 15.044,
55
+ "eval_steps_per_second": 1.114,
56
+ "step": 400
57
  },
58
  {
59
+ "epoch": 13.16,
60
+ "learning_rate": 5e-06,
61
+ "loss": 0.4088,
62
+ "step": 500
 
 
 
 
 
 
63
  },
64
  {
65
+ "epoch": 13.16,
66
+ "eval_accuracy": 0.8372760679834634,
67
+ "eval_f1": 0.8959365708622398,
68
+ "eval_loss": 0.7679007649421692,
69
+ "eval_precision": 0.888015717092338,
70
+ "eval_recall": 0.904,
71
+ "eval_runtime": 3.5844,
72
+ "eval_samples_per_second": 15.065,
73
+ "eval_steps_per_second": 1.116,
74
+ "step": 500
75
  },
76
  {
77
+ "epoch": 15.79,
78
+ "eval_accuracy": 0.8332567753789619,
79
+ "eval_f1": 0.8928395061728396,
80
+ "eval_loss": 0.8370674848556519,
81
+ "eval_precision": 0.8819512195121951,
82
  "eval_recall": 0.904,
83
+ "eval_runtime": 3.4921,
84
+ "eval_samples_per_second": 15.463,
85
+ "eval_steps_per_second": 1.145,
86
+ "step": 600
87
  },
88
  {
89
+ "epoch": 18.42,
90
+ "eval_accuracy": 0.8336012861736335,
91
+ "eval_f1": 0.9036561264822135,
92
+ "eval_loss": 0.8320176601409912,
93
+ "eval_precision": 0.89306640625,
94
+ "eval_recall": 0.9145,
95
+ "eval_runtime": 3.5735,
96
+ "eval_samples_per_second": 15.111,
97
+ "eval_steps_per_second": 1.119,
98
+ "step": 700
99
+ },
100
+ {
101
+ "epoch": 21.05,
102
+ "eval_accuracy": 0.8340606338998622,
103
+ "eval_f1": 0.9051275699777063,
104
+ "eval_loss": 0.8494298458099365,
105
+ "eval_precision": 0.8969072164948454,
106
+ "eval_recall": 0.9135,
107
+ "eval_runtime": 3.6561,
108
+ "eval_samples_per_second": 14.77,
109
+ "eval_steps_per_second": 1.094,
110
+ "step": 800
111
+ },
112
+ {
113
+ "epoch": 23.68,
114
+ "eval_accuracy": 0.8385392742305926,
115
+ "eval_f1": 0.907196029776675,
116
+ "eval_loss": 0.8700249195098877,
117
+ "eval_precision": 0.9004926108374385,
118
+ "eval_recall": 0.914,
119
+ "eval_runtime": 3.5188,
120
+ "eval_samples_per_second": 15.346,
121
+ "eval_steps_per_second": 1.137,
122
+ "step": 900
123
+ },
124
+ {
125
+ "epoch": 26.32,
126
+ "learning_rate": 0.0,
127
+ "loss": 0.061,
128
+ "step": 1000
129
+ },
130
+ {
131
+ "epoch": 26.32,
132
+ "eval_accuracy": 0.8407211759301791,
133
+ "eval_f1": 0.9107187266849044,
134
+ "eval_loss": 0.8859658241271973,
135
+ "eval_precision": 0.9059871350816427,
136
+ "eval_recall": 0.9155,
137
+ "eval_runtime": 3.6186,
138
+ "eval_samples_per_second": 14.923,
139
+ "eval_steps_per_second": 1.105,
140
+ "step": 1000
141
+ },
142
+ {
143
+ "epoch": 26.32,
144
+ "step": 1000,
145
+ "total_flos": 1040713895079936.0,
146
+ "train_loss": 0.23488603591918944,
147
+ "train_runtime": 622.7344,
148
+ "train_samples_per_second": 6.423,
149
+ "train_steps_per_second": 1.606
150
  }
151
  ],
152
+ "max_steps": 1000,
153
+ "num_train_epochs": 27,
154
+ "total_flos": 1040713895079936.0,
155
  "trial_name": null,
156
  "trial_params": null
157
  }