QuentinKemperino committed on
Commit
7e6c576
1 Parent(s): be677de

Training complete

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +74 -74
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.0,
3
- "total_flos": 719017869312000.0,
4
- "train_loss": 0.19799817996554905,
5
- "train_runtime": 356.7954,
6
- "train_samples_per_second": 50.449,
7
- "train_steps_per_second": 6.306
8
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "total_flos": 8.8324576837632e+16,
4
+ "train_loss": 0.18118783251444498,
5
+ "train_runtime": 3615.8229,
6
+ "train_samples_per_second": 4.978,
7
+ "train_steps_per_second": 0.622
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.0,
3
- "total_flos": 719017869312000.0,
4
- "train_loss": 0.19799817996554905,
5
- "train_runtime": 356.7954,
6
- "train_samples_per_second": 50.449,
7
- "train_steps_per_second": 6.306
8
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "total_flos": 8.8324576837632e+16,
4
+ "train_loss": 0.18118783251444498,
5
+ "train_runtime": 3615.8229,
6
+ "train_samples_per_second": 4.978,
7
+ "train_steps_per_second": 0.622
8
  }
trainer_state.json CHANGED
@@ -9,189 +9,189 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.09,
12
- "learning_rate": 0.0009555555555555556,
13
- "loss": 0.2956,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 0.18,
18
- "learning_rate": 0.0009111111111111111,
19
- "loss": 0.2635,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 0.27,
24
- "learning_rate": 0.0008666666666666667,
25
- "loss": 0.2384,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 0.36,
30
- "learning_rate": 0.0008222222222222222,
31
- "loss": 0.2234,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 0.44,
36
- "learning_rate": 0.0007777777777777778,
37
- "loss": 0.2152,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 0.44,
42
- "eval_loss": 0.3215401768684387,
43
- "eval_macro-f1": 0.36252944248976143,
44
- "eval_micro-f1": 0.5540940480105229,
45
- "eval_runtime": 14.835,
46
- "eval_samples_per_second": 67.408,
47
- "eval_steps_per_second": 8.426,
48
  "step": 500
49
  },
50
  {
51
  "epoch": 0.53,
52
- "learning_rate": 0.0007333333333333333,
53
- "loss": 0.2043,
54
  "step": 600
55
  },
56
  {
57
  "epoch": 0.62,
58
- "learning_rate": 0.000688888888888889,
59
- "loss": 0.2019,
60
  "step": 700
61
  },
62
  {
63
  "epoch": 0.71,
64
- "learning_rate": 0.0006444444444444444,
65
- "loss": 0.2058,
66
  "step": 800
67
  },
68
  {
69
  "epoch": 0.8,
70
- "learning_rate": 0.0006,
71
- "loss": 0.2043,
72
  "step": 900
73
  },
74
  {
75
  "epoch": 0.89,
76
- "learning_rate": 0.0005555555555555556,
77
- "loss": 0.1826,
78
  "step": 1000
79
  },
80
  {
81
  "epoch": 0.89,
82
- "eval_loss": 0.3090105652809143,
83
- "eval_macro-f1": 0.40861377528604365,
84
- "eval_micro-f1": 0.5695569729357007,
85
- "eval_runtime": 14.9867,
86
- "eval_samples_per_second": 66.726,
87
- "eval_steps_per_second": 8.341,
88
  "step": 1000
89
  },
90
  {
91
  "epoch": 0.98,
92
- "learning_rate": 0.0005111111111111111,
93
- "loss": 0.1933,
94
  "step": 1100
95
  },
96
  {
97
  "epoch": 1.07,
98
- "learning_rate": 0.00046666666666666666,
99
- "loss": 0.1791,
100
  "step": 1200
101
  },
102
  {
103
  "epoch": 1.16,
104
- "learning_rate": 0.0004222222222222222,
105
- "loss": 0.1879,
106
  "step": 1300
107
  },
108
  {
109
  "epoch": 1.24,
110
- "learning_rate": 0.00037777777777777777,
111
- "loss": 0.1739,
112
  "step": 1400
113
  },
114
  {
115
  "epoch": 1.33,
116
- "learning_rate": 0.0003333333333333333,
117
- "loss": 0.18,
118
  "step": 1500
119
  },
120
  {
121
  "epoch": 1.33,
122
- "eval_loss": 0.2883451581001282,
123
- "eval_macro-f1": 0.4507416671361624,
124
- "eval_micro-f1": 0.5963412549153702,
125
- "eval_runtime": 15.127,
126
- "eval_samples_per_second": 66.107,
127
- "eval_steps_per_second": 8.263,
128
  "step": 1500
129
  },
130
  {
131
  "epoch": 1.42,
132
- "learning_rate": 0.0002888888888888889,
133
- "loss": 0.179,
134
  "step": 1600
135
  },
136
  {
137
  "epoch": 1.51,
138
- "learning_rate": 0.00024444444444444443,
139
- "loss": 0.1729,
140
  "step": 1700
141
  },
142
  {
143
  "epoch": 1.6,
144
- "learning_rate": 0.0002,
145
- "loss": 0.1754,
146
  "step": 1800
147
  },
148
  {
149
  "epoch": 1.69,
150
- "learning_rate": 0.00015555555555555556,
151
- "loss": 0.1724,
152
  "step": 1900
153
  },
154
  {
155
  "epoch": 1.78,
156
- "learning_rate": 0.0001111111111111111,
157
- "loss": 0.1669,
158
  "step": 2000
159
  },
160
  {
161
  "epoch": 1.78,
162
- "eval_loss": 0.27745190262794495,
163
- "eval_macro-f1": 0.4739843869881502,
164
- "eval_micro-f1": 0.6247038917089679,
165
- "eval_runtime": 15.1069,
166
- "eval_samples_per_second": 66.195,
167
- "eval_steps_per_second": 8.274,
168
  "step": 2000
169
  },
170
  {
171
  "epoch": 1.87,
172
- "learning_rate": 6.666666666666667e-05,
173
- "loss": 0.1794,
174
  "step": 2100
175
  },
176
  {
177
  "epoch": 1.96,
178
- "learning_rate": 2.2222222222222223e-05,
179
- "loss": 0.1755,
180
  "step": 2200
181
  },
182
  {
183
  "epoch": 2.0,
184
  "step": 2250,
185
- "total_flos": 719017869312000.0,
186
- "train_loss": 0.19799817996554905,
187
- "train_runtime": 356.7954,
188
- "train_samples_per_second": 50.449,
189
- "train_steps_per_second": 6.306
190
  }
191
  ],
192
  "max_steps": 2250,
193
  "num_train_epochs": 2,
194
- "total_flos": 719017869312000.0,
195
  "trial_name": null,
196
  "trial_params": null
197
  }
 
9
  "log_history": [
10
  {
11
  "epoch": 0.09,
12
+ "learning_rate": 9.555555555555557e-05,
13
+ "loss": 0.2797,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 0.18,
18
+ "learning_rate": 9.111111111111112e-05,
19
+ "loss": 0.2685,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 0.27,
24
+ "learning_rate": 8.666666666666667e-05,
25
+ "loss": 0.233,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 0.36,
30
+ "learning_rate": 8.222222222222222e-05,
31
+ "loss": 0.2162,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 0.44,
36
+ "learning_rate": 7.777777777777778e-05,
37
+ "loss": 0.2058,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 0.44,
42
+ "eval_loss": 0.27426090836524963,
43
+ "eval_macro-f1": 0.30734886729341776,
44
+ "eval_micro-f1": 0.45783132530120485,
45
+ "eval_runtime": 154.032,
46
+ "eval_samples_per_second": 6.492,
47
+ "eval_steps_per_second": 0.812,
48
  "step": 500
49
  },
50
  {
51
  "epoch": 0.53,
52
+ "learning_rate": 7.333333333333333e-05,
53
+ "loss": 0.1897,
54
  "step": 600
55
  },
56
  {
57
  "epoch": 0.62,
58
+ "learning_rate": 6.88888888888889e-05,
59
+ "loss": 0.1881,
60
  "step": 700
61
  },
62
  {
63
  "epoch": 0.71,
64
+ "learning_rate": 6.444444444444446e-05,
65
+ "loss": 0.1913,
66
  "step": 800
67
  },
68
  {
69
  "epoch": 0.8,
70
+ "learning_rate": 6e-05,
71
+ "loss": 0.1777,
72
  "step": 900
73
  },
74
  {
75
  "epoch": 0.89,
76
+ "learning_rate": 5.555555555555556e-05,
77
+ "loss": 0.1583,
78
  "step": 1000
79
  },
80
  {
81
  "epoch": 0.89,
82
+ "eval_loss": 0.2576006054878235,
83
+ "eval_macro-f1": 0.33354478819865846,
84
+ "eval_micro-f1": 0.5014016820184222,
85
+ "eval_runtime": 153.9992,
86
+ "eval_samples_per_second": 6.494,
87
+ "eval_steps_per_second": 0.812,
88
  "step": 1000
89
  },
90
  {
91
  "epoch": 0.98,
92
+ "learning_rate": 5.111111111111111e-05,
93
+ "loss": 0.1776,
94
  "step": 1100
95
  },
96
  {
97
  "epoch": 1.07,
98
+ "learning_rate": 4.666666666666667e-05,
99
+ "loss": 0.1606,
100
  "step": 1200
101
  },
102
  {
103
  "epoch": 1.16,
104
+ "learning_rate": 4.222222222222222e-05,
105
+ "loss": 0.1658,
106
  "step": 1300
107
  },
108
  {
109
  "epoch": 1.24,
110
+ "learning_rate": 3.777777777777778e-05,
111
+ "loss": 0.1532,
112
  "step": 1400
113
  },
114
  {
115
  "epoch": 1.33,
116
+ "learning_rate": 3.3333333333333335e-05,
117
+ "loss": 0.1602,
118
  "step": 1500
119
  },
120
  {
121
  "epoch": 1.33,
122
+ "eval_loss": 0.2343183010816574,
123
+ "eval_macro-f1": 0.36320960253779505,
124
+ "eval_micro-f1": 0.5341176470588236,
125
+ "eval_runtime": 153.838,
126
+ "eval_samples_per_second": 6.5,
127
+ "eval_steps_per_second": 0.813,
128
  "step": 1500
129
  },
130
  {
131
  "epoch": 1.42,
132
+ "learning_rate": 2.8888888888888888e-05,
133
+ "loss": 0.1647,
134
  "step": 1600
135
  },
136
  {
137
  "epoch": 1.51,
138
+ "learning_rate": 2.4444444444444445e-05,
139
+ "loss": 0.1588,
140
  "step": 1700
141
  },
142
  {
143
  "epoch": 1.6,
144
+ "learning_rate": 2e-05,
145
+ "loss": 0.1513,
146
  "step": 1800
147
  },
148
  {
149
  "epoch": 1.69,
150
+ "learning_rate": 1.5555555555555555e-05,
151
+ "loss": 0.1518,
152
  "step": 1900
153
  },
154
  {
155
  "epoch": 1.78,
156
+ "learning_rate": 1.1111111111111112e-05,
157
+ "loss": 0.1474,
158
  "step": 2000
159
  },
160
  {
161
  "epoch": 1.78,
162
+ "eval_loss": 0.2262774258852005,
163
+ "eval_macro-f1": 0.3867760365271726,
164
+ "eval_micro-f1": 0.546868922598211,
165
+ "eval_runtime": 154.1091,
166
+ "eval_samples_per_second": 6.489,
167
+ "eval_steps_per_second": 0.811,
168
  "step": 2000
169
  },
170
  {
171
  "epoch": 1.87,
172
+ "learning_rate": 6.666666666666667e-06,
173
+ "loss": 0.155,
174
  "step": 2100
175
  },
176
  {
177
  "epoch": 1.96,
178
+ "learning_rate": 2.2222222222222225e-06,
179
+ "loss": 0.1522,
180
  "step": 2200
181
  },
182
  {
183
  "epoch": 2.0,
184
  "step": 2250,
185
+ "total_flos": 8.8324576837632e+16,
186
+ "train_loss": 0.18118783251444498,
187
+ "train_runtime": 3615.8229,
188
+ "train_samples_per_second": 4.978,
189
+ "train_steps_per_second": 0.622
190
  }
191
  ],
192
  "max_steps": 2250,
193
  "num_train_epochs": 2,
194
+ "total_flos": 8.8324576837632e+16,
195
  "trial_name": null,
196
  "trial_params": null
197
  }