QuentinKemperino committed on
Commit
fdc942b
1 Parent(s): 7afe171

Training complete

Browse files
Files changed (3) hide show
  1. all_results.json +8 -0
  2. train_results.json +8 -0
  3. trainer_state.json +197 -0
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "total_flos": 719017869312000.0,
4
+ "train_loss": 0.19799817996554905,
5
+ "train_runtime": 356.7954,
6
+ "train_samples_per_second": 50.449,
7
+ "train_steps_per_second": 6.306
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "total_flos": 719017869312000.0,
4
+ "train_loss": 0.19799817996554905,
5
+ "train_runtime": 356.7954,
6
+ "train_samples_per_second": 50.449,
7
+ "train_steps_per_second": 6.306
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "global_step": 2250,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.09,
12
+ "learning_rate": 0.0009555555555555556,
13
+ "loss": 0.2956,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.18,
18
+ "learning_rate": 0.0009111111111111111,
19
+ "loss": 0.2635,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.27,
24
+ "learning_rate": 0.0008666666666666667,
25
+ "loss": 0.2384,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 0.36,
30
+ "learning_rate": 0.0008222222222222222,
31
+ "loss": 0.2234,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 0.44,
36
+ "learning_rate": 0.0007777777777777778,
37
+ "loss": 0.2152,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 0.44,
42
+ "eval_loss": 0.3215401768684387,
43
+ "eval_macro-f1": 0.36252944248976143,
44
+ "eval_micro-f1": 0.5540940480105229,
45
+ "eval_runtime": 14.835,
46
+ "eval_samples_per_second": 67.408,
47
+ "eval_steps_per_second": 8.426,
48
+ "step": 500
49
+ },
50
+ {
51
+ "epoch": 0.53,
52
+ "learning_rate": 0.0007333333333333333,
53
+ "loss": 0.2043,
54
+ "step": 600
55
+ },
56
+ {
57
+ "epoch": 0.62,
58
+ "learning_rate": 0.000688888888888889,
59
+ "loss": 0.2019,
60
+ "step": 700
61
+ },
62
+ {
63
+ "epoch": 0.71,
64
+ "learning_rate": 0.0006444444444444444,
65
+ "loss": 0.2058,
66
+ "step": 800
67
+ },
68
+ {
69
+ "epoch": 0.8,
70
+ "learning_rate": 0.0006,
71
+ "loss": 0.2043,
72
+ "step": 900
73
+ },
74
+ {
75
+ "epoch": 0.89,
76
+ "learning_rate": 0.0005555555555555556,
77
+ "loss": 0.1826,
78
+ "step": 1000
79
+ },
80
+ {
81
+ "epoch": 0.89,
82
+ "eval_loss": 0.3090105652809143,
83
+ "eval_macro-f1": 0.40861377528604365,
84
+ "eval_micro-f1": 0.5695569729357007,
85
+ "eval_runtime": 14.9867,
86
+ "eval_samples_per_second": 66.726,
87
+ "eval_steps_per_second": 8.341,
88
+ "step": 1000
89
+ },
90
+ {
91
+ "epoch": 0.98,
92
+ "learning_rate": 0.0005111111111111111,
93
+ "loss": 0.1933,
94
+ "step": 1100
95
+ },
96
+ {
97
+ "epoch": 1.07,
98
+ "learning_rate": 0.00046666666666666666,
99
+ "loss": 0.1791,
100
+ "step": 1200
101
+ },
102
+ {
103
+ "epoch": 1.16,
104
+ "learning_rate": 0.0004222222222222222,
105
+ "loss": 0.1879,
106
+ "step": 1300
107
+ },
108
+ {
109
+ "epoch": 1.24,
110
+ "learning_rate": 0.00037777777777777777,
111
+ "loss": 0.1739,
112
+ "step": 1400
113
+ },
114
+ {
115
+ "epoch": 1.33,
116
+ "learning_rate": 0.0003333333333333333,
117
+ "loss": 0.18,
118
+ "step": 1500
119
+ },
120
+ {
121
+ "epoch": 1.33,
122
+ "eval_loss": 0.2883451581001282,
123
+ "eval_macro-f1": 0.4507416671361624,
124
+ "eval_micro-f1": 0.5963412549153702,
125
+ "eval_runtime": 15.127,
126
+ "eval_samples_per_second": 66.107,
127
+ "eval_steps_per_second": 8.263,
128
+ "step": 1500
129
+ },
130
+ {
131
+ "epoch": 1.42,
132
+ "learning_rate": 0.0002888888888888889,
133
+ "loss": 0.179,
134
+ "step": 1600
135
+ },
136
+ {
137
+ "epoch": 1.51,
138
+ "learning_rate": 0.00024444444444444443,
139
+ "loss": 0.1729,
140
+ "step": 1700
141
+ },
142
+ {
143
+ "epoch": 1.6,
144
+ "learning_rate": 0.0002,
145
+ "loss": 0.1754,
146
+ "step": 1800
147
+ },
148
+ {
149
+ "epoch": 1.69,
150
+ "learning_rate": 0.00015555555555555556,
151
+ "loss": 0.1724,
152
+ "step": 1900
153
+ },
154
+ {
155
+ "epoch": 1.78,
156
+ "learning_rate": 0.0001111111111111111,
157
+ "loss": 0.1669,
158
+ "step": 2000
159
+ },
160
+ {
161
+ "epoch": 1.78,
162
+ "eval_loss": 0.27745190262794495,
163
+ "eval_macro-f1": 0.4739843869881502,
164
+ "eval_micro-f1": 0.6247038917089679,
165
+ "eval_runtime": 15.1069,
166
+ "eval_samples_per_second": 66.195,
167
+ "eval_steps_per_second": 8.274,
168
+ "step": 2000
169
+ },
170
+ {
171
+ "epoch": 1.87,
172
+ "learning_rate": 6.666666666666667e-05,
173
+ "loss": 0.1794,
174
+ "step": 2100
175
+ },
176
+ {
177
+ "epoch": 1.96,
178
+ "learning_rate": 2.2222222222222223e-05,
179
+ "loss": 0.1755,
180
+ "step": 2200
181
+ },
182
+ {
183
+ "epoch": 2.0,
184
+ "step": 2250,
185
+ "total_flos": 719017869312000.0,
186
+ "train_loss": 0.19799817996554905,
187
+ "train_runtime": 356.7954,
188
+ "train_samples_per_second": 50.449,
189
+ "train_steps_per_second": 6.306
190
+ }
191
+ ],
192
+ "max_steps": 2250,
193
+ "num_train_epochs": 2,
194
+ "total_flos": 719017869312000.0,
195
+ "trial_name": null,
196
+ "trial_params": null
197
+ }