umer07 commited on
Commit
3acd484
·
verified ·
1 Parent(s): e1f1c05

Fathom: upload expert-e2-dynamic/training_log.json

Browse files
adapters/expert-e2-dynamic/training_log.json ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "loss": 1.4307,
4
+ "grad_norm": 0.6940367817878723,
5
+ "learning_rate": 3e-05,
6
+ "entropy": 1.1788243889808654,
7
+ "num_tokens": 155850.0,
8
+ "mean_token_accuracy": 0.6935877948999405,
9
+ "epoch": 0.11764705882352941,
10
+ "step": 10
11
+ },
12
+ {
13
+ "loss": 1.1806,
14
+ "grad_norm": 0.7020200490951538,
15
+ "learning_rate": 6.333333333333333e-05,
16
+ "entropy": 1.2202600359916687,
17
+ "num_tokens": 312325.0,
18
+ "mean_token_accuracy": 0.7312566488981247,
19
+ "epoch": 0.23529411764705882,
20
+ "step": 20
21
+ },
22
+ {
23
+ "loss": 0.7607,
24
+ "grad_norm": 0.47733989357948303,
25
+ "learning_rate": 9.666666666666667e-05,
26
+ "entropy": 0.8012344032526016,
27
+ "num_tokens": 469039.0,
28
+ "mean_token_accuracy": 0.8055870264768601,
29
+ "epoch": 0.35294117647058826,
30
+ "step": 30
31
+ },
32
+ {
33
+ "loss": 0.5203,
34
+ "grad_norm": 0.45993906259536743,
35
+ "learning_rate": 9.96057350657239e-05,
36
+ "entropy": 0.5153448149561882,
37
+ "num_tokens": 623975.0,
38
+ "mean_token_accuracy": 0.8535495191812515,
39
+ "epoch": 0.47058823529411764,
40
+ "step": 40
41
+ },
42
+ {
43
+ "loss": 0.4475,
44
+ "grad_norm": 0.3403550088405609,
45
+ "learning_rate": 9.825082472361557e-05,
46
+ "entropy": 0.44883628338575365,
47
+ "num_tokens": 779685.0,
48
+ "mean_token_accuracy": 0.8701241254806519,
49
+ "epoch": 0.5882352941176471,
50
+ "step": 50
51
+ },
52
+ {
53
+ "loss": 0.4282,
54
+ "grad_norm": 0.2295123040676117,
55
+ "learning_rate": 9.595676696276172e-05,
56
+ "entropy": 0.43300508707761765,
57
+ "num_tokens": 936272.0,
58
+ "mean_token_accuracy": 0.874285313487053,
59
+ "epoch": 0.7058823529411765,
60
+ "step": 60
61
+ },
62
+ {
63
+ "loss": 0.4267,
64
+ "grad_norm": 0.17979678511619568,
65
+ "learning_rate": 9.276821300802534e-05,
66
+ "entropy": 0.4283462271094322,
67
+ "num_tokens": 1091178.0,
68
+ "mean_token_accuracy": 0.8739971458911896,
69
+ "epoch": 0.8235294117647058,
70
+ "step": 70
71
+ },
72
+ {
73
+ "loss": 0.4321,
74
+ "grad_norm": 0.14834725856781006,
75
+ "learning_rate": 8.874722443520899e-05,
76
+ "entropy": 0.4309688463807106,
77
+ "num_tokens": 1247181.0,
78
+ "mean_token_accuracy": 0.87317014336586,
79
+ "epoch": 0.9411764705882353,
80
+ "step": 80
81
+ },
82
+ {
83
+ "loss": 0.4183,
84
+ "grad_norm": 0.1518690288066864,
85
+ "learning_rate": 8.397206521307584e-05,
86
+ "entropy": 0.4187887296080589,
87
+ "num_tokens": 1399415.0,
88
+ "mean_token_accuracy": 0.8748669505119324,
89
+ "epoch": 1.0588235294117647,
90
+ "step": 90
91
+ },
92
+ {
93
+ "loss": 0.4285,
94
+ "grad_norm": 0.16970738768577576,
95
+ "learning_rate": 7.85356783842216e-05,
96
+ "entropy": 0.42983078211545944,
97
+ "num_tokens": 1552473.0,
98
+ "mean_token_accuracy": 0.8730897456407547,
99
+ "epoch": 1.1764705882352942,
100
+ "step": 100
101
+ },
102
+ {
103
+ "loss": 0.4141,
104
+ "grad_norm": 0.1731289178133011,
105
+ "learning_rate": 7.254387703447154e-05,
106
+ "entropy": 0.4201514065265656,
107
+ "num_tokens": 1709461.0,
108
+ "mean_token_accuracy": 0.875280225276947,
109
+ "epoch": 1.2941176470588236,
110
+ "step": 110
111
+ },
112
+ {
113
+ "loss": 0.4096,
114
+ "grad_norm": 0.1622275412082672,
115
+ "learning_rate": 6.611328476152557e-05,
116
+ "entropy": 0.41087806075811384,
117
+ "num_tokens": 1867531.0,
118
+ "mean_token_accuracy": 0.8777904808521271,
119
+ "epoch": 1.4117647058823528,
120
+ "step": 120
121
+ },
122
+ {
123
+ "loss": 0.4074,
124
+ "grad_norm": 0.15023751556873322,
125
+ "learning_rate": 5.9369065729286245e-05,
126
+ "entropy": 0.40929861813783647,
127
+ "num_tokens": 2023122.0,
128
+ "mean_token_accuracy": 0.8778889566659928,
129
+ "epoch": 1.5294117647058822,
130
+ "step": 130
131
+ },
132
+ {
133
+ "loss": 0.4091,
134
+ "grad_norm": 0.13255436718463898,
135
+ "learning_rate": 5.244248848978067e-05,
136
+ "entropy": 0.41203168481588365,
137
+ "num_tokens": 2178280.0,
138
+ "mean_token_accuracy": 0.8771576941013336,
139
+ "epoch": 1.6470588235294117,
140
+ "step": 140
141
+ },
142
+ {
143
+ "loss": 0.4027,
144
+ "grad_norm": 0.156217560172081,
145
+ "learning_rate": 4.5468370990111006e-05,
146
+ "entropy": 0.4050563558936119,
147
+ "num_tokens": 2335590.0,
148
+ "mean_token_accuracy": 0.8797280818223954,
149
+ "epoch": 1.7647058823529411,
150
+ "step": 150
151
+ },
152
+ {
153
+ "loss": 0.4089,
154
+ "grad_norm": 0.1703469306230545,
155
+ "learning_rate": 3.858245649446721e-05,
156
+ "entropy": 0.4095383077859879,
157
+ "num_tokens": 2490848.0,
158
+ "mean_token_accuracy": 0.8773213714361191,
159
+ "epoch": 1.8823529411764706,
160
+ "step": 160
161
+ },
162
+ {
163
+ "loss": 0.4052,
164
+ "grad_norm": 0.19920362532138824,
165
+ "learning_rate": 3.1918771495895396e-05,
166
+ "entropy": 0.4070162191987038,
167
+ "num_tokens": 2644022.0,
168
+ "mean_token_accuracy": 0.8788870364427567,
169
+ "epoch": 2.0,
170
+ "step": 170
171
+ },
172
+ {
173
+ "loss": 0.4031,
174
+ "grad_norm": 0.13015073537826538,
175
+ "learning_rate": 2.560701704306336e-05,
176
+ "entropy": 0.40551580488681793,
177
+ "num_tokens": 2800120.0,
178
+ "mean_token_accuracy": 0.8789848744869232,
179
+ "epoch": 2.1176470588235294,
180
+ "step": 180
181
+ },
182
+ {
183
+ "loss": 0.4067,
184
+ "grad_norm": 0.14793093502521515,
185
+ "learning_rate": 1.977004425688126e-05,
186
+ "entropy": 0.41181707233190534,
187
+ "num_tokens": 2955965.0,
188
+ "mean_token_accuracy": 0.877298504114151,
189
+ "epoch": 2.235294117647059,
190
+ "step": 190
191
+ },
192
+ {
193
+ "loss": 0.4079,
194
+ "grad_norm": 0.1416511833667755,
195
+ "learning_rate": 1.4521463173173965e-05,
196
+ "entropy": 0.41118668466806413,
197
+ "num_tokens": 3111112.0,
198
+ "mean_token_accuracy": 0.8776616156101227,
199
+ "epoch": 2.3529411764705883,
200
+ "step": 200
201
+ },
202
+ {
203
+ "loss": 0.3992,
204
+ "grad_norm": 0.14714457094669342,
205
+ "learning_rate": 9.963431452563332e-06,
206
+ "entropy": 0.4004165202379227,
207
+ "num_tokens": 3268703.0,
208
+ "mean_token_accuracy": 0.8799054056406022,
209
+ "epoch": 2.4705882352941178,
210
+ "step": 210
211
+ },
212
+ {
213
+ "loss": 0.4058,
214
+ "grad_norm": 0.14480003714561462,
215
+ "learning_rate": 6.184665997806832e-06,
216
+ "entropy": 0.4070618197321892,
217
+ "num_tokens": 3423830.0,
218
+ "mean_token_accuracy": 0.8782488256692886,
219
+ "epoch": 2.588235294117647,
220
+ "step": 220
221
+ },
222
+ {
223
+ "loss": 0.406,
224
+ "grad_norm": 0.14828461408615112,
225
+ "learning_rate": 3.258716180199278e-06,
226
+ "entropy": 0.4072370767593384,
227
+ "num_tokens": 3579594.0,
228
+ "mean_token_accuracy": 0.8782227575778961,
229
+ "epoch": 2.7058823529411766,
230
+ "step": 230
231
+ },
232
+ {
233
+ "loss": 0.4079,
234
+ "grad_norm": 0.16037017107009888,
235
+ "learning_rate": 1.2425322847218368e-06,
236
+ "entropy": 0.40918404459953306,
237
+ "num_tokens": 3734972.0,
238
+ "mean_token_accuracy": 0.8776972621679306,
239
+ "epoch": 2.8235294117647056,
240
+ "step": 240
241
+ },
242
+ {
243
+ "loss": 0.4035,
244
+ "grad_norm": 0.1524023711681366,
245
+ "learning_rate": 1.753570375247815e-07,
246
+ "entropy": 0.4051833271980286,
247
+ "num_tokens": 3891447.0,
248
+ "mean_token_accuracy": 0.8786362946033478,
249
+ "epoch": 2.9411764705882355,
250
+ "step": 250
251
+ },
252
+ {
253
+ "train_runtime": 1295.9489,
254
+ "train_samples_per_second": 6.28,
255
+ "train_steps_per_second": 0.197,
256
+ "total_flos": 1.3072916160774144e+18,
257
+ "train_loss": 0.5008774252498851,
258
+ "entropy": 0.406230491399765,
259
+ "num_tokens": 3966033.0,
260
+ "mean_token_accuracy": 0.8788680493831634,
261
+ "epoch": 3.0,
262
+ "step": 255
263
+ }
264
+ ]