File size: 47,289 Bytes
2bf6292
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
{"current_steps": 20, "total_steps": 4030, "loss": 2.826, "learning_rate": 2.3573200992555833e-06, "epoch": 0.32, "percentage": 0.5, "elapsed_time": "0:00:34", "remaining_time": "1:55:11", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 40, "total_steps": 4030, "loss": 2.72, "learning_rate": 4.838709677419355e-06, "epoch": 0.64, "percentage": 0.99, "elapsed_time": "0:01:06", "remaining_time": "1:51:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 60, "total_steps": 4030, "loss": 2.4912, "learning_rate": 7.320099255583126e-06, "epoch": 0.96, "percentage": 1.49, "elapsed_time": "0:01:40", "remaining_time": "1:50:44", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 80, "total_steps": 4030, "loss": 2.0561, "learning_rate": 9.801488833746898e-06, "epoch": 1.28, "percentage": 1.99, "elapsed_time": "0:02:15", "remaining_time": "1:51:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 100, "total_steps": 4030, "loss": 1.7744, "learning_rate": 1.2282878411910669e-05, "epoch": 1.6, "percentage": 2.48, "elapsed_time": "0:02:51", "remaining_time": "1:52:01", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 120, "total_steps": 4030, "loss": 1.8387, "learning_rate": 1.4764267990074444e-05, "epoch": 1.92, "percentage": 2.98, "elapsed_time": "0:03:25", "remaining_time": "1:51:28", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 140, "total_steps": 4030, "loss": 1.4478, "learning_rate": 1.7245657568238215e-05, "epoch": 2.24, "percentage": 3.47, "elapsed_time": "0:03:59", "remaining_time": "1:50:52", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 160, "total_steps": 4030, "loss": 1.285, "learning_rate": 1.9727047146401986e-05, "epoch": 2.56, "percentage": 3.97, "elapsed_time": "0:04:34", "remaining_time": "1:50:27", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 180, "total_steps": 4030, "loss": 1.2235, "learning_rate": 2.2208436724565757e-05, "epoch": 2.88, "percentage": 4.47, "elapsed_time": "0:05:08", "remaining_time": "1:50:02", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 200, "total_steps": 4030, "loss": 0.9575, "learning_rate": 2.468982630272953e-05, "epoch": 3.2, "percentage": 4.96, "elapsed_time": "0:05:40", "remaining_time": "1:48:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 220, "total_steps": 4030, "loss": 0.7086, "learning_rate": 2.7171215880893302e-05, "epoch": 3.52, "percentage": 5.46, "elapsed_time": "0:06:15", "remaining_time": "1:48:24", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 240, "total_steps": 4030, "loss": 0.8587, "learning_rate": 2.9652605459057077e-05, "epoch": 3.84, "percentage": 5.96, "elapsed_time": "0:06:50", "remaining_time": "1:48:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 260, "total_steps": 4030, "loss": 0.5978, "learning_rate": 3.2133995037220844e-05, "epoch": 4.16, "percentage": 6.45, "elapsed_time": "0:07:25", "remaining_time": "1:47:33", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 280, "total_steps": 4030, "loss": 0.4668, "learning_rate": 3.461538461538462e-05, "epoch": 4.48, "percentage": 6.95, "elapsed_time": "0:07:58", "remaining_time": "1:46:54", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 300, "total_steps": 4030, "loss": 0.5667, "learning_rate": 3.7096774193548386e-05, "epoch": 4.8, "percentage": 7.44, "elapsed_time": "0:08:32", "remaining_time": "1:46:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 320, "total_steps": 4030, "loss": 0.4373, "learning_rate": 3.957816377171216e-05, "epoch": 5.12, "percentage": 7.94, "elapsed_time": "0:09:06", "remaining_time": "1:45:31", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 340, "total_steps": 4030, "loss": 0.3492, "learning_rate": 4.205955334987593e-05, "epoch": 5.44, "percentage": 8.44, "elapsed_time": "0:09:39", "remaining_time": "1:44:54", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 360, "total_steps": 4030, "loss": 0.4018, "learning_rate": 4.45409429280397e-05, "epoch": 5.76, "percentage": 8.93, "elapsed_time": "0:10:14", "remaining_time": "1:44:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 380, "total_steps": 4030, "loss": 0.279, "learning_rate": 4.702233250620348e-05, "epoch": 6.08, "percentage": 9.43, "elapsed_time": "0:10:48", "remaining_time": "1:43:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 400, "total_steps": 4030, "loss": 0.2362, "learning_rate": 4.950372208436725e-05, "epoch": 6.4, "percentage": 9.93, "elapsed_time": "0:11:22", "remaining_time": "1:43:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 420, "total_steps": 4030, "loss": 0.2665, "learning_rate": 4.99975992459978e-05, "epoch": 6.72, "percentage": 10.42, "elapsed_time": "0:11:55", "remaining_time": "1:42:27", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 440, "total_steps": 4030, "loss": 0.2029, "learning_rate": 4.9987846973104825e-05, "epoch": 7.04, "percentage": 10.92, "elapsed_time": "0:12:29", "remaining_time": "1:41:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 460, "total_steps": 4030, "loss": 0.1747, "learning_rate": 4.9970596058519116e-05, "epoch": 7.36, "percentage": 11.41, "elapsed_time": "0:13:05", "remaining_time": "1:41:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 480, "total_steps": 4030, "loss": 0.1486, "learning_rate": 4.994585167909436e-05, "epoch": 7.68, "percentage": 11.91, "elapsed_time": "0:13:41", "remaining_time": "1:41:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 500, "total_steps": 4030, "loss": 0.1866, "learning_rate": 4.9913621260409695e-05, "epoch": 8.0, "percentage": 12.41, "elapsed_time": "0:14:17", "remaining_time": "1:40:57", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 520, "total_steps": 4030, "loss": 0.1476, "learning_rate": 4.987391447454136e-05, "epoch": 8.32, "percentage": 12.9, "elapsed_time": "0:14:54", "remaining_time": "1:40:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 540, "total_steps": 4030, "loss": 0.1403, "learning_rate": 4.982674323716023e-05, "epoch": 8.64, "percentage": 13.4, "elapsed_time": "0:15:31", "remaining_time": "1:40:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 560, "total_steps": 4030, "loss": 0.1018, "learning_rate": 4.977212170395598e-05, "epoch": 8.96, "percentage": 13.9, "elapsed_time": "0:16:08", "remaining_time": "1:40:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 580, "total_steps": 4030, "loss": 0.0992, "learning_rate": 4.9710066266389074e-05, "epoch": 9.28, "percentage": 14.39, "elapsed_time": "0:16:45", "remaining_time": "1:39:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 600, "total_steps": 4030, "loss": 0.1134, "learning_rate": 4.964059554677187e-05, "epoch": 9.6, "percentage": 14.89, "elapsed_time": "0:17:21", "remaining_time": "1:39:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 620, "total_steps": 4030, "loss": 0.0781, "learning_rate": 4.956373039268022e-05, "epoch": 9.92, "percentage": 15.38, "elapsed_time": "0:17:57", "remaining_time": "1:38:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 640, "total_steps": 4030, "loss": 0.0892, "learning_rate": 4.947949387069721e-05, "epoch": 10.24, "percentage": 15.88, "elapsed_time": "0:18:33", "remaining_time": "1:38:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 660, "total_steps": 4030, "loss": 0.0499, "learning_rate": 4.938791125949119e-05, "epoch": 10.56, "percentage": 16.38, "elapsed_time": "0:19:06", "remaining_time": "1:37:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 680, "total_steps": 4030, "loss": 0.0831, "learning_rate": 4.9289010042229765e-05, "epoch": 10.88, "percentage": 16.87, "elapsed_time": "0:19:40", "remaining_time": "1:36:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 700, "total_steps": 4030, "loss": 0.0715, "learning_rate": 4.918281989833238e-05, "epoch": 11.2, "percentage": 17.37, "elapsed_time": "0:20:14", "remaining_time": "1:36:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 720, "total_steps": 4030, "loss": 0.0718, "learning_rate": 4.9069372694563756e-05, "epoch": 11.52, "percentage": 17.87, "elapsed_time": "0:20:48", "remaining_time": "1:35:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 740, "total_steps": 4030, "loss": 0.0849, "learning_rate": 4.8948702475470933e-05, "epoch": 11.84, "percentage": 18.36, "elapsed_time": "0:21:22", "remaining_time": "1:35:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 760, "total_steps": 4030, "loss": 0.0683, "learning_rate": 4.882084545316684e-05, "epoch": 12.16, "percentage": 18.86, "elapsed_time": "0:21:56", "remaining_time": "1:34:25", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 780, "total_steps": 4030, "loss": 0.0808, "learning_rate": 4.868583999646329e-05, "epoch": 12.48, "percentage": 19.35, "elapsed_time": "0:22:30", "remaining_time": "1:33:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 800, "total_steps": 4030, "loss": 0.0607, "learning_rate": 4.8543726619356846e-05, "epoch": 12.8, "percentage": 19.85, "elapsed_time": "0:23:04", "remaining_time": "1:33:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 820, "total_steps": 4030, "loss": 0.062, "learning_rate": 4.83945479688709e-05, "epoch": 13.12, "percentage": 20.35, "elapsed_time": "0:23:38", "remaining_time": "1:32:32", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 840, "total_steps": 4030, "loss": 0.0461, "learning_rate": 4.8238348812257684e-05, "epoch": 13.44, "percentage": 20.84, "elapsed_time": "0:24:12", "remaining_time": "1:31:54", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 860, "total_steps": 4030, "loss": 0.0482, "learning_rate": 4.808349953928184e-05, "epoch": 13.76, "percentage": 21.34, "elapsed_time": "0:24:46", "remaining_time": "1:31:17", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 880, "total_steps": 4030, "loss": 0.0388, "learning_rate": 4.791374712344622e-05, "epoch": 14.08, "percentage": 21.84, "elapsed_time": "0:25:20", "remaining_time": "1:30:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 900, "total_steps": 4030, "loss": 0.0251, "learning_rate": 4.7737118485753564e-05, "epoch": 14.4, "percentage": 22.33, "elapsed_time": "0:25:54", "remaining_time": "1:30:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 920, "total_steps": 4030, "loss": 0.0515, "learning_rate": 4.75536666309653e-05, "epoch": 14.72, "percentage": 22.83, "elapsed_time": "0:26:27", "remaining_time": "1:29:27", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 940, "total_steps": 4030, "loss": 0.0536, "learning_rate": 4.73634466114326e-05, "epoch": 15.04, "percentage": 23.33, "elapsed_time": "0:27:01", "remaining_time": "1:28:51", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 960, "total_steps": 4030, "loss": 0.0392, "learning_rate": 4.7166515510575676e-05, "epoch": 15.36, "percentage": 23.82, "elapsed_time": "0:27:36", "remaining_time": "1:28:17", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 980, "total_steps": 4030, "loss": 0.0369, "learning_rate": 4.696293242575356e-05, "epoch": 15.68, "percentage": 24.32, "elapsed_time": "0:28:10", "remaining_time": "1:27:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1000, "total_steps": 4030, "loss": 0.0651, "learning_rate": 4.675275845052942e-05, "epoch": 16.0, "percentage": 24.81, "elapsed_time": "0:28:45", "remaining_time": "1:27:07", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1020, "total_steps": 4030, "loss": 0.037, "learning_rate": 4.6536056656336947e-05, "epoch": 16.32, "percentage": 25.31, "elapsed_time": "0:29:20", "remaining_time": "1:26:34", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1040, "total_steps": 4030, "loss": 0.0272, "learning_rate": 4.631289207355313e-05, "epoch": 16.64, "percentage": 25.81, "elapsed_time": "0:29:53", "remaining_time": "1:25:57", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1060, "total_steps": 4030, "loss": 0.0507, "learning_rate": 4.6083331671983185e-05, "epoch": 16.96, "percentage": 26.3, "elapsed_time": "0:30:27", "remaining_time": "1:25:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1080, "total_steps": 4030, "loss": 0.0274, "learning_rate": 4.584744434076352e-05, "epoch": 17.28, "percentage": 26.8, "elapsed_time": "0:31:00", "remaining_time": "1:24:42", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1100, "total_steps": 4030, "loss": 0.0565, "learning_rate": 4.560530086768863e-05, "epoch": 17.6, "percentage": 27.3, "elapsed_time": "0:31:33", "remaining_time": "1:24:04", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1120, "total_steps": 4030, "loss": 0.0425, "learning_rate": 4.535697391796832e-05, "epoch": 17.92, "percentage": 27.79, "elapsed_time": "0:32:07", "remaining_time": "1:23:28", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1140, "total_steps": 4030, "loss": 0.0273, "learning_rate": 4.510253801242147e-05, "epoch": 18.24, "percentage": 28.29, "elapsed_time": "0:32:41", "remaining_time": "1:22:52", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1160, "total_steps": 4030, "loss": 0.0438, "learning_rate": 4.4842069505112984e-05, "epoch": 18.56, "percentage": 28.78, "elapsed_time": "0:33:15", "remaining_time": "1:22:17", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1180, "total_steps": 4030, "loss": 0.0544, "learning_rate": 4.457564656044056e-05, "epoch": 18.88, "percentage": 29.28, "elapsed_time": "0:33:49", "remaining_time": "1:21:40", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1200, "total_steps": 4030, "loss": 0.0283, "learning_rate": 4.430334912967824e-05, "epoch": 19.2, "percentage": 29.78, "elapsed_time": "0:34:23", "remaining_time": "1:21:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1220, "total_steps": 4030, "loss": 0.0393, "learning_rate": 4.402525892698367e-05, "epoch": 19.52, "percentage": 30.27, "elapsed_time": "0:34:57", "remaining_time": "1:20:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1240, "total_steps": 4030, "loss": 0.0249, "learning_rate": 4.374145940487641e-05, "epoch": 19.84, "percentage": 30.77, "elapsed_time": "0:35:30", "remaining_time": "1:19:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1260, "total_steps": 4030, "loss": 0.0293, "learning_rate": 4.345203572919454e-05, "epoch": 20.16, "percentage": 31.27, "elapsed_time": "0:36:04", "remaining_time": "1:19:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1280, "total_steps": 4030, "loss": 0.0287, "learning_rate": 4.315707475353706e-05, "epoch": 20.48, "percentage": 31.76, "elapsed_time": "0:36:38", "remaining_time": "1:18:43", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1300, "total_steps": 4030, "loss": 0.0521, "learning_rate": 4.285666499319992e-05, "epoch": 20.8, "percentage": 32.26, "elapsed_time": "0:37:12", "remaining_time": "1:18:07", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1320, "total_steps": 4030, "loss": 0.0285, "learning_rate": 4.25508965986133e-05, "epoch": 21.12, "percentage": 32.75, "elapsed_time": "0:37:46", "remaining_time": "1:17:32", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1340, "total_steps": 4030, "loss": 0.0346, "learning_rate": 4.2239861328288214e-05, "epoch": 21.44, "percentage": 33.25, "elapsed_time": "0:38:19", "remaining_time": "1:16:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1360, "total_steps": 4030, "loss": 0.022, "learning_rate": 4.1923652521280585e-05, "epoch": 21.76, "percentage": 33.75, "elapsed_time": "0:38:54", "remaining_time": "1:16:22", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1380, "total_steps": 4030, "loss": 0.0482, "learning_rate": 4.160236506918098e-05, "epoch": 22.08, "percentage": 34.24, "elapsed_time": "0:39:28", "remaining_time": "1:15:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1400, "total_steps": 4030, "loss": 0.019, "learning_rate": 4.127609538763842e-05, "epoch": 22.4, "percentage": 34.74, "elapsed_time": "0:40:02", "remaining_time": "1:15:12", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1420, "total_steps": 4030, "loss": 0.0312, "learning_rate": 4.094494138742685e-05, "epoch": 22.72, "percentage": 35.24, "elapsed_time": "0:40:36", "remaining_time": "1:14:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1440, "total_steps": 4030, "loss": 0.0377, "learning_rate": 4.0609002445063036e-05, "epoch": 23.04, "percentage": 35.73, "elapsed_time": "0:41:09", "remaining_time": "1:14:02", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1460, "total_steps": 4030, "loss": 0.0307, "learning_rate": 4.02683793729844e-05, "epoch": 23.36, "percentage": 36.23, "elapsed_time": "0:41:43", "remaining_time": "1:13:26", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1480, "total_steps": 4030, "loss": 0.0419, "learning_rate": 3.9923174389296085e-05, "epoch": 23.68, "percentage": 36.72, "elapsed_time": "0:42:16", "remaining_time": "1:12:51", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1500, "total_steps": 4030, "loss": 0.0223, "learning_rate": 3.957349108709623e-05, "epoch": 24.0, "percentage": 37.22, "elapsed_time": "0:42:50", "remaining_time": "1:12:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1520, "total_steps": 4030, "loss": 0.0209, "learning_rate": 3.921943440338849e-05, "epoch": 24.32, "percentage": 37.72, "elapsed_time": "0:43:24", "remaining_time": "1:11:40", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1540, "total_steps": 4030, "loss": 0.0491, "learning_rate": 3.886111058759132e-05, "epoch": 24.64, "percentage": 38.21, "elapsed_time": "0:43:58", "remaining_time": "1:11:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1560, "total_steps": 4030, "loss": 0.0298, "learning_rate": 3.849862716965352e-05, "epoch": 24.96, "percentage": 38.71, "elapsed_time": "0:44:32", "remaining_time": "1:10:31", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1580, "total_steps": 4030, "loss": 0.0319, "learning_rate": 3.813209292778527e-05, "epoch": 25.28, "percentage": 39.21, "elapsed_time": "0:45:06", "remaining_time": "1:09:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1600, "total_steps": 4030, "loss": 0.0302, "learning_rate": 3.776161785581481e-05, "epoch": 25.6, "percentage": 39.7, "elapsed_time": "0:45:40", "remaining_time": "1:09:22", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1620, "total_steps": 4030, "loss": 0.04, "learning_rate": 3.738731313018019e-05, "epoch": 25.92, "percentage": 40.2, "elapsed_time": "0:46:14", "remaining_time": "1:08:47", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1640, "total_steps": 4030, "loss": 0.0354, "learning_rate": 3.700929107656614e-05, "epoch": 26.24, "percentage": 40.69, "elapsed_time": "0:46:48", "remaining_time": "1:08:12", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1660, "total_steps": 4030, "loss": 0.0186, "learning_rate": 3.662766513619611e-05, "epoch": 26.56, "percentage": 41.19, "elapsed_time": "0:47:22", "remaining_time": "1:07:38", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1680, "total_steps": 4030, "loss": 0.022, "learning_rate": 3.62425498317895e-05, "epoch": 26.88, "percentage": 41.69, "elapsed_time": "0:47:57", "remaining_time": "1:07:04", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1700, "total_steps": 4030, "loss": 0.015, "learning_rate": 3.585406073319439e-05, "epoch": 27.2, "percentage": 42.18, "elapsed_time": "0:48:31", "remaining_time": "1:06:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1720, "total_steps": 4030, "loss": 0.0381, "learning_rate": 3.546231442270596e-05, "epoch": 27.52, "percentage": 42.68, "elapsed_time": "0:49:05", "remaining_time": "1:05:55", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1740, "total_steps": 4030, "loss": 0.0277, "learning_rate": 3.506742846008116e-05, "epoch": 27.84, "percentage": 43.18, "elapsed_time": "0:49:39", "remaining_time": "1:05:20", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1760, "total_steps": 4030, "loss": 0.0423, "learning_rate": 3.4669521347259996e-05, "epoch": 28.16, "percentage": 43.67, "elapsed_time": "0:50:12", "remaining_time": "1:04:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1780, "total_steps": 4030, "loss": 0.0115, "learning_rate": 3.426871249280414e-05, "epoch": 28.48, "percentage": 44.17, "elapsed_time": "0:50:46", "remaining_time": "1:04:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1800, "total_steps": 4030, "loss": 0.0275, "learning_rate": 3.386512217606339e-05, "epoch": 28.8, "percentage": 44.67, "elapsed_time": "0:51:19", "remaining_time": "1:03:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1820, "total_steps": 4030, "loss": 0.0309, "learning_rate": 3.345887151108087e-05, "epoch": 29.12, "percentage": 45.16, "elapsed_time": "0:51:53", "remaining_time": "1:03:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1840, "total_steps": 4030, "loss": 0.0294, "learning_rate": 3.305008241024774e-05, "epoch": 29.44, "percentage": 45.66, "elapsed_time": "0:52:26", "remaining_time": "1:02:25", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1860, "total_steps": 4030, "loss": 0.0213, "learning_rate": 3.2638877547718264e-05, "epoch": 29.76, "percentage": 46.15, "elapsed_time": "0:53:00", "remaining_time": "1:01:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1880, "total_steps": 4030, "loss": 0.0326, "learning_rate": 3.222538032259643e-05, "epoch": 30.08, "percentage": 46.65, "elapsed_time": "0:53:34", "remaining_time": "1:01:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1900, "total_steps": 4030, "loss": 0.0249, "learning_rate": 3.1809714821904834e-05, "epoch": 30.4, "percentage": 47.15, "elapsed_time": "0:54:08", "remaining_time": "1:00:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1920, "total_steps": 4030, "loss": 0.0115, "learning_rate": 3.1392005783347244e-05, "epoch": 30.72, "percentage": 47.64, "elapsed_time": "0:54:42", "remaining_time": "1:00:07", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1940, "total_steps": 4030, "loss": 0.0322, "learning_rate": 3.0972378557875884e-05, "epoch": 31.04, "percentage": 48.14, "elapsed_time": "0:55:16", "remaining_time": "0:59:32", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1960, "total_steps": 4030, "loss": 0.0316, "learning_rate": 3.055095907207465e-05, "epoch": 31.36, "percentage": 48.64, "elapsed_time": "0:55:50", "remaining_time": "0:58:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1980, "total_steps": 4030, "loss": 0.0248, "learning_rate": 3.0127873790369627e-05, "epoch": 31.68, "percentage": 49.13, "elapsed_time": "0:56:24", "remaining_time": "0:58:24", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2000, "total_steps": 4030, "loss": 0.0234, "learning_rate": 2.9703249677078156e-05, "epoch": 32.0, "percentage": 49.63, "elapsed_time": "0:56:58", "remaining_time": "0:57:49", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2020, "total_steps": 4030, "loss": 0.0277, "learning_rate": 2.9277214158307937e-05, "epoch": 32.32, "percentage": 50.12, "elapsed_time": "0:57:33", "remaining_time": "0:57:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2040, "total_steps": 4030, "loss": 0.0162, "learning_rate": 2.8849895083717537e-05, "epoch": 32.64, "percentage": 50.62, "elapsed_time": "0:58:07", "remaining_time": "0:56:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2060, "total_steps": 4030, "loss": 0.022, "learning_rate": 2.842142068814977e-05, "epoch": 32.96, "percentage": 51.12, "elapsed_time": "0:58:41", "remaining_time": "0:56:08", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2080, "total_steps": 4030, "loss": 0.0278, "learning_rate": 2.7991919553149497e-05, "epoch": 33.28, "percentage": 51.61, "elapsed_time": "0:59:15", "remaining_time": "0:55:33", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2100, "total_steps": 4030, "loss": 0.0189, "learning_rate": 2.756152056837743e-05, "epoch": 33.6, "percentage": 52.11, "elapsed_time": "0:59:49", "remaining_time": "0:54:59", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2120, "total_steps": 4030, "loss": 0.0228, "learning_rate": 2.7130352892931388e-05, "epoch": 33.92, "percentage": 52.61, "elapsed_time": "1:00:23", "remaining_time": "0:54:24", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2140, "total_steps": 4030, "loss": 0.0319, "learning_rate": 2.669854591658679e-05, "epoch": 34.24, "percentage": 53.1, "elapsed_time": "1:00:56", "remaining_time": "0:53:49", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2160, "total_steps": 4030, "loss": 0.0153, "learning_rate": 2.6266229220967818e-05, "epoch": 34.56, "percentage": 53.6, "elapsed_time": "1:01:29", "remaining_time": "0:53:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2180, "total_steps": 4030, "loss": 0.0267, "learning_rate": 2.5833532540661127e-05, "epoch": 34.88, "percentage": 54.09, "elapsed_time": "1:02:03", "remaining_time": "0:52:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2200, "total_steps": 4030, "loss": 0.0178, "learning_rate": 2.540058572428356e-05, "epoch": 35.2, "percentage": 54.59, "elapsed_time": "1:02:37", "remaining_time": "0:52:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2220, "total_steps": 4030, "loss": 0.0217, "learning_rate": 2.496751869551567e-05, "epoch": 35.52, "percentage": 55.09, "elapsed_time": "1:03:10", "remaining_time": "0:51:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2240, "total_steps": 4030, "loss": 0.017, "learning_rate": 2.453446141411273e-05, "epoch": 35.84, "percentage": 55.58, "elapsed_time": "1:03:44", "remaining_time": "0:50:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2260, "total_steps": 4030, "loss": 0.0257, "learning_rate": 2.4101543836904938e-05, "epoch": 36.16, "percentage": 56.08, "elapsed_time": "1:04:18", "remaining_time": "0:50:21", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2280, "total_steps": 4030, "loss": 0.0237, "learning_rate": 2.3668895878798424e-05, "epoch": 36.48, "percentage": 56.58, "elapsed_time": "1:04:52", "remaining_time": "0:49:47", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2300, "total_steps": 4030, "loss": 0.024, "learning_rate": 2.32366473737889e-05, "epoch": 36.8, "percentage": 57.07, "elapsed_time": "1:05:25", "remaining_time": "0:49:12", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2320, "total_steps": 4030, "loss": 0.0225, "learning_rate": 2.2804928035999594e-05, "epoch": 37.12, "percentage": 57.57, "elapsed_time": "1:06:00", "remaining_time": "0:48:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2340, "total_steps": 4030, "loss": 0.0239, "learning_rate": 2.23738674207551e-05, "epoch": 37.44, "percentage": 58.06, "elapsed_time": "1:06:35", "remaining_time": "0:48:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2360, "total_steps": 4030, "loss": 0.0235, "learning_rate": 2.1943594885702984e-05, "epoch": 37.76, "percentage": 58.56, "elapsed_time": "1:07:09", "remaining_time": "0:47:31", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2380, "total_steps": 4030, "loss": 0.0286, "learning_rate": 2.151423955199456e-05, "epoch": 38.08, "percentage": 59.06, "elapsed_time": "1:07:43", "remaining_time": "0:46:57", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2400, "total_steps": 4030, "loss": 0.0323, "learning_rate": 2.108593026553681e-05, "epoch": 38.4, "percentage": 59.55, "elapsed_time": "1:08:18", "remaining_time": "0:46:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2420, "total_steps": 4030, "loss": 0.0241, "learning_rate": 2.0658795558326743e-05, "epoch": 38.72, "percentage": 60.05, "elapsed_time": "1:08:52", "remaining_time": "0:45:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2440, "total_steps": 4030, "loss": 0.0158, "learning_rate": 2.0232963609880093e-05, "epoch": 39.04, "percentage": 60.55, "elapsed_time": "1:09:26", "remaining_time": "0:45:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2460, "total_steps": 4030, "loss": 0.0241, "learning_rate": 1.9808562208765667e-05, "epoch": 39.36, "percentage": 61.04, "elapsed_time": "1:10:00", "remaining_time": "0:44:40", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2480, "total_steps": 4030, "loss": 0.0174, "learning_rate": 1.938571871425715e-05, "epoch": 39.68, "percentage": 61.54, "elapsed_time": "1:10:34", "remaining_time": "0:44:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2500, "total_steps": 4030, "loss": 0.0183, "learning_rate": 1.896456001811357e-05, "epoch": 40.0, "percentage": 62.03, "elapsed_time": "1:11:08", "remaining_time": "0:43:32", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2520, "total_steps": 4030, "loss": 0.012, "learning_rate": 1.854521250650026e-05, "epoch": 40.32, "percentage": 62.53, "elapsed_time": "1:11:43", "remaining_time": "0:42:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2540, "total_steps": 4030, "loss": 0.0225, "learning_rate": 1.8127802022061334e-05, "epoch": 40.64, "percentage": 63.03, "elapsed_time": "1:12:16", "remaining_time": "0:42:24", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2560, "total_steps": 4030, "loss": 0.0391, "learning_rate": 1.7712453826155457e-05, "epoch": 40.96, "percentage": 63.52, "elapsed_time": "1:12:50", "remaining_time": "0:41:49", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2580, "total_steps": 4030, "loss": 0.0229, "learning_rate": 1.72992925612659e-05, "epoch": 41.28, "percentage": 64.02, "elapsed_time": "1:13:24", "remaining_time": "0:41:15", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2600, "total_steps": 4030, "loss": 0.015, "learning_rate": 1.688844221359645e-05, "epoch": 41.6, "percentage": 64.52, "elapsed_time": "1:13:59", "remaining_time": "0:40:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2620, "total_steps": 4030, "loss": 0.0287, "learning_rate": 1.6480026075864163e-05, "epoch": 41.92, "percentage": 65.01, "elapsed_time": "1:14:33", "remaining_time": "0:40:07", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2640, "total_steps": 4030, "loss": 0.0229, "learning_rate": 1.6074166710300247e-05, "epoch": 42.24, "percentage": 65.51, "elapsed_time": "1:15:08", "remaining_time": "0:39:33", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2660, "total_steps": 4030, "loss": 0.0352, "learning_rate": 1.567098591187021e-05, "epoch": 42.56, "percentage": 66.0, "elapsed_time": "1:15:42", "remaining_time": "0:38:59", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2680, "total_steps": 4030, "loss": 0.0242, "learning_rate": 1.5270604671724188e-05, "epoch": 42.88, "percentage": 66.5, "elapsed_time": "1:16:16", "remaining_time": "0:38:25", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2700, "total_steps": 4030, "loss": 0.0165, "learning_rate": 1.4873143140888538e-05, "epoch": 43.2, "percentage": 67.0, "elapsed_time": "1:16:50", "remaining_time": "0:37:51", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2720, "total_steps": 4030, "loss": 0.0274, "learning_rate": 1.4478720594209532e-05, "epoch": 43.52, "percentage": 67.49, "elapsed_time": "1:17:24", "remaining_time": "0:37:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2740, "total_steps": 4030, "loss": 0.0185, "learning_rate": 1.4087455394559984e-05, "epoch": 43.84, "percentage": 67.99, "elapsed_time": "1:17:58", "remaining_time": "0:36:42", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2760, "total_steps": 4030, "loss": 0.0509, "learning_rate": 1.369946495731954e-05, "epoch": 44.16, "percentage": 68.49, "elapsed_time": "1:18:31", "remaining_time": "0:36:08", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2780, "total_steps": 4030, "loss": 0.027, "learning_rate": 1.3314865715139346e-05, "epoch": 44.48, "percentage": 68.98, "elapsed_time": "1:19:05", "remaining_time": "0:35:33", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2800, "total_steps": 4030, "loss": 0.0163, "learning_rate": 1.2933773083001517e-05, "epoch": 44.8, "percentage": 69.48, "elapsed_time": "1:19:39", "remaining_time": "0:34:59", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2820, "total_steps": 4030, "loss": 0.0125, "learning_rate": 1.255630142358421e-05, "epoch": 45.12, "percentage": 69.98, "elapsed_time": "1:20:14", "remaining_time": "0:34:25", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2840, "total_steps": 4030, "loss": 0.0327, "learning_rate": 1.2182564012942193e-05, "epoch": 45.44, "percentage": 70.47, "elapsed_time": "1:20:49", "remaining_time": "0:33:51", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2860, "total_steps": 4030, "loss": 0.0302, "learning_rate": 1.1812673006513789e-05, "epoch": 45.76, "percentage": 70.97, "elapsed_time": "1:21:23", "remaining_time": "0:33:17", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2880, "total_steps": 4030, "loss": 0.0209, "learning_rate": 1.14467394054639e-05, "epoch": 46.08, "percentage": 71.46, "elapsed_time": "1:21:57", "remaining_time": "0:32:43", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2900, "total_steps": 4030, "loss": 0.025, "learning_rate": 1.108487302337353e-05, "epoch": 46.4, "percentage": 71.96, "elapsed_time": "1:22:32", "remaining_time": "0:32:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2920, "total_steps": 4030, "loss": 0.0284, "learning_rate": 1.0727182453285647e-05, "epoch": 46.72, "percentage": 72.46, "elapsed_time": "1:23:06", "remaining_time": "0:31:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2940, "total_steps": 4030, "loss": 0.0174, "learning_rate": 1.0373775035117305e-05, "epoch": 47.04, "percentage": 72.95, "elapsed_time": "1:23:40", "remaining_time": "0:31:01", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2960, "total_steps": 4030, "loss": 0.0115, "learning_rate": 1.002475682344792e-05, "epoch": 47.36, "percentage": 73.45, "elapsed_time": "1:24:15", "remaining_time": "0:30:27", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2980, "total_steps": 4030, "loss": 0.0238, "learning_rate": 9.680232555693067e-06, "epoch": 47.68, "percentage": 73.95, "elapsed_time": "1:24:49", "remaining_time": "0:29:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3000, "total_steps": 4030, "loss": 0.0294, "learning_rate": 9.340305620673778e-06, "epoch": 48.0, "percentage": 74.44, "elapsed_time": "1:25:23", "remaining_time": "0:29:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3020, "total_steps": 4030, "loss": 0.0226, "learning_rate": 9.005078027590375e-06, "epoch": 48.32, "percentage": 74.94, "elapsed_time": "1:25:58", "remaining_time": "0:28:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3040, "total_steps": 4030, "loss": 0.0196, "learning_rate": 8.67465037541038e-06, "epoch": 48.64, "percentage": 75.43, "elapsed_time": "1:26:32", "remaining_time": "0:28:11", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3060, "total_steps": 4030, "loss": 0.0175, "learning_rate": 8.34912182267959e-06, "epoch": 48.96, "percentage": 75.93, "elapsed_time": "1:27:07", "remaining_time": "0:27:36", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3080, "total_steps": 4030, "loss": 0.015, "learning_rate": 8.028590057765523e-06, "epoch": 49.28, "percentage": 76.43, "elapsed_time": "1:27:41", "remaining_time": "0:27:02", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3100, "total_steps": 4030, "loss": 0.0221, "learning_rate": 7.713151269541844e-06, "epoch": 49.6, "percentage": 76.92, "elapsed_time": "1:28:15", "remaining_time": "0:26:28", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3120, "total_steps": 4030, "loss": 0.0161, "learning_rate": 7.402900118522979e-06, "epoch": 49.92, "percentage": 77.42, "elapsed_time": "1:28:48", "remaining_time": "0:25:54", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3140, "total_steps": 4030, "loss": 0.0237, "learning_rate": 7.097929708457282e-06, "epoch": 50.24, "percentage": 77.92, "elapsed_time": "1:29:23", "remaining_time": "0:25:20", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3160, "total_steps": 4030, "loss": 0.0172, "learning_rate": 6.7983315583873695e-06, "epoch": 50.56, "percentage": 78.41, "elapsed_time": "1:29:56", "remaining_time": "0:24:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3180, "total_steps": 4030, "loss": 0.0198, "learning_rate": 6.504195575186009e-06, "epoch": 50.88, "percentage": 78.91, "elapsed_time": "1:30:30", "remaining_time": "0:24:11", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3200, "total_steps": 4030, "loss": 0.0227, "learning_rate": 6.215610026575916e-06, "epoch": 51.2, "percentage": 79.4, "elapsed_time": "1:31:04", "remaining_time": "0:23:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3220, "total_steps": 4030, "loss": 0.0156, "learning_rate": 5.93266151464123e-06, "epoch": 51.52, "percentage": 79.9, "elapsed_time": "1:31:37", "remaining_time": "0:23:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3240, "total_steps": 4030, "loss": 0.0268, "learning_rate": 5.655434949839061e-06, "epoch": 51.84, "percentage": 80.4, "elapsed_time": "1:32:12", "remaining_time": "0:22:28", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3260, "total_steps": 4030, "loss": 0.0209, "learning_rate": 5.384013525518541e-06, "epoch": 52.16, "percentage": 80.89, "elapsed_time": "1:32:46", "remaining_time": "0:21:54", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3280, "total_steps": 4030, "loss": 0.0202, "learning_rate": 5.118478692955194e-06, "epoch": 52.48, "percentage": 81.39, "elapsed_time": "1:33:19", "remaining_time": "0:21:20", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3300, "total_steps": 4030, "loss": 0.0192, "learning_rate": 4.858910136908123e-06, "epoch": 52.8, "percentage": 81.89, "elapsed_time": "1:33:53", "remaining_time": "0:20:46", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3320, "total_steps": 4030, "loss": 0.0205, "learning_rate": 4.605385751707248e-06, "epoch": 53.12, "percentage": 82.38, "elapsed_time": "1:34:27", "remaining_time": "0:20:12", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3340, "total_steps": 4030, "loss": 0.0129, "learning_rate": 4.357981617877932e-06, "epoch": 53.44, "percentage": 82.88, "elapsed_time": "1:35:01", "remaining_time": "0:19:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3360, "total_steps": 4030, "loss": 0.0258, "learning_rate": 4.116771979309797e-06, "epoch": 53.76, "percentage": 83.37, "elapsed_time": "1:35:35", "remaining_time": "0:19:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3380, "total_steps": 4030, "loss": 0.0306, "learning_rate": 3.881829220976807e-06, "epoch": 54.08, "percentage": 83.87, "elapsed_time": "1:36:09", "remaining_time": "0:18:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3400, "total_steps": 4030, "loss": 0.0198, "learning_rate": 3.653223847215126e-06, "epoch": 54.4, "percentage": 84.37, "elapsed_time": "1:36:43", "remaining_time": "0:17:55", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3420, "total_steps": 4030, "loss": 0.0257, "learning_rate": 3.4310244605653797e-06, "epoch": 54.72, "percentage": 84.86, "elapsed_time": "1:37:17", "remaining_time": "0:17:21", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3440, "total_steps": 4030, "loss": 0.0125, "learning_rate": 3.215297741185572e-06, "epoch": 55.04, "percentage": 85.36, "elapsed_time": "1:37:52", "remaining_time": "0:16:47", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3460, "total_steps": 4030, "loss": 0.0124, "learning_rate": 3.0061084268410006e-06, "epoch": 55.36, "percentage": 85.86, "elapsed_time": "1:38:27", "remaining_time": "0:16:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3480, "total_steps": 4030, "loss": 0.023, "learning_rate": 2.8035192934769362e-06, "epoch": 55.68, "percentage": 86.35, "elapsed_time": "1:39:01", "remaining_time": "0:15:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3500, "total_steps": 4030, "loss": 0.0194, "learning_rate": 2.607591136380122e-06, "epoch": 56.0, "percentage": 86.85, "elapsed_time": "1:39:36", "remaining_time": "0:15:04", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3520, "total_steps": 4030, "loss": 0.0162, "learning_rate": 2.4183827519346308e-06, "epoch": 56.32, "percentage": 87.34, "elapsed_time": "1:40:10", "remaining_time": "0:14:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3540, "total_steps": 4030, "loss": 0.0337, "learning_rate": 2.235950919977545e-06, "epoch": 56.64, "percentage": 87.84, "elapsed_time": "1:40:44", "remaining_time": "0:13:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3560, "total_steps": 4030, "loss": 0.0139, "learning_rate": 2.0603503867598182e-06, "epoch": 56.96, "percentage": 88.34, "elapsed_time": "1:41:18", "remaining_time": "0:13:22", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3580, "total_steps": 4030, "loss": 0.0193, "learning_rate": 1.8916338485173823e-06, "epoch": 57.28, "percentage": 88.83, "elapsed_time": "1:41:52", "remaining_time": "0:12:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3600, "total_steps": 4030, "loss": 0.0203, "learning_rate": 1.7298519356574727e-06, "epoch": 57.6, "percentage": 89.33, "elapsed_time": "1:42:28", "remaining_time": "0:12:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3620, "total_steps": 4030, "loss": 0.0212, "learning_rate": 1.5750531975648324e-06, "epoch": 57.92, "percentage": 89.83, "elapsed_time": "1:43:02", "remaining_time": "0:11:40", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3640, "total_steps": 4030, "loss": 0.0173, "learning_rate": 1.4272840880324934e-06, "epoch": 58.24, "percentage": 90.32, "elapsed_time": "1:43:36", "remaining_time": "0:11:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3660, "total_steps": 4030, "loss": 0.0139, "learning_rate": 1.286588951321363e-06, "epoch": 58.56, "percentage": 90.82, "elapsed_time": "1:44:10", "remaining_time": "0:10:31", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3680, "total_steps": 4030, "loss": 0.0268, "learning_rate": 1.1530100088528867e-06, "epoch": 58.88, "percentage": 91.32, "elapsed_time": "1:44:45", "remaining_time": "0:09:57", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3700, "total_steps": 4030, "loss": 0.0191, "learning_rate": 1.0265873465387516e-06, "epoch": 59.2, "percentage": 91.81, "elapsed_time": "1:45:18", "remaining_time": "0:09:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3720, "total_steps": 4030, "loss": 0.0168, "learning_rate": 9.073589027514789e-07, "epoch": 59.52, "percentage": 92.31, "elapsed_time": "1:45:53", "remaining_time": "0:08:49", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3740, "total_steps": 4030, "loss": 0.0246, "learning_rate": 7.953604569393841e-07, "epoch": 59.84, "percentage": 92.8, "elapsed_time": "1:46:27", "remaining_time": "0:08:15", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3760, "total_steps": 4030, "loss": 0.019, "learning_rate": 6.906256188895038e-07, "epoch": 60.16, "percentage": 93.3, "elapsed_time": "1:47:00", "remaining_time": "0:07:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3780, "total_steps": 4030, "loss": 0.0168, "learning_rate": 5.931858186415756e-07, "epoch": 60.48, "percentage": 93.8, "elapsed_time": "1:47:34", "remaining_time": "0:07:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3800, "total_steps": 4030, "loss": 0.0197, "learning_rate": 5.03070297056149e-07, "epoch": 60.8, "percentage": 94.29, "elapsed_time": "1:48:08", "remaining_time": "0:06:32", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3820, "total_steps": 4030, "loss": 0.0207, "learning_rate": 4.203060970396383e-07, "epoch": 61.12, "percentage": 94.79, "elapsed_time": "1:48:42", "remaining_time": "0:05:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3840, "total_steps": 4030, "loss": 0.0224, "learning_rate": 3.4491805542899157e-07, "epoch": 61.44, "percentage": 95.29, "elapsed_time": "1:49:16", "remaining_time": "0:05:24", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3860, "total_steps": 4030, "loss": 0.0151, "learning_rate": 2.769287955383532e-07, "epoch": 61.76, "percentage": 95.78, "elapsed_time": "1:49:51", "remaining_time": "0:04:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3880, "total_steps": 4030, "loss": 0.0284, "learning_rate": 2.1635872037001626e-07, "epoch": 62.08, "percentage": 96.28, "elapsed_time": "1:50:25", "remaining_time": "0:04:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3900, "total_steps": 4030, "loss": 0.0217, "learning_rate": 1.6322600649162356e-07, "epoch": 62.4, "percentage": 96.77, "elapsed_time": "1:50:59", "remaining_time": "0:03:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3920, "total_steps": 4030, "loss": 0.0103, "learning_rate": 1.1754659858156659e-07, "epoch": 62.72, "percentage": 97.27, "elapsed_time": "1:51:34", "remaining_time": "0:03:07", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3940, "total_steps": 4030, "loss": 0.0333, "learning_rate": 7.933420464410201e-08, "epoch": 63.04, "percentage": 97.77, "elapsed_time": "1:52:08", "remaining_time": "0:02:33", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3960, "total_steps": 4030, "loss": 0.0231, "learning_rate": 4.860029189569237e-08, "epoch": 63.36, "percentage": 98.26, "elapsed_time": "1:52:42", "remaining_time": "0:01:59", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3980, "total_steps": 4030, "loss": 0.0226, "learning_rate": 2.535408332381417e-08, "epoch": 63.68, "percentage": 98.76, "elapsed_time": "1:53:16", "remaining_time": "0:01:25", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 4000, "total_steps": 4030, "loss": 0.0218, "learning_rate": 9.60255491919415e-09, "epoch": 64.0, "percentage": 99.26, "elapsed_time": "1:53:50", "remaining_time": "0:00:51", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 4020, "total_steps": 4030, "loss": 0.0219, "learning_rate": 1.3504335823810722e-09, "epoch": 64.32, "percentage": 99.75, "elapsed_time": "1:54:25", "remaining_time": "0:00:17", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 4030, "total_steps": 4030, "epoch": 64.48, "percentage": 100.0, "elapsed_time": "1:54:42", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}