houck2040 commited on
Commit
ffd2eb8
1 Parent(s): 9476e94

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +205 -0
trainer_state.json ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 67.53246753246754,
5
+ "eval_steps": 500,
6
+ "global_step": 78000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 2.16,
13
+ "learning_rate": 9.892164502164502e-06,
14
+ "loss": 0.8489,
15
+ "step": 2500
16
+ },
17
+ {
18
+ "epoch": 4.33,
19
+ "learning_rate": 9.783982683982685e-06,
20
+ "loss": 0.6312,
21
+ "step": 5000
22
+ },
23
+ {
24
+ "epoch": 6.49,
25
+ "learning_rate": 9.675757575757577e-06,
26
+ "loss": 0.5488,
27
+ "step": 7500
28
+ },
29
+ {
30
+ "epoch": 8.66,
31
+ "learning_rate": 9.56757575757576e-06,
32
+ "loss": 0.5403,
33
+ "step": 10000
34
+ },
35
+ {
36
+ "epoch": 10.82,
37
+ "learning_rate": 9.45939393939394e-06,
38
+ "loss": 0.5208,
39
+ "step": 12500
40
+ },
41
+ {
42
+ "epoch": 12.99,
43
+ "learning_rate": 9.351212121212121e-06,
44
+ "loss": 0.4982,
45
+ "step": 15000
46
+ },
47
+ {
48
+ "epoch": 15.15,
49
+ "learning_rate": 9.243030303030304e-06,
50
+ "loss": 0.4878,
51
+ "step": 17500
52
+ },
53
+ {
54
+ "epoch": 17.32,
55
+ "learning_rate": 9.134891774891776e-06,
56
+ "loss": 0.5034,
57
+ "step": 20000
58
+ },
59
+ {
60
+ "epoch": 19.48,
61
+ "learning_rate": 9.026666666666666e-06,
62
+ "loss": 0.4937,
63
+ "step": 22500
64
+ },
65
+ {
66
+ "epoch": 21.65,
67
+ "learning_rate": 8.91852813852814e-06,
68
+ "loss": 0.5369,
69
+ "step": 25000
70
+ },
71
+ {
72
+ "epoch": 23.81,
73
+ "learning_rate": 8.810346320346321e-06,
74
+ "loss": 0.4645,
75
+ "step": 27500
76
+ },
77
+ {
78
+ "epoch": 25.97,
79
+ "learning_rate": 8.702121212121212e-06,
80
+ "loss": 0.4509,
81
+ "step": 30000
82
+ },
83
+ {
84
+ "epoch": 28.14,
85
+ "learning_rate": 8.593939393939395e-06,
86
+ "loss": 0.4499,
87
+ "step": 32500
88
+ },
89
+ {
90
+ "epoch": 30.3,
91
+ "learning_rate": 8.485757575757576e-06,
92
+ "loss": 0.4875,
93
+ "step": 35000
94
+ },
95
+ {
96
+ "epoch": 32.47,
97
+ "learning_rate": 8.377575757575759e-06,
98
+ "loss": 0.4544,
99
+ "step": 37500
100
+ },
101
+ {
102
+ "epoch": 34.63,
103
+ "learning_rate": 8.26939393939394e-06,
104
+ "loss": 0.4406,
105
+ "step": 40000
106
+ },
107
+ {
108
+ "epoch": 36.8,
109
+ "learning_rate": 8.161212121212123e-06,
110
+ "loss": 0.4369,
111
+ "step": 42500
112
+ },
113
+ {
114
+ "epoch": 38.96,
115
+ "learning_rate": 8.053030303030304e-06,
116
+ "loss": 0.4193,
117
+ "step": 45000
118
+ },
119
+ {
120
+ "epoch": 41.13,
121
+ "learning_rate": 7.944848484848485e-06,
122
+ "loss": 0.4057,
123
+ "step": 47500
124
+ },
125
+ {
126
+ "epoch": 43.29,
127
+ "learning_rate": 7.836666666666667e-06,
128
+ "loss": 0.3984,
129
+ "step": 50000
130
+ },
131
+ {
132
+ "epoch": 45.45,
133
+ "learning_rate": 7.72848484848485e-06,
134
+ "loss": 0.3911,
135
+ "step": 52500
136
+ },
137
+ {
138
+ "epoch": 47.62,
139
+ "learning_rate": 7.620303030303031e-06,
140
+ "loss": 0.3861,
141
+ "step": 55000
142
+ },
143
+ {
144
+ "epoch": 49.78,
145
+ "learning_rate": 7.512121212121213e-06,
146
+ "loss": 0.3833,
147
+ "step": 57500
148
+ },
149
+ {
150
+ "epoch": 51.95,
151
+ "learning_rate": 7.403939393939395e-06,
152
+ "loss": 0.3836,
153
+ "step": 60000
154
+ },
155
+ {
156
+ "epoch": 54.11,
157
+ "learning_rate": 7.295757575757576e-06,
158
+ "loss": 0.3706,
159
+ "step": 62500
160
+ },
161
+ {
162
+ "epoch": 56.28,
163
+ "learning_rate": 7.1875757575757576e-06,
164
+ "loss": 0.3625,
165
+ "step": 65000
166
+ },
167
+ {
168
+ "epoch": 58.44,
169
+ "learning_rate": 7.07939393939394e-06,
170
+ "loss": 0.3563,
171
+ "step": 67500
172
+ },
173
+ {
174
+ "epoch": 60.61,
175
+ "learning_rate": 6.971212121212122e-06,
176
+ "loss": 0.3522,
177
+ "step": 70000
178
+ },
179
+ {
180
+ "epoch": 62.77,
181
+ "learning_rate": 6.863030303030304e-06,
182
+ "loss": 0.347,
183
+ "step": 72500
184
+ },
185
+ {
186
+ "epoch": 64.94,
187
+ "learning_rate": 6.754891774891775e-06,
188
+ "loss": 0.3413,
189
+ "step": 75000
190
+ },
191
+ {
192
+ "epoch": 67.1,
193
+ "learning_rate": 6.646709956709957e-06,
194
+ "loss": 0.3355,
195
+ "step": 77500
196
+ }
197
+ ],
198
+ "logging_steps": 2500,
199
+ "max_steps": 231000,
200
+ "num_train_epochs": 200,
201
+ "save_steps": 1000,
202
+ "total_flos": 1.863115306612301e+20,
203
+ "trial_name": null,
204
+ "trial_params": null
205
+ }