File size: 5,210 Bytes
c13e1d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
{
  "best_metric": 0.25090140104293823,
  "best_model_checkpoint": "/data/jcanete/all_results/mldoc/albeto_large/epochs_3_bs_16_lr_5e-6/checkpoint-1700",
  "epoch": 3.0,
  "global_step": 1776,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17,
      "eval_accuracy": 0.28299999237060547,
      "eval_loss": 1.2837506532669067,
      "eval_runtime": 8.1555,
      "eval_samples_per_second": 122.617,
      "eval_steps_per_second": 7.725,
      "step": 100
    },
    {
      "epoch": 0.34,
      "eval_accuracy": 0.531000018119812,
      "eval_loss": 1.1732338666915894,
      "eval_runtime": 8.1951,
      "eval_samples_per_second": 122.024,
      "eval_steps_per_second": 7.688,
      "step": 200
    },
    {
      "epoch": 0.51,
      "eval_accuracy": 0.5540000200271606,
      "eval_loss": 0.9090207815170288,
      "eval_runtime": 8.1405,
      "eval_samples_per_second": 122.843,
      "eval_steps_per_second": 7.739,
      "step": 300
    },
    {
      "epoch": 0.68,
      "eval_accuracy": 0.5619999766349792,
      "eval_loss": 0.8500489592552185,
      "eval_runtime": 8.4022,
      "eval_samples_per_second": 119.017,
      "eval_steps_per_second": 7.498,
      "step": 400
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.603603603603604e-06,
      "loss": 0.2667,
      "step": 500
    },
    {
      "epoch": 0.84,
      "eval_accuracy": 0.6660000085830688,
      "eval_loss": 0.7123118042945862,
      "eval_runtime": 8.0822,
      "eval_samples_per_second": 123.728,
      "eval_steps_per_second": 7.795,
      "step": 500
    },
    {
      "epoch": 1.01,
      "eval_accuracy": 0.7820000052452087,
      "eval_loss": 0.6517438888549805,
      "eval_runtime": 8.9908,
      "eval_samples_per_second": 111.225,
      "eval_steps_per_second": 7.007,
      "step": 600
    },
    {
      "epoch": 1.18,
      "eval_accuracy": 0.8230000138282776,
      "eval_loss": 0.5667398571968079,
      "eval_runtime": 8.2149,
      "eval_samples_per_second": 121.73,
      "eval_steps_per_second": 7.669,
      "step": 700
    },
    {
      "epoch": 1.35,
      "eval_accuracy": 0.8650000095367432,
      "eval_loss": 0.47660863399505615,
      "eval_runtime": 8.0853,
      "eval_samples_per_second": 123.682,
      "eval_steps_per_second": 7.792,
      "step": 800
    },
    {
      "epoch": 1.52,
      "eval_accuracy": 0.8799999952316284,
      "eval_loss": 0.38954979181289673,
      "eval_runtime": 8.0289,
      "eval_samples_per_second": 124.55,
      "eval_steps_per_second": 7.847,
      "step": 900
    },
    {
      "epoch": 1.69,
      "learning_rate": 2.195945945945946e-06,
      "loss": 0.1264,
      "step": 1000
    },
    {
      "epoch": 1.69,
      "eval_accuracy": 0.8420000076293945,
      "eval_loss": 0.5382915139198303,
      "eval_runtime": 8.0917,
      "eval_samples_per_second": 123.584,
      "eval_steps_per_second": 7.786,
      "step": 1000
    },
    {
      "epoch": 1.86,
      "eval_accuracy": 0.9100000262260437,
      "eval_loss": 0.36202001571655273,
      "eval_runtime": 7.986,
      "eval_samples_per_second": 125.219,
      "eval_steps_per_second": 7.889,
      "step": 1100
    },
    {
      "epoch": 2.03,
      "eval_accuracy": 0.9240000247955322,
      "eval_loss": 0.3379409909248352,
      "eval_runtime": 8.6218,
      "eval_samples_per_second": 115.985,
      "eval_steps_per_second": 7.307,
      "step": 1200
    },
    {
      "epoch": 2.2,
      "eval_accuracy": 0.9279999732971191,
      "eval_loss": 0.29248443245887756,
      "eval_runtime": 8.2615,
      "eval_samples_per_second": 121.044,
      "eval_steps_per_second": 7.626,
      "step": 1300
    },
    {
      "epoch": 2.36,
      "eval_accuracy": 0.9240000247955322,
      "eval_loss": 0.28705379366874695,
      "eval_runtime": 8.1584,
      "eval_samples_per_second": 122.573,
      "eval_steps_per_second": 7.722,
      "step": 1400
    },
    {
      "epoch": 2.53,
      "learning_rate": 7.882882882882883e-07,
      "loss": 0.0676,
      "step": 1500
    },
    {
      "epoch": 2.53,
      "eval_accuracy": 0.9279999732971191,
      "eval_loss": 0.2619408965110779,
      "eval_runtime": 7.9492,
      "eval_samples_per_second": 125.798,
      "eval_steps_per_second": 7.925,
      "step": 1500
    },
    {
      "epoch": 2.7,
      "eval_accuracy": 0.9279999732971191,
      "eval_loss": 0.26721176505088806,
      "eval_runtime": 7.9627,
      "eval_samples_per_second": 125.585,
      "eval_steps_per_second": 7.912,
      "step": 1600
    },
    {
      "epoch": 2.87,
      "eval_accuracy": 0.9319999814033508,
      "eval_loss": 0.25090140104293823,
      "eval_runtime": 8.0714,
      "eval_samples_per_second": 123.894,
      "eval_steps_per_second": 7.805,
      "step": 1700
    },
    {
      "epoch": 3.0,
      "step": 1776,
      "total_flos": 982359617226624.0,
      "train_loss": 0.13927180917413384,
      "train_runtime": 1029.288,
      "train_samples_per_second": 27.567,
      "train_steps_per_second": 1.725
    }
  ],
  "max_steps": 1776,
  "num_train_epochs": 3,
  "total_flos": 982359617226624.0,
  "trial_name": null,
  "trial_params": null
}