deep-learning-analytics commited on
Commit
86abfd9
1 Parent(s): 8fdf81f

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +206 -0
trainer_state.json ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.561149001121521,
3
+ "best_model_checkpoint": "/content/drive/MyDrive/c4_200m/weights/checkpoint-5000",
4
+ "epoch": 0.969681298080031,
5
+ "global_step": 5000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.1,
12
+ "learning_rate": 1.8072148952676496e-05,
13
+ "loss": 0.763,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.1,
18
+ "eval_gen_len": 17.3073,
19
+ "eval_loss": 0.6263013482093811,
20
+ "eval_rouge1": 71.3829,
21
+ "eval_rouge2": 60.9575,
22
+ "eval_rougeL": 70.617,
23
+ "eval_rougeLsum": 70.6508,
24
+ "eval_runtime": 4434.8322,
25
+ "eval_samples_per_second": 12.402,
26
+ "eval_steps_per_second": 0.775,
27
+ "step": 500
28
+ },
29
+ {
30
+ "epoch": 0.19,
31
+ "learning_rate": 1.6132660977501943e-05,
32
+ "loss": 0.6682,
33
+ "step": 1000
34
+ },
35
+ {
36
+ "epoch": 0.19,
37
+ "eval_gen_len": 17.2827,
38
+ "eval_loss": 0.5980147123336792,
39
+ "eval_rouge1": 71.6839,
40
+ "eval_rouge2": 61.5115,
41
+ "eval_rougeL": 70.9369,
42
+ "eval_rougeLsum": 70.9762,
43
+ "eval_runtime": 4421.7138,
44
+ "eval_samples_per_second": 12.439,
45
+ "eval_steps_per_second": 0.778,
46
+ "step": 1000
47
+ },
48
+ {
49
+ "epoch": 0.29,
50
+ "learning_rate": 1.4193173002327387e-05,
51
+ "loss": 0.6547,
52
+ "step": 1500
53
+ },
54
+ {
55
+ "epoch": 0.29,
56
+ "eval_gen_len": 17.2674,
57
+ "eval_loss": 0.5852676630020142,
58
+ "eval_rouge1": 71.8716,
59
+ "eval_rouge2": 61.8264,
60
+ "eval_rougeL": 71.1345,
61
+ "eval_rougeLsum": 71.1741,
62
+ "eval_runtime": 4432.6842,
63
+ "eval_samples_per_second": 12.408,
64
+ "eval_steps_per_second": 0.776,
65
+ "step": 1500
66
+ },
67
+ {
68
+ "epoch": 0.39,
69
+ "learning_rate": 1.2257564003103182e-05,
70
+ "loss": 0.6423,
71
+ "step": 2000
72
+ },
73
+ {
74
+ "epoch": 0.39,
75
+ "eval_gen_len": 17.2636,
76
+ "eval_loss": 0.5758475065231323,
77
+ "eval_rouge1": 71.9319,
78
+ "eval_rouge2": 61.9431,
79
+ "eval_rougeL": 71.1965,
80
+ "eval_rougeLsum": 71.2364,
81
+ "eval_runtime": 4426.8507,
82
+ "eval_samples_per_second": 12.424,
83
+ "eval_steps_per_second": 0.777,
84
+ "step": 2000
85
+ },
86
+ {
87
+ "epoch": 0.48,
88
+ "learning_rate": 1.0318076027928628e-05,
89
+ "loss": 0.6319,
90
+ "step": 2500
91
+ },
92
+ {
93
+ "epoch": 0.48,
94
+ "eval_gen_len": 17.2555,
95
+ "eval_loss": 0.5706557631492615,
96
+ "eval_rouge1": 72.004,
97
+ "eval_rouge2": 62.0721,
98
+ "eval_rougeL": 71.2739,
99
+ "eval_rougeLsum": 71.3176,
100
+ "eval_runtime": 4455.9486,
101
+ "eval_samples_per_second": 12.343,
102
+ "eval_steps_per_second": 0.772,
103
+ "step": 2500
104
+ },
105
+ {
106
+ "epoch": 0.58,
107
+ "learning_rate": 8.378588052754074e-06,
108
+ "loss": 0.6239,
109
+ "step": 3000
110
+ },
111
+ {
112
+ "epoch": 0.58,
113
+ "eval_gen_len": 17.251,
114
+ "eval_loss": 0.5678849220275879,
115
+ "eval_rouge1": 72.0655,
116
+ "eval_rouge2": 62.1749,
117
+ "eval_rougeL": 71.3432,
118
+ "eval_rougeLsum": 71.3854,
119
+ "eval_runtime": 4483.3834,
120
+ "eval_samples_per_second": 12.268,
121
+ "eval_steps_per_second": 0.767,
122
+ "step": 3000
123
+ },
124
+ {
125
+ "epoch": 0.68,
126
+ "learning_rate": 6.43910007757952e-06,
127
+ "loss": 0.619,
128
+ "step": 3500
129
+ },
130
+ {
131
+ "epoch": 0.68,
132
+ "eval_gen_len": 17.25,
133
+ "eval_loss": 0.5634791851043701,
134
+ "eval_rouge1": 72.0893,
135
+ "eval_rouge2": 62.2244,
136
+ "eval_rougeL": 71.3669,
137
+ "eval_rougeLsum": 71.4087,
138
+ "eval_runtime": 4466.0069,
139
+ "eval_samples_per_second": 12.315,
140
+ "eval_steps_per_second": 0.77,
141
+ "step": 3500
142
+ },
143
+ {
144
+ "epoch": 0.78,
145
+ "learning_rate": 4.499612102404966e-06,
146
+ "loss": 0.6248,
147
+ "step": 4000
148
+ },
149
+ {
150
+ "epoch": 0.78,
151
+ "eval_gen_len": 17.246,
152
+ "eval_loss": 0.5618749260902405,
153
+ "eval_rouge1": 72.1096,
154
+ "eval_rouge2": 62.261,
155
+ "eval_rougeL": 71.3877,
156
+ "eval_rougeLsum": 71.4304,
157
+ "eval_runtime": 4436.9366,
158
+ "eval_samples_per_second": 12.396,
159
+ "eval_steps_per_second": 0.775,
160
+ "step": 4000
161
+ },
162
+ {
163
+ "epoch": 0.87,
164
+ "learning_rate": 2.560124127230411e-06,
165
+ "loss": 0.6159,
166
+ "step": 4500
167
+ },
168
+ {
169
+ "epoch": 0.87,
170
+ "eval_gen_len": 17.2457,
171
+ "eval_loss": 0.5612673163414001,
172
+ "eval_rouge1": 72.1232,
173
+ "eval_rouge2": 62.2834,
174
+ "eval_rougeL": 71.4006,
175
+ "eval_rougeLsum": 71.4433,
176
+ "eval_runtime": 4436.0596,
177
+ "eval_samples_per_second": 12.398,
178
+ "eval_steps_per_second": 0.775,
179
+ "step": 4500
180
+ },
181
+ {
182
+ "epoch": 0.97,
183
+ "learning_rate": 6.206361520558573e-07,
184
+ "loss": 0.6118,
185
+ "step": 5000
186
+ },
187
+ {
188
+ "epoch": 0.97,
189
+ "eval_gen_len": 17.2461,
190
+ "eval_loss": 0.561149001121521,
191
+ "eval_rouge1": 72.1272,
192
+ "eval_rouge2": 62.2947,
193
+ "eval_rougeL": 71.4073,
194
+ "eval_rougeLsum": 71.4508,
195
+ "eval_runtime": 4447.3157,
196
+ "eval_samples_per_second": 12.367,
197
+ "eval_steps_per_second": 0.773,
198
+ "step": 5000
199
+ }
200
+ ],
201
+ "max_steps": 5156,
202
+ "num_train_epochs": 1,
203
+ "total_flos": 7.061289205579776e+16,
204
+ "trial_name": null,
205
+ "trial_params": null
206
+ }