dat commited on
Commit
f75b982
1 Parent(s): 0609cb0

Saving weights and logs of step 10000

Browse files
Files changed (34) hide show
  1. checkpoint_10000 +3 -0
  2. events.out.tfevents.1626359368.t1v-n-f5c06ea1-w-0.699645.3.v2 +3 -0
  3. events.out.tfevents.1626362091.t1v-n-f5c06ea1-w-0.704455.3.v2 +3 -0
  4. events.out.tfevents.1626362977.t1v-n-f5c06ea1-w-0.707091.3.v2 +3 -0
  5. flax_model.msgpack +1 -1
  6. run.sh +2 -2
  7. run_mlm_flax_no_accum.py +1 -1
  8. wandb/debug-internal.log +1 -1
  9. wandb/debug.log +1 -1
  10. wandb/latest-run +1 -1
  11. wandb/run-20210715_142929-jw5go9rv/files/config.yaml +301 -0
  12. wandb/run-20210715_142929-jw5go9rv/files/output.log +567 -0
  13. wandb/run-20210715_142929-jw5go9rv/files/requirements.txt +94 -0
  14. wandb/run-20210715_142929-jw5go9rv/files/wandb-metadata.json +45 -0
  15. wandb/run-20210715_142929-jw5go9rv/files/wandb-summary.json +1 -0
  16. wandb/run-20210715_142929-jw5go9rv/logs/debug-internal.log +0 -0
  17. wandb/run-20210715_142929-jw5go9rv/logs/debug.log +27 -0
  18. wandb/run-20210715_142929-jw5go9rv/run-jw5go9rv.wandb +0 -0
  19. wandb/run-20210715_151452-1w0sb5ma/files/config.yaml +301 -0
  20. wandb/run-20210715_151452-1w0sb5ma/files/output.log +84 -0
  21. wandb/run-20210715_151452-1w0sb5ma/files/requirements.txt +94 -0
  22. wandb/run-20210715_151452-1w0sb5ma/files/wandb-metadata.json +45 -0
  23. wandb/run-20210715_151452-1w0sb5ma/files/wandb-summary.json +1 -0
  24. wandb/run-20210715_151452-1w0sb5ma/logs/debug-internal.log +240 -0
  25. wandb/run-20210715_151452-1w0sb5ma/logs/debug.log +27 -0
  26. wandb/run-20210715_151452-1w0sb5ma/run-1w0sb5ma.wandb +0 -0
  27. wandb/run-20210715_152938-8qznp93p/files/config.yaml +301 -0
  28. wandb/run-20210715_152938-8qznp93p/files/output.log +1222 -0
  29. wandb/run-20210715_152938-8qznp93p/files/requirements.txt +94 -0
  30. wandb/run-20210715_152938-8qznp93p/files/wandb-metadata.json +45 -0
  31. wandb/run-20210715_152938-8qznp93p/files/wandb-summary.json +1 -0
  32. wandb/run-20210715_152938-8qznp93p/logs/debug-internal.log +0 -0
  33. wandb/run-20210715_152938-8qznp93p/logs/debug.log +25 -0
  34. wandb/run-20210715_152938-8qznp93p/run-8qznp93p.wandb +0 -0
checkpoint_10000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:597fb40af449f7405460e2f66c0d15e1d26c79be2651d9c8f79d077600896c0d
3
+ size 1530270447
events.out.tfevents.1626359368.t1v-n-f5c06ea1-w-0.699645.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6476ad76c6275977cb46d0b071aa6f147b29d0fba378c2c96099461a3299f96
3
+ size 667915
events.out.tfevents.1626362091.t1v-n-f5c06ea1-w-0.704455.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b239b684baed635186da9955197cd5480350d978a96818486fdc0f08816a3424
3
+ size 40
events.out.tfevents.1626362977.t1v-n-f5c06ea1-w-0.707091.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29705d778db21abb9e80a3472189629d86cd8247cacda98d58b20a30bd684e63
3
+ size 1484145
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9dd04b40d879736851e06d01940a5427c1086eabab0e17bdc29ee5016e1f264
3
  size 510090043
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b89d2017401b78b9121ca751668fd0246d3593be3d565ee5e7b06d3829e4ec6a
3
  size 510090043
run.sh CHANGED
@@ -15,7 +15,7 @@ python ./run_mlm_flax_no_accum.py \
15
  --adam_beta1="0.9" \
16
  --adam_beta2="0.98" \
17
  --logging_steps="50" \
18
- --eval_steps="2000" \
19
  --num_train_epochs="2"\
20
  --preprocessing_num_workers="96" \
21
  --save_steps="10000" \
@@ -23,7 +23,7 @@ python ./run_mlm_flax_no_accum.py \
23
  --per_device_train_batch_size="1" \
24
  --per_device_eval_batch_size="1" \
25
  --save_total_limit="5"\
26
- --max_eval_samples="5000"\
27
  --resume_from_checkpoint="./"\
28
  #--gradient_accumulation_steps="4"\
29
  #--adafactor \
 
15
  --adam_beta1="0.9" \
16
  --adam_beta2="0.98" \
17
  --logging_steps="50" \
18
+ --eval_steps="3000" \
19
  --num_train_epochs="2"\
20
  --preprocessing_num_workers="96" \
21
  --save_steps="10000" \
 
23
  --per_device_train_batch_size="1" \
24
  --per_device_eval_batch_size="1" \
25
  --save_total_limit="5"\
26
+ --max_eval_samples="4000"\
27
  --resume_from_checkpoint="./"\
28
  #--gradient_accumulation_steps="4"\
29
  #--adafactor \
run_mlm_flax_no_accum.py CHANGED
@@ -699,7 +699,7 @@ if __name__ == "__main__":
699
  model_inputs = shard(model_inputs.data)
700
  state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
701
  train_metrics.append(train_metric)
702
-
703
  cur_step = epoch * (num_train_samples // train_batch_size) + step
704
  #if cur_step < resume_step:
705
  # continue
 
699
  model_inputs = shard(model_inputs.data)
700
  state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
701
  train_metrics.append(train_metric)
702
+
703
  cur_step = epoch * (num_train_samples // train_batch_size) + step
704
  #if cur_step < resume_step:
705
  # continue
wandb/debug-internal.log CHANGED
@@ -1 +1 @@
1
- run-20210715_142215-1vry5yso/logs/debug-internal.log
 
1
+ run-20210715_152938-8qznp93p/logs/debug-internal.log
wandb/debug.log CHANGED
@@ -1 +1 @@
1
- run-20210715_142215-1vry5yso/logs/debug.log
 
1
+ run-20210715_152938-8qznp93p/logs/debug.log
wandb/latest-run CHANGED
@@ -1 +1 @@
1
- run-20210715_142215-1vry5yso
 
1
+ run-20210715_152938-8qznp93p
wandb/run-20210715_142929-jw5go9rv/files/config.yaml ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 4: 3.8.10
17
+ 5: 0.10.33
18
+ 6: 4.9.0.dev0
19
+ 8:
20
+ - 5
21
+ adafactor:
22
+ desc: null
23
+ value: false
24
+ adam_beta1:
25
+ desc: null
26
+ value: 0.9
27
+ adam_beta2:
28
+ desc: null
29
+ value: 0.98
30
+ adam_epsilon:
31
+ desc: null
32
+ value: 1.0e-08
33
+ cache_dir:
34
+ desc: null
35
+ value: null
36
+ config_name:
37
+ desc: null
38
+ value: ./
39
+ dataloader_drop_last:
40
+ desc: null
41
+ value: false
42
+ dataloader_num_workers:
43
+ desc: null
44
+ value: 0
45
+ dataloader_pin_memory:
46
+ desc: null
47
+ value: true
48
+ dataset_config_name:
49
+ desc: null
50
+ value: null
51
+ dataset_name:
52
+ desc: null
53
+ value: null
54
+ ddp_find_unused_parameters:
55
+ desc: null
56
+ value: null
57
+ debug:
58
+ desc: null
59
+ value: []
60
+ deepspeed:
61
+ desc: null
62
+ value: null
63
+ disable_tqdm:
64
+ desc: null
65
+ value: false
66
+ do_eval:
67
+ desc: null
68
+ value: false
69
+ do_predict:
70
+ desc: null
71
+ value: false
72
+ do_train:
73
+ desc: null
74
+ value: false
75
+ dtype:
76
+ desc: null
77
+ value: float32
78
+ eval_accumulation_steps:
79
+ desc: null
80
+ value: null
81
+ eval_steps:
82
+ desc: null
83
+ value: 2000
84
+ evaluation_strategy:
85
+ desc: null
86
+ value: IntervalStrategy.NO
87
+ fp16:
88
+ desc: null
89
+ value: false
90
+ fp16_backend:
91
+ desc: null
92
+ value: auto
93
+ fp16_full_eval:
94
+ desc: null
95
+ value: false
96
+ fp16_opt_level:
97
+ desc: null
98
+ value: O1
99
+ gradient_accumulation_steps:
100
+ desc: null
101
+ value: 1
102
+ greater_is_better:
103
+ desc: null
104
+ value: null
105
+ group_by_length:
106
+ desc: null
107
+ value: false
108
+ ignore_data_skip:
109
+ desc: null
110
+ value: false
111
+ label_names:
112
+ desc: null
113
+ value: null
114
+ label_smoothing_factor:
115
+ desc: null
116
+ value: 0.0
117
+ learning_rate:
118
+ desc: null
119
+ value: 3.0e-05
120
+ length_column_name:
121
+ desc: null
122
+ value: length
123
+ line_by_line:
124
+ desc: null
125
+ value: false
126
+ load_best_model_at_end:
127
+ desc: null
128
+ value: false
129
+ local_rank:
130
+ desc: null
131
+ value: -1
132
+ log_level:
133
+ desc: null
134
+ value: -1
135
+ log_level_replica:
136
+ desc: null
137
+ value: -1
138
+ log_on_each_node:
139
+ desc: null
140
+ value: true
141
+ logging_dir:
142
+ desc: null
143
+ value: ./runs/Jul15_14-29-21_t1v-n-f5c06ea1-w-0
144
+ logging_first_step:
145
+ desc: null
146
+ value: false
147
+ logging_steps:
148
+ desc: null
149
+ value: 50
150
+ logging_strategy:
151
+ desc: null
152
+ value: IntervalStrategy.STEPS
153
+ lr_scheduler_type:
154
+ desc: null
155
+ value: SchedulerType.LINEAR
156
+ max_eval_samples:
157
+ desc: null
158
+ value: 5000
159
+ max_grad_norm:
160
+ desc: null
161
+ value: 1.0
162
+ max_seq_length:
163
+ desc: null
164
+ value: 4096
165
+ max_steps:
166
+ desc: null
167
+ value: -1
168
+ metric_for_best_model:
169
+ desc: null
170
+ value: null
171
+ mlm_probability:
172
+ desc: null
173
+ value: 0.15
174
+ model_name_or_path:
175
+ desc: null
176
+ value: null
177
+ model_type:
178
+ desc: null
179
+ value: big_bird
180
+ mp_parameters:
181
+ desc: null
182
+ value: ''
183
+ no_cuda:
184
+ desc: null
185
+ value: false
186
+ num_train_epochs:
187
+ desc: null
188
+ value: 2.0
189
+ output_dir:
190
+ desc: null
191
+ value: ./
192
+ overwrite_cache:
193
+ desc: null
194
+ value: false
195
+ overwrite_output_dir:
196
+ desc: null
197
+ value: true
198
+ pad_to_max_length:
199
+ desc: null
200
+ value: false
201
+ past_index:
202
+ desc: null
203
+ value: -1
204
+ per_device_eval_batch_size:
205
+ desc: null
206
+ value: 1
207
+ per_device_train_batch_size:
208
+ desc: null
209
+ value: 1
210
+ per_gpu_eval_batch_size:
211
+ desc: null
212
+ value: null
213
+ per_gpu_train_batch_size:
214
+ desc: null
215
+ value: null
216
+ prediction_loss_only:
217
+ desc: null
218
+ value: false
219
+ preprocessing_num_workers:
220
+ desc: null
221
+ value: 96
222
+ push_to_hub:
223
+ desc: null
224
+ value: true
225
+ push_to_hub_model_id:
226
+ desc: null
227
+ value: ''
228
+ push_to_hub_organization:
229
+ desc: null
230
+ value: null
231
+ push_to_hub_token:
232
+ desc: null
233
+ value: null
234
+ remove_unused_columns:
235
+ desc: null
236
+ value: true
237
+ report_to:
238
+ desc: null
239
+ value:
240
+ - tensorboard
241
+ - wandb
242
+ resume_from_checkpoint:
243
+ desc: null
244
+ value: ./
245
+ run_name:
246
+ desc: null
247
+ value: ./
248
+ save_on_each_node:
249
+ desc: null
250
+ value: false
251
+ save_steps:
252
+ desc: null
253
+ value: 10000
254
+ save_strategy:
255
+ desc: null
256
+ value: IntervalStrategy.STEPS
257
+ save_total_limit:
258
+ desc: null
259
+ value: 5
260
+ seed:
261
+ desc: null
262
+ value: 42
263
+ sharded_ddp:
264
+ desc: null
265
+ value: []
266
+ skip_memory_metrics:
267
+ desc: null
268
+ value: true
269
+ tokenizer_name:
270
+ desc: null
271
+ value: ./
272
+ tpu_metrics_debug:
273
+ desc: null
274
+ value: false
275
+ tpu_num_cores:
276
+ desc: null
277
+ value: null
278
+ train_ref_file:
279
+ desc: null
280
+ value: null
281
+ use_fast_tokenizer:
282
+ desc: null
283
+ value: true
284
+ use_legacy_prediction_loop:
285
+ desc: null
286
+ value: false
287
+ validation_ref_file:
288
+ desc: null
289
+ value: null
290
+ validation_split_percentage:
291
+ desc: null
292
+ value: 5
293
+ warmup_ratio:
294
+ desc: null
295
+ value: 0.0
296
+ warmup_steps:
297
+ desc: null
298
+ value: 10000
299
+ weight_decay:
300
+ desc: null
301
+ value: 0.0095
wandb/run-20210715_142929-jw5go9rv/files/output.log ADDED
@@ -0,0 +1,567 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [14:29:44] - INFO - absl - Restoring checkpoint from ./checkpoint_90000
2
+ tcmalloc: large alloc 1530273792 bytes == 0x9b2c6000 @ 0x7f647d05f680 0x7f647d080824 0x5b9a14 0x50b2ae 0x50cb1b 0x5a6f17 0x5f3010 0x56fd36 0x568d9a 0x5f5b33 0x56aadf 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7f647ce740b3 0x5f96de
3
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
4
+ warnings.warn(
5
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
6
+ warnings.warn(
7
+ Epoch ... (1/2): 0%| | 0/2 [00:00<?, ?it/s]
8
+
9
+
10
+
11
+
12
+
13
+ Training...: 26%|████████████████████████████████████▊ | 90051/352766 [04:33<27:53:42, 2.62it/s]
14
+
15
+
16
+
17
+
18
+ Training...: 26%|████████████████████████████████████▊ | 90101/352766 [04:53<27:06:50, 2.69it/s]
19
+
20
+
21
+
22
+
23
+ Training...: 26%|████████████████████████████████████▊ | 90151/352766 [05:13<26:28:09, 2.76it/s]
24
+
25
+
26
+
27
+
28
+ Training...: 26%|████████████████████████████████████▊ | 90201/352766 [05:33<27:17:39, 2.67it/s]
29
+
30
+
31
+
32
+
33
+ Training...: 26%|████████████████████████████████████▊ | 90251/352766 [05:53<27:03:18, 2.70it/s]
34
+
35
+
36
+
37
+
38
+ Training...: 26%|████████████████████████████████████▊ | 90301/352766 [06:13<27:18:44, 2.67it/s]
39
+
40
+
41
+
42
+
43
+ Training...: 26%|████████████████████████████████████▉ | 90351/352766 [06:33<27:03:43, 2.69it/s]
44
+
45
+
46
+
47
+
48
+ Training...: 26%|████████████████████████████████████▉ | 90401/352766 [06:53<27:17:44, 2.67it/s]
49
+
50
+
51
+
52
+
53
+ Training...: 26%|████████████████████████████████████▉ | 90451/352766 [07:14<27:00:34, 2.70it/s]
54
+
55
+
56
+
57
+
58
+ Training...: 26%|████████████████████████████████████▉ | 90501/352766 [07:34<27:15:23, 2.67it/s]
59
+
60
+
61
+
62
+
63
+ Training...: 26%|████████████████████████████████████▉ | 90551/352766 [07:54<27:07:13, 2.69it/s]
64
+
65
+
66
+
67
+
68
+ Training...: 26%|████████████████████████████████████▉ | 90601/352766 [08:14<27:13:48, 2.67it/s]
69
+
70
+
71
+
72
+
73
+ Training...: 26%|█████████████████████████████████████ | 90651/352766 [08:34<27:17:20, 2.67it/s]
74
+
75
+
76
+
77
+
78
+ Training...: 26%|█████████████████████████████████████ | 90701/352766 [08:54<27:16:04, 2.67it/s]
79
+
80
+
81
+
82
+
83
+ Training...: 26%|████████████████████���████████████████ | 90751/352766 [09:14<27:12:21, 2.68it/s]
84
+
85
+
86
+
87
+
88
+ Training...: 26%|█████████████████████████████████████ | 90801/352766 [09:34<27:12:10, 2.67it/s]
89
+
90
+
91
+
92
+
93
+ Training...: 26%|█████████████████████████████████████ | 90851/352766 [09:54<27:14:51, 2.67it/s]
94
+
95
+
96
+
97
+
98
+ Training...: 26%|█████████████████████████████████████ | 90901/352766 [10:14<26:59:51, 2.69it/s]
99
+
100
+
101
+
102
+
103
+ Training...: 26%|█████████████████████████████████████▏ | 90951/352766 [10:34<27:13:26, 2.67it/s]
104
+
105
+
106
+
107
+
108
+ Training...: 26%|█████████████████████████████████████▏ | 91001/352766 [10:54<27:01:45, 2.69it/s]
109
+
110
+
111
+
112
+
113
+ Training...: 26%|█████████████████████████████████████▏ | 91051/352766 [11:14<27:12:36, 2.67it/s]
114
+
115
+
116
+
117
+
118
+ Training...: 26%|█████████████████████████████████████▏ | 91101/352766 [11:34<27:01:38, 2.69it/s]
119
+
120
+
121
+
122
+
123
+ Training...: 26%|█████████████████████████████████████▏ | 91151/352766 [11:54<27:12:29, 2.67it/s]
124
+
125
+
126
+
127
+
128
+ Training...: 26%|█████████████████████████████████████▏ | 91201/352766 [12:14<27:05:33, 2.68it/s]
129
+
130
+
131
+
132
+
133
+ Training...: 26%|█████████████████████████████████████▏ | 91251/352766 [12:34<27:10:50, 2.67it/s]
134
+
135
+
136
+
137
+
138
+ Training...: 26%|█████████████████████████████████████▎ | 91301/352766 [12:54<27:11:15, 2.67it/s]
139
+
140
+
141
+
142
+
143
+ Training...: 26%|█████████████████████████████████████▎ | 91351/352766 [13:14<27:10:00, 2.67it/s]
144
+
145
+
146
+
147
+
148
+ Training...: 26%|█████████████████████████████████████▎ | 91401/352766 [13:34<27:10:10, 2.67it/s]
149
+
150
+
151
+
152
+
153
+ Training...: 26%|█████████████████████████████████████▎ | 91451/352766 [13:54<27:11:43, 2.67it/s]
154
+
155
+
156
+
157
+
158
+ Training...: 26%|█████████████████████████████████████▎ | 91501/352766 [14:14<27:10:40, 2.67it/s]
159
+
160
+
161
+
162
+
163
+ Training...: 26%|█████████████████████████████████████▎ | 91551/352766 [14:34<26:54:10, 2.70it/s]
164
+
165
+
166
+
167
+
168
+ Training...: 26%|█████████████████████████████████████▍ | 91601/352766 [14:54<27:07:35, 2.67it/s]
169
+
170
+
171
+
172
+
173
+ Training...: 26%|█████████████████████████████████████▍ | 91651/352766 [15:14<26:56:48, 2.69it/s]
174
+
175
+
176
+
177
+
178
+ Training...: 26%|█████████████████████████████████████▍ | 91701/352766 [15:34<27:08:13, 2.67it/s]
179
+
180
+
181
+
182
+
183
+ Training...: 26%|█████████████████████████████████████▍ | 91751/352766 [15:55<26:55:50, 2.69it/s]
184
+
185
+
186
+
187
+
188
+ Training...: 26%|█████████████████████████████████████▍ | 91801/352766 [16:15<27:08:24, 2.67it/s]
189
+
190
+
191
+
192
+
193
+ Training...: 26%|█████████████████████████████████████▍ | 91851/352766 [16:35<26:57:13, 2.69it/s]
194
+
195
+
196
+
197
+
198
+ Training...: 26%|█████████████████████████████████████▌ | 91901/352766 [16:55<27:06:54, 2.67it/s]
199
+
200
+
201
+
202
+
203
+ Training...: 26%|█████████████████████████████████████▌ | 91951/352766 [17:15<27:07:52, 2.67it/s]
204
+
205
+
206
+
207
+
208
+ Training...: 26%|█████████████████████████████████████▌ | 92001/352766 [17:35<27:05:59, 2.67it/s]
209
+ Training...: 26%|█████████████████████████████████████▌ | 92001/352766 [17:48<27:05:59, 2.67it/s]
210
+
211
+
212
+
213
+
214
+
215
+
216
+
217
+
218
+
219
+
220
+
221
+
222
+
223
+
224
+
225
+
226
+
227
+
228
+
229
+
230
+
231
+
232
+
233
+
234
+
235
+
236
+
237
+
238
+
239
+
240
+
241
+
242
+
243
+
244
+
245
+
246
+
247
+
248
+
249
+
250
+
251
+
252
+ Training...: 26%|█████████████████████████████████████▌ | 92051/352766 [19:33<27:07:03, 2.67it/s]
253
+
254
+
255
+
256
+
257
+ Training...: 26%|█████████████████████████████████████▌ | 92101/352766 [19:53<27:07:18, 2.67it/s]
258
+
259
+
260
+
261
+
262
+ Training...: 26%|█████████████████████████████████████▌ | 92151/352766 [20:14<27:06:37, 2.67it/s]
263
+
264
+
265
+
266
+
267
+ Training...: 26%|█████████████████████████████████████▋ | 92201/352766 [20:34<26:51:07, 2.70it/s]
268
+
269
+
270
+
271
+
272
+ Training...: 26%|█████████████████████████████████████▋ | 92251/352766 [20:54<27:05:26, 2.67it/s]
273
+
274
+
275
+
276
+
277
+ Training...: 26%|█████████████████████████████████████▋ | 92301/352766 [21:14<26:47:55, 2.70it/s]
278
+
279
+
280
+
281
+
282
+ Training...: 26%|█████████████████████████████████████▋ | 92351/352766 [21:34<27:03:55, 2.67it/s]
283
+
284
+
285
+
286
+
287
+ Training...: 26%|█████████████████████████████████████▋ | 92401/352766 [21:54<26:56:06, 2.69it/s]
288
+
289
+
290
+
291
+
292
+ Training...: 26%|██████████████████████████████���██████▋ | 92451/352766 [22:14<27:03:33, 2.67it/s]
293
+
294
+
295
+
296
+
297
+ Training...: 26%|█████████████████████████████████████▊ | 92501/352766 [22:34<26:36:37, 2.72it/s]
298
+
299
+
300
+
301
+
302
+
303
+ Training...: 26%|█████████████████████████████████████▌ | 92552/352766 [23:07<289:51:50, 4.01s/it]
304
+
305
+
306
+
307
+
308
+
309
+ Training...: 26%|█████████████████████████████████████▌ | 92602/352766 [23:27<289:47:16, 4.01s/it]
310
+
311
+
312
+
313
+
314
+
315
+ Training...: 26%|█████████████████████████████████████▌ | 92654/352766 [23:47<162:07:54, 2.24s/it]
316
+
317
+
318
+
319
+
320
+
321
+ Training...: 26%|█████████████████████████████████████▌ | 92704/352766 [24:07<162:28:16, 2.25s/it]
322
+
323
+
324
+
325
+
326
+
327
+ Training...: 26%|█████████████████████████████████████▌ | 92756/352766 [24:27<100:39:01, 1.39s/it]
328
+
329
+
330
+
331
+
332
+
333
+ Training...: 26%|█████████████████████████████████████▌ | 92806/352766 [24:47<100:39:52, 1.39s/it]
334
+
335
+
336
+
337
+
338
+
339
+ Training...: 26%|█████████████████████████████████████▋ | 92856/352766 [25:08<100:35:33, 1.39s/it]
340
+
341
+
342
+
343
+
344
+
345
+ Training...: 26%|█████████████████████████████████████▉ | 92908/352766 [25:28<66:16:29, 1.09it/s]
346
+
347
+
348
+
349
+
350
+
351
+ Training...: 26%|█████████████████████████████████████▋ | 92952/352766 [25:47<289:19:30, 4.01s/it]
352
+
353
+
354
+
355
+
356
+
357
+ Training...: 26%|█████████████████████████████████████▋ | 93002/352766 [26:07<289:25:26, 4.01s/it]
358
+
359
+
360
+
361
+
362
+
363
+ Training...: 26%|█████████████████████████████████████▋ | 93054/352766 [26:28<162:04:31, 2.25s/it]
364
+
365
+
366
+
367
+
368
+
369
+ Training...: 26%|█████████████████████████████████████▋ | 93106/352766 [26:48<100:24:31, 1.39s/it]
370
+
371
+
372
+
373
+
374
+
375
+ Training...: 26%|█████████████████████████████████████▊ | 93156/352766 [27:08<100:29:39, 1.39s/it]
376
+
377
+
378
+
379
+
380
+
381
+ Training...: 26%|██████████████████████████████████████ | 93208/352766 [27:28<66:04:11, 1.09it/s]
382
+
383
+
384
+
385
+
386
+
387
+ Training...: 26%|█████████████████████████████████████▊ | 93252/352766 [27:48<289:06:44, 4.01s/it]
388
+
389
+
390
+
391
+
392
+
393
+ Training...: 26%|█████████████████████████████████████▊ | 93302/352766 [28:08<288:57:36, 4.01s/it]
394
+
395
+
396
+
397
+
398
+
399
+ Training...: 26%|█████████████████████████████████████▊ | 93352/352766 [28:28<289:00:22, 4.01s/it]
400
+
401
+
402
+
403
+
404
+
405
+ Training...: 26%|█████████████████████████████████████▊ | 93406/352766 [28:48<100:19:22, 1.39s/it]
406
+
407
+
408
+
409
+
410
+
411
+ Training...: 26%|██████████████████████████████████████▏ | 93458/352766 [29:08<66:02:29, 1.09it/s]
412
+
413
+
414
+
415
+
416
+
417
+ Training...: 27%|██████████████████████████████████████▏ | 93508/352766 [29:28<66:03:24, 1.09it/s]
418
+
419
+
420
+
421
+
422
+
423
+ Training...: 27%|██████████████████████████████████████▏ | 93560/352766 [29:48<45:15:59, 1.59it/s]
424
+
425
+
426
+
427
+
428
+
429
+ Training...: 27%|██████████████████████████████████████▏ | 93610/352766 [30:08<45:16:44, 1.59it/s]
430
+
431
+
432
+
433
+
434
+
435
+ Training...: 27%|█████████████████████████████████████▉ | 93652/352766 [30:28<288:28:02, 4.01s/it]
436
+
437
+
438
+
439
+
440
+
441
+ Training...: 27%|█████████████████████████████████████▉ | 93704/352766 [30:48<161:32:21, 2.24s/it]
442
+
443
+
444
+
445
+
446
+
447
+ Training...: 27%|██████████████████████████████████████ | 93754/352766 [31:08<161:31:25, 2.25s/it]
448
+
449
+
450
+
451
+
452
+
453
+ Training...: 27%|██████████████████████████████████████ | 93804/352766 [31:28<161:29:49, 2.25s/it]
454
+
455
+
456
+
457
+
458
+
459
+ Training...: 27%|██████████████████████████████████████▎ | 93856/352766 [31:48<99:59:56, 1.39s/it]
460
+
461
+
462
+
463
+
464
+
465
+ Training...: 27%|██████████████████████████████████████ | 93906/352766 [32:08<100:12:30, 1.39s/it]
466
+
467
+
468
+
469
+
470
+
471
+ Training...: 27%|██████████████████████████████████████ | 93956/352766 [32:28<100:46:26, 1.40s/it]
472
+
473
+
474
+
475
+
476
+
477
+ Training...: 27%|██████████████████████████████████████▎ | 94001/352766 [32:48<26:38:53, 2.70it/s]
478
+ Step... (4000 | Loss: 2.5976178646087646, Learning Rate: 2.6376830646768212e-05)
479
+
480
+
481
+
482
+
483
+
484
+
485
+
486
+
487
+
488
+
489
+
490
+
491
+
492
+
493
+
494
+
495
+
496
+
497
+
498
+
499
+
500
+
501
+
502
+
503
+
504
+
505
+
506
+
507
+
508
+
509
+
510
+
511
+
512
+
513
+
514
+
515
+
516
+
517
+
518
+
519
+
520
+ Training...: 27%|██████████████████████████████████████▍ | 94051/352766 [34:15<27:00:04, 2.66it/s]
521
+
522
+
523
+
524
+
525
+ Training...: 27%|██████████████████████████████████████▍ | 94101/352766 [34:35<26:33:43, 2.71it/s]
526
+
527
+
528
+
529
+
530
+ Training...: 27%|██████████████████████████████████████▍ | 94151/352766 [34:55<26:51:02, 2.68it/s]
531
+
532
+
533
+
534
+
535
+ Training...: 27%|██████████████████████████████████████▍ | 94201/352766 [35:15<26:42:34, 2.69it/s]
536
+
537
+
538
+
539
+
540
+ Training...: 27%|██████████████████████████████████████▍ | 94251/352766 [35:35<26:50:54, 2.67it/s]
541
+
542
+
543
+
544
+
545
+ Training...: 27%|██████████████████████████████████████▍ | 94301/352766 [35:55<26:34:16, 2.70it/s]
546
+
547
+
548
+
549
+
550
+ Training...: 27%|██████████████████████████████████████▌ | 94351/352766 [36:15<26:49:47, 2.68it/s]
551
+
552
+
553
+
554
+
555
+ Training...: 27%|██████████████████████████████████████▌ | 94401/352766 [36:35<26:52:57, 2.67it/s]
556
+
557
+
558
+
559
+
560
+ Training...: 27%|██████████████████████████████████████▌ | 94451/352766 [36:55<26:40:59, 2.69it/s]
561
+
562
+
563
+
564
+
565
+ Training...: 27%|██████████████████████████████████████▌ | 94501/352766 [37:15<26:50:11, 2.67it/s]
566
+
567
+
wandb/run-20210715_142929-jw5go9rv/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210715_142929-jw5go9rv/files/wandb-metadata.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-15T14:29:31.841702",
5
+ "startedAt": "2021-07-15T14:29:29.769014",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=10000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=50",
22
+ "--eval_steps=2000",
23
+ "--num_train_epochs=2",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=10000",
26
+ "--learning_rate=3e-5",
27
+ "--per_device_train_batch_size=1",
28
+ "--per_device_eval_batch_size=1",
29
+ "--save_total_limit=5",
30
+ "--max_eval_samples=5000",
31
+ "--resume_from_checkpoint=./"
32
+ ],
33
+ "state": "running",
34
+ "program": "./run_mlm_flax_no_accum.py",
35
+ "codePath": "run_mlm_flax_no_accum.py",
36
+ "git": {
37
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
38
+ "commit": "0609cb016dd30db635378fb0cd251ec35eed4e53"
39
+ },
40
+ "email": null,
41
+ "root": "/home/dat/pino-roberta-base",
42
+ "host": "t1v-n-f5c06ea1-w-0",
43
+ "username": "dat",
44
+ "executable": "/home/dat/pino/bin/python"
45
+ }
wandb/run-20210715_142929-jw5go9rv/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"training_step": 4500, "learning_rate": 2.635526288941037e-05, "train_loss": 2.548762559890747, "_runtime": 2596, "_timestamp": 1626361965, "_step": 91, "eval_step": 4000, "eval_accuracy": 0.5539451241493225, "eval_loss": 2.4207205772399902}
wandb/run-20210715_142929-jw5go9rv/logs/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20210715_142929-jw5go9rv/logs/debug.log ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 14:29:29,770 INFO MainThread:699645 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-15 14:29:29,771 INFO MainThread:699645 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-15 14:29:29,771 INFO MainThread:699645 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_142929-jw5go9rv/logs/debug.log
4
+ 2021-07-15 14:29:29,771 INFO MainThread:699645 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_142929-jw5go9rv/logs/debug-internal.log
5
+ 2021-07-15 14:29:29,771 INFO MainThread:699645 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-15 14:29:29,771 INFO MainThread:699645 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-15 14:29:29,771 INFO MainThread:699645 [wandb_init.py:init():419] starting backend
9
+ 2021-07-15 14:29:29,771 INFO MainThread:699645 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-15 14:29:29,831 INFO MainThread:699645 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-15 14:29:29,891 INFO MainThread:699645 [backend.py:ensure_launched():139] started backend process with pid: 700904
12
+ 2021-07-15 14:29:29,893 INFO MainThread:699645 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-15 14:29:29,896 INFO MainThread:699645 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-15 14:29:29,897 INFO MainThread:699645 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-15 14:29:30,532 INFO MainThread:699645 [wandb_init.py:init():496] got version response
16
+ 2021-07-15 14:29:30,532 INFO MainThread:699645 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-15 14:29:30,726 INFO MainThread:699645 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-15 14:29:31,880 INFO MainThread:699645 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-15 14:29:31,880 INFO MainThread:699645 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-15 14:29:31,881 INFO MainThread:699645 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-15 14:29:31,883 INFO MainThread:699645 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-15 14:29:31,884 INFO MainThread:699645 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-15 14:29:31,889 INFO MainThread:699645 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_14-29-21_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 10000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 2000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-15 14:29:31,891 INFO MainThread:699645 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
25
+ 2021-07-15 14:29:31,893 INFO MainThread:699645 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 5000}
26
+ 2021-07-15 15:12:50,784 INFO MainThread:699645 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 255
27
+ 2021-07-15 15:12:50,785 INFO MainThread:699645 [wandb_run.py:_restore():1565] restore
wandb/run-20210715_142929-jw5go9rv/run-jw5go9rv.wandb ADDED
Binary file (202 kB). View file
 
wandb/run-20210715_151452-1w0sb5ma/files/config.yaml ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 4: 3.8.10
17
+ 5: 0.10.33
18
+ 6: 4.9.0.dev0
19
+ 8:
20
+ - 5
21
+ adafactor:
22
+ desc: null
23
+ value: false
24
+ adam_beta1:
25
+ desc: null
26
+ value: 0.9
27
+ adam_beta2:
28
+ desc: null
29
+ value: 0.98
30
+ adam_epsilon:
31
+ desc: null
32
+ value: 1.0e-08
33
+ cache_dir:
34
+ desc: null
35
+ value: null
36
+ config_name:
37
+ desc: null
38
+ value: ./
39
+ dataloader_drop_last:
40
+ desc: null
41
+ value: false
42
+ dataloader_num_workers:
43
+ desc: null
44
+ value: 0
45
+ dataloader_pin_memory:
46
+ desc: null
47
+ value: true
48
+ dataset_config_name:
49
+ desc: null
50
+ value: null
51
+ dataset_name:
52
+ desc: null
53
+ value: null
54
+ ddp_find_unused_parameters:
55
+ desc: null
56
+ value: null
57
+ debug:
58
+ desc: null
59
+ value: []
60
+ deepspeed:
61
+ desc: null
62
+ value: null
63
+ disable_tqdm:
64
+ desc: null
65
+ value: false
66
+ do_eval:
67
+ desc: null
68
+ value: false
69
+ do_predict:
70
+ desc: null
71
+ value: false
72
+ do_train:
73
+ desc: null
74
+ value: false
75
+ dtype:
76
+ desc: null
77
+ value: float32
78
+ eval_accumulation_steps:
79
+ desc: null
80
+ value: null
81
+ eval_steps:
82
+ desc: null
83
+ value: 4000
84
+ evaluation_strategy:
85
+ desc: null
86
+ value: IntervalStrategy.NO
87
+ fp16:
88
+ desc: null
89
+ value: false
90
+ fp16_backend:
91
+ desc: null
92
+ value: auto
93
+ fp16_full_eval:
94
+ desc: null
95
+ value: false
96
+ fp16_opt_level:
97
+ desc: null
98
+ value: O1
99
+ gradient_accumulation_steps:
100
+ desc: null
101
+ value: 1
102
+ greater_is_better:
103
+ desc: null
104
+ value: null
105
+ group_by_length:
106
+ desc: null
107
+ value: false
108
+ ignore_data_skip:
109
+ desc: null
110
+ value: false
111
+ label_names:
112
+ desc: null
113
+ value: null
114
+ label_smoothing_factor:
115
+ desc: null
116
+ value: 0.0
117
+ learning_rate:
118
+ desc: null
119
+ value: 2.64e-05
120
+ length_column_name:
121
+ desc: null
122
+ value: length
123
+ line_by_line:
124
+ desc: null
125
+ value: false
126
+ load_best_model_at_end:
127
+ desc: null
128
+ value: false
129
+ local_rank:
130
+ desc: null
131
+ value: -1
132
+ log_level:
133
+ desc: null
134
+ value: -1
135
+ log_level_replica:
136
+ desc: null
137
+ value: -1
138
+ log_on_each_node:
139
+ desc: null
140
+ value: true
141
+ logging_dir:
142
+ desc: null
143
+ value: ./runs/Jul15_15-14-44_t1v-n-f5c06ea1-w-0
144
+ logging_first_step:
145
+ desc: null
146
+ value: false
147
+ logging_steps:
148
+ desc: null
149
+ value: 50
150
+ logging_strategy:
151
+ desc: null
152
+ value: IntervalStrategy.STEPS
153
+ lr_scheduler_type:
154
+ desc: null
155
+ value: SchedulerType.LINEAR
156
+ max_eval_samples:
157
+ desc: null
158
+ value: 4000
159
+ max_grad_norm:
160
+ desc: null
161
+ value: 1.0
162
+ max_seq_length:
163
+ desc: null
164
+ value: 4096
165
+ max_steps:
166
+ desc: null
167
+ value: -1
168
+ metric_for_best_model:
169
+ desc: null
170
+ value: null
171
+ mlm_probability:
172
+ desc: null
173
+ value: 0.15
174
+ model_name_or_path:
175
+ desc: null
176
+ value: null
177
+ model_type:
178
+ desc: null
179
+ value: big_bird
180
+ mp_parameters:
181
+ desc: null
182
+ value: ''
183
+ no_cuda:
184
+ desc: null
185
+ value: false
186
+ num_train_epochs:
187
+ desc: null
188
+ value: 5.0
189
+ output_dir:
190
+ desc: null
191
+ value: ./
192
+ overwrite_cache:
193
+ desc: null
194
+ value: false
195
+ overwrite_output_dir:
196
+ desc: null
197
+ value: true
198
+ pad_to_max_length:
199
+ desc: null
200
+ value: false
201
+ past_index:
202
+ desc: null
203
+ value: -1
204
+ per_device_eval_batch_size:
205
+ desc: null
206
+ value: 1
207
+ per_device_train_batch_size:
208
+ desc: null
209
+ value: 1
210
+ per_gpu_eval_batch_size:
211
+ desc: null
212
+ value: null
213
+ per_gpu_train_batch_size:
214
+ desc: null
215
+ value: null
216
+ prediction_loss_only:
217
+ desc: null
218
+ value: false
219
+ preprocessing_num_workers:
220
+ desc: null
221
+ value: 96
222
+ push_to_hub:
223
+ desc: null
224
+ value: true
225
+ push_to_hub_model_id:
226
+ desc: null
227
+ value: ''
228
+ push_to_hub_organization:
229
+ desc: null
230
+ value: null
231
+ push_to_hub_token:
232
+ desc: null
233
+ value: null
234
+ remove_unused_columns:
235
+ desc: null
236
+ value: true
237
+ report_to:
238
+ desc: null
239
+ value:
240
+ - tensorboard
241
+ - wandb
242
+ resume_from_checkpoint:
243
+ desc: null
244
+ value: ./
245
+ run_name:
246
+ desc: null
247
+ value: ./
248
+ save_on_each_node:
249
+ desc: null
250
+ value: false
251
+ save_steps:
252
+ desc: null
253
+ value: 10000
254
+ save_strategy:
255
+ desc: null
256
+ value: IntervalStrategy.STEPS
257
+ save_total_limit:
258
+ desc: null
259
+ value: 5
260
+ seed:
261
+ desc: null
262
+ value: 42
263
+ sharded_ddp:
264
+ desc: null
265
+ value: []
266
+ skip_memory_metrics:
267
+ desc: null
268
+ value: true
269
+ tokenizer_name:
270
+ desc: null
271
+ value: ./
272
+ tpu_metrics_debug:
273
+ desc: null
274
+ value: false
275
+ tpu_num_cores:
276
+ desc: null
277
+ value: null
278
+ train_ref_file:
279
+ desc: null
280
+ value: null
281
+ use_fast_tokenizer:
282
+ desc: null
283
+ value: true
284
+ use_legacy_prediction_loop:
285
+ desc: null
286
+ value: false
287
+ validation_ref_file:
288
+ desc: null
289
+ value: null
290
+ validation_split_percentage:
291
+ desc: null
292
+ value: 5
293
+ warmup_ratio:
294
+ desc: null
295
+ value: 0.0
296
+ warmup_steps:
297
+ desc: null
298
+ value: 10000
299
+ weight_decay:
300
+ desc: null
301
+ value: 0.0095
wandb/run-20210715_151452-1w0sb5ma/files/output.log ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [15:15:07] - INFO - absl - Restoring checkpoint from ./checkpoint_90000
2
+ tcmalloc: large alloc 1530273792 bytes == 0x99e4e000 @ 0x7f457f408680 0x7f457f429824 0x5b9a14 0x50b2ae 0x50cb1b 0x5a6f17 0x5f3010 0x56fd36 0x568d9a 0x5f5b33 0x56aadf 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7f457f21d0b3 0x5f96de
3
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
4
+ warnings.warn(
5
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
6
+ warnings.warn(
7
+ Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s]
8
+
9
+
10
+
11
+
12
+
13
+
14
+
15
+
16
+
17
+
18
+
19
+
20
+
21
+
22
+
23
+
24
+
25
+
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+
35
+
36
+
37
+
38
+
39
+
40
+
41
+
42
+
43
+
44
+
45
+
46
+
47
+
48
+
49
+
50
+
51
+
52
+
53
+
54
+
55
+
56
+
57
+
58
+
59
+
60
+
61
+
62
+
63
+
64
+
65
+
66
+
67
+
68
+
69
+
70
+
71
+
72
+
73
+
74
+
75
+
76
+
77
+
78
+
79
+
80
+
81
+
82
+
83
+
84
+
wandb/run-20210715_151452-1w0sb5ma/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210715_151452-1w0sb5ma/files/wandb-metadata.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-15T15:14:54.759442",
5
+ "startedAt": "2021-07-15T15:14:52.748648",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=10000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=50",
22
+ "--eval_steps=4000",
23
+ "--num_train_epochs=5",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=10000",
26
+ "--learning_rate=2.64e-5",
27
+ "--per_device_train_batch_size=1",
28
+ "--per_device_eval_batch_size=1",
29
+ "--save_total_limit=5",
30
+ "--max_eval_samples=4000",
31
+ "--resume_from_checkpoint=./"
32
+ ],
33
+ "state": "running",
34
+ "program": "./run_mlm_flax_no_accum.py",
35
+ "codePath": "run_mlm_flax_no_accum.py",
36
+ "git": {
37
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
38
+ "commit": "0609cb016dd30db635378fb0cd251ec35eed4e53"
39
+ },
40
+ "email": null,
41
+ "root": "/home/dat/pino-roberta-base",
42
+ "host": "t1v-n-f5c06ea1-w-0",
43
+ "username": "dat",
44
+ "executable": "/home/dat/pino/bin/python"
45
+ }
wandb/run-20210715_151452-1w0sb5ma/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
wandb/run-20210715_151452-1w0sb5ma/logs/debug-internal.log ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 15:14:53,445 INFO MainThread:705712 [internal.py:wandb_internal():88] W&B internal server running at pid: 705712, started at: 2021-07-15 15:14:53.445132
2
+ 2021-07-15 15:14:53,447 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: check_version
3
+ 2021-07-15 15:14:53,448 INFO WriterThread:705712 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/run-1w0sb5ma.wandb
4
+ 2021-07-15 15:14:53,449 DEBUG SenderThread:705712 [sender.py:send():179] send: header
5
+ 2021-07-15 15:14:53,449 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: check_version
6
+ 2021-07-15 15:14:53,487 DEBUG SenderThread:705712 [sender.py:send():179] send: run
7
+ 2021-07-15 15:14:53,663 INFO SenderThread:705712 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files
8
+ 2021-07-15 15:14:53,663 INFO SenderThread:705712 [sender.py:_start_run_threads():716] run started: 1w0sb5ma with start time 1626362092
9
+ 2021-07-15 15:14:53,663 DEBUG SenderThread:705712 [sender.py:send():179] send: summary
10
+ 2021-07-15 15:14:53,664 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: run_start
11
+ 2021-07-15 15:14:53,664 INFO SenderThread:705712 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
12
+ 2021-07-15 15:14:54,666 INFO Thread-8 :705712 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/wandb-summary.json
13
+ 2021-07-15 15:14:54,759 DEBUG HandlerThread:705712 [meta.py:__init__():39] meta init
14
+ 2021-07-15 15:14:54,759 DEBUG HandlerThread:705712 [meta.py:__init__():53] meta init done
15
+ 2021-07-15 15:14:54,759 DEBUG HandlerThread:705712 [meta.py:probe():210] probe
16
+ 2021-07-15 15:14:54,760 DEBUG HandlerThread:705712 [meta.py:_setup_git():200] setup git
17
+ 2021-07-15 15:14:54,790 DEBUG HandlerThread:705712 [meta.py:_setup_git():207] setup git done
18
+ 2021-07-15 15:14:54,790 DEBUG HandlerThread:705712 [meta.py:_save_pip():57] save pip
19
+ 2021-07-15 15:14:54,790 DEBUG HandlerThread:705712 [meta.py:_save_pip():71] save pip done
20
+ 2021-07-15 15:14:54,791 DEBUG HandlerThread:705712 [meta.py:probe():252] probe done
21
+ 2021-07-15 15:14:54,794 DEBUG SenderThread:705712 [sender.py:send():179] send: files
22
+ 2021-07-15 15:14:54,794 INFO SenderThread:705712 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
23
+ 2021-07-15 15:14:54,802 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
24
+ 2021-07-15 15:14:54,802 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
25
+ 2021-07-15 15:14:54,933 DEBUG SenderThread:705712 [sender.py:send():179] send: config
26
+ 2021-07-15 15:14:54,934 DEBUG SenderThread:705712 [sender.py:send():179] send: config
27
+ 2021-07-15 15:14:54,934 DEBUG SenderThread:705712 [sender.py:send():179] send: config
28
+ 2021-07-15 15:14:55,295 INFO Thread-11 :705712 [upload_job.py:push():137] Uploaded file /tmp/tmpoinq2v7vwandb/yypwdo2o-wandb-metadata.json
29
+ 2021-07-15 15:14:55,665 INFO Thread-8 :705712 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
30
+ 2021-07-15 15:14:55,666 INFO Thread-8 :705712 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/requirements.txt
31
+ 2021-07-15 15:14:55,666 INFO Thread-8 :705712 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/wandb-metadata.json
32
+ 2021-07-15 15:15:09,950 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
33
+ 2021-07-15 15:15:09,950 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
34
+ 2021-07-15 15:15:11,672 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
35
+ 2021-07-15 15:15:13,673 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
36
+ 2021-07-15 15:15:22,842 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
37
+ 2021-07-15 15:15:24,678 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/config.yaml
38
+ 2021-07-15 15:15:25,119 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
39
+ 2021-07-15 15:15:25,119 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
40
+ 2021-07-15 15:15:40,251 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
41
+ 2021-07-15 15:15:40,251 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
42
+ 2021-07-15 15:15:52,920 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
43
+ 2021-07-15 15:15:55,385 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
44
+ 2021-07-15 15:15:55,386 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
45
+ 2021-07-15 15:16:10,516 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
46
+ 2021-07-15 15:16:10,516 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
47
+ 2021-07-15 15:16:22,997 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
48
+ 2021-07-15 15:16:25,648 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
49
+ 2021-07-15 15:16:25,648 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
50
+ 2021-07-15 15:16:40,817 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
51
+ 2021-07-15 15:16:40,818 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
52
+ 2021-07-15 15:16:53,062 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
53
+ 2021-07-15 15:16:55,950 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
54
+ 2021-07-15 15:16:55,950 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
55
+ 2021-07-15 15:17:11,081 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
56
+ 2021-07-15 15:17:11,082 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
57
+ 2021-07-15 15:17:23,127 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
58
+ 2021-07-15 15:17:26,214 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
59
+ 2021-07-15 15:17:26,214 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
60
+ 2021-07-15 15:17:41,346 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
61
+ 2021-07-15 15:17:41,346 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
62
+ 2021-07-15 15:17:53,196 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
63
+ 2021-07-15 15:17:56,480 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
64
+ 2021-07-15 15:17:56,480 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
65
+ 2021-07-15 15:18:11,620 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
66
+ 2021-07-15 15:18:11,620 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
67
+ 2021-07-15 15:18:23,271 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
68
+ 2021-07-15 15:18:26,752 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
69
+ 2021-07-15 15:18:26,752 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
70
+ 2021-07-15 15:18:41,889 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
71
+ 2021-07-15 15:18:41,890 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
72
+ 2021-07-15 15:18:53,347 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
73
+ 2021-07-15 15:18:57,024 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
74
+ 2021-07-15 15:18:57,025 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
75
+ 2021-07-15 15:19:12,154 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
76
+ 2021-07-15 15:19:12,155 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
77
+ 2021-07-15 15:19:23,419 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
78
+ 2021-07-15 15:19:27,288 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
79
+ 2021-07-15 15:19:27,289 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
80
+ 2021-07-15 15:19:42,421 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
81
+ 2021-07-15 15:19:42,421 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
82
+ 2021-07-15 15:19:53,494 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
83
+ 2021-07-15 15:19:57,557 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
84
+ 2021-07-15 15:19:57,558 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
85
+ 2021-07-15 15:20:12,693 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
86
+ 2021-07-15 15:20:12,693 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
87
+ 2021-07-15 15:20:23,567 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
88
+ 2021-07-15 15:20:27,838 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
89
+ 2021-07-15 15:20:27,839 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
90
+ 2021-07-15 15:20:33,798 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
91
+ 2021-07-15 15:20:42,987 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
92
+ 2021-07-15 15:20:42,988 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
93
+ 2021-07-15 15:20:53,641 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
94
+ 2021-07-15 15:20:58,142 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
95
+ 2021-07-15 15:20:58,142 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
96
+ 2021-07-15 15:21:13,284 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
97
+ 2021-07-15 15:21:13,284 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
98
+ 2021-07-15 15:21:23,721 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
99
+ 2021-07-15 15:21:28,415 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
100
+ 2021-07-15 15:21:28,415 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
101
+ 2021-07-15 15:21:43,547 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
102
+ 2021-07-15 15:21:43,547 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
103
+ 2021-07-15 15:21:53,805 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
104
+ 2021-07-15 15:21:58,679 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
105
+ 2021-07-15 15:21:58,679 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
106
+ 2021-07-15 15:22:13,810 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
107
+ 2021-07-15 15:22:13,810 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
108
+ 2021-07-15 15:22:23,885 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
109
+ 2021-07-15 15:22:28,944 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
110
+ 2021-07-15 15:22:28,945 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
111
+ 2021-07-15 15:22:34,846 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
112
+ 2021-07-15 15:22:44,090 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
113
+ 2021-07-15 15:22:44,090 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
114
+ 2021-07-15 15:22:53,961 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
115
+ 2021-07-15 15:22:59,232 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
116
+ 2021-07-15 15:22:59,233 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
117
+ 2021-07-15 15:23:14,386 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
118
+ 2021-07-15 15:23:14,387 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
119
+ 2021-07-15 15:23:24,044 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
120
+ 2021-07-15 15:23:29,516 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
121
+ 2021-07-15 15:23:29,517 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
122
+ 2021-07-15 15:23:44,659 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
123
+ 2021-07-15 15:23:44,660 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
124
+ 2021-07-15 15:23:54,126 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
125
+ 2021-07-15 15:23:59,793 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
126
+ 2021-07-15 15:23:59,793 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
127
+ 2021-07-15 15:24:14,932 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
128
+ 2021-07-15 15:24:14,932 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
129
+ 2021-07-15 15:24:24,201 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
130
+ 2021-07-15 15:24:30,066 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
131
+ 2021-07-15 15:24:30,066 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
132
+ 2021-07-15 15:24:45,203 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
133
+ 2021-07-15 15:24:45,204 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
134
+ 2021-07-15 15:24:46,903 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
135
+ 2021-07-15 15:24:48,904 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
136
+ 2021-07-15 15:24:50,904 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
137
+ 2021-07-15 15:24:52,905 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
138
+ 2021-07-15 15:24:54,284 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
139
+ 2021-07-15 15:24:54,906 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
140
+ 2021-07-15 15:24:56,907 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
141
+ 2021-07-15 15:24:58,908 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
142
+ 2021-07-15 15:25:00,366 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
143
+ 2021-07-15 15:25:00,367 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
144
+ 2021-07-15 15:25:00,908 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
145
+ 2021-07-15 15:25:02,909 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
146
+ 2021-07-15 15:25:04,910 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
147
+ 2021-07-15 15:25:06,911 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
148
+ 2021-07-15 15:25:08,912 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
149
+ 2021-07-15 15:25:10,912 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
150
+ 2021-07-15 15:25:12,913 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
151
+ 2021-07-15 15:25:14,914 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
152
+ 2021-07-15 15:25:15,506 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
153
+ 2021-07-15 15:25:15,506 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
154
+ 2021-07-15 15:25:16,915 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
155
+ 2021-07-15 15:25:18,916 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
156
+ 2021-07-15 15:25:20,917 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
157
+ 2021-07-15 15:25:22,917 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
158
+ 2021-07-15 15:25:24,360 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
159
+ 2021-07-15 15:25:24,918 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
160
+ 2021-07-15 15:25:26,919 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
161
+ 2021-07-15 15:25:28,920 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
162
+ 2021-07-15 15:25:30,642 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
163
+ 2021-07-15 15:25:30,642 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
164
+ 2021-07-15 15:25:30,921 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
165
+ 2021-07-15 15:25:32,921 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
166
+ 2021-07-15 15:25:34,922 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
167
+ 2021-07-15 15:25:36,923 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
168
+ 2021-07-15 15:25:38,924 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
169
+ 2021-07-15 15:25:40,924 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
170
+ 2021-07-15 15:25:42,925 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
171
+ 2021-07-15 15:25:44,926 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
172
+ 2021-07-15 15:25:45,773 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
173
+ 2021-07-15 15:25:45,773 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
174
+ 2021-07-15 15:25:46,927 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
175
+ 2021-07-15 15:25:48,927 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
176
+ 2021-07-15 15:25:50,928 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
177
+ 2021-07-15 15:25:52,929 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
178
+ 2021-07-15 15:25:54,433 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
179
+ 2021-07-15 15:25:54,930 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
180
+ 2021-07-15 15:25:56,930 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
181
+ 2021-07-15 15:25:58,931 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
182
+ 2021-07-15 15:26:00,906 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
183
+ 2021-07-15 15:26:00,906 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
184
+ 2021-07-15 15:26:00,932 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
185
+ 2021-07-15 15:26:02,932 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
186
+ 2021-07-15 15:26:04,933 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
187
+ 2021-07-15 15:26:06,934 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
188
+ 2021-07-15 15:26:08,935 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
189
+ 2021-07-15 15:26:10,935 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
190
+ 2021-07-15 15:26:12,936 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
191
+ 2021-07-15 15:26:14,937 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
192
+ 2021-07-15 15:26:16,041 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
193
+ 2021-07-15 15:26:16,041 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
194
+ 2021-07-15 15:26:16,937 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
195
+ 2021-07-15 15:26:18,938 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
196
+ 2021-07-15 15:26:20,939 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
197
+ 2021-07-15 15:26:22,939 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
198
+ 2021-07-15 15:26:24,503 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
199
+ 2021-07-15 15:26:24,940 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
200
+ 2021-07-15 15:26:26,941 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
201
+ 2021-07-15 15:26:28,942 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
202
+ 2021-07-15 15:26:30,943 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
203
+ 2021-07-15 15:26:31,176 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
204
+ 2021-07-15 15:26:31,177 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
205
+ 2021-07-15 15:26:32,943 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
206
+ 2021-07-15 15:26:34,944 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
207
+ 2021-07-15 15:26:36,945 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
208
+ 2021-07-15 15:26:38,946 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
209
+ 2021-07-15 15:26:40,947 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
210
+ 2021-07-15 15:26:42,947 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
211
+ 2021-07-15 15:26:44,948 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
212
+ 2021-07-15 15:26:46,307 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
213
+ 2021-07-15 15:26:46,308 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
214
+ 2021-07-15 15:26:46,949 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
215
+ 2021-07-15 15:26:48,950 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
216
+ 2021-07-15 15:26:50,950 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
217
+ 2021-07-15 15:26:52,951 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
218
+ 2021-07-15 15:26:54,573 DEBUG SenderThread:705712 [sender.py:send():179] send: stats
219
+ 2021-07-15 15:26:54,952 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
220
+ 2021-07-15 15:26:56,952 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
221
+ 2021-07-15 15:26:58,953 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
222
+ 2021-07-15 15:27:00,954 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
223
+ 2021-07-15 15:27:01,439 DEBUG HandlerThread:705712 [handler.py:handle_request():124] handle_request: stop_status
224
+ 2021-07-15 15:27:01,439 DEBUG SenderThread:705712 [sender.py:send_request():193] send_request: stop_status
225
+ 2021-07-15 15:27:02,955 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
226
+ 2021-07-15 15:27:04,955 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
227
+ 2021-07-15 15:27:06,956 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
228
+ 2021-07-15 15:27:08,957 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
229
+ 2021-07-15 15:27:10,958 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
230
+ 2021-07-15 15:27:12,958 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
231
+ 2021-07-15 15:27:14,959 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
232
+ 2021-07-15 15:27:15,923 WARNING MainThread:705712 [internal.py:wandb_internal():147] Internal process interrupt: 1
233
+ 2021-07-15 15:27:16,960 INFO Thread-8 :705712 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/files/output.log
234
+ 2021-07-15 15:27:17,173 WARNING MainThread:705712 [internal.py:wandb_internal():147] Internal process interrupt: 2
235
+ 2021-07-15 15:27:17,173 ERROR MainThread:705712 [internal.py:wandb_internal():150] Internal process interrupted.
236
+ 2021-07-15 15:27:17,481 INFO SenderThread:705712 [sender.py:finish():945] shutting down sender
237
+ 2021-07-15 15:27:17,481 INFO WriterThread:705712 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/run-1w0sb5ma.wandb
238
+ 2021-07-15 15:27:17,481 INFO SenderThread:705712 [dir_watcher.py:finish():282] shutting down directory watcher
239
+ 2021-07-15 15:27:17,481 INFO HandlerThread:705712 [handler.py:finish():638] shutting down handler
240
+ 2021-07-15 15:27:17,643 INFO MainThread:705712 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210715_151452-1w0sb5ma/logs/debug.log ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 15:14:52,750 INFO MainThread:704455 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-15 15:14:52,750 INFO MainThread:704455 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-15 15:14:52,750 INFO MainThread:704455 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/logs/debug.log
4
+ 2021-07-15 15:14:52,750 INFO MainThread:704455 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_151452-1w0sb5ma/logs/debug-internal.log
5
+ 2021-07-15 15:14:52,750 INFO MainThread:704455 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-15 15:14:52,750 INFO MainThread:704455 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-15 15:14:52,751 INFO MainThread:704455 [wandb_init.py:init():419] starting backend
9
+ 2021-07-15 15:14:52,751 INFO MainThread:704455 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-15 15:14:52,798 INFO MainThread:704455 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-15 15:14:52,844 INFO MainThread:704455 [backend.py:ensure_launched():139] started backend process with pid: 705712
12
+ 2021-07-15 15:14:52,846 INFO MainThread:704455 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-15 15:14:52,849 INFO MainThread:704455 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-15 15:14:52,850 INFO MainThread:704455 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-15 15:14:53,485 INFO MainThread:704455 [wandb_init.py:init():496] got version response
16
+ 2021-07-15 15:14:53,485 INFO MainThread:704455 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-15 15:14:53,663 INFO MainThread:704455 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-15 15:14:54,797 INFO MainThread:704455 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-15 15:14:54,797 INFO MainThread:704455 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-15 15:14:54,798 INFO MainThread:704455 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-15 15:14:54,800 INFO MainThread:704455 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-15 15:14:54,800 INFO MainThread:704455 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-15 15:14:54,806 INFO MainThread:704455 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 2.64e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_15-14-44_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 10000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 4000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-15 15:14:54,807 INFO MainThread:704455 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
25
+ 2021-07-15 15:14:54,809 INFO MainThread:704455 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 4000}
26
+ 2021-07-15 15:27:16,260 INFO MainThread:704455 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 255
27
+ 2021-07-15 15:27:16,261 INFO MainThread:704455 [wandb_run.py:_restore():1565] restore
wandb/run-20210715_151452-1w0sb5ma/run-1w0sb5ma.wandb ADDED
Binary file (33.6 kB). View file
 
wandb/run-20210715_152938-8qznp93p/files/config.yaml ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 4: 3.8.10
17
+ 5: 0.10.33
18
+ 6: 4.9.0.dev0
19
+ 8:
20
+ - 5
21
+ adafactor:
22
+ desc: null
23
+ value: false
24
+ adam_beta1:
25
+ desc: null
26
+ value: 0.9
27
+ adam_beta2:
28
+ desc: null
29
+ value: 0.98
30
+ adam_epsilon:
31
+ desc: null
32
+ value: 1.0e-08
33
+ cache_dir:
34
+ desc: null
35
+ value: null
36
+ config_name:
37
+ desc: null
38
+ value: ./
39
+ dataloader_drop_last:
40
+ desc: null
41
+ value: false
42
+ dataloader_num_workers:
43
+ desc: null
44
+ value: 0
45
+ dataloader_pin_memory:
46
+ desc: null
47
+ value: true
48
+ dataset_config_name:
49
+ desc: null
50
+ value: null
51
+ dataset_name:
52
+ desc: null
53
+ value: null
54
+ ddp_find_unused_parameters:
55
+ desc: null
56
+ value: null
57
+ debug:
58
+ desc: null
59
+ value: []
60
+ deepspeed:
61
+ desc: null
62
+ value: null
63
+ disable_tqdm:
64
+ desc: null
65
+ value: false
66
+ do_eval:
67
+ desc: null
68
+ value: false
69
+ do_predict:
70
+ desc: null
71
+ value: false
72
+ do_train:
73
+ desc: null
74
+ value: false
75
+ dtype:
76
+ desc: null
77
+ value: float32
78
+ eval_accumulation_steps:
79
+ desc: null
80
+ value: null
81
+ eval_steps:
82
+ desc: null
83
+ value: 3000
84
+ evaluation_strategy:
85
+ desc: null
86
+ value: IntervalStrategy.NO
87
+ fp16:
88
+ desc: null
89
+ value: false
90
+ fp16_backend:
91
+ desc: null
92
+ value: auto
93
+ fp16_full_eval:
94
+ desc: null
95
+ value: false
96
+ fp16_opt_level:
97
+ desc: null
98
+ value: O1
99
+ gradient_accumulation_steps:
100
+ desc: null
101
+ value: 1
102
+ greater_is_better:
103
+ desc: null
104
+ value: null
105
+ group_by_length:
106
+ desc: null
107
+ value: false
108
+ ignore_data_skip:
109
+ desc: null
110
+ value: false
111
+ label_names:
112
+ desc: null
113
+ value: null
114
+ label_smoothing_factor:
115
+ desc: null
116
+ value: 0.0
117
+ learning_rate:
118
+ desc: null
119
+ value: 3.0e-05
120
+ length_column_name:
121
+ desc: null
122
+ value: length
123
+ line_by_line:
124
+ desc: null
125
+ value: false
126
+ load_best_model_at_end:
127
+ desc: null
128
+ value: false
129
+ local_rank:
130
+ desc: null
131
+ value: -1
132
+ log_level:
133
+ desc: null
134
+ value: -1
135
+ log_level_replica:
136
+ desc: null
137
+ value: -1
138
+ log_on_each_node:
139
+ desc: null
140
+ value: true
141
+ logging_dir:
142
+ desc: null
143
+ value: ./runs/Jul15_15-29-30_t1v-n-f5c06ea1-w-0
144
+ logging_first_step:
145
+ desc: null
146
+ value: false
147
+ logging_steps:
148
+ desc: null
149
+ value: 50
150
+ logging_strategy:
151
+ desc: null
152
+ value: IntervalStrategy.STEPS
153
+ lr_scheduler_type:
154
+ desc: null
155
+ value: SchedulerType.LINEAR
156
+ max_eval_samples:
157
+ desc: null
158
+ value: 4000
159
+ max_grad_norm:
160
+ desc: null
161
+ value: 1.0
162
+ max_seq_length:
163
+ desc: null
164
+ value: 4096
165
+ max_steps:
166
+ desc: null
167
+ value: -1
168
+ metric_for_best_model:
169
+ desc: null
170
+ value: null
171
+ mlm_probability:
172
+ desc: null
173
+ value: 0.15
174
+ model_name_or_path:
175
+ desc: null
176
+ value: null
177
+ model_type:
178
+ desc: null
179
+ value: big_bird
180
+ mp_parameters:
181
+ desc: null
182
+ value: ''
183
+ no_cuda:
184
+ desc: null
185
+ value: false
186
+ num_train_epochs:
187
+ desc: null
188
+ value: 2.0
189
+ output_dir:
190
+ desc: null
191
+ value: ./
192
+ overwrite_cache:
193
+ desc: null
194
+ value: false
195
+ overwrite_output_dir:
196
+ desc: null
197
+ value: true
198
+ pad_to_max_length:
199
+ desc: null
200
+ value: false
201
+ past_index:
202
+ desc: null
203
+ value: -1
204
+ per_device_eval_batch_size:
205
+ desc: null
206
+ value: 1
207
+ per_device_train_batch_size:
208
+ desc: null
209
+ value: 1
210
+ per_gpu_eval_batch_size:
211
+ desc: null
212
+ value: null
213
+ per_gpu_train_batch_size:
214
+ desc: null
215
+ value: null
216
+ prediction_loss_only:
217
+ desc: null
218
+ value: false
219
+ preprocessing_num_workers:
220
+ desc: null
221
+ value: 96
222
+ push_to_hub:
223
+ desc: null
224
+ value: true
225
+ push_to_hub_model_id:
226
+ desc: null
227
+ value: ''
228
+ push_to_hub_organization:
229
+ desc: null
230
+ value: null
231
+ push_to_hub_token:
232
+ desc: null
233
+ value: null
234
+ remove_unused_columns:
235
+ desc: null
236
+ value: true
237
+ report_to:
238
+ desc: null
239
+ value:
240
+ - tensorboard
241
+ - wandb
242
+ resume_from_checkpoint:
243
+ desc: null
244
+ value: ./
245
+ run_name:
246
+ desc: null
247
+ value: ./
248
+ save_on_each_node:
249
+ desc: null
250
+ value: false
251
+ save_steps:
252
+ desc: null
253
+ value: 10000
254
+ save_strategy:
255
+ desc: null
256
+ value: IntervalStrategy.STEPS
257
+ save_total_limit:
258
+ desc: null
259
+ value: 5
260
+ seed:
261
+ desc: null
262
+ value: 42
263
+ sharded_ddp:
264
+ desc: null
265
+ value: []
266
+ skip_memory_metrics:
267
+ desc: null
268
+ value: true
269
+ tokenizer_name:
270
+ desc: null
271
+ value: ./
272
+ tpu_metrics_debug:
273
+ desc: null
274
+ value: false
275
+ tpu_num_cores:
276
+ desc: null
277
+ value: null
278
+ train_ref_file:
279
+ desc: null
280
+ value: null
281
+ use_fast_tokenizer:
282
+ desc: null
283
+ value: true
284
+ use_legacy_prediction_loop:
285
+ desc: null
286
+ value: false
287
+ validation_ref_file:
288
+ desc: null
289
+ value: null
290
+ validation_split_percentage:
291
+ desc: null
292
+ value: 5
293
+ warmup_ratio:
294
+ desc: null
295
+ value: 0.0
296
+ warmup_steps:
297
+ desc: null
298
+ value: 10000
299
+ weight_decay:
300
+ desc: null
301
+ value: 0.0095
wandb/run-20210715_152938-8qznp93p/files/output.log ADDED
@@ -0,0 +1,1222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [15:29:53] - INFO - absl - Restoring checkpoint from ./checkpoint_90000
2
+ tcmalloc: large alloc 1530273792 bytes == 0x9ab8a000 @ 0x7f3586844680 0x7f3586865824 0x5b9a14 0x50b2ae 0x50cb1b 0x5a6f17 0x5f3010 0x56fd36 0x568d9a 0x5f5b33 0x56aadf 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7f35866590b3 0x5f96de
3
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
4
+ warnings.warn(
5
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
6
+ warnings.warn(
7
+ Epoch ... (1/2): 0%| | 0/2 [00:00<?, ?it/s]
8
+
9
+
10
+
11
+
12
+
13
+ Training...: 26%|████████████████████████████████████▊ | 90051/352766 [04:22<27:23:19, 2.66it/s]
14
+
15
+
16
+
17
+
18
+ Training...: 26%|████████████████████████████████████▊ | 90101/352766 [04:42<27:20:01, 2.67it/s]
19
+
20
+
21
+
22
+
23
+ Training...: 26%|████████████████████████████████████▊ | 90151/352766 [05:02<27:02:44, 2.70it/s]
24
+
25
+
26
+
27
+
28
+ Training...: 26%|████████████████████████████████████▊ | 90201/352766 [05:22<27:16:45, 2.67it/s]
29
+
30
+
31
+
32
+
33
+
34
+ Training...: 26%|████████████████████████████████████▌ | 90252/352766 [05:55<292:20:28, 4.01s/it]
35
+
36
+
37
+
38
+
39
+
40
+ Training...: 26%|████████████████████████████████████▌ | 90302/352766 [06:15<292:23:39, 4.01s/it]
41
+
42
+
43
+
44
+
45
+
46
+ Training...: 26%|████████████████████████████████████▋ | 90354/352766 [06:35<163:32:02, 2.24s/it]
47
+
48
+
49
+
50
+
51
+
52
+ Training...: 26%|████████████████████████████████████▋ | 90406/352766 [06:56<101:35:29, 1.39s/it]
53
+
54
+
55
+
56
+
57
+
58
+ Training...: 26%|████████████████████████████████████▋ | 90456/352766 [07:16<101:34:02, 1.39s/it]
59
+
60
+
61
+
62
+
63
+
64
+ Training...: 26%|████████████████████████████████████▉ | 90508/352766 [07:36<66:54:43, 1.09it/s]
65
+
66
+
67
+
68
+
69
+
70
+ Training...: 26%|████████████████████████████████████▋ | 90554/352766 [07:56<165:48:16, 2.28s/it]
71
+
72
+
73
+
74
+
75
+
76
+ Training...: 26%|████████████████████████████████████▋ | 90604/352766 [08:16<163:40:12, 2.25s/it]
77
+
78
+
79
+
80
+
81
+
82
+ Training...: 26%|████████████████████████████████████▋ | 90654/352766 [08:36<163:33:31, 2.25s/it]
83
+
84
+
85
+
86
+
87
+ Training...: 26%|█████████████████████████████████████ | 90701/352766 [08:42<27:12:14, 2.68it/s]
88
+
89
+
90
+
91
+
92
+
93
+ Training...: 26%|█████████████���███████████████████████ | 90751/352766 [09:02<27:14:36, 2.67it/s]
94
+
95
+
96
+
97
+
98
+
99
+ Training...: 26%|█████████████████████████████████████ | 90801/352766 [09:23<26:51:41, 2.71it/s]
100
+
101
+
102
+
103
+
104
+
105
+ Training...: 26%|█████████████████████████████████████ | 90851/352766 [09:43<27:11:58, 2.67it/s]
106
+
107
+
108
+
109
+
110
+
111
+ Training...: 26%|█████████████████████████████████████ | 90901/352766 [10:03<26:57:09, 2.70it/s]
112
+
113
+
114
+
115
+
116
+
117
+
118
+ Training...: 26%|████████████████████████████████████▊ | 90956/352766 [10:36<101:30:11, 1.40s/it]
119
+
120
+
121
+
122
+
123
+
124
+ Training...: 26%|█████████████████████████████████████▏ | 91008/352766 [10:56<66:39:23, 1.09it/s]
125
+
126
+
127
+
128
+
129
+ Training...: 26%|█████████████████████████████████████▏ | 91051/352766 [11:03<27:10:33, 2.68it/s]
130
+
131
+
132
+
133
+
134
+
135
+ Training...: 26%|█████████████████████████████████████▏ | 91101/352766 [11:23<26:59:12, 2.69it/s]
136
+
137
+
138
+
139
+
140
+
141
+ Training...: 26%|█████████████████████████████████████▏ | 91151/352766 [11:43<27:13:17, 2.67it/s]
142
+
143
+
144
+
145
+
146
+
147
+ Training...: 26%|█████████████████████████████████████▏ | 91201/352766 [12:03<27:02:48, 2.69it/s]
148
+
149
+
150
+
151
+
152
+
153
+ Training...: 26%|█████████████████████████████████████▏ | 91251/352766 [12:23<27:10:18, 2.67it/s]
154
+
155
+
156
+
157
+
158
+
159
+ Training...: 26%|█████████████████████████████████████▎ | 91301/352766 [12:43<27:09:13, 2.67it/s]
160
+
161
+
162
+
163
+
164
+
165
+ Training...: 26%|█████████████████████████████████████▎ | 91351/352766 [13:03<27:06:17, 2.68it/s]
166
+
167
+
168
+
169
+
170
+
171
+ Training...: 26%|█████████████████████████████████████▎ | 91401/352766 [13:23<27:10:11, 2.67it/s]
172
+
173
+
174
+
175
+
176
+
177
+
178
+ Training...: 26%|█████████████████████████████████████▎ | 91458/352766 [13:57<66:32:52, 1.09it/s]
179
+
180
+
181
+
182
+
183
+
184
+ Training...: 26%|█████████████████████████████████████▎ | 91508/352766 [14:17<66:37:49, 1.09it/s]
185
+
186
+
187
+
188
+
189
+
190
+ Training...: 26%|█████████████████████████████████████▎ | 91558/352766 [14:37<66:36:05, 1.09it/s]
191
+
192
+
193
+
194
+
195
+ Training...: 26%|█████████████████████████████████████▍ | 91601/352766 [14:43<27:05:25, 2.68it/s]
196
+
197
+
198
+
199
+
200
+
201
+ Training...: 26%|█████████████████████████████████████▍ | 91651/352766 [15:03<26:56:07, 2.69it/s]
202
+
203
+
204
+
205
+
206
+
207
+ Training...: 26%|█████████████████████████████████████▍ | 91701/352766 [15:23<27:07:19, 2.67it/s]
208
+
209
+
210
+
211
+
212
+
213
+ Training...: 26%|█████████████████████████████████████▍ | 91751/352766 [15:43<26:55:55, 2.69it/s]
214
+
215
+
216
+
217
+
218
+
219
+ Training...: 26%|█████████████████████████████████████▍ | 91801/352766 [16:03<27:06:29, 2.67it/s]
220
+
221
+
222
+
223
+
224
+
225
+ Training...: 26%|█████████████████████████████████████▍ | 91851/352766 [16:23<26:56:28, 2.69it/s]
226
+
227
+
228
+
229
+
230
+
231
+ Training...: 26%|█████████████████████████████████████▌ | 91901/352766 [16:43<27:04:32, 2.68it/s]
232
+
233
+
234
+
235
+
236
+
237
+ Training...: 26%|█████████████████████████████████████▌ | 91951/352766 [17:03<27:04:14, 2.68it/s]
238
+
239
+
240
+
241
+
242
+
243
+ Training...: 26%|█████████████████████████████████████▌ | 92001/352766 [17:23<27:05:54, 2.67it/s]
244
+
245
+
246
+
247
+
248
+
249
+ Training...: 26%|█████████████████████████████████████▌ | 92051/352766 [17:44<27:06:59, 2.67it/s]
250
+
251
+
252
+
253
+
254
+
255
+ Training...: 26%|█████████████████████████████████████▌ | 92101/352766 [18:04<27:03:49, 2.68it/s]
256
+
257
+
258
+
259
+
260
+
261
+ Training...: 26%|█████████████████████████████████████▌ | 92151/352766 [18:24<27:04:10, 2.67it/s]
262
+
263
+
264
+
265
+
266
+
267
+ Training...: 26%|█████████████████████████████████████▋ | 92201/352766 [18:44<26:46:11, 2.70it/s]
268
+
269
+
270
+
271
+
272
+
273
+ Training...: 26%|█████████████████████████████████████▋ | 92251/352766 [19:04<27:04:12, 2.67it/s]
274
+
275
+
276
+
277
+
278
+
279
+ Training...: 26%|█████████████████████████████████████▋ | 92301/352766 [19:24<26:48:10, 2.70it/s]
280
+
281
+
282
+
283
+
284
+
285
+ Training...: 26%|█████████████████████████████████████▋ | 92351/352766 [19:44<27:33:02, 2.63it/s]
286
+
287
+
288
+
289
+
290
+
291
+ Training...: 26%|█████████████████████████████████████▋ | 92401/352766 [20:04<26:50:57, 2.69it/s]
292
+
293
+
294
+
295
+
296
+
297
+
298
+ Training...: 26%|█████████████████████████████████████▍ | 92456/352766 [20:37<100:53:05, 1.40s/it]
299
+
300
+
301
+
302
+
303
+ Training...: 26%|███████████████████████���█████████████▊ | 92501/352766 [20:44<26:54:37, 2.69it/s]
304
+
305
+
306
+
307
+
308
+
309
+ Training...: 26%|█████████████████████████████████████▊ | 92551/352766 [21:04<27:02:28, 2.67it/s]
310
+
311
+
312
+
313
+
314
+
315
+ Training...: 26%|█████████████████████████████████████▊ | 92601/352766 [21:24<27:02:13, 2.67it/s]
316
+
317
+
318
+
319
+
320
+
321
+ Training...: 26%|█████████████████████████████████████▊ | 92651/352766 [21:44<27:02:14, 2.67it/s]
322
+
323
+
324
+
325
+
326
+
327
+ Training...: 26%|█████████████████████████████████████▊ | 92701/352766 [22:04<27:00:34, 2.67it/s]
328
+
329
+
330
+
331
+
332
+
333
+ Training...: 26%|█████████████████████████████████████▊ | 92751/352766 [22:24<26:59:28, 2.68it/s]
334
+
335
+
336
+
337
+
338
+
339
+ Training...: 26%|█████████████████████████████████████▉ | 92801/352766 [22:44<26:59:56, 2.67it/s]
340
+
341
+
342
+
343
+
344
+
345
+ Training...: 26%|█████████████████████████████████████▉ | 92851/352766 [23:04<27:01:40, 2.67it/s]
346
+
347
+
348
+
349
+
350
+
351
+ Training...: 26%|█████████████████████████████████████▉ | 92901/352766 [23:24<26:59:04, 2.68it/s]
352
+
353
+
354
+
355
+
356
+
357
+ Training...: 26%|█████████████████████████████████████▉ | 92951/352766 [23:44<26:48:08, 2.69it/s]
358
+
359
+
360
+
361
+
362
+
363
+ Training...: 26%|█████████████████████████████████████▉ | 93001/352766 [24:04<26:58:52, 2.67it/s]
364
+ Training...: 26%|█████████████████████████████████████▉ | 93001/352766 [24:17<26:58:52, 2.67it/s]
365
+
366
+
367
+
368
+
369
+
370
+
371
+
372
+
373
+
374
+
375
+
376
+
377
+
378
+
379
+
380
+
381
+
382
+
383
+
384
+
385
+
386
+
387
+
388
+
389
+
390
+
391
+
392
+
393
+
394
+
395
+
396
+
397
+
398
+
399
+ Training...: 26%|█████████████████████████████████████▉ | 93051/352766 [25:47<26:47:27, 2.69it/s]
400
+
401
+
402
+
403
+
404
+ Training...: 26%|██████████████████████████████████████ | 93101/352766 [26:07<26:59:47, 2.67it/s]
405
+
406
+
407
+
408
+
409
+ Training...: 26%|██████████████████████████████████████ | 93151/352766 [26:27<26:48:37, 2.69it/s]
410
+
411
+
412
+
413
+
414
+ Training...: 26%|██████████████████████████████████████ | 93201/352766 [26:47<26:57:33, 2.67it/s]
415
+
416
+
417
+
418
+
419
+ Training...: 26%|██████████████████████████████████████ | 93251/352766 [27:07<26:41:44, 2.70it/s]
420
+
421
+
422
+
423
+
424
+ Training...: 26%|██████████████████████████████████████ | 93301/352766 [27:27<26:58:20, 2.67it/s]
425
+
426
+
427
+
428
+
429
+ Training...: 26%|██████████████████████████████████████ | 93351/352766 [27:47<26:57:35, 2.67it/s]
430
+
431
+
432
+
433
+
434
+ Training...: 26%|██████████████████████████████████████▏ | 93401/352766 [28:07<27:01:51, 2.67it/s]
435
+
436
+
437
+
438
+
439
+ Training...: 26%|██████████████████████████████████████▏ | 93451/352766 [28:27<26:58:32, 2.67it/s]
440
+
441
+
442
+
443
+
444
+ Training...: 27%|██████████████████████████████████████▏ | 93501/352766 [28:47<26:43:59, 2.69it/s]
445
+
446
+
447
+
448
+
449
+ Training...: 27%|██████████████████████████████████████▏ | 93551/352766 [29:07<26:54:19, 2.68it/s]
450
+
451
+
452
+
453
+
454
+ Training...: 27%|██████████████████████████████████████▏ | 93601/352766 [29:27<26:44:45, 2.69it/s]
455
+
456
+
457
+
458
+
459
+ Training...: 27%|██████████████████████████████████████▏ | 93651/352766 [29:47<26:55:25, 2.67it/s]
460
+
461
+
462
+
463
+
464
+ Training...: 27%|██████████████████████████████████████▏ | 93701/352766 [30:07<26:42:56, 2.69it/s]
465
+
466
+
467
+
468
+
469
+ Training...: 27%|██████████████████████████████████████▎ | 93751/352766 [30:27<26:54:13, 2.67it/s]
470
+
471
+
472
+
473
+
474
+ Training...: 27%|██████████████████████████████████████▎ | 93801/352766 [30:47<26:52:48, 2.68it/s]
475
+
476
+
477
+
478
+
479
+ Training...: 27%|██████████████████████████████████████▎ | 93851/352766 [31:07<26:38:09, 2.70it/s]
480
+
481
+
482
+
483
+
484
+ Training...: 27%|██████████████████████████████████████▎ | 93901/352766 [31:27<26:52:37, 2.68it/s]
485
+
486
+
487
+
488
+
489
+ Training...: 27%|██████████████████████████████████████▎ | 93951/352766 [31:47<26:36:27, 2.70it/s]
490
+
491
+
492
+
493
+
494
+ Training...: 27%|██████████████████████████████████████▎ | 94001/352766 [32:07<26:52:51, 2.67it/s]
495
+
496
+
497
+
498
+
499
+ Training...: 27%|██████████████████████████████████████▍ | 94051/352766 [32:27<26:40:41, 2.69it/s]
500
+
501
+
502
+
503
+
504
+ Training...: 27%|██████████████████████████████████████▍ | 94101/352766 [32:48<26:53:29, 2.67it/s]
505
+
506
+
507
+
508
+
509
+ Training...: 27%|██████████████████████████████████████▍ | 94151/352766 [33:08<26:26:05, 2.72it/s]
510
+
511
+
512
+
513
+
514
+ Training...: 27%|██████████████████��███████████████████▍ | 94201/352766 [33:28<26:49:38, 2.68it/s]
515
+
516
+
517
+
518
+
519
+ Training...: 27%|██████████████████████████████████████▍ | 94251/352766 [33:48<26:49:57, 2.68it/s]
520
+
521
+
522
+
523
+
524
+ Training...: 27%|██████████████████████████████████████▍ | 94301/352766 [34:08<26:32:41, 2.70it/s]
525
+
526
+
527
+
528
+
529
+ Training...: 27%|██████████████████████████████████████▌ | 94351/352766 [34:28<26:49:37, 2.68it/s]
530
+
531
+
532
+
533
+
534
+ Training...: 27%|██████████████████████████████████████▌ | 94401/352766 [34:48<26:37:10, 2.70it/s]
535
+
536
+
537
+
538
+
539
+ Training...: 27%|██████████████████████████████████████▌ | 94451/352766 [35:08<26:49:46, 2.67it/s]
540
+
541
+
542
+
543
+
544
+ Training...: 27%|██████████████████████████████████████▌ | 94501/352766 [35:28<26:39:01, 2.69it/s]
545
+
546
+
547
+
548
+
549
+ Training...: 27%|██████████████████████████████████████▌ | 94551/352766 [35:48<26:47:38, 2.68it/s]
550
+
551
+
552
+
553
+
554
+ Training...: 27%|██████████████████████████████████████▌ | 94601/352766 [36:08<25:53:23, 2.77it/s]
555
+
556
+
557
+
558
+
559
+ Training...: 27%|██████████████████████████████████████▋ | 94651/352766 [36:28<26:46:26, 2.68it/s]
560
+
561
+
562
+
563
+
564
+ Training...: 27%|██████████████████████████████████████▋ | 94701/352766 [36:48<26:48:06, 2.67it/s]
565
+
566
+
567
+
568
+
569
+ Training...: 27%|██████████████████████████████████████▋ | 94751/352766 [37:08<26:33:12, 2.70it/s]
570
+
571
+
572
+
573
+
574
+ Training...: 27%|██████████████████████████████████████▋ | 94801/352766 [37:28<26:48:45, 2.67it/s]
575
+
576
+
577
+
578
+
579
+ Training...: 27%|██████████████████████████████████████▋ | 94851/352766 [37:48<26:32:22, 2.70it/s]
580
+
581
+
582
+
583
+
584
+ Training...: 27%|██████████████████████████████████████▋ | 94901/352766 [38:08<26:47:38, 2.67it/s]
585
+
586
+
587
+
588
+
589
+ Training...: 27%|██████████████████████████████████████▊ | 94951/352766 [38:28<26:36:22, 2.69it/s]
590
+
591
+
592
+
593
+
594
+ Training...: 27%|██████████████████████████████████████▊ | 95001/352766 [38:48<26:46:43, 2.67it/s]
595
+
596
+
597
+
598
+
599
+ Training...: 27%|██████████████████████████████████████▊ | 95051/352766 [39:09<30:59:06, 2.31it/s]
600
+
601
+
602
+
603
+
604
+ Training...: 27%|██████████████████████████████████████▊ | 95101/352766 [39:28<26:44:57, 2.68it/s]
605
+
606
+
607
+
608
+
609
+ Training...: 27%|██████████████████████████████████████▊ | 95151/352766 [39:48<26:45:41, 2.67it/s]
610
+
611
+
612
+
613
+
614
+ Training...: 27%|██████████████████████████████████████▊ | 95201/352766 [40:08<26:23:41, 2.71it/s]
615
+
616
+
617
+
618
+
619
+ Training...: 27%|██████████████████████████████████████▉ | 95251/352766 [40:28<26:45:05, 2.67it/s]
620
+
621
+
622
+
623
+
624
+ Training...: 27%|██████████████████████████████████████▉ | 95301/352766 [40:48<26:31:57, 2.70it/s]
625
+
626
+
627
+
628
+
629
+ Training...: 27%|██████████████████████████████████████▉ | 95351/352766 [41:09<26:44:24, 2.67it/s]
630
+
631
+
632
+
633
+
634
+ Training...: 27%|██████████████████████████████████████▉ | 95401/352766 [41:29<27:07:39, 2.64it/s]
635
+
636
+
637
+
638
+
639
+ Training...: 27%|██████████████████████████████████████▉ | 95451/352766 [41:49<26:42:40, 2.68it/s]
640
+
641
+
642
+
643
+
644
+ Training...: 27%|██████████████████████████████████████▉ | 95501/352766 [42:09<26:47:12, 2.67it/s]
645
+
646
+
647
+
648
+
649
+ Training...: 27%|███████████████████████████████████████ | 95551/352766 [42:29<26:10:25, 2.73it/s]
650
+
651
+
652
+
653
+
654
+ Training...: 27%|███████████████████████████████████████ | 95601/352766 [42:49<26:42:07, 2.68it/s]
655
+
656
+
657
+
658
+
659
+ Training...: 27%|███████████████████████████████████████ | 95651/352766 [43:09<26:27:27, 2.70it/s]
660
+
661
+
662
+
663
+
664
+ Training...: 27%|███████████████████████████████████████ | 95701/352766 [43:29<27:07:53, 2.63it/s]
665
+
666
+
667
+
668
+
669
+ Training...: 27%|███████████████████████████████████████ | 95751/352766 [43:49<26:28:17, 2.70it/s]
670
+
671
+
672
+
673
+
674
+ Training...: 27%|███████████████████████████████████████ | 95801/352766 [44:09<26:44:06, 2.67it/s]
675
+
676
+
677
+
678
+
679
+ Training...: 27%|███████████████████████████████████████▏ | 95851/352766 [44:29<26:36:05, 2.68it/s]
680
+
681
+
682
+
683
+
684
+ Training...: 27%|███████████████████████████████████████▏ | 95901/352766 [44:49<26:40:28, 2.67it/s]
685
+
686
+
687
+
688
+
689
+ Training...: 27%|███████████████████████████████████████▏ | 95951/352766 [45:09<26:37:11, 2.68it/s]
690
+
691
+
692
+
693
+
694
+ Training...: 27%|███████████████████████████████████████▏ | 96001/352766 [45:29<26:43:50, 2.67it/s]
695
+ Training...: 27%|███████████████████████████████████████▏ | 96001/352766 [45:42<26:43:50, 2.67it/s]
696
+
697
+
698
+
699
+
700
+
701
+
702
+
703
+
704
+
705
+
706
+
707
+
708
+
709
+
710
+
711
+
712
+
713
+
714
+
715
+
716
+
717
+
718
+
719
+
720
+
721
+
722
+
723
+
724
+
725
+
726
+
727
+
728
+
729
+
730
+
731
+ Training...: 27%|███████████████████████████████████████▏ | 96056/352766 [47:06<99:22:10, 1.39s/it]
732
+
733
+
734
+
735
+
736
+
737
+ Training...: 27%|███████████████████████████████████████▏ | 96110/352766 [47:27<44:48:48, 1.59it/s]
738
+
739
+
740
+
741
+
742
+
743
+ Training...: 27%|██████████████████████████████████████▉ | 96154/352766 [47:46<160:01:49, 2.25s/it]
744
+
745
+
746
+
747
+
748
+
749
+ Training...: 27%|███████████████████████████████████████▎ | 96206/352766 [48:07<99:20:51, 1.39s/it]
750
+
751
+
752
+
753
+
754
+
755
+ Training...: 27%|███████████████████████████████████████▎ | 96256/352766 [48:27<99:22:19, 1.39s/it]
756
+
757
+
758
+
759
+
760
+
761
+ Training...: 27%|███████████████████████████████████████▎ | 96310/352766 [48:47<44:53:32, 1.59it/s]
762
+
763
+
764
+
765
+
766
+
767
+ Training...: 27%|███████████████████████████████████████▎ | 96360/352766 [49:07<44:49:53, 1.59it/s]
768
+
769
+
770
+
771
+
772
+
773
+ Training...: 27%|███████████████████████████████████████ | 96404/352766 [49:27<159:58:23, 2.25s/it]
774
+
775
+
776
+
777
+
778
+
779
+ Training...: 27%|███████████████████████████████████████ | 96454/352766 [49:47<159:55:14, 2.25s/it]
780
+
781
+
782
+
783
+
784
+
785
+ Training...: 27%|███████████████████████████████████████▍ | 96506/352766 [50:07<99:13:33, 1.39s/it]
786
+
787
+
788
+
789
+
790
+
791
+ Training...: 27%|███████████████████████████████████████▍ | 96556/352766 [50:27<99:05:56, 1.39s/it]
792
+
793
+
794
+
795
+
796
+
797
+ Training...: 27%|███████████████████████████████████████▍ | 96608/352766 [50:47<65:13:20, 1.09it/s]
798
+
799
+
800
+
801
+
802
+
803
+ Training...: 27%|███████████████████████████████████████▍ | 96658/352766 [51:07<65:14:43, 1.09it/s]
804
+
805
+
806
+
807
+
808
+
809
+ Training...: 27%|███████████████████████████████████████▍ | 96710/352766 [51:27<44:40:42, 1.59it/s]
810
+
811
+
812
+
813
+
814
+
815
+ Training...: 27%|███████████████████████████████████████��� | 96760/352766 [51:47<44:38:45, 1.59it/s]
816
+
817
+
818
+
819
+
820
+
821
+ Training...: 27%|███████████████████████████████████████▌ | 96810/352766 [52:07<44:48:15, 1.59it/s]
822
+
823
+
824
+
825
+
826
+
827
+ Training...: 27%|███████████████████████████████████████▎ | 96852/352766 [52:27<285:03:12, 4.01s/it]
828
+
829
+
830
+
831
+
832
+
833
+ Training...: 27%|███████████████████████████████████████▌ | 96908/352766 [52:47<65:06:39, 1.09it/s]
834
+
835
+
836
+
837
+
838
+
839
+ Training...: 27%|███████████████████████████████████████▌ | 96958/352766 [53:07<65:12:53, 1.09it/s]
840
+
841
+
842
+
843
+
844
+
845
+ Training...: 27%|███████████████████████████████████████▌ | 97008/352766 [53:27<65:04:07, 1.09it/s]
846
+
847
+
848
+
849
+
850
+
851
+ Training...: 28%|███████████████████████████████████████▌ | 97058/352766 [53:47<65:08:47, 1.09it/s]
852
+
853
+
854
+
855
+
856
+
857
+ Training...: 28%|███████████████████████████████████████▋ | 97110/352766 [54:07<44:35:46, 1.59it/s]
858
+
859
+
860
+
861
+
862
+
863
+ Training...: 28%|███████████████████████████████████████▋ | 97160/352766 [54:28<44:44:31, 1.59it/s]
864
+
865
+
866
+
867
+
868
+
869
+ Training...: 28%|███████████████████████████████████████▋ | 97212/352766 [54:48<31:36:08, 2.25it/s]
870
+
871
+
872
+
873
+
874
+
875
+ Training...: 28%|███████████████████████████████████████▋ | 97262/352766 [55:08<31:31:45, 2.25it/s]
876
+
877
+
878
+
879
+
880
+
881
+ Training...: 28%|███████████████████████████████████████▍ | 97304/352766 [55:27<159:16:52, 2.24s/it]
882
+
883
+
884
+
885
+
886
+
887
+ Training...: 28%|███████████████████████████████████████▋ | 97358/352766 [55:48<65:02:52, 1.09it/s]
888
+
889
+
890
+
891
+
892
+
893
+ Training...: 28%|███████████████████████████████████████▊ | 97410/352766 [56:08<44:35:50, 1.59it/s]
894
+
895
+
896
+
897
+
898
+
899
+ Training...: 28%|███████████████████████████████████████▊ | 97462/352766 [56:28<31:31:40, 2.25it/s]
900
+
901
+
902
+
903
+
904
+
905
+ Training...: 28%|███████████████████████████████████████▊ | 97506/352766 [56:48<98:49:36, 1.39s/it]
906
+
907
+
908
+
909
+
910
+
911
+ Training...: 28%|███████████████████████████████████████▊ | 97556/352766 [57:08<98:48:56, 1.39s/it]
912
+
913
+
914
+
915
+
916
+
917
+ Training...: 28%|███████████████████████████████████████▊ | 97608/352766 [57:28<64:55:42, 1.09it/s]
918
+
919
+
920
+
921
+
922
+
923
+ Training...: 28%|███████████████████████████████████████▊ | 97658/352766 [57:48<64:59:47, 1.09it/s]
924
+
925
+
926
+
927
+
928
+
929
+ Training...: 28%|███████████████████████████████████████▉ | 97708/352766 [58:08<65:00:59, 1.09it/s]
930
+
931
+
932
+
933
+
934
+ Training...: 28%|███████████████████████████████████████▉ | 97751/352766 [58:14<26:29:04, 2.67it/s]
935
+
936
+
937
+
938
+
939
+
940
+ Training...: 28%|███████████████████████████████████████▉ | 97801/352766 [58:34<26:32:32, 2.67it/s]
941
+
942
+
943
+
944
+
945
+
946
+ Training...: 28%|███████████████████████████████████████▉ | 97851/352766 [58:54<26:28:23, 2.67it/s]
947
+
948
+
949
+
950
+
951
+
952
+
953
+ Training...: 28%|███████████████████████████████████████▉ | 97914/352766 [59:28<22:50:46, 3.10it/s]
954
+
955
+
956
+
957
+
958
+
959
+ Training...: 28%|███████████████████████████████████████▉ | 97956/352766 [59:48<98:38:03, 1.39s/it]
960
+
961
+
962
+
963
+
964
+
965
+ Training...: 28%|███████████████████████████████████████▍ | 98006/352766 [1:00:08<98:33:05, 1.39s/it]
966
+
967
+
968
+
969
+
970
+
971
+ Training...: 28%|███████████████████████████████████████▍ | 98058/352766 [1:00:28<64:51:05, 1.09it/s]
972
+
973
+
974
+
975
+
976
+
977
+ Training...: 28%|███████████████████████████████████████▍ | 98112/352766 [1:00:48<31:24:35, 2.25it/s]
978
+
979
+
980
+
981
+
982
+
983
+ Training...: 28%|███████████████████████████████████████▌ | 98162/352766 [1:01:08<31:21:50, 2.25it/s]
984
+
985
+
986
+
987
+
988
+
989
+ Training...: 28%|███████████████████████████████████████▌ | 98210/352766 [1:01:29<46:20:37, 1.53it/s]
990
+
991
+
992
+
993
+
994
+ Training...: 28%|███████████████████████████████████████▌ | 98251/352766 [1:01:35<26:12:55, 2.70it/s]
995
+
996
+
997
+
998
+
999
+
1000
+ Training...: 28%|███████████████████████████████████████▌ | 98301/352766 [1:01:55<26:25:51, 2.67it/s]
1001
+
1002
+
1003
+
1004
+
1005
+
1006
+ Training...: 28%|███████████████████████████████████████▌ | 98351/352766 [1:02:15<26:12:02, 2.70it/s]
1007
+
1008
+
1009
+
1010
+
1011
+
1012
+ Training...: 28%|███████████████████████████████████████▌ | 98401/352766 [1:02:35<26:25:05, 2.67it/s]
1013
+
1014
+
1015
+
1016
+
1017
+
1018
+
1019
+ Training...: 28%|███████████████████████████████████████▋ | 98460/352766 [1:03:09<44:27:50, 1.59it/s]
1020
+
1021
+
1022
+
1023
+
1024
+
1025
+ Training...: 28%|███████████████████████████████████████▋ | 98510/352766 [1:03:29<44:27:12, 1.59it/s]
1026
+
1027
+
1028
+
1029
+
1030
+
1031
+ Training...: 28%|███████████████████████████████████████▋ | 98562/352766 [1:03:49<31:22:01, 2.25it/s]
1032
+
1033
+
1034
+
1035
+
1036
+
1037
+ Training...: 28%|███████████████████████████████████████▋ | 98612/352766 [1:04:09<31:37:54, 2.23it/s]
1038
+
1039
+
1040
+
1041
+
1042
+ Training...: 28%|███████████████████████████████████████▋ | 98651/352766 [1:04:15<26:26:07, 2.67it/s]
1043
+
1044
+
1045
+
1046
+
1047
+ Training...: 28%|███████████████████████████████████████▋ | 98701/352766 [1:04:35<26:08:04, 2.70it/s]
1048
+
1049
+
1050
+
1051
+
1052
+
1053
+ Training...: 28%|███████████████████████████████████████▊ | 98751/352766 [1:04:55<26:20:41, 2.68it/s]
1054
+
1055
+
1056
+
1057
+
1058
+
1059
+ Training...: 28%|███████████████████████████████████████▊ | 98801/352766 [1:05:15<26:06:34, 2.70it/s]
1060
+
1061
+
1062
+
1063
+
1064
+
1065
+
1066
+ Training...: 28%|███████████████████████████████████████▊ | 98862/352766 [1:05:49<31:24:36, 2.25it/s]
1067
+
1068
+
1069
+
1070
+
1071
+ Training...: 28%|███████████████████████████████████████▊ | 98901/352766 [1:05:55<26:11:39, 2.69it/s]
1072
+
1073
+
1074
+
1075
+
1076
+
1077
+
1078
+ Training...: 28%|███████████████████████████████████████▊ | 98958/352766 [1:06:29<64:42:06, 1.09it/s]
1079
+
1080
+
1081
+
1082
+
1083
+ Training...: 28%|███████████████████████████████████████▊ | 99001/352766 [1:06:35<25:28:49, 2.77it/s]
1084
+ Training...: 28%|███████████████████████████████████████▊ | 99001/352766 [1:06:48<25:28:49, 2.77it/s]
1085
+
1086
+
1087
+
1088
+
1089
+
1090
+
1091
+
1092
+
1093
+
1094
+
1095
+
1096
+
1097
+
1098
+
1099
+
1100
+
1101
+
1102
+
1103
+
1104
+
1105
+
1106
+
1107
+
1108
+
1109
+
1110
+
1111
+
1112
+
1113
+
1114
+
1115
+
1116
+
1117
+
1118
+
1119
+ Training...: 28%|███████████████████████████████████████▊ | 99051/352766 [1:07:59<26:23:16, 2.67it/s]
1120
+
1121
+
1122
+
1123
+
1124
+ Training...: 28%|███████████████████████████████████████▉ | 99101/352766 [1:08:19<26:21:47, 2.67it/s]
1125
+
1126
+
1127
+
1128
+
1129
+ Training...: 28%|███████████████████████████████████████▉ | 99151/352766 [1:08:39<25:42:43, 2.74it/s]
1130
+
1131
+
1132
+
1133
+
1134
+ Training...: 28%|███████████████████████████████████████▉ | 99201/352766 [1:08:59<26:20:30, 2.67it/s]
1135
+
1136
+
1137
+
1138
+
1139
+ Training...: 28%|███████████████████████████████████████▉ | 99251/352766 [1:09:19<26:06:25, 2.70it/s]
1140
+
1141
+
1142
+
1143
+
1144
+ Training...: 28%|█████████████████���█████████████████████▉ | 99301/352766 [1:09:39<26:20:12, 2.67it/s]
1145
+
1146
+
1147
+
1148
+
1149
+ Training...: 28%|███████████████████████████████████████▉ | 99351/352766 [1:09:59<25:59:43, 2.71it/s]
1150
+
1151
+
1152
+
1153
+
1154
+ Training...: 28%|████████████████████████████████████████ | 99401/352766 [1:10:19<26:18:53, 2.67it/s]
1155
+
1156
+
1157
+
1158
+
1159
+ Training...: 28%|████████████████████████████████████████ | 99451/352766 [1:10:39<26:05:25, 2.70it/s]
1160
+
1161
+
1162
+
1163
+
1164
+ Training...: 28%|████████████████████████████████████████ | 99501/352766 [1:11:00<26:16:59, 2.68it/s]
1165
+
1166
+
1167
+
1168
+
1169
+ Training...: 28%|████████████████████████████████████████ | 99551/352766 [1:11:20<26:19:15, 2.67it/s]
1170
+
1171
+
1172
+
1173
+
1174
+ Training...: 28%|████████████████████████████████████████ | 99601/352766 [1:11:40<26:02:15, 2.70it/s]
1175
+
1176
+
1177
+
1178
+
1179
+ Training...: 28%|████████████████████████████████████████ | 99651/352766 [1:12:00<26:16:48, 2.68it/s]
1180
+
1181
+
1182
+
1183
+
1184
+ Training...: 28%|████████████████████████████████████████▏ | 99701/352766 [1:12:20<26:06:24, 2.69it/s]
1185
+
1186
+
1187
+
1188
+
1189
+ Training...: 28%|████████████████████████████████████████▏ | 99751/352766 [1:12:40<26:17:58, 2.67it/s]
1190
+
1191
+
1192
+
1193
+
1194
+ Training...: 28%|████████████████████████████████████████▏ | 99801/352766 [1:13:00<26:02:36, 2.70it/s]
1195
+
1196
+
1197
+
1198
+
1199
+ Training...: 28%|████████████████████████████████████████▏ | 99851/352766 [1:13:20<26:15:22, 2.68it/s]
1200
+
1201
+
1202
+
1203
+
1204
+ Training...: 28%|████████████████████████████████████████▏ | 99901/352766 [1:13:40<25:03:26, 2.80it/s]
1205
+
1206
+
1207
+
1208
+
1209
+ Training...: 28%|████████████████████████████████████████▏ | 99951/352766 [1:14:00<26:16:22, 2.67it/s]
1210
+
1211
+
1212
+
1213
+
1214
+ Training...: 28%|███████████████████████████████████████▉ | 100001/352766 [1:14:20<26:17:00, 2.67it/s]
1215
+
1216
+
1217
+ tcmalloc: large alloc 1354776576 bytes == 0x371a4e000 @ 0x7f3586844680 0x7f3586864bdd 0x7f356cdfe20d 0x7f356ce0c340 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce07bd3 0x7f356ce081fe 0x504d56 0x56acb6 0x568d9a 0x5f5b33 0x56bc9b 0x5f5956 0x56aadf 0x5f5956 0x56fb87 0x568d9a 0x5f5b33 0x56bc9b 0x568d9a 0x68cdc7 0x67e161 0x67e1df
1218
+ tcmalloc: large alloc 2715181056 bytes == 0x3c2652000 @ 0x7f3586844680 0x7f3586864bdd 0x7f356cdfe20d 0x7f356ce0c340 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce0be87 0x7f356ce07bd3 0x7f356ce081fe 0x504d56 0x56acb6 0x568d9a 0x5f5b33 0x56bc9b 0x5f5956 0x56aadf 0x5f5956 0x56fb87 0x568d9a 0x5f5b33 0x56bc9b 0x568d9a
1219
+ tcmalloc: large alloc 1530273792 bytes == 0x31850a000 @ 0x7f3586844680 0x7f3586865824 0x5f7b11 0x7f356ce07c6f 0x7f356ce081fe 0x504d56 0x56acb6 0x568d9a 0x5f5b33 0x56bc9b 0x5f5956 0x56aadf 0x5f5956 0x56fb87 0x568d9a 0x5f5b33 0x56bc9b 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7f35866590b3 0x5f96de
1220
+ [16:50:06] - INFO - absl - Saved checkpoint at checkpoint_10000
1221
+ [16:50:07] - INFO - huggingface_hub.repository - git version 2.25.1
1222
+ git-lfs/2.9.2 (GitHub; linux amd64; go 1.13.5)
wandb/run-20210715_152938-8qznp93p/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210715_152938-8qznp93p/files/wandb-metadata.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-15T15:29:40.448025",
5
+ "startedAt": "2021-07-15T15:29:38.447383",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=10000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=50",
22
+ "--eval_steps=3000",
23
+ "--num_train_epochs=2",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=10000",
26
+ "--learning_rate=3e-5",
27
+ "--per_device_train_batch_size=1",
28
+ "--per_device_eval_batch_size=1",
29
+ "--save_total_limit=5",
30
+ "--max_eval_samples=4000",
31
+ "--resume_from_checkpoint=./"
32
+ ],
33
+ "state": "running",
34
+ "program": "./run_mlm_flax_no_accum.py",
35
+ "codePath": "run_mlm_flax_no_accum.py",
36
+ "git": {
37
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
38
+ "commit": "0609cb016dd30db635378fb0cd251ec35eed4e53"
39
+ },
40
+ "email": null,
41
+ "root": "/home/dat/pino-roberta-base",
42
+ "host": "t1v-n-f5c06ea1-w-0",
43
+ "username": "dat",
44
+ "executable": "/home/dat/pino/bin/python"
45
+ }
wandb/run-20210715_152938-8qznp93p/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"training_step": 10000, "learning_rate": 2.6118033929378726e-05, "train_loss": 2.5166258811950684, "_runtime": 4818, "_timestamp": 1626367796, "_step": 202, "eval_step": 9000, "eval_accuracy": 0.5589502453804016, "eval_loss": 2.3799679279327393}
wandb/run-20210715_152938-8qznp93p/logs/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20210715_152938-8qznp93p/logs/debug.log ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 15:29:38,448 INFO MainThread:707091 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-15 15:29:38,449 INFO MainThread:707091 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-15 15:29:38,449 INFO MainThread:707091 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/logs/debug.log
4
+ 2021-07-15 15:29:38,449 INFO MainThread:707091 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/logs/debug-internal.log
5
+ 2021-07-15 15:29:38,449 INFO MainThread:707091 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-15 15:29:38,449 INFO MainThread:707091 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-15 15:29:38,449 INFO MainThread:707091 [wandb_init.py:init():419] starting backend
9
+ 2021-07-15 15:29:38,449 INFO MainThread:707091 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-15 15:29:38,496 INFO MainThread:707091 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-15 15:29:38,542 INFO MainThread:707091 [backend.py:ensure_launched():139] started backend process with pid: 708348
12
+ 2021-07-15 15:29:38,544 INFO MainThread:707091 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-15 15:29:38,547 INFO MainThread:707091 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-15 15:29:38,548 INFO MainThread:707091 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-15 15:29:39,184 INFO MainThread:707091 [wandb_init.py:init():496] got version response
16
+ 2021-07-15 15:29:39,184 INFO MainThread:707091 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-15 15:29:39,355 INFO MainThread:707091 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-15 15:29:40,489 INFO MainThread:707091 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-15 15:29:40,489 INFO MainThread:707091 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-15 15:29:40,490 INFO MainThread:707091 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-15 15:29:40,492 INFO MainThread:707091 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-15 15:29:40,492 INFO MainThread:707091 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-15 15:29:40,498 INFO MainThread:707091 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_15-29-30_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 10000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 3000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-15 15:29:40,500 INFO MainThread:707091 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
25
+ 2021-07-15 15:29:40,501 INFO MainThread:707091 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 4000}
wandb/run-20210715_152938-8qznp93p/run-8qznp93p.wandb ADDED
Binary file (415 kB). View file