dat commited on
Commit
d725b93
1 Parent(s): 5551a58

Saving weights and logs at step 5

Browse files
Files changed (31) hide show
  1. events.out.tfevents.1626298536.t1v-n-f5c06ea1-w-0.581357.3.v2 +3 -0
  2. events.out.tfevents.1626298784.t1v-n-f5c06ea1-w-0.583081.3.v2 +3 -0
  3. flax_model.msgpack +1 -1
  4. opt_state.msgpack +3 -0
  5. run.sh +1 -1
  6. run_mlm_flax.py +11 -0
  7. training_state.json +1 -0
  8. wandb/debug-internal.log +1 -1
  9. wandb/debug.log +1 -1
  10. wandb/latest-run +1 -1
  11. wandb/run-20210714_212350-6qjir2uu/files/config.yaml +3 -0
  12. wandb/run-20210714_212350-6qjir2uu/files/output.log +12 -0
  13. wandb/run-20210714_212350-6qjir2uu/logs/debug-internal.log +115 -0
  14. wandb/run-20210714_212350-6qjir2uu/logs/debug.log +96 -0
  15. wandb/run-20210714_212350-6qjir2uu/run-6qjir2uu.wandb +0 -0
  16. wandb/run-20210714_213537-20l16od8/files/config.yaml +307 -0
  17. wandb/run-20210714_213537-20l16od8/files/output.log +15 -0
  18. wandb/run-20210714_213537-20l16od8/files/requirements.txt +94 -0
  19. wandb/run-20210714_213537-20l16od8/files/wandb-metadata.json +48 -0
  20. wandb/run-20210714_213537-20l16od8/files/wandb-summary.json +1 -0
  21. wandb/run-20210714_213537-20l16od8/logs/debug-internal.log +141 -0
  22. wandb/run-20210714_213537-20l16od8/logs/debug.log +119 -0
  23. wandb/run-20210714_213537-20l16od8/run-20l16od8.wandb +0 -0
  24. wandb/run-20210714_213944-3j6d3fy2/files/config.yaml +304 -0
  25. wandb/run-20210714_213944-3j6d3fy2/files/output.log +12 -0
  26. wandb/run-20210714_213944-3j6d3fy2/files/requirements.txt +94 -0
  27. wandb/run-20210714_213944-3j6d3fy2/files/wandb-metadata.json +47 -0
  28. wandb/run-20210714_213944-3j6d3fy2/files/wandb-summary.json +1 -0
  29. wandb/run-20210714_213944-3j6d3fy2/logs/debug-internal.log +57 -0
  30. wandb/run-20210714_213944-3j6d3fy2/logs/debug.log +25 -0
  31. wandb/run-20210714_213944-3j6d3fy2/run-3j6d3fy2.wandb +0 -0
events.out.tfevents.1626298536.t1v-n-f5c06ea1-w-0.581357.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6140a1c229a49ca5487ff7762776759d5ea43ddad0d97c1e0b7389f25cae2ad
3
+ size 40
events.out.tfevents.1626298784.t1v-n-f5c06ea1-w-0.583081.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a72b4b117c623ae62a396471264fcbea77d5fb762251237be0b92401d939a95
3
+ size 40
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba765ca93df3c067ee3d3ec56bc471ecc71072620bbbdc50509b516efdbf60c4
3
  size 510090043
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a07e8333f5822e57ebf3a1c59c4f6c6d734efaf0017732c6c8254869a4894524
3
  size 510090043
opt_state.msgpack CHANGED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:328aaa347b117dca77396a9d1fbc5f412fc5e22d9f5bf9e8b2eb8b89202f97d9
3
+ size 1530270545
run.sh CHANGED
@@ -18,7 +18,7 @@ python ./run_mlm_flax.py \
18
  --eval_steps="500" \
19
  --num_train_epochs="5" \
20
  --preprocessing_num_workers="96" \
21
- --save_steps="500" \
22
  --learning_rate="5e-5" \
23
  --per_device_train_batch_size="2" \
24
  --per_device_eval_batch_size="2" \
 
18
  --eval_steps="500" \
19
  --num_train_epochs="5" \
20
  --preprocessing_num_workers="96" \
21
+ --save_steps="5" \
22
  --learning_rate="5e-5" \
23
  --per_device_train_batch_size="2" \
24
  --per_device_eval_batch_size="2" \
run_mlm_flax.py CHANGED
@@ -339,6 +339,17 @@ def save_model_checkpoint(model, save_dir, state, with_opt:bool=True, push_to_hu
339
  json.dump({"step": state.step.item()}, f)
340
  logger.info("checkpoint saved")
341
 
 
 
 
 
 
 
 
 
 
 
 
342
 
343
 
344
  if __name__ == "__main__":
 
339
  json.dump({"step": state.step.item()}, f)
340
  logger.info("checkpoint saved")
341
 
342
+ def rotate_checkpoints(ckpt_dir:str, save_total_limit:int):
343
+ "Removes older checkpoints so that `save_total_limit` checkpoints are kept"
344
+ # TODO: what to remove is decided using step number only, we might want to improve that
345
+ ckpts = [str(x) for x in Path(ckpt_dir).glob("ckpt-*")]
346
+ # sort checkpoints by step
347
+ ckpts_sorted = sorted(ckpts, key=lambda x: int(x.split('-')[-1]))
348
+ ckpts_to_delete = ckpts_sorted[:-save_total_limit]
349
+ for ckpt in ckpts_to_delete:
350
+ logger.info(f"Deleting older checkpoint [{ckpt}] due to save_total_limit ({save_total_limit})")
351
+ shutil.rmtree(ckpt)
352
+
353
 
354
 
355
  if __name__ == "__main__":
training_state.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 501}
wandb/debug-internal.log CHANGED
@@ -1 +1 @@
1
- run-20210714_212350-6qjir2uu/logs/debug-internal.log
 
1
+ run-20210714_213944-3j6d3fy2/logs/debug-internal.log
wandb/debug.log CHANGED
@@ -1 +1 @@
1
- run-20210714_212350-6qjir2uu/logs/debug.log
 
1
+ run-20210714_213944-3j6d3fy2/logs/debug.log
wandb/latest-run CHANGED
@@ -1 +1 @@
1
- run-20210714_212350-6qjir2uu
 
1
+ run-20210714_213944-3j6d3fy2
wandb/run-20210714_212350-6qjir2uu/files/config.yaml CHANGED
@@ -13,6 +13,9 @@ _wandb:
13
  1:
14
  - 3
15
  - 11
 
 
 
16
  4: 3.8.10
17
  5: 0.10.33
18
  6: 4.9.0.dev0
 
13
  1:
14
  - 3
15
  - 11
16
+ 2:
17
+ - 3
18
+ - 11
19
  4: 3.8.10
20
  5: 0.10.33
21
  6: 4.9.0.dev0
wandb/run-20210714_212350-6qjir2uu/files/output.log CHANGED
@@ -171,3 +171,15 @@ Evaluating ...: 0%|
171
 
172
  [21:32:05] - INFO - huggingface_hub.repository - git version 2.25.1██████████████████████████████████████████████████████████████████████████████████| 31/31 [00:21<00:00, 9.98it/s]
173
  git-lfs/2.9.2 (GitHub; linux amd64; go 1.13.5)
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  [21:32:05] - INFO - huggingface_hub.repository - git version 2.25.1██████████████████████████████████████████████████████████████████████████████████| 31/31 [00:21<00:00, 9.98it/s]
173
  git-lfs/2.9.2 (GitHub; linux amd64; go 1.13.5)
174
+ [21:32:05] - DEBUG - huggingface_hub.repository - [Repository] is a valid git repo
175
+ [21:32:35] - INFO - huggingface_hub.repository - Uploading LFS objects: 100% (2/2), 510 MB | 31 MB/s, done.
176
+ tcmalloc: large alloc 1354776576 bytes == 0x304b28000 @ 0x7fd74488d680 0x7fd7448adbdd 0x7fd478ac320d 0x7fd478ad1340 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478accbd3 0x7fd478acd1fe 0x504d56 0x56acb6 0x568d9a 0x5f5b33 0x56bc9b 0x5f5956 0x56aadf 0x5f5956 0x56aadf 0x568d9a 0x5f5b33 0x56bc9b 0x568d9a 0x68cdc7 0x67e161
177
+ tcmalloc: large alloc 2715181056 bytes == 0x35572c000 @ 0x7fd74488d680 0x7fd7448adbdd 0x7fd478ac320d 0x7fd478ad1340 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478ad0e87 0x7fd478accbd3 0x7fd478acd1fe 0x504d56 0x56acb6 0x568d9a 0x5f5b33 0x56bc9b 0x5f5956 0x56aadf 0x5f5956 0x56aadf 0x568d9a 0x5f5b33 0x56bc9b 0x568d9a 0x68cdc7 0x67e161 0x67e1df
178
+ tcmalloc: large alloc 1530273792 bytes == 0x2ae462000 @ 0x7fd74488d680 0x7fd7448ae824 0x5f7b11 0x7fd478accc6f 0x7fd478acd1fe 0x504d56 0x56acb6 0x568d9a 0x5f5b33 0x56bc9b 0x5f5956 0x56aadf 0x5f5956 0x56aadf 0x568d9a 0x5f5b33 0x56bc9b 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7fd7446a20b3 0x5f96de
179
+ [21:32:57] - INFO - __main__ - checkpoint saved
180
+ Training...: 40%|███████████████████████████████████████████████████▏ | 500/1250 [08:46<13:09, 1.05s/it]
181
+ Step... (500 | Loss: 10.108721733093262, Acc: 0.043713752180337906): 0%| | 0/5 [08:51<?, ?it/s]
182
+ Traceback (most recent call last):
183
+ File "./run_mlm_flax.py", line 853, in <module>
184
+ rotate_checkpoints(training_args.output_dir, training_args.save_total_limit)
185
+ NameError: name 'rotate_checkpoints' is not defined
wandb/run-20210714_212350-6qjir2uu/logs/debug-internal.log CHANGED
@@ -289,3 +289,118 @@
289
  2021-07-14 21:32:07,771 INFO Thread-8 :580021 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/output.log
290
  2021-07-14 21:32:12,254 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: stop_status
291
  2021-07-14 21:32:12,255 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: stop_status
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  2021-07-14 21:32:07,771 INFO Thread-8 :580021 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/output.log
290
  2021-07-14 21:32:12,254 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: stop_status
291
  2021-07-14 21:32:12,255 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: stop_status
292
+ 2021-07-14 21:32:21,959 DEBUG SenderThread:580021 [sender.py:send():179] send: stats
293
+ 2021-07-14 21:32:27,390 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: stop_status
294
+ 2021-07-14 21:32:27,391 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: stop_status
295
+ 2021-07-14 21:32:37,785 INFO Thread-8 :580021 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/output.log
296
+ 2021-07-14 21:32:41,786 INFO Thread-8 :580021 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/output.log
297
+ 2021-07-14 21:32:42,870 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: stop_status
298
+ 2021-07-14 21:32:42,871 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: stop_status
299
+ 2021-07-14 21:32:43,788 INFO Thread-8 :580021 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/output.log
300
+ 2021-07-14 21:32:52,027 DEBUG SenderThread:580021 [sender.py:send():179] send: stats
301
+ 2021-07-14 21:32:59,795 INFO Thread-8 :580021 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/output.log
302
+ 2021-07-14 21:32:59,895 DEBUG SenderThread:580021 [sender.py:send():179] send: telemetry
303
+ 2021-07-14 21:32:59,896 DEBUG SenderThread:580021 [sender.py:send():179] send: exit
304
+ 2021-07-14 21:32:59,896 INFO SenderThread:580021 [sender.py:send_exit():287] handling exit code: 1
305
+ 2021-07-14 21:32:59,896 INFO SenderThread:580021 [sender.py:send_exit():295] send defer
306
+ 2021-07-14 21:32:59,896 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: poll_exit
307
+ 2021-07-14 21:32:59,897 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: defer
308
+ 2021-07-14 21:32:59,897 INFO HandlerThread:580021 [handler.py:handle_request_defer():141] handle defer: 0
309
+ 2021-07-14 21:32:59,897 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: poll_exit
310
+ 2021-07-14 21:32:59,897 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: defer
311
+ 2021-07-14 21:32:59,897 INFO SenderThread:580021 [sender.py:send_request_defer():304] handle sender defer: 0
312
+ 2021-07-14 21:32:59,897 INFO SenderThread:580021 [sender.py:transition_state():308] send defer: 1
313
+ 2021-07-14 21:32:59,897 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: defer
314
+ 2021-07-14 21:32:59,898 INFO HandlerThread:580021 [handler.py:handle_request_defer():141] handle defer: 1
315
+ 2021-07-14 21:32:59,944 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: defer
316
+ 2021-07-14 21:32:59,944 INFO SenderThread:580021 [sender.py:send_request_defer():304] handle sender defer: 1
317
+ 2021-07-14 21:32:59,944 INFO SenderThread:580021 [sender.py:transition_state():308] send defer: 2
318
+ 2021-07-14 21:32:59,944 DEBUG SenderThread:580021 [sender.py:send():179] send: stats
319
+ 2021-07-14 21:32:59,945 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: defer
320
+ 2021-07-14 21:32:59,945 INFO HandlerThread:580021 [handler.py:handle_request_defer():141] handle defer: 2
321
+ 2021-07-14 21:32:59,945 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: defer
322
+ 2021-07-14 21:32:59,945 INFO SenderThread:580021 [sender.py:send_request_defer():304] handle sender defer: 2
323
+ 2021-07-14 21:32:59,945 INFO SenderThread:580021 [sender.py:transition_state():308] send defer: 3
324
+ 2021-07-14 21:32:59,945 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: defer
325
+ 2021-07-14 21:32:59,946 INFO HandlerThread:580021 [handler.py:handle_request_defer():141] handle defer: 3
326
+ 2021-07-14 21:32:59,946 DEBUG SenderThread:580021 [sender.py:send():179] send: summary
327
+ 2021-07-14 21:32:59,946 INFO SenderThread:580021 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
328
+ 2021-07-14 21:32:59,946 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: defer
329
+ 2021-07-14 21:32:59,946 INFO SenderThread:580021 [sender.py:send_request_defer():304] handle sender defer: 3
330
+ 2021-07-14 21:32:59,946 INFO SenderThread:580021 [sender.py:transition_state():308] send defer: 4
331
+ 2021-07-14 21:32:59,947 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: defer
332
+ 2021-07-14 21:32:59,947 INFO HandlerThread:580021 [handler.py:handle_request_defer():141] handle defer: 4
333
+ 2021-07-14 21:32:59,947 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: defer
334
+ 2021-07-14 21:32:59,947 INFO SenderThread:580021 [sender.py:send_request_defer():304] handle sender defer: 4
335
+ 2021-07-14 21:32:59,999 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: poll_exit
336
+ 2021-07-14 21:33:00,122 INFO SenderThread:580021 [sender.py:transition_state():308] send defer: 5
337
+ 2021-07-14 21:33:00,122 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: poll_exit
338
+ 2021-07-14 21:33:00,122 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: defer
339
+ 2021-07-14 21:33:00,122 INFO HandlerThread:580021 [handler.py:handle_request_defer():141] handle defer: 5
340
+ 2021-07-14 21:33:00,123 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: defer
341
+ 2021-07-14 21:33:00,123 INFO SenderThread:580021 [sender.py:send_request_defer():304] handle sender defer: 5
342
+ 2021-07-14 21:33:00,123 INFO SenderThread:580021 [dir_watcher.py:finish():282] shutting down directory watcher
343
+ 2021-07-14 21:33:00,224 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: poll_exit
344
+ 2021-07-14 21:33:00,795 INFO Thread-8 :580021 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/config.yaml
345
+ 2021-07-14 21:33:00,796 INFO SenderThread:580021 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/output.log
346
+ 2021-07-14 21:33:00,796 INFO SenderThread:580021 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/wandb-summary.json
347
+ 2021-07-14 21:33:00,796 INFO SenderThread:580021 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files
348
+ 2021-07-14 21:33:00,796 INFO SenderThread:580021 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/requirements.txt requirements.txt
349
+ 2021-07-14 21:33:00,796 INFO SenderThread:580021 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/output.log output.log
350
+ 2021-07-14 21:33:00,797 INFO SenderThread:580021 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/wandb-metadata.json wandb-metadata.json
351
+ 2021-07-14 21:33:00,797 INFO SenderThread:580021 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/config.yaml config.yaml
352
+ 2021-07-14 21:33:00,797 INFO SenderThread:580021 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/wandb-summary.json wandb-summary.json
353
+ 2021-07-14 21:33:00,797 INFO SenderThread:580021 [sender.py:transition_state():308] send defer: 6
354
+ 2021-07-14 21:33:00,797 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: poll_exit
355
+ 2021-07-14 21:33:00,801 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: defer
356
+ 2021-07-14 21:33:00,801 INFO HandlerThread:580021 [handler.py:handle_request_defer():141] handle defer: 6
357
+ 2021-07-14 21:33:00,804 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: defer
358
+ 2021-07-14 21:33:00,805 INFO SenderThread:580021 [sender.py:send_request_defer():304] handle sender defer: 6
359
+ 2021-07-14 21:33:00,805 INFO SenderThread:580021 [file_pusher.py:finish():177] shutting down file pusher
360
+ 2021-07-14 21:33:00,903 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: poll_exit
361
+ 2021-07-14 21:33:00,903 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: poll_exit
362
+ 2021-07-14 21:33:01,005 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: poll_exit
363
+ 2021-07-14 21:33:01,006 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: poll_exit
364
+ 2021-07-14 21:33:01,108 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: poll_exit
365
+ 2021-07-14 21:33:01,108 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: poll_exit
366
+ 2021-07-14 21:33:01,210 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: poll_exit
367
+ 2021-07-14 21:33:01,210 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: poll_exit
368
+ 2021-07-14 21:33:01,236 INFO Thread-14 :580021 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/config.yaml
369
+ 2021-07-14 21:33:01,247 INFO Thread-12 :580021 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/requirements.txt
370
+ 2021-07-14 21:33:01,250 INFO Thread-15 :580021 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/wandb-summary.json
371
+ 2021-07-14 21:33:01,286 INFO Thread-13 :580021 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/files/output.log
372
+ 2021-07-14 21:33:01,312 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: poll_exit
373
+ 2021-07-14 21:33:01,312 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: poll_exit
374
+ 2021-07-14 21:33:01,414 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: poll_exit
375
+ 2021-07-14 21:33:01,414 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: poll_exit
376
+ 2021-07-14 21:33:01,486 INFO Thread-7 :580021 [sender.py:transition_state():308] send defer: 7
377
+ 2021-07-14 21:33:01,487 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: defer
378
+ 2021-07-14 21:33:01,487 INFO HandlerThread:580021 [handler.py:handle_request_defer():141] handle defer: 7
379
+ 2021-07-14 21:33:01,487 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: defer
380
+ 2021-07-14 21:33:01,487 INFO SenderThread:580021 [sender.py:send_request_defer():304] handle sender defer: 7
381
+ 2021-07-14 21:33:01,516 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: poll_exit
382
+ 2021-07-14 21:33:01,759 INFO SenderThread:580021 [sender.py:transition_state():308] send defer: 8
383
+ 2021-07-14 21:33:01,759 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: poll_exit
384
+ 2021-07-14 21:33:01,759 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: defer
385
+ 2021-07-14 21:33:01,760 INFO HandlerThread:580021 [handler.py:handle_request_defer():141] handle defer: 8
386
+ 2021-07-14 21:33:01,760 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: defer
387
+ 2021-07-14 21:33:01,760 INFO SenderThread:580021 [sender.py:send_request_defer():304] handle sender defer: 8
388
+ 2021-07-14 21:33:01,760 INFO SenderThread:580021 [sender.py:transition_state():308] send defer: 9
389
+ 2021-07-14 21:33:01,760 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: defer
390
+ 2021-07-14 21:33:01,760 DEBUG SenderThread:580021 [sender.py:send():179] send: final
391
+ 2021-07-14 21:33:01,761 INFO HandlerThread:580021 [handler.py:handle_request_defer():141] handle defer: 9
392
+ 2021-07-14 21:33:01,761 DEBUG SenderThread:580021 [sender.py:send():179] send: footer
393
+ 2021-07-14 21:33:01,761 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: defer
394
+ 2021-07-14 21:33:01,761 INFO SenderThread:580021 [sender.py:send_request_defer():304] handle sender defer: 9
395
+ 2021-07-14 21:33:01,861 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: poll_exit
396
+ 2021-07-14 21:33:01,861 DEBUG SenderThread:580021 [sender.py:send_request():193] send_request: poll_exit
397
+ 2021-07-14 21:33:01,861 INFO SenderThread:580021 [file_pusher.py:join():182] waiting for file pusher
398
+ 2021-07-14 21:33:01,863 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: get_summary
399
+ 2021-07-14 21:33:01,864 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: sampled_history
400
+ 2021-07-14 21:33:01,865 DEBUG HandlerThread:580021 [handler.py:handle_request():124] handle_request: shutdown
401
+ 2021-07-14 21:33:01,866 INFO HandlerThread:580021 [handler.py:finish():638] shutting down handler
402
+ 2021-07-14 21:33:02,761 INFO WriterThread:580021 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_212350-6qjir2uu/run-6qjir2uu.wandb
403
+ 2021-07-14 21:33:02,862 INFO SenderThread:580021 [sender.py:finish():945] shutting down sender
404
+ 2021-07-14 21:33:02,862 INFO SenderThread:580021 [file_pusher.py:finish():177] shutting down file pusher
405
+ 2021-07-14 21:33:02,862 INFO SenderThread:580021 [file_pusher.py:join():182] waiting for file pusher
406
+ 2021-07-14 21:33:02,864 INFO MainThread:580021 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210714_212350-6qjir2uu/logs/debug.log CHANGED
@@ -23,3 +23,99 @@ config: {}
23
  2021-07-14 21:23:52,724 INFO MainThread:578764 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_21-23-42_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 250, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 500, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
  2021-07-14 21:23:52,726 INFO MainThread:578764 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
25
  2021-07-14 21:23:52,727 INFO MainThread:578764 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  2021-07-14 21:23:52,724 INFO MainThread:578764 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_21-23-42_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 250, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 500, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
  2021-07-14 21:23:52,726 INFO MainThread:578764 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
25
  2021-07-14 21:23:52,727 INFO MainThread:578764 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
26
+ 2021-07-14 21:32:57,866 INFO MainThread:578764 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
27
+ 2021-07-14 21:32:57,867 INFO MainThread:578764 [wandb_run.py:_restore():1565] restore
28
+ 2021-07-14 21:32:59,898 INFO MainThread:578764 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
29
+ wandb_count: 1
30
+ }
31
+ pusher_stats {
32
+ uploaded_bytes: 1442
33
+ total_bytes: 1442
34
+ }
35
+
36
+ 2021-07-14 21:33:00,123 INFO MainThread:578764 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
37
+ wandb_count: 1
38
+ }
39
+ pusher_stats {
40
+ uploaded_bytes: 1442
41
+ total_bytes: 1442
42
+ }
43
+
44
+ 2021-07-14 21:33:00,802 INFO MainThread:578764 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
45
+ wandb_count: 4
46
+ }
47
+ pusher_stats {
48
+ uploaded_bytes: 1442
49
+ total_bytes: 12361
50
+ }
51
+
52
+ 2021-07-14 21:33:00,904 INFO MainThread:578764 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
53
+ wandb_count: 5
54
+ }
55
+ pusher_stats {
56
+ uploaded_bytes: 1442
57
+ total_bytes: 12603
58
+ }
59
+
60
+ 2021-07-14 21:33:01,006 INFO MainThread:578764 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
61
+ wandb_count: 5
62
+ }
63
+ pusher_stats {
64
+ uploaded_bytes: 12603
65
+ total_bytes: 12603
66
+ }
67
+
68
+ 2021-07-14 21:33:01,109 INFO MainThread:578764 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
69
+ wandb_count: 5
70
+ }
71
+ pusher_stats {
72
+ uploaded_bytes: 12603
73
+ total_bytes: 12603
74
+ }
75
+
76
+ 2021-07-14 21:33:01,210 INFO MainThread:578764 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
77
+ wandb_count: 5
78
+ }
79
+ pusher_stats {
80
+ uploaded_bytes: 12603
81
+ total_bytes: 12603
82
+ }
83
+
84
+ 2021-07-14 21:33:01,312 INFO MainThread:578764 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
85
+ wandb_count: 5
86
+ }
87
+ pusher_stats {
88
+ uploaded_bytes: 12603
89
+ total_bytes: 12603
90
+ }
91
+
92
+ 2021-07-14 21:33:01,414 INFO MainThread:578764 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
93
+ wandb_count: 5
94
+ }
95
+ pusher_stats {
96
+ uploaded_bytes: 12603
97
+ total_bytes: 12603
98
+ }
99
+
100
+ 2021-07-14 21:33:01,760 INFO MainThread:578764 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
101
+ wandb_count: 5
102
+ }
103
+ pusher_stats {
104
+ uploaded_bytes: 12603
105
+ total_bytes: 12603
106
+ }
107
+
108
+ 2021-07-14 21:33:01,862 INFO MainThread:578764 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
109
+ exit_result {
110
+ }
111
+ file_counts {
112
+ wandb_count: 5
113
+ }
114
+ pusher_stats {
115
+ uploaded_bytes: 12603
116
+ total_bytes: 12603
117
+ }
118
+
119
+ 2021-07-14 21:33:03,150 INFO MainThread:578764 [wandb_run.py:_show_summary():1870] rendering summary
120
+ 2021-07-14 21:33:03,150 INFO MainThread:578764 [wandb_run.py:_show_history():1908] rendering history
121
+ 2021-07-14 21:33:03,150 INFO MainThread:578764 [wandb_run.py:_show_files():1937] logging synced files
wandb/run-20210714_212350-6qjir2uu/run-6qjir2uu.wandb CHANGED
Binary files a/wandb/run-20210714_212350-6qjir2uu/run-6qjir2uu.wandb and b/wandb/run-20210714_212350-6qjir2uu/run-6qjir2uu.wandb differ
 
wandb/run-20210714_213537-20l16od8/files/config.yaml ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 2:
17
+ - 3
18
+ - 11
19
+ 4: 3.8.10
20
+ 5: 0.10.33
21
+ 6: 4.9.0.dev0
22
+ 8:
23
+ - 5
24
+ adafactor:
25
+ desc: null
26
+ value: false
27
+ adam_beta1:
28
+ desc: null
29
+ value: 0.9
30
+ adam_beta2:
31
+ desc: null
32
+ value: 0.98
33
+ adam_epsilon:
34
+ desc: null
35
+ value: 1.0e-08
36
+ cache_dir:
37
+ desc: null
38
+ value: null
39
+ config_name:
40
+ desc: null
41
+ value: ./
42
+ dataloader_drop_last:
43
+ desc: null
44
+ value: false
45
+ dataloader_num_workers:
46
+ desc: null
47
+ value: 0
48
+ dataloader_pin_memory:
49
+ desc: null
50
+ value: true
51
+ dataset_config_name:
52
+ desc: null
53
+ value: null
54
+ dataset_name:
55
+ desc: null
56
+ value: null
57
+ ddp_find_unused_parameters:
58
+ desc: null
59
+ value: null
60
+ debug:
61
+ desc: null
62
+ value: []
63
+ deepspeed:
64
+ desc: null
65
+ value: null
66
+ disable_tqdm:
67
+ desc: null
68
+ value: false
69
+ do_eval:
70
+ desc: null
71
+ value: false
72
+ do_predict:
73
+ desc: null
74
+ value: false
75
+ do_train:
76
+ desc: null
77
+ value: false
78
+ dtype:
79
+ desc: null
80
+ value: float32
81
+ eval_accumulation_steps:
82
+ desc: null
83
+ value: null
84
+ eval_steps:
85
+ desc: null
86
+ value: 500
87
+ evaluation_strategy:
88
+ desc: null
89
+ value: IntervalStrategy.NO
90
+ fp16:
91
+ desc: null
92
+ value: false
93
+ fp16_backend:
94
+ desc: null
95
+ value: auto
96
+ fp16_full_eval:
97
+ desc: null
98
+ value: false
99
+ fp16_opt_level:
100
+ desc: null
101
+ value: O1
102
+ gradient_accumulation_steps:
103
+ desc: null
104
+ value: 4
105
+ greater_is_better:
106
+ desc: null
107
+ value: null
108
+ group_by_length:
109
+ desc: null
110
+ value: false
111
+ ignore_data_skip:
112
+ desc: null
113
+ value: false
114
+ label_names:
115
+ desc: null
116
+ value: null
117
+ label_smoothing_factor:
118
+ desc: null
119
+ value: 0.0
120
+ learning_rate:
121
+ desc: null
122
+ value: 5.0e-05
123
+ length_column_name:
124
+ desc: null
125
+ value: length
126
+ line_by_line:
127
+ desc: null
128
+ value: false
129
+ load_best_model_at_end:
130
+ desc: null
131
+ value: false
132
+ local_rank:
133
+ desc: null
134
+ value: -1
135
+ log_level:
136
+ desc: null
137
+ value: -1
138
+ log_level_replica:
139
+ desc: null
140
+ value: -1
141
+ log_on_each_node:
142
+ desc: null
143
+ value: true
144
+ logging_dir:
145
+ desc: null
146
+ value: ./runs/Jul14_21-35-30_t1v-n-f5c06ea1-w-0
147
+ logging_first_step:
148
+ desc: null
149
+ value: false
150
+ logging_steps:
151
+ desc: null
152
+ value: 250
153
+ logging_strategy:
154
+ desc: null
155
+ value: IntervalStrategy.STEPS
156
+ lr_scheduler_type:
157
+ desc: null
158
+ value: SchedulerType.LINEAR
159
+ max_eval_samples:
160
+ desc: null
161
+ value: 500
162
+ max_grad_norm:
163
+ desc: null
164
+ value: 1.0
165
+ max_seq_length:
166
+ desc: null
167
+ value: 4096
168
+ max_steps:
169
+ desc: null
170
+ value: -1
171
+ metric_for_best_model:
172
+ desc: null
173
+ value: null
174
+ mlm_probability:
175
+ desc: null
176
+ value: 0.15
177
+ model_name_or_path:
178
+ desc: null
179
+ value: null
180
+ model_type:
181
+ desc: null
182
+ value: big_bird
183
+ mp_parameters:
184
+ desc: null
185
+ value: ''
186
+ no_cuda:
187
+ desc: null
188
+ value: false
189
+ num_train_epochs:
190
+ desc: null
191
+ value: 5.0
192
+ output_dir:
193
+ desc: null
194
+ value: ./
195
+ overwrite_cache:
196
+ desc: null
197
+ value: false
198
+ overwrite_output_dir:
199
+ desc: null
200
+ value: true
201
+ pad_to_max_length:
202
+ desc: null
203
+ value: false
204
+ past_index:
205
+ desc: null
206
+ value: -1
207
+ per_device_eval_batch_size:
208
+ desc: null
209
+ value: 2
210
+ per_device_train_batch_size:
211
+ desc: null
212
+ value: 2
213
+ per_gpu_eval_batch_size:
214
+ desc: null
215
+ value: null
216
+ per_gpu_train_batch_size:
217
+ desc: null
218
+ value: null
219
+ prediction_loss_only:
220
+ desc: null
221
+ value: false
222
+ preprocessing_num_workers:
223
+ desc: null
224
+ value: 96
225
+ push_to_hub:
226
+ desc: null
227
+ value: true
228
+ push_to_hub_model_id:
229
+ desc: null
230
+ value: ''
231
+ push_to_hub_organization:
232
+ desc: null
233
+ value: null
234
+ push_to_hub_token:
235
+ desc: null
236
+ value: null
237
+ remove_unused_columns:
238
+ desc: null
239
+ value: true
240
+ report_to:
241
+ desc: null
242
+ value:
243
+ - tensorboard
244
+ - wandb
245
+ resume_from_checkpoint:
246
+ desc: null
247
+ value: ./
248
+ run_name:
249
+ desc: null
250
+ value: ./
251
+ save_on_each_node:
252
+ desc: null
253
+ value: false
254
+ save_optimizer:
255
+ desc: null
256
+ value: true
257
+ save_steps:
258
+ desc: null
259
+ value: 500
260
+ save_strategy:
261
+ desc: null
262
+ value: IntervalStrategy.STEPS
263
+ save_total_limit:
264
+ desc: null
265
+ value: 5
266
+ seed:
267
+ desc: null
268
+ value: 42
269
+ sharded_ddp:
270
+ desc: null
271
+ value: []
272
+ skip_memory_metrics:
273
+ desc: null
274
+ value: true
275
+ tokenizer_name:
276
+ desc: null
277
+ value: ./
278
+ tpu_metrics_debug:
279
+ desc: null
280
+ value: false
281
+ tpu_num_cores:
282
+ desc: null
283
+ value: null
284
+ train_ref_file:
285
+ desc: null
286
+ value: null
287
+ use_fast_tokenizer:
288
+ desc: null
289
+ value: true
290
+ use_legacy_prediction_loop:
291
+ desc: null
292
+ value: false
293
+ validation_ref_file:
294
+ desc: null
295
+ value: null
296
+ validation_split_percentage:
297
+ desc: null
298
+ value: 5
299
+ warmup_ratio:
300
+ desc: null
301
+ value: 0.0
302
+ warmup_steps:
303
+ desc: null
304
+ value: 5000
305
+ weight_decay:
306
+ desc: null
307
+ value: 0.0095
wandb/run-20210714_213537-20l16od8/files/output.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [21:35:52] - INFO - absl - A polynomial schedule was set with a non-positive `transition_steps` value; this results in a constant schedule with value `init_value`.
2
+ /home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype <class 'jax._src.numpy.lax_numpy.int64'> requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more.
3
+ lax._check_user_dtype_supported(dtype, "zeros")
4
+ [21:35:52] - INFO - __main__ - RESTORING CHECKPOINT FROM ./...
5
+ tcmalloc: large alloc 1530273792 bytes == 0xd8c8e000 @ 0x7f41b5469680 0x7f41b548a824 0x5f7b11 0x648631 0x5c38e6 0x4f30e6 0x64ee88 0x505653 0x56acb6 0x568d9a 0x5f5b33 0x56aadf 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7f41b527e0b3 0x5f96de
6
+ restoring state of multisteps optimizer
7
+ [21:35:55] - INFO - __main__ - checkpoint restored
8
+ Traceback (most recent call last):
9
+ File "./run_mlm_flax.py", line 712, in <module>
10
+ state = restore_model_checkpoint(training_args.resume_from_checkpoint, state)
11
+ File "./run_mlm_flax.py", line 314, in restore_model_checkpoint
12
+ inner_opt_state = reinstantiate_states(opt_state.inner_opt_state)
13
+ File "./run_mlm_flax.py", line 294, in reinstantiate_states
14
+ cls = getattr(optax, type(state).__name__)
15
+ AttributeError: module 'optax' has no attribute 'list'
wandb/run-20210714_213537-20l16od8/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210714_213537-20l16od8/files/wandb-metadata.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-14T21:35:39.374289",
5
+ "startedAt": "2021-07-14T21:35:37.412426",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=5000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=250",
22
+ "--eval_steps=500",
23
+ "--num_train_epochs=5",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=500",
26
+ "--learning_rate=5e-5",
27
+ "--per_device_train_batch_size=2",
28
+ "--per_device_eval_batch_size=2",
29
+ "--save_total_limit=5",
30
+ "--max_eval_samples=500",
31
+ "--overwrite_cache",
32
+ "False",
33
+ "--gradient_accumulation_steps=4",
34
+ "--resume_from_checkpoint=./"
35
+ ],
36
+ "state": "running",
37
+ "program": "./run_mlm_flax.py",
38
+ "codePath": "run_mlm_flax.py",
39
+ "git": {
40
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
41
+ "commit": "5551a58bbb1262f3d17d25171ffa2da47567c22a"
42
+ },
43
+ "email": null,
44
+ "root": "/home/dat/pino-roberta-base",
45
+ "host": "t1v-n-f5c06ea1-w-0",
46
+ "username": "dat",
47
+ "executable": "/home/dat/pino/bin/python"
48
+ }
wandb/run-20210714_213537-20l16od8/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
wandb/run-20210714_213537-20l16od8/logs/debug-internal.log ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-14 21:35:38,058 INFO MainThread:582612 [internal.py:wandb_internal():88] W&B internal server running at pid: 582612, started at: 2021-07-14 21:35:38.058075
2
+ 2021-07-14 21:35:38,060 INFO WriterThread:582612 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/run-20l16od8.wandb
3
+ 2021-07-14 21:35:38,060 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: check_version
4
+ 2021-07-14 21:35:38,061 DEBUG SenderThread:582612 [sender.py:send():179] send: header
5
+ 2021-07-14 21:35:38,061 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: check_version
6
+ 2021-07-14 21:35:38,099 DEBUG SenderThread:582612 [sender.py:send():179] send: run
7
+ 2021-07-14 21:35:38,275 INFO SenderThread:582612 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files
8
+ 2021-07-14 21:35:38,275 INFO SenderThread:582612 [sender.py:_start_run_threads():716] run started: 20l16od8 with start time 1626298537
9
+ 2021-07-14 21:35:38,275 DEBUG SenderThread:582612 [sender.py:send():179] send: summary
10
+ 2021-07-14 21:35:38,275 INFO SenderThread:582612 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
11
+ 2021-07-14 21:35:38,276 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: run_start
12
+ 2021-07-14 21:35:39,277 INFO Thread-8 :582612 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/wandb-summary.json
13
+ 2021-07-14 21:35:39,374 DEBUG HandlerThread:582612 [meta.py:__init__():39] meta init
14
+ 2021-07-14 21:35:39,374 DEBUG HandlerThread:582612 [meta.py:__init__():53] meta init done
15
+ 2021-07-14 21:35:39,374 DEBUG HandlerThread:582612 [meta.py:probe():210] probe
16
+ 2021-07-14 21:35:39,375 DEBUG HandlerThread:582612 [meta.py:_setup_git():200] setup git
17
+ 2021-07-14 21:35:39,404 DEBUG HandlerThread:582612 [meta.py:_setup_git():207] setup git done
18
+ 2021-07-14 21:35:39,405 DEBUG HandlerThread:582612 [meta.py:_save_pip():57] save pip
19
+ 2021-07-14 21:35:39,405 DEBUG HandlerThread:582612 [meta.py:_save_pip():71] save pip done
20
+ 2021-07-14 21:35:39,405 DEBUG HandlerThread:582612 [meta.py:probe():252] probe done
21
+ 2021-07-14 21:35:39,408 DEBUG SenderThread:582612 [sender.py:send():179] send: files
22
+ 2021-07-14 21:35:39,408 INFO SenderThread:582612 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
23
+ 2021-07-14 21:35:39,414 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: stop_status
24
+ 2021-07-14 21:35:39,415 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: stop_status
25
+ 2021-07-14 21:35:39,542 DEBUG SenderThread:582612 [sender.py:send():179] send: config
26
+ 2021-07-14 21:35:39,542 DEBUG SenderThread:582612 [sender.py:send():179] send: config
27
+ 2021-07-14 21:35:39,542 DEBUG SenderThread:582612 [sender.py:send():179] send: config
28
+ 2021-07-14 21:35:39,845 INFO Thread-11 :582612 [upload_job.py:push():137] Uploaded file /tmp/tmp81cfdra0wandb/3f4lkdmt-wandb-metadata.json
29
+ 2021-07-14 21:35:40,276 INFO Thread-8 :582612 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/requirements.txt
30
+ 2021-07-14 21:35:40,276 INFO Thread-8 :582612 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/wandb-metadata.json
31
+ 2021-07-14 21:35:40,276 INFO Thread-8 :582612 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/output.log
32
+ 2021-07-14 21:35:54,281 INFO Thread-8 :582612 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/output.log
33
+ 2021-07-14 21:35:54,573 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: stop_status
34
+ 2021-07-14 21:35:54,574 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: stop_status
35
+ 2021-07-14 21:35:56,282 INFO Thread-8 :582612 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/output.log
36
+ 2021-07-14 21:35:57,283 INFO Thread-8 :582612 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/output.log
37
+ 2021-07-14 21:35:58,079 DEBUG SenderThread:582612 [sender.py:send():179] send: telemetry
38
+ 2021-07-14 21:35:58,080 DEBUG SenderThread:582612 [sender.py:send():179] send: exit
39
+ 2021-07-14 21:35:58,080 INFO SenderThread:582612 [sender.py:send_exit():287] handling exit code: 1
40
+ 2021-07-14 21:35:58,080 INFO SenderThread:582612 [sender.py:send_exit():295] send defer
41
+ 2021-07-14 21:35:58,080 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: poll_exit
42
+ 2021-07-14 21:35:58,080 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: poll_exit
43
+ 2021-07-14 21:35:58,081 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: defer
44
+ 2021-07-14 21:35:58,081 INFO HandlerThread:582612 [handler.py:handle_request_defer():141] handle defer: 0
45
+ 2021-07-14 21:35:58,081 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: defer
46
+ 2021-07-14 21:35:58,081 INFO SenderThread:582612 [sender.py:send_request_defer():304] handle sender defer: 0
47
+ 2021-07-14 21:35:58,081 INFO SenderThread:582612 [sender.py:transition_state():308] send defer: 1
48
+ 2021-07-14 21:35:58,081 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: defer
49
+ 2021-07-14 21:35:58,081 INFO HandlerThread:582612 [handler.py:handle_request_defer():141] handle defer: 1
50
+ 2021-07-14 21:35:58,128 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: defer
51
+ 2021-07-14 21:35:58,128 INFO SenderThread:582612 [sender.py:send_request_defer():304] handle sender defer: 1
52
+ 2021-07-14 21:35:58,128 INFO SenderThread:582612 [sender.py:transition_state():308] send defer: 2
53
+ 2021-07-14 21:35:58,128 DEBUG SenderThread:582612 [sender.py:send():179] send: stats
54
+ 2021-07-14 21:35:58,128 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: defer
55
+ 2021-07-14 21:35:58,129 INFO HandlerThread:582612 [handler.py:handle_request_defer():141] handle defer: 2
56
+ 2021-07-14 21:35:58,129 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: defer
57
+ 2021-07-14 21:35:58,129 INFO SenderThread:582612 [sender.py:send_request_defer():304] handle sender defer: 2
58
+ 2021-07-14 21:35:58,129 INFO SenderThread:582612 [sender.py:transition_state():308] send defer: 3
59
+ 2021-07-14 21:35:58,129 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: defer
60
+ 2021-07-14 21:35:58,129 INFO HandlerThread:582612 [handler.py:handle_request_defer():141] handle defer: 3
61
+ 2021-07-14 21:35:58,129 DEBUG SenderThread:582612 [sender.py:send():179] send: summary
62
+ 2021-07-14 21:35:58,130 INFO SenderThread:582612 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
63
+ 2021-07-14 21:35:58,130 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: defer
64
+ 2021-07-14 21:35:58,130 INFO SenderThread:582612 [sender.py:send_request_defer():304] handle sender defer: 3
65
+ 2021-07-14 21:35:58,130 INFO SenderThread:582612 [sender.py:transition_state():308] send defer: 4
66
+ 2021-07-14 21:35:58,130 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: defer
67
+ 2021-07-14 21:35:58,130 INFO HandlerThread:582612 [handler.py:handle_request_defer():141] handle defer: 4
68
+ 2021-07-14 21:35:58,130 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: defer
69
+ 2021-07-14 21:35:58,130 INFO SenderThread:582612 [sender.py:send_request_defer():304] handle sender defer: 4
70
+ 2021-07-14 21:35:58,183 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: poll_exit
71
+ 2021-07-14 21:35:58,299 INFO SenderThread:582612 [sender.py:transition_state():308] send defer: 5
72
+ 2021-07-14 21:35:58,299 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: poll_exit
73
+ 2021-07-14 21:35:58,300 INFO Thread-8 :582612 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/output.log
74
+ 2021-07-14 21:35:58,300 INFO Thread-8 :582612 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/wandb-summary.json
75
+ 2021-07-14 21:35:58,300 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: defer
76
+ 2021-07-14 21:35:58,300 INFO HandlerThread:582612 [handler.py:handle_request_defer():141] handle defer: 5
77
+ 2021-07-14 21:35:58,300 INFO Thread-8 :582612 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/config.yaml
78
+ 2021-07-14 21:35:58,301 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: defer
79
+ 2021-07-14 21:35:58,301 INFO SenderThread:582612 [sender.py:send_request_defer():304] handle sender defer: 5
80
+ 2021-07-14 21:35:58,301 INFO SenderThread:582612 [dir_watcher.py:finish():282] shutting down directory watcher
81
+ 2021-07-14 21:35:58,402 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: poll_exit
82
+ 2021-07-14 21:35:59,301 INFO SenderThread:582612 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files
83
+ 2021-07-14 21:35:59,302 INFO SenderThread:582612 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/requirements.txt requirements.txt
84
+ 2021-07-14 21:35:59,302 INFO SenderThread:582612 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/output.log output.log
85
+ 2021-07-14 21:35:59,302 INFO SenderThread:582612 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/wandb-metadata.json wandb-metadata.json
86
+ 2021-07-14 21:35:59,302 INFO SenderThread:582612 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/config.yaml config.yaml
87
+ 2021-07-14 21:35:59,302 INFO SenderThread:582612 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/wandb-summary.json wandb-summary.json
88
+ 2021-07-14 21:35:59,302 INFO SenderThread:582612 [sender.py:transition_state():308] send defer: 6
89
+ 2021-07-14 21:35:59,303 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: poll_exit
90
+ 2021-07-14 21:35:59,303 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: defer
91
+ 2021-07-14 21:35:59,303 INFO HandlerThread:582612 [handler.py:handle_request_defer():141] handle defer: 6
92
+ 2021-07-14 21:35:59,307 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: defer
93
+ 2021-07-14 21:35:59,307 INFO SenderThread:582612 [sender.py:send_request_defer():304] handle sender defer: 6
94
+ 2021-07-14 21:35:59,307 INFO SenderThread:582612 [file_pusher.py:finish():177] shutting down file pusher
95
+ 2021-07-14 21:35:59,405 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: poll_exit
96
+ 2021-07-14 21:35:59,405 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: poll_exit
97
+ 2021-07-14 21:35:59,507 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: poll_exit
98
+ 2021-07-14 21:35:59,507 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: poll_exit
99
+ 2021-07-14 21:35:59,609 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: poll_exit
100
+ 2021-07-14 21:35:59,609 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: poll_exit
101
+ 2021-07-14 21:35:59,711 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: poll_exit
102
+ 2021-07-14 21:35:59,711 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: poll_exit
103
+ 2021-07-14 21:35:59,738 INFO Thread-15 :582612 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/wandb-summary.json
104
+ 2021-07-14 21:35:59,759 INFO Thread-13 :582612 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/output.log
105
+ 2021-07-14 21:35:59,764 INFO Thread-14 :582612 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/config.yaml
106
+ 2021-07-14 21:35:59,779 INFO Thread-12 :582612 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/files/requirements.txt
107
+ 2021-07-14 21:35:59,813 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: poll_exit
108
+ 2021-07-14 21:35:59,813 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: poll_exit
109
+ 2021-07-14 21:35:59,915 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: poll_exit
110
+ 2021-07-14 21:35:59,915 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: poll_exit
111
+ 2021-07-14 21:35:59,979 INFO Thread-7 :582612 [sender.py:transition_state():308] send defer: 7
112
+ 2021-07-14 21:35:59,980 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: defer
113
+ 2021-07-14 21:35:59,980 INFO HandlerThread:582612 [handler.py:handle_request_defer():141] handle defer: 7
114
+ 2021-07-14 21:35:59,980 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: defer
115
+ 2021-07-14 21:35:59,980 INFO SenderThread:582612 [sender.py:send_request_defer():304] handle sender defer: 7
116
+ 2021-07-14 21:36:00,016 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: poll_exit
117
+ 2021-07-14 21:36:00,132 INFO SenderThread:582612 [sender.py:transition_state():308] send defer: 8
118
+ 2021-07-14 21:36:00,132 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: poll_exit
119
+ 2021-07-14 21:36:00,133 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: defer
120
+ 2021-07-14 21:36:00,133 INFO HandlerThread:582612 [handler.py:handle_request_defer():141] handle defer: 8
121
+ 2021-07-14 21:36:00,133 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: defer
122
+ 2021-07-14 21:36:00,133 INFO SenderThread:582612 [sender.py:send_request_defer():304] handle sender defer: 8
123
+ 2021-07-14 21:36:00,133 INFO SenderThread:582612 [sender.py:transition_state():308] send defer: 9
124
+ 2021-07-14 21:36:00,134 DEBUG SenderThread:582612 [sender.py:send():179] send: final
125
+ 2021-07-14 21:36:00,134 DEBUG SenderThread:582612 [sender.py:send():179] send: footer
126
+ 2021-07-14 21:36:00,134 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: defer
127
+ 2021-07-14 21:36:00,134 INFO HandlerThread:582612 [handler.py:handle_request_defer():141] handle defer: 9
128
+ 2021-07-14 21:36:00,134 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: defer
129
+ 2021-07-14 21:36:00,134 INFO SenderThread:582612 [sender.py:send_request_defer():304] handle sender defer: 9
130
+ 2021-07-14 21:36:00,234 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: poll_exit
131
+ 2021-07-14 21:36:00,235 DEBUG SenderThread:582612 [sender.py:send_request():193] send_request: poll_exit
132
+ 2021-07-14 21:36:00,235 INFO SenderThread:582612 [file_pusher.py:join():182] waiting for file pusher
133
+ 2021-07-14 21:36:00,236 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: get_summary
134
+ 2021-07-14 21:36:00,237 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: sampled_history
135
+ 2021-07-14 21:36:00,237 DEBUG HandlerThread:582612 [handler.py:handle_request():124] handle_request: shutdown
136
+ 2021-07-14 21:36:00,237 INFO HandlerThread:582612 [handler.py:finish():638] shutting down handler
137
+ 2021-07-14 21:36:01,134 INFO WriterThread:582612 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/run-20l16od8.wandb
138
+ 2021-07-14 21:36:01,235 INFO SenderThread:582612 [sender.py:finish():945] shutting down sender
139
+ 2021-07-14 21:36:01,235 INFO SenderThread:582612 [file_pusher.py:finish():177] shutting down file pusher
140
+ 2021-07-14 21:36:01,235 INFO SenderThread:582612 [file_pusher.py:join():182] waiting for file pusher
141
+ 2021-07-14 21:36:01,237 INFO MainThread:582612 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210714_213537-20l16od8/logs/debug.log ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-14 21:35:37,414 INFO MainThread:581357 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-14 21:35:37,414 INFO MainThread:581357 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-14 21:35:37,414 INFO MainThread:581357 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/logs/debug.log
4
+ 2021-07-14 21:35:37,414 INFO MainThread:581357 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_213537-20l16od8/logs/debug-internal.log
5
+ 2021-07-14 21:35:37,414 INFO MainThread:581357 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-14 21:35:37,414 INFO MainThread:581357 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-14 21:35:37,414 INFO MainThread:581357 [wandb_init.py:init():419] starting backend
9
+ 2021-07-14 21:35:37,414 INFO MainThread:581357 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-14 21:35:37,461 INFO MainThread:581357 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-14 21:35:37,506 INFO MainThread:581357 [backend.py:ensure_launched():139] started backend process with pid: 582612
12
+ 2021-07-14 21:35:37,508 INFO MainThread:581357 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-14 21:35:37,511 INFO MainThread:581357 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-14 21:35:37,512 INFO MainThread:581357 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-14 21:35:38,098 INFO MainThread:581357 [wandb_init.py:init():496] got version response
16
+ 2021-07-14 21:35:38,098 INFO MainThread:581357 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-14 21:35:38,275 INFO MainThread:581357 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-14 21:35:39,411 INFO MainThread:581357 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-14 21:35:39,412 INFO MainThread:581357 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-14 21:35:39,412 INFO MainThread:581357 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-14 21:35:39,414 INFO MainThread:581357 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-14 21:35:39,414 INFO MainThread:581357 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-14 21:35:39,420 INFO MainThread:581357 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_21-35-30_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 250, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 500, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-14 21:35:39,422 INFO MainThread:581357 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
25
+ 2021-07-14 21:35:39,424 INFO MainThread:581357 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
26
+ 2021-07-14 21:35:55,601 INFO MainThread:581357 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
27
+ 2021-07-14 21:35:55,602 INFO MainThread:581357 [wandb_run.py:_restore():1565] restore
28
+ 2021-07-14 21:35:58,081 INFO MainThread:581357 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
29
+ wandb_count: 1
30
+ }
31
+ pusher_stats {
32
+ uploaded_bytes: 1481
33
+ total_bytes: 1481
34
+ }
35
+
36
+ 2021-07-14 21:35:58,301 INFO MainThread:581357 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
37
+ wandb_count: 1
38
+ }
39
+ pusher_stats {
40
+ uploaded_bytes: 1481
41
+ total_bytes: 1481
42
+ }
43
+
44
+ 2021-07-14 21:35:59,303 INFO MainThread:581357 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
45
+ wandb_count: 4
46
+ }
47
+ pusher_stats {
48
+ uploaded_bytes: 1481
49
+ total_bytes: 9251
50
+ }
51
+
52
+ 2021-07-14 21:35:59,406 INFO MainThread:581357 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
53
+ wandb_count: 5
54
+ }
55
+ pusher_stats {
56
+ uploaded_bytes: 1481
57
+ total_bytes: 9253
58
+ }
59
+
60
+ 2021-07-14 21:35:59,508 INFO MainThread:581357 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
61
+ wandb_count: 5
62
+ }
63
+ pusher_stats {
64
+ uploaded_bytes: 9253
65
+ total_bytes: 9253
66
+ }
67
+
68
+ 2021-07-14 21:35:59,610 INFO MainThread:581357 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
69
+ wandb_count: 5
70
+ }
71
+ pusher_stats {
72
+ uploaded_bytes: 9253
73
+ total_bytes: 9253
74
+ }
75
+
76
+ 2021-07-14 21:35:59,712 INFO MainThread:581357 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
77
+ wandb_count: 5
78
+ }
79
+ pusher_stats {
80
+ uploaded_bytes: 9253
81
+ total_bytes: 9253
82
+ }
83
+
84
+ 2021-07-14 21:35:59,814 INFO MainThread:581357 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
85
+ wandb_count: 5
86
+ }
87
+ pusher_stats {
88
+ uploaded_bytes: 9253
89
+ total_bytes: 9253
90
+ }
91
+
92
+ 2021-07-14 21:35:59,915 INFO MainThread:581357 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
93
+ wandb_count: 5
94
+ }
95
+ pusher_stats {
96
+ uploaded_bytes: 9253
97
+ total_bytes: 9253
98
+ }
99
+
100
+ 2021-07-14 21:36:00,133 INFO MainThread:581357 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
101
+ wandb_count: 5
102
+ }
103
+ pusher_stats {
104
+ uploaded_bytes: 9253
105
+ total_bytes: 9253
106
+ }
107
+
108
+ 2021-07-14 21:36:00,235 INFO MainThread:581357 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
109
+ exit_result {
110
+ }
111
+ file_counts {
112
+ wandb_count: 5
113
+ }
114
+ pusher_stats {
115
+ uploaded_bytes: 9253
116
+ total_bytes: 9253
117
+ }
118
+
119
+ 2021-07-14 21:36:01,503 INFO MainThread:581357 [wandb_run.py:_show_files():1937] logging synced files
wandb/run-20210714_213537-20l16od8/run-20l16od8.wandb ADDED
Binary file (4.8 kB). View file
 
wandb/run-20210714_213944-3j6d3fy2/files/config.yaml ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 4: 3.8.10
17
+ 5: 0.10.33
18
+ 6: 4.9.0.dev0
19
+ 8:
20
+ - 5
21
+ adafactor:
22
+ desc: null
23
+ value: false
24
+ adam_beta1:
25
+ desc: null
26
+ value: 0.9
27
+ adam_beta2:
28
+ desc: null
29
+ value: 0.98
30
+ adam_epsilon:
31
+ desc: null
32
+ value: 1.0e-08
33
+ cache_dir:
34
+ desc: null
35
+ value: null
36
+ config_name:
37
+ desc: null
38
+ value: ./
39
+ dataloader_drop_last:
40
+ desc: null
41
+ value: false
42
+ dataloader_num_workers:
43
+ desc: null
44
+ value: 0
45
+ dataloader_pin_memory:
46
+ desc: null
47
+ value: true
48
+ dataset_config_name:
49
+ desc: null
50
+ value: null
51
+ dataset_name:
52
+ desc: null
53
+ value: null
54
+ ddp_find_unused_parameters:
55
+ desc: null
56
+ value: null
57
+ debug:
58
+ desc: null
59
+ value: []
60
+ deepspeed:
61
+ desc: null
62
+ value: null
63
+ disable_tqdm:
64
+ desc: null
65
+ value: false
66
+ do_eval:
67
+ desc: null
68
+ value: false
69
+ do_predict:
70
+ desc: null
71
+ value: false
72
+ do_train:
73
+ desc: null
74
+ value: false
75
+ dtype:
76
+ desc: null
77
+ value: float32
78
+ eval_accumulation_steps:
79
+ desc: null
80
+ value: null
81
+ eval_steps:
82
+ desc: null
83
+ value: 500
84
+ evaluation_strategy:
85
+ desc: null
86
+ value: IntervalStrategy.NO
87
+ fp16:
88
+ desc: null
89
+ value: false
90
+ fp16_backend:
91
+ desc: null
92
+ value: auto
93
+ fp16_full_eval:
94
+ desc: null
95
+ value: false
96
+ fp16_opt_level:
97
+ desc: null
98
+ value: O1
99
+ gradient_accumulation_steps:
100
+ desc: null
101
+ value: 4
102
+ greater_is_better:
103
+ desc: null
104
+ value: null
105
+ group_by_length:
106
+ desc: null
107
+ value: false
108
+ ignore_data_skip:
109
+ desc: null
110
+ value: false
111
+ label_names:
112
+ desc: null
113
+ value: null
114
+ label_smoothing_factor:
115
+ desc: null
116
+ value: 0.0
117
+ learning_rate:
118
+ desc: null
119
+ value: 5.0e-05
120
+ length_column_name:
121
+ desc: null
122
+ value: length
123
+ line_by_line:
124
+ desc: null
125
+ value: false
126
+ load_best_model_at_end:
127
+ desc: null
128
+ value: false
129
+ local_rank:
130
+ desc: null
131
+ value: -1
132
+ log_level:
133
+ desc: null
134
+ value: -1
135
+ log_level_replica:
136
+ desc: null
137
+ value: -1
138
+ log_on_each_node:
139
+ desc: null
140
+ value: true
141
+ logging_dir:
142
+ desc: null
143
+ value: ./runs/Jul14_21-39-37_t1v-n-f5c06ea1-w-0
144
+ logging_first_step:
145
+ desc: null
146
+ value: false
147
+ logging_steps:
148
+ desc: null
149
+ value: 250
150
+ logging_strategy:
151
+ desc: null
152
+ value: IntervalStrategy.STEPS
153
+ lr_scheduler_type:
154
+ desc: null
155
+ value: SchedulerType.LINEAR
156
+ max_eval_samples:
157
+ desc: null
158
+ value: 500
159
+ max_grad_norm:
160
+ desc: null
161
+ value: 1.0
162
+ max_seq_length:
163
+ desc: null
164
+ value: 4096
165
+ max_steps:
166
+ desc: null
167
+ value: -1
168
+ metric_for_best_model:
169
+ desc: null
170
+ value: null
171
+ mlm_probability:
172
+ desc: null
173
+ value: 0.15
174
+ model_name_or_path:
175
+ desc: null
176
+ value: null
177
+ model_type:
178
+ desc: null
179
+ value: big_bird
180
+ mp_parameters:
181
+ desc: null
182
+ value: ''
183
+ no_cuda:
184
+ desc: null
185
+ value: false
186
+ num_train_epochs:
187
+ desc: null
188
+ value: 5.0
189
+ output_dir:
190
+ desc: null
191
+ value: ./
192
+ overwrite_cache:
193
+ desc: null
194
+ value: false
195
+ overwrite_output_dir:
196
+ desc: null
197
+ value: true
198
+ pad_to_max_length:
199
+ desc: null
200
+ value: false
201
+ past_index:
202
+ desc: null
203
+ value: -1
204
+ per_device_eval_batch_size:
205
+ desc: null
206
+ value: 2
207
+ per_device_train_batch_size:
208
+ desc: null
209
+ value: 2
210
+ per_gpu_eval_batch_size:
211
+ desc: null
212
+ value: null
213
+ per_gpu_train_batch_size:
214
+ desc: null
215
+ value: null
216
+ prediction_loss_only:
217
+ desc: null
218
+ value: false
219
+ preprocessing_num_workers:
220
+ desc: null
221
+ value: 96
222
+ push_to_hub:
223
+ desc: null
224
+ value: true
225
+ push_to_hub_model_id:
226
+ desc: null
227
+ value: ''
228
+ push_to_hub_organization:
229
+ desc: null
230
+ value: null
231
+ push_to_hub_token:
232
+ desc: null
233
+ value: null
234
+ remove_unused_columns:
235
+ desc: null
236
+ value: true
237
+ report_to:
238
+ desc: null
239
+ value:
240
+ - tensorboard
241
+ - wandb
242
+ resume_from_checkpoint:
243
+ desc: null
244
+ value: null
245
+ run_name:
246
+ desc: null
247
+ value: ./
248
+ save_on_each_node:
249
+ desc: null
250
+ value: false
251
+ save_optimizer:
252
+ desc: null
253
+ value: true
254
+ save_steps:
255
+ desc: null
256
+ value: 5
257
+ save_strategy:
258
+ desc: null
259
+ value: IntervalStrategy.STEPS
260
+ save_total_limit:
261
+ desc: null
262
+ value: 5
263
+ seed:
264
+ desc: null
265
+ value: 42
266
+ sharded_ddp:
267
+ desc: null
268
+ value: []
269
+ skip_memory_metrics:
270
+ desc: null
271
+ value: true
272
+ tokenizer_name:
273
+ desc: null
274
+ value: ./
275
+ tpu_metrics_debug:
276
+ desc: null
277
+ value: false
278
+ tpu_num_cores:
279
+ desc: null
280
+ value: null
281
+ train_ref_file:
282
+ desc: null
283
+ value: null
284
+ use_fast_tokenizer:
285
+ desc: null
286
+ value: true
287
+ use_legacy_prediction_loop:
288
+ desc: null
289
+ value: false
290
+ validation_ref_file:
291
+ desc: null
292
+ value: null
293
+ validation_split_percentage:
294
+ desc: null
295
+ value: 5
296
+ warmup_ratio:
297
+ desc: null
298
+ value: 0.0
299
+ warmup_steps:
300
+ desc: null
301
+ value: 5000
302
+ weight_decay:
303
+ desc: null
304
+ value: 0.0095
wandb/run-20210714_213944-3j6d3fy2/files/output.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [21:39:59] - INFO - absl - A polynomial schedule was set with a non-positive `transition_steps` value; this results in a constant schedule with value `init_value`.
2
+ /home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype <class 'jax._src.numpy.lax_numpy.int64'> requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more.
3
+ lax._check_user_dtype_supported(dtype, "zeros")
4
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
5
+ warnings.warn(
6
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
7
+ warnings.warn(
8
+ Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s][21:40:00] - INFO - __main__ - Skipping to epoch 0 step 0
9
+
10
+
11
+ [21:41:37] - INFO - huggingface_hub.repository - git version 2.25.1 | 5/1250 [01:27<2:38:49, 7.65s/it]
12
+ git-lfs/2.9.2 (GitHub; linux amd64; go 1.13.5)
wandb/run-20210714_213944-3j6d3fy2/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210714_213944-3j6d3fy2/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-14T21:39:46.918417",
5
+ "startedAt": "2021-07-14T21:39:44.931675",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=5000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=250",
22
+ "--eval_steps=500",
23
+ "--num_train_epochs=5",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=5",
26
+ "--learning_rate=5e-5",
27
+ "--per_device_train_batch_size=2",
28
+ "--per_device_eval_batch_size=2",
29
+ "--save_total_limit=5",
30
+ "--max_eval_samples=500",
31
+ "--overwrite_cache",
32
+ "False",
33
+ "--gradient_accumulation_steps=4"
34
+ ],
35
+ "state": "running",
36
+ "program": "./run_mlm_flax.py",
37
+ "codePath": "run_mlm_flax.py",
38
+ "git": {
39
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
40
+ "commit": "5551a58bbb1262f3d17d25171ffa2da47567c22a"
41
+ },
42
+ "email": null,
43
+ "root": "/home/dat/pino-roberta-base",
44
+ "host": "t1v-n-f5c06ea1-w-0",
45
+ "username": "dat",
46
+ "executable": "/home/dat/pino/bin/python"
47
+ }
wandb/run-20210714_213944-3j6d3fy2/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
wandb/run-20210714_213944-3j6d3fy2/logs/debug-internal.log ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-14 21:39:45,622 INFO MainThread:584332 [internal.py:wandb_internal():88] W&B internal server running at pid: 584332, started at: 2021-07-14 21:39:45.622063
2
+ 2021-07-14 21:39:45,624 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: check_version
3
+ 2021-07-14 21:39:45,624 INFO WriterThread:584332 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/run-3j6d3fy2.wandb
4
+ 2021-07-14 21:39:45,625 DEBUG SenderThread:584332 [sender.py:send():179] send: header
5
+ 2021-07-14 21:39:45,625 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: check_version
6
+ 2021-07-14 21:39:45,661 DEBUG SenderThread:584332 [sender.py:send():179] send: run
7
+ 2021-07-14 21:39:45,830 INFO SenderThread:584332 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files
8
+ 2021-07-14 21:39:45,830 INFO SenderThread:584332 [sender.py:_start_run_threads():716] run started: 3j6d3fy2 with start time 1626298785
9
+ 2021-07-14 21:39:45,830 DEBUG SenderThread:584332 [sender.py:send():179] send: summary
10
+ 2021-07-14 21:39:45,830 INFO SenderThread:584332 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
11
+ 2021-07-14 21:39:45,831 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: run_start
12
+ 2021-07-14 21:39:46,834 INFO Thread-8 :584332 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/wandb-summary.json
13
+ 2021-07-14 21:39:46,918 DEBUG HandlerThread:584332 [meta.py:__init__():39] meta init
14
+ 2021-07-14 21:39:46,918 DEBUG HandlerThread:584332 [meta.py:__init__():53] meta init done
15
+ 2021-07-14 21:39:46,918 DEBUG HandlerThread:584332 [meta.py:probe():210] probe
16
+ 2021-07-14 21:39:46,919 DEBUG HandlerThread:584332 [meta.py:_setup_git():200] setup git
17
+ 2021-07-14 21:39:46,949 DEBUG HandlerThread:584332 [meta.py:_setup_git():207] setup git done
18
+ 2021-07-14 21:39:46,949 DEBUG HandlerThread:584332 [meta.py:_save_pip():57] save pip
19
+ 2021-07-14 21:39:46,949 DEBUG HandlerThread:584332 [meta.py:_save_pip():71] save pip done
20
+ 2021-07-14 21:39:46,949 DEBUG HandlerThread:584332 [meta.py:probe():252] probe done
21
+ 2021-07-14 21:39:46,952 DEBUG SenderThread:584332 [sender.py:send():179] send: files
22
+ 2021-07-14 21:39:46,952 INFO SenderThread:584332 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
23
+ 2021-07-14 21:39:46,958 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
24
+ 2021-07-14 21:39:46,959 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
25
+ 2021-07-14 21:39:47,087 DEBUG SenderThread:584332 [sender.py:send():179] send: config
26
+ 2021-07-14 21:39:47,088 DEBUG SenderThread:584332 [sender.py:send():179] send: config
27
+ 2021-07-14 21:39:47,088 DEBUG SenderThread:584332 [sender.py:send():179] send: config
28
+ 2021-07-14 21:39:47,442 INFO Thread-11 :584332 [upload_job.py:push():137] Uploaded file /tmp/tmp4jyyzi24wandb/1yoeh974-wandb-metadata.json
29
+ 2021-07-14 21:39:47,832 INFO Thread-8 :584332 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/wandb-metadata.json
30
+ 2021-07-14 21:39:47,832 INFO Thread-8 :584332 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/output.log
31
+ 2021-07-14 21:39:47,832 INFO Thread-8 :584332 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/requirements.txt
32
+ 2021-07-14 21:40:01,837 INFO Thread-8 :584332 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/output.log
33
+ 2021-07-14 21:40:02,089 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
34
+ 2021-07-14 21:40:02,090 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
35
+ 2021-07-14 21:40:07,839 INFO Thread-8 :584332 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/output.log
36
+ 2021-07-14 21:40:14,997 DEBUG SenderThread:584332 [sender.py:send():179] send: stats
37
+ 2021-07-14 21:40:16,842 INFO Thread-8 :584332 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/config.yaml
38
+ 2021-07-14 21:40:17,237 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
39
+ 2021-07-14 21:40:17,237 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
40
+ 2021-07-14 21:40:32,378 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
41
+ 2021-07-14 21:40:32,378 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
42
+ 2021-07-14 21:40:45,080 DEBUG SenderThread:584332 [sender.py:send():179] send: stats
43
+ 2021-07-14 21:40:47,511 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
44
+ 2021-07-14 21:40:47,511 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
45
+ 2021-07-14 21:41:02,643 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
46
+ 2021-07-14 21:41:02,643 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
47
+ 2021-07-14 21:41:15,151 DEBUG SenderThread:584332 [sender.py:send():179] send: stats
48
+ 2021-07-14 21:41:17,776 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
49
+ 2021-07-14 21:41:17,776 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
50
+ 2021-07-14 21:41:32,912 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
51
+ 2021-07-14 21:41:32,912 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
52
+ 2021-07-14 21:41:33,872 INFO Thread-8 :584332 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/output.log
53
+ 2021-07-14 21:41:35,873 INFO Thread-8 :584332 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/output.log
54
+ 2021-07-14 21:41:39,875 INFO Thread-8 :584332 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/output.log
55
+ 2021-07-14 21:41:45,230 DEBUG SenderThread:584332 [sender.py:send():179] send: stats
56
+ 2021-07-14 21:41:48,051 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
57
+ 2021-07-14 21:41:48,052 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
wandb/run-20210714_213944-3j6d3fy2/logs/debug.log ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-14 21:39:44,933 INFO MainThread:583081 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-14 21:39:44,933 INFO MainThread:583081 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-14 21:39:44,933 INFO MainThread:583081 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/logs/debug.log
4
+ 2021-07-14 21:39:44,933 INFO MainThread:583081 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/logs/debug-internal.log
5
+ 2021-07-14 21:39:44,933 INFO MainThread:583081 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-14 21:39:44,933 INFO MainThread:583081 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-14 21:39:44,933 INFO MainThread:583081 [wandb_init.py:init():419] starting backend
9
+ 2021-07-14 21:39:44,933 INFO MainThread:583081 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-14 21:39:44,979 INFO MainThread:583081 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-14 21:39:45,023 INFO MainThread:583081 [backend.py:ensure_launched():139] started backend process with pid: 584332
12
+ 2021-07-14 21:39:45,025 INFO MainThread:583081 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-14 21:39:45,028 INFO MainThread:583081 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-14 21:39:45,029 INFO MainThread:583081 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-14 21:39:45,659 INFO MainThread:583081 [wandb_init.py:init():496] got version response
16
+ 2021-07-14 21:39:45,659 INFO MainThread:583081 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-14 21:39:45,830 INFO MainThread:583081 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-14 21:39:46,956 INFO MainThread:583081 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-14 21:39:46,956 INFO MainThread:583081 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-14 21:39:46,957 INFO MainThread:583081 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-14 21:39:46,959 INFO MainThread:583081 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-14 21:39:46,959 INFO MainThread:583081 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-14 21:39:46,965 INFO MainThread:583081 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_21-39-37_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 250, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 5, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-14 21:39:46,971 INFO MainThread:583081 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
25
+ 2021-07-14 21:39:46,972 INFO MainThread:583081 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
wandb/run-20210714_213944-3j6d3fy2/run-3j6d3fy2.wandb ADDED
Binary file (2.84 kB). View file