dat committed on
Commit
77f6e8d
1 Parent(s): d725b93

remove trash

Browse files
Files changed (30) hide show
  1. events.out.tfevents.1626135630.t1v-n-f5c06ea1-w-0.319169.3.v2 +0 -3
  2. events.out.tfevents.1626137349.t1v-n-f5c06ea1-w-0.323744.3.v2 +0 -3
  3. events.out.tfevents.1626137580.t1v-n-f5c06ea1-w-0.325900.3.v2 +0 -3
  4. events.out.tfevents.1626137871.t1v-n-f5c06ea1-w-0.327810.3.v2 +0 -3
  5. events.out.tfevents.1626138389.t1v-n-f5c06ea1-w-0.330819.3.v2 +0 -3
  6. events.out.tfevents.1626173264.t1v-n-f5c06ea1-w-0.340852.3.v2 +0 -3
  7. events.out.tfevents.1626174131.t1v-n-f5c06ea1-w-0.343920.3.v2 +0 -3
  8. events.out.tfevents.1626174670.t1v-n-f5c06ea1-w-0.346512.3.v2 +0 -3
  9. events.out.tfevents.1626175237.t1v-n-f5c06ea1-w-0.349243.3.v2 +0 -3
  10. events.out.tfevents.1626176074.t1v-n-f5c06ea1-w-0.351681.3.v2 +0 -3
  11. events.out.tfevents.1626180467.t1v-n-f5c06ea1-w-0.354027.3.v2 +0 -3
  12. events.out.tfevents.1626180750.t1v-n-f5c06ea1-w-0.355855.3.v2 +0 -3
  13. events.out.tfevents.1626181600.t1v-n-f5c06ea1-w-0.357816.3.v2 +0 -3
  14. events.out.tfevents.1626181889.t1v-n-f5c06ea1-w-0.360037.3.v2 +0 -3
  15. events.out.tfevents.1626182175.t1v-n-f5c06ea1-w-0.362298.3.v2 +0 -3
  16. events.out.tfevents.1626182874.t1v-n-f5c06ea1-w-0.365284.3.v2 +0 -3
  17. events.out.tfevents.1626184460.t1v-n-f5c06ea1-w-0.369028.3.v2 +0 -3
  18. events.out.tfevents.1626242600.t1v-n-f5c06ea1-w-0.491835.3.v2 +0 -3
  19. events.out.tfevents.1626285315.t1v-n-f5c06ea1-w-0.533662.3.v2 +0 -3
  20. events.out.tfevents.1626286793.t1v-n-f5c06ea1-w-0.547087.3.v2 +0 -3
  21. events.out.tfevents.1626287584.t1v-n-f5c06ea1-w-0.550207.3.v2 +0 -3
  22. events.out.tfevents.1626288936.t1v-n-f5c06ea1-w-0.553832.3.v2 +0 -3
  23. flax_model.msgpack +1 -1
  24. opt_state.msgpack +1 -1
  25. run.sh +1 -1
  26. training_state.json +1 -1
  27. wandb/run-20210714_213944-3j6d3fy2/files/output.log +20 -0
  28. wandb/run-20210714_213944-3j6d3fy2/logs/debug-internal.log +24 -0
  29. wandb/run-20210714_213944-3j6d3fy2/logs/debug.log +3 -0
  30. wandb/run-20210714_213944-3j6d3fy2/run-3j6d3fy2.wandb +0 -0
events.out.tfevents.1626135630.t1v-n-f5c06ea1-w-0.319169.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2be9a3fee86650547c5ddda2f6a3e6336bd828bc45c95d87626f6c16425f071
3
- size 147065
 
 
 
 
events.out.tfevents.1626137349.t1v-n-f5c06ea1-w-0.323744.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f159e4108512bc68b8363ca06b6026ff0844d045b08ba76516f2764b90277292
3
- size 40
 
 
 
 
events.out.tfevents.1626137580.t1v-n-f5c06ea1-w-0.325900.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:72867ca0c2d013977242562e1efa683ba957c1b4c3352c0547c72dcd0e611de8
3
- size 40
 
 
 
 
events.out.tfevents.1626137871.t1v-n-f5c06ea1-w-0.327810.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4e6248f4d9c467b9b97ff9829c4847d2f568eaf3b4c6b79865519f1e98780a9
3
- size 40
 
 
 
 
events.out.tfevents.1626138389.t1v-n-f5c06ea1-w-0.330819.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:279cc56d0b6cc6fbb971cb2ba30bfa7004a58bd5eea377c189dda74cdb77b8e9
3
- size 8915529
 
 
 
 
events.out.tfevents.1626173264.t1v-n-f5c06ea1-w-0.340852.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:73fdfc3eb9d8111b1e3460227717a3942adfe9263bca08b7fd2bfab9af98d9a1
3
- size 38186
 
 
 
 
events.out.tfevents.1626174131.t1v-n-f5c06ea1-w-0.343920.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfc6f0b5b354bd4d8d13834613ece71ac9d948186313bc3fde5e2e132a1c9cab
3
- size 40
 
 
 
 
events.out.tfevents.1626174670.t1v-n-f5c06ea1-w-0.346512.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f74cf77c0a672ad1201614ba6642a4f3a27b9cf021d0e88eb362c7f38ee86304
3
- size 40
 
 
 
 
events.out.tfevents.1626175237.t1v-n-f5c06ea1-w-0.349243.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:be5c2acf821fd2ce776ff5e434706cb933a0fa323f0bb1a82dadd832f1f589d4
3
- size 40
 
 
 
 
events.out.tfevents.1626176074.t1v-n-f5c06ea1-w-0.351681.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b085d5029d052defe00b26c54b6357e9d05cbc5ad38cdd2f12537ed0b90008d2
3
- size 441341
 
 
 
 
events.out.tfevents.1626180467.t1v-n-f5c06ea1-w-0.354027.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:973eec9b2b17e54f3ee35dc0c4b85a4a3ecf5488cb59f5619d7c635641bfe7b6
3
- size 40
 
 
 
 
events.out.tfevents.1626180750.t1v-n-f5c06ea1-w-0.355855.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:013fc500b7fdd46262ee2b2ed5a3624249adef426d0b134944080ccf90d363ed
3
- size 40
 
 
 
 
events.out.tfevents.1626181600.t1v-n-f5c06ea1-w-0.357816.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3d4a519b8f1c293258e292768822980b487ef0e02bbfe9d6a3132b8c2fdd791
3
- size 40
 
 
 
 
events.out.tfevents.1626181889.t1v-n-f5c06ea1-w-0.360037.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c1ed9142ba98f2f7197e2a44361331a8c112af5dba98d7fc9f0bcab6228ae8c
3
- size 40
 
 
 
 
events.out.tfevents.1626182175.t1v-n-f5c06ea1-w-0.362298.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:29cc2c143c306c4619802094513459dbb71c4730d3cdfb879e7224923ddfe7ea
3
- size 40
 
 
 
 
events.out.tfevents.1626182874.t1v-n-f5c06ea1-w-0.365284.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:24aa4302db5d02121389fc7f8944025588034aedd21f772c2b71224e3a0b0d13
3
- size 220634
 
 
 
 
events.out.tfevents.1626184460.t1v-n-f5c06ea1-w-0.369028.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e5631bf443386a4e37d77053e55ba4517153d5f6d7f77b616258d9c78e6901f
3
- size 367772
 
 
 
 
events.out.tfevents.1626242600.t1v-n-f5c06ea1-w-0.491835.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f94f6c2d80b0e0d6247997634649101caefa3ad8ab4f408b529ad38f86c8770
3
- size 40
 
 
 
 
events.out.tfevents.1626285315.t1v-n-f5c06ea1-w-0.533662.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:29b681f16c441caf85381c9def58d19f4479a2460146d2cfb68991f8327f01fe
3
- size 40
 
 
 
 
events.out.tfevents.1626286793.t1v-n-f5c06ea1-w-0.547087.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:53d63b11450875138751afac48c611f4da76fadc0affb0ec98896b35dbad9728
3
- size 40
 
 
 
 
events.out.tfevents.1626287584.t1v-n-f5c06ea1-w-0.550207.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:62cc6dc4bf215d99f8685629bf632f82d65fc7f1127d876ded332b31b5432064
3
- size 40
 
 
 
 
events.out.tfevents.1626288936.t1v-n-f5c06ea1-w-0.553832.3.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fccf6070edac76c190b8bb8de4e37b889dd1b18835777203f9d16ac658aaf71
3
- size 40
 
 
 
 
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a07e8333f5822e57ebf3a1c59c4f6c6d734efaf0017732c6c8254869a4894524
3
  size 510090043
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:920cc52411bb9cee8aec7d54fbccc6e52223b9df8ed791d4fdd90a31831aed15
3
  size 510090043
opt_state.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:328aaa347b117dca77396a9d1fbc5f412fc5e22d9f5bf9e8b2eb8b89202f97d9
3
  size 1530270545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:781938e1ea89afbf3efcc8a49f7255f7696a3c91200a25293491749289e3d6a8
3
  size 1530270545
run.sh CHANGED
@@ -18,7 +18,7 @@ python ./run_mlm_flax.py \
18
  --eval_steps="500" \
19
  --num_train_epochs="5" \
20
  --preprocessing_num_workers="96" \
21
- --save_steps="5" \
22
  --learning_rate="5e-5" \
23
  --per_device_train_batch_size="2" \
24
  --per_device_eval_batch_size="2" \
 
18
  --eval_steps="500" \
19
  --num_train_epochs="5" \
20
  --preprocessing_num_workers="96" \
21
+ --save_steps="20000" \
22
  --learning_rate="5e-5" \
23
  --per_device_train_batch_size="2" \
24
  --per_device_eval_batch_size="2" \
training_state.json CHANGED
@@ -1 +1 @@
1
- {"step": 501}
 
1
+ {"step": 6}
wandb/run-20210714_213944-3j6d3fy2/files/output.log CHANGED
@@ -10,3 +10,23 @@ Epoch ... (1/5): 0%|
10
 
11
  [21:41:37] - INFO - huggingface_hub.repository - git version 2.25.1 | 5/1250 [01:27<2:38:49, 7.65s/it]
12
  git-lfs/2.9.2 (GitHub; linux amd64; go 1.13.5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  [21:41:37] - INFO - huggingface_hub.repository - git version 2.25.1 | 5/1250 [01:27<2:38:49, 7.65s/it]
12
  git-lfs/2.9.2 (GitHub; linux amd64; go 1.13.5)
13
+ [21:41:38] - DEBUG - huggingface_hub.repository - [Repository] is a valid git repo
14
+ [21:42:44] - INFO - huggingface_hub.repository - Uploading LFS objects: 100% (4/4), 2.0 GB | 35 MB/s, done.
15
+ tcmalloc: large alloc 1354776576 bytes == 0x310a78000 @ 0x7fa277001680 0x7fa277021bdd 0x7f9fab23220d 0x7f9fab240340 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23bbd3 0x7f9fab23c1fe 0x504d56 0x56acb6 0x568d9a 0x5f5b33 0x56bc9b 0x5f5956 0x56aadf 0x5f5956 0x56aadf 0x568d9a 0x5f5b33 0x56bc9b 0x568d9a 0x68cdc7 0x67e161
16
+ tcmalloc: large alloc 2715181056 bytes == 0x36167c000 @ 0x7fa277001680 0x7fa277021bdd 0x7f9fab23220d 0x7f9fab240340 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23fe87 0x7f9fab23bbd3 0x7f9fab23c1fe 0x504d56 0x56acb6 0x568d9a 0x5f5b33 0x56bc9b 0x5f5956 0x56aadf 0x5f5956 0x56aadf 0x568d9a 0x5f5b33 0x56bc9b 0x568d9a 0x68cdc7 0x67e161 0x67e1df
17
+ tcmalloc: large alloc 1530273792 bytes == 0x4033e2000 @ 0x7fa277001680 0x7fa277022824 0x5f7b11 0x7f9fab23bc6f 0x7f9fab23c1fe 0x504d56 0x56acb6 0x568d9a 0x5f5b33 0x56bc9b 0x5f5956 0x56aadf 0x5f5956 0x56aadf 0x568d9a 0x5f5b33 0x56bc9b 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7fa276e160b3 0x5f96de
18
+ [21:43:04] - INFO - __main__ - checkpoint saved
19
+ [21:43:05] - INFO - __main__ - SAVING CHECKPOINT IN ./...
20
+ [21:43:09] - INFO - huggingface_hub.repository - git version 2.25.1 | 10/1250 [02:59<2:44:46, 7.97s/it]
21
+ git-lfs/2.9.2 (GitHub; linux amd64; go 1.13.5)
22
+ [21:43:09] - DEBUG - huggingface_hub.repository - [Repository] is a valid git repo
23
+ Training...: 1%|█ | 10/1250 [03:10<6:34:28, 19.09s/it]
24
+ Epoch ... (1/5): 0%| | 0/5 [03:15<?, ?it/s]
25
+ Traceback (most recent call last):
26
+ File "./run_mlm_flax.py", line 861, in <module>
27
+ save_model_checkpoint(model, training_args.output_dir, state, with_opt=model_args.save_optimizer,
28
+ File "./run_mlm_flax.py", line 464, in save_model_checkpoint
29
+ model.save_pretrained(
30
+ File "/home/dat/transformers/src/transformers/modeling_flax_utils.py", line 484, in save_pretrained
31
+ f.write(model_bytes)
32
+ KeyboardInterrupt
wandb/run-20210714_213944-3j6d3fy2/logs/debug-internal.log CHANGED
@@ -55,3 +55,27 @@
55
  2021-07-14 21:41:45,230 DEBUG SenderThread:584332 [sender.py:send():179] send: stats
56
  2021-07-14 21:41:48,051 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
57
  2021-07-14 21:41:48,052 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  2021-07-14 21:41:45,230 DEBUG SenderThread:584332 [sender.py:send():179] send: stats
56
  2021-07-14 21:41:48,051 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
57
  2021-07-14 21:41:48,052 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
58
+ 2021-07-14 21:42:03,185 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
59
+ 2021-07-14 21:42:03,186 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
60
+ 2021-07-14 21:42:15,307 DEBUG SenderThread:584332 [sender.py:send():179] send: stats
61
+ 2021-07-14 21:42:18,315 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
62
+ 2021-07-14 21:42:18,315 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
63
+ 2021-07-14 21:42:33,446 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
64
+ 2021-07-14 21:42:33,447 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
65
+ 2021-07-14 21:42:45,385 DEBUG SenderThread:584332 [sender.py:send():179] send: stats
66
+ 2021-07-14 21:42:45,900 INFO Thread-8 :584332 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/output.log
67
+ 2021-07-14 21:42:48,745 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
68
+ 2021-07-14 21:42:48,745 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
69
+ 2021-07-14 21:42:49,902 INFO Thread-8 :584332 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/output.log
70
+ 2021-07-14 21:42:52,903 INFO Thread-8 :584332 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/output.log
71
+ 2021-07-14 21:42:54,904 INFO Thread-8 :584332 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/output.log
72
+ 2021-07-14 21:43:03,906 DEBUG HandlerThread:584332 [handler.py:handle_request():124] handle_request: stop_status
73
+ 2021-07-14 21:43:03,907 DEBUG SenderThread:584332 [sender.py:send_request():193] send_request: stop_status
74
+ 2021-07-14 21:43:06,910 INFO Thread-8 :584332 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/output.log
75
+ 2021-07-14 21:43:10,912 INFO Thread-8 :584332 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/output.log
76
+ 2021-07-14 21:43:12,913 INFO Thread-8 :584332 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/files/output.log
77
+ 2021-07-14 21:43:13,847 WARNING MainThread:584332 [internal.py:wandb_internal():147] Internal process interrupt: 1
78
+ 2021-07-14 21:43:14,047 WARNING MainThread:584332 [internal.py:wandb_internal():147] Internal process interrupt: 2
79
+ 2021-07-14 21:43:14,048 ERROR MainThread:584332 [internal.py:wandb_internal():150] Internal process interrupted.
80
+ 2021-07-14 21:43:14,136 INFO WriterThread:584332 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_213944-3j6d3fy2/run-3j6d3fy2.wandb
81
+ 2021-07-14 21:43:14,525 INFO MainThread:584332 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210714_213944-3j6d3fy2/logs/debug.log CHANGED
@@ -23,3 +23,6 @@ config: {}
23
  2021-07-14 21:39:46,965 INFO MainThread:583081 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_21-39-37_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 250, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 5, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 
'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
  2021-07-14 21:39:46,971 INFO MainThread:583081 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
25
  2021-07-14 21:39:46,972 INFO MainThread:583081 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
 
 
 
 
23
  2021-07-14 21:39:46,965 INFO MainThread:583081 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_21-39-37_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 250, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 5, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 
'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
  2021-07-14 21:39:46,971 INFO MainThread:583081 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
25
  2021-07-14 21:39:46,972 INFO MainThread:583081 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
26
+ 2021-07-14 21:43:16,210 INFO MainThread:583081 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 255
27
+ 2021-07-14 21:43:16,212 INFO MainThread:583081 [wandb_run.py:_restore():1565] restore
28
+ 2021-07-14 21:43:18,834 INFO MainThread:583081 [wandb_run.py:_restore():1565] restore
wandb/run-20210714_213944-3j6d3fy2/run-3j6d3fy2.wandb CHANGED
Binary files a/wandb/run-20210714_213944-3j6d3fy2/run-3j6d3fy2.wandb and b/wandb/run-20210714_213944-3j6d3fy2/run-3j6d3fy2.wandb differ