dat commited on
Commit
5575ac1
1 Parent(s): cc569ae

Saving weights and logs of step 105000

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. checkpoint_105000 +3 -0
  2. events.out.tfevents.1626362977.t1v-n-f5c06ea1-w-0.707091.3.v2 +2 -2
  3. events.out.tfevents.1626368154.t1v-n-f5c06ea1-w-0.715071.3.v2 +3 -0
  4. events.out.tfevents.1626369005.t1v-n-f5c06ea1-w-0.717656.3.v2 +3 -0
  5. events.out.tfevents.1626370906.t1v-n-f5c06ea1-w-0.721922.3.v2 +3 -0
  6. events.out.tfevents.1626371506.t1v-n-f5c06ea1-w-0.724375.3.v2 +3 -0
  7. events.out.tfevents.1626372294.t1v-n-f5c06ea1-w-0.727475.3.v2 +3 -0
  8. events.out.tfevents.1626374797.t1v-n-f5c06ea1-w-0.731696.3.v2 +3 -0
  9. events.out.tfevents.1626375524.t1v-n-f5c06ea1-w-0.734136.3.v2 +3 -0
  10. flax_model.msgpack +1 -1
  11. run.sh +3 -3
  12. run_mlm_flax_no_accum.py +5 -3
  13. wandb/debug-internal.log +1 -1
  14. wandb/debug.log +1 -1
  15. wandb/latest-run +1 -1
  16. wandb/run-20210715_152938-8qznp93p/files/output.log +32 -0
  17. wandb/run-20210715_152938-8qznp93p/files/wandb-summary.json +1 -1
  18. wandb/run-20210715_152938-8qznp93p/logs/debug-internal.log +49 -0
  19. wandb/run-20210715_152938-8qznp93p/logs/debug.log +2 -0
  20. wandb/run-20210715_152938-8qznp93p/run-8qznp93p.wandb +0 -0
  21. wandb/run-20210715_165555-25rtfw59/files/config.yaml +301 -0
  22. wandb/run-20210715_165555-25rtfw59/files/output.log +48 -0
  23. wandb/run-20210715_165555-25rtfw59/files/requirements.txt +94 -0
  24. wandb/run-20210715_165555-25rtfw59/files/wandb-metadata.json +45 -0
  25. wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json +1 -0
  26. wandb/run-20210715_165555-25rtfw59/logs/debug-internal.log +236 -0
  27. wandb/run-20210715_165555-25rtfw59/logs/debug.log +27 -0
  28. wandb/run-20210715_165555-25rtfw59/run-25rtfw59.wandb +0 -0
  29. wandb/run-20210715_171007-1mu5szt1/files/config.yaml +301 -0
  30. wandb/run-20210715_171007-1mu5szt1/files/output.log +307 -0
  31. wandb/run-20210715_171007-1mu5szt1/files/requirements.txt +94 -0
  32. wandb/run-20210715_171007-1mu5szt1/files/wandb-metadata.json +45 -0
  33. wandb/run-20210715_171007-1mu5szt1/files/wandb-summary.json +1 -0
  34. wandb/run-20210715_171007-1mu5szt1/logs/debug-internal.log +0 -0
  35. wandb/run-20210715_171007-1mu5szt1/logs/debug.log +27 -0
  36. wandb/run-20210715_171007-1mu5szt1/run-1mu5szt1.wandb +0 -0
  37. wandb/run-20210715_174147-3nkn7hxg/files/config.yaml +304 -0
  38. wandb/run-20210715_174147-3nkn7hxg/files/output.log +14 -0
  39. wandb/run-20210715_174147-3nkn7hxg/files/requirements.txt +94 -0
  40. wandb/run-20210715_174147-3nkn7hxg/files/wandb-metadata.json +45 -0
  41. wandb/run-20210715_174147-3nkn7hxg/files/wandb-summary.json +1 -0
  42. wandb/run-20210715_174147-3nkn7hxg/logs/debug-internal.log +195 -0
  43. wandb/run-20210715_174147-3nkn7hxg/logs/debug.log +119 -0
  44. wandb/run-20210715_174147-3nkn7hxg/run-3nkn7hxg.wandb +0 -0
  45. wandb/run-20210715_175147-3lygnexi/files/config.yaml +301 -0
  46. wandb/run-20210715_175147-3lygnexi/files/output.log +26 -0
  47. wandb/run-20210715_175147-3lygnexi/files/requirements.txt +94 -0
  48. wandb/run-20210715_175147-3lygnexi/files/wandb-metadata.json +45 -0
  49. wandb/run-20210715_175147-3lygnexi/files/wandb-summary.json +1 -0
  50. wandb/run-20210715_175147-3lygnexi/logs/debug-internal.log +170 -0
checkpoint_105000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd3729e2e9d09e233e2f4dfecac9dc1888f374b3614c8a092c1bc958fdab2ccf
3
+ size 1530270447
events.out.tfevents.1626362977.t1v-n-f5c06ea1-w-0.707091.3.v2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29705d778db21abb9e80a3472189629d86cd8247cacda98d58b20a30bd684e63
3
- size 1484145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff143b1f5efcedbe6cb99342667f8e6e2c855c4b038a867b719689a9052d49f9
3
+ size 1491633
events.out.tfevents.1626368154.t1v-n-f5c06ea1-w-0.715071.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9278085ac62dbd79723fd51fc60c6044bcc9f3786ff81b54b11a5f5e9cdd4a8d
3
+ size 44508
events.out.tfevents.1626369005.t1v-n-f5c06ea1-w-0.717656.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11918a96e9d622df1b0d2f1b7999d21bf87fd9b7b7c937dcecd680857aacdd68
3
+ size 437715
events.out.tfevents.1626370906.t1v-n-f5c06ea1-w-0.721922.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eebb8a50d7001a4d23af995647a2f73df34f476a7489826b8dee64940fa1597
3
+ size 40
events.out.tfevents.1626371506.t1v-n-f5c06ea1-w-0.724375.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e90de36468e161351a0990822bb3945c86e791570c1c95ee000e62bcdd64e7d6
3
+ size 14886
events.out.tfevents.1626372294.t1v-n-f5c06ea1-w-0.727475.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b6b8f2fe86e2ccf16382846366ee00347b75c8b502be2e423a846260c6bae4f
3
+ size 40
events.out.tfevents.1626374797.t1v-n-f5c06ea1-w-0.731696.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f583930fdc45029a3ba0585014533fd8ef9a4c87a21651e280f8759447faca8f
3
+ size 37561
events.out.tfevents.1626375524.t1v-n-f5c06ea1-w-0.734136.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42934a5a9a64204b53646673742f1b60cb101307e03f942f13e21a364e9e5ac7
3
+ size 752033
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b89d2017401b78b9121ca751668fd0246d3593be3d565ee5e7b06d3829e4ec6a
3
  size 510090043
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e40ad9ce0279ffbd95b20a8774cb1469a1b0a3a27fe354529522addece034982
3
  size 510090043
run.sh CHANGED
@@ -15,14 +15,14 @@ python ./run_mlm_flax_no_accum.py \
15
  --adam_beta1="0.9" \
16
  --adam_beta2="0.98" \
17
  --logging_steps="50" \
18
- --eval_steps="3000" \
19
  --num_train_epochs="2"\
20
  --preprocessing_num_workers="96" \
21
- --save_steps="10000" \
22
  --learning_rate="3e-5" \
23
  --per_device_train_batch_size="1" \
24
  --per_device_eval_batch_size="1" \
25
- --save_total_limit="5"\
26
  --max_eval_samples="4000"\
27
  --resume_from_checkpoint="./"\
28
  #--gradient_accumulation_steps="4"\
 
15
  --adam_beta1="0.9" \
16
  --adam_beta2="0.98" \
17
  --logging_steps="50" \
18
+ --eval_steps="6000" \
19
  --num_train_epochs="2"\
20
  --preprocessing_num_workers="96" \
21
+ --save_steps="15000" \
22
  --learning_rate="3e-5" \
23
  --per_device_train_batch_size="1" \
24
  --per_device_eval_batch_size="1" \
25
+ --save_total_limit="20"\
26
  --max_eval_samples="4000"\
27
  --resume_from_checkpoint="./"\
28
  #--gradient_accumulation_steps="4"\
run_mlm_flax_no_accum.py CHANGED
@@ -689,9 +689,9 @@ if __name__ == "__main__":
689
  num_train_samples = len(tokenized_datasets["train"])
690
  train_samples_idx = jax.random.permutation(input_rng, jnp.arange(num_train_samples))
691
  train_batch_idx = generate_batch_splits(train_samples_idx, train_batch_size)
692
-
693
  # Gather the indexes for creating the batch and do a training step
694
- for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1,initial=resume_step)):
695
  samples = [tokenized_datasets["train"][int(idx)] for idx in batch_idx]
696
  model_inputs = data_collator(samples, pad_to_multiple_of=16)
697
 
@@ -699,8 +699,10 @@ if __name__ == "__main__":
699
  model_inputs = shard(model_inputs.data)
700
  state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
701
  train_metrics.append(train_metric)
702
-
703
  cur_step = epoch * (num_train_samples // train_batch_size) + step
 
 
704
  #if cur_step < resume_step:
705
  # continue
706
 
 
689
  num_train_samples = len(tokenized_datasets["train"])
690
  train_samples_idx = jax.random.permutation(input_rng, jnp.arange(num_train_samples))
691
  train_batch_idx = generate_batch_splits(train_samples_idx, train_batch_size)
692
+ hooks = []
693
  # Gather the indexes for creating the batch and do a training step
694
+ for step, batch_idx in tqdm(enumerate(train_batch_idx,start=resume_step), desc="Training...", position=1):
695
  samples = [tokenized_datasets["train"][int(idx)] for idx in batch_idx]
696
  model_inputs = data_collator(samples, pad_to_multiple_of=16)
697
 
 
699
  model_inputs = shard(model_inputs.data)
700
  state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
701
  train_metrics.append(train_metric)
702
+
703
  cur_step = epoch * (num_train_samples // train_batch_size) + step
704
+ if cur_step == resume_step:
705
+ logging.info('Initial compilation completed.')
706
  #if cur_step < resume_step:
707
  # continue
708
 
wandb/debug-internal.log CHANGED
@@ -1 +1 @@
1
- run-20210715_152938-8qznp93p/logs/debug-internal.log
 
1
+ run-20210715_185845-dq8uirtg/logs/debug-internal.log
wandb/debug.log CHANGED
@@ -1 +1 @@
1
- run-20210715_152938-8qznp93p/logs/debug.log
 
1
+ run-20210715_185845-dq8uirtg/logs/debug.log
wandb/latest-run CHANGED
@@ -1 +1 @@
1
- run-20210715_152938-8qznp93p
 
1
+ run-20210715_185845-dq8uirtg
wandb/run-20210715_152938-8qznp93p/files/output.log CHANGED
@@ -1220,3 +1220,35 @@ tcmalloc: large alloc 1530273792 bytes == 0x31850a000 @ 0x7f3586844680 0x7f3586
1220
  [16:50:06] - INFO - absl - Saved checkpoint at checkpoint_10000
1221
  [16:50:07] - INFO - huggingface_hub.repository - git version 2.25.1
1222
  git-lfs/2.9.2 (GitHub; linux amd64; go 1.13.5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1220
  [16:50:06] - INFO - absl - Saved checkpoint at checkpoint_10000
1221
  [16:50:07] - INFO - huggingface_hub.repository - git version 2.25.1
1222
  git-lfs/2.9.2 (GitHub; linux amd64; go 1.13.5)
1223
+ [16:50:07] - DEBUG - huggingface_hub.repository - [Repository] is a valid git repo
1224
+ [16:51:22] - INFO - huggingface_hub.repository - Uploading LFS objects: 100% (5/5), 2.0 GB | 43 MB/s, done.
1225
+
1226
+
1227
+
1228
+ Training...: 28%|███████████████████████████████████████▉ | 100051/352766 [1:16:06<25:42:07, 2.73it/s]
1229
+
1230
+
1231
+
1232
+
1233
+ Step... (9000 | Loss: 2.3799679279327393, Acc: 0.5589502453804016): 0%| | 0/2 [1:22:05<?, ?it/s]
1234
+ Traceback (most recent call last):
1235
+ File "./run_mlm_flax_no_accum.py", line 712, in <module>
1236
+ write_train_metric(summary_writer, train_metrics, train_time, cur_step)
1237
+ File "./run_mlm_flax_no_accum.py", line 263, in write_train_metric
1238
+ train_metrics = get_metrics(train_metrics)
1239
+ File "/home/dat/pino/lib/python3.8/site-packages/flax/training/common_utils.py", line 52, in get_metrics
1240
+ metrics_np = jax.device_get(device_metrics)
1241
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 2337, in device_get
1242
+ return tree_map(_device_get, x)
1243
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/tree_util.py", line 168, in tree_map
1244
+ return treedef.unflatten(f(*xs) for xs in zip(*all_leaves))
1245
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/tree_util.py", line 168, in <genexpr>
1246
+ return treedef.unflatten(f(*xs) for xs in zip(*all_leaves))
1247
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 2329, in _device_get
1248
+ return copy()
1249
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 1221, in copy
1250
+ return np.asarray(self)
1251
+ File "/home/dat/pino/lib/python3.8/site-packages/numpy/core/_asarray.py", line 83, in asarray
1252
+ return array(a, dtype, copy=False, order=order)
1253
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 1286, in __array__
1254
+ return np.asarray(self._value, dtype=dtype)
wandb/run-20210715_152938-8qznp93p/files/wandb-summary.json CHANGED
@@ -1 +1 @@
1
- {"training_step": 10000, "learning_rate": 2.6118033929378726e-05, "train_loss": 2.5166258811950684, "_runtime": 4818, "_timestamp": 1626367796, "_step": 202, "eval_step": 9000, "eval_accuracy": 0.5589502453804016, "eval_loss": 2.3799679279327393}
 
1
+ {"training_step": 10050, "learning_rate": 2.6115878426935524e-05, "train_loss": 2.5157060623168945, "_runtime": 4924, "_timestamp": 1626367902, "_step": 203, "eval_step": 9000, "eval_accuracy": 0.5589502453804016, "eval_loss": 2.3799679279327393}
wandb/run-20210715_152938-8qznp93p/logs/debug-internal.log CHANGED
@@ -2791,3 +2791,52 @@
2791
  2021-07-15 16:50:16,636 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
2792
  2021-07-15 16:50:16,637 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
2793
  2021-07-15 16:50:20,859 DEBUG SenderThread:708348 [sender.py:send():179] send: stats
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2791
  2021-07-15 16:50:16,636 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
2792
  2021-07-15 16:50:16,637 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
2793
  2021-07-15 16:50:20,859 DEBUG SenderThread:708348 [sender.py:send():179] send: stats
2794
+ 2021-07-15 16:50:31,769 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
2795
+ 2021-07-15 16:50:31,770 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
2796
+ 2021-07-15 16:50:46,904 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
2797
+ 2021-07-15 16:50:46,904 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
2798
+ 2021-07-15 16:50:50,940 DEBUG SenderThread:708348 [sender.py:send():179] send: stats
2799
+ 2021-07-15 16:51:02,034 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
2800
+ 2021-07-15 16:51:02,035 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
2801
+ 2021-07-15 16:51:17,167 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
2802
+ 2021-07-15 16:51:17,167 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
2803
+ 2021-07-15 16:51:21,017 DEBUG SenderThread:708348 [sender.py:send():179] send: stats
2804
+ 2021-07-15 16:51:23,329 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
2805
+ 2021-07-15 16:51:25,330 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
2806
+ 2021-07-15 16:51:27,330 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
2807
+ 2021-07-15 16:51:29,331 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
2808
+ 2021-07-15 16:51:31,332 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
2809
+ 2021-07-15 16:51:32,298 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
2810
+ 2021-07-15 16:51:32,298 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
2811
+ 2021-07-15 16:51:42,885 DEBUG SenderThread:708348 [sender.py:send():179] send: history
2812
+ 2021-07-15 16:51:42,886 DEBUG SenderThread:708348 [sender.py:send():179] send: summary
2813
+ 2021-07-15 16:51:42,886 INFO SenderThread:708348 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
2814
+ 2021-07-15 16:51:43,337 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/wandb-summary.json
2815
+ 2021-07-15 16:51:45,338 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
2816
+ 2021-07-15 16:51:47,339 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
2817
+ 2021-07-15 16:51:47,437 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
2818
+ 2021-07-15 16:51:47,438 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
2819
+ 2021-07-15 16:51:49,339 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
2820
+ 2021-07-15 16:51:51,095 DEBUG SenderThread:708348 [sender.py:send():179] send: stats
2821
+ 2021-07-15 16:51:51,340 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
2822
+ 2021-07-15 16:52:01,993 WARNING MainThread:708348 [internal.py:wandb_internal():147] Internal process interrupt: 1
2823
+ 2021-07-15 16:52:03,090 WARNING MainThread:708348 [internal.py:wandb_internal():147] Internal process interrupt: 2
2824
+ 2021-07-15 16:52:03,091 ERROR MainThread:708348 [internal.py:wandb_internal():150] Internal process interrupted.
2825
+ 2021-07-15 16:52:03,345 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
2826
+ 2021-07-15 16:52:03,763 INFO SenderThread:708348 [sender.py:finish():945] shutting down sender
2827
+ 2021-07-15 16:52:03,763 INFO WriterThread:708348 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/run-8qznp93p.wandb
2828
+ 2021-07-15 16:52:03,763 INFO SenderThread:708348 [dir_watcher.py:finish():282] shutting down directory watcher
2829
+ 2021-07-15 16:52:03,764 INFO HandlerThread:708348 [handler.py:finish():638] shutting down handler
2830
+ 2021-07-15 16:52:04,346 INFO SenderThread:708348 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files
2831
+ 2021-07-15 16:52:04,346 INFO SenderThread:708348 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/requirements.txt requirements.txt
2832
+ 2021-07-15 16:52:04,346 INFO SenderThread:708348 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log output.log
2833
+ 2021-07-15 16:52:04,346 INFO SenderThread:708348 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/wandb-metadata.json wandb-metadata.json
2834
+ 2021-07-15 16:52:04,346 INFO SenderThread:708348 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/config.yaml config.yaml
2835
+ 2021-07-15 16:52:04,346 INFO SenderThread:708348 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/wandb-summary.json wandb-summary.json
2836
+ 2021-07-15 16:52:04,347 INFO SenderThread:708348 [file_pusher.py:finish():177] shutting down file pusher
2837
+ 2021-07-15 16:52:04,347 INFO SenderThread:708348 [file_pusher.py:join():182] waiting for file pusher
2838
+ 2021-07-15 16:52:04,799 INFO Thread-15 :708348 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/wandb-summary.json
2839
+ 2021-07-15 16:52:04,811 INFO Thread-14 :708348 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/config.yaml
2840
+ 2021-07-15 16:52:04,820 INFO Thread-13 :708348 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
2841
+ 2021-07-15 16:52:04,835 INFO Thread-12 :708348 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/requirements.txt
2842
+ 2021-07-15 16:52:05,617 INFO MainThread:708348 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210715_152938-8qznp93p/logs/debug.log CHANGED
@@ -23,3 +23,5 @@ config: {}
23
  2021-07-15 15:29:40,498 INFO MainThread:707091 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_15-29-30_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 10000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 3000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
  2021-07-15 15:29:40,500 INFO MainThread:707091 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
25
  2021-07-15 15:29:40,501 INFO MainThread:707091 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 4000}
 
 
 
23
  2021-07-15 15:29:40,498 INFO MainThread:707091 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_15-29-30_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 10000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 3000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
  2021-07-15 15:29:40,500 INFO MainThread:707091 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
25
  2021-07-15 15:29:40,501 INFO MainThread:707091 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 4000}
26
+ 2021-07-15 16:52:02,189 INFO MainThread:707091 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 255
27
+ 2021-07-15 16:52:02,189 INFO MainThread:707091 [wandb_run.py:_restore():1565] restore
wandb/run-20210715_152938-8qznp93p/run-8qznp93p.wandb CHANGED
Binary files a/wandb/run-20210715_152938-8qznp93p/run-8qznp93p.wandb and b/wandb/run-20210715_152938-8qznp93p/run-8qznp93p.wandb differ
 
wandb/run-20210715_165555-25rtfw59/files/config.yaml ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 4: 3.8.10
17
+ 5: 0.10.33
18
+ 6: 4.9.0.dev0
19
+ 8:
20
+ - 5
21
+ adafactor:
22
+ desc: null
23
+ value: false
24
+ adam_beta1:
25
+ desc: null
26
+ value: 0.9
27
+ adam_beta2:
28
+ desc: null
29
+ value: 0.98
30
+ adam_epsilon:
31
+ desc: null
32
+ value: 1.0e-08
33
+ cache_dir:
34
+ desc: null
35
+ value: null
36
+ config_name:
37
+ desc: null
38
+ value: ./
39
+ dataloader_drop_last:
40
+ desc: null
41
+ value: false
42
+ dataloader_num_workers:
43
+ desc: null
44
+ value: 0
45
+ dataloader_pin_memory:
46
+ desc: null
47
+ value: true
48
+ dataset_config_name:
49
+ desc: null
50
+ value: null
51
+ dataset_name:
52
+ desc: null
53
+ value: null
54
+ ddp_find_unused_parameters:
55
+ desc: null
56
+ value: null
57
+ debug:
58
+ desc: null
59
+ value: []
60
+ deepspeed:
61
+ desc: null
62
+ value: null
63
+ disable_tqdm:
64
+ desc: null
65
+ value: false
66
+ do_eval:
67
+ desc: null
68
+ value: false
69
+ do_predict:
70
+ desc: null
71
+ value: false
72
+ do_train:
73
+ desc: null
74
+ value: false
75
+ dtype:
76
+ desc: null
77
+ value: float32
78
+ eval_accumulation_steps:
79
+ desc: null
80
+ value: null
81
+ eval_steps:
82
+ desc: null
83
+ value: 6000
84
+ evaluation_strategy:
85
+ desc: null
86
+ value: IntervalStrategy.NO
87
+ fp16:
88
+ desc: null
89
+ value: false
90
+ fp16_backend:
91
+ desc: null
92
+ value: auto
93
+ fp16_full_eval:
94
+ desc: null
95
+ value: false
96
+ fp16_opt_level:
97
+ desc: null
98
+ value: O1
99
+ gradient_accumulation_steps:
100
+ desc: null
101
+ value: 1
102
+ greater_is_better:
103
+ desc: null
104
+ value: null
105
+ group_by_length:
106
+ desc: null
107
+ value: false
108
+ ignore_data_skip:
109
+ desc: null
110
+ value: false
111
+ label_names:
112
+ desc: null
113
+ value: null
114
+ label_smoothing_factor:
115
+ desc: null
116
+ value: 0.0
117
+ learning_rate:
118
+ desc: null
119
+ value: 3.0e-05
120
+ length_column_name:
121
+ desc: null
122
+ value: length
123
+ line_by_line:
124
+ desc: null
125
+ value: false
126
+ load_best_model_at_end:
127
+ desc: null
128
+ value: false
129
+ local_rank:
130
+ desc: null
131
+ value: -1
132
+ log_level:
133
+ desc: null
134
+ value: -1
135
+ log_level_replica:
136
+ desc: null
137
+ value: -1
138
+ log_on_each_node:
139
+ desc: null
140
+ value: true
141
+ logging_dir:
142
+ desc: null
143
+ value: ./runs/Jul15_16-55-47_t1v-n-f5c06ea1-w-0
144
+ logging_first_step:
145
+ desc: null
146
+ value: false
147
+ logging_steps:
148
+ desc: null
149
+ value: 50
150
+ logging_strategy:
151
+ desc: null
152
+ value: IntervalStrategy.STEPS
153
+ lr_scheduler_type:
154
+ desc: null
155
+ value: SchedulerType.LINEAR
156
+ max_eval_samples:
157
+ desc: null
158
+ value: 4000
159
+ max_grad_norm:
160
+ desc: null
161
+ value: 1.0
162
+ max_seq_length:
163
+ desc: null
164
+ value: 4096
165
+ max_steps:
166
+ desc: null
167
+ value: -1
168
+ metric_for_best_model:
169
+ desc: null
170
+ value: null
171
+ mlm_probability:
172
+ desc: null
173
+ value: 0.15
174
+ model_name_or_path:
175
+ desc: null
176
+ value: null
177
+ model_type:
178
+ desc: null
179
+ value: big_bird
180
+ mp_parameters:
181
+ desc: null
182
+ value: ''
183
+ no_cuda:
184
+ desc: null
185
+ value: false
186
+ num_train_epochs:
187
+ desc: null
188
+ value: 2.0
189
+ output_dir:
190
+ desc: null
191
+ value: ./
192
+ overwrite_cache:
193
+ desc: null
194
+ value: false
195
+ overwrite_output_dir:
196
+ desc: null
197
+ value: true
198
+ pad_to_max_length:
199
+ desc: null
200
+ value: false
201
+ past_index:
202
+ desc: null
203
+ value: -1
204
+ per_device_eval_batch_size:
205
+ desc: null
206
+ value: 1
207
+ per_device_train_batch_size:
208
+ desc: null
209
+ value: 1
210
+ per_gpu_eval_batch_size:
211
+ desc: null
212
+ value: null
213
+ per_gpu_train_batch_size:
214
+ desc: null
215
+ value: null
216
+ prediction_loss_only:
217
+ desc: null
218
+ value: false
219
+ preprocessing_num_workers:
220
+ desc: null
221
+ value: 96
222
+ push_to_hub:
223
+ desc: null
224
+ value: true
225
+ push_to_hub_model_id:
226
+ desc: null
227
+ value: ''
228
+ push_to_hub_organization:
229
+ desc: null
230
+ value: null
231
+ push_to_hub_token:
232
+ desc: null
233
+ value: null
234
+ remove_unused_columns:
235
+ desc: null
236
+ value: true
237
+ report_to:
238
+ desc: null
239
+ value:
240
+ - tensorboard
241
+ - wandb
242
+ resume_from_checkpoint:
243
+ desc: null
244
+ value: ./
245
+ run_name:
246
+ desc: null
247
+ value: ./
248
+ save_on_each_node:
249
+ desc: null
250
+ value: false
251
+ save_steps:
252
+ desc: null
253
+ value: 15000
254
+ save_strategy:
255
+ desc: null
256
+ value: IntervalStrategy.STEPS
257
+ save_total_limit:
258
+ desc: null
259
+ value: 20
260
+ seed:
261
+ desc: null
262
+ value: 42
263
+ sharded_ddp:
264
+ desc: null
265
+ value: []
266
+ skip_memory_metrics:
267
+ desc: null
268
+ value: true
269
+ tokenizer_name:
270
+ desc: null
271
+ value: ./
272
+ tpu_metrics_debug:
273
+ desc: null
274
+ value: false
275
+ tpu_num_cores:
276
+ desc: null
277
+ value: null
278
+ train_ref_file:
279
+ desc: null
280
+ value: null
281
+ use_fast_tokenizer:
282
+ desc: null
283
+ value: true
284
+ use_legacy_prediction_loop:
285
+ desc: null
286
+ value: false
287
+ validation_ref_file:
288
+ desc: null
289
+ value: null
290
+ validation_split_percentage:
291
+ desc: null
292
+ value: 5
293
+ warmup_ratio:
294
+ desc: null
295
+ value: 0.0
296
+ warmup_steps:
297
+ desc: null
298
+ value: 10000
299
+ weight_decay:
300
+ desc: null
301
+ value: 0.0095
wandb/run-20210715_165555-25rtfw59/files/output.log ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [16:56:11] - INFO - absl - Restoring checkpoint from ./checkpoint_10000
2
+ tcmalloc: large alloc 1530273792 bytes == 0x9b046000 @ 0x7f018a0fa680 0x7f018a11b824 0x5b9a14 0x50b2ae 0x50cb1b 0x5a6f17 0x5f3010 0x56fd36 0x568d9a 0x5f5b33 0x56aadf 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7f0189f0f0b3 0x5f96de
3
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
4
+ warnings.warn(
5
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
6
+ warnings.warn(
7
+ Epoch ... (1/2): 0%| | 0/2 [00:00<?, ?it/s]
8
+
9
+
10
+
11
+
12
+
13
+
14
+
15
+ Training...: 28%|██████████████████████████████████████▌ | 100059/352766 [04:37<64:29:15, 1.09it/s]
16
+
17
+
18
+
19
+
20
+ Training...: 28%|██████████████████████████████████████▌ | 100102/352766 [04:43<26:15:09, 2.67it/s]
21
+
22
+
23
+
24
+
25
+ Training...: 28%|██████████████████████████████████████▌ | 100152/352766 [05:03<26:05:11, 2.69it/s]
26
+
27
+
28
+
29
+
30
+
31
+ Training...: 28%|██████████████████████████████████████▋ | 100202/352766 [05:23<26:12:27, 2.68it/s]
32
+
33
+
34
+
35
+
36
+
37
+ Training...: 28%|██████████████████████████████████████▋ | 100252/352766 [05:43<25:55:44, 2.71it/s]
38
+
39
+
40
+
41
+
42
+
43
+ Training...: 28%|██████████████████████████████████████▋ | 100302/352766 [06:03<26:14:51, 2.67it/s]
44
+
45
+
46
+
47
+
48
+
wandb/run-20210715_165555-25rtfw59/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210715_165555-25rtfw59/files/wandb-metadata.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-15T16:55:57.916568",
5
+ "startedAt": "2021-07-15T16:55:55.783375",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=10000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=50",
22
+ "--eval_steps=6000",
23
+ "--num_train_epochs=2",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=15000",
26
+ "--learning_rate=3e-5",
27
+ "--per_device_train_batch_size=1",
28
+ "--per_device_eval_batch_size=1",
29
+ "--save_total_limit=20",
30
+ "--max_eval_samples=4000",
31
+ "--resume_from_checkpoint=./"
32
+ ],
33
+ "state": "running",
34
+ "program": "./run_mlm_flax_no_accum.py",
35
+ "codePath": "run_mlm_flax_no_accum.py",
36
+ "git": {
37
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
38
+ "commit": "cc569aecf5e26454416d7a13c7876ad9111120cf"
39
+ },
40
+ "email": null,
41
+ "root": "/home/dat/pino-roberta-base",
42
+ "host": "t1v-n-f5c06ea1-w-0",
43
+ "username": "dat",
44
+ "executable": "/home/dat/pino/bin/python"
45
+ }
wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"training_step": 300, "learning_rate": 2.6105053620995022e-05, "train_loss": 2.366305351257324, "_runtime": 722, "_timestamp": 1626368877, "_step": 5}
wandb/run-20210715_165555-25rtfw59/logs/debug-internal.log ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 16:55:56,515 INFO MainThread:716328 [internal.py:wandb_internal():88] W&B internal server running at pid: 716328, started at: 2021-07-15 16:55:56.514842
2
+ 2021-07-15 16:55:56,517 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: check_version
3
+ 2021-07-15 16:55:56,517 INFO WriterThread:716328 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/run-25rtfw59.wandb
4
+ 2021-07-15 16:55:56,518 DEBUG SenderThread:716328 [sender.py:send():179] send: header
5
+ 2021-07-15 16:55:56,518 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: check_version
6
+ 2021-07-15 16:55:56,561 DEBUG SenderThread:716328 [sender.py:send():179] send: run
7
+ 2021-07-15 16:55:56,732 INFO SenderThread:716328 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files
8
+ 2021-07-15 16:55:56,732 INFO SenderThread:716328 [sender.py:_start_run_threads():716] run started: 25rtfw59 with start time 1626368155
9
+ 2021-07-15 16:55:56,732 DEBUG SenderThread:716328 [sender.py:send():179] send: summary
10
+ 2021-07-15 16:55:56,733 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
11
+ 2021-07-15 16:55:56,733 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: run_start
12
+ 2021-07-15 16:55:57,738 INFO Thread-8 :716328 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
13
+ 2021-07-15 16:55:57,916 DEBUG HandlerThread:716328 [meta.py:__init__():39] meta init
14
+ 2021-07-15 16:55:57,916 DEBUG HandlerThread:716328 [meta.py:__init__():53] meta init done
15
+ 2021-07-15 16:55:57,916 DEBUG HandlerThread:716328 [meta.py:probe():210] probe
16
+ 2021-07-15 16:55:57,917 DEBUG HandlerThread:716328 [meta.py:_setup_git():200] setup git
17
+ 2021-07-15 16:55:57,953 DEBUG HandlerThread:716328 [meta.py:_setup_git():207] setup git done
18
+ 2021-07-15 16:55:57,953 DEBUG HandlerThread:716328 [meta.py:_save_pip():57] save pip
19
+ 2021-07-15 16:55:57,953 DEBUG HandlerThread:716328 [meta.py:_save_pip():71] save pip done
20
+ 2021-07-15 16:55:57,953 DEBUG HandlerThread:716328 [meta.py:probe():252] probe done
21
+ 2021-07-15 16:55:57,957 DEBUG SenderThread:716328 [sender.py:send():179] send: files
22
+ 2021-07-15 16:55:57,957 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
23
+ 2021-07-15 16:55:57,963 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
24
+ 2021-07-15 16:55:57,963 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
25
+ 2021-07-15 16:55:58,096 DEBUG SenderThread:716328 [sender.py:send():179] send: config
26
+ 2021-07-15 16:55:58,097 DEBUG SenderThread:716328 [sender.py:send():179] send: config
27
+ 2021-07-15 16:55:58,097 DEBUG SenderThread:716328 [sender.py:send():179] send: config
28
+ 2021-07-15 16:55:58,393 INFO Thread-11 :716328 [upload_job.py:push():137] Uploaded file /tmp/tmpbbcxwsn2wandb/2zsx43w4-wandb-metadata.json
29
+ 2021-07-15 16:55:58,736 INFO Thread-8 :716328 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
30
+ 2021-07-15 16:55:58,737 INFO Thread-8 :716328 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-metadata.json
31
+ 2021-07-15 16:55:58,737 INFO Thread-8 :716328 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/requirements.txt
32
+ 2021-07-15 16:56:13,662 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
33
+ 2021-07-15 16:56:13,662 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
34
+ 2021-07-15 16:56:14,743 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
35
+ 2021-07-15 16:56:16,744 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
36
+ 2021-07-15 16:56:26,001 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
37
+ 2021-07-15 16:56:27,749 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/config.yaml
38
+ 2021-07-15 16:56:28,821 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
39
+ 2021-07-15 16:56:28,821 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
40
+ 2021-07-15 16:56:43,953 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
41
+ 2021-07-15 16:56:43,953 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
42
+ 2021-07-15 16:56:56,082 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
43
+ 2021-07-15 16:56:59,099 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
44
+ 2021-07-15 16:56:59,099 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
45
+ 2021-07-15 16:57:14,230 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
46
+ 2021-07-15 16:57:14,230 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
47
+ 2021-07-15 16:57:26,150 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
48
+ 2021-07-15 16:57:29,366 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
49
+ 2021-07-15 16:57:29,366 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
50
+ 2021-07-15 16:57:44,497 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
51
+ 2021-07-15 16:57:44,498 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
52
+ 2021-07-15 16:57:56,224 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
53
+ 2021-07-15 16:57:59,628 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
54
+ 2021-07-15 16:57:59,628 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
55
+ 2021-07-15 16:58:14,771 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
56
+ 2021-07-15 16:58:14,771 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
57
+ 2021-07-15 16:58:26,290 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
58
+ 2021-07-15 16:58:29,901 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
59
+ 2021-07-15 16:58:29,901 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
60
+ 2021-07-15 16:58:45,036 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
61
+ 2021-07-15 16:58:45,037 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
62
+ 2021-07-15 16:58:56,353 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
63
+ 2021-07-15 16:59:00,169 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
64
+ 2021-07-15 16:59:00,169 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
65
+ 2021-07-15 16:59:15,303 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
66
+ 2021-07-15 16:59:15,303 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
67
+ 2021-07-15 16:59:26,416 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
68
+ 2021-07-15 16:59:30,437 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
69
+ 2021-07-15 16:59:30,438 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
70
+ 2021-07-15 16:59:45,570 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
71
+ 2021-07-15 16:59:45,570 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
72
+ 2021-07-15 16:59:56,482 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
73
+ 2021-07-15 17:00:00,706 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
74
+ 2021-07-15 17:00:00,706 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
75
+ 2021-07-15 17:00:15,845 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
76
+ 2021-07-15 17:00:15,846 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
77
+ 2021-07-15 17:00:26,546 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
78
+ 2021-07-15 17:00:30,981 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
79
+ 2021-07-15 17:00:30,982 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
80
+ 2021-07-15 17:00:46,115 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
81
+ 2021-07-15 17:00:46,116 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
82
+ 2021-07-15 17:00:56,610 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
83
+ 2021-07-15 17:01:01,251 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
84
+ 2021-07-15 17:01:01,251 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
85
+ 2021-07-15 17:01:16,388 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
86
+ 2021-07-15 17:01:16,388 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
87
+ 2021-07-15 17:01:26,676 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
88
+ 2021-07-15 17:01:31,521 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
89
+ 2021-07-15 17:01:31,522 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
90
+ 2021-07-15 17:01:40,875 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
91
+ 2021-07-15 17:01:46,668 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
92
+ 2021-07-15 17:01:46,668 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
93
+ 2021-07-15 17:01:56,742 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
94
+ 2021-07-15 17:02:01,854 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
95
+ 2021-07-15 17:02:01,855 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
96
+ 2021-07-15 17:02:17,001 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
97
+ 2021-07-15 17:02:17,002 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
98
+ 2021-07-15 17:02:26,812 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
99
+ 2021-07-15 17:02:32,137 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
100
+ 2021-07-15 17:02:32,138 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
101
+ 2021-07-15 17:02:47,271 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
102
+ 2021-07-15 17:02:47,272 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
103
+ 2021-07-15 17:02:56,881 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
104
+ 2021-07-15 17:03:02,406 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
105
+ 2021-07-15 17:03:02,406 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
106
+ 2021-07-15 17:03:17,544 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
107
+ 2021-07-15 17:03:17,544 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
108
+ 2021-07-15 17:03:26,959 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
109
+ 2021-07-15 17:03:32,679 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
110
+ 2021-07-15 17:03:32,679 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
111
+ 2021-07-15 17:03:47,834 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
112
+ 2021-07-15 17:03:47,835 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
113
+ 2021-07-15 17:03:49,928 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
114
+ 2021-07-15 17:03:57,035 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
115
+ 2021-07-15 17:04:02,987 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
116
+ 2021-07-15 17:04:02,988 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
117
+ 2021-07-15 17:04:18,133 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
118
+ 2021-07-15 17:04:18,133 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
119
+ 2021-07-15 17:04:27,107 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
120
+ 2021-07-15 17:04:33,265 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
121
+ 2021-07-15 17:04:33,266 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
122
+ 2021-07-15 17:04:48,399 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
123
+ 2021-07-15 17:04:48,399 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
124
+ 2021-07-15 17:04:57,177 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
125
+ 2021-07-15 17:05:03,543 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
126
+ 2021-07-15 17:05:03,543 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
127
+ 2021-07-15 17:05:18,688 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
128
+ 2021-07-15 17:05:18,688 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
129
+ 2021-07-15 17:05:27,251 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
130
+ 2021-07-15 17:05:33,820 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
131
+ 2021-07-15 17:05:33,820 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
132
+ 2021-07-15 17:05:48,955 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
133
+ 2021-07-15 17:05:48,955 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
134
+ 2021-07-15 17:05:57,351 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
135
+ 2021-07-15 17:05:58,008 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
136
+ 2021-07-15 17:06:00,008 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
137
+ 2021-07-15 17:06:02,009 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
138
+ 2021-07-15 17:06:04,010 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
139
+ 2021-07-15 17:06:04,086 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
140
+ 2021-07-15 17:06:04,086 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
141
+ 2021-07-15 17:06:06,011 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
142
+ 2021-07-15 17:06:17,423 DEBUG SenderThread:716328 [sender.py:send():179] send: history
143
+ 2021-07-15 17:06:17,424 DEBUG SenderThread:716328 [sender.py:send():179] send: summary
144
+ 2021-07-15 17:06:17,424 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
145
+ 2021-07-15 17:06:18,015 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
146
+ 2021-07-15 17:06:18,016 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
147
+ 2021-07-15 17:06:19,240 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
148
+ 2021-07-15 17:06:19,241 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
149
+ 2021-07-15 17:06:20,016 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
150
+ 2021-07-15 17:06:22,017 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
151
+ 2021-07-15 17:06:24,018 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
152
+ 2021-07-15 17:06:26,018 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
153
+ 2021-07-15 17:06:27,428 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
154
+ 2021-07-15 17:06:34,378 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
155
+ 2021-07-15 17:06:34,378 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
156
+ 2021-07-15 17:06:37,459 DEBUG SenderThread:716328 [sender.py:send():179] send: history
157
+ 2021-07-15 17:06:37,460 DEBUG SenderThread:716328 [sender.py:send():179] send: summary
158
+ 2021-07-15 17:06:37,460 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
159
+ 2021-07-15 17:06:38,023 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
160
+ 2021-07-15 17:06:40,024 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
161
+ 2021-07-15 17:06:42,025 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
162
+ 2021-07-15 17:06:44,025 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
163
+ 2021-07-15 17:06:46,026 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
164
+ 2021-07-15 17:06:49,510 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
165
+ 2021-07-15 17:06:49,511 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
166
+ 2021-07-15 17:06:57,494 DEBUG SenderThread:716328 [sender.py:send():179] send: history
167
+ 2021-07-15 17:06:57,494 DEBUG SenderThread:716328 [sender.py:send():179] send: summary
168
+ 2021-07-15 17:06:57,496 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
169
+ 2021-07-15 17:06:57,506 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
170
+ 2021-07-15 17:06:58,031 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
171
+ 2021-07-15 17:06:58,031 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
172
+ 2021-07-15 17:06:59,031 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
173
+ 2021-07-15 17:07:01,032 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
174
+ 2021-07-15 17:07:03,033 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
175
+ 2021-07-15 17:07:04,647 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
176
+ 2021-07-15 17:07:04,647 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
177
+ 2021-07-15 17:07:05,034 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
178
+ 2021-07-15 17:07:07,034 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
179
+ 2021-07-15 17:07:17,535 DEBUG SenderThread:716328 [sender.py:send():179] send: history
180
+ 2021-07-15 17:07:17,535 DEBUG SenderThread:716328 [sender.py:send():179] send: summary
181
+ 2021-07-15 17:07:17,535 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
182
+ 2021-07-15 17:07:18,039 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
183
+ 2021-07-15 17:07:18,039 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
184
+ 2021-07-15 17:07:19,039 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
185
+ 2021-07-15 17:07:19,784 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
186
+ 2021-07-15 17:07:19,784 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
187
+ 2021-07-15 17:07:21,040 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
188
+ 2021-07-15 17:07:23,041 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
189
+ 2021-07-15 17:07:25,042 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
190
+ 2021-07-15 17:07:27,042 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
191
+ 2021-07-15 17:07:27,581 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
192
+ 2021-07-15 17:07:34,923 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
193
+ 2021-07-15 17:07:34,924 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
194
+ 2021-07-15 17:07:37,578 DEBUG SenderThread:716328 [sender.py:send():179] send: history
195
+ 2021-07-15 17:07:37,579 DEBUG SenderThread:716328 [sender.py:send():179] send: summary
196
+ 2021-07-15 17:07:37,579 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
197
+ 2021-07-15 17:07:38,047 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
198
+ 2021-07-15 17:07:39,047 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
199
+ 2021-07-15 17:07:41,048 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
200
+ 2021-07-15 17:07:43,049 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
201
+ 2021-07-15 17:07:45,049 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
202
+ 2021-07-15 17:07:47,050 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
203
+ 2021-07-15 17:07:50,056 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
204
+ 2021-07-15 17:07:50,056 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
205
+ 2021-07-15 17:07:57,615 DEBUG SenderThread:716328 [sender.py:send():179] send: history
206
+ 2021-07-15 17:07:57,615 DEBUG SenderThread:716328 [sender.py:send():179] send: summary
207
+ 2021-07-15 17:07:57,617 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
208
+ 2021-07-15 17:07:57,658 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
209
+ 2021-07-15 17:07:58,055 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
210
+ 2021-07-15 17:07:59,055 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
211
+ 2021-07-15 17:08:01,056 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
212
+ 2021-07-15 17:08:03,057 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
213
+ 2021-07-15 17:08:05,058 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
214
+ 2021-07-15 17:08:05,190 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
215
+ 2021-07-15 17:08:05,191 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
216
+ 2021-07-15 17:08:07,058 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
217
+ 2021-07-15 17:08:09,176 WARNING MainThread:716328 [internal.py:wandb_internal():147] Internal process interrupt: 1
218
+ 2021-07-15 17:08:09,428 WARNING MainThread:716328 [internal.py:wandb_internal():147] Internal process interrupt: 2
219
+ 2021-07-15 17:08:09,428 ERROR MainThread:716328 [internal.py:wandb_internal():150] Internal process interrupted.
220
+ 2021-07-15 17:08:10,189 INFO SenderThread:716328 [sender.py:finish():945] shutting down sender
221
+ 2021-07-15 17:08:10,190 INFO WriterThread:716328 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/run-25rtfw59.wandb
222
+ 2021-07-15 17:08:10,190 INFO SenderThread:716328 [dir_watcher.py:finish():282] shutting down directory watcher
223
+ 2021-07-15 17:08:10,193 INFO HandlerThread:716328 [handler.py:finish():638] shutting down handler
224
+ 2021-07-15 17:08:11,060 INFO SenderThread:716328 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files
225
+ 2021-07-15 17:08:11,061 INFO SenderThread:716328 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/requirements.txt requirements.txt
226
+ 2021-07-15 17:08:11,061 INFO SenderThread:716328 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log output.log
227
+ 2021-07-15 17:08:11,061 INFO SenderThread:716328 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-metadata.json wandb-metadata.json
228
+ 2021-07-15 17:08:11,061 INFO SenderThread:716328 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/config.yaml config.yaml
229
+ 2021-07-15 17:08:11,061 INFO SenderThread:716328 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json wandb-summary.json
230
+ 2021-07-15 17:08:11,062 INFO SenderThread:716328 [file_pusher.py:finish():177] shutting down file pusher
231
+ 2021-07-15 17:08:11,062 INFO SenderThread:716328 [file_pusher.py:join():182] waiting for file pusher
232
+ 2021-07-15 17:08:11,509 INFO Thread-14 :716328 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/config.yaml
233
+ 2021-07-15 17:08:11,528 INFO Thread-13 :716328 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
234
+ 2021-07-15 17:08:11,529 INFO Thread-12 :716328 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/requirements.txt
235
+ 2021-07-15 17:08:11,574 INFO Thread-15 :716328 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
236
+ 2021-07-15 17:08:12,362 INFO MainThread:716328 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210715_165555-25rtfw59/logs/debug.log ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 16:55:55,785 INFO MainThread:715071 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-15 16:55:55,785 INFO MainThread:715071 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-15 16:55:55,785 INFO MainThread:715071 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/logs/debug.log
4
+ 2021-07-15 16:55:55,785 INFO MainThread:715071 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/logs/debug-internal.log
5
+ 2021-07-15 16:55:55,785 INFO MainThread:715071 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-15 16:55:55,785 INFO MainThread:715071 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-15 16:55:55,785 INFO MainThread:715071 [wandb_init.py:init():419] starting backend
9
+ 2021-07-15 16:55:55,785 INFO MainThread:715071 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-15 16:55:55,838 INFO MainThread:715071 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-15 16:55:55,890 INFO MainThread:715071 [backend.py:ensure_launched():139] started backend process with pid: 716328
12
+ 2021-07-15 16:55:55,893 INFO MainThread:715071 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-15 16:55:55,896 INFO MainThread:715071 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-15 16:55:55,897 INFO MainThread:715071 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-15 16:55:56,559 INFO MainThread:715071 [wandb_init.py:init():496] got version response
16
+ 2021-07-15 16:55:56,560 INFO MainThread:715071 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-15 16:55:56,733 INFO MainThread:715071 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-15 16:55:57,960 INFO MainThread:715071 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-15 16:55:57,961 INFO MainThread:715071 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-15 16:55:57,961 INFO MainThread:715071 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-15 16:55:57,964 INFO MainThread:715071 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-15 16:55:57,964 INFO MainThread:715071 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-15 16:55:57,970 INFO MainThread:715071 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_16-55-47_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 15000, 'save_total_limit': 20, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 6000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-15 16:55:57,972 INFO MainThread:715071 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
25
+ 2021-07-15 16:55:57,974 INFO MainThread:715071 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 4000}
26
+ 2021-07-15 17:08:09,232 INFO MainThread:715071 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 255
27
+ 2021-07-15 17:08:09,232 INFO MainThread:715071 [wandb_run.py:_restore():1565] restore
wandb/run-20210715_165555-25rtfw59/run-25rtfw59.wandb ADDED
Binary file (22.3 kB). View file
 
wandb/run-20210715_171007-1mu5szt1/files/config.yaml ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 4: 3.8.10
17
+ 5: 0.10.33
18
+ 6: 4.9.0.dev0
19
+ 8:
20
+ - 5
21
+ adafactor:
22
+ desc: null
23
+ value: false
24
+ adam_beta1:
25
+ desc: null
26
+ value: 0.9
27
+ adam_beta2:
28
+ desc: null
29
+ value: 0.98
30
+ adam_epsilon:
31
+ desc: null
32
+ value: 1.0e-08
33
+ cache_dir:
34
+ desc: null
35
+ value: null
36
+ config_name:
37
+ desc: null
38
+ value: ./
39
+ dataloader_drop_last:
40
+ desc: null
41
+ value: false
42
+ dataloader_num_workers:
43
+ desc: null
44
+ value: 0
45
+ dataloader_pin_memory:
46
+ desc: null
47
+ value: true
48
+ dataset_config_name:
49
+ desc: null
50
+ value: null
51
+ dataset_name:
52
+ desc: null
53
+ value: null
54
+ ddp_find_unused_parameters:
55
+ desc: null
56
+ value: null
57
+ debug:
58
+ desc: null
59
+ value: []
60
+ deepspeed:
61
+ desc: null
62
+ value: null
63
+ disable_tqdm:
64
+ desc: null
65
+ value: false
66
+ do_eval:
67
+ desc: null
68
+ value: false
69
+ do_predict:
70
+ desc: null
71
+ value: false
72
+ do_train:
73
+ desc: null
74
+ value: false
75
+ dtype:
76
+ desc: null
77
+ value: float32
78
+ eval_accumulation_steps:
79
+ desc: null
80
+ value: null
81
+ eval_steps:
82
+ desc: null
83
+ value: 6000
84
+ evaluation_strategy:
85
+ desc: null
86
+ value: IntervalStrategy.NO
87
+ fp16:
88
+ desc: null
89
+ value: false
90
+ fp16_backend:
91
+ desc: null
92
+ value: auto
93
+ fp16_full_eval:
94
+ desc: null
95
+ value: false
96
+ fp16_opt_level:
97
+ desc: null
98
+ value: O1
99
+ gradient_accumulation_steps:
100
+ desc: null
101
+ value: 1
102
+ greater_is_better:
103
+ desc: null
104
+ value: null
105
+ group_by_length:
106
+ desc: null
107
+ value: false
108
+ ignore_data_skip:
109
+ desc: null
110
+ value: false
111
+ label_names:
112
+ desc: null
113
+ value: null
114
+ label_smoothing_factor:
115
+ desc: null
116
+ value: 0.0
117
+ learning_rate:
118
+ desc: null
119
+ value: 3.0e-05
120
+ length_column_name:
121
+ desc: null
122
+ value: length
123
+ line_by_line:
124
+ desc: null
125
+ value: false
126
+ load_best_model_at_end:
127
+ desc: null
128
+ value: false
129
+ local_rank:
130
+ desc: null
131
+ value: -1
132
+ log_level:
133
+ desc: null
134
+ value: -1
135
+ log_level_replica:
136
+ desc: null
137
+ value: -1
138
+ log_on_each_node:
139
+ desc: null
140
+ value: true
141
+ logging_dir:
142
+ desc: null
143
+ value: ./runs/Jul15_17-09-58_t1v-n-f5c06ea1-w-0
144
+ logging_first_step:
145
+ desc: null
146
+ value: false
147
+ logging_steps:
148
+ desc: null
149
+ value: 50
150
+ logging_strategy:
151
+ desc: null
152
+ value: IntervalStrategy.STEPS
153
+ lr_scheduler_type:
154
+ desc: null
155
+ value: SchedulerType.LINEAR
156
+ max_eval_samples:
157
+ desc: null
158
+ value: 4000
159
+ max_grad_norm:
160
+ desc: null
161
+ value: 1.0
162
+ max_seq_length:
163
+ desc: null
164
+ value: 4096
165
+ max_steps:
166
+ desc: null
167
+ value: -1
168
+ metric_for_best_model:
169
+ desc: null
170
+ value: null
171
+ mlm_probability:
172
+ desc: null
173
+ value: 0.15
174
+ model_name_or_path:
175
+ desc: null
176
+ value: null
177
+ model_type:
178
+ desc: null
179
+ value: big_bird
180
+ mp_parameters:
181
+ desc: null
182
+ value: ''
183
+ no_cuda:
184
+ desc: null
185
+ value: false
186
+ num_train_epochs:
187
+ desc: null
188
+ value: 2.0
189
+ output_dir:
190
+ desc: null
191
+ value: ./
192
+ overwrite_cache:
193
+ desc: null
194
+ value: false
195
+ overwrite_output_dir:
196
+ desc: null
197
+ value: true
198
+ pad_to_max_length:
199
+ desc: null
200
+ value: false
201
+ past_index:
202
+ desc: null
203
+ value: -1
204
+ per_device_eval_batch_size:
205
+ desc: null
206
+ value: 1
207
+ per_device_train_batch_size:
208
+ desc: null
209
+ value: 1
210
+ per_gpu_eval_batch_size:
211
+ desc: null
212
+ value: null
213
+ per_gpu_train_batch_size:
214
+ desc: null
215
+ value: null
216
+ prediction_loss_only:
217
+ desc: null
218
+ value: false
219
+ preprocessing_num_workers:
220
+ desc: null
221
+ value: 96
222
+ push_to_hub:
223
+ desc: null
224
+ value: true
225
+ push_to_hub_model_id:
226
+ desc: null
227
+ value: ''
228
+ push_to_hub_organization:
229
+ desc: null
230
+ value: null
231
+ push_to_hub_token:
232
+ desc: null
233
+ value: null
234
+ remove_unused_columns:
235
+ desc: null
236
+ value: true
237
+ report_to:
238
+ desc: null
239
+ value:
240
+ - tensorboard
241
+ - wandb
242
+ resume_from_checkpoint:
243
+ desc: null
244
+ value: ./
245
+ run_name:
246
+ desc: null
247
+ value: ./
248
+ save_on_each_node:
249
+ desc: null
250
+ value: false
251
+ save_steps:
252
+ desc: null
253
+ value: 15000
254
+ save_strategy:
255
+ desc: null
256
+ value: IntervalStrategy.STEPS
257
+ save_total_limit:
258
+ desc: null
259
+ value: 20
260
+ seed:
261
+ desc: null
262
+ value: 42
263
+ sharded_ddp:
264
+ desc: null
265
+ value: []
266
+ skip_memory_metrics:
267
+ desc: null
268
+ value: true
269
+ tokenizer_name:
270
+ desc: null
271
+ value: ./
272
+ tpu_metrics_debug:
273
+ desc: null
274
+ value: false
275
+ tpu_num_cores:
276
+ desc: null
277
+ value: null
278
+ train_ref_file:
279
+ desc: null
280
+ value: null
281
+ use_fast_tokenizer:
282
+ desc: null
283
+ value: true
284
+ use_legacy_prediction_loop:
285
+ desc: null
286
+ value: false
287
+ validation_ref_file:
288
+ desc: null
289
+ value: null
290
+ validation_split_percentage:
291
+ desc: null
292
+ value: 5
293
+ warmup_ratio:
294
+ desc: null
295
+ value: 0.0
296
+ warmup_steps:
297
+ desc: null
298
+ value: 10000
299
+ weight_decay:
300
+ desc: null
301
+ value: 0.0095
wandb/run-20210715_171007-1mu5szt1/files/output.log ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [17:10:21] - INFO - absl - Restoring checkpoint from ./checkpoint_10000
2
+ tcmalloc: large alloc 1530273792 bytes == 0x9b524000 @ 0x7f5b75dd5680 0x7f5b75df6824 0x5b9a14 0x50b2ae 0x50cb1b 0x5a6f17 0x5f3010 0x56fd36 0x568d9a 0x5f5b33 0x56aadf 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7f5b75bea0b3 0x5f96de
3
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
4
+ warnings.warn(
5
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
6
+ warnings.warn(
7
+ Epoch ... (1/2): 0%| | 0/2 [00:00<?, ?it/s]
8
+
9
+
10
+
11
+
12
+
13
+
14
+ Training...: 28%|██████████████████████████████████████▌ | 100067/352766 [04:37<16:57:35, 4.14it/s]
15
+
16
+
17
+
18
+
19
+ Training...: 28%|██████████████████████████████████████▌ | 100119/352766 [04:57<13:06:22, 5.35it/s]
20
+
21
+
22
+
23
+ Training...: 28%|██████████████████████████████████████▌ | 100152/352766 [05:03<26:17:15, 2.67it/s]
24
+
25
+
26
+
27
+
28
+ Training...: 28%|██████████████████████████████████████▋ | 100202/352766 [05:23<26:02:15, 2.69it/s]
29
+
30
+
31
+
32
+
33
+ Training...: 28%|██████████████████████████████████████▋ | 100252/352766 [05:43<26:10:16, 2.68it/s]
34
+
35
+
36
+
37
+
38
+
39
+ Training...: 28%|██████████████████████████████████████▋ | 100317/352766 [06:17<17:05:12, 4.10it/s]
40
+
41
+
42
+
43
+ Training...: 28%|██████████████████████████████████████▋ | 100352/352766 [06:23<26:12:05, 2.68it/s]
44
+
45
+
46
+
47
+
48
+ Training...: 28%|██████████████████████████████████████▋ | 100402/352766 [06:43<26:11:04, 2.68it/s]
49
+
50
+
51
+
52
+
53
+
54
+ Training...: 28%|██████████████████████████████████████▋ | 100469/352766 [07:17<13:12:02, 5.31it/s]
55
+
56
+
57
+
58
+ Training...: 28%|██████████████████████████████████████▋ | 100502/352766 [07:23<26:09:06, 2.68it/s]
59
+
60
+
61
+
62
+
63
+
64
+ Training...: 29%|██████████████████████████████████████▊ | 100571/352766 [07:58<10:30:33, 6.67it/s]
65
+
66
+
67
+
68
+
69
+ Training...: 29%|██████████████████████████████████████▊ | 100619/352766 [08:18<13:30:57, 5.18it/s]
70
+
71
+
72
+
73
+ Training...: 29%|██████████████████████████████████████▊ | 100652/352766 [08:23<25:56:56, 2.70it/s]
74
+
75
+
76
+
77
+
78
+ Training...: 29%|██████████████████████████████████████▊ | 100702/352766 [08:43<26:11:10, 2.67it/s]
79
+
80
+
81
+
82
+
83
+
84
+ Training...: 29%|██████████████████████████████████████▊ | 100771/352766 [09:18<10:27:46, 6.69it/s]
85
+
86
+
87
+
88
+
89
+ Training...: 29%|██████████████████████████████████████▊ | 100821/352766 [09:38<10:21:37, 6.75it/s]
90
+
91
+
92
+
93
+
94
+ Training...: 29%|███████████████████████████████████████▏ | 100873/352766 [09:58<8:37:24, 8.11it/s]
95
+
96
+
97
+
98
+
99
+ Training...: 29%|███████████████████████████████████████▏ | 100923/352766 [10:18<8:39:16, 8.08it/s]
100
+
101
+
102
+
103
+ Training...: 29%|██████████████████████████████████████▉ | 100952/352766 [10:24<25:57:41, 2.69it/s]
104
+
105
+
106
+
107
+
108
+ Training...: 29%|██████████████████████████████████████▉ | 101002/352766 [10:44<26:08:39, 2.67it/s]
109
+
110
+
111
+
112
+
113
+ Training...: 29%|██████████████████████████████████████▉ | 101052/352766 [11:04<26:34:55, 2.63it/s]
114
+
115
+
116
+
117
+
118
+ Training...: 29%|██████████████████████████████████████▉ | 101102/352766 [11:24<26:07:20, 2.68it/s]
119
+
120
+
121
+
122
+
123
+ Training...: 29%|██████████████████████████████████████▉ | 101152/352766 [11:44<25:00:47, 2.79it/s]
124
+
125
+
126
+
127
+
128
+ Training...: 29%|███████████████████████████████████████ | 101202/352766 [12:04<26:07:29, 2.67it/s]
129
+
130
+
131
+
132
+
133
+
134
+ Training...: 29%|███████████████████████████████████████ | 101271/352766 [12:38<10:35:05, 6.60it/s]
135
+
136
+
137
+
138
+ Training...: 29%|███████████████████████████████████████ | 101302/352766 [12:44<25:54:10, 2.70it/s]
139
+
140
+
141
+
142
+
143
+ Training...: 29%|███████████████████████████████████████ | 101352/352766 [13:04<26:04:56, 2.68it/s]
144
+
145
+
146
+
147
+
148
+ Training...: 29%|███████████████████████████████████████ | 101402/352766 [13:24<25:52:37, 2.70it/s]
149
+
150
+
151
+
152
+
153
+ Training...: 29%|███████████████████████████████████████ | 101452/352766 [13:44<26:05:47, 2.68it/s]
154
+
155
+
156
+
157
+
158
+ Training...: 29%|███████████████████████████████████████▏ | 101502/352766 [14:04<25:53:30, 2.70it/s]
159
+
160
+
161
+
162
+
163
+ Training...: 29%|███████████████████████████████████████▏ | 101552/352766 [14:24<26:05:49, 2.67it/s]
164
+
165
+
166
+
167
+
168
+ Training...: 29%|███████████████████████████████████████▏ | 101602/352766 [14:44<25:48:27, 2.70it/s]
169
+
170
+
171
+
172
+
173
+ Training...: 29%|███████████████████████████████████████▏ | 101652/352766 [15:04<26:04:33, 2.68it/s]
174
+
175
+
176
+
177
+
178
+ Training...: 29%|███████████████████████████████████████▏ | 101702/352766 [15:24<25:57:03, 2.69it/s]
179
+
180
+
181
+
182
+
183
+ Training...: 29%|███████████████████████████████████████▏ | 101752/352766 [15:44<26:05:09, 2.67it/s]
184
+
185
+
186
+
187
+
188
+ Training...: 29%|███████████████████████████████████████▏ | 101802/352766 [16:04<26:04:34, 2.67it/s]
189
+
190
+
191
+
192
+
193
+ Training...: 29%|███████████████████████████████████████▎ | 101852/352766 [16:24<26:03:08, 2.68it/s]
194
+
195
+
196
+
197
+
198
+ Training...: 29%|███████████████████████████████████████▎ | 101902/352766 [16:44<26:01:53, 2.68it/s]
199
+
200
+
201
+
202
+
203
+ Training...: 29%|███████████████████████████████████████▎ | 101952/352766 [17:04<26:08:22, 2.67it/s]
204
+
205
+
206
+
207
+
208
+ Training...: 29%|███████████████████████████████████████▎ | 102002/352766 [17:24<26:02:13, 2.68it/s]
209
+
210
+
211
+
212
+
213
+ Training...: 29%|███████████████████████████████████████▎ | 102052/352766 [17:44<25:49:15, 2.70it/s]
214
+
215
+
216
+
217
+
218
+ Training...: 29%|███████████████████████████████████████▎ | 102102/352766 [18:05<26:01:33, 2.68it/s]
219
+
220
+
221
+
222
+
223
+ Training...: 29%|███████████████████████████████████████▍ | 102152/352766 [18:25<25:46:31, 2.70it/s]
224
+
225
+
226
+
227
+
228
+ Training...: 29%|███████████████████████████████████████▍ | 102202/352766 [18:45<26:00:54, 2.68it/s]
229
+
230
+
231
+
232
+
233
+ Training...: 29%|███████████████████████████████████████▍ | 102252/352766 [19:05<25:47:08, 2.70it/s]
234
+
235
+
236
+
237
+
238
+ Training...: 29%|███████████████████████████████████████▍ | 102302/352766 [19:25<26:00:27, 2.68it/s]
239
+
240
+
241
+
242
+
243
+ Training...: 29%|███████████████████████████████████████▍ | 102352/352766 [19:45<25:53:04, 2.69it/s]
244
+
245
+
246
+
247
+
248
+ Training...: 29%|███████████████████████████████████████▍ | 102402/352766 [20:05<26:01:48, 2.67it/s]
249
+
250
+
251
+
252
+
253
+ Training...: 29%|███████████████████████████████████████▍ | 102452/352766 [20:25<26:00:49, 2.67it/s]
254
+
255
+
256
+
257
+
258
+ Training...: 29%|███████████████████████████████████████▌ | 102502/352766 [20:45<25:58:45, 2.68it/s]
259
+
260
+
261
+
262
+
263
+ Training...: 29%|██████████████��████████████████████████▌ | 102552/352766 [21:05<25:58:02, 2.68it/s]
264
+
265
+
266
+
267
+
268
+ Training...: 29%|███████████████████████████████████████▌ | 102602/352766 [21:25<25:58:39, 2.67it/s]
269
+
270
+
271
+
272
+
273
+ Training...: 29%|███████████████████████████████████████▌ | 102652/352766 [21:45<25:59:04, 2.67it/s]
274
+
275
+
276
+
277
+
278
+ Training...: 29%|███████████████████████████████████████▌ | 102702/352766 [22:05<25:43:17, 2.70it/s]
279
+
280
+
281
+
282
+
283
+
284
+ Training...: 29%|███████████████████████████████████████▉ | 102777/352766 [22:40<6:58:15, 9.96it/s]
285
+
286
+
287
+
288
+
289
+ Training...: 29%|███████████████████████████████████████▉ | 102831/352766 [23:00<5:31:01, 12.58it/s]
290
+
291
+
292
+
293
+ Training...: 29%|███████████████████████████████████████▋ | 102852/352766 [23:05<25:58:26, 2.67it/s]
294
+
295
+
296
+
297
+
298
+ Training...: 29%|███████████████████████████████████████▋ | 102902/352766 [23:25<25:41:12, 2.70it/s]
299
+
300
+
301
+
302
+
303
+
304
+ Training...: 29%|███████████████████████████████████████▉ | 102977/352766 [24:00<6:26:06, 10.78it/s]
305
+
306
+
307
+
wandb/run-20210715_171007-1mu5szt1/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210715_171007-1mu5szt1/files/wandb-metadata.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-15T17:10:09.049159",
5
+ "startedAt": "2021-07-15T17:10:07.007216",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=10000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=50",
22
+ "--eval_steps=6000",
23
+ "--num_train_epochs=2",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=15000",
26
+ "--learning_rate=3e-5",
27
+ "--per_device_train_batch_size=1",
28
+ "--per_device_eval_batch_size=1",
29
+ "--save_total_limit=20",
30
+ "--max_eval_samples=4000",
31
+ "--resume_from_checkpoint=./"
32
+ ],
33
+ "state": "running",
34
+ "program": "./run_mlm_flax_no_accum.py",
35
+ "codePath": "run_mlm_flax_no_accum.py",
36
+ "git": {
37
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
38
+ "commit": "cc569aecf5e26454416d7a13c7876ad9111120cf"
39
+ },
40
+ "email": null,
41
+ "root": "/home/dat/pino-roberta-base",
42
+ "host": "t1v-n-f5c06ea1-w-0",
43
+ "username": "dat",
44
+ "executable": "/home/dat/pino/bin/python"
45
+ }
wandb/run-20210715_171007-1mu5szt1/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"training_step": 2950, "learning_rate": 2.5990751964855008e-05, "train_loss": 2.4761266708374023, "_runtime": 1780, "_timestamp": 1626370787, "_step": 58}
wandb/run-20210715_171007-1mu5szt1/logs/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20210715_171007-1mu5szt1/logs/debug.log ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 17:10:07,008 INFO MainThread:717656 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-15 17:10:07,008 INFO MainThread:717656 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-15 17:10:07,008 INFO MainThread:717656 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_171007-1mu5szt1/logs/debug.log
4
+ 2021-07-15 17:10:07,009 INFO MainThread:717656 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_171007-1mu5szt1/logs/debug-internal.log
5
+ 2021-07-15 17:10:07,009 INFO MainThread:717656 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-15 17:10:07,009 INFO MainThread:717656 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-15 17:10:07,009 INFO MainThread:717656 [wandb_init.py:init():419] starting backend
9
+ 2021-07-15 17:10:07,009 INFO MainThread:717656 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-15 17:10:07,057 INFO MainThread:717656 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-15 17:10:07,104 INFO MainThread:717656 [backend.py:ensure_launched():139] started backend process with pid: 719053
12
+ 2021-07-15 17:10:07,106 INFO MainThread:717656 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-15 17:10:07,109 INFO MainThread:717656 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-15 17:10:07,110 INFO MainThread:717656 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-15 17:10:07,747 INFO MainThread:717656 [wandb_init.py:init():496] got version response
16
+ 2021-07-15 17:10:07,747 INFO MainThread:717656 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-15 17:10:07,930 INFO MainThread:717656 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-15 17:10:09,087 INFO MainThread:717656 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-15 17:10:09,088 INFO MainThread:717656 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-15 17:10:09,088 INFO MainThread:717656 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-15 17:10:09,091 INFO MainThread:717656 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-15 17:10:09,091 INFO MainThread:717656 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-15 17:10:09,097 INFO MainThread:717656 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_17-09-58_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 15000, 'save_total_limit': 20, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 6000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-15 17:10:09,098 INFO MainThread:717656 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
25
+ 2021-07-15 17:10:09,100 INFO MainThread:717656 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 4000}
26
+ 2021-07-15 17:40:07,255 INFO MainThread:717656 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 255
27
+ 2021-07-15 17:40:07,255 INFO MainThread:717656 [wandb_run.py:_restore():1565] restore
wandb/run-20210715_171007-1mu5szt1/run-1mu5szt1.wandb ADDED
Binary file (108 kB). View file
 
wandb/run-20210715_174147-3nkn7hxg/files/config.yaml ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 2:
17
+ - 3
18
+ - 11
19
+ 4: 3.8.10
20
+ 5: 0.10.33
21
+ 6: 4.9.0.dev0
22
+ 8:
23
+ - 5
24
+ adafactor:
25
+ desc: null
26
+ value: false
27
+ adam_beta1:
28
+ desc: null
29
+ value: 0.9
30
+ adam_beta2:
31
+ desc: null
32
+ value: 0.98
33
+ adam_epsilon:
34
+ desc: null
35
+ value: 1.0e-08
36
+ cache_dir:
37
+ desc: null
38
+ value: null
39
+ config_name:
40
+ desc: null
41
+ value: ./
42
+ dataloader_drop_last:
43
+ desc: null
44
+ value: false
45
+ dataloader_num_workers:
46
+ desc: null
47
+ value: 0
48
+ dataloader_pin_memory:
49
+ desc: null
50
+ value: true
51
+ dataset_config_name:
52
+ desc: null
53
+ value: null
54
+ dataset_name:
55
+ desc: null
56
+ value: null
57
+ ddp_find_unused_parameters:
58
+ desc: null
59
+ value: null
60
+ debug:
61
+ desc: null
62
+ value: []
63
+ deepspeed:
64
+ desc: null
65
+ value: null
66
+ disable_tqdm:
67
+ desc: null
68
+ value: false
69
+ do_eval:
70
+ desc: null
71
+ value: false
72
+ do_predict:
73
+ desc: null
74
+ value: false
75
+ do_train:
76
+ desc: null
77
+ value: false
78
+ dtype:
79
+ desc: null
80
+ value: float32
81
+ eval_accumulation_steps:
82
+ desc: null
83
+ value: null
84
+ eval_steps:
85
+ desc: null
86
+ value: 6000
87
+ evaluation_strategy:
88
+ desc: null
89
+ value: IntervalStrategy.NO
90
+ fp16:
91
+ desc: null
92
+ value: false
93
+ fp16_backend:
94
+ desc: null
95
+ value: auto
96
+ fp16_full_eval:
97
+ desc: null
98
+ value: false
99
+ fp16_opt_level:
100
+ desc: null
101
+ value: O1
102
+ gradient_accumulation_steps:
103
+ desc: null
104
+ value: 1
105
+ greater_is_better:
106
+ desc: null
107
+ value: null
108
+ group_by_length:
109
+ desc: null
110
+ value: false
111
+ ignore_data_skip:
112
+ desc: null
113
+ value: false
114
+ label_names:
115
+ desc: null
116
+ value: null
117
+ label_smoothing_factor:
118
+ desc: null
119
+ value: 0.0
120
+ learning_rate:
121
+ desc: null
122
+ value: 3.0e-05
123
+ length_column_name:
124
+ desc: null
125
+ value: length
126
+ line_by_line:
127
+ desc: null
128
+ value: false
129
+ load_best_model_at_end:
130
+ desc: null
131
+ value: false
132
+ local_rank:
133
+ desc: null
134
+ value: -1
135
+ log_level:
136
+ desc: null
137
+ value: -1
138
+ log_level_replica:
139
+ desc: null
140
+ value: -1
141
+ log_on_each_node:
142
+ desc: null
143
+ value: true
144
+ logging_dir:
145
+ desc: null
146
+ value: ./runs/Jul15_17-41-39_t1v-n-f5c06ea1-w-0
147
+ logging_first_step:
148
+ desc: null
149
+ value: false
150
+ logging_steps:
151
+ desc: null
152
+ value: 50
153
+ logging_strategy:
154
+ desc: null
155
+ value: IntervalStrategy.STEPS
156
+ lr_scheduler_type:
157
+ desc: null
158
+ value: SchedulerType.LINEAR
159
+ max_eval_samples:
160
+ desc: null
161
+ value: 4000
162
+ max_grad_norm:
163
+ desc: null
164
+ value: 1.0
165
+ max_seq_length:
166
+ desc: null
167
+ value: 4096
168
+ max_steps:
169
+ desc: null
170
+ value: -1
171
+ metric_for_best_model:
172
+ desc: null
173
+ value: null
174
+ mlm_probability:
175
+ desc: null
176
+ value: 0.15
177
+ model_name_or_path:
178
+ desc: null
179
+ value: null
180
+ model_type:
181
+ desc: null
182
+ value: big_bird
183
+ mp_parameters:
184
+ desc: null
185
+ value: ''
186
+ no_cuda:
187
+ desc: null
188
+ value: false
189
+ num_train_epochs:
190
+ desc: null
191
+ value: 2.0
192
+ output_dir:
193
+ desc: null
194
+ value: ./
195
+ overwrite_cache:
196
+ desc: null
197
+ value: false
198
+ overwrite_output_dir:
199
+ desc: null
200
+ value: true
201
+ pad_to_max_length:
202
+ desc: null
203
+ value: false
204
+ past_index:
205
+ desc: null
206
+ value: -1
207
+ per_device_eval_batch_size:
208
+ desc: null
209
+ value: 1
210
+ per_device_train_batch_size:
211
+ desc: null
212
+ value: 1
213
+ per_gpu_eval_batch_size:
214
+ desc: null
215
+ value: null
216
+ per_gpu_train_batch_size:
217
+ desc: null
218
+ value: null
219
+ prediction_loss_only:
220
+ desc: null
221
+ value: false
222
+ preprocessing_num_workers:
223
+ desc: null
224
+ value: 96
225
+ push_to_hub:
226
+ desc: null
227
+ value: true
228
+ push_to_hub_model_id:
229
+ desc: null
230
+ value: ''
231
+ push_to_hub_organization:
232
+ desc: null
233
+ value: null
234
+ push_to_hub_token:
235
+ desc: null
236
+ value: null
237
+ remove_unused_columns:
238
+ desc: null
239
+ value: true
240
+ report_to:
241
+ desc: null
242
+ value:
243
+ - tensorboard
244
+ - wandb
245
+ resume_from_checkpoint:
246
+ desc: null
247
+ value: ./
248
+ run_name:
249
+ desc: null
250
+ value: ./
251
+ save_on_each_node:
252
+ desc: null
253
+ value: false
254
+ save_steps:
255
+ desc: null
256
+ value: 15000
257
+ save_strategy:
258
+ desc: null
259
+ value: IntervalStrategy.STEPS
260
+ save_total_limit:
261
+ desc: null
262
+ value: 20
263
+ seed:
264
+ desc: null
265
+ value: 42
266
+ sharded_ddp:
267
+ desc: null
268
+ value: []
269
+ skip_memory_metrics:
270
+ desc: null
271
+ value: true
272
+ tokenizer_name:
273
+ desc: null
274
+ value: ./
275
+ tpu_metrics_debug:
276
+ desc: null
277
+ value: false
278
+ tpu_num_cores:
279
+ desc: null
280
+ value: null
281
+ train_ref_file:
282
+ desc: null
283
+ value: null
284
+ use_fast_tokenizer:
285
+ desc: null
286
+ value: true
287
+ use_legacy_prediction_loop:
288
+ desc: null
289
+ value: false
290
+ validation_ref_file:
291
+ desc: null
292
+ value: null
293
+ validation_split_percentage:
294
+ desc: null
295
+ value: 5
296
+ warmup_ratio:
297
+ desc: null
298
+ value: 0.0
299
+ warmup_steps:
300
+ desc: null
301
+ value: 10000
302
+ weight_decay:
303
+ desc: null
304
+ value: 0.0095
wandb/run-20210715_174147-3nkn7hxg/files/output.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ [17:42:02] - INFO - absl - Restoring checkpoint from ./checkpoint_10000
3
+ tcmalloc: large alloc 1530273792 bytes == 0x9a3bc000 @ 0x7ffb11761680 0x7ffb11782824 0x5b9a14 0x50b2ae 0x50cb1b 0x5a6f17 0x5f3010 0x56fd36 0x568d9a 0x5f5b33 0x56aadf 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7ffb115760b3 0x5f96de
4
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
5
+ warnings.warn(
6
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
7
+ warnings.warn(
8
+ Epoch ... (1/2): 0%| | 0/2 [05:22<?, ?it/s]
9
+ Traceback (most recent call last):
10
+ File "./run_mlm_flax_no_accum.py", line 694, in <module>
11
+ for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1,start=resume_step)):
12
+ File "/home/dat/pino/lib/python3.8/site-packages/tqdm/std.py", line 1015, in __init__
13
+ raise (
14
+ tqdm.std.TqdmKeyError: "Unknown argument(s): {'start': 100002}"
wandb/run-20210715_174147-3nkn7hxg/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210715_174147-3nkn7hxg/files/wandb-metadata.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-15T17:41:49.930191",
5
+ "startedAt": "2021-07-15T17:41:47.904489",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=10000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=50",
22
+ "--eval_steps=6000",
23
+ "--num_train_epochs=2",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=15000",
26
+ "--learning_rate=3e-5",
27
+ "--per_device_train_batch_size=1",
28
+ "--per_device_eval_batch_size=1",
29
+ "--save_total_limit=20",
30
+ "--max_eval_samples=4000",
31
+ "--resume_from_checkpoint=./"
32
+ ],
33
+ "state": "running",
34
+ "program": "./run_mlm_flax_no_accum.py",
35
+ "codePath": "run_mlm_flax_no_accum.py",
36
+ "git": {
37
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
38
+ "commit": "cc569aecf5e26454416d7a13c7876ad9111120cf"
39
+ },
40
+ "email": null,
41
+ "root": "/home/dat/pino-roberta-base",
42
+ "host": "t1v-n-f5c06ea1-w-0",
43
+ "username": "dat",
44
+ "executable": "/home/dat/pino/bin/python"
45
+ }
wandb/run-20210715_174147-3nkn7hxg/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
wandb/run-20210715_174147-3nkn7hxg/logs/debug-internal.log ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 17:41:48,594 INFO MainThread:723175 [internal.py:wandb_internal():88] W&B internal server running at pid: 723175, started at: 2021-07-15 17:41:48.594473
2
+ 2021-07-15 17:41:48,596 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: check_version
3
+ 2021-07-15 17:41:48,597 INFO WriterThread:723175 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/run-3nkn7hxg.wandb
4
+ 2021-07-15 17:41:48,598 DEBUG SenderThread:723175 [sender.py:send():179] send: header
5
+ 2021-07-15 17:41:48,598 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: check_version
6
+ 2021-07-15 17:41:48,634 DEBUG SenderThread:723175 [sender.py:send():179] send: run
7
+ 2021-07-15 17:41:48,804 INFO SenderThread:723175 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files
8
+ 2021-07-15 17:41:48,805 INFO SenderThread:723175 [sender.py:_start_run_threads():716] run started: 3nkn7hxg with start time 1626370908
9
+ 2021-07-15 17:41:48,805 DEBUG SenderThread:723175 [sender.py:send():179] send: summary
10
+ 2021-07-15 17:41:48,805 INFO SenderThread:723175 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
11
+ 2021-07-15 17:41:48,805 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: run_start
12
+ 2021-07-15 17:41:49,817 INFO Thread-8 :723175 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/wandb-summary.json
13
+ 2021-07-15 17:41:49,929 DEBUG HandlerThread:723175 [meta.py:__init__():39] meta init
14
+ 2021-07-15 17:41:49,930 DEBUG HandlerThread:723175 [meta.py:__init__():53] meta init done
15
+ 2021-07-15 17:41:49,930 DEBUG HandlerThread:723175 [meta.py:probe():210] probe
16
+ 2021-07-15 17:41:49,931 DEBUG HandlerThread:723175 [meta.py:_setup_git():200] setup git
17
+ 2021-07-15 17:41:49,960 DEBUG HandlerThread:723175 [meta.py:_setup_git():207] setup git done
18
+ 2021-07-15 17:41:49,961 DEBUG HandlerThread:723175 [meta.py:_save_pip():57] save pip
19
+ 2021-07-15 17:41:49,961 DEBUG HandlerThread:723175 [meta.py:_save_pip():71] save pip done
20
+ 2021-07-15 17:41:49,961 DEBUG HandlerThread:723175 [meta.py:probe():252] probe done
21
+ 2021-07-15 17:41:49,964 DEBUG SenderThread:723175 [sender.py:send():179] send: files
22
+ 2021-07-15 17:41:49,964 INFO SenderThread:723175 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
23
+ 2021-07-15 17:41:49,970 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
24
+ 2021-07-15 17:41:49,971 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
25
+ 2021-07-15 17:41:50,099 DEBUG SenderThread:723175 [sender.py:send():179] send: config
26
+ 2021-07-15 17:41:50,100 DEBUG SenderThread:723175 [sender.py:send():179] send: config
27
+ 2021-07-15 17:41:50,100 DEBUG SenderThread:723175 [sender.py:send():179] send: config
28
+ 2021-07-15 17:41:50,404 INFO Thread-11 :723175 [upload_job.py:push():137] Uploaded file /tmp/tmplksa3t0ywandb/5lvlwhj5-wandb-metadata.json
29
+ 2021-07-15 17:41:50,815 INFO Thread-8 :723175 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/requirements.txt
30
+ 2021-07-15 17:41:50,816 INFO Thread-8 :723175 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/output.log
31
+ 2021-07-15 17:41:50,816 INFO Thread-8 :723175 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/wandb-metadata.json
32
+ 2021-07-15 17:42:04,821 INFO Thread-8 :723175 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/output.log
33
+ 2021-07-15 17:42:05,202 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
34
+ 2021-07-15 17:42:05,203 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
35
+ 2021-07-15 17:42:06,821 INFO Thread-8 :723175 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/output.log
36
+ 2021-07-15 17:42:08,822 INFO Thread-8 :723175 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/output.log
37
+ 2021-07-15 17:42:18,016 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
38
+ 2021-07-15 17:42:19,827 INFO Thread-8 :723175 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/config.yaml
39
+ 2021-07-15 17:42:20,347 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
40
+ 2021-07-15 17:42:20,347 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
41
+ 2021-07-15 17:42:35,479 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
42
+ 2021-07-15 17:42:35,480 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
43
+ 2021-07-15 17:42:48,096 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
44
+ 2021-07-15 17:42:50,612 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
45
+ 2021-07-15 17:42:50,612 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
46
+ 2021-07-15 17:43:05,746 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
47
+ 2021-07-15 17:43:05,747 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
48
+ 2021-07-15 17:43:18,173 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
49
+ 2021-07-15 17:43:20,877 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
50
+ 2021-07-15 17:43:20,878 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
51
+ 2021-07-15 17:43:36,007 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
52
+ 2021-07-15 17:43:36,008 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
53
+ 2021-07-15 17:43:48,239 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
54
+ 2021-07-15 17:43:51,139 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
55
+ 2021-07-15 17:43:51,139 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
56
+ 2021-07-15 17:44:06,269 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
57
+ 2021-07-15 17:44:06,269 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
58
+ 2021-07-15 17:44:18,311 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
59
+ 2021-07-15 17:44:21,400 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
60
+ 2021-07-15 17:44:21,400 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
61
+ 2021-07-15 17:44:36,530 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
62
+ 2021-07-15 17:44:36,530 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
63
+ 2021-07-15 17:44:48,385 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
64
+ 2021-07-15 17:44:51,661 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
65
+ 2021-07-15 17:44:51,662 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
66
+ 2021-07-15 17:45:06,796 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
67
+ 2021-07-15 17:45:06,796 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
68
+ 2021-07-15 17:45:18,460 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
69
+ 2021-07-15 17:45:21,928 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
70
+ 2021-07-15 17:45:21,928 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
71
+ 2021-07-15 17:45:37,059 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
72
+ 2021-07-15 17:45:37,059 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
73
+ 2021-07-15 17:45:48,530 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
74
+ 2021-07-15 17:45:52,197 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
75
+ 2021-07-15 17:45:52,198 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
76
+ 2021-07-15 17:46:07,329 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
77
+ 2021-07-15 17:46:07,329 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
78
+ 2021-07-15 17:46:18,600 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
79
+ 2021-07-15 17:46:22,464 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
80
+ 2021-07-15 17:46:22,464 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
81
+ 2021-07-15 17:46:37,595 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
82
+ 2021-07-15 17:46:37,595 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
83
+ 2021-07-15 17:46:48,677 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
84
+ 2021-07-15 17:46:52,729 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
85
+ 2021-07-15 17:46:52,729 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
86
+ 2021-07-15 17:47:07,861 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
87
+ 2021-07-15 17:47:07,862 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
88
+ 2021-07-15 17:47:18,753 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
89
+ 2021-07-15 17:47:22,995 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
90
+ 2021-07-15 17:47:22,995 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
91
+ 2021-07-15 17:47:31,673 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
92
+ 2021-07-15 17:47:31,673 DEBUG SenderThread:723175 [sender.py:send():179] send: telemetry
93
+ 2021-07-15 17:47:31,673 DEBUG SenderThread:723175 [sender.py:send():179] send: exit
94
+ 2021-07-15 17:47:31,673 INFO SenderThread:723175 [sender.py:send_exit():287] handling exit code: 1
95
+ 2021-07-15 17:47:31,674 INFO SenderThread:723175 [sender.py:send_exit():295] send defer
96
+ 2021-07-15 17:47:31,674 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
97
+ 2021-07-15 17:47:31,674 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
98
+ 2021-07-15 17:47:31,674 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 0
99
+ 2021-07-15 17:47:31,674 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
100
+ 2021-07-15 17:47:31,674 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 0
101
+ 2021-07-15 17:47:31,675 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 1
102
+ 2021-07-15 17:47:31,675 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
103
+ 2021-07-15 17:47:31,675 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 1
104
+ 2021-07-15 17:47:31,686 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
105
+ 2021-07-15 17:47:31,686 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 1
106
+ 2021-07-15 17:47:31,686 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 2
107
+ 2021-07-15 17:47:31,686 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
108
+ 2021-07-15 17:47:31,687 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
109
+ 2021-07-15 17:47:31,687 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 2
110
+ 2021-07-15 17:47:31,687 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
111
+ 2021-07-15 17:47:31,687 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 2
112
+ 2021-07-15 17:47:31,687 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 3
113
+ 2021-07-15 17:47:31,687 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
114
+ 2021-07-15 17:47:31,687 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 3
115
+ 2021-07-15 17:47:31,687 DEBUG SenderThread:723175 [sender.py:send():179] send: summary
116
+ 2021-07-15 17:47:31,688 INFO SenderThread:723175 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
117
+ 2021-07-15 17:47:31,688 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
118
+ 2021-07-15 17:47:31,688 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 3
119
+ 2021-07-15 17:47:31,688 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 4
120
+ 2021-07-15 17:47:31,688 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
121
+ 2021-07-15 17:47:31,688 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 4
122
+ 2021-07-15 17:47:31,688 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
123
+ 2021-07-15 17:47:31,688 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 4
124
+ 2021-07-15 17:47:31,776 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
125
+ 2021-07-15 17:47:31,881 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 5
126
+ 2021-07-15 17:47:31,881 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
127
+ 2021-07-15 17:47:31,881 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
128
+ 2021-07-15 17:47:31,881 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 5
129
+ 2021-07-15 17:47:31,882 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
130
+ 2021-07-15 17:47:31,882 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 5
131
+ 2021-07-15 17:47:31,882 INFO SenderThread:723175 [dir_watcher.py:finish():282] shutting down directory watcher
132
+ 2021-07-15 17:47:31,891 INFO SenderThread:723175 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/config.yaml
133
+ 2021-07-15 17:47:31,891 INFO SenderThread:723175 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/wandb-summary.json
134
+ 2021-07-15 17:47:31,891 INFO SenderThread:723175 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/output.log
135
+ 2021-07-15 17:47:31,891 INFO SenderThread:723175 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files
136
+ 2021-07-15 17:47:31,891 INFO SenderThread:723175 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/requirements.txt requirements.txt
137
+ 2021-07-15 17:47:31,892 INFO SenderThread:723175 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/output.log output.log
138
+ 2021-07-15 17:47:31,892 INFO SenderThread:723175 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/wandb-metadata.json wandb-metadata.json
139
+ 2021-07-15 17:47:31,892 INFO SenderThread:723175 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/config.yaml config.yaml
140
+ 2021-07-15 17:47:31,892 INFO SenderThread:723175 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/wandb-summary.json wandb-summary.json
141
+ 2021-07-15 17:47:31,895 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 6
142
+ 2021-07-15 17:47:31,896 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
143
+ 2021-07-15 17:47:31,896 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 6
144
+ 2021-07-15 17:47:31,896 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
145
+ 2021-07-15 17:47:31,896 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 6
146
+ 2021-07-15 17:47:31,896 INFO SenderThread:723175 [file_pusher.py:finish():177] shutting down file pusher
147
+ 2021-07-15 17:47:31,983 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
148
+ 2021-07-15 17:47:31,983 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
149
+ 2021-07-15 17:47:32,085 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
150
+ 2021-07-15 17:47:32,085 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
151
+ 2021-07-15 17:47:32,187 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
152
+ 2021-07-15 17:47:32,187 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
153
+ 2021-07-15 17:47:32,289 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
154
+ 2021-07-15 17:47:32,289 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
155
+ 2021-07-15 17:47:32,333 INFO Thread-12 :723175 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/requirements.txt
156
+ 2021-07-15 17:47:32,347 INFO Thread-13 :723175 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/output.log
157
+ 2021-07-15 17:47:32,391 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
158
+ 2021-07-15 17:47:32,391 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
159
+ 2021-07-15 17:47:32,408 INFO Thread-14 :723175 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/config.yaml
160
+ 2021-07-15 17:47:32,444 INFO Thread-15 :723175 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/wandb-summary.json
161
+ 2021-07-15 17:47:32,493 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
162
+ 2021-07-15 17:47:32,493 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
163
+ 2021-07-15 17:47:32,595 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
164
+ 2021-07-15 17:47:32,595 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
165
+ 2021-07-15 17:47:32,645 INFO Thread-7 :723175 [sender.py:transition_state():308] send defer: 7
166
+ 2021-07-15 17:47:32,645 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
167
+ 2021-07-15 17:47:32,645 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 7
168
+ 2021-07-15 17:47:32,646 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
169
+ 2021-07-15 17:47:32,646 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 7
170
+ 2021-07-15 17:47:32,697 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
171
+ 2021-07-15 17:47:32,951 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 8
172
+ 2021-07-15 17:47:32,951 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
173
+ 2021-07-15 17:47:32,952 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
174
+ 2021-07-15 17:47:32,952 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 8
175
+ 2021-07-15 17:47:32,952 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
176
+ 2021-07-15 17:47:32,952 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 8
177
+ 2021-07-15 17:47:32,952 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 9
178
+ 2021-07-15 17:47:32,953 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
179
+ 2021-07-15 17:47:32,953 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 9
180
+ 2021-07-15 17:47:32,953 DEBUG SenderThread:723175 [sender.py:send():179] send: final
181
+ 2021-07-15 17:47:32,953 DEBUG SenderThread:723175 [sender.py:send():179] send: footer
182
+ 2021-07-15 17:47:32,953 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
183
+ 2021-07-15 17:47:32,953 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 9
184
+ 2021-07-15 17:47:33,053 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
185
+ 2021-07-15 17:47:33,053 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
186
+ 2021-07-15 17:47:33,053 INFO SenderThread:723175 [file_pusher.py:join():182] waiting for file pusher
187
+ 2021-07-15 17:47:33,055 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: get_summary
188
+ 2021-07-15 17:47:33,055 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: sampled_history
189
+ 2021-07-15 17:47:33,056 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: shutdown
190
+ 2021-07-15 17:47:33,056 INFO HandlerThread:723175 [handler.py:finish():638] shutting down handler
191
+ 2021-07-15 17:47:33,953 INFO WriterThread:723175 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/run-3nkn7hxg.wandb
192
+ 2021-07-15 17:47:34,054 INFO SenderThread:723175 [sender.py:finish():945] shutting down sender
193
+ 2021-07-15 17:47:34,054 INFO SenderThread:723175 [file_pusher.py:finish():177] shutting down file pusher
194
+ 2021-07-15 17:47:34,054 INFO SenderThread:723175 [file_pusher.py:join():182] waiting for file pusher
195
+ 2021-07-15 17:47:34,056 INFO MainThread:723175 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210715_174147-3nkn7hxg/logs/debug.log ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 17:41:47,906 INFO MainThread:721922 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-15 17:41:47,906 INFO MainThread:721922 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-15 17:41:47,906 INFO MainThread:721922 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/logs/debug.log
4
+ 2021-07-15 17:41:47,906 INFO MainThread:721922 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/logs/debug-internal.log
5
+ 2021-07-15 17:41:47,906 INFO MainThread:721922 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-15 17:41:47,906 INFO MainThread:721922 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-15 17:41:47,906 INFO MainThread:721922 [wandb_init.py:init():419] starting backend
9
+ 2021-07-15 17:41:47,906 INFO MainThread:721922 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-15 17:41:47,954 INFO MainThread:721922 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-15 17:41:48,000 INFO MainThread:721922 [backend.py:ensure_launched():139] started backend process with pid: 723175
12
+ 2021-07-15 17:41:48,002 INFO MainThread:721922 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-15 17:41:48,005 INFO MainThread:721922 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-15 17:41:48,006 INFO MainThread:721922 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-15 17:41:48,633 INFO MainThread:721922 [wandb_init.py:init():496] got version response
16
+ 2021-07-15 17:41:48,633 INFO MainThread:721922 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-15 17:41:48,805 INFO MainThread:721922 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-15 17:41:49,968 INFO MainThread:721922 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-15 17:41:49,968 INFO MainThread:721922 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-15 17:41:49,969 INFO MainThread:721922 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-15 17:41:49,971 INFO MainThread:721922 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-15 17:41:49,971 INFO MainThread:721922 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-15 17:41:49,977 INFO MainThread:721922 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_17-41-39_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 15000, 'save_total_limit': 20, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 6000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-15 17:41:49,978 INFO MainThread:721922 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
25
+ 2021-07-15 17:41:49,980 INFO MainThread:721922 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 4000}
26
+ 2021-07-15 17:47:28,749 INFO MainThread:721922 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
27
+ 2021-07-15 17:47:28,750 INFO MainThread:721922 [wandb_run.py:_restore():1565] restore
28
+ 2021-07-15 17:47:31,674 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
29
+ wandb_count: 1
30
+ }
31
+ pusher_stats {
32
+ uploaded_bytes: 1415
33
+ total_bytes: 1415
34
+ }
35
+
36
+ 2021-07-15 17:47:31,882 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
37
+ wandb_count: 1
38
+ }
39
+ pusher_stats {
40
+ uploaded_bytes: 1415
41
+ total_bytes: 1415
42
+ }
43
+
44
+ 2021-07-15 17:47:31,984 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
45
+ wandb_count: 5
46
+ }
47
+ pusher_stats {
48
+ uploaded_bytes: 1415
49
+ total_bytes: 8975
50
+ }
51
+
52
+ 2021-07-15 17:47:32,086 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
53
+ wandb_count: 5
54
+ }
55
+ pusher_stats {
56
+ uploaded_bytes: 8975
57
+ total_bytes: 8975
58
+ }
59
+
60
+ 2021-07-15 17:47:32,188 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
61
+ wandb_count: 5
62
+ }
63
+ pusher_stats {
64
+ uploaded_bytes: 8975
65
+ total_bytes: 8975
66
+ }
67
+
68
+ 2021-07-15 17:47:32,290 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
69
+ wandb_count: 5
70
+ }
71
+ pusher_stats {
72
+ uploaded_bytes: 8975
73
+ total_bytes: 8975
74
+ }
75
+
76
+ 2021-07-15 17:47:32,392 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
77
+ wandb_count: 5
78
+ }
79
+ pusher_stats {
80
+ uploaded_bytes: 8975
81
+ total_bytes: 8975
82
+ }
83
+
84
+ 2021-07-15 17:47:32,494 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
85
+ wandb_count: 5
86
+ }
87
+ pusher_stats {
88
+ uploaded_bytes: 8975
89
+ total_bytes: 8975
90
+ }
91
+
92
+ 2021-07-15 17:47:32,596 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
93
+ wandb_count: 5
94
+ }
95
+ pusher_stats {
96
+ uploaded_bytes: 8975
97
+ total_bytes: 8975
98
+ }
99
+
100
+ 2021-07-15 17:47:32,952 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
101
+ wandb_count: 5
102
+ }
103
+ pusher_stats {
104
+ uploaded_bytes: 8975
105
+ total_bytes: 8975
106
+ }
107
+
108
+ 2021-07-15 17:47:33,054 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
109
+ exit_result {
110
+ }
111
+ file_counts {
112
+ wandb_count: 5
113
+ }
114
+ pusher_stats {
115
+ uploaded_bytes: 8975
116
+ total_bytes: 8975
117
+ }
118
+
119
+ 2021-07-15 17:47:34,335 INFO MainThread:721922 [wandb_run.py:_show_files():1937] logging synced files
wandb/run-20210715_174147-3nkn7hxg/run-3nkn7hxg.wandb ADDED
Binary file (7.49 kB). View file
 
wandb/run-20210715_175147-3lygnexi/files/config.yaml ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 4: 3.8.10
17
+ 5: 0.10.33
18
+ 6: 4.9.0.dev0
19
+ 8:
20
+ - 5
21
+ adafactor:
22
+ desc: null
23
+ value: false
24
+ adam_beta1:
25
+ desc: null
26
+ value: 0.9
27
+ adam_beta2:
28
+ desc: null
29
+ value: 0.98
30
+ adam_epsilon:
31
+ desc: null
32
+ value: 1.0e-08
33
+ cache_dir:
34
+ desc: null
35
+ value: null
36
+ config_name:
37
+ desc: null
38
+ value: ./
39
+ dataloader_drop_last:
40
+ desc: null
41
+ value: false
42
+ dataloader_num_workers:
43
+ desc: null
44
+ value: 0
45
+ dataloader_pin_memory:
46
+ desc: null
47
+ value: true
48
+ dataset_config_name:
49
+ desc: null
50
+ value: null
51
+ dataset_name:
52
+ desc: null
53
+ value: null
54
+ ddp_find_unused_parameters:
55
+ desc: null
56
+ value: null
57
+ debug:
58
+ desc: null
59
+ value: []
60
+ deepspeed:
61
+ desc: null
62
+ value: null
63
+ disable_tqdm:
64
+ desc: null
65
+ value: false
66
+ do_eval:
67
+ desc: null
68
+ value: false
69
+ do_predict:
70
+ desc: null
71
+ value: false
72
+ do_train:
73
+ desc: null
74
+ value: false
75
+ dtype:
76
+ desc: null
77
+ value: float32
78
+ eval_accumulation_steps:
79
+ desc: null
80
+ value: null
81
+ eval_steps:
82
+ desc: null
83
+ value: 6000
84
+ evaluation_strategy:
85
+ desc: null
86
+ value: IntervalStrategy.NO
87
+ fp16:
88
+ desc: null
89
+ value: false
90
+ fp16_backend:
91
+ desc: null
92
+ value: auto
93
+ fp16_full_eval:
94
+ desc: null
95
+ value: false
96
+ fp16_opt_level:
97
+ desc: null
98
+ value: O1
99
+ gradient_accumulation_steps:
100
+ desc: null
101
+ value: 1
102
+ greater_is_better:
103
+ desc: null
104
+ value: null
105
+ group_by_length:
106
+ desc: null
107
+ value: false
108
+ ignore_data_skip:
109
+ desc: null
110
+ value: false
111
+ label_names:
112
+ desc: null
113
+ value: null
114
+ label_smoothing_factor:
115
+ desc: null
116
+ value: 0.0
117
+ learning_rate:
118
+ desc: null
119
+ value: 3.0e-05
120
+ length_column_name:
121
+ desc: null
122
+ value: length
123
+ line_by_line:
124
+ desc: null
125
+ value: false
126
+ load_best_model_at_end:
127
+ desc: null
128
+ value: false
129
+ local_rank:
130
+ desc: null
131
+ value: -1
132
+ log_level:
133
+ desc: null
134
+ value: -1
135
+ log_level_replica:
136
+ desc: null
137
+ value: -1
138
+ log_on_each_node:
139
+ desc: null
140
+ value: true
141
+ logging_dir:
142
+ desc: null
143
+ value: ./runs/Jul15_17-51-39_t1v-n-f5c06ea1-w-0
144
+ logging_first_step:
145
+ desc: null
146
+ value: false
147
+ logging_steps:
148
+ desc: null
149
+ value: 50
150
+ logging_strategy:
151
+ desc: null
152
+ value: IntervalStrategy.STEPS
153
+ lr_scheduler_type:
154
+ desc: null
155
+ value: SchedulerType.LINEAR
156
+ max_eval_samples:
157
+ desc: null
158
+ value: 4000
159
+ max_grad_norm:
160
+ desc: null
161
+ value: 1.0
162
+ max_seq_length:
163
+ desc: null
164
+ value: 4096
165
+ max_steps:
166
+ desc: null
167
+ value: -1
168
+ metric_for_best_model:
169
+ desc: null
170
+ value: null
171
+ mlm_probability:
172
+ desc: null
173
+ value: 0.15
174
+ model_name_or_path:
175
+ desc: null
176
+ value: null
177
+ model_type:
178
+ desc: null
179
+ value: big_bird
180
+ mp_parameters:
181
+ desc: null
182
+ value: ''
183
+ no_cuda:
184
+ desc: null
185
+ value: false
186
+ num_train_epochs:
187
+ desc: null
188
+ value: 2.0
189
+ output_dir:
190
+ desc: null
191
+ value: ./
192
+ overwrite_cache:
193
+ desc: null
194
+ value: false
195
+ overwrite_output_dir:
196
+ desc: null
197
+ value: true
198
+ pad_to_max_length:
199
+ desc: null
200
+ value: false
201
+ past_index:
202
+ desc: null
203
+ value: -1
204
+ per_device_eval_batch_size:
205
+ desc: null
206
+ value: 1
207
+ per_device_train_batch_size:
208
+ desc: null
209
+ value: 1
210
+ per_gpu_eval_batch_size:
211
+ desc: null
212
+ value: null
213
+ per_gpu_train_batch_size:
214
+ desc: null
215
+ value: null
216
+ prediction_loss_only:
217
+ desc: null
218
+ value: false
219
+ preprocessing_num_workers:
220
+ desc: null
221
+ value: 96
222
+ push_to_hub:
223
+ desc: null
224
+ value: true
225
+ push_to_hub_model_id:
226
+ desc: null
227
+ value: ''
228
+ push_to_hub_organization:
229
+ desc: null
230
+ value: null
231
+ push_to_hub_token:
232
+ desc: null
233
+ value: null
234
+ remove_unused_columns:
235
+ desc: null
236
+ value: true
237
+ report_to:
238
+ desc: null
239
+ value:
240
+ - tensorboard
241
+ - wandb
242
+ resume_from_checkpoint:
243
+ desc: null
244
+ value: ./
245
+ run_name:
246
+ desc: null
247
+ value: ./
248
+ save_on_each_node:
249
+ desc: null
250
+ value: false
251
+ save_steps:
252
+ desc: null
253
+ value: 15000
254
+ save_strategy:
255
+ desc: null
256
+ value: IntervalStrategy.STEPS
257
+ save_total_limit:
258
+ desc: null
259
+ value: 20
260
+ seed:
261
+ desc: null
262
+ value: 42
263
+ sharded_ddp:
264
+ desc: null
265
+ value: []
266
+ skip_memory_metrics:
267
+ desc: null
268
+ value: true
269
+ tokenizer_name:
270
+ desc: null
271
+ value: ./
272
+ tpu_metrics_debug:
273
+ desc: null
274
+ value: false
275
+ tpu_num_cores:
276
+ desc: null
277
+ value: null
278
+ train_ref_file:
279
+ desc: null
280
+ value: null
281
+ use_fast_tokenizer:
282
+ desc: null
283
+ value: true
284
+ use_legacy_prediction_loop:
285
+ desc: null
286
+ value: false
287
+ validation_ref_file:
288
+ desc: null
289
+ value: null
290
+ validation_split_percentage:
291
+ desc: null
292
+ value: 5
293
+ warmup_ratio:
294
+ desc: null
295
+ value: 0.0
296
+ warmup_steps:
297
+ desc: null
298
+ value: 10000
299
+ weight_decay:
300
+ desc: null
301
+ value: 0.0095
wandb/run-20210715_175147-3lygnexi/files/output.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ [17:52:01] - INFO - absl - Restoring checkpoint from ./checkpoint_10000
3
+ tcmalloc: large alloc 1530273792 bytes == 0x9b410000 @ 0x7f6acebf3680 0x7f6acec14824 0x5b9a14 0x50b2ae 0x50cb1b 0x5a6f17 0x5f3010 0x56fd36 0x568d9a 0x5f5b33 0x56aadf 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7f6acea080b3 0x5f96de
4
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
5
+ warnings.warn(
6
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
7
+ warnings.warn(
8
+ Epoch ... (1/2): 0%| | 0/2 [00:00<?, ?it/s]
9
+
10
+
11
+
12
+
13
+
14
+
15
+
16
+ Training...: 28%|█████████████████▌ | 100059/352766 [04:35<64:36:13, 1.09it/s]
17
+
18
+
19
+
20
+
21
+
22
+ Training...: 28%|█████████████████▌ | 100113/352766 [04:55<31:00:36, 2.26it/s]
23
+
24
+
25
+
26
+
wandb/run-20210715_175147-3lygnexi/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210715_175147-3lygnexi/files/wandb-metadata.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-15T17:51:49.050579",
5
+ "startedAt": "2021-07-15T17:51:47.009391",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=10000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=50",
22
+ "--eval_steps=6000",
23
+ "--num_train_epochs=2",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=15000",
26
+ "--learning_rate=3e-5",
27
+ "--per_device_train_batch_size=1",
28
+ "--per_device_eval_batch_size=1",
29
+ "--save_total_limit=20",
30
+ "--max_eval_samples=4000",
31
+ "--resume_from_checkpoint=./"
32
+ ],
33
+ "state": "running",
34
+ "program": "./run_mlm_flax_no_accum.py",
35
+ "codePath": "run_mlm_flax_no_accum.py",
36
+ "git": {
37
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
38
+ "commit": "cc569aecf5e26454416d7a13c7876ad9111120cf"
39
+ },
40
+ "email": null,
41
+ "root": "/home/dat/pino-roberta-base",
42
+ "host": "t1v-n-f5c06ea1-w-0",
43
+ "username": "dat",
44
+ "executable": "/home/dat/pino/bin/python"
45
+ }
wandb/run-20210715_175147-3lygnexi/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"training_step": 100, "learning_rate": 2.6113679268746637e-05, "train_loss": 2.3843164443969727, "_runtime": 635, "_timestamp": 1626372142, "_step": 1}
wandb/run-20210715_175147-3lygnexi/logs/debug-internal.log ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 17:51:47,712 INFO MainThread:725632 [internal.py:wandb_internal():88] W&B internal server running at pid: 725632, started at: 2021-07-15 17:51:47.712407
2
+ 2021-07-15 17:51:47,714 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: check_version
3
+ 2021-07-15 17:51:47,714 INFO WriterThread:725632 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/run-3lygnexi.wandb
4
+ 2021-07-15 17:51:47,715 DEBUG SenderThread:725632 [sender.py:send():179] send: header
5
+ 2021-07-15 17:51:47,716 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: check_version
6
+ 2021-07-15 17:51:47,753 DEBUG SenderThread:725632 [sender.py:send():179] send: run
7
+ 2021-07-15 17:51:47,932 INFO SenderThread:725632 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files
8
+ 2021-07-15 17:51:47,933 INFO SenderThread:725632 [sender.py:_start_run_threads():716] run started: 3lygnexi with start time 1626371507
9
+ 2021-07-15 17:51:47,933 DEBUG SenderThread:725632 [sender.py:send():179] send: summary
10
+ 2021-07-15 17:51:47,933 INFO SenderThread:725632 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
11
+ 2021-07-15 17:51:47,933 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: run_start
12
+ 2021-07-15 17:51:48,936 INFO Thread-8 :725632 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/wandb-summary.json
13
+ 2021-07-15 17:51:49,050 DEBUG HandlerThread:725632 [meta.py:__init__():39] meta init
14
+ 2021-07-15 17:51:49,050 DEBUG HandlerThread:725632 [meta.py:__init__():53] meta init done
15
+ 2021-07-15 17:51:49,050 DEBUG HandlerThread:725632 [meta.py:probe():210] probe
16
+ 2021-07-15 17:51:49,051 DEBUG HandlerThread:725632 [meta.py:_setup_git():200] setup git
17
+ 2021-07-15 17:51:49,081 DEBUG HandlerThread:725632 [meta.py:_setup_git():207] setup git done
18
+ 2021-07-15 17:51:49,081 DEBUG HandlerThread:725632 [meta.py:_save_pip():57] save pip
19
+ 2021-07-15 17:51:49,082 DEBUG HandlerThread:725632 [meta.py:_save_pip():71] save pip done
20
+ 2021-07-15 17:51:49,082 DEBUG HandlerThread:725632 [meta.py:probe():252] probe done
21
+ 2021-07-15 17:51:49,085 DEBUG SenderThread:725632 [sender.py:send():179] send: files
22
+ 2021-07-15 17:51:49,085 INFO SenderThread:725632 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
23
+ 2021-07-15 17:51:49,091 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
24
+ 2021-07-15 17:51:49,092 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
25
+ 2021-07-15 17:51:49,220 DEBUG SenderThread:725632 [sender.py:send():179] send: config
26
+ 2021-07-15 17:51:49,221 DEBUG SenderThread:725632 [sender.py:send():179] send: config
27
+ 2021-07-15 17:51:49,221 DEBUG SenderThread:725632 [sender.py:send():179] send: config
28
+ 2021-07-15 17:51:49,586 INFO Thread-11 :725632 [upload_job.py:push():137] Uploaded file /tmp/tmprw9uts3owandb/626w6hbr-wandb-metadata.json
29
+ 2021-07-15 17:51:49,934 INFO Thread-8 :725632 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
30
+ 2021-07-15 17:51:49,935 INFO Thread-8 :725632 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/requirements.txt
31
+ 2021-07-15 17:51:49,935 INFO Thread-8 :725632 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/wandb-metadata.json
32
+ 2021-07-15 17:52:04,352 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
33
+ 2021-07-15 17:52:04,352 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
34
+ 2021-07-15 17:52:04,940 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
35
+ 2021-07-15 17:52:06,941 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
36
+ 2021-07-15 17:52:17,133 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
37
+ 2021-07-15 17:52:18,946 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/config.yaml
38
+ 2021-07-15 17:52:19,500 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
39
+ 2021-07-15 17:52:19,500 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
40
+ 2021-07-15 17:52:34,650 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
41
+ 2021-07-15 17:52:34,651 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
42
+ 2021-07-15 17:52:47,211 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
43
+ 2021-07-15 17:52:49,782 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
44
+ 2021-07-15 17:52:49,782 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
45
+ 2021-07-15 17:53:04,916 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
46
+ 2021-07-15 17:53:04,916 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
47
+ 2021-07-15 17:53:17,287 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
48
+ 2021-07-15 17:53:20,049 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
49
+ 2021-07-15 17:53:20,049 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
50
+ 2021-07-15 17:53:35,180 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
51
+ 2021-07-15 17:53:35,180 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
52
+ 2021-07-15 17:53:47,359 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
53
+ 2021-07-15 17:53:50,321 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
54
+ 2021-07-15 17:53:50,322 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
55
+ 2021-07-15 17:54:05,456 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
56
+ 2021-07-15 17:54:05,456 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
57
+ 2021-07-15 17:54:17,434 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
58
+ 2021-07-15 17:54:20,594 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
59
+ 2021-07-15 17:54:20,595 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
60
+ 2021-07-15 17:54:35,726 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
61
+ 2021-07-15 17:54:35,726 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
62
+ 2021-07-15 17:54:47,513 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
63
+ 2021-07-15 17:54:50,859 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
64
+ 2021-07-15 17:54:50,859 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
65
+ 2021-07-15 17:55:05,995 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
66
+ 2021-07-15 17:55:05,995 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
67
+ 2021-07-15 17:55:17,578 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
68
+ 2021-07-15 17:55:21,130 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
69
+ 2021-07-15 17:55:21,131 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
70
+ 2021-07-15 17:55:36,263 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
71
+ 2021-07-15 17:55:36,263 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
72
+ 2021-07-15 17:55:47,636 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
73
+ 2021-07-15 17:55:51,395 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
74
+ 2021-07-15 17:55:51,396 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
75
+ 2021-07-15 17:56:06,527 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
76
+ 2021-07-15 17:56:06,528 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
77
+ 2021-07-15 17:56:17,705 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
78
+ 2021-07-15 17:56:21,658 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
79
+ 2021-07-15 17:56:21,658 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
80
+ 2021-07-15 17:56:36,787 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
81
+ 2021-07-15 17:56:36,787 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
82
+ 2021-07-15 17:56:47,777 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
83
+ 2021-07-15 17:56:51,920 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
84
+ 2021-07-15 17:56:51,920 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
85
+ 2021-07-15 17:57:07,051 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
86
+ 2021-07-15 17:57:07,051 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
87
+ 2021-07-15 17:57:17,846 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
88
+ 2021-07-15 17:57:22,182 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
89
+ 2021-07-15 17:57:22,182 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
90
+ 2021-07-15 17:57:29,057 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
91
+ 2021-07-15 17:57:37,329 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
92
+ 2021-07-15 17:57:37,330 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
93
+ 2021-07-15 17:57:47,918 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
94
+ 2021-07-15 17:57:52,478 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
95
+ 2021-07-15 17:57:52,478 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
96
+ 2021-07-15 17:58:07,606 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
97
+ 2021-07-15 17:58:07,607 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
98
+ 2021-07-15 17:58:17,984 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
99
+ 2021-07-15 17:58:22,739 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
100
+ 2021-07-15 17:58:22,739 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
101
+ 2021-07-15 17:58:37,872 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
102
+ 2021-07-15 17:58:37,872 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
103
+ 2021-07-15 17:58:48,054 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
104
+ 2021-07-15 17:58:53,004 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
105
+ 2021-07-15 17:58:53,005 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
106
+ 2021-07-15 17:59:08,136 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
107
+ 2021-07-15 17:59:08,136 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
108
+ 2021-07-15 17:59:18,132 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
109
+ 2021-07-15 17:59:23,266 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
110
+ 2021-07-15 17:59:23,266 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
111
+ 2021-07-15 17:59:35,104 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
112
+ 2021-07-15 17:59:38,608 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
113
+ 2021-07-15 17:59:38,608 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
114
+ 2021-07-15 17:59:48,205 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
115
+ 2021-07-15 17:59:53,754 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
116
+ 2021-07-15 17:59:53,755 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
117
+ 2021-07-15 18:00:08,886 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
118
+ 2021-07-15 18:00:08,887 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
119
+ 2021-07-15 18:00:18,277 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
120
+ 2021-07-15 18:00:24,017 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
121
+ 2021-07-15 18:00:24,018 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
122
+ 2021-07-15 18:00:39,149 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
123
+ 2021-07-15 18:00:39,149 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
124
+ 2021-07-15 18:00:48,356 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
125
+ 2021-07-15 18:00:54,282 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
126
+ 2021-07-15 18:00:54,283 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
127
+ 2021-07-15 18:01:09,424 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
128
+ 2021-07-15 18:01:09,424 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
129
+ 2021-07-15 18:01:18,438 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
130
+ 2021-07-15 18:01:24,555 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
131
+ 2021-07-15 18:01:24,555 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
132
+ 2021-07-15 18:01:39,687 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
133
+ 2021-07-15 18:01:39,688 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
134
+ 2021-07-15 18:01:44,155 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
135
+ 2021-07-15 18:01:46,156 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
136
+ 2021-07-15 18:01:48,157 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
137
+ 2021-07-15 18:01:48,515 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
138
+ 2021-07-15 18:01:50,158 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
139
+ 2021-07-15 18:01:52,159 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
140
+ 2021-07-15 18:01:54,817 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
141
+ 2021-07-15 18:01:54,818 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
142
+ 2021-07-15 18:02:02,507 DEBUG SenderThread:725632 [sender.py:send():179] send: history
143
+ 2021-07-15 18:02:02,507 DEBUG SenderThread:725632 [sender.py:send():179] send: summary
144
+ 2021-07-15 18:02:02,507 INFO SenderThread:725632 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
145
+ 2021-07-15 18:02:03,163 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/wandb-summary.json
146
+ 2021-07-15 18:02:04,163 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
147
+ 2021-07-15 18:02:05,164 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
148
+ 2021-07-15 18:02:06,164 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
149
+ 2021-07-15 18:02:08,165 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
150
+ 2021-07-15 18:02:09,950 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
151
+ 2021-07-15 18:02:09,950 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
152
+ 2021-07-15 18:02:10,166 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
153
+ 2021-07-15 18:02:12,167 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
154
+ 2021-07-15 18:02:18,592 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
155
+ 2021-07-15 18:02:22,541 DEBUG SenderThread:725632 [sender.py:send():179] send: history
156
+ 2021-07-15 18:02:22,542 DEBUG SenderThread:725632 [sender.py:send():179] send: summary
157
+ 2021-07-15 18:02:22,542 INFO SenderThread:725632 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
158
+ 2021-07-15 18:02:23,171 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/wandb-summary.json
159
+ 2021-07-15 18:02:24,172 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
160
+ 2021-07-15 18:02:25,082 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
161
+ 2021-07-15 18:02:25,082 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
162
+ 2021-07-15 18:02:25,172 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
163
+ 2021-07-15 18:02:26,173 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
164
+ 2021-07-15 18:02:28,174 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
165
+ 2021-07-15 18:02:30,174 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
166
+ 2021-07-15 18:02:32,175 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
167
+ 2021-07-15 18:02:33,395 WARNING MainThread:725632 [internal.py:wandb_internal():147] Internal process interrupt: 1
168
+ 2021-07-15 18:02:33,592 WARNING MainThread:725632 [internal.py:wandb_internal():147] Internal process interrupt: 2
169
+ 2021-07-15 18:02:33,592 ERROR MainThread:725632 [internal.py:wandb_internal():150] Internal process interrupted.
170
+ 2021-07-15 18:02:34,020 INFO MainThread:725632 [internal.py:handle_exit():78] Internal process exited