diff --git a/config.json b/config.json index d983e12af73121af5a17c4c69ffb6ece637889a7..849f38f31170912b81fb7116a08702ee35828544 100644 --- a/config.json +++ b/config.json @@ -4,7 +4,7 @@ ], "attention_probs_dropout_prob": 0.1, "attention_type": "block_sparse", - "block_size": 64, + "block_size": 128, "bos_token_id": 1, "eos_token_id": 2, "gradient_checkpointing": false, diff --git a/events.out.tfevents.1626304306.t1v-n-f5c06ea1-w-0.602807.3.v2 b/events.out.tfevents.1626304306.t1v-n-f5c06ea1-w-0.602807.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..7d020239e11dad58ce49a288618c29ff4ed8a93c --- /dev/null +++ b/events.out.tfevents.1626304306.t1v-n-f5c06ea1-w-0.602807.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5af1d6e48b81f0b9d7a35be9c91ffce03eee366544729e50b2f6158dda353896 +size 40 diff --git a/events.out.tfevents.1626305223.t1v-n-f5c06ea1-w-0.605532.3.v2 b/events.out.tfevents.1626305223.t1v-n-f5c06ea1-w-0.605532.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..1ad778e0a3e5a774399e232b30fe370d537ab277 --- /dev/null +++ b/events.out.tfevents.1626305223.t1v-n-f5c06ea1-w-0.605532.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50736e35dba095f7e44007a17a7f2702b81215a45256c8a92304137fb1352920 +size 40 diff --git a/events.out.tfevents.1626306374.t1v-n-f5c06ea1-w-0.608526.3.v2 b/events.out.tfevents.1626306374.t1v-n-f5c06ea1-w-0.608526.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..511f9f26114d50ae7ac9c4cfaf8a451e2bb5099b --- /dev/null +++ b/events.out.tfevents.1626306374.t1v-n-f5c06ea1-w-0.608526.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c754bf3ea8fba0ce37ab52a99fc32dbd0370af24af9c99ff82c5d66f750ccf9 +size 40 diff --git a/events.out.tfevents.1626306537.t1v-n-f5c06ea1-w-0.610166.3.v2 b/events.out.tfevents.1626306537.t1v-n-f5c06ea1-w-0.610166.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..2cdbd054814d2a76a1cbaa9991df399157a31cee --- /dev/null +++ b/events.out.tfevents.1626306537.t1v-n-f5c06ea1-w-0.610166.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a0c43d6c44047fae8000715c934e7f0ecbdcde1074ad33da1da321c3b27e0fa +size 40 diff --git a/events.out.tfevents.1626306954.t1v-n-f5c06ea1-w-0.612049.3.v2 b/events.out.tfevents.1626306954.t1v-n-f5c06ea1-w-0.612049.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..fb82f27239c8e24b29010d6e437fe624bc438785 --- /dev/null +++ b/events.out.tfevents.1626306954.t1v-n-f5c06ea1-w-0.612049.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399e413d5125b81deb4821e6cf2d48594b856250dc9a25b5b5a640e53b05dff1 +size 40 diff --git a/events.out.tfevents.1626307676.t1v-n-f5c06ea1-w-0.614342.3.v2 b/events.out.tfevents.1626307676.t1v-n-f5c06ea1-w-0.614342.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..3682623d147022de829c9acacbd9279e909359ed --- /dev/null +++ b/events.out.tfevents.1626307676.t1v-n-f5c06ea1-w-0.614342.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14bf286b75218519d67d5dfc8dd5186890bde2fc12fad6c71348d0dc35a5c0d8 +size 40 diff --git a/events.out.tfevents.1626308255.t1v-n-f5c06ea1-w-0.616592.3.v2 b/events.out.tfevents.1626308255.t1v-n-f5c06ea1-w-0.616592.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..603ab140d1d6eb4114ccd6bca72547fd915d11bb --- /dev/null +++ b/events.out.tfevents.1626308255.t1v-n-f5c06ea1-w-0.616592.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eba6714d53d12785a3f81e899889244c67dcbc04cb9fad8562008dcf8302a212 +size 40 diff --git a/events.out.tfevents.1626308889.t1v-n-f5c06ea1-w-0.618785.3.v2 b/events.out.tfevents.1626308889.t1v-n-f5c06ea1-w-0.618785.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..45c0d505dfd63917f8199e5895e3abd3593405b3 --- /dev/null +++ b/events.out.tfevents.1626308889.t1v-n-f5c06ea1-w-0.618785.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3b826e6696483c676eb0da8b48cd3811887b854b324027f4c4f58af92933d17 +size 40 diff --git a/events.out.tfevents.1626309457.t1v-n-f5c06ea1-w-0.620917.3.v2 b/events.out.tfevents.1626309457.t1v-n-f5c06ea1-w-0.620917.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..16792eeec4cbc79eede77d5021d81716c4fa0802 --- /dev/null +++ b/events.out.tfevents.1626309457.t1v-n-f5c06ea1-w-0.620917.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d144d24c7f4699737d4dcd0b3619027b85af5478c55be0142c8ba091253568fc +size 40 diff --git a/events.out.tfevents.1626310347.t1v-n-f5c06ea1-w-0.623339.3.v2 b/events.out.tfevents.1626310347.t1v-n-f5c06ea1-w-0.623339.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..c6f2ac3fb0515e5b09f8091e03bc3983e0bebd12 --- /dev/null +++ b/events.out.tfevents.1626310347.t1v-n-f5c06ea1-w-0.623339.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83fbcbad2d35506b956452a251f6ed447335e90407b067c149ba3127b303632a +size 40 diff --git a/events.out.tfevents.1626310837.t1v-n-f5c06ea1-w-0.625421.3.v2 b/events.out.tfevents.1626310837.t1v-n-f5c06ea1-w-0.625421.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..298d1118afb2c2bc25d008f07a093310de281ee5 --- /dev/null +++ b/events.out.tfevents.1626310837.t1v-n-f5c06ea1-w-0.625421.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b99cff29b8871cd8db3f8fa198969b985fea2ffc47e79089d3d725ef9f29080 +size 40 diff --git a/events.out.tfevents.1626311317.t1v-n-f5c06ea1-w-0.626982.3.v2 b/events.out.tfevents.1626311317.t1v-n-f5c06ea1-w-0.626982.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..d413267ecdb3d3884751f7276a401c98387b225b --- /dev/null +++ b/events.out.tfevents.1626311317.t1v-n-f5c06ea1-w-0.626982.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f09f06cc74a2b6e25df6452b382ba19720ee66dbaf384048c15058d496280a8 +size 40 diff --git a/events.out.tfevents.1626311757.t1v-n-f5c06ea1-w-0.628566.3.v2 b/events.out.tfevents.1626311757.t1v-n-f5c06ea1-w-0.628566.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..93d4416e39b07f8bb9b17da9f72a353993e8541b --- /dev/null +++ b/events.out.tfevents.1626311757.t1v-n-f5c06ea1-w-0.628566.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7465c4dbbaee983519373fb924605fb6aacc585352185e9d2950814da4100c3b +size 7499 diff --git a/events.out.tfevents.1626312025.t1v-n-f5c06ea1-w-0.630273.3.v2 b/events.out.tfevents.1626312025.t1v-n-f5c06ea1-w-0.630273.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..d0a7ddcfa1de92632d955ea9c32a40ce313cfa60 --- /dev/null +++ b/events.out.tfevents.1626312025.t1v-n-f5c06ea1-w-0.630273.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:635dcd8f0941e3cc687e76b31e5803cfcf80f17f9d50615fe8e93c2a0768e08c +size 40 diff --git a/events.out.tfevents.1626312342.t1v-n-f5c06ea1-w-0.631837.3.v2 b/events.out.tfevents.1626312342.t1v-n-f5c06ea1-w-0.631837.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..faf50128d18b027b62e99a8b1dc11d1e25043b6e --- /dev/null +++ b/events.out.tfevents.1626312342.t1v-n-f5c06ea1-w-0.631837.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a47b8e795ffa4867c8acf33eb5022d1bdaad588280c75b3bdebf47dda704101a +size 40 diff --git a/events.out.tfevents.1626312869.t1v-n-f5c06ea1-w-0.634228.3.v2 b/events.out.tfevents.1626312869.t1v-n-f5c06ea1-w-0.634228.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..22ae5dd6ca7989980fbd39cf50ff56da0763aae1 --- /dev/null +++ b/events.out.tfevents.1626312869.t1v-n-f5c06ea1-w-0.634228.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8228686009da130e2b1ae2918f1b8afc864f280b17fc48e479c5342dc466bec2 +size 40 diff --git a/events.out.tfevents.1626312958.t1v-n-f5c06ea1-w-0.635913.3.v2 b/events.out.tfevents.1626312958.t1v-n-f5c06ea1-w-0.635913.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..e2e83574920d58fc27e201d42cda3714364d62af --- /dev/null +++ b/events.out.tfevents.1626312958.t1v-n-f5c06ea1-w-0.635913.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c7381fc3fe2646dc9fa2397819b0db0d19f859ddbd6061a67f237b2e1dd61e5 +size 40 diff --git a/events.out.tfevents.1626313509.t1v-n-f5c06ea1-w-0.638079.3.v2 b/events.out.tfevents.1626313509.t1v-n-f5c06ea1-w-0.638079.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..cc19590af17adf56d41ac2fad198aea4d06c1c22 --- /dev/null +++ b/events.out.tfevents.1626313509.t1v-n-f5c06ea1-w-0.638079.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c70c504b2ddc63c6d6ffb5846f09dd37ed05743155ecae6469ce433583aca0bc +size 40 diff --git a/events.out.tfevents.1626314417.t1v-n-f5c06ea1-w-0.640692.3.v2 b/events.out.tfevents.1626314417.t1v-n-f5c06ea1-w-0.640692.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..2a2250d7f832a39816a21f1fbf31ee3b5f484131 --- /dev/null +++ b/events.out.tfevents.1626314417.t1v-n-f5c06ea1-w-0.640692.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a907482f1b09a2b5e4e62b66f168db6d65bc1c98b9a140174cfe918b8039be5e +size 40 diff --git a/run.sh b/run.sh index cc2e94eb7c8ff2c871cb3366b24e0dbf926ef228..26797e2cf9f7578e1509f9d9843cd4cdd85035c8 100644 --- a/run.sh +++ b/run.sh @@ -2,7 +2,7 @@ #export TOKENIZERS_PARALLELISM=0 -python ./run_mlm_flax.py \ +python ./run_mlm_flax_no_accum.py \ --push_to_hub \ --output_dir="./" \ --model_type="big_bird" \ @@ -14,18 +14,17 @@ python ./run_mlm_flax.py \ --overwrite_output_dir \ --adam_beta1="0.9" \ --adam_beta2="0.98" \ - --logging_steps="500" \ + --logging_steps="50" \ --eval_steps="20000" \ --num_train_epochs="5" \ --preprocessing_num_workers="96" \ --save_steps="20000" \ - --learning_rate="5e-5" \ - --per_device_train_batch_size="2" \ - --per_device_eval_batch_size="2" \ + --learning_rate="3e-5" \ + --per_device_train_batch_size="1" \ + --per_device_eval_batch_size="1" \ --save_total_limit="5"\ - --max_eval_samples="2000"\ - --overwrite_cache False \ - --gradient_accumulation_steps="8" \ + --max_eval_samples="500"\ + #--gradient_accumulation_steps="4"\ #--resume_from_checkpoint="./"\ #--adafactor \ #--dtype="bfloat16" \ diff --git a/run_mlm_flax.py b/run_mlm_flax.py index f96f248e40bb6d31de97c8fbc499c44c735f54a9..ceaf181a53afad3a2325dd232c6df0a03d8cbc82 100644 --- a/run_mlm_flax.py +++ b/run_mlm_flax.py @@ -525,10 +525,10 @@ if __name__ == "__main__": if load_grouped: logger.info("Loading tokenized and grouped dataset") tokenized_datasets = DatasetDict.load_from_disk("/data/tokenized_data") - logger.info("Setting max validation examples to ") - print(f"Number of validation examples {data_args.max_eval_samples}") - #tokenized_datasets["train"]= tokenized_datasets["train"].select(range(20000)) + tokenized_datasets["train"]= tokenized_datasets["train"].select(range(int(0.3*len(tokenized_datasets["train"])))) if data_args.max_eval_samples is not None: + logger.info("Setting max validation examples to ") + print(f"Number of validation examples {data_args.max_eval_samples}") tokenized_datasets["validation"] = tokenized_datasets["validation"].select(range(data_args.max_eval_samples)) else: if training_args.do_train: diff --git a/run_mlm_flax_no_accum.py b/run_mlm_flax_no_accum.py index 7f3ef2d339f666eb4ab4d937ccc796082905ac92..95004f842878b3c5b3452face9c836bd299cd6fc 100644 --- a/run_mlm_flax_no_accum.py +++ b/run_mlm_flax_no_accum.py @@ -25,7 +25,7 @@ import os import sys import time from dataclasses import dataclass, field - +from optax import clip_by_global_norm # You can also adapt this script on your own masked language modeling task. Pointers for this are left as comments. from pathlib import Path from typing import Dict, List, Optional, Tuple @@ -421,7 +421,7 @@ if __name__ == "__main__": tokenized_datasets = DatasetDict.load_from_disk("/data/tokenized_data") logger.info("Setting max validation examples to ") print(f"Number of validation examples {data_args.max_eval_samples}") - tokenized_datasets["train"]= tokenized_datasets["train"].select(range(20000)) + #tokenized_datasets["train"]= tokenized_datasets["train"].select(range(int(0.3*len(tokenized_datasets["train"])))) if data_args.max_eval_samples is not None: tokenized_datasets["validation"] = tokenized_datasets["validation"].select(range(data_args.max_eval_samples)) else: @@ -604,7 +604,7 @@ if __name__ == "__main__": mask=decay_mask_fn, ) optimizer = optax.chain( - optax.clip_grad_by_global_norm(1.), + optax.clip_by_global_norm(1.), optimizer ) diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log index 0d918acb88e38282460bcd0b5a158f53bd6e7609..436926eb20a98d5ac77e0f29c4908d5ab93f55fd 120000 --- a/wandb/debug-internal.log +++ b/wandb/debug-internal.log @@ -1 +1 @@ -run-20210714_225820-1dpoijkp/logs/debug-internal.log \ No newline at end of file +run-20210715_020018-3i0mvo08/logs/debug-internal.log \ No newline at end of file diff --git a/wandb/debug.log b/wandb/debug.log index e8517a240a8369bb78195e44a363d7213a488ab3..3e9479db1a2bcb33dc4f7e6bb4d08bc8a6fd995f 120000 --- a/wandb/debug.log +++ b/wandb/debug.log @@ -1 +1 @@ -run-20210714_225820-1dpoijkp/logs/debug.log \ No newline at end of file +run-20210715_020018-3i0mvo08/logs/debug.log \ No newline at end of file diff --git a/wandb/latest-run b/wandb/latest-run index 1dbf35e208de011d8d308816d17498c2c987b2fa..7f862e9df6a33d2250244dd835ed7820e9276ebd 120000 --- a/wandb/latest-run +++ b/wandb/latest-run @@ -1 +1 @@ -run-20210714_225820-1dpoijkp \ No newline at end of file +run-20210715_020018-3i0mvo08 \ No newline at end of file diff --git a/wandb/run-20210714_225820-1dpoijkp/files/config.yaml b/wandb/run-20210714_225820-1dpoijkp/files/config.yaml index 73a1d00888b8c4ac2e215babbc0fb34d2bd186d3..861075e545b964a5b49b77fc300fc636783f513a 100644 --- a/wandb/run-20210714_225820-1dpoijkp/files/config.yaml +++ b/wandb/run-20210714_225820-1dpoijkp/files/config.yaml @@ -13,6 +13,9 @@ _wandb: 1: - 3 - 11 + 2: + - 3 + - 11 4: 3.8.10 5: 0.10.33 6: 4.9.0.dev0 diff --git a/wandb/run-20210714_225820-1dpoijkp/files/output.log b/wandb/run-20210714_225820-1dpoijkp/files/output.log index a0f2fd4451704b4da68d71b21f5d9abb32e0fd5e..1ccdcd38b03bb546ea22ffe4c40485756a9fe867 100644 --- a/wandb/run-20210714_225820-1dpoijkp/files/output.log +++ b/wandb/run-20210714_225820-1dpoijkp/files/output.log @@ -4,3 +4,36 @@ warnings.warn( /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.58G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.66G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax.py", line 815, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 12.58G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.66G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log b/wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log index fa339fd96f591cef4c30a5a8beb576c2c2fa7b01..ec6f3b1d95af0517caf1d23b408439ce959e62ff 100644 --- a/wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log +++ b/wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log @@ -38,3 +38,198 @@ 2021-07-14 22:58:53,006 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status 2021-07-14 22:59:08,141 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status 2021-07-14 22:59:08,141 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:59:20,866 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 22:59:23,276 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:59:23,277 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:59:38,415 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:59:38,415 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:59:50,943 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 22:59:53,547 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:59:53,547 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:00:08,680 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:00:08,680 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:00:21,020 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:00:23,810 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:00:23,811 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:00:38,944 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:00:38,945 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:00:51,098 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:00:54,080 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:00:54,080 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:01:09,212 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:01:09,212 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:01:21,176 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:01:24,345 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:01:24,346 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:01:39,477 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:01:39,478 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:01:51,254 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:01:54,612 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:01:54,612 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:02:09,744 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:02:09,744 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:02:21,332 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:02:24,932 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:02:24,932 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:02:40,066 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:02:40,067 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:02:51,409 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:02:55,209 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:02:55,209 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:03:10,341 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:03:10,341 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:03:21,484 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:03:25,483 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:03:25,483 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:03:40,615 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:03:40,615 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:03:51,547 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:03:56,377 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:03:56,377 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:04:11,520 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:04:11,520 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:04:21,610 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:04:26,652 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:04:26,652 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:04:41,781 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:04:41,781 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:04:51,676 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:04:56,913 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:04:56,913 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:05:13,026 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:05:13,027 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:05:21,744 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:05:28,237 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:05:28,238 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:05:44,049 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:05:44,049 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:05:51,809 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:05:59,179 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:05:59,180 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:06:14,311 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:06:14,311 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:06:21,764 INFO Thread-8 :601574 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/output.log +2021-07-14 23:06:21,877 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:06:29,456 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:06:29,457 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:06:44,749 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:06:44,749 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:06:51,950 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:06:59,941 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:06:59,941 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:07:15,073 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:07:15,077 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:07:22,025 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:07:30,221 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:07:30,221 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:07:45,363 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:07:45,364 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:07:50,438 DEBUG SenderThread:601574 [sender.py:send():179] send: telemetry +2021-07-14 23:07:50,439 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:07:50,439 DEBUG SenderThread:601574 [sender.py:send():179] send: exit +2021-07-14 23:07:50,439 INFO SenderThread:601574 [sender.py:send_exit():287] handling exit code: 1 +2021-07-14 23:07:50,440 INFO SenderThread:601574 [sender.py:send_exit():295] send defer +2021-07-14 23:07:50,440 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:07:50,441 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:07:50,441 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-14 23:07:50,441 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer +2021-07-14 23:07:50,441 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-14 23:07:50,441 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 1 +2021-07-14 23:07:50,442 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:07:50,442 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-14 23:07:50,497 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer +2021-07-14 23:07:50,497 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-14 23:07:50,497 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 2 +2021-07-14 23:07:50,497 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 23:07:50,498 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:07:50,498 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-14 23:07:50,498 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer +2021-07-14 23:07:50,498 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-14 23:07:50,498 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 3 +2021-07-14 23:07:50,499 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:07:50,499 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-14 23:07:50,499 DEBUG SenderThread:601574 [sender.py:send():179] send: summary +2021-07-14 23:07:50,499 INFO SenderThread:601574 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 23:07:50,499 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer +2021-07-14 23:07:50,499 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-14 23:07:50,499 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 4 +2021-07-14 23:07:50,500 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:07:50,500 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-14 23:07:50,500 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer +2021-07-14 23:07:50,500 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-14 23:07:50,543 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:07:50,746 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 5 +2021-07-14 23:07:50,746 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:07:50,747 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:07:50,747 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-14 23:07:50,747 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer +2021-07-14 23:07:50,747 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-14 23:07:50,747 INFO SenderThread:601574 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-14 23:07:50,787 INFO SenderThread:601574 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/output.log +2021-07-14 23:07:50,787 INFO SenderThread:601574 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/config.yaml +2021-07-14 23:07:50,787 INFO SenderThread:601574 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json +2021-07-14 23:07:50,788 INFO SenderThread:601574 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files +2021-07-14 23:07:50,788 INFO SenderThread:601574 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/requirements.txt requirements.txt +2021-07-14 23:07:50,788 INFO SenderThread:601574 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/output.log output.log +2021-07-14 23:07:50,788 INFO SenderThread:601574 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/wandb-metadata.json wandb-metadata.json +2021-07-14 23:07:50,788 INFO SenderThread:601574 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/config.yaml config.yaml +2021-07-14 23:07:50,789 INFO SenderThread:601574 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json wandb-summary.json +2021-07-14 23:07:50,792 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 6 +2021-07-14 23:07:50,792 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:07:50,792 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-14 23:07:50,795 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer +2021-07-14 23:07:50,795 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-14 23:07:50,795 INFO SenderThread:601574 [file_pusher.py:finish():177] shutting down file pusher +2021-07-14 23:07:50,848 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:07:50,849 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:07:50,951 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:07:50,951 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:07:51,053 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:07:51,053 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:07:51,155 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:07:51,155 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:07:51,257 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:07:51,257 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:07:51,284 INFO Thread-12 :601574 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/requirements.txt +2021-07-14 23:07:51,311 INFO Thread-14 :601574 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/config.yaml +2021-07-14 23:07:51,322 INFO Thread-13 :601574 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/output.log +2021-07-14 23:07:51,359 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:07:51,359 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:07:51,461 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:07:51,461 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:07:51,466 INFO Thread-15 :601574 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json +2021-07-14 23:07:51,563 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:07:51,563 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:07:51,665 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:07:51,665 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:07:51,666 INFO Thread-7 :601574 [sender.py:transition_state():308] send defer: 7 +2021-07-14 23:07:51,667 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:07:51,667 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-14 23:07:51,667 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer +2021-07-14 23:07:51,667 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-14 23:07:51,766 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:07:51,941 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 8 +2021-07-14 23:07:51,941 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:07:51,942 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:07:51,942 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-14 23:07:51,942 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer +2021-07-14 23:07:51,942 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-14 23:07:51,942 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 9 +2021-07-14 23:07:51,942 DEBUG SenderThread:601574 [sender.py:send():179] send: final +2021-07-14 23:07:51,942 DEBUG SenderThread:601574 [sender.py:send():179] send: footer +2021-07-14 23:07:51,943 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:07:51,943 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-14 23:07:51,943 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer +2021-07-14 23:07:51,943 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-14 23:07:52,043 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:07:52,043 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:07:52,044 INFO SenderThread:601574 [file_pusher.py:join():182] waiting for file pusher +2021-07-14 23:07:52,045 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: get_summary +2021-07-14 23:07:52,045 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-14 23:07:52,046 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: shutdown +2021-07-14 23:07:52,046 INFO HandlerThread:601574 [handler.py:finish():638] shutting down handler +2021-07-14 23:07:52,943 INFO WriterThread:601574 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb +2021-07-14 23:07:53,044 INFO SenderThread:601574 [sender.py:finish():945] shutting down sender +2021-07-14 23:07:53,044 INFO SenderThread:601574 [file_pusher.py:finish():177] shutting down file pusher +2021-07-14 23:07:53,044 INFO SenderThread:601574 [file_pusher.py:join():182] waiting for file pusher +2021-07-14 23:07:53,047 INFO MainThread:601574 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210714_225820-1dpoijkp/logs/debug.log b/wandb/run-20210714_225820-1dpoijkp/logs/debug.log index f7410944bc848b8f681ec31ce3fb5eba71d69059..c654d0f315063e53732bcfa2224aac7e750ea561 100644 --- a/wandb/run-20210714_225820-1dpoijkp/logs/debug.log +++ b/wandb/run-20210714_225820-1dpoijkp/logs/debug.log @@ -23,3 +23,113 @@ config: {} 2021-07-14 22:58:22,750 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_22-58-13_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} 2021-07-14 22:58:22,752 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} 2021-07-14 22:58:22,753 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000} +2021-07-14 23:07:48,138 INFO MainThread:600323 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-14 23:07:48,139 INFO MainThread:600323 [wandb_run.py:_restore():1565] restore +2021-07-14 23:07:50,441 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1448 + total_bytes: 1448 +} + +2021-07-14 23:07:50,747 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1448 + total_bytes: 1448 +} + +2021-07-14 23:07:50,849 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1448 + total_bytes: 11717 +} + +2021-07-14 23:07:50,951 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1448 + total_bytes: 11717 +} + +2021-07-14 23:07:51,054 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11717 + total_bytes: 11717 +} + +2021-07-14 23:07:51,156 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11717 + total_bytes: 11717 +} + +2021-07-14 23:07:51,258 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11717 + total_bytes: 11717 +} + +2021-07-14 23:07:51,360 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11717 + total_bytes: 11717 +} + +2021-07-14 23:07:51,462 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11717 + total_bytes: 11717 +} + +2021-07-14 23:07:51,564 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11717 + total_bytes: 11717 +} + +2021-07-14 23:07:51,665 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11717 + total_bytes: 11717 +} + +2021-07-14 23:07:51,942 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11717 + total_bytes: 11717 +} + +2021-07-14 23:07:52,044 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11717 + total_bytes: 11717 +} + +2021-07-14 23:07:53,341 INFO MainThread:600323 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb b/wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb index a484d92e324e9902f1e849ceb17c2a173dc3b637..f82e193a48a7f96ebb97ae9da28dabf0d157c440 100644 Binary files a/wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb and b/wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb differ diff --git a/wandb/run-20210714_231147-gkn68kcy/files/config.yaml b/wandb/run-20210714_231147-gkn68kcy/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0628e4a99fc73d010132cce3552dd86f69208b0 --- /dev/null +++ b/wandb/run-20210714_231147-gkn68kcy/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 1 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul14_23-11-40_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 500 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 2000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 1 +per_device_train_batch_size: + desc: null + value: 1 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 10000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210714_231147-gkn68kcy/files/output.log b/wandb/run-20210714_231147-gkn68kcy/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e503783ef8f9c50a8b0361f9bb3d6fa911d15794 --- /dev/null +++ b/wandb/run-20210714_231147-gkn68kcy/files/output.log @@ -0,0 +1,32 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + train_batch_idx = generate_batch_splits(train_samples_idx, train_batch_size // grad_accum_steps) + File "./run_mlm_flax.py", line 263, in generate_batch_splits + batch_idx = np.split(samples_idx, sections_split) + File "<__array_function__ internals>", line 5, in split + File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 874, in split + return array_split(ary, indices_or_sections, axis) + File "<__array_function__ internals>", line 5, in array_split + File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 790, in array_split + sub_arys.append(_nx.swapaxes(sary[st:end], axis, 0)) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5009, in _rewriting_take + return _gather(arr, treedef, static_idx, dynamic_idx, indices_are_sorted, + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5028, in _gather + y = lax.gather( + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/lax/lax.py", line 984, in gather + return gather_p.bind( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 264, in bind + out = top_trace.process_primitive(self, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 603, in process_primitive + return primitive.impl(*tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 249, in apply_primitive + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 365, in _execute_compiled_primitive + out_bufs = compiled.execute(input_bufs) +RuntimeError: Resource exhausted: Attempting to allocate 17.0K. That was not possible. There are 48.0K free. Due to fragmentation, the largest contiguous region of free memory is 16.0K.; (0x0x0_HBM0) \ No newline at end of file diff --git a/wandb/run-20210714_231147-gkn68kcy/files/requirements.txt b/wandb/run-20210714_231147-gkn68kcy/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210714_231147-gkn68kcy/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210714_231147-gkn68kcy/files/wandb-metadata.json b/wandb/run-20210714_231147-gkn68kcy/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..efb9a2c94a43df8f14e61c440465f57a5141dd1a --- /dev/null +++ b/wandb/run-20210714_231147-gkn68kcy/files/wandb-metadata.json @@ -0,0 +1,46 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-14T23:11:49.155779", + "startedAt": "2021-07-14T23:11:47.117291", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=10000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=500", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=1", + "--per_device_eval_batch_size=1", + "--save_total_limit=5", + "--max_eval_samples=2000", + "--overwrite_cache", + "False" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210714_231147-gkn68kcy/files/wandb-summary.json b/wandb/run-20210714_231147-gkn68kcy/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210714_231147-gkn68kcy/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210714_231147-gkn68kcy/logs/debug-internal.log b/wandb/run-20210714_231147-gkn68kcy/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..5c2e97cd61dafae1a6ec57288e3e8f71f4af83e9 --- /dev/null +++ b/wandb/run-20210714_231147-gkn68kcy/logs/debug-internal.log @@ -0,0 +1,279 @@ +2021-07-14 23:11:47,821 INFO MainThread:604064 [internal.py:wandb_internal():88] W&B internal server running at pid: 604064, started at: 2021-07-14 23:11:47.821366 +2021-07-14 23:11:47,823 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: check_version +2021-07-14 23:11:47,824 INFO WriterThread:604064 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/run-gkn68kcy.wandb +2021-07-14 23:11:47,824 DEBUG SenderThread:604064 [sender.py:send():179] send: header +2021-07-14 23:11:47,825 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: check_version +2021-07-14 23:11:47,862 DEBUG SenderThread:604064 [sender.py:send():179] send: run +2021-07-14 23:11:48,044 INFO SenderThread:604064 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files +2021-07-14 23:11:48,044 INFO SenderThread:604064 [sender.py:_start_run_threads():716] run started: gkn68kcy with start time 1626304307 +2021-07-14 23:11:48,044 DEBUG SenderThread:604064 [sender.py:send():179] send: summary +2021-07-14 23:11:48,044 INFO SenderThread:604064 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 23:11:48,045 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: run_start +2021-07-14 23:11:49,047 INFO Thread-8 :604064 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/wandb-summary.json +2021-07-14 23:11:49,155 DEBUG HandlerThread:604064 [meta.py:__init__():39] meta init +2021-07-14 23:11:49,155 DEBUG HandlerThread:604064 [meta.py:__init__():53] meta init done +2021-07-14 23:11:49,155 DEBUG HandlerThread:604064 [meta.py:probe():210] probe +2021-07-14 23:11:49,156 DEBUG HandlerThread:604064 [meta.py:_setup_git():200] setup git +2021-07-14 23:11:49,187 DEBUG HandlerThread:604064 [meta.py:_setup_git():207] setup git done +2021-07-14 23:11:49,188 DEBUG HandlerThread:604064 [meta.py:_save_pip():57] save pip +2021-07-14 23:11:49,188 DEBUG HandlerThread:604064 [meta.py:_save_pip():71] save pip done +2021-07-14 23:11:49,188 DEBUG HandlerThread:604064 [meta.py:probe():252] probe done +2021-07-14 23:11:49,192 DEBUG SenderThread:604064 [sender.py:send():179] send: files +2021-07-14 23:11:49,192 INFO SenderThread:604064 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-14 23:11:49,199 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:11:49,200 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:11:49,336 DEBUG SenderThread:604064 [sender.py:send():179] send: config +2021-07-14 23:11:49,336 DEBUG SenderThread:604064 [sender.py:send():179] send: config +2021-07-14 23:11:49,337 DEBUG SenderThread:604064 [sender.py:send():179] send: config +2021-07-14 23:11:49,685 INFO Thread-11 :604064 [upload_job.py:push():137] Uploaded file /tmp/tmpnh56hsgtwandb/3jt8aozu-wandb-metadata.json +2021-07-14 23:11:50,046 INFO Thread-8 :604064 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/output.log +2021-07-14 23:11:50,046 INFO Thread-8 :604064 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/requirements.txt +2021-07-14 23:11:50,046 INFO Thread-8 :604064 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/wandb-metadata.json +2021-07-14 23:12:04,051 INFO Thread-8 :604064 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/output.log +2021-07-14 23:12:04,338 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:12:04,339 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:12:17,233 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:12:19,058 INFO Thread-8 :604064 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/config.yaml +2021-07-14 23:12:19,470 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:12:19,471 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:12:34,607 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:12:34,607 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:12:47,304 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:12:49,759 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:12:49,760 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:13:04,899 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:13:04,899 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:13:17,375 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:13:20,032 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:13:20,032 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:13:35,167 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:13:35,168 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:13:47,450 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:13:50,301 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:13:50,301 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:14:05,433 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:14:05,434 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:14:17,527 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:14:20,564 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:14:20,564 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:14:35,694 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:14:35,695 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:14:47,605 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:14:50,827 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:14:50,827 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:15:05,965 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:15:05,965 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:15:17,682 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:15:21,099 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:15:21,100 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:15:36,236 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:15:36,237 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:15:47,752 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:15:51,383 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:15:51,384 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:16:06,514 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:16:06,515 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:16:17,832 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:16:21,647 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:16:21,647 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:16:36,777 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:16:36,778 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:16:47,906 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:16:51,911 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:16:51,911 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:17:07,045 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:17:07,045 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:17:17,984 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:17:22,180 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:17:22,180 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:17:37,312 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:17:37,313 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:17:48,061 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:17:52,447 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:17:52,447 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:18:07,584 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:18:07,584 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:18:18,134 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:18:22,718 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:18:22,718 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:18:37,852 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:18:37,852 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:18:48,202 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:18:52,983 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:18:52,984 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:19:08,118 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:19:08,118 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:19:18,280 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:19:23,249 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:19:23,249 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:19:38,406 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:19:38,407 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:19:48,357 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:19:53,538 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:19:53,539 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:20:08,668 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:20:08,668 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:20:18,431 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:20:23,808 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:20:23,809 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:20:38,943 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:20:38,943 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:20:48,507 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:20:54,079 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:20:54,079 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:21:09,222 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:21:09,223 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:21:18,586 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:21:24,353 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:21:24,354 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:21:39,483 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:21:39,484 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:21:48,663 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:21:55,394 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:21:55,394 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:22:10,527 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:22:10,528 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:22:18,741 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:22:25,659 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:22:25,659 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:22:40,790 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:22:40,790 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:22:48,820 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:22:55,922 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:22:55,923 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:23:11,058 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:23:11,059 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:23:18,897 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:23:26,197 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:23:26,198 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:23:41,329 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:23:41,329 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:23:48,974 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:23:56,463 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:23:56,463 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:24:11,593 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:24:11,593 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:24:19,051 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:24:26,724 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:24:26,724 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:24:41,858 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:24:41,858 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:24:49,130 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:24:56,991 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:24:56,991 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:25:12,121 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:25:12,122 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:25:19,207 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:25:27,253 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:25:27,254 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:25:42,385 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:25:42,386 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:25:49,284 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:25:57,527 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:25:57,527 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:26:06,351 INFO Thread-8 :604064 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/output.log +2021-07-14 23:26:07,081 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:26:07,082 DEBUG SenderThread:604064 [sender.py:send():179] send: telemetry +2021-07-14 23:26:07,082 DEBUG SenderThread:604064 [sender.py:send():179] send: exit +2021-07-14 23:26:07,082 INFO SenderThread:604064 [sender.py:send_exit():287] handling exit code: 1 +2021-07-14 23:26:07,083 INFO SenderThread:604064 [sender.py:send_exit():295] send defer +2021-07-14 23:26:07,084 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:26:07,084 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:26:07,085 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-14 23:26:07,085 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer +2021-07-14 23:26:07,085 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-14 23:26:07,085 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 1 +2021-07-14 23:26:07,085 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:26:07,085 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-14 23:26:07,130 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer +2021-07-14 23:26:07,130 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-14 23:26:07,130 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 2 +2021-07-14 23:26:07,131 DEBUG SenderThread:604064 [sender.py:send():179] send: stats +2021-07-14 23:26:07,131 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:26:07,131 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-14 23:26:07,132 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer +2021-07-14 23:26:07,132 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-14 23:26:07,132 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 3 +2021-07-14 23:26:07,132 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:26:07,132 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-14 23:26:07,132 DEBUG SenderThread:604064 [sender.py:send():179] send: summary +2021-07-14 23:26:07,133 INFO SenderThread:604064 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 23:26:07,133 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer +2021-07-14 23:26:07,133 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-14 23:26:07,133 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 4 +2021-07-14 23:26:07,134 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:26:07,134 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-14 23:26:07,134 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer +2021-07-14 23:26:07,134 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-14 23:26:07,228 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:26:07,305 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 5 +2021-07-14 23:26:07,305 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:26:07,306 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:26:07,306 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-14 23:26:07,306 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer +2021-07-14 23:26:07,306 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-14 23:26:07,306 INFO SenderThread:604064 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-14 23:26:07,351 INFO Thread-8 :604064 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/config.yaml +2021-07-14 23:26:07,351 INFO SenderThread:604064 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/output.log +2021-07-14 23:26:07,352 INFO SenderThread:604064 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/wandb-summary.json +2021-07-14 23:26:07,352 INFO SenderThread:604064 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files +2021-07-14 23:26:07,352 INFO SenderThread:604064 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/requirements.txt requirements.txt +2021-07-14 23:26:07,352 INFO SenderThread:604064 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/output.log output.log +2021-07-14 23:26:07,352 INFO SenderThread:604064 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/wandb-metadata.json wandb-metadata.json +2021-07-14 23:26:07,352 INFO SenderThread:604064 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/config.yaml config.yaml +2021-07-14 23:26:07,356 INFO SenderThread:604064 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/wandb-summary.json wandb-summary.json +2021-07-14 23:26:07,359 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 6 +2021-07-14 23:26:07,360 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:26:07,360 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-14 23:26:07,360 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer +2021-07-14 23:26:07,361 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-14 23:26:07,361 INFO SenderThread:604064 [file_pusher.py:finish():177] shutting down file pusher +2021-07-14 23:26:07,407 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:26:07,408 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:26:07,513 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:26:07,514 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:26:07,616 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:26:07,616 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:26:07,718 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:26:07,718 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:26:07,820 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:26:07,820 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:26:07,840 INFO Thread-14 :604064 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/config.yaml +2021-07-14 23:26:07,841 INFO Thread-13 :604064 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/output.log +2021-07-14 23:26:07,874 INFO Thread-12 :604064 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/requirements.txt +2021-07-14 23:26:07,875 INFO Thread-15 :604064 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/wandb-summary.json +2021-07-14 23:26:07,922 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:26:07,922 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:26:08,024 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:26:08,024 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:26:08,076 INFO Thread-7 :604064 [sender.py:transition_state():308] send defer: 7 +2021-07-14 23:26:08,077 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:26:08,077 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-14 23:26:08,077 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer +2021-07-14 23:26:08,077 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-14 23:26:08,126 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:26:08,360 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 8 +2021-07-14 23:26:08,360 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:26:08,361 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:26:08,361 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-14 23:26:08,361 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer +2021-07-14 23:26:08,361 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-14 23:26:08,361 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 9 +2021-07-14 23:26:08,362 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:26:08,362 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-14 23:26:08,362 DEBUG SenderThread:604064 [sender.py:send():179] send: final +2021-07-14 23:26:08,362 DEBUG SenderThread:604064 [sender.py:send():179] send: footer +2021-07-14 23:26:08,362 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer +2021-07-14 23:26:08,362 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-14 23:26:08,462 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:26:08,462 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:26:08,463 INFO SenderThread:604064 [file_pusher.py:join():182] waiting for file pusher +2021-07-14 23:26:08,464 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: get_summary +2021-07-14 23:26:08,465 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-14 23:26:08,465 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: shutdown +2021-07-14 23:26:08,465 INFO HandlerThread:604064 [handler.py:finish():638] shutting down handler +2021-07-14 23:26:09,363 INFO WriterThread:604064 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/run-gkn68kcy.wandb +2021-07-14 23:26:09,463 INFO SenderThread:604064 [sender.py:finish():945] shutting down sender +2021-07-14 23:26:09,463 INFO SenderThread:604064 [file_pusher.py:finish():177] shutting down file pusher +2021-07-14 23:26:09,463 INFO SenderThread:604064 [file_pusher.py:join():182] waiting for file pusher +2021-07-14 23:26:09,466 INFO MainThread:604064 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210714_231147-gkn68kcy/logs/debug.log b/wandb/run-20210714_231147-gkn68kcy/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..406d21f52e1892d49e553fe64a16be083b1bf6b0 --- /dev/null +++ b/wandb/run-20210714_231147-gkn68kcy/logs/debug.log @@ -0,0 +1,119 @@ +2021-07-14 23:11:47,119 INFO MainThread:602807 [wandb_setup.py:_flush():69] setting env: {} +2021-07-14 23:11:47,119 INFO MainThread:602807 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-14 23:11:47,119 INFO MainThread:602807 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/logs/debug.log +2021-07-14 23:11:47,119 INFO MainThread:602807 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/logs/debug-internal.log +2021-07-14 23:11:47,119 INFO MainThread:602807 [wandb_init.py:init():370] calling init triggers +2021-07-14 23:11:47,119 INFO MainThread:602807 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-14 23:11:47,119 INFO MainThread:602807 [wandb_init.py:init():419] starting backend +2021-07-14 23:11:47,120 INFO MainThread:602807 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-14 23:11:47,167 INFO MainThread:602807 [backend.py:ensure_launched():135] starting backend process... +2021-07-14 23:11:47,212 INFO MainThread:602807 [backend.py:ensure_launched():139] started backend process with pid: 604064 +2021-07-14 23:11:47,214 INFO MainThread:602807 [wandb_init.py:init():424] backend started and connected +2021-07-14 23:11:47,217 INFO MainThread:602807 [wandb_init.py:init():472] updated telemetry +2021-07-14 23:11:47,218 INFO MainThread:602807 [wandb_init.py:init():491] communicating current version +2021-07-14 23:11:47,860 INFO MainThread:602807 [wandb_init.py:init():496] got version response +2021-07-14 23:11:47,860 INFO MainThread:602807 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-14 23:11:48,044 INFO MainThread:602807 [wandb_init.py:init():529] starting run threads in backend +2021-07-14 23:11:49,195 INFO MainThread:602807 [wandb_run.py:_console_start():1623] atexit reg +2021-07-14 23:11:49,196 INFO MainThread:602807 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-14 23:11:49,196 INFO MainThread:602807 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-14 23:11:49,199 INFO MainThread:602807 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-14 23:11:49,199 INFO MainThread:602807 [wandb_init.py:init():554] run started, returning control to user process +2021-07-14 23:11:49,207 INFO MainThread:602807 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_23-11-40_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-14 23:11:49,209 INFO MainThread:602807 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-14 23:11:49,210 INFO MainThread:602807 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000} +2021-07-14 23:26:04,706 INFO MainThread:602807 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-14 23:26:04,707 INFO MainThread:602807 [wandb_run.py:_restore():1565] restore +2021-07-14 23:26:07,085 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1405 + total_bytes: 1405 +} + +2021-07-14 23:26:07,306 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1405 + total_bytes: 1405 +} + +2021-07-14 23:26:07,408 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1405 + total_bytes: 10798 +} + +2021-07-14 23:26:07,515 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1405 + total_bytes: 10798 +} + +2021-07-14 23:26:07,617 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10798 + total_bytes: 10798 +} + +2021-07-14 23:26:07,719 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10798 + total_bytes: 10798 +} + +2021-07-14 23:26:07,821 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10798 + total_bytes: 10798 +} + +2021-07-14 23:26:07,923 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10798 + total_bytes: 10798 +} + +2021-07-14 23:26:08,025 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10798 + total_bytes: 10798 +} + +2021-07-14 23:26:08,361 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10798 + total_bytes: 10798 +} + +2021-07-14 23:26:08,463 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10798 + total_bytes: 10798 +} + +2021-07-14 23:26:09,799 INFO MainThread:602807 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210714_231147-gkn68kcy/run-gkn68kcy.wandb b/wandb/run-20210714_231147-gkn68kcy/run-gkn68kcy.wandb new file mode 100644 index 0000000000000000000000000000000000000000..18818820199077993d796d9d7aa72557c4579b79 Binary files /dev/null and b/wandb/run-20210714_231147-gkn68kcy/run-gkn68kcy.wandb differ diff --git a/wandb/run-20210714_232703-1jijl27o/files/config.yaml b/wandb/run-20210714_232703-1jijl27o/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..efb76c886c937947ebe33aeec93a64ff10926927 --- /dev/null +++ b/wandb/run-20210714_232703-1jijl27o/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 4 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul14_23-26-56_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 500 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 2000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 1 +per_device_train_batch_size: + desc: null + value: 1 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 10000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210714_232703-1jijl27o/files/output.log b/wandb/run-20210714_232703-1jijl27o/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..840db0780b0c1343c0408f34f6b172180306d50b --- /dev/null +++ b/wandb/run-20210714_232703-1jijl27o/files/output.log @@ -0,0 +1,34 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + train_batch_idx = generate_batch_splits(train_samples_idx, train_batch_size // grad_accum_steps) + File "./run_mlm_flax.py", line 263, in generate_batch_splits + batch_idx = np.split(samples_idx, sections_split) + File "<__array_function__ internals>", line 5, in split + File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 874, in split + return array_split(ary, indices_or_sections, axis) + File "<__array_function__ internals>", line 5, in array_split + File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 790, in array_split + sub_arys.append(_nx.swapaxes(sary[st:end], axis, 0)) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5009, in _rewriting_take + return _gather(arr, treedef, static_idx, dynamic_idx, indices_are_sorted, + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5028, in _gather + y = lax.gather( + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/lax/lax.py", line 984, in gather + return gather_p.bind( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 264, in bind + out = top_trace.process_primitive(self, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 603, in process_primitive + return primitive.impl(*tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 249, in apply_primitive + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 365, in _execute_compiled_primitive + out_bufs = compiled.execute(input_bufs) +RuntimeError: Resource exhausted: Attempting to allocate 17.0K. That was not possible. There are 48.0K free. Due to fragmentation, the largest contiguous region of free memory is 16.0K.; (0x0x0_HBM0) \ No newline at end of file diff --git a/wandb/run-20210714_232703-1jijl27o/files/requirements.txt b/wandb/run-20210714_232703-1jijl27o/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210714_232703-1jijl27o/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210714_232703-1jijl27o/files/wandb-metadata.json b/wandb/run-20210714_232703-1jijl27o/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..b60dce80b8b97f477ca26dd59a1294767cbf5c7a --- /dev/null +++ b/wandb/run-20210714_232703-1jijl27o/files/wandb-metadata.json @@ -0,0 +1,47 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-14T23:27:05.989594", + "startedAt": "2021-07-14T23:27:03.993085", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=10000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=500", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=1", + "--per_device_eval_batch_size=1", + "--save_total_limit=5", + "--max_eval_samples=2000", + "--overwrite_cache", + "False", + "--gradient_accumulation_steps=4" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210714_232703-1jijl27o/files/wandb-summary.json b/wandb/run-20210714_232703-1jijl27o/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210714_232703-1jijl27o/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210714_232703-1jijl27o/logs/debug-internal.log b/wandb/run-20210714_232703-1jijl27o/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..8688bf68395f2c44030ccbfed19c8f1011bd1369 --- /dev/null +++ b/wandb/run-20210714_232703-1jijl27o/logs/debug-internal.log @@ -0,0 +1,275 @@ +2021-07-14 23:27:04,669 INFO MainThread:606786 [internal.py:wandb_internal():88] W&B internal server running at pid: 606786, started at: 2021-07-14 23:27:04.669257 +2021-07-14 23:27:04,671 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: check_version +2021-07-14 23:27:04,671 INFO WriterThread:606786 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/run-1jijl27o.wandb +2021-07-14 23:27:04,672 DEBUG SenderThread:606786 [sender.py:send():179] send: header +2021-07-14 23:27:04,672 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: check_version +2021-07-14 23:27:04,709 DEBUG SenderThread:606786 [sender.py:send():179] send: run +2021-07-14 23:27:04,879 INFO SenderThread:606786 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files +2021-07-14 23:27:04,880 INFO SenderThread:606786 [sender.py:_start_run_threads():716] run started: 1jijl27o with start time 1626305224 +2021-07-14 23:27:04,880 DEBUG SenderThread:606786 [sender.py:send():179] send: summary +2021-07-14 23:27:04,880 INFO SenderThread:606786 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 23:27:04,880 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: run_start +2021-07-14 23:27:05,883 INFO Thread-8 :606786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/wandb-summary.json +2021-07-14 23:27:05,989 DEBUG HandlerThread:606786 [meta.py:__init__():39] meta init +2021-07-14 23:27:05,989 DEBUG HandlerThread:606786 [meta.py:__init__():53] meta init done +2021-07-14 23:27:05,989 DEBUG HandlerThread:606786 [meta.py:probe():210] probe +2021-07-14 23:27:05,990 DEBUG HandlerThread:606786 [meta.py:_setup_git():200] setup git +2021-07-14 23:27:06,020 DEBUG HandlerThread:606786 [meta.py:_setup_git():207] setup git done +2021-07-14 23:27:06,020 DEBUG HandlerThread:606786 [meta.py:_save_pip():57] save pip +2021-07-14 23:27:06,021 DEBUG HandlerThread:606786 [meta.py:_save_pip():71] save pip done +2021-07-14 23:27:06,021 DEBUG HandlerThread:606786 [meta.py:probe():252] probe done +2021-07-14 23:27:06,024 DEBUG SenderThread:606786 [sender.py:send():179] send: files +2021-07-14 23:27:06,024 INFO SenderThread:606786 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-14 23:27:06,032 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:27:06,033 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:27:06,166 DEBUG SenderThread:606786 [sender.py:send():179] send: config +2021-07-14 23:27:06,166 DEBUG SenderThread:606786 [sender.py:send():179] send: config +2021-07-14 23:27:06,166 DEBUG SenderThread:606786 [sender.py:send():179] send: config +2021-07-14 23:27:06,504 INFO Thread-11 :606786 [upload_job.py:push():137] Uploaded file /tmp/tmpf02i07o_wandb/gludn8x0-wandb-metadata.json +2021-07-14 23:27:06,881 INFO Thread-8 :606786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/requirements.txt +2021-07-14 23:27:06,881 INFO Thread-8 :606786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/wandb-metadata.json +2021-07-14 23:27:06,881 INFO Thread-8 :606786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/output.log +2021-07-14 23:27:20,887 INFO Thread-8 :606786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/output.log +2021-07-14 23:27:21,168 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:27:21,168 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:27:22,888 INFO Thread-8 :606786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/output.log +2021-07-14 23:27:34,069 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:27:35,894 INFO Thread-8 :606786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/config.yaml +2021-07-14 23:27:36,301 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:27:36,302 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:27:51,435 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:27:51,435 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:28:04,145 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:28:06,571 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:28:06,571 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:28:21,704 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:28:21,704 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:28:34,210 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:28:36,838 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:28:36,838 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:28:51,969 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:28:51,970 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:29:04,276 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:29:11,606 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:29:11,606 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:29:26,738 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:29:26,739 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:29:34,341 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:29:41,875 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:29:41,875 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:29:57,008 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:29:57,009 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:30:04,407 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:30:12,141 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:30:12,142 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:30:27,275 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:30:27,276 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:30:34,475 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:30:42,407 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:30:42,407 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:30:57,540 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:30:57,541 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:31:04,540 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:31:12,674 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:31:12,674 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:31:27,806 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:31:27,807 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:31:34,612 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:31:42,940 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:31:42,940 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:31:58,074 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:31:58,075 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:32:04,688 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:32:13,208 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:32:13,208 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:32:28,339 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:32:28,340 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:32:34,767 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:32:43,471 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:32:43,472 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:32:58,604 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:32:58,604 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:33:04,843 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:33:13,739 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:33:13,739 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:33:28,873 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:33:28,874 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:33:34,917 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:33:44,007 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:33:44,007 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:33:59,140 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:33:59,141 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:34:04,989 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:34:14,274 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:34:14,275 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:34:29,406 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:34:29,406 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:34:35,063 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:34:44,538 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:34:44,538 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:34:59,670 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:34:59,671 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:35:05,136 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:35:14,806 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:35:14,806 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:35:29,937 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:35:29,938 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:35:35,212 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:35:45,083 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:35:45,084 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:36:00,215 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:36:00,215 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:36:05,289 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:36:15,359 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:36:15,359 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:36:30,491 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:36:30,492 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:36:35,354 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:36:45,626 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:36:45,626 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:37:00,758 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:37:00,758 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:37:05,418 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:37:15,896 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:37:15,896 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:37:31,030 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:37:31,031 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:37:35,484 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:37:46,162 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:37:46,162 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:38:01,293 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:38:01,294 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:38:05,552 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:38:16,425 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:38:16,425 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:38:31,557 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:38:31,557 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:38:35,624 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:38:46,691 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:38:46,691 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:39:01,823 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:39:01,824 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:39:05,695 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:39:16,955 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:39:16,955 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:39:32,087 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:39:32,088 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:39:35,769 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:39:47,220 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:39:47,221 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:40:02,350 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:40:02,351 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:40:05,843 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:40:17,481 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:40:17,482 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:40:32,615 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:40:32,616 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:40:35,918 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:40:47,205 INFO Thread-8 :606786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/output.log +2021-07-14 23:40:48,363 DEBUG SenderThread:606786 [sender.py:send():179] send: telemetry +2021-07-14 23:40:48,364 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:40:48,364 DEBUG SenderThread:606786 [sender.py:send():179] send: exit +2021-07-14 23:40:48,364 INFO SenderThread:606786 [sender.py:send_exit():287] handling exit code: 1 +2021-07-14 23:40:48,366 INFO SenderThread:606786 [sender.py:send_exit():295] send defer +2021-07-14 23:40:48,366 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:40:48,367 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:40:48,367 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-14 23:40:48,367 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer +2021-07-14 23:40:48,367 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-14 23:40:48,367 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 1 +2021-07-14 23:40:48,368 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:40:48,368 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-14 23:40:48,446 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer +2021-07-14 23:40:48,446 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-14 23:40:48,446 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 2 +2021-07-14 23:40:48,446 DEBUG SenderThread:606786 [sender.py:send():179] send: stats +2021-07-14 23:40:48,447 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:40:48,447 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-14 23:40:48,447 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer +2021-07-14 23:40:48,447 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-14 23:40:48,447 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 3 +2021-07-14 23:40:48,447 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:40:48,448 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-14 23:40:48,448 DEBUG SenderThread:606786 [sender.py:send():179] send: summary +2021-07-14 23:40:48,448 INFO SenderThread:606786 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 23:40:48,449 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer +2021-07-14 23:40:48,449 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-14 23:40:48,449 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 4 +2021-07-14 23:40:48,449 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:40:48,449 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-14 23:40:48,449 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer +2021-07-14 23:40:48,449 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-14 23:40:48,469 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:40:48,629 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 5 +2021-07-14 23:40:48,629 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:40:48,630 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:40:48,630 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-14 23:40:48,630 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer +2021-07-14 23:40:48,630 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-14 23:40:48,630 INFO SenderThread:606786 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-14 23:40:48,732 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:40:49,206 INFO Thread-8 :606786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/config.yaml +2021-07-14 23:40:49,207 INFO SenderThread:606786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/output.log +2021-07-14 23:40:49,207 INFO SenderThread:606786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/wandb-summary.json +2021-07-14 23:40:49,207 INFO SenderThread:606786 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files +2021-07-14 23:40:49,207 INFO SenderThread:606786 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/requirements.txt requirements.txt +2021-07-14 23:40:49,208 INFO SenderThread:606786 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/output.log output.log +2021-07-14 23:40:49,208 INFO SenderThread:606786 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/wandb-metadata.json wandb-metadata.json +2021-07-14 23:40:49,208 INFO SenderThread:606786 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/config.yaml config.yaml +2021-07-14 23:40:49,208 INFO SenderThread:606786 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/wandb-summary.json wandb-summary.json +2021-07-14 23:40:49,209 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 6 +2021-07-14 23:40:49,209 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:40:49,210 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:40:49,210 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-14 23:40:49,215 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer +2021-07-14 23:40:49,216 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-14 23:40:49,216 INFO SenderThread:606786 [file_pusher.py:finish():177] shutting down file pusher +2021-07-14 23:40:49,311 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:40:49,311 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:40:49,413 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:40:49,413 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:40:49,515 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:40:49,515 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:40:49,617 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:40:49,618 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:40:49,652 INFO Thread-12 :606786 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/requirements.txt +2021-07-14 23:40:49,653 INFO Thread-14 :606786 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/config.yaml +2021-07-14 23:40:49,698 INFO Thread-15 :606786 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/wandb-summary.json +2021-07-14 23:40:49,719 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:40:49,720 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:40:49,739 INFO Thread-13 :606786 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/output.log +2021-07-14 23:40:49,821 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:40:49,822 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:40:49,923 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:40:49,923 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:40:49,940 INFO Thread-7 :606786 [sender.py:transition_state():308] send defer: 7 +2021-07-14 23:40:49,940 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:40:49,940 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-14 23:40:49,940 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer +2021-07-14 23:40:49,940 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-14 23:40:50,025 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:40:50,227 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 8 +2021-07-14 23:40:50,228 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:40:50,228 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:40:50,228 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-14 23:40:50,229 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer +2021-07-14 23:40:50,229 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-14 23:40:50,229 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 9 +2021-07-14 23:40:50,229 DEBUG SenderThread:606786 [sender.py:send():179] send: final +2021-07-14 23:40:50,229 DEBUG SenderThread:606786 [sender.py:send():179] send: footer +2021-07-14 23:40:50,230 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer +2021-07-14 23:40:50,230 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-14 23:40:50,230 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer +2021-07-14 23:40:50,230 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-14 23:40:50,330 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 23:40:50,330 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 23:40:50,330 INFO SenderThread:606786 [file_pusher.py:join():182] waiting for file pusher +2021-07-14 23:40:50,332 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: get_summary +2021-07-14 23:40:50,332 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-14 23:40:50,333 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: shutdown +2021-07-14 23:40:50,333 INFO HandlerThread:606786 [handler.py:finish():638] shutting down handler +2021-07-14 23:40:51,230 INFO WriterThread:606786 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/run-1jijl27o.wandb +2021-07-14 23:40:51,330 INFO SenderThread:606786 [sender.py:finish():945] shutting down sender +2021-07-14 23:40:51,331 INFO SenderThread:606786 [file_pusher.py:finish():177] shutting down file pusher +2021-07-14 23:40:51,331 INFO SenderThread:606786 [file_pusher.py:join():182] waiting for file pusher +2021-07-14 23:40:51,334 INFO MainThread:606786 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210714_232703-1jijl27o/logs/debug.log b/wandb/run-20210714_232703-1jijl27o/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..16ec8f83282dd8492e1da2d09a51b0d338e9bb77 --- /dev/null +++ b/wandb/run-20210714_232703-1jijl27o/logs/debug.log @@ -0,0 +1,127 @@ +2021-07-14 23:27:03,994 INFO MainThread:605532 [wandb_setup.py:_flush():69] setting env: {} +2021-07-14 23:27:03,994 INFO MainThread:605532 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-14 23:27:03,994 INFO MainThread:605532 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/logs/debug.log +2021-07-14 23:27:03,994 INFO MainThread:605532 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/logs/debug-internal.log +2021-07-14 23:27:03,995 INFO MainThread:605532 [wandb_init.py:init():370] calling init triggers +2021-07-14 23:27:03,995 INFO MainThread:605532 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-14 23:27:03,995 INFO MainThread:605532 [wandb_init.py:init():419] starting backend +2021-07-14 23:27:03,995 INFO MainThread:605532 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-14 23:27:04,041 INFO MainThread:605532 [backend.py:ensure_launched():135] starting backend process... +2021-07-14 23:27:04,085 INFO MainThread:605532 [backend.py:ensure_launched():139] started backend process with pid: 606786 +2021-07-14 23:27:04,087 INFO MainThread:605532 [wandb_init.py:init():424] backend started and connected +2021-07-14 23:27:04,090 INFO MainThread:605532 [wandb_init.py:init():472] updated telemetry +2021-07-14 23:27:04,091 INFO MainThread:605532 [wandb_init.py:init():491] communicating current version +2021-07-14 23:27:04,708 INFO MainThread:605532 [wandb_init.py:init():496] got version response +2021-07-14 23:27:04,708 INFO MainThread:605532 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-14 23:27:04,879 INFO MainThread:605532 [wandb_init.py:init():529] starting run threads in backend +2021-07-14 23:27:06,027 INFO MainThread:605532 [wandb_run.py:_console_start():1623] atexit reg +2021-07-14 23:27:06,028 INFO MainThread:605532 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-14 23:27:06,028 INFO MainThread:605532 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-14 23:27:06,030 INFO MainThread:605532 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-14 23:27:06,030 INFO MainThread:605532 [wandb_init.py:init():554] run started, returning control to user process +2021-07-14 23:27:06,038 INFO MainThread:605532 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_23-26-56_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-14 23:27:06,040 INFO MainThread:605532 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-14 23:27:06,041 INFO MainThread:605532 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000} +2021-07-14 23:40:46,049 INFO MainThread:605532 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-14 23:40:46,050 INFO MainThread:605532 [wandb_run.py:_restore():1565] restore +2021-07-14 23:40:48,367 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1448 + total_bytes: 1448 +} + +2021-07-14 23:40:48,630 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1448 + total_bytes: 1448 +} + +2021-07-14 23:40:49,210 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 1448 + total_bytes: 11299 +} + +2021-07-14 23:40:49,312 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1448 + total_bytes: 11301 +} + +2021-07-14 23:40:49,414 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11301 + total_bytes: 11301 +} + +2021-07-14 23:40:49,516 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11301 + total_bytes: 11301 +} + +2021-07-14 23:40:49,618 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11301 + total_bytes: 11301 +} + +2021-07-14 23:40:49,720 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11301 + total_bytes: 11301 +} + +2021-07-14 23:40:49,822 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11301 + total_bytes: 11301 +} + +2021-07-14 23:40:49,924 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11301 + total_bytes: 11301 +} + +2021-07-14 23:40:50,228 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11301 + total_bytes: 11301 +} + +2021-07-14 23:40:50,331 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11301 + total_bytes: 11301 +} + +2021-07-14 23:40:51,653 INFO MainThread:605532 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210714_232703-1jijl27o/run-1jijl27o.wandb b/wandb/run-20210714_232703-1jijl27o/run-1jijl27o.wandb new file mode 100644 index 0000000000000000000000000000000000000000..1da37d0929ce91666027ad8f38adffbb8f2072ba Binary files /dev/null and b/wandb/run-20210714_232703-1jijl27o/run-1jijl27o.wandb differ diff --git a/wandb/run-20210714_234615-3p6vlfc3/files/config.yaml b/wandb/run-20210714_234615-3p6vlfc3/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4962f78e8ce19dbcb99a72507d323ed73b111df --- /dev/null +++ b/wandb/run-20210714_234615-3p6vlfc3/files/config.yaml @@ -0,0 +1,304 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 4 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul14_23-46-07_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 250 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 2000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 10000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210714_234615-3p6vlfc3/files/output.log b/wandb/run-20210714_234615-3p6vlfc3/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..a0f2fd4451704b4da68d71b21f5d9abb32e0fd5e --- /dev/null +++ b/wandb/run-20210714_234615-3p6vlfc3/files/output.log @@ -0,0 +1,6 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( diff --git a/wandb/run-20210714_234615-3p6vlfc3/files/requirements.txt b/wandb/run-20210714_234615-3p6vlfc3/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210714_234615-3p6vlfc3/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210714_234615-3p6vlfc3/files/wandb-metadata.json b/wandb/run-20210714_234615-3p6vlfc3/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5e86487d03abc76969b65fdf4d152a83b5cf1208 --- /dev/null +++ b/wandb/run-20210714_234615-3p6vlfc3/files/wandb-metadata.json @@ -0,0 +1,45 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-14T23:46:17.758445", + "startedAt": "2021-07-14T23:46:15.750284", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=10000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=250", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=2000", + "--gradient_accumulation_steps=4" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210714_234615-3p6vlfc3/files/wandb-summary.json b/wandb/run-20210714_234615-3p6vlfc3/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210714_234615-3p6vlfc3/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210714_234615-3p6vlfc3/logs/debug-internal.log b/wandb/run-20210714_234615-3p6vlfc3/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..a3b1a62257f9f9f8a588a3cb2122b1b6c77a332f --- /dev/null +++ b/wandb/run-20210714_234615-3p6vlfc3/logs/debug-internal.log @@ -0,0 +1,61 @@ +2021-07-14 23:46:16,446 INFO MainThread:609784 [internal.py:wandb_internal():88] W&B internal server running at pid: 609784, started at: 2021-07-14 23:46:16.445875 +2021-07-14 23:46:16,448 DEBUG HandlerThread:609784 [handler.py:handle_request():124] handle_request: check_version +2021-07-14 23:46:16,448 INFO WriterThread:609784 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_234615-3p6vlfc3/run-3p6vlfc3.wandb +2021-07-14 23:46:16,449 DEBUG SenderThread:609784 [sender.py:send():179] send: header +2021-07-14 23:46:16,449 DEBUG SenderThread:609784 [sender.py:send_request():193] send_request: check_version +2021-07-14 23:46:16,489 DEBUG SenderThread:609784 [sender.py:send():179] send: run +2021-07-14 23:46:16,653 INFO SenderThread:609784 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_234615-3p6vlfc3/files +2021-07-14 23:46:16,653 INFO SenderThread:609784 [sender.py:_start_run_threads():716] run started: 3p6vlfc3 with start time 1626306375 +2021-07-14 23:46:16,655 DEBUG SenderThread:609784 [sender.py:send():179] send: summary +2021-07-14 23:46:16,655 INFO SenderThread:609784 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 23:46:16,656 DEBUG HandlerThread:609784 [handler.py:handle_request():124] handle_request: run_start +2021-07-14 23:46:17,658 INFO Thread-8 :609784 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_234615-3p6vlfc3/files/wandb-summary.json +2021-07-14 23:46:17,758 DEBUG HandlerThread:609784 [meta.py:__init__():39] meta init +2021-07-14 23:46:17,758 DEBUG HandlerThread:609784 [meta.py:__init__():53] meta init done +2021-07-14 23:46:17,758 DEBUG HandlerThread:609784 [meta.py:probe():210] probe +2021-07-14 23:46:17,759 DEBUG HandlerThread:609784 [meta.py:_setup_git():200] setup git +2021-07-14 23:46:17,789 DEBUG HandlerThread:609784 [meta.py:_setup_git():207] setup git done +2021-07-14 23:46:17,789 DEBUG HandlerThread:609784 [meta.py:_save_pip():57] save pip +2021-07-14 23:46:17,789 DEBUG HandlerThread:609784 [meta.py:_save_pip():71] save pip done +2021-07-14 23:46:17,789 DEBUG HandlerThread:609784 [meta.py:probe():252] probe done +2021-07-14 23:46:17,793 DEBUG SenderThread:609784 [sender.py:send():179] send: files +2021-07-14 23:46:17,793 INFO SenderThread:609784 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-14 23:46:17,800 DEBUG HandlerThread:609784 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:46:17,800 DEBUG SenderThread:609784 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:46:17,931 DEBUG SenderThread:609784 [sender.py:send():179] send: config +2021-07-14 23:46:17,931 DEBUG SenderThread:609784 [sender.py:send():179] send: config +2021-07-14 23:46:17,931 DEBUG SenderThread:609784 [sender.py:send():179] send: config +2021-07-14 23:46:18,248 INFO Thread-11 :609784 [upload_job.py:push():137] Uploaded file /tmp/tmp_6x4a4rxwandb/105rxll0-wandb-metadata.json +2021-07-14 23:46:18,655 INFO Thread-8 :609784 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_234615-3p6vlfc3/files/wandb-metadata.json +2021-07-14 23:46:18,655 INFO Thread-8 :609784 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_234615-3p6vlfc3/files/output.log +2021-07-14 23:46:18,656 INFO Thread-8 :609784 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_234615-3p6vlfc3/files/requirements.txt +2021-07-14 23:46:32,661 INFO Thread-8 :609784 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_234615-3p6vlfc3/files/output.log +2021-07-14 23:46:32,932 DEBUG HandlerThread:609784 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:46:32,933 DEBUG SenderThread:609784 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:46:45,834 DEBUG SenderThread:609784 [sender.py:send():179] send: stats +2021-07-14 23:46:47,667 INFO Thread-8 :609784 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_234615-3p6vlfc3/files/config.yaml +2021-07-14 23:46:48,065 DEBUG HandlerThread:609784 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:46:48,066 DEBUG SenderThread:609784 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:47:03,200 DEBUG HandlerThread:609784 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:47:03,200 DEBUG SenderThread:609784 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:47:15,905 DEBUG SenderThread:609784 [sender.py:send():179] send: stats +2021-07-14 23:47:18,345 DEBUG HandlerThread:609784 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:47:18,345 DEBUG SenderThread:609784 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:47:33,477 DEBUG HandlerThread:609784 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:47:33,477 DEBUG SenderThread:609784 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:47:45,968 DEBUG SenderThread:609784 [sender.py:send():179] send: stats +2021-07-14 23:47:48,610 DEBUG HandlerThread:609784 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:47:48,610 DEBUG SenderThread:609784 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:48:03,740 DEBUG HandlerThread:609784 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:48:03,741 DEBUG SenderThread:609784 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:48:16,036 DEBUG SenderThread:609784 [sender.py:send():179] send: stats +2021-07-14 23:48:18,872 DEBUG HandlerThread:609784 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:48:18,873 DEBUG SenderThread:609784 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:48:20,502 WARNING MainThread:609784 [internal.py:wandb_internal():147] Internal process interrupt: 1 +2021-07-14 23:48:20,643 WARNING MainThread:609784 [internal.py:wandb_internal():147] Internal process interrupt: 2 +2021-07-14 23:48:20,644 ERROR MainThread:609784 [internal.py:wandb_internal():150] Internal process interrupted. +2021-07-14 23:48:20,878 INFO HandlerThread:609784 [handler.py:finish():638] shutting down handler +2021-07-14 23:48:21,037 INFO WriterThread:609784 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_234615-3p6vlfc3/run-3p6vlfc3.wandb +2021-07-14 23:48:21,119 INFO SenderThread:609784 [sender.py:finish():945] shutting down sender +2021-07-14 23:48:21,119 INFO SenderThread:609784 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-14 23:48:21,505 INFO MainThread:609784 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210714_234615-3p6vlfc3/logs/debug.log b/wandb/run-20210714_234615-3p6vlfc3/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..4e4c9a4f0146e7c38cd2b08353a6dfd6981c84d0 --- /dev/null +++ b/wandb/run-20210714_234615-3p6vlfc3/logs/debug.log @@ -0,0 +1,27 @@ +2021-07-14 23:46:15,751 INFO MainThread:608526 [wandb_setup.py:_flush():69] setting env: {} +2021-07-14 23:46:15,751 INFO MainThread:608526 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-14 23:46:15,752 INFO MainThread:608526 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_234615-3p6vlfc3/logs/debug.log +2021-07-14 23:46:15,752 INFO MainThread:608526 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_234615-3p6vlfc3/logs/debug-internal.log +2021-07-14 23:46:15,752 INFO MainThread:608526 [wandb_init.py:init():370] calling init triggers +2021-07-14 23:46:15,752 INFO MainThread:608526 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-14 23:46:15,752 INFO MainThread:608526 [wandb_init.py:init():419] starting backend +2021-07-14 23:46:15,752 INFO MainThread:608526 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-14 23:46:15,803 INFO MainThread:608526 [backend.py:ensure_launched():135] starting backend process... +2021-07-14 23:46:15,852 INFO MainThread:608526 [backend.py:ensure_launched():139] started backend process with pid: 609784 +2021-07-14 23:46:15,854 INFO MainThread:608526 [wandb_init.py:init():424] backend started and connected +2021-07-14 23:46:15,858 INFO MainThread:608526 [wandb_init.py:init():472] updated telemetry +2021-07-14 23:46:15,859 INFO MainThread:608526 [wandb_init.py:init():491] communicating current version +2021-07-14 23:46:16,488 INFO MainThread:608526 [wandb_init.py:init():496] got version response +2021-07-14 23:46:16,488 INFO MainThread:608526 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-14 23:46:16,655 INFO MainThread:608526 [wandb_init.py:init():529] starting run threads in backend +2021-07-14 23:46:17,796 INFO MainThread:608526 [wandb_run.py:_console_start():1623] atexit reg +2021-07-14 23:46:17,796 INFO MainThread:608526 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-14 23:46:17,797 INFO MainThread:608526 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-14 23:46:17,799 INFO MainThread:608526 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-14 23:46:17,799 INFO MainThread:608526 [wandb_init.py:init():554] run started, returning control to user process +2021-07-14 23:46:17,805 INFO MainThread:608526 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_23-46-07_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 250, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-14 23:46:17,807 INFO MainThread:608526 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-14 23:46:17,808 INFO MainThread:608526 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000} +2021-07-14 23:48:20,504 INFO MainThread:608526 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 255 +2021-07-14 23:48:20,505 INFO MainThread:608526 [wandb_run.py:_restore():1565] restore diff --git a/wandb/run-20210714_234615-3p6vlfc3/run-3p6vlfc3.wandb b/wandb/run-20210714_234615-3p6vlfc3/run-3p6vlfc3.wandb new file mode 100644 index 0000000000000000000000000000000000000000..de32fff37de178536897d75bf88306caa54dffd8 Binary files /dev/null and b/wandb/run-20210714_234615-3p6vlfc3/run-3p6vlfc3.wandb differ diff --git a/wandb/run-20210714_234858-1nb14dm7/files/config.yaml b/wandb/run-20210714_234858-1nb14dm7/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7cc707c5f5c74d1dd48440a20762638854814894 --- /dev/null +++ b/wandb/run-20210714_234858-1nb14dm7/files/config.yaml @@ -0,0 +1,304 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 4 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul14_23-48-51_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 50 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 2000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 10000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210714_234858-1nb14dm7/files/output.log b/wandb/run-20210714_234858-1nb14dm7/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..a0f2fd4451704b4da68d71b21f5d9abb32e0fd5e --- /dev/null +++ b/wandb/run-20210714_234858-1nb14dm7/files/output.log @@ -0,0 +1,6 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( diff --git a/wandb/run-20210714_234858-1nb14dm7/files/requirements.txt b/wandb/run-20210714_234858-1nb14dm7/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210714_234858-1nb14dm7/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210714_234858-1nb14dm7/files/wandb-metadata.json b/wandb/run-20210714_234858-1nb14dm7/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8b778455bb7091f00b10b50ab25684806394b455 --- /dev/null +++ b/wandb/run-20210714_234858-1nb14dm7/files/wandb-metadata.json @@ -0,0 +1,45 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-14T23:49:00.797924", + "startedAt": "2021-07-14T23:48:58.775858", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=10000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=50", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=2000", + "--gradient_accumulation_steps=4" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210714_234858-1nb14dm7/files/wandb-summary.json b/wandb/run-20210714_234858-1nb14dm7/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210714_234858-1nb14dm7/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210714_234858-1nb14dm7/logs/debug-internal.log b/wandb/run-20210714_234858-1nb14dm7/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..6992cb5bc7350d172293105eca781d01420369ad --- /dev/null +++ b/wandb/run-20210714_234858-1nb14dm7/logs/debug-internal.log @@ -0,0 +1,99 @@ +2021-07-14 23:48:59,470 INFO MainThread:611423 [internal.py:wandb_internal():88] W&B internal server running at pid: 611423, started at: 2021-07-14 23:48:59.470596 +2021-07-14 23:48:59,473 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: check_version +2021-07-14 23:48:59,473 INFO WriterThread:611423 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_234858-1nb14dm7/run-1nb14dm7.wandb +2021-07-14 23:48:59,474 DEBUG SenderThread:611423 [sender.py:send():179] send: header +2021-07-14 23:48:59,474 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: check_version +2021-07-14 23:48:59,513 DEBUG SenderThread:611423 [sender.py:send():179] send: run +2021-07-14 23:48:59,693 INFO SenderThread:611423 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_234858-1nb14dm7/files +2021-07-14 23:48:59,694 INFO SenderThread:611423 [sender.py:_start_run_threads():716] run started: 1nb14dm7 with start time 1626306538 +2021-07-14 23:48:59,694 DEBUG SenderThread:611423 [sender.py:send():179] send: summary +2021-07-14 23:48:59,694 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: run_start +2021-07-14 23:48:59,695 INFO SenderThread:611423 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 23:49:00,696 INFO Thread-8 :611423 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_234858-1nb14dm7/files/wandb-summary.json +2021-07-14 23:49:00,797 DEBUG HandlerThread:611423 [meta.py:__init__():39] meta init +2021-07-14 23:49:00,797 DEBUG HandlerThread:611423 [meta.py:__init__():53] meta init done +2021-07-14 23:49:00,797 DEBUG HandlerThread:611423 [meta.py:probe():210] probe +2021-07-14 23:49:00,799 DEBUG HandlerThread:611423 [meta.py:_setup_git():200] setup git +2021-07-14 23:49:00,828 DEBUG HandlerThread:611423 [meta.py:_setup_git():207] setup git done +2021-07-14 23:49:00,828 DEBUG HandlerThread:611423 [meta.py:_save_pip():57] save pip +2021-07-14 23:49:00,829 DEBUG HandlerThread:611423 [meta.py:_save_pip():71] save pip done +2021-07-14 23:49:00,829 DEBUG HandlerThread:611423 [meta.py:probe():252] probe done +2021-07-14 23:49:00,832 DEBUG SenderThread:611423 [sender.py:send():179] send: files +2021-07-14 23:49:00,832 INFO SenderThread:611423 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-14 23:49:00,839 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:49:00,840 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:49:00,973 DEBUG SenderThread:611423 [sender.py:send():179] send: config +2021-07-14 23:49:00,974 DEBUG SenderThread:611423 [sender.py:send():179] send: config +2021-07-14 23:49:00,974 DEBUG SenderThread:611423 [sender.py:send():179] send: config +2021-07-14 23:49:01,291 INFO Thread-11 :611423 [upload_job.py:push():137] Uploaded file /tmp/tmp8gkqss28wandb/3utylojv-wandb-metadata.json +2021-07-14 23:49:01,696 INFO Thread-8 :611423 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_234858-1nb14dm7/files/requirements.txt +2021-07-14 23:49:01,696 INFO Thread-8 :611423 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_234858-1nb14dm7/files/output.log +2021-07-14 23:49:01,696 INFO Thread-8 :611423 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_234858-1nb14dm7/files/wandb-metadata.json +2021-07-14 23:49:15,703 INFO Thread-8 :611423 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_234858-1nb14dm7/files/output.log +2021-07-14 23:49:15,975 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:49:15,976 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:49:28,873 DEBUG SenderThread:611423 [sender.py:send():179] send: stats +2021-07-14 23:49:30,710 INFO Thread-8 :611423 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_234858-1nb14dm7/files/config.yaml +2021-07-14 23:49:31,108 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:49:31,109 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:49:46,241 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:49:46,241 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:49:58,942 DEBUG SenderThread:611423 [sender.py:send():179] send: stats +2021-07-14 23:50:01,376 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:50:01,376 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:50:16,507 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:50:16,507 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:50:29,008 DEBUG SenderThread:611423 [sender.py:send():179] send: stats +2021-07-14 23:50:31,638 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:50:31,638 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:50:46,770 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:50:46,770 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:50:59,072 DEBUG SenderThread:611423 [sender.py:send():179] send: stats +2021-07-14 23:51:01,902 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:51:01,902 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:51:17,036 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:51:17,036 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:51:29,138 DEBUG SenderThread:611423 [sender.py:send():179] send: stats +2021-07-14 23:51:32,170 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:51:32,170 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:51:47,301 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:51:47,302 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:51:59,209 DEBUG SenderThread:611423 [sender.py:send():179] send: stats +2021-07-14 23:52:02,435 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:52:02,436 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:52:17,574 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:52:17,574 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:52:29,283 DEBUG SenderThread:611423 [sender.py:send():179] send: stats +2021-07-14 23:52:32,704 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:52:32,704 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:52:47,835 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:52:47,835 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:52:59,358 DEBUG SenderThread:611423 [sender.py:send():179] send: stats +2021-07-14 23:53:02,969 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:53:02,969 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:53:18,103 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:53:18,103 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:53:29,435 DEBUG SenderThread:611423 [sender.py:send():179] send: stats +2021-07-14 23:53:33,953 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:53:33,954 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:53:49,091 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:53:49,092 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:53:59,513 DEBUG SenderThread:611423 [sender.py:send():179] send: stats +2021-07-14 23:54:04,224 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:54:04,224 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:54:19,356 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:54:19,357 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:54:29,582 DEBUG SenderThread:611423 [sender.py:send():179] send: stats +2021-07-14 23:54:34,489 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:54:34,489 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:54:49,620 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:54:49,620 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:54:59,658 DEBUG SenderThread:611423 [sender.py:send():179] send: stats +2021-07-14 23:55:04,755 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:55:04,756 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:55:19,888 DEBUG HandlerThread:611423 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:55:19,888 DEBUG SenderThread:611423 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:55:20,077 WARNING MainThread:611423 [internal.py:wandb_internal():147] Internal process interrupt: 1 +2021-07-14 23:55:20,252 WARNING MainThread:611423 [internal.py:wandb_internal():147] Internal process interrupt: 2 +2021-07-14 23:55:20,253 ERROR MainThread:611423 [internal.py:wandb_internal():150] Internal process interrupted. +2021-07-14 23:55:20,555 INFO MainThread:611423 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210714_234858-1nb14dm7/logs/debug.log b/wandb/run-20210714_234858-1nb14dm7/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..6d42b72e8b59523103868e99b1197850ff0c8b2c --- /dev/null +++ b/wandb/run-20210714_234858-1nb14dm7/logs/debug.log @@ -0,0 +1,27 @@ +2021-07-14 23:48:58,777 INFO MainThread:610166 [wandb_setup.py:_flush():69] setting env: {} +2021-07-14 23:48:58,777 INFO MainThread:610166 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-14 23:48:58,777 INFO MainThread:610166 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_234858-1nb14dm7/logs/debug.log +2021-07-14 23:48:58,777 INFO MainThread:610166 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_234858-1nb14dm7/logs/debug-internal.log +2021-07-14 23:48:58,778 INFO MainThread:610166 [wandb_init.py:init():370] calling init triggers +2021-07-14 23:48:58,778 INFO MainThread:610166 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-14 23:48:58,778 INFO MainThread:610166 [wandb_init.py:init():419] starting backend +2021-07-14 23:48:58,778 INFO MainThread:610166 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-14 23:48:58,824 INFO MainThread:610166 [backend.py:ensure_launched():135] starting backend process... +2021-07-14 23:48:58,870 INFO MainThread:610166 [backend.py:ensure_launched():139] started backend process with pid: 611423 +2021-07-14 23:48:58,872 INFO MainThread:610166 [wandb_init.py:init():424] backend started and connected +2021-07-14 23:48:58,874 INFO MainThread:610166 [wandb_init.py:init():472] updated telemetry +2021-07-14 23:48:58,875 INFO MainThread:610166 [wandb_init.py:init():491] communicating current version +2021-07-14 23:48:59,511 INFO MainThread:610166 [wandb_init.py:init():496] got version response +2021-07-14 23:48:59,511 INFO MainThread:610166 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-14 23:48:59,693 INFO MainThread:610166 [wandb_init.py:init():529] starting run threads in backend +2021-07-14 23:49:00,835 INFO MainThread:610166 [wandb_run.py:_console_start():1623] atexit reg +2021-07-14 23:49:00,836 INFO MainThread:610166 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-14 23:49:00,836 INFO MainThread:610166 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-14 23:49:00,838 INFO MainThread:610166 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-14 23:49:00,838 INFO MainThread:610166 [wandb_init.py:init():554] run started, returning control to user process +2021-07-14 23:49:00,846 INFO MainThread:610166 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_23-48-51_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-14 23:49:00,847 INFO MainThread:610166 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-14 23:49:00,849 INFO MainThread:610166 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000} +2021-07-14 23:55:20,080 INFO MainThread:610166 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 255 +2021-07-14 23:55:20,082 INFO MainThread:610166 [wandb_run.py:_restore():1565] restore diff --git a/wandb/run-20210714_234858-1nb14dm7/run-1nb14dm7.wandb b/wandb/run-20210714_234858-1nb14dm7/run-1nb14dm7.wandb new file mode 100644 index 0000000000000000000000000000000000000000..fb2de4d5ed5d4c8c43a6a829c7b93fb959b2aec2 Binary files /dev/null and b/wandb/run-20210714_234858-1nb14dm7/run-1nb14dm7.wandb differ diff --git a/wandb/run-20210714_235555-y01xq728/files/config.yaml b/wandb/run-20210714_235555-y01xq728/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..894152b0d5420e1cbd9dcb9070a50b06483df65f --- /dev/null +++ b/wandb/run-20210714_235555-y01xq728/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 4 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul14_23-55-47_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 50 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 2000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210714_235555-y01xq728/files/output.log b/wandb/run-20210714_235555-y01xq728/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..89f6fb47e4d0917b60b8039aaf9fdd6edebc3445 --- /dev/null +++ b/wandb/run-20210714_235555-y01xq728/files/output.log @@ -0,0 +1,39 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax.py", line 815, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210714_235555-y01xq728/files/requirements.txt b/wandb/run-20210714_235555-y01xq728/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210714_235555-y01xq728/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210714_235555-y01xq728/files/wandb-metadata.json b/wandb/run-20210714_235555-y01xq728/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..db7e627b313100c837567314d6468e44608f28d7 --- /dev/null +++ b/wandb/run-20210714_235555-y01xq728/files/wandb-metadata.json @@ -0,0 +1,45 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-14T23:55:57.056194", + "startedAt": "2021-07-14T23:55:55.024824", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=5000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=50", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=2000", + "--gradient_accumulation_steps=4" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210714_235555-y01xq728/files/wandb-summary.json b/wandb/run-20210714_235555-y01xq728/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210714_235555-y01xq728/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210714_235555-y01xq728/logs/debug-internal.log b/wandb/run-20210714_235555-y01xq728/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..b30895f1923ba77b22bb07ab83799409be47ebb1 --- /dev/null +++ b/wandb/run-20210714_235555-y01xq728/logs/debug-internal.log @@ -0,0 +1,233 @@ +2021-07-14 23:55:55,747 INFO MainThread:613304 [internal.py:wandb_internal():88] W&B internal server running at pid: 613304, started at: 2021-07-14 23:55:55.746857 +2021-07-14 23:55:55,749 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: check_version +2021-07-14 23:55:55,749 INFO WriterThread:613304 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/run-y01xq728.wandb +2021-07-14 23:55:55,750 DEBUG SenderThread:613304 [sender.py:send():179] send: header +2021-07-14 23:55:55,750 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: check_version +2021-07-14 23:55:55,788 DEBUG SenderThread:613304 [sender.py:send():179] send: run +2021-07-14 23:55:55,953 INFO SenderThread:613304 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files +2021-07-14 23:55:55,954 INFO SenderThread:613304 [sender.py:_start_run_threads():716] run started: y01xq728 with start time 1626306955 +2021-07-14 23:55:55,954 DEBUG SenderThread:613304 [sender.py:send():179] send: summary +2021-07-14 23:55:55,954 INFO SenderThread:613304 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 23:55:55,954 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: run_start +2021-07-14 23:55:56,957 INFO Thread-8 :613304 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/wandb-summary.json +2021-07-14 23:55:57,055 DEBUG HandlerThread:613304 [meta.py:__init__():39] meta init +2021-07-14 23:55:57,056 DEBUG HandlerThread:613304 [meta.py:__init__():53] meta init done +2021-07-14 23:55:57,056 DEBUG HandlerThread:613304 [meta.py:probe():210] probe +2021-07-14 23:55:57,057 DEBUG HandlerThread:613304 [meta.py:_setup_git():200] setup git +2021-07-14 23:55:57,086 DEBUG HandlerThread:613304 [meta.py:_setup_git():207] setup git done +2021-07-14 23:55:57,087 DEBUG HandlerThread:613304 [meta.py:_save_pip():57] save pip +2021-07-14 23:55:57,087 DEBUG HandlerThread:613304 [meta.py:_save_pip():71] save pip done +2021-07-14 23:55:57,087 DEBUG HandlerThread:613304 [meta.py:probe():252] probe done +2021-07-14 23:55:57,090 DEBUG SenderThread:613304 [sender.py:send():179] send: files +2021-07-14 23:55:57,091 INFO SenderThread:613304 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-14 23:55:57,098 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:55:57,098 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:55:57,230 DEBUG SenderThread:613304 [sender.py:send():179] send: config +2021-07-14 23:55:57,231 DEBUG SenderThread:613304 [sender.py:send():179] send: config +2021-07-14 23:55:57,231 DEBUG SenderThread:613304 [sender.py:send():179] send: config +2021-07-14 23:55:57,551 INFO Thread-11 :613304 [upload_job.py:push():137] Uploaded file /tmp/tmp_7vrmrnzwandb/30914hmv-wandb-metadata.json +2021-07-14 23:55:57,956 INFO Thread-8 :613304 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/requirements.txt +2021-07-14 23:55:57,956 INFO Thread-8 :613304 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/wandb-metadata.json +2021-07-14 23:55:57,956 INFO Thread-8 :613304 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/output.log +2021-07-14 23:56:11,962 INFO Thread-8 :613304 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/output.log +2021-07-14 23:56:12,233 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:56:12,233 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:56:13,962 INFO Thread-8 :613304 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/output.log +2021-07-14 23:56:25,133 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-14 23:56:26,968 INFO Thread-8 :613304 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/config.yaml +2021-07-14 23:56:27,371 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:56:27,372 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:56:42,505 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:56:42,505 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:56:55,203 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-14 23:56:57,640 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:56:57,641 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:57:12,787 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:57:12,788 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:57:25,275 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-14 23:57:27,922 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:57:27,922 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:57:43,054 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:57:43,055 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:57:55,351 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-14 23:57:58,186 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:57:58,187 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:58:13,317 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:58:13,318 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:58:25,419 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-14 23:58:28,450 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:58:28,450 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:58:43,580 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:58:43,581 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:58:55,493 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-14 23:58:58,713 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:58:58,713 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:59:13,843 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:59:13,844 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:59:25,562 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-14 23:59:28,973 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:59:28,974 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:59:44,104 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:59:44,104 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-14 23:59:55,632 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-14 23:59:59,242 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 23:59:59,243 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:00:14,374 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:00:14,375 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:00:25,703 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-15 00:00:29,505 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:00:29,505 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:00:44,637 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:00:44,637 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:00:55,773 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-15 00:00:59,774 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:00:59,774 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:01:14,907 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:01:14,907 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:01:25,847 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-15 00:01:30,041 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:01:30,042 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:01:45,174 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:01:45,174 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:01:55,920 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-15 00:02:00,307 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:02:00,308 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:02:15,449 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:02:15,449 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:02:25,995 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-15 00:02:30,588 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:02:30,588 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:02:45,719 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:02:45,720 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:02:56,066 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-15 00:03:00,850 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:03:00,850 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:03:15,978 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:03:15,978 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:03:26,139 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-15 00:03:31,111 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:03:31,112 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:03:46,244 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:03:46,244 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:03:56,128 INFO Thread-8 :613304 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/output.log +2021-07-15 00:03:56,212 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-15 00:04:01,389 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:04:01,390 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:04:16,550 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:04:16,551 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:04:26,288 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-15 00:04:31,686 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:04:31,686 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:04:46,826 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:04:46,826 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:04:56,371 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-15 00:05:01,959 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:05:01,959 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:05:17,092 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:05:17,092 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:05:22,163 INFO Thread-8 :613304 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/output.log +2021-07-15 00:05:23,289 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:05:23,289 DEBUG SenderThread:613304 [sender.py:send():179] send: telemetry +2021-07-15 00:05:23,289 DEBUG SenderThread:613304 [sender.py:send():179] send: exit +2021-07-15 00:05:23,289 INFO SenderThread:613304 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 00:05:23,291 INFO SenderThread:613304 [sender.py:send_exit():295] send defer +2021-07-15 00:05:23,291 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:05:23,292 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:05:23,292 INFO HandlerThread:613304 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 00:05:23,292 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: defer +2021-07-15 00:05:23,292 INFO SenderThread:613304 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 00:05:23,292 INFO SenderThread:613304 [sender.py:transition_state():308] send defer: 1 +2021-07-15 00:05:23,293 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:05:23,293 INFO HandlerThread:613304 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 00:05:23,339 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: defer +2021-07-15 00:05:23,339 INFO SenderThread:613304 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 00:05:23,339 INFO SenderThread:613304 [sender.py:transition_state():308] send defer: 2 +2021-07-15 00:05:23,339 DEBUG SenderThread:613304 [sender.py:send():179] send: stats +2021-07-15 00:05:23,340 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:05:23,340 INFO HandlerThread:613304 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 00:05:23,340 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: defer +2021-07-15 00:05:23,340 INFO SenderThread:613304 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 00:05:23,340 INFO SenderThread:613304 [sender.py:transition_state():308] send defer: 3 +2021-07-15 00:05:23,341 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:05:23,341 INFO HandlerThread:613304 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 00:05:23,341 DEBUG SenderThread:613304 [sender.py:send():179] send: summary +2021-07-15 00:05:23,342 INFO SenderThread:613304 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 00:05:23,342 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: defer +2021-07-15 00:05:23,342 INFO SenderThread:613304 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 00:05:23,342 INFO SenderThread:613304 [sender.py:transition_state():308] send defer: 4 +2021-07-15 00:05:23,342 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:05:23,342 INFO HandlerThread:613304 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 00:05:23,343 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: defer +2021-07-15 00:05:23,343 INFO SenderThread:613304 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 00:05:23,393 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:05:23,512 INFO SenderThread:613304 [sender.py:transition_state():308] send defer: 5 +2021-07-15 00:05:23,512 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:05:23,513 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:05:23,513 INFO HandlerThread:613304 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 00:05:23,513 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: defer +2021-07-15 00:05:23,514 INFO SenderThread:613304 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 00:05:23,514 INFO SenderThread:613304 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 00:05:23,615 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:05:24,163 INFO Thread-8 :613304 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/config.yaml +2021-07-15 00:05:24,164 INFO SenderThread:613304 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/output.log +2021-07-15 00:05:24,164 INFO SenderThread:613304 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/wandb-summary.json +2021-07-15 00:05:24,164 INFO SenderThread:613304 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files +2021-07-15 00:05:24,165 INFO SenderThread:613304 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/requirements.txt requirements.txt +2021-07-15 00:05:24,165 INFO SenderThread:613304 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/output.log output.log +2021-07-15 00:05:24,165 INFO SenderThread:613304 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/wandb-metadata.json wandb-metadata.json +2021-07-15 00:05:24,165 INFO SenderThread:613304 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/config.yaml config.yaml +2021-07-15 00:05:24,165 INFO SenderThread:613304 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/wandb-summary.json wandb-summary.json +2021-07-15 00:05:24,166 INFO SenderThread:613304 [sender.py:transition_state():308] send defer: 6 +2021-07-15 00:05:24,166 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:05:24,167 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:05:24,167 INFO HandlerThread:613304 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 00:05:24,170 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: defer +2021-07-15 00:05:24,170 INFO SenderThread:613304 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 00:05:24,170 INFO SenderThread:613304 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 00:05:24,275 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:05:24,275 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:05:24,377 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:05:24,378 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:05:24,479 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:05:24,480 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:05:24,582 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:05:24,582 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:05:24,625 INFO Thread-15 :613304 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/wandb-summary.json +2021-07-15 00:05:24,632 INFO Thread-12 :613304 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/requirements.txt +2021-07-15 00:05:24,643 INFO Thread-13 :613304 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/output.log +2021-07-15 00:05:24,651 INFO Thread-14 :613304 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/files/config.yaml +2021-07-15 00:05:24,684 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:05:24,684 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:05:24,786 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:05:24,786 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:05:24,851 INFO Thread-7 :613304 [sender.py:transition_state():308] send defer: 7 +2021-07-15 00:05:24,852 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:05:24,852 INFO HandlerThread:613304 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 00:05:24,852 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: defer +2021-07-15 00:05:24,852 INFO SenderThread:613304 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 00:05:24,888 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:05:25,136 INFO SenderThread:613304 [sender.py:transition_state():308] send defer: 8 +2021-07-15 00:05:25,136 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:05:25,137 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:05:25,137 INFO HandlerThread:613304 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 00:05:25,137 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: defer +2021-07-15 00:05:25,137 INFO SenderThread:613304 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 00:05:25,138 INFO SenderThread:613304 [sender.py:transition_state():308] send defer: 9 +2021-07-15 00:05:25,138 DEBUG SenderThread:613304 [sender.py:send():179] send: final +2021-07-15 00:05:25,138 DEBUG SenderThread:613304 [sender.py:send():179] send: footer +2021-07-15 00:05:25,138 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:05:25,139 INFO HandlerThread:613304 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 00:05:25,139 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: defer +2021-07-15 00:05:25,139 INFO SenderThread:613304 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 00:05:25,238 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:05:25,239 DEBUG SenderThread:613304 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:05:25,239 INFO SenderThread:613304 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 00:05:25,240 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 00:05:25,241 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 00:05:25,242 DEBUG HandlerThread:613304 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 00:05:25,242 INFO HandlerThread:613304 [handler.py:finish():638] shutting down handler +2021-07-15 00:05:26,139 INFO WriterThread:613304 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/run-y01xq728.wandb +2021-07-15 00:05:26,239 INFO SenderThread:613304 [sender.py:finish():945] shutting down sender +2021-07-15 00:05:26,239 INFO SenderThread:613304 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 00:05:26,240 INFO SenderThread:613304 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 00:05:26,242 INFO MainThread:613304 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210714_235555-y01xq728/logs/debug.log b/wandb/run-20210714_235555-y01xq728/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..c9060016cbb10756011b64f065977fee71ffdc1e --- /dev/null +++ b/wandb/run-20210714_235555-y01xq728/logs/debug.log @@ -0,0 +1,119 @@ +2021-07-14 23:55:55,026 INFO MainThread:612049 [wandb_setup.py:_flush():69] setting env: {} +2021-07-14 23:55:55,026 INFO MainThread:612049 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-14 23:55:55,026 INFO MainThread:612049 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/logs/debug.log +2021-07-14 23:55:55,026 INFO MainThread:612049 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_235555-y01xq728/logs/debug-internal.log +2021-07-14 23:55:55,026 INFO MainThread:612049 [wandb_init.py:init():370] calling init triggers +2021-07-14 23:55:55,027 INFO MainThread:612049 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-14 23:55:55,027 INFO MainThread:612049 [wandb_init.py:init():419] starting backend +2021-07-14 23:55:55,027 INFO MainThread:612049 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-14 23:55:55,072 INFO MainThread:612049 [backend.py:ensure_launched():135] starting backend process... +2021-07-14 23:55:55,117 INFO MainThread:612049 [backend.py:ensure_launched():139] started backend process with pid: 613304 +2021-07-14 23:55:55,119 INFO MainThread:612049 [wandb_init.py:init():424] backend started and connected +2021-07-14 23:55:55,122 INFO MainThread:612049 [wandb_init.py:init():472] updated telemetry +2021-07-14 23:55:55,123 INFO MainThread:612049 [wandb_init.py:init():491] communicating current version +2021-07-14 23:55:55,786 INFO MainThread:612049 [wandb_init.py:init():496] got version response +2021-07-14 23:55:55,786 INFO MainThread:612049 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-14 23:55:55,954 INFO MainThread:612049 [wandb_init.py:init():529] starting run threads in backend +2021-07-14 23:55:57,093 INFO MainThread:612049 [wandb_run.py:_console_start():1623] atexit reg +2021-07-14 23:55:57,094 INFO MainThread:612049 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-14 23:55:57,094 INFO MainThread:612049 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-14 23:55:57,096 INFO MainThread:612049 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-14 23:55:57,096 INFO MainThread:612049 [wandb_init.py:init():554] run started, returning control to user process +2021-07-14 23:55:57,104 INFO MainThread:612049 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_23-55-47_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-14 23:55:57,105 INFO MainThread:612049 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-14 23:55:57,107 INFO MainThread:612049 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000} +2021-07-15 00:05:20,827 INFO MainThread:612049 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 00:05:20,828 INFO MainThread:612049 [wandb_run.py:_restore():1565] restore +2021-07-15 00:05:23,292 INFO MainThread:612049 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 00:05:23,513 INFO MainThread:612049 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 00:05:24,174 INFO MainThread:612049 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11344 +} + +2021-07-15 00:05:24,276 INFO MainThread:612049 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11346 +} + +2021-07-15 00:05:24,378 INFO MainThread:612049 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11346 + total_bytes: 11346 +} + +2021-07-15 00:05:24,480 INFO MainThread:612049 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11346 + total_bytes: 11346 +} + +2021-07-15 00:05:24,583 INFO MainThread:612049 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11346 + total_bytes: 11346 +} + +2021-07-15 00:05:24,685 INFO MainThread:612049 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11346 + total_bytes: 11346 +} + +2021-07-15 00:05:24,787 INFO MainThread:612049 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11346 + total_bytes: 11346 +} + +2021-07-15 00:05:25,137 INFO MainThread:612049 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11346 + total_bytes: 11346 +} + +2021-07-15 00:05:25,240 INFO MainThread:612049 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11346 + total_bytes: 11346 +} + +2021-07-15 00:05:26,548 INFO MainThread:612049 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210714_235555-y01xq728/run-y01xq728.wandb b/wandb/run-20210714_235555-y01xq728/run-y01xq728.wandb new file mode 100644 index 0000000000000000000000000000000000000000..ce36975200572b3eea0b8b83112754df90ba75be Binary files /dev/null and b/wandb/run-20210714_235555-y01xq728/run-y01xq728.wandb differ diff --git a/wandb/run-20210715_000757-1ymp5lov/files/config.yaml b/wandb/run-20210715_000757-1ymp5lov/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14dee6ae922a908cf480af61907a3d0373310514 --- /dev/null +++ b/wandb/run-20210715_000757-1ymp5lov/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 4 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_00-07-49_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 50 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 2000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_000757-1ymp5lov/files/output.log b/wandb/run-20210715_000757-1ymp5lov/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..0d9ff2d52ab764575d1a716e1e722b0336297b43 --- /dev/null +++ b/wandb/run-20210715_000757-1ymp5lov/files/output.log @@ -0,0 +1,39 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 6.63G free, 0B reserved, and 6.57G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax.py", line 815, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 6.63G free, 0B reserved, and 6.57G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210715_000757-1ymp5lov/files/requirements.txt b/wandb/run-20210715_000757-1ymp5lov/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_000757-1ymp5lov/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_000757-1ymp5lov/files/wandb-metadata.json b/wandb/run-20210715_000757-1ymp5lov/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..3466727dcb382108ccd32c1f4e3ae5f290aae44d --- /dev/null +++ b/wandb/run-20210715_000757-1ymp5lov/files/wandb-metadata.json @@ -0,0 +1,45 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T00:07:59.582499", + "startedAt": "2021-07-15T00:07:57.603869", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=5000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=50", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=2000", + "--gradient_accumulation_steps=4" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_000757-1ymp5lov/files/wandb-summary.json b/wandb/run-20210715_000757-1ymp5lov/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_000757-1ymp5lov/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_000757-1ymp5lov/logs/debug-internal.log b/wandb/run-20210715_000757-1ymp5lov/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..0d1c548a2ece5f372c58d3b59be9eccf28795407 --- /dev/null +++ b/wandb/run-20210715_000757-1ymp5lov/logs/debug-internal.log @@ -0,0 +1,223 @@ +2021-07-15 00:07:58,272 INFO MainThread:615600 [internal.py:wandb_internal():88] W&B internal server running at pid: 615600, started at: 2021-07-15 00:07:58.272128 +2021-07-15 00:07:58,274 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 00:07:58,274 INFO WriterThread:615600 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/run-1ymp5lov.wandb +2021-07-15 00:07:58,275 DEBUG SenderThread:615600 [sender.py:send():179] send: header +2021-07-15 00:07:58,275 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: check_version +2021-07-15 00:07:58,315 DEBUG SenderThread:615600 [sender.py:send():179] send: run +2021-07-15 00:07:58,483 INFO SenderThread:615600 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files +2021-07-15 00:07:58,483 INFO SenderThread:615600 [sender.py:_start_run_threads():716] run started: 1ymp5lov with start time 1626307677 +2021-07-15 00:07:58,484 DEBUG SenderThread:615600 [sender.py:send():179] send: summary +2021-07-15 00:07:58,484 INFO SenderThread:615600 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 00:07:58,485 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 00:07:59,486 INFO Thread-8 :615600 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/wandb-summary.json +2021-07-15 00:07:59,582 DEBUG HandlerThread:615600 [meta.py:__init__():39] meta init +2021-07-15 00:07:59,582 DEBUG HandlerThread:615600 [meta.py:__init__():53] meta init done +2021-07-15 00:07:59,582 DEBUG HandlerThread:615600 [meta.py:probe():210] probe +2021-07-15 00:07:59,583 DEBUG HandlerThread:615600 [meta.py:_setup_git():200] setup git +2021-07-15 00:07:59,609 DEBUG HandlerThread:615600 [meta.py:_setup_git():207] setup git done +2021-07-15 00:07:59,610 DEBUG HandlerThread:615600 [meta.py:_save_pip():57] save pip +2021-07-15 00:07:59,610 DEBUG HandlerThread:615600 [meta.py:_save_pip():71] save pip done +2021-07-15 00:07:59,610 DEBUG HandlerThread:615600 [meta.py:probe():252] probe done +2021-07-15 00:07:59,613 DEBUG SenderThread:615600 [sender.py:send():179] send: files +2021-07-15 00:07:59,614 INFO SenderThread:615600 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 00:07:59,622 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:07:59,622 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:07:59,752 DEBUG SenderThread:615600 [sender.py:send():179] send: config +2021-07-15 00:07:59,752 DEBUG SenderThread:615600 [sender.py:send():179] send: config +2021-07-15 00:07:59,752 DEBUG SenderThread:615600 [sender.py:send():179] send: config +2021-07-15 00:08:00,093 INFO Thread-11 :615600 [upload_job.py:push():137] Uploaded file /tmp/tmpqbmmx6zswandb/3r9qdffv-wandb-metadata.json +2021-07-15 00:08:00,485 INFO Thread-8 :615600 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/requirements.txt +2021-07-15 00:08:00,485 INFO Thread-8 :615600 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/wandb-metadata.json +2021-07-15 00:08:00,486 INFO Thread-8 :615600 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/output.log +2021-07-15 00:08:14,491 INFO Thread-8 :615600 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/output.log +2021-07-15 00:08:14,754 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:08:14,754 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:08:27,665 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:08:29,497 INFO Thread-8 :615600 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/config.yaml +2021-07-15 00:08:29,886 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:08:29,887 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:08:45,017 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:08:45,018 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:08:57,741 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:09:00,151 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:09:00,151 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:09:15,283 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:09:15,283 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:09:27,808 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:09:30,415 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:09:30,415 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:09:45,546 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:09:45,547 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:09:57,861 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:10:00,678 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:10:00,679 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:10:15,809 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:10:15,810 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:10:27,924 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:10:30,942 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:10:30,942 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:10:46,074 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:10:46,074 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:10:57,996 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:11:01,208 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:11:01,208 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:11:16,344 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:11:16,344 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:11:28,070 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:11:31,476 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:11:31,476 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:11:46,611 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:11:46,611 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:11:58,144 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:12:01,748 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:12:01,749 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:12:16,884 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:12:16,884 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:12:28,209 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:12:32,014 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:12:32,014 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:12:47,146 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:12:47,147 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:12:58,278 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:13:02,280 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:13:02,280 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:13:17,412 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:13:17,413 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:13:28,350 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:13:32,544 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:13:32,544 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:13:47,679 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:13:47,679 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:13:58,420 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:14:02,813 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:14:02,814 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:14:17,958 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:14:17,959 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:14:28,489 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:14:33,090 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:14:33,091 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:14:48,223 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:14:48,224 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:14:58,559 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:15:03,354 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:15:03,355 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:15:16,673 INFO Thread-8 :615600 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/output.log +2021-07-15 00:15:18,488 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:15:18,488 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:15:28,630 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:15:33,644 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:15:33,645 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:15:48,779 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:15:48,780 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:15:58,711 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:16:03,911 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:16:03,912 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:16:19,047 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:16:19,047 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:16:28,794 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:16:34,182 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:16:34,183 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:16:38,710 INFO Thread-8 :615600 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/output.log +2021-07-15 00:16:39,455 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:16:39,455 DEBUG SenderThread:615600 [sender.py:send():179] send: telemetry +2021-07-15 00:16:39,456 DEBUG SenderThread:615600 [sender.py:send():179] send: exit +2021-07-15 00:16:39,456 INFO SenderThread:615600 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 00:16:39,457 INFO SenderThread:615600 [sender.py:send_exit():295] send defer +2021-07-15 00:16:39,458 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:16:39,458 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:16:39,459 INFO HandlerThread:615600 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 00:16:39,459 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: defer +2021-07-15 00:16:39,459 INFO SenderThread:615600 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 00:16:39,459 INFO SenderThread:615600 [sender.py:transition_state():308] send defer: 1 +2021-07-15 00:16:39,459 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:16:39,459 INFO HandlerThread:615600 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 00:16:39,520 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: defer +2021-07-15 00:16:39,520 INFO SenderThread:615600 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 00:16:39,520 INFO SenderThread:615600 [sender.py:transition_state():308] send defer: 2 +2021-07-15 00:16:39,520 DEBUG SenderThread:615600 [sender.py:send():179] send: stats +2021-07-15 00:16:39,521 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:16:39,521 INFO HandlerThread:615600 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 00:16:39,521 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: defer +2021-07-15 00:16:39,521 INFO SenderThread:615600 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 00:16:39,521 INFO SenderThread:615600 [sender.py:transition_state():308] send defer: 3 +2021-07-15 00:16:39,522 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:16:39,522 INFO HandlerThread:615600 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 00:16:39,522 DEBUG SenderThread:615600 [sender.py:send():179] send: summary +2021-07-15 00:16:39,523 INFO SenderThread:615600 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 00:16:39,523 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: defer +2021-07-15 00:16:39,523 INFO SenderThread:615600 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 00:16:39,523 INFO SenderThread:615600 [sender.py:transition_state():308] send defer: 4 +2021-07-15 00:16:39,524 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:16:39,524 INFO HandlerThread:615600 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 00:16:39,524 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: defer +2021-07-15 00:16:39,524 INFO SenderThread:615600 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 00:16:39,560 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:16:39,701 INFO SenderThread:615600 [sender.py:transition_state():308] send defer: 5 +2021-07-15 00:16:39,701 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:16:39,702 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:16:39,702 INFO HandlerThread:615600 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 00:16:39,702 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: defer +2021-07-15 00:16:39,702 INFO SenderThread:615600 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 00:16:39,702 INFO SenderThread:615600 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 00:16:39,710 INFO Thread-8 :615600 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/config.yaml +2021-07-15 00:16:39,711 INFO SenderThread:615600 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/wandb-summary.json +2021-07-15 00:16:39,711 INFO SenderThread:615600 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/output.log +2021-07-15 00:16:39,711 INFO SenderThread:615600 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files +2021-07-15 00:16:39,711 INFO SenderThread:615600 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/requirements.txt requirements.txt +2021-07-15 00:16:39,711 INFO SenderThread:615600 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/output.log output.log +2021-07-15 00:16:39,711 INFO SenderThread:615600 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/wandb-metadata.json wandb-metadata.json +2021-07-15 00:16:39,712 INFO SenderThread:615600 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/config.yaml config.yaml +2021-07-15 00:16:39,712 INFO SenderThread:615600 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/wandb-summary.json wandb-summary.json +2021-07-15 00:16:39,712 INFO SenderThread:615600 [sender.py:transition_state():308] send defer: 6 +2021-07-15 00:16:39,716 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:16:39,716 INFO HandlerThread:615600 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 00:16:39,719 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: defer +2021-07-15 00:16:39,719 INFO SenderThread:615600 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 00:16:39,719 INFO SenderThread:615600 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 00:16:39,803 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:16:39,803 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:16:39,905 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:16:39,905 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:16:40,007 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:16:40,007 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:16:40,109 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:16:40,109 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:16:40,166 INFO Thread-14 :615600 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/config.yaml +2021-07-15 00:16:40,175 INFO Thread-12 :615600 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/requirements.txt +2021-07-15 00:16:40,189 INFO Thread-15 :615600 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/wandb-summary.json +2021-07-15 00:16:40,193 INFO Thread-13 :615600 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/files/output.log +2021-07-15 00:16:40,211 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:16:40,211 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:16:40,313 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:16:40,313 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:16:40,394 INFO Thread-7 :615600 [sender.py:transition_state():308] send defer: 7 +2021-07-15 00:16:40,394 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:16:40,394 INFO HandlerThread:615600 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 00:16:40,394 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: defer +2021-07-15 00:16:40,395 INFO SenderThread:615600 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 00:16:40,415 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:16:40,671 INFO SenderThread:615600 [sender.py:transition_state():308] send defer: 8 +2021-07-15 00:16:40,671 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:16:40,671 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:16:40,671 INFO HandlerThread:615600 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 00:16:40,672 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: defer +2021-07-15 00:16:40,672 INFO SenderThread:615600 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 00:16:40,672 INFO SenderThread:615600 [sender.py:transition_state():308] send defer: 9 +2021-07-15 00:16:40,672 DEBUG SenderThread:615600 [sender.py:send():179] send: final +2021-07-15 00:16:40,672 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:16:40,673 INFO HandlerThread:615600 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 00:16:40,673 DEBUG SenderThread:615600 [sender.py:send():179] send: footer +2021-07-15 00:16:40,673 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: defer +2021-07-15 00:16:40,673 INFO SenderThread:615600 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 00:16:40,772 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:16:40,773 DEBUG SenderThread:615600 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:16:40,773 INFO SenderThread:615600 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 00:16:40,774 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 00:16:40,775 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 00:16:40,775 DEBUG HandlerThread:615600 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 00:16:40,776 INFO HandlerThread:615600 [handler.py:finish():638] shutting down handler +2021-07-15 00:16:41,673 INFO WriterThread:615600 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/run-1ymp5lov.wandb +2021-07-15 00:16:41,773 INFO SenderThread:615600 [sender.py:finish():945] shutting down sender +2021-07-15 00:16:41,774 INFO SenderThread:615600 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 00:16:41,774 INFO SenderThread:615600 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 00:16:41,776 INFO MainThread:615600 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_000757-1ymp5lov/logs/debug.log b/wandb/run-20210715_000757-1ymp5lov/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..2338349b002489bf016ea74c6efd72fee46852f5 --- /dev/null +++ b/wandb/run-20210715_000757-1ymp5lov/logs/debug.log @@ -0,0 +1,111 @@ +2021-07-15 00:07:57,605 INFO MainThread:614342 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 00:07:57,605 INFO MainThread:614342 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 00:07:57,605 INFO MainThread:614342 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/logs/debug.log +2021-07-15 00:07:57,605 INFO MainThread:614342 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_000757-1ymp5lov/logs/debug-internal.log +2021-07-15 00:07:57,605 INFO MainThread:614342 [wandb_init.py:init():370] calling init triggers +2021-07-15 00:07:57,605 INFO MainThread:614342 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 00:07:57,605 INFO MainThread:614342 [wandb_init.py:init():419] starting backend +2021-07-15 00:07:57,605 INFO MainThread:614342 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 00:07:57,643 INFO MainThread:614342 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 00:07:57,681 INFO MainThread:614342 [backend.py:ensure_launched():139] started backend process with pid: 615600 +2021-07-15 00:07:57,683 INFO MainThread:614342 [wandb_init.py:init():424] backend started and connected +2021-07-15 00:07:57,686 INFO MainThread:614342 [wandb_init.py:init():472] updated telemetry +2021-07-15 00:07:57,687 INFO MainThread:614342 [wandb_init.py:init():491] communicating current version +2021-07-15 00:07:58,313 INFO MainThread:614342 [wandb_init.py:init():496] got version response +2021-07-15 00:07:58,314 INFO MainThread:614342 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 00:07:58,485 INFO MainThread:614342 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 00:07:59,616 INFO MainThread:614342 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 00:07:59,617 INFO MainThread:614342 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 00:07:59,618 INFO MainThread:614342 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 00:07:59,620 INFO MainThread:614342 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 00:07:59,620 INFO MainThread:614342 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 00:07:59,626 INFO MainThread:614342 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_00-07-49_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 00:07:59,628 INFO MainThread:614342 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-15 00:07:59,629 INFO MainThread:614342 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000} +2021-07-15 00:16:37,252 INFO MainThread:614342 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 00:16:37,253 INFO MainThread:614342 [wandb_run.py:_restore():1565] restore +2021-07-15 00:16:39,459 INFO MainThread:614342 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 00:16:39,702 INFO MainThread:614342 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 00:16:39,804 INFO MainThread:614342 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11346 +} + +2021-07-15 00:16:39,906 INFO MainThread:614342 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11346 + total_bytes: 11346 +} + +2021-07-15 00:16:40,008 INFO MainThread:614342 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11346 + total_bytes: 11346 +} + +2021-07-15 00:16:40,110 INFO MainThread:614342 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11346 + total_bytes: 11346 +} + +2021-07-15 00:16:40,212 INFO MainThread:614342 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11346 + total_bytes: 11346 +} + +2021-07-15 00:16:40,314 INFO MainThread:614342 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11346 + total_bytes: 11346 +} + +2021-07-15 00:16:40,671 INFO MainThread:614342 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11346 + total_bytes: 11346 +} + +2021-07-15 00:16:40,774 INFO MainThread:614342 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11346 + total_bytes: 11346 +} + +2021-07-15 00:16:42,055 INFO MainThread:614342 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_000757-1ymp5lov/run-1ymp5lov.wandb b/wandb/run-20210715_000757-1ymp5lov/run-1ymp5lov.wandb new file mode 100644 index 0000000000000000000000000000000000000000..dc09bc016050ea67bf1e982180bf0bb4ce153bf8 Binary files /dev/null and b/wandb/run-20210715_000757-1ymp5lov/run-1ymp5lov.wandb differ diff --git a/wandb/run-20210715_001736-u185degl/files/config.yaml b/wandb/run-20210715_001736-u185degl/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..daa805bf3f18efb1eed9786da36bf4711045a9ed --- /dev/null +++ b/wandb/run-20210715_001736-u185degl/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 4 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_00-17-27_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 25 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 2000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_001736-u185degl/files/output.log b/wandb/run-20210715_001736-u185degl/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e09f2c5f3dd3bef1accbda91ed24a3b03cf809f9 --- /dev/null +++ b/wandb/run-20210715_001736-u185degl/files/output.log @@ -0,0 +1,39 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 7.01G free, 0B reserved, and 6.96G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax.py", line 815, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 7.01G free, 0B reserved, and 6.96G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210715_001736-u185degl/files/requirements.txt b/wandb/run-20210715_001736-u185degl/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_001736-u185degl/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_001736-u185degl/files/wandb-metadata.json b/wandb/run-20210715_001736-u185degl/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..f6bae97e1babb375544f621f341c0c778acd37b4 --- /dev/null +++ b/wandb/run-20210715_001736-u185degl/files/wandb-metadata.json @@ -0,0 +1,45 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T00:17:38.284204", + "startedAt": "2021-07-15T00:17:36.238199", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=5000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=25", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=2000", + "--gradient_accumulation_steps=4" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_001736-u185degl/files/wandb-summary.json b/wandb/run-20210715_001736-u185degl/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_001736-u185degl/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_001736-u185degl/logs/debug-internal.log b/wandb/run-20210715_001736-u185degl/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..b3efc51d18a1817d2df00c4c1f2784432ea189b8 --- /dev/null +++ b/wandb/run-20210715_001736-u185degl/logs/debug-internal.log @@ -0,0 +1,222 @@ +2021-07-15 00:17:36,936 INFO MainThread:617849 [internal.py:wandb_internal():88] W&B internal server running at pid: 617849, started at: 2021-07-15 00:17:36.936178 +2021-07-15 00:17:36,938 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 00:17:36,939 DEBUG SenderThread:617849 [sender.py:send():179] send: header +2021-07-15 00:17:36,939 INFO WriterThread:617849 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/run-u185degl.wandb +2021-07-15 00:17:36,939 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: check_version +2021-07-15 00:17:36,977 DEBUG SenderThread:617849 [sender.py:send():179] send: run +2021-07-15 00:17:37,151 INFO SenderThread:617849 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files +2021-07-15 00:17:37,151 INFO SenderThread:617849 [sender.py:_start_run_threads():716] run started: u185degl with start time 1626308256 +2021-07-15 00:17:37,151 DEBUG SenderThread:617849 [sender.py:send():179] send: summary +2021-07-15 00:17:37,151 INFO SenderThread:617849 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 00:17:37,151 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 00:17:38,153 INFO Thread-8 :617849 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/wandb-summary.json +2021-07-15 00:17:38,283 DEBUG HandlerThread:617849 [meta.py:__init__():39] meta init +2021-07-15 00:17:38,284 DEBUG HandlerThread:617849 [meta.py:__init__():53] meta init done +2021-07-15 00:17:38,284 DEBUG HandlerThread:617849 [meta.py:probe():210] probe +2021-07-15 00:17:38,285 DEBUG HandlerThread:617849 [meta.py:_setup_git():200] setup git +2021-07-15 00:17:38,315 DEBUG HandlerThread:617849 [meta.py:_setup_git():207] setup git done +2021-07-15 00:17:38,315 DEBUG HandlerThread:617849 [meta.py:_save_pip():57] save pip +2021-07-15 00:17:38,316 DEBUG HandlerThread:617849 [meta.py:_save_pip():71] save pip done +2021-07-15 00:17:38,316 DEBUG HandlerThread:617849 [meta.py:probe():252] probe done +2021-07-15 00:17:38,319 DEBUG SenderThread:617849 [sender.py:send():179] send: files +2021-07-15 00:17:38,319 INFO SenderThread:617849 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 00:17:38,325 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:17:38,325 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:17:38,454 DEBUG SenderThread:617849 [sender.py:send():179] send: config +2021-07-15 00:17:38,454 DEBUG SenderThread:617849 [sender.py:send():179] send: config +2021-07-15 00:17:38,454 DEBUG SenderThread:617849 [sender.py:send():179] send: config +2021-07-15 00:17:38,776 INFO Thread-11 :617849 [upload_job.py:push():137] Uploaded file /tmp/tmpxo1vs5f9wandb/odzm0bpa-wandb-metadata.json +2021-07-15 00:17:39,152 INFO Thread-8 :617849 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/requirements.txt +2021-07-15 00:17:39,152 INFO Thread-8 :617849 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/wandb-metadata.json +2021-07-15 00:17:39,152 INFO Thread-8 :617849 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/output.log +2021-07-15 00:17:53,157 INFO Thread-8 :617849 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/output.log +2021-07-15 00:17:53,456 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:17:53,456 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:18:06,368 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:18:08,163 INFO Thread-8 :617849 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/config.yaml +2021-07-15 00:18:08,591 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:18:08,592 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:18:23,724 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:18:23,725 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:18:36,448 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:18:38,860 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:18:38,860 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:18:53,994 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:18:53,994 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:19:06,524 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:19:09,126 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:19:09,127 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:19:24,259 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:19:24,260 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:19:36,600 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:19:39,407 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:19:39,407 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:19:54,540 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:19:54,540 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:20:06,663 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:20:09,672 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:20:09,672 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:20:24,807 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:20:24,807 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:20:36,726 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:20:39,938 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:20:39,938 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:20:55,070 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:20:55,070 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:21:06,791 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:21:10,200 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:21:10,200 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:21:25,331 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:21:25,331 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:21:36,863 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:21:40,462 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:21:40,462 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:21:55,592 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:21:55,593 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:22:06,940 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:22:10,734 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:22:10,735 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:22:25,867 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:22:25,867 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:22:37,017 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:22:40,998 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:22:40,998 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:22:56,126 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:22:56,126 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:23:07,091 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:23:11,257 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:23:11,257 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:23:26,389 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:23:26,389 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:23:37,156 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:23:41,522 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:23:41,522 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:23:56,651 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:23:56,652 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:24:07,226 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:24:11,783 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:24:11,783 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:24:26,932 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:24:26,932 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:24:27,307 INFO Thread-8 :617849 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/output.log +2021-07-15 00:24:37,304 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:24:42,077 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:24:42,078 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:24:57,226 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:24:57,227 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:25:07,379 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:25:12,357 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:25:12,357 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:25:27,488 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:25:27,489 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:25:37,452 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:25:42,619 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:25:42,619 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:25:54,343 INFO Thread-8 :617849 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/output.log +2021-07-15 00:25:54,412 DEBUG SenderThread:617849 [sender.py:send():179] send: telemetry +2021-07-15 00:25:54,412 DEBUG SenderThread:617849 [sender.py:send():179] send: exit +2021-07-15 00:25:54,412 INFO SenderThread:617849 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 00:25:54,412 INFO SenderThread:617849 [sender.py:send_exit():295] send defer +2021-07-15 00:25:54,413 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:25:54,413 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:25:54,413 INFO HandlerThread:617849 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 00:25:54,413 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:25:54,413 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: defer +2021-07-15 00:25:54,414 INFO SenderThread:617849 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 00:25:54,414 INFO SenderThread:617849 [sender.py:transition_state():308] send defer: 1 +2021-07-15 00:25:54,414 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:25:54,414 INFO HandlerThread:617849 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 00:25:54,493 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: defer +2021-07-15 00:25:54,493 INFO SenderThread:617849 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 00:25:54,493 INFO SenderThread:617849 [sender.py:transition_state():308] send defer: 2 +2021-07-15 00:25:54,493 DEBUG SenderThread:617849 [sender.py:send():179] send: stats +2021-07-15 00:25:54,494 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:25:54,494 INFO HandlerThread:617849 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 00:25:54,494 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: defer +2021-07-15 00:25:54,494 INFO SenderThread:617849 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 00:25:54,494 INFO SenderThread:617849 [sender.py:transition_state():308] send defer: 3 +2021-07-15 00:25:54,494 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:25:54,494 INFO HandlerThread:617849 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 00:25:54,494 DEBUG SenderThread:617849 [sender.py:send():179] send: summary +2021-07-15 00:25:54,495 INFO SenderThread:617849 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 00:25:54,495 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: defer +2021-07-15 00:25:54,495 INFO SenderThread:617849 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 00:25:54,495 INFO SenderThread:617849 [sender.py:transition_state():308] send defer: 4 +2021-07-15 00:25:54,495 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:25:54,495 INFO HandlerThread:617849 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 00:25:54,495 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: defer +2021-07-15 00:25:54,495 INFO SenderThread:617849 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 00:25:54,516 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:25:54,687 INFO SenderThread:617849 [sender.py:transition_state():308] send defer: 5 +2021-07-15 00:25:54,688 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:25:54,688 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:25:54,688 INFO HandlerThread:617849 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 00:25:54,688 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: defer +2021-07-15 00:25:54,688 INFO SenderThread:617849 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 00:25:54,688 INFO SenderThread:617849 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 00:25:54,790 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:25:55,343 INFO Thread-8 :617849 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/output.log +2021-07-15 00:25:55,344 INFO SenderThread:617849 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/config.yaml +2021-07-15 00:25:55,344 INFO SenderThread:617849 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/wandb-summary.json +2021-07-15 00:25:55,344 INFO SenderThread:617849 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files +2021-07-15 00:25:55,344 INFO SenderThread:617849 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/requirements.txt requirements.txt +2021-07-15 00:25:55,344 INFO SenderThread:617849 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/output.log output.log +2021-07-15 00:25:55,344 INFO SenderThread:617849 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/wandb-metadata.json wandb-metadata.json +2021-07-15 00:25:55,344 INFO SenderThread:617849 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/config.yaml config.yaml +2021-07-15 00:25:55,345 INFO SenderThread:617849 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/wandb-summary.json wandb-summary.json +2021-07-15 00:25:55,345 INFO SenderThread:617849 [sender.py:transition_state():308] send defer: 6 +2021-07-15 00:25:55,345 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:25:55,346 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:25:55,346 INFO HandlerThread:617849 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 00:25:55,349 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: defer +2021-07-15 00:25:55,349 INFO SenderThread:617849 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 00:25:55,349 INFO SenderThread:617849 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 00:25:55,447 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:25:55,448 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:25:55,550 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:25:55,550 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:25:55,652 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:25:55,652 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:25:55,754 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:25:55,754 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:25:55,810 INFO Thread-13 :617849 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/output.log +2021-07-15 00:25:55,821 INFO Thread-15 :617849 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/wandb-summary.json +2021-07-15 00:25:55,827 INFO Thread-12 :617849 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/requirements.txt +2021-07-15 00:25:55,856 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:25:55,856 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:25:55,869 INFO Thread-14 :617849 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/files/config.yaml +2021-07-15 00:25:55,958 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:25:55,958 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:25:56,060 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:25:56,060 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:25:56,069 INFO Thread-7 :617849 [sender.py:transition_state():308] send defer: 7 +2021-07-15 00:25:56,070 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:25:56,070 INFO HandlerThread:617849 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 00:25:56,070 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: defer +2021-07-15 00:25:56,070 INFO SenderThread:617849 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 00:25:56,162 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:25:56,354 INFO SenderThread:617849 [sender.py:transition_state():308] send defer: 8 +2021-07-15 00:25:56,354 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:25:56,355 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:25:56,355 INFO HandlerThread:617849 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 00:25:56,355 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: defer +2021-07-15 00:25:56,355 INFO SenderThread:617849 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 00:25:56,355 INFO SenderThread:617849 [sender.py:transition_state():308] send defer: 9 +2021-07-15 00:25:56,356 DEBUG SenderThread:617849 [sender.py:send():179] send: final +2021-07-15 00:25:56,356 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:25:56,356 DEBUG SenderThread:617849 [sender.py:send():179] send: footer +2021-07-15 00:25:56,356 INFO HandlerThread:617849 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 00:25:56,356 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: defer +2021-07-15 00:25:56,356 INFO SenderThread:617849 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 00:25:56,456 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:25:56,456 DEBUG SenderThread:617849 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:25:56,456 INFO SenderThread:617849 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 00:25:56,458 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 00:25:56,458 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 00:25:56,459 DEBUG HandlerThread:617849 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 00:25:56,459 INFO HandlerThread:617849 [handler.py:finish():638] shutting down handler +2021-07-15 00:25:57,356 INFO WriterThread:617849 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/run-u185degl.wandb +2021-07-15 00:25:57,457 INFO SenderThread:617849 [sender.py:finish():945] shutting down sender +2021-07-15 00:25:57,457 INFO SenderThread:617849 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 00:25:57,457 INFO SenderThread:617849 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 00:25:57,462 INFO MainThread:617849 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_001736-u185degl/logs/debug.log b/wandb/run-20210715_001736-u185degl/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..0f6d0d48e478038812d20e7d79774cc755cded3e --- /dev/null +++ b/wandb/run-20210715_001736-u185degl/logs/debug.log @@ -0,0 +1,127 @@ +2021-07-15 00:17:36,239 INFO MainThread:616592 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 00:17:36,239 INFO MainThread:616592 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 00:17:36,239 INFO MainThread:616592 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/logs/debug.log +2021-07-15 00:17:36,239 INFO MainThread:616592 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_001736-u185degl/logs/debug-internal.log +2021-07-15 00:17:36,240 INFO MainThread:616592 [wandb_init.py:init():370] calling init triggers +2021-07-15 00:17:36,240 INFO MainThread:616592 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 00:17:36,240 INFO MainThread:616592 [wandb_init.py:init():419] starting backend +2021-07-15 00:17:36,240 INFO MainThread:616592 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 00:17:36,287 INFO MainThread:616592 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 00:17:36,333 INFO MainThread:616592 [backend.py:ensure_launched():139] started backend process with pid: 617849 +2021-07-15 00:17:36,335 INFO MainThread:616592 [wandb_init.py:init():424] backend started and connected +2021-07-15 00:17:36,338 INFO MainThread:616592 [wandb_init.py:init():472] updated telemetry +2021-07-15 00:17:36,338 INFO MainThread:616592 [wandb_init.py:init():491] communicating current version +2021-07-15 00:17:36,976 INFO MainThread:616592 [wandb_init.py:init():496] got version response +2021-07-15 00:17:36,976 INFO MainThread:616592 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 00:17:37,151 INFO MainThread:616592 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 00:17:38,322 INFO MainThread:616592 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 00:17:38,323 INFO MainThread:616592 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 00:17:38,323 INFO MainThread:616592 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 00:17:38,325 INFO MainThread:616592 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 00:17:38,325 INFO MainThread:616592 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 00:17:38,332 INFO MainThread:616592 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_00-17-27_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 25, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 00:17:38,334 INFO MainThread:616592 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-15 00:17:38,335 INFO MainThread:616592 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000} +2021-07-15 00:25:51,753 INFO MainThread:616592 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 00:25:51,754 INFO MainThread:616592 [wandb_run.py:_restore():1565] restore +2021-07-15 00:25:54,414 INFO MainThread:616592 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 00:25:54,688 INFO MainThread:616592 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 00:25:55,346 INFO MainThread:616592 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11437 +} + +2021-07-15 00:25:55,448 INFO MainThread:616592 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11439 +} + +2021-07-15 00:25:55,551 INFO MainThread:616592 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11439 + total_bytes: 11439 +} + +2021-07-15 00:25:55,653 INFO MainThread:616592 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11439 + total_bytes: 11439 +} + +2021-07-15 00:25:55,755 INFO MainThread:616592 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11439 + total_bytes: 11439 +} + +2021-07-15 00:25:55,857 INFO MainThread:616592 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11439 + total_bytes: 11439 +} + +2021-07-15 00:25:55,959 INFO MainThread:616592 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11439 + total_bytes: 11439 +} + +2021-07-15 00:25:56,061 INFO MainThread:616592 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11439 + total_bytes: 11439 +} + +2021-07-15 00:25:56,355 INFO MainThread:616592 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11439 + total_bytes: 11439 +} + +2021-07-15 00:25:56,457 INFO MainThread:616592 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11439 + total_bytes: 11439 +} + +2021-07-15 00:25:57,756 INFO MainThread:616592 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_001736-u185degl/run-u185degl.wandb b/wandb/run-20210715_001736-u185degl/run-u185degl.wandb new file mode 100644 index 0000000000000000000000000000000000000000..5f92f8e6a9db0ffc0162c30cb3693ea927c10a26 Binary files /dev/null and b/wandb/run-20210715_001736-u185degl/run-u185degl.wandb differ diff --git a/wandb/run-20210715_002810-22keyfxg/files/config.yaml b/wandb/run-20210715_002810-22keyfxg/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a752ebc393a29870269c2a6ecbb3fd47fcea98f --- /dev/null +++ b/wandb/run-20210715_002810-22keyfxg/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 4 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_00-28-01_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 25 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 2000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_002810-22keyfxg/files/output.log b/wandb/run-20210715_002810-22keyfxg/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..18162f572099a75f895d07904b9b05a278546ea3 --- /dev/null +++ b/wandb/run-20210715_002810-22keyfxg/files/output.log @@ -0,0 +1,39 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 7.40G free, 0B reserved, and 7.37G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax.py", line 815, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 7.40G free, 0B reserved, and 7.37G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210715_002810-22keyfxg/files/requirements.txt b/wandb/run-20210715_002810-22keyfxg/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_002810-22keyfxg/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_002810-22keyfxg/files/wandb-metadata.json b/wandb/run-20210715_002810-22keyfxg/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..a3901f3275f3236a447dc402b6f10c2d2519f6ad --- /dev/null +++ b/wandb/run-20210715_002810-22keyfxg/files/wandb-metadata.json @@ -0,0 +1,45 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T00:28:12.177468", + "startedAt": "2021-07-15T00:28:10.178861", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=5000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=25", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=2000", + "--gradient_accumulation_steps=4" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_002810-22keyfxg/files/wandb-summary.json b/wandb/run-20210715_002810-22keyfxg/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_002810-22keyfxg/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_002810-22keyfxg/logs/debug-internal.log b/wandb/run-20210715_002810-22keyfxg/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..74336794bb29dfc828701aedcabbfb52cfdca6b0 --- /dev/null +++ b/wandb/run-20210715_002810-22keyfxg/logs/debug-internal.log @@ -0,0 +1,220 @@ +2021-07-15 00:28:10,840 INFO MainThread:620043 [internal.py:wandb_internal():88] W&B internal server running at pid: 620043, started at: 2021-07-15 00:28:10.839711 +2021-07-15 00:28:10,842 INFO WriterThread:620043 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/run-22keyfxg.wandb +2021-07-15 00:28:10,842 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 00:28:10,843 DEBUG SenderThread:620043 [sender.py:send():179] send: header +2021-07-15 00:28:10,843 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: check_version +2021-07-15 00:28:10,882 DEBUG SenderThread:620043 [sender.py:send():179] send: run +2021-07-15 00:28:11,065 INFO SenderThread:620043 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files +2021-07-15 00:28:11,065 INFO SenderThread:620043 [sender.py:_start_run_threads():716] run started: 22keyfxg with start time 1626308890 +2021-07-15 00:28:11,065 DEBUG SenderThread:620043 [sender.py:send():179] send: summary +2021-07-15 00:28:11,065 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 00:28:11,066 INFO SenderThread:620043 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 00:28:12,069 INFO Thread-8 :620043 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/wandb-summary.json +2021-07-15 00:28:12,177 DEBUG HandlerThread:620043 [meta.py:__init__():39] meta init +2021-07-15 00:28:12,177 DEBUG HandlerThread:620043 [meta.py:__init__():53] meta init done +2021-07-15 00:28:12,177 DEBUG HandlerThread:620043 [meta.py:probe():210] probe +2021-07-15 00:28:12,178 DEBUG HandlerThread:620043 [meta.py:_setup_git():200] setup git +2021-07-15 00:28:12,205 DEBUG HandlerThread:620043 [meta.py:_setup_git():207] setup git done +2021-07-15 00:28:12,205 DEBUG HandlerThread:620043 [meta.py:_save_pip():57] save pip +2021-07-15 00:28:12,206 DEBUG HandlerThread:620043 [meta.py:_save_pip():71] save pip done +2021-07-15 00:28:12,206 DEBUG HandlerThread:620043 [meta.py:probe():252] probe done +2021-07-15 00:28:12,209 DEBUG SenderThread:620043 [sender.py:send():179] send: files +2021-07-15 00:28:12,209 INFO SenderThread:620043 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 00:28:12,217 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:28:12,217 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:28:12,363 DEBUG SenderThread:620043 [sender.py:send():179] send: config +2021-07-15 00:28:12,363 DEBUG SenderThread:620043 [sender.py:send():179] send: config +2021-07-15 00:28:12,364 DEBUG SenderThread:620043 [sender.py:send():179] send: config +2021-07-15 00:28:12,702 INFO Thread-11 :620043 [upload_job.py:push():137] Uploaded file /tmp/tmpkt2gzisdwandb/opsxvzfj-wandb-metadata.json +2021-07-15 00:28:13,068 INFO Thread-8 :620043 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/requirements.txt +2021-07-15 00:28:13,068 INFO Thread-8 :620043 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/output.log +2021-07-15 00:28:13,069 INFO Thread-8 :620043 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/wandb-metadata.json +2021-07-15 00:28:27,074 INFO Thread-8 :620043 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/output.log +2021-07-15 00:28:27,365 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:28:27,366 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:28:40,252 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:28:42,080 INFO Thread-8 :620043 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/config.yaml +2021-07-15 00:28:42,496 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:28:42,497 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:28:57,628 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:28:57,628 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:29:10,324 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:29:12,761 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:29:12,762 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:29:27,895 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:29:27,896 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:29:40,392 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:29:43,029 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:29:43,029 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:29:58,161 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:29:58,161 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:30:10,458 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:30:13,295 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:30:13,296 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:30:28,431 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:30:28,431 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:30:40,524 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:30:43,564 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:30:43,564 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:30:58,695 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:30:58,695 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:31:10,591 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:31:13,829 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:31:13,829 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:31:28,959 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:31:28,960 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:31:40,658 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:31:44,094 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:31:44,095 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:31:59,226 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:31:59,227 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:32:10,721 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:32:14,364 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:32:14,364 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:32:29,496 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:32:29,497 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:32:40,792 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:32:44,631 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:32:44,632 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:32:59,762 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:32:59,762 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:33:10,860 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:33:14,895 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:33:14,895 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:33:30,027 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:33:30,027 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:33:40,933 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:33:45,162 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:33:45,162 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:34:00,292 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:34:00,293 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:34:11,005 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:34:15,424 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:34:15,425 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:34:30,556 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:34:30,556 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:34:41,080 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:34:45,687 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:34:45,688 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:34:57,238 INFO Thread-8 :620043 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/output.log +2021-07-15 00:35:00,957 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:35:00,957 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:35:11,161 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:35:16,118 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:35:16,119 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:35:31,255 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:35:31,255 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:35:41,236 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:35:46,389 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:35:46,390 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:36:01,522 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:36:01,522 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:36:11,313 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:36:16,658 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:36:16,658 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:36:19,273 INFO Thread-8 :620043 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/output.log +2021-07-15 00:36:20,774 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:36:20,775 DEBUG SenderThread:620043 [sender.py:send():179] send: telemetry +2021-07-15 00:36:20,775 DEBUG SenderThread:620043 [sender.py:send():179] send: exit +2021-07-15 00:36:20,775 INFO SenderThread:620043 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 00:36:20,775 INFO SenderThread:620043 [sender.py:send_exit():295] send defer +2021-07-15 00:36:20,776 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:36:20,776 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:36:20,776 INFO HandlerThread:620043 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 00:36:20,777 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: defer +2021-07-15 00:36:20,777 INFO SenderThread:620043 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 00:36:20,777 INFO SenderThread:620043 [sender.py:transition_state():308] send defer: 1 +2021-07-15 00:36:20,777 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:36:20,777 INFO HandlerThread:620043 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 00:36:20,837 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: defer +2021-07-15 00:36:20,837 INFO SenderThread:620043 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 00:36:20,837 INFO SenderThread:620043 [sender.py:transition_state():308] send defer: 2 +2021-07-15 00:36:20,837 DEBUG SenderThread:620043 [sender.py:send():179] send: stats +2021-07-15 00:36:20,843 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:36:20,843 INFO HandlerThread:620043 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 00:36:20,843 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: defer +2021-07-15 00:36:20,844 INFO SenderThread:620043 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 00:36:20,844 INFO SenderThread:620043 [sender.py:transition_state():308] send defer: 3 +2021-07-15 00:36:20,844 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:36:20,844 INFO HandlerThread:620043 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 00:36:20,844 DEBUG SenderThread:620043 [sender.py:send():179] send: summary +2021-07-15 00:36:20,845 INFO SenderThread:620043 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 00:36:20,846 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: defer +2021-07-15 00:36:20,846 INFO SenderThread:620043 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 00:36:20,846 INFO SenderThread:620043 [sender.py:transition_state():308] send defer: 4 +2021-07-15 00:36:20,847 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:36:20,847 INFO HandlerThread:620043 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 00:36:20,847 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: defer +2021-07-15 00:36:20,847 INFO SenderThread:620043 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 00:36:20,878 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:36:21,021 INFO SenderThread:620043 [sender.py:transition_state():308] send defer: 5 +2021-07-15 00:36:21,021 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:36:21,022 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:36:21,022 INFO HandlerThread:620043 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 00:36:21,022 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: defer +2021-07-15 00:36:21,022 INFO SenderThread:620043 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 00:36:21,023 INFO SenderThread:620043 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 00:36:21,124 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:36:21,275 INFO SenderThread:620043 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/output.log +2021-07-15 00:36:21,275 INFO SenderThread:620043 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/config.yaml +2021-07-15 00:36:21,275 INFO SenderThread:620043 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/wandb-summary.json +2021-07-15 00:36:21,275 INFO SenderThread:620043 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files +2021-07-15 00:36:21,276 INFO SenderThread:620043 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/requirements.txt requirements.txt +2021-07-15 00:36:21,276 INFO SenderThread:620043 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/output.log output.log +2021-07-15 00:36:21,276 INFO SenderThread:620043 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/wandb-metadata.json wandb-metadata.json +2021-07-15 00:36:21,276 INFO SenderThread:620043 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/config.yaml config.yaml +2021-07-15 00:36:21,280 INFO SenderThread:620043 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/wandb-summary.json wandb-summary.json +2021-07-15 00:36:21,281 INFO SenderThread:620043 [sender.py:transition_state():308] send defer: 6 +2021-07-15 00:36:21,281 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:36:21,284 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:36:21,284 INFO HandlerThread:620043 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 00:36:21,288 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: defer +2021-07-15 00:36:21,288 INFO SenderThread:620043 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 00:36:21,288 INFO SenderThread:620043 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 00:36:21,386 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:36:21,386 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:36:21,488 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:36:21,488 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:36:21,590 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:36:21,590 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:36:21,691 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:36:21,692 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:36:21,711 INFO Thread-12 :620043 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/requirements.txt +2021-07-15 00:36:21,719 INFO Thread-13 :620043 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/output.log +2021-07-15 00:36:21,731 INFO Thread-14 :620043 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/config.yaml +2021-07-15 00:36:21,759 INFO Thread-15 :620043 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/files/wandb-summary.json +2021-07-15 00:36:21,793 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:36:21,793 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:36:21,895 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:36:21,895 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:36:21,959 INFO Thread-7 :620043 [sender.py:transition_state():308] send defer: 7 +2021-07-15 00:36:21,960 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:36:21,960 INFO HandlerThread:620043 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 00:36:21,960 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: defer +2021-07-15 00:36:21,960 INFO SenderThread:620043 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 00:36:21,997 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:36:22,240 INFO SenderThread:620043 [sender.py:transition_state():308] send defer: 8 +2021-07-15 00:36:22,241 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:36:22,241 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:36:22,241 INFO HandlerThread:620043 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 00:36:22,242 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: defer +2021-07-15 00:36:22,242 INFO SenderThread:620043 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 00:36:22,242 INFO SenderThread:620043 [sender.py:transition_state():308] send defer: 9 +2021-07-15 00:36:22,242 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:36:22,242 INFO HandlerThread:620043 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 00:36:22,243 DEBUG SenderThread:620043 [sender.py:send():179] send: final +2021-07-15 00:36:22,243 DEBUG SenderThread:620043 [sender.py:send():179] send: footer +2021-07-15 00:36:22,243 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: defer +2021-07-15 00:36:22,243 INFO SenderThread:620043 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 00:36:22,342 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:36:22,343 DEBUG SenderThread:620043 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:36:22,343 INFO SenderThread:620043 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 00:36:22,344 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 00:36:22,345 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 00:36:22,345 DEBUG HandlerThread:620043 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 00:36:22,345 INFO HandlerThread:620043 [handler.py:finish():638] shutting down handler +2021-07-15 00:36:23,243 INFO WriterThread:620043 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/run-22keyfxg.wandb +2021-07-15 00:36:23,343 INFO SenderThread:620043 [sender.py:finish():945] shutting down sender +2021-07-15 00:36:23,344 INFO SenderThread:620043 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 00:36:23,344 INFO SenderThread:620043 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 00:36:23,346 INFO MainThread:620043 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_002810-22keyfxg/logs/debug.log b/wandb/run-20210715_002810-22keyfxg/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..213c737ed8bf9855d0ad539fd97edc7adabbabe0 --- /dev/null +++ b/wandb/run-20210715_002810-22keyfxg/logs/debug.log @@ -0,0 +1,119 @@ +2021-07-15 00:28:10,180 INFO MainThread:618785 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 00:28:10,180 INFO MainThread:618785 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 00:28:10,180 INFO MainThread:618785 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/logs/debug.log +2021-07-15 00:28:10,180 INFO MainThread:618785 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_002810-22keyfxg/logs/debug-internal.log +2021-07-15 00:28:10,181 INFO MainThread:618785 [wandb_init.py:init():370] calling init triggers +2021-07-15 00:28:10,181 INFO MainThread:618785 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 00:28:10,181 INFO MainThread:618785 [wandb_init.py:init():419] starting backend +2021-07-15 00:28:10,181 INFO MainThread:618785 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 00:28:10,222 INFO MainThread:618785 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 00:28:10,262 INFO MainThread:618785 [backend.py:ensure_launched():139] started backend process with pid: 620043 +2021-07-15 00:28:10,264 INFO MainThread:618785 [wandb_init.py:init():424] backend started and connected +2021-07-15 00:28:10,267 INFO MainThread:618785 [wandb_init.py:init():472] updated telemetry +2021-07-15 00:28:10,268 INFO MainThread:618785 [wandb_init.py:init():491] communicating current version +2021-07-15 00:28:10,881 INFO MainThread:618785 [wandb_init.py:init():496] got version response +2021-07-15 00:28:10,881 INFO MainThread:618785 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 00:28:11,064 INFO MainThread:618785 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 00:28:12,212 INFO MainThread:618785 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 00:28:12,213 INFO MainThread:618785 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 00:28:12,214 INFO MainThread:618785 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 00:28:12,215 INFO MainThread:618785 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 00:28:12,215 INFO MainThread:618785 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 00:28:12,222 INFO MainThread:618785 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_00-28-01_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 25, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 00:28:12,223 INFO MainThread:618785 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-15 00:28:12,225 INFO MainThread:618785 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000} +2021-07-15 00:36:18,561 INFO MainThread:618785 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 00:36:18,562 INFO MainThread:618785 [wandb_run.py:_restore():1565] restore +2021-07-15 00:36:20,777 INFO MainThread:618785 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 00:36:21,023 INFO MainThread:618785 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 00:36:21,285 INFO MainThread:618785 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11560 +} + +2021-07-15 00:36:21,387 INFO MainThread:618785 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11562 +} + +2021-07-15 00:36:21,489 INFO MainThread:618785 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 00:36:21,590 INFO MainThread:618785 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 00:36:21,692 INFO MainThread:618785 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 00:36:21,794 INFO MainThread:618785 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 00:36:21,896 INFO MainThread:618785 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 00:36:22,241 INFO MainThread:618785 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 00:36:22,344 INFO MainThread:618785 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 00:36:23,629 INFO MainThread:618785 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_002810-22keyfxg/run-22keyfxg.wandb b/wandb/run-20210715_002810-22keyfxg/run-22keyfxg.wandb new file mode 100644 index 0000000000000000000000000000000000000000..0141eb58bded6eed5bcc133cebd4720bf5f5746f Binary files /dev/null and b/wandb/run-20210715_002810-22keyfxg/run-22keyfxg.wandb differ diff --git a/wandb/run-20210715_003738-kj539nne/files/config.yaml b/wandb/run-20210715_003738-kj539nne/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3a92db168703029911cae47c05d089d177a3f2f --- /dev/null +++ b/wandb/run-20210715_003738-kj539nne/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 4 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_00-37-30_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 50 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 2000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 1 +per_device_train_batch_size: + desc: null + value: 1 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_003738-kj539nne/files/output.log b/wandb/run-20210715_003738-kj539nne/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7247b0ca891aadaecca8ef469601d2ac030351b1 --- /dev/null +++ b/wandb/run-20210715_003738-kj539nne/files/output.log @@ -0,0 +1,34 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + train_batch_idx = generate_batch_splits(train_samples_idx, train_batch_size // grad_accum_steps) + File "./run_mlm_flax.py", line 263, in generate_batch_splits + batch_idx = np.split(samples_idx, sections_split) + File "<__array_function__ internals>", line 5, in split + File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 874, in split + return array_split(ary, indices_or_sections, axis) + File "<__array_function__ internals>", line 5, in array_split + File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 790, in array_split + sub_arys.append(_nx.swapaxes(sary[st:end], axis, 0)) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5009, in _rewriting_take + return _gather(arr, treedef, static_idx, dynamic_idx, indices_are_sorted, + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5028, in _gather + y = lax.gather( + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/lax/lax.py", line 984, in gather + return gather_p.bind( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 264, in bind + out = top_trace.process_primitive(self, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 603, in process_primitive + return primitive.impl(*tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 249, in apply_primitive + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 365, in _execute_compiled_primitive + out_bufs = compiled.execute(input_bufs) +RuntimeError: Resource exhausted: Attempting to allocate 17.0K. That was not possible. There are 48.0K free. Due to fragmentation, the largest contiguous region of free memory is 16.0K.; (0x0x0_HBM0) \ No newline at end of file diff --git a/wandb/run-20210715_003738-kj539nne/files/requirements.txt b/wandb/run-20210715_003738-kj539nne/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_003738-kj539nne/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_003738-kj539nne/files/wandb-metadata.json b/wandb/run-20210715_003738-kj539nne/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..41b84c65edbb33bb86d1a735252a38f9f2a267b1 --- /dev/null +++ b/wandb/run-20210715_003738-kj539nne/files/wandb-metadata.json @@ -0,0 +1,45 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T00:37:40.759058", + "startedAt": "2021-07-15T00:37:38.666838", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=5000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=50", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=1", + "--per_device_eval_batch_size=1", + "--save_total_limit=5", + "--max_eval_samples=2000", + "--gradient_accumulation_steps=4" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_003738-kj539nne/files/wandb-summary.json b/wandb/run-20210715_003738-kj539nne/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_003738-kj539nne/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_003738-kj539nne/logs/debug-internal.log b/wandb/run-20210715_003738-kj539nne/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..8946443c8efb24466de07e831fcb695e57441d4d --- /dev/null +++ b/wandb/run-20210715_003738-kj539nne/logs/debug-internal.log @@ -0,0 +1,275 @@ +2021-07-15 00:37:39,369 INFO MainThread:622170 [internal.py:wandb_internal():88] W&B internal server running at pid: 622170, started at: 2021-07-15 00:37:39.369673 +2021-07-15 00:37:39,371 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 00:37:39,372 INFO WriterThread:622170 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/run-kj539nne.wandb +2021-07-15 00:37:39,373 DEBUG SenderThread:622170 [sender.py:send():179] send: header +2021-07-15 00:37:39,373 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: check_version +2021-07-15 00:37:39,410 DEBUG SenderThread:622170 [sender.py:send():179] send: run +2021-07-15 00:37:39,578 INFO SenderThread:622170 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files +2021-07-15 00:37:39,578 INFO SenderThread:622170 [sender.py:_start_run_threads():716] run started: kj539nne with start time 1626309458 +2021-07-15 00:37:39,578 DEBUG SenderThread:622170 [sender.py:send():179] send: summary +2021-07-15 00:37:39,578 INFO SenderThread:622170 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 00:37:39,579 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 00:37:40,586 INFO Thread-8 :622170 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/wandb-summary.json +2021-07-15 00:37:40,758 DEBUG HandlerThread:622170 [meta.py:__init__():39] meta init +2021-07-15 00:37:40,758 DEBUG HandlerThread:622170 [meta.py:__init__():53] meta init done +2021-07-15 00:37:40,759 DEBUG HandlerThread:622170 [meta.py:probe():210] probe +2021-07-15 00:37:40,760 DEBUG HandlerThread:622170 [meta.py:_setup_git():200] setup git +2021-07-15 00:37:40,791 DEBUG HandlerThread:622170 [meta.py:_setup_git():207] setup git done +2021-07-15 00:37:40,791 DEBUG HandlerThread:622170 [meta.py:_save_pip():57] save pip +2021-07-15 00:37:40,792 DEBUG HandlerThread:622170 [meta.py:_save_pip():71] save pip done +2021-07-15 00:37:40,792 DEBUG HandlerThread:622170 [meta.py:probe():252] probe done +2021-07-15 00:37:40,796 DEBUG SenderThread:622170 [sender.py:send():179] send: files +2021-07-15 00:37:40,796 INFO SenderThread:622170 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 00:37:40,802 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:37:40,802 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:37:40,930 DEBUG SenderThread:622170 [sender.py:send():179] send: config +2021-07-15 00:37:40,931 DEBUG SenderThread:622170 [sender.py:send():179] send: config +2021-07-15 00:37:40,931 DEBUG SenderThread:622170 [sender.py:send():179] send: config +2021-07-15 00:37:41,235 INFO Thread-11 :622170 [upload_job.py:push():137] Uploaded file /tmp/tmpzvfgl2cpwandb/2r82kf3f-wandb-metadata.json +2021-07-15 00:37:41,584 INFO Thread-8 :622170 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/output.log +2021-07-15 00:37:41,585 INFO Thread-8 :622170 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/requirements.txt +2021-07-15 00:37:41,585 INFO Thread-8 :622170 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/wandb-metadata.json +2021-07-15 00:37:55,591 INFO Thread-8 :622170 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/output.log +2021-07-15 00:37:55,933 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:37:55,933 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:37:57,592 INFO Thread-8 :622170 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/output.log +2021-07-15 00:38:08,843 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:38:10,597 INFO Thread-8 :622170 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/config.yaml +2021-07-15 00:38:11,072 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:38:11,073 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:38:26,208 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:38:26,208 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:38:38,927 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:38:41,341 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:38:41,341 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:38:56,476 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:38:56,476 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:39:09,001 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:39:11,609 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:39:11,609 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:39:26,745 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:39:26,746 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:39:39,080 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:39:41,877 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:39:41,877 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:39:57,009 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:39:57,010 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:40:09,159 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:40:12,144 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:40:12,145 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:40:27,280 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:40:27,280 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:40:39,237 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:40:42,413 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:40:42,414 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:40:57,550 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:40:57,551 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:41:09,313 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:41:12,683 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:41:12,684 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:41:27,817 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:41:27,818 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:41:39,376 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:41:42,956 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:41:42,956 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:41:58,089 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:41:58,090 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:42:09,437 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:42:13,226 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:42:13,226 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:42:28,355 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:42:28,356 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:42:39,515 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:42:43,487 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:42:43,488 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:42:58,620 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:42:58,621 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:43:09,593 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:43:13,756 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:43:13,756 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:43:28,889 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:43:28,889 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:43:39,663 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:43:44,020 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:43:44,020 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:43:59,151 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:43:59,151 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:44:09,739 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:44:14,283 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:44:14,284 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:44:29,414 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:44:29,414 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:44:39,817 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:44:44,546 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:44:44,547 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:44:59,680 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:44:59,681 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:45:09,895 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:45:14,816 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:45:14,816 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:45:29,950 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:45:29,950 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:45:39,970 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:45:45,082 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:45:45,082 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:46:00,214 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:46:00,215 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:46:10,046 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:46:15,347 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:46:15,347 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:46:30,482 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:46:30,483 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:46:40,123 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:46:45,616 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:46:45,616 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:47:00,745 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:47:00,745 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:47:10,194 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:47:15,881 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:47:15,881 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:47:31,011 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:47:31,012 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:47:40,259 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:47:46,146 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:47:46,147 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:48:01,282 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:48:01,283 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:48:10,323 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:48:16,413 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:48:16,414 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:48:31,562 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:48:31,563 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:48:40,388 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:48:46,694 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:48:46,695 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:49:02,047 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:49:02,047 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:49:10,453 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:49:17,178 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:49:17,179 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:49:32,311 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:49:32,311 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:49:40,521 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:49:47,444 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:49:47,444 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:50:02,576 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:50:02,576 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:50:10,594 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:50:17,705 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:50:17,706 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:50:32,835 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:50:32,836 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:50:40,670 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:50:47,970 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:50:47,970 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:51:03,105 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:51:03,105 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:51:10,739 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:51:18,234 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:51:18,235 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:51:21,907 INFO Thread-8 :622170 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/output.log +2021-07-15 00:51:23,131 DEBUG SenderThread:622170 [sender.py:send():179] send: telemetry +2021-07-15 00:51:23,132 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:51:23,132 DEBUG SenderThread:622170 [sender.py:send():179] send: exit +2021-07-15 00:51:23,132 INFO SenderThread:622170 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 00:51:23,133 INFO SenderThread:622170 [sender.py:send_exit():295] send defer +2021-07-15 00:51:23,133 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:51:23,134 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:51:23,134 INFO HandlerThread:622170 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 00:51:23,134 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: defer +2021-07-15 00:51:23,134 INFO SenderThread:622170 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 00:51:23,135 INFO SenderThread:622170 [sender.py:transition_state():308] send defer: 1 +2021-07-15 00:51:23,135 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:51:23,135 INFO HandlerThread:622170 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 00:51:23,170 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: defer +2021-07-15 00:51:23,170 INFO SenderThread:622170 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 00:51:23,170 INFO SenderThread:622170 [sender.py:transition_state():308] send defer: 2 +2021-07-15 00:51:23,170 DEBUG SenderThread:622170 [sender.py:send():179] send: stats +2021-07-15 00:51:23,170 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:51:23,171 INFO HandlerThread:622170 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 00:51:23,171 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: defer +2021-07-15 00:51:23,171 INFO SenderThread:622170 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 00:51:23,171 INFO SenderThread:622170 [sender.py:transition_state():308] send defer: 3 +2021-07-15 00:51:23,171 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:51:23,171 INFO HandlerThread:622170 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 00:51:23,171 DEBUG SenderThread:622170 [sender.py:send():179] send: summary +2021-07-15 00:51:23,172 INFO SenderThread:622170 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 00:51:23,172 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: defer +2021-07-15 00:51:23,172 INFO SenderThread:622170 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 00:51:23,172 INFO SenderThread:622170 [sender.py:transition_state():308] send defer: 4 +2021-07-15 00:51:23,172 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:51:23,172 INFO HandlerThread:622170 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 00:51:23,173 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: defer +2021-07-15 00:51:23,173 INFO SenderThread:622170 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 00:51:23,236 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:51:23,352 INFO SenderThread:622170 [sender.py:transition_state():308] send defer: 5 +2021-07-15 00:51:23,352 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:51:23,353 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:51:23,353 INFO HandlerThread:622170 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 00:51:23,353 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: defer +2021-07-15 00:51:23,353 INFO SenderThread:622170 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 00:51:23,353 INFO SenderThread:622170 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 00:51:23,454 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:51:23,908 INFO Thread-8 :622170 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/output.log +2021-07-15 00:51:23,909 INFO SenderThread:622170 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/wandb-summary.json +2021-07-15 00:51:23,909 INFO SenderThread:622170 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/config.yaml +2021-07-15 00:51:23,909 INFO SenderThread:622170 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files +2021-07-15 00:51:23,909 INFO SenderThread:622170 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/requirements.txt requirements.txt +2021-07-15 00:51:23,909 INFO SenderThread:622170 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/output.log output.log +2021-07-15 00:51:23,909 INFO SenderThread:622170 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/wandb-metadata.json wandb-metadata.json +2021-07-15 00:51:23,910 INFO SenderThread:622170 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/config.yaml config.yaml +2021-07-15 00:51:23,913 INFO SenderThread:622170 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/wandb-summary.json wandb-summary.json +2021-07-15 00:51:23,913 INFO SenderThread:622170 [sender.py:transition_state():308] send defer: 6 +2021-07-15 00:51:23,913 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:51:23,917 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:51:23,917 INFO HandlerThread:622170 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 00:51:23,917 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: defer +2021-07-15 00:51:23,917 INFO SenderThread:622170 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 00:51:23,917 INFO SenderThread:622170 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 00:51:24,016 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:51:24,016 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:51:24,118 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:51:24,118 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:51:24,220 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:51:24,220 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:51:24,322 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:51:24,322 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:51:24,352 INFO Thread-15 :622170 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/wandb-summary.json +2021-07-15 00:51:24,355 INFO Thread-12 :622170 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/requirements.txt +2021-07-15 00:51:24,384 INFO Thread-14 :622170 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/config.yaml +2021-07-15 00:51:24,394 INFO Thread-13 :622170 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/files/output.log +2021-07-15 00:51:24,424 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:51:24,424 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:51:24,526 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:51:24,526 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:51:24,595 INFO Thread-7 :622170 [sender.py:transition_state():308] send defer: 7 +2021-07-15 00:51:24,595 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:51:24,595 INFO HandlerThread:622170 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 00:51:24,595 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: defer +2021-07-15 00:51:24,595 INFO SenderThread:622170 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 00:51:24,627 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:51:24,878 INFO SenderThread:622170 [sender.py:transition_state():308] send defer: 8 +2021-07-15 00:51:24,879 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:51:24,879 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:51:24,879 INFO HandlerThread:622170 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 00:51:24,879 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: defer +2021-07-15 00:51:24,879 INFO SenderThread:622170 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 00:51:24,879 INFO SenderThread:622170 [sender.py:transition_state():308] send defer: 9 +2021-07-15 00:51:24,880 DEBUG SenderThread:622170 [sender.py:send():179] send: final +2021-07-15 00:51:24,880 DEBUG SenderThread:622170 [sender.py:send():179] send: footer +2021-07-15 00:51:24,880 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:51:24,880 INFO HandlerThread:622170 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 00:51:24,880 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: defer +2021-07-15 00:51:24,880 INFO SenderThread:622170 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 00:51:24,980 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:51:24,981 DEBUG SenderThread:622170 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:51:24,981 INFO SenderThread:622170 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 00:51:24,982 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 00:51:24,983 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 00:51:24,984 DEBUG HandlerThread:622170 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 00:51:24,984 INFO HandlerThread:622170 [handler.py:finish():638] shutting down handler +2021-07-15 00:51:25,880 INFO WriterThread:622170 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/run-kj539nne.wandb +2021-07-15 00:51:25,981 INFO SenderThread:622170 [sender.py:finish():945] shutting down sender +2021-07-15 00:51:25,981 INFO SenderThread:622170 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 00:51:25,981 INFO SenderThread:622170 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 00:51:25,984 INFO MainThread:622170 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_003738-kj539nne/logs/debug.log b/wandb/run-20210715_003738-kj539nne/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..13744a7afd864ca97ca96fa859924aa3dbc470ce --- /dev/null +++ b/wandb/run-20210715_003738-kj539nne/logs/debug.log @@ -0,0 +1,119 @@ +2021-07-15 00:37:38,668 INFO MainThread:620917 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 00:37:38,668 INFO MainThread:620917 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 00:37:38,668 INFO MainThread:620917 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/logs/debug.log +2021-07-15 00:37:38,668 INFO MainThread:620917 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_003738-kj539nne/logs/debug-internal.log +2021-07-15 00:37:38,668 INFO MainThread:620917 [wandb_init.py:init():370] calling init triggers +2021-07-15 00:37:38,668 INFO MainThread:620917 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 00:37:38,668 INFO MainThread:620917 [wandb_init.py:init():419] starting backend +2021-07-15 00:37:38,668 INFO MainThread:620917 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 00:37:38,714 INFO MainThread:620917 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 00:37:38,758 INFO MainThread:620917 [backend.py:ensure_launched():139] started backend process with pid: 622170 +2021-07-15 00:37:38,760 INFO MainThread:620917 [wandb_init.py:init():424] backend started and connected +2021-07-15 00:37:38,763 INFO MainThread:620917 [wandb_init.py:init():472] updated telemetry +2021-07-15 00:37:38,764 INFO MainThread:620917 [wandb_init.py:init():491] communicating current version +2021-07-15 00:37:39,409 INFO MainThread:620917 [wandb_init.py:init():496] got version response +2021-07-15 00:37:39,409 INFO MainThread:620917 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 00:37:39,578 INFO MainThread:620917 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 00:37:40,799 INFO MainThread:620917 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 00:37:40,799 INFO MainThread:620917 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 00:37:40,800 INFO MainThread:620917 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 00:37:40,802 INFO MainThread:620917 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 00:37:40,802 INFO MainThread:620917 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 00:37:40,809 INFO MainThread:620917 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_00-37-30_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 00:37:40,811 INFO MainThread:620917 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-15 00:37:40,813 INFO MainThread:620917 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000} +2021-07-15 00:51:20,965 INFO MainThread:620917 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 00:51:20,966 INFO MainThread:620917 [wandb_run.py:_restore():1565] restore +2021-07-15 00:51:23,134 INFO MainThread:620917 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 00:51:23,353 INFO MainThread:620917 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 00:51:23,914 INFO MainThread:620917 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11179 +} + +2021-07-15 00:51:24,016 INFO MainThread:620917 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11181 +} + +2021-07-15 00:51:24,119 INFO MainThread:620917 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11181 + total_bytes: 11181 +} + +2021-07-15 00:51:24,221 INFO MainThread:620917 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11181 + total_bytes: 11181 +} + +2021-07-15 00:51:24,323 INFO MainThread:620917 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11181 + total_bytes: 11181 +} + +2021-07-15 00:51:24,425 INFO MainThread:620917 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11181 + total_bytes: 11181 +} + +2021-07-15 00:51:24,527 INFO MainThread:620917 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11181 + total_bytes: 11181 +} + +2021-07-15 00:51:24,879 INFO MainThread:620917 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11181 + total_bytes: 11181 +} + +2021-07-15 00:51:24,981 INFO MainThread:620917 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11181 + total_bytes: 11181 +} + +2021-07-15 00:51:26,294 INFO MainThread:620917 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_003738-kj539nne/run-kj539nne.wandb b/wandb/run-20210715_003738-kj539nne/run-kj539nne.wandb new file mode 100644 index 0000000000000000000000000000000000000000..c6dc13969ddef765f4e58c0898323ac37d2739e1 Binary files /dev/null and b/wandb/run-20210715_003738-kj539nne/run-kj539nne.wandb differ diff --git a/wandb/run-20210715_005228-361cvuow/files/config.yaml b/wandb/run-20210715_005228-361cvuow/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..476fec00ef4a1a98244f3ef4eddb18031f31e3b5 --- /dev/null +++ b/wandb/run-20210715_005228-361cvuow/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 4 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_00-52-20_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 50 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 1000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_005228-361cvuow/files/output.log b/wandb/run-20210715_005228-361cvuow/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e070d7106e8580e92f6ca9d70a5b08e171cc28e4 --- /dev/null +++ b/wandb/run-20210715_005228-361cvuow/files/output.log @@ -0,0 +1,39 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 8.17G free, 0B reserved, and 8.12G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax.py", line 815, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 8.17G free, 0B reserved, and 8.12G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210715_005228-361cvuow/files/requirements.txt b/wandb/run-20210715_005228-361cvuow/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_005228-361cvuow/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_005228-361cvuow/files/wandb-metadata.json b/wandb/run-20210715_005228-361cvuow/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..79bb5be3bce95a1330ec9e6deaef94d9878047cf --- /dev/null +++ b/wandb/run-20210715_005228-361cvuow/files/wandb-metadata.json @@ -0,0 +1,45 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T00:52:30.640293", + "startedAt": "2021-07-15T00:52:28.636436", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=5000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=50", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=1000", + "--gradient_accumulation_steps=4" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_005228-361cvuow/files/wandb-summary.json b/wandb/run-20210715_005228-361cvuow/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_005228-361cvuow/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_005228-361cvuow/logs/debug-internal.log b/wandb/run-20210715_005228-361cvuow/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c265b4433752a3048ff388c9f78ef6959b5dc65b --- /dev/null +++ b/wandb/run-20210715_005228-361cvuow/logs/debug-internal.log @@ -0,0 +1,210 @@ +2021-07-15 00:52:29,330 INFO MainThread:624595 [internal.py:wandb_internal():88] W&B internal server running at pid: 624595, started at: 2021-07-15 00:52:29.329937 +2021-07-15 00:52:29,332 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 00:52:29,333 INFO WriterThread:624595 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/run-361cvuow.wandb +2021-07-15 00:52:29,333 DEBUG SenderThread:624595 [sender.py:send():179] send: header +2021-07-15 00:52:29,333 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: check_version +2021-07-15 00:52:29,368 DEBUG SenderThread:624595 [sender.py:send():179] send: run +2021-07-15 00:52:29,550 INFO SenderThread:624595 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files +2021-07-15 00:52:29,550 INFO SenderThread:624595 [sender.py:_start_run_threads():716] run started: 361cvuow with start time 1626310348 +2021-07-15 00:52:29,550 DEBUG SenderThread:624595 [sender.py:send():179] send: summary +2021-07-15 00:52:29,551 INFO SenderThread:624595 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 00:52:29,551 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 00:52:30,553 INFO Thread-8 :624595 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/wandb-summary.json +2021-07-15 00:52:30,640 DEBUG HandlerThread:624595 [meta.py:__init__():39] meta init +2021-07-15 00:52:30,640 DEBUG HandlerThread:624595 [meta.py:__init__():53] meta init done +2021-07-15 00:52:30,640 DEBUG HandlerThread:624595 [meta.py:probe():210] probe +2021-07-15 00:52:30,641 DEBUG HandlerThread:624595 [meta.py:_setup_git():200] setup git +2021-07-15 00:52:30,670 DEBUG HandlerThread:624595 [meta.py:_setup_git():207] setup git done +2021-07-15 00:52:30,670 DEBUG HandlerThread:624595 [meta.py:_save_pip():57] save pip +2021-07-15 00:52:30,671 DEBUG HandlerThread:624595 [meta.py:_save_pip():71] save pip done +2021-07-15 00:52:30,671 DEBUG HandlerThread:624595 [meta.py:probe():252] probe done +2021-07-15 00:52:30,674 DEBUG SenderThread:624595 [sender.py:send():179] send: files +2021-07-15 00:52:30,674 INFO SenderThread:624595 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 00:52:30,680 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:52:30,680 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:52:30,812 DEBUG SenderThread:624595 [sender.py:send():179] send: config +2021-07-15 00:52:30,812 DEBUG SenderThread:624595 [sender.py:send():179] send: config +2021-07-15 00:52:30,813 DEBUG SenderThread:624595 [sender.py:send():179] send: config +2021-07-15 00:52:31,126 INFO Thread-11 :624595 [upload_job.py:push():137] Uploaded file /tmp/tmp7sk_pd_0wandb/2r26ralk-wandb-metadata.json +2021-07-15 00:52:31,551 INFO Thread-8 :624595 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/wandb-metadata.json +2021-07-15 00:52:31,552 INFO Thread-8 :624595 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/requirements.txt +2021-07-15 00:52:31,552 INFO Thread-8 :624595 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/output.log +2021-07-15 00:52:45,557 INFO Thread-8 :624595 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/output.log +2021-07-15 00:52:45,814 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:52:45,814 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:52:58,723 DEBUG SenderThread:624595 [sender.py:send():179] send: stats +2021-07-15 00:53:00,564 INFO Thread-8 :624595 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/config.yaml +2021-07-15 00:53:00,947 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:53:00,947 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:53:16,080 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:53:16,080 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:53:28,803 DEBUG SenderThread:624595 [sender.py:send():179] send: stats +2021-07-15 00:53:31,216 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:53:31,216 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:53:46,347 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:53:46,347 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:53:58,879 DEBUG SenderThread:624595 [sender.py:send():179] send: stats +2021-07-15 00:54:01,496 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:54:01,496 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:54:16,630 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:54:16,630 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:54:28,953 DEBUG SenderThread:624595 [sender.py:send():179] send: stats +2021-07-15 00:54:31,760 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:54:31,761 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:54:46,892 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:54:46,893 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:54:59,029 DEBUG SenderThread:624595 [sender.py:send():179] send: stats +2021-07-15 00:55:02,026 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:55:02,027 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:55:17,159 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:55:17,160 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:55:29,107 DEBUG SenderThread:624595 [sender.py:send():179] send: stats +2021-07-15 00:55:32,292 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:55:32,292 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:55:47,422 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:55:47,422 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:55:59,186 DEBUG SenderThread:624595 [sender.py:send():179] send: stats +2021-07-15 00:56:02,555 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:56:02,556 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:56:17,688 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:56:17,688 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:56:29,261 DEBUG SenderThread:624595 [sender.py:send():179] send: stats +2021-07-15 00:56:32,824 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:56:32,824 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:56:47,955 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:56:47,955 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:56:59,339 DEBUG SenderThread:624595 [sender.py:send():179] send: stats +2021-07-15 00:57:03,086 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:57:03,086 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:57:18,219 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:57:18,219 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:57:29,416 DEBUG SenderThread:624595 [sender.py:send():179] send: stats +2021-07-15 00:57:33,350 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:57:33,350 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:57:48,482 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:57:48,483 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:57:59,490 DEBUG SenderThread:624595 [sender.py:send():179] send: stats +2021-07-15 00:58:03,614 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:58:03,614 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:58:17,674 INFO Thread-8 :624595 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/output.log +2021-07-15 00:58:18,763 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:58:18,763 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:58:29,567 DEBUG SenderThread:624595 [sender.py:send():179] send: stats +2021-07-15 00:58:33,909 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:58:33,910 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:58:49,058 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:58:49,059 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:58:59,648 DEBUG SenderThread:624595 [sender.py:send():179] send: stats +2021-07-15 00:59:04,188 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:59:04,188 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:59:19,321 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:59:19,322 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:59:29,729 DEBUG SenderThread:624595 [sender.py:send():179] send: stats +2021-07-15 00:59:34,455 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 00:59:34,455 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: stop_status +2021-07-15 00:59:41,706 INFO Thread-8 :624595 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/output.log +2021-07-15 00:59:43,394 DEBUG SenderThread:624595 [sender.py:send():179] send: telemetry +2021-07-15 00:59:43,394 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:59:43,395 DEBUG SenderThread:624595 [sender.py:send():179] send: exit +2021-07-15 00:59:43,395 INFO SenderThread:624595 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 00:59:43,395 INFO SenderThread:624595 [sender.py:send_exit():295] send defer +2021-07-15 00:59:43,395 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:59:43,396 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:59:43,396 INFO HandlerThread:624595 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 00:59:43,396 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: defer +2021-07-15 00:59:43,396 INFO SenderThread:624595 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 00:59:43,396 INFO SenderThread:624595 [sender.py:transition_state():308] send defer: 1 +2021-07-15 00:59:43,396 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:59:43,396 INFO HandlerThread:624595 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 00:59:43,467 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: defer +2021-07-15 00:59:43,467 INFO SenderThread:624595 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 00:59:43,467 INFO SenderThread:624595 [sender.py:transition_state():308] send defer: 2 +2021-07-15 00:59:43,467 DEBUG SenderThread:624595 [sender.py:send():179] send: stats +2021-07-15 00:59:43,467 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:59:43,467 INFO HandlerThread:624595 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 00:59:43,468 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: defer +2021-07-15 00:59:43,468 INFO SenderThread:624595 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 00:59:43,468 INFO SenderThread:624595 [sender.py:transition_state():308] send defer: 3 +2021-07-15 00:59:43,468 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:59:43,468 INFO HandlerThread:624595 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 00:59:43,468 DEBUG SenderThread:624595 [sender.py:send():179] send: summary +2021-07-15 00:59:43,468 INFO SenderThread:624595 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 00:59:43,468 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: defer +2021-07-15 00:59:43,469 INFO SenderThread:624595 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 00:59:43,469 INFO SenderThread:624595 [sender.py:transition_state():308] send defer: 4 +2021-07-15 00:59:43,469 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:59:43,469 INFO HandlerThread:624595 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 00:59:43,469 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: defer +2021-07-15 00:59:43,469 INFO SenderThread:624595 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 00:59:43,498 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:59:43,675 INFO SenderThread:624595 [sender.py:transition_state():308] send defer: 5 +2021-07-15 00:59:43,675 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:59:43,676 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:59:43,676 INFO HandlerThread:624595 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 00:59:43,676 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: defer +2021-07-15 00:59:43,676 INFO SenderThread:624595 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 00:59:43,676 INFO SenderThread:624595 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 00:59:43,707 INFO SenderThread:624595 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/config.yaml +2021-07-15 00:59:43,708 INFO SenderThread:624595 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/output.log +2021-07-15 00:59:43,708 INFO SenderThread:624595 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/wandb-summary.json +2021-07-15 00:59:43,708 INFO SenderThread:624595 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files +2021-07-15 00:59:43,708 INFO SenderThread:624595 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/requirements.txt requirements.txt +2021-07-15 00:59:43,708 INFO SenderThread:624595 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/output.log output.log +2021-07-15 00:59:43,708 INFO SenderThread:624595 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/wandb-metadata.json wandb-metadata.json +2021-07-15 00:59:43,708 INFO SenderThread:624595 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/config.yaml config.yaml +2021-07-15 00:59:43,708 INFO SenderThread:624595 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/wandb-summary.json wandb-summary.json +2021-07-15 00:59:43,709 INFO SenderThread:624595 [sender.py:transition_state():308] send defer: 6 +2021-07-15 00:59:43,709 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:59:43,715 INFO HandlerThread:624595 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 00:59:43,720 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: defer +2021-07-15 00:59:43,720 INFO SenderThread:624595 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 00:59:43,721 INFO SenderThread:624595 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 00:59:43,778 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:59:43,778 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:59:43,880 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:59:43,880 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:59:43,982 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:59:43,982 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:59:44,084 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:59:44,084 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:59:44,146 INFO Thread-13 :624595 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/output.log +2021-07-15 00:59:44,161 INFO Thread-15 :624595 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/wandb-summary.json +2021-07-15 00:59:44,186 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:59:44,187 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:59:44,194 INFO Thread-12 :624595 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/requirements.txt +2021-07-15 00:59:44,198 INFO Thread-14 :624595 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/files/config.yaml +2021-07-15 00:59:44,289 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:59:44,289 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:59:44,391 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:59:44,391 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:59:44,399 INFO Thread-7 :624595 [sender.py:transition_state():308] send defer: 7 +2021-07-15 00:59:44,399 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:59:44,399 INFO HandlerThread:624595 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 00:59:44,399 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: defer +2021-07-15 00:59:44,400 INFO SenderThread:624595 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 00:59:44,493 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:59:44,686 INFO SenderThread:624595 [sender.py:transition_state():308] send defer: 8 +2021-07-15 00:59:44,687 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:59:44,687 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:59:44,687 INFO HandlerThread:624595 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 00:59:44,687 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: defer +2021-07-15 00:59:44,688 INFO SenderThread:624595 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 00:59:44,688 INFO SenderThread:624595 [sender.py:transition_state():308] send defer: 9 +2021-07-15 00:59:44,688 DEBUG SenderThread:624595 [sender.py:send():179] send: final +2021-07-15 00:59:44,688 DEBUG SenderThread:624595 [sender.py:send():179] send: footer +2021-07-15 00:59:44,688 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: defer +2021-07-15 00:59:44,689 INFO HandlerThread:624595 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 00:59:44,689 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: defer +2021-07-15 00:59:44,689 INFO SenderThread:624595 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 00:59:44,789 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 00:59:44,789 DEBUG SenderThread:624595 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 00:59:44,789 INFO SenderThread:624595 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 00:59:44,791 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 00:59:44,791 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 00:59:44,792 DEBUG HandlerThread:624595 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 00:59:44,792 INFO HandlerThread:624595 [handler.py:finish():638] shutting down handler +2021-07-15 00:59:45,689 INFO WriterThread:624595 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/run-361cvuow.wandb +2021-07-15 00:59:45,789 INFO SenderThread:624595 [sender.py:finish():945] shutting down sender +2021-07-15 00:59:45,790 INFO SenderThread:624595 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 00:59:45,790 INFO SenderThread:624595 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 00:59:45,792 INFO MainThread:624595 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_005228-361cvuow/logs/debug.log b/wandb/run-20210715_005228-361cvuow/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..bef0300bc067be796edf2966712bb852253cec58 --- /dev/null +++ b/wandb/run-20210715_005228-361cvuow/logs/debug.log @@ -0,0 +1,119 @@ +2021-07-15 00:52:28,637 INFO MainThread:623339 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 00:52:28,638 INFO MainThread:623339 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 00:52:28,638 INFO MainThread:623339 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/logs/debug.log +2021-07-15 00:52:28,638 INFO MainThread:623339 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_005228-361cvuow/logs/debug-internal.log +2021-07-15 00:52:28,638 INFO MainThread:623339 [wandb_init.py:init():370] calling init triggers +2021-07-15 00:52:28,638 INFO MainThread:623339 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 00:52:28,638 INFO MainThread:623339 [wandb_init.py:init():419] starting backend +2021-07-15 00:52:28,638 INFO MainThread:623339 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 00:52:28,684 INFO MainThread:623339 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 00:52:28,730 INFO MainThread:623339 [backend.py:ensure_launched():139] started backend process with pid: 624595 +2021-07-15 00:52:28,732 INFO MainThread:623339 [wandb_init.py:init():424] backend started and connected +2021-07-15 00:52:28,735 INFO MainThread:623339 [wandb_init.py:init():472] updated telemetry +2021-07-15 00:52:28,736 INFO MainThread:623339 [wandb_init.py:init():491] communicating current version +2021-07-15 00:52:29,367 INFO MainThread:623339 [wandb_init.py:init():496] got version response +2021-07-15 00:52:29,367 INFO MainThread:623339 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 00:52:29,550 INFO MainThread:623339 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 00:52:30,677 INFO MainThread:623339 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 00:52:30,678 INFO MainThread:623339 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 00:52:30,678 INFO MainThread:623339 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 00:52:30,680 INFO MainThread:623339 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 00:52:30,680 INFO MainThread:623339 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 00:52:30,686 INFO MainThread:623339 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_00-52-20_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 00:52:30,688 INFO MainThread:623339 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-15 00:52:30,689 INFO MainThread:623339 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 1000} +2021-07-15 00:59:41,158 INFO MainThread:623339 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 00:59:41,159 INFO MainThread:623339 [wandb_run.py:_restore():1565] restore +2021-07-15 00:59:43,396 INFO MainThread:623339 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 00:59:43,676 INFO MainThread:623339 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 00:59:43,779 INFO MainThread:623339 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11562 +} + +2021-07-15 00:59:43,881 INFO MainThread:623339 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 00:59:43,983 INFO MainThread:623339 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 00:59:44,085 INFO MainThread:623339 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 00:59:44,187 INFO MainThread:623339 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 00:59:44,290 INFO MainThread:623339 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 00:59:44,392 INFO MainThread:623339 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 00:59:44,687 INFO MainThread:623339 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 00:59:44,790 INFO MainThread:623339 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 00:59:46,069 INFO MainThread:623339 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_005228-361cvuow/run-361cvuow.wandb b/wandb/run-20210715_005228-361cvuow/run-361cvuow.wandb new file mode 100644 index 0000000000000000000000000000000000000000..11b78fce46ebc96035ece61d2101da90f42be8f3 Binary files /dev/null and b/wandb/run-20210715_005228-361cvuow/run-361cvuow.wandb differ diff --git a/wandb/run-20210715_010038-2gdygcu4/files/config.yaml b/wandb/run-20210715_010038-2gdygcu4/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de0d732f97aa04800d8621b9b0f532cca45e767a --- /dev/null +++ b/wandb/run-20210715_010038-2gdygcu4/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 4 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_01-00-30_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 50 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 1000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_010038-2gdygcu4/files/output.log b/wandb/run-20210715_010038-2gdygcu4/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d6a43676343722cb2f3e6cf3abd44404aebec230 --- /dev/null +++ b/wandb/run-20210715_010038-2gdygcu4/files/output.log @@ -0,0 +1,39 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 8.94G free, 0B reserved, and 8.90G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax.py", line 815, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 8.94G free, 0B reserved, and 8.90G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210715_010038-2gdygcu4/files/requirements.txt b/wandb/run-20210715_010038-2gdygcu4/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_010038-2gdygcu4/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_010038-2gdygcu4/files/wandb-metadata.json b/wandb/run-20210715_010038-2gdygcu4/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..3d2ef211fb137ddb9c277a75eca9adb24447f316 --- /dev/null +++ b/wandb/run-20210715_010038-2gdygcu4/files/wandb-metadata.json @@ -0,0 +1,45 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T01:00:40.314950", + "startedAt": "2021-07-15T01:00:38.322778", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=5000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=50", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=1000", + "--gradient_accumulation_steps=4" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_010038-2gdygcu4/files/wandb-summary.json b/wandb/run-20210715_010038-2gdygcu4/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_010038-2gdygcu4/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_010038-2gdygcu4/logs/debug-internal.log b/wandb/run-20210715_010038-2gdygcu4/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..38e3624db667d91d79b632e09bf7bae7e65e1f69 --- /dev/null +++ b/wandb/run-20210715_010038-2gdygcu4/logs/debug-internal.log @@ -0,0 +1,203 @@ +2021-07-15 01:00:39,018 INFO MainThread:626669 [internal.py:wandb_internal():88] W&B internal server running at pid: 626669, started at: 2021-07-15 01:00:39.017812 +2021-07-15 01:00:39,020 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 01:00:39,020 INFO WriterThread:626669 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/run-2gdygcu4.wandb +2021-07-15 01:00:39,021 DEBUG SenderThread:626669 [sender.py:send():179] send: header +2021-07-15 01:00:39,021 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: check_version +2021-07-15 01:00:39,055 DEBUG SenderThread:626669 [sender.py:send():179] send: run +2021-07-15 01:00:39,219 INFO SenderThread:626669 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files +2021-07-15 01:00:39,219 INFO SenderThread:626669 [sender.py:_start_run_threads():716] run started: 2gdygcu4 with start time 1626310838 +2021-07-15 01:00:39,220 DEBUG SenderThread:626669 [sender.py:send():179] send: summary +2021-07-15 01:00:39,220 INFO SenderThread:626669 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 01:00:39,220 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 01:00:40,222 INFO Thread-8 :626669 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/wandb-summary.json +2021-07-15 01:00:40,314 DEBUG HandlerThread:626669 [meta.py:__init__():39] meta init +2021-07-15 01:00:40,314 DEBUG HandlerThread:626669 [meta.py:__init__():53] meta init done +2021-07-15 01:00:40,314 DEBUG HandlerThread:626669 [meta.py:probe():210] probe +2021-07-15 01:00:40,316 DEBUG HandlerThread:626669 [meta.py:_setup_git():200] setup git +2021-07-15 01:00:40,345 DEBUG HandlerThread:626669 [meta.py:_setup_git():207] setup git done +2021-07-15 01:00:40,345 DEBUG HandlerThread:626669 [meta.py:_save_pip():57] save pip +2021-07-15 01:00:40,346 DEBUG HandlerThread:626669 [meta.py:_save_pip():71] save pip done +2021-07-15 01:00:40,346 DEBUG HandlerThread:626669 [meta.py:probe():252] probe done +2021-07-15 01:00:40,349 DEBUG SenderThread:626669 [sender.py:send():179] send: files +2021-07-15 01:00:40,349 INFO SenderThread:626669 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 01:00:40,354 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:00:40,355 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:00:40,481 DEBUG SenderThread:626669 [sender.py:send():179] send: config +2021-07-15 01:00:40,482 DEBUG SenderThread:626669 [sender.py:send():179] send: config +2021-07-15 01:00:40,482 DEBUG SenderThread:626669 [sender.py:send():179] send: config +2021-07-15 01:00:40,774 INFO Thread-11 :626669 [upload_job.py:push():137] Uploaded file /tmp/tmpwx_o6qsuwandb/316agsp6-wandb-metadata.json +2021-07-15 01:00:41,221 INFO Thread-8 :626669 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/requirements.txt +2021-07-15 01:00:41,221 INFO Thread-8 :626669 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/wandb-metadata.json +2021-07-15 01:00:41,221 INFO Thread-8 :626669 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/output.log +2021-07-15 01:00:55,227 INFO Thread-8 :626669 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/output.log +2021-07-15 01:00:55,483 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:00:55,484 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:01:08,399 DEBUG SenderThread:626669 [sender.py:send():179] send: stats +2021-07-15 01:01:10,233 INFO Thread-8 :626669 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/config.yaml +2021-07-15 01:01:10,612 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:01:10,613 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:01:25,743 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:01:25,744 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:01:38,482 DEBUG SenderThread:626669 [sender.py:send():179] send: stats +2021-07-15 01:01:40,875 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:01:40,875 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:01:56,006 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:01:56,006 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:02:08,558 DEBUG SenderThread:626669 [sender.py:send():179] send: stats +2021-07-15 01:02:11,138 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:02:11,138 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:02:26,268 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:02:26,268 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:02:38,636 DEBUG SenderThread:626669 [sender.py:send():179] send: stats +2021-07-15 01:02:41,402 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:02:41,402 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:02:56,534 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:02:56,534 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:03:08,712 DEBUG SenderThread:626669 [sender.py:send():179] send: stats +2021-07-15 01:03:11,667 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:03:11,668 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:03:26,799 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:03:26,800 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:03:38,780 DEBUG SenderThread:626669 [sender.py:send():179] send: stats +2021-07-15 01:03:41,932 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:03:41,932 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:03:57,065 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:03:57,065 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:04:08,851 DEBUG SenderThread:626669 [sender.py:send():179] send: stats +2021-07-15 01:04:12,197 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:04:12,197 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:04:27,326 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:04:27,327 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:04:38,928 DEBUG SenderThread:626669 [sender.py:send():179] send: stats +2021-07-15 01:04:42,462 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:04:42,463 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:04:57,592 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:04:57,593 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:05:08,987 DEBUG SenderThread:626669 [sender.py:send():179] send: stats +2021-07-15 01:05:12,723 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:05:12,723 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:05:27,851 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:05:27,852 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:05:39,058 DEBUG SenderThread:626669 [sender.py:send():179] send: stats +2021-07-15 01:05:39,359 INFO Thread-8 :626669 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/output.log +2021-07-15 01:05:43,032 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:05:43,032 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:05:58,189 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:05:58,190 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:06:09,137 DEBUG SenderThread:626669 [sender.py:send():179] send: stats +2021-07-15 01:06:13,331 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:06:13,331 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:06:28,461 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:06:28,461 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:06:39,220 DEBUG SenderThread:626669 [sender.py:send():179] send: stats +2021-07-15 01:06:43,596 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:06:43,596 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:06:58,728 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:06:58,728 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:07:05,299 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:07:05,299 DEBUG SenderThread:626669 [sender.py:send():179] send: telemetry +2021-07-15 01:07:05,299 DEBUG SenderThread:626669 [sender.py:send():179] send: exit +2021-07-15 01:07:05,299 INFO SenderThread:626669 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 01:07:05,299 INFO SenderThread:626669 [sender.py:send_exit():295] send defer +2021-07-15 01:07:05,299 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:07:05,300 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:07:05,300 INFO HandlerThread:626669 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 01:07:05,300 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: defer +2021-07-15 01:07:05,300 INFO SenderThread:626669 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 01:07:05,300 INFO SenderThread:626669 [sender.py:transition_state():308] send defer: 1 +2021-07-15 01:07:05,300 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:07:05,301 INFO HandlerThread:626669 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 01:07:05,385 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: defer +2021-07-15 01:07:05,385 INFO SenderThread:626669 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 01:07:05,385 INFO SenderThread:626669 [sender.py:transition_state():308] send defer: 2 +2021-07-15 01:07:05,385 DEBUG SenderThread:626669 [sender.py:send():179] send: stats +2021-07-15 01:07:05,386 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:07:05,386 INFO HandlerThread:626669 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 01:07:05,386 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: defer +2021-07-15 01:07:05,386 INFO SenderThread:626669 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 01:07:05,386 INFO SenderThread:626669 [sender.py:transition_state():308] send defer: 3 +2021-07-15 01:07:05,386 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:07:05,386 INFO HandlerThread:626669 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 01:07:05,386 DEBUG SenderThread:626669 [sender.py:send():179] send: summary +2021-07-15 01:07:05,387 INFO SenderThread:626669 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 01:07:05,387 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: defer +2021-07-15 01:07:05,387 INFO SenderThread:626669 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 01:07:05,387 INFO SenderThread:626669 [sender.py:transition_state():308] send defer: 4 +2021-07-15 01:07:05,387 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:07:05,387 INFO HandlerThread:626669 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 01:07:05,387 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: defer +2021-07-15 01:07:05,387 INFO SenderThread:626669 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 01:07:05,393 INFO Thread-8 :626669 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/wandb-summary.json +2021-07-15 01:07:05,393 INFO Thread-8 :626669 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/output.log +2021-07-15 01:07:05,401 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:07:05,571 INFO SenderThread:626669 [sender.py:transition_state():308] send defer: 5 +2021-07-15 01:07:05,571 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:07:05,572 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:07:05,572 INFO HandlerThread:626669 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 01:07:05,572 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: defer +2021-07-15 01:07:05,572 INFO SenderThread:626669 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 01:07:05,572 INFO SenderThread:626669 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 01:07:05,673 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:07:06,394 INFO Thread-8 :626669 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/config.yaml +2021-07-15 01:07:06,394 INFO SenderThread:626669 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files +2021-07-15 01:07:06,395 INFO SenderThread:626669 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/requirements.txt requirements.txt +2021-07-15 01:07:06,395 INFO SenderThread:626669 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/output.log output.log +2021-07-15 01:07:06,395 INFO SenderThread:626669 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/wandb-metadata.json wandb-metadata.json +2021-07-15 01:07:06,395 INFO SenderThread:626669 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/config.yaml config.yaml +2021-07-15 01:07:06,395 INFO SenderThread:626669 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/wandb-summary.json wandb-summary.json +2021-07-15 01:07:06,395 INFO SenderThread:626669 [sender.py:transition_state():308] send defer: 6 +2021-07-15 01:07:06,396 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:07:06,399 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:07:06,399 INFO HandlerThread:626669 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 01:07:06,399 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: defer +2021-07-15 01:07:06,400 INFO SenderThread:626669 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 01:07:06,400 INFO SenderThread:626669 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 01:07:06,501 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:07:06,501 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:07:06,603 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:07:06,604 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:07:06,706 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:07:06,706 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:07:06,808 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:07:06,808 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:07:06,857 INFO Thread-12 :626669 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/requirements.txt +2021-07-15 01:07:06,884 INFO Thread-15 :626669 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/wandb-summary.json +2021-07-15 01:07:06,910 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:07:06,910 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:07:06,920 INFO Thread-14 :626669 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/config.yaml +2021-07-15 01:07:06,968 INFO Thread-13 :626669 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/files/output.log +2021-07-15 01:07:07,012 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:07:07,012 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:07:07,115 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:07:07,115 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:07:07,169 INFO Thread-7 :626669 [sender.py:transition_state():308] send defer: 7 +2021-07-15 01:07:07,169 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:07:07,169 INFO HandlerThread:626669 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 01:07:07,170 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: defer +2021-07-15 01:07:07,170 INFO SenderThread:626669 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 01:07:07,217 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:07:07,450 INFO SenderThread:626669 [sender.py:transition_state():308] send defer: 8 +2021-07-15 01:07:07,450 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:07:07,451 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:07:07,451 INFO HandlerThread:626669 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 01:07:07,451 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: defer +2021-07-15 01:07:07,451 INFO SenderThread:626669 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 01:07:07,451 INFO SenderThread:626669 [sender.py:transition_state():308] send defer: 9 +2021-07-15 01:07:07,452 DEBUG SenderThread:626669 [sender.py:send():179] send: final +2021-07-15 01:07:07,452 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:07:07,452 INFO HandlerThread:626669 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 01:07:07,452 DEBUG SenderThread:626669 [sender.py:send():179] send: footer +2021-07-15 01:07:07,452 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: defer +2021-07-15 01:07:07,452 INFO SenderThread:626669 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 01:07:07,552 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:07:07,552 DEBUG SenderThread:626669 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:07:07,553 INFO SenderThread:626669 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 01:07:07,554 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 01:07:07,555 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 01:07:07,555 DEBUG HandlerThread:626669 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 01:07:07,555 INFO HandlerThread:626669 [handler.py:finish():638] shutting down handler +2021-07-15 01:07:08,453 INFO WriterThread:626669 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/run-2gdygcu4.wandb +2021-07-15 01:07:08,553 INFO SenderThread:626669 [sender.py:finish():945] shutting down sender +2021-07-15 01:07:08,553 INFO SenderThread:626669 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 01:07:08,553 INFO SenderThread:626669 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 01:07:08,555 INFO MainThread:626669 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_010038-2gdygcu4/logs/debug.log b/wandb/run-20210715_010038-2gdygcu4/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..a6cebc8f60df94ad6ed405c7749d70245416cad1 --- /dev/null +++ b/wandb/run-20210715_010038-2gdygcu4/logs/debug.log @@ -0,0 +1,127 @@ +2021-07-15 01:00:38,324 INFO MainThread:625421 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 01:00:38,324 INFO MainThread:625421 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 01:00:38,324 INFO MainThread:625421 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/logs/debug.log +2021-07-15 01:00:38,324 INFO MainThread:625421 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_010038-2gdygcu4/logs/debug-internal.log +2021-07-15 01:00:38,324 INFO MainThread:625421 [wandb_init.py:init():370] calling init triggers +2021-07-15 01:00:38,324 INFO MainThread:625421 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 01:00:38,324 INFO MainThread:625421 [wandb_init.py:init():419] starting backend +2021-07-15 01:00:38,324 INFO MainThread:625421 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 01:00:38,373 INFO MainThread:625421 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 01:00:38,419 INFO MainThread:625421 [backend.py:ensure_launched():139] started backend process with pid: 626669 +2021-07-15 01:00:38,421 INFO MainThread:625421 [wandb_init.py:init():424] backend started and connected +2021-07-15 01:00:38,424 INFO MainThread:625421 [wandb_init.py:init():472] updated telemetry +2021-07-15 01:00:38,425 INFO MainThread:625421 [wandb_init.py:init():491] communicating current version +2021-07-15 01:00:39,054 INFO MainThread:625421 [wandb_init.py:init():496] got version response +2021-07-15 01:00:39,054 INFO MainThread:625421 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 01:00:39,219 INFO MainThread:625421 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 01:00:40,352 INFO MainThread:625421 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 01:00:40,353 INFO MainThread:625421 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 01:00:40,353 INFO MainThread:625421 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 01:00:40,355 INFO MainThread:625421 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 01:00:40,355 INFO MainThread:625421 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 01:00:40,361 INFO MainThread:625421 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_01-00-30_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 01:00:40,363 INFO MainThread:625421 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-15 01:00:40,364 INFO MainThread:625421 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 1000} +2021-07-15 01:07:03,087 INFO MainThread:625421 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 01:07:03,088 INFO MainThread:625421 [wandb_run.py:_restore():1565] restore +2021-07-15 01:07:05,300 INFO MainThread:625421 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 01:07:05,572 INFO MainThread:625421 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 01:07:06,399 INFO MainThread:625421 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11562 +} + +2021-07-15 01:07:06,502 INFO MainThread:625421 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11562 +} + +2021-07-15 01:07:06,604 INFO MainThread:625421 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 01:07:06,706 INFO MainThread:625421 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 01:07:06,809 INFO MainThread:625421 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 01:07:06,911 INFO MainThread:625421 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 01:07:07,013 INFO MainThread:625421 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 01:07:07,116 INFO MainThread:625421 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 01:07:07,451 INFO MainThread:625421 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 01:07:07,553 INFO MainThread:625421 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 01:07:08,813 INFO MainThread:625421 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_010038-2gdygcu4/run-2gdygcu4.wandb b/wandb/run-20210715_010038-2gdygcu4/run-2gdygcu4.wandb new file mode 100644 index 0000000000000000000000000000000000000000..b457839eab932ae2458d94ee8a8e48da36abde89 Binary files /dev/null and b/wandb/run-20210715_010038-2gdygcu4/run-2gdygcu4.wandb differ diff --git a/wandb/run-20210715_010838-hepp3r83/files/config.yaml b/wandb/run-20210715_010838-hepp3r83/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..030ff6a4d4b2d88d960503c9166c8a1f973b71a9 --- /dev/null +++ b/wandb/run-20210715_010838-hepp3r83/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 4 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_01-08-29_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 50 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 1000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_010838-hepp3r83/files/output.log b/wandb/run-20210715_010838-hepp3r83/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..f0c0c7b41dd5fa2314d0785878c7549923271415 --- /dev/null +++ b/wandb/run-20210715_010838-hepp3r83/files/output.log @@ -0,0 +1,39 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 9.72G free, 0B reserved, and 9.67G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax.py", line 815, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 9.72G free, 0B reserved, and 9.67G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210715_010838-hepp3r83/files/requirements.txt b/wandb/run-20210715_010838-hepp3r83/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_010838-hepp3r83/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_010838-hepp3r83/files/wandb-metadata.json b/wandb/run-20210715_010838-hepp3r83/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..61bfd74af39090b0a9809e9415d0ab8a99b4b454 --- /dev/null +++ b/wandb/run-20210715_010838-hepp3r83/files/wandb-metadata.json @@ -0,0 +1,45 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T01:08:40.001467", + "startedAt": "2021-07-15T01:08:38.011918", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=5000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=50", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=1000", + "--gradient_accumulation_steps=4" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_010838-hepp3r83/files/wandb-summary.json b/wandb/run-20210715_010838-hepp3r83/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_010838-hepp3r83/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_010838-hepp3r83/logs/debug-internal.log b/wandb/run-20210715_010838-hepp3r83/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..1fab240e9aae4594f4b0018da0380087dc972019 --- /dev/null +++ b/wandb/run-20210715_010838-hepp3r83/logs/debug-internal.log @@ -0,0 +1,193 @@ +2021-07-15 01:08:38,704 INFO MainThread:628228 [internal.py:wandb_internal():88] W&B internal server running at pid: 628228, started at: 2021-07-15 01:08:38.703926 +2021-07-15 01:08:38,706 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 01:08:38,706 INFO WriterThread:628228 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/run-hepp3r83.wandb +2021-07-15 01:08:38,707 DEBUG SenderThread:628228 [sender.py:send():179] send: header +2021-07-15 01:08:38,707 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: check_version +2021-07-15 01:08:38,744 DEBUG SenderThread:628228 [sender.py:send():179] send: run +2021-07-15 01:08:38,910 INFO SenderThread:628228 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files +2021-07-15 01:08:38,910 INFO SenderThread:628228 [sender.py:_start_run_threads():716] run started: hepp3r83 with start time 1626311318 +2021-07-15 01:08:38,911 DEBUG SenderThread:628228 [sender.py:send():179] send: summary +2021-07-15 01:08:38,911 INFO SenderThread:628228 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 01:08:38,911 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 01:08:39,914 INFO Thread-8 :628228 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/wandb-summary.json +2021-07-15 01:08:40,001 DEBUG HandlerThread:628228 [meta.py:__init__():39] meta init +2021-07-15 01:08:40,001 DEBUG HandlerThread:628228 [meta.py:__init__():53] meta init done +2021-07-15 01:08:40,001 DEBUG HandlerThread:628228 [meta.py:probe():210] probe +2021-07-15 01:08:40,002 DEBUG HandlerThread:628228 [meta.py:_setup_git():200] setup git +2021-07-15 01:08:40,031 DEBUG HandlerThread:628228 [meta.py:_setup_git():207] setup git done +2021-07-15 01:08:40,032 DEBUG HandlerThread:628228 [meta.py:_save_pip():57] save pip +2021-07-15 01:08:40,032 DEBUG HandlerThread:628228 [meta.py:_save_pip():71] save pip done +2021-07-15 01:08:40,032 DEBUG HandlerThread:628228 [meta.py:probe():252] probe done +2021-07-15 01:08:40,035 DEBUG SenderThread:628228 [sender.py:send():179] send: files +2021-07-15 01:08:40,035 INFO SenderThread:628228 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 01:08:40,041 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:08:40,041 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:08:40,174 DEBUG SenderThread:628228 [sender.py:send():179] send: config +2021-07-15 01:08:40,174 DEBUG SenderThread:628228 [sender.py:send():179] send: config +2021-07-15 01:08:40,175 DEBUG SenderThread:628228 [sender.py:send():179] send: config +2021-07-15 01:08:40,469 INFO Thread-11 :628228 [upload_job.py:push():137] Uploaded file /tmp/tmpdiv10f4rwandb/1ladryaq-wandb-metadata.json +2021-07-15 01:08:40,913 INFO Thread-8 :628228 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/output.log +2021-07-15 01:08:40,913 INFO Thread-8 :628228 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/wandb-metadata.json +2021-07-15 01:08:40,913 INFO Thread-8 :628228 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/requirements.txt +2021-07-15 01:08:54,918 INFO Thread-8 :628228 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/output.log +2021-07-15 01:08:55,176 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:08:55,177 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:09:08,085 DEBUG SenderThread:628228 [sender.py:send():179] send: stats +2021-07-15 01:09:09,924 INFO Thread-8 :628228 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/config.yaml +2021-07-15 01:09:10,307 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:09:10,308 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:09:25,440 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:09:25,440 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:09:38,165 DEBUG SenderThread:628228 [sender.py:send():179] send: stats +2021-07-15 01:09:40,570 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:09:40,571 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:09:55,703 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:09:55,703 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:10:08,241 DEBUG SenderThread:628228 [sender.py:send():179] send: stats +2021-07-15 01:10:10,835 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:10:10,836 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:10:25,968 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:10:25,968 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:10:38,315 DEBUG SenderThread:628228 [sender.py:send():179] send: stats +2021-07-15 01:10:41,102 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:10:41,102 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:10:56,235 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:10:56,235 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:11:08,383 DEBUG SenderThread:628228 [sender.py:send():179] send: stats +2021-07-15 01:11:11,368 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:11:11,368 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:11:26,499 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:11:26,500 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:11:38,459 DEBUG SenderThread:628228 [sender.py:send():179] send: stats +2021-07-15 01:11:41,634 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:11:41,634 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:11:56,768 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:11:56,769 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:12:08,533 DEBUG SenderThread:628228 [sender.py:send():179] send: stats +2021-07-15 01:12:11,900 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:12:11,901 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:12:27,032 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:12:27,032 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:12:38,609 DEBUG SenderThread:628228 [sender.py:send():179] send: stats +2021-07-15 01:12:42,164 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:12:42,165 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:12:47,008 INFO Thread-8 :628228 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/output.log +2021-07-15 01:12:57,312 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:12:57,312 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:13:08,688 DEBUG SenderThread:628228 [sender.py:send():179] send: stats +2021-07-15 01:13:12,457 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:13:12,457 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:13:27,589 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:13:27,589 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:13:38,769 DEBUG SenderThread:628228 [sender.py:send():179] send: stats +2021-07-15 01:13:42,720 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:13:42,721 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:13:57,852 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:13:57,852 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:14:08,848 DEBUG SenderThread:628228 [sender.py:send():179] send: stats +2021-07-15 01:14:13,043 INFO Thread-8 :628228 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/output.log +2021-07-15 01:14:13,314 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:14:13,314 DEBUG SenderThread:628228 [sender.py:send():179] send: telemetry +2021-07-15 01:14:13,315 DEBUG SenderThread:628228 [sender.py:send():179] send: exit +2021-07-15 01:14:13,315 INFO SenderThread:628228 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 01:14:13,315 INFO SenderThread:628228 [sender.py:send_exit():295] send defer +2021-07-15 01:14:13,315 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:14:13,316 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:14:13,316 INFO HandlerThread:628228 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 01:14:13,316 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: defer +2021-07-15 01:14:13,316 INFO SenderThread:628228 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 01:14:13,316 INFO SenderThread:628228 [sender.py:transition_state():308] send defer: 1 +2021-07-15 01:14:13,316 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:14:13,316 INFO HandlerThread:628228 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 01:14:13,362 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: defer +2021-07-15 01:14:13,362 INFO SenderThread:628228 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 01:14:13,362 INFO SenderThread:628228 [sender.py:transition_state():308] send defer: 2 +2021-07-15 01:14:13,362 DEBUG SenderThread:628228 [sender.py:send():179] send: stats +2021-07-15 01:14:13,363 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:14:13,363 INFO HandlerThread:628228 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 01:14:13,363 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: defer +2021-07-15 01:14:13,363 INFO SenderThread:628228 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 01:14:13,363 INFO SenderThread:628228 [sender.py:transition_state():308] send defer: 3 +2021-07-15 01:14:13,363 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:14:13,363 INFO HandlerThread:628228 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 01:14:13,363 DEBUG SenderThread:628228 [sender.py:send():179] send: summary +2021-07-15 01:14:13,364 INFO SenderThread:628228 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 01:14:13,364 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: defer +2021-07-15 01:14:13,364 INFO SenderThread:628228 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 01:14:13,364 INFO SenderThread:628228 [sender.py:transition_state():308] send defer: 4 +2021-07-15 01:14:13,364 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:14:13,364 INFO HandlerThread:628228 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 01:14:13,364 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: defer +2021-07-15 01:14:13,364 INFO SenderThread:628228 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 01:14:13,417 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:14:13,544 INFO SenderThread:628228 [sender.py:transition_state():308] send defer: 5 +2021-07-15 01:14:13,545 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:14:13,545 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:14:13,545 INFO HandlerThread:628228 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 01:14:13,545 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: defer +2021-07-15 01:14:13,545 INFO SenderThread:628228 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 01:14:13,545 INFO SenderThread:628228 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 01:14:13,647 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:14:14,043 INFO Thread-8 :628228 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/config.yaml +2021-07-15 01:14:14,044 INFO SenderThread:628228 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/output.log +2021-07-15 01:14:14,044 INFO SenderThread:628228 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/wandb-summary.json +2021-07-15 01:14:14,044 INFO SenderThread:628228 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files +2021-07-15 01:14:14,045 INFO SenderThread:628228 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/requirements.txt requirements.txt +2021-07-15 01:14:14,045 INFO SenderThread:628228 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/output.log output.log +2021-07-15 01:14:14,045 INFO SenderThread:628228 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/wandb-metadata.json wandb-metadata.json +2021-07-15 01:14:14,045 INFO SenderThread:628228 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/config.yaml config.yaml +2021-07-15 01:14:14,045 INFO SenderThread:628228 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/wandb-summary.json wandb-summary.json +2021-07-15 01:14:14,045 INFO SenderThread:628228 [sender.py:transition_state():308] send defer: 6 +2021-07-15 01:14:14,046 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:14:14,052 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:14:14,052 INFO HandlerThread:628228 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 01:14:14,052 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: defer +2021-07-15 01:14:14,052 INFO SenderThread:628228 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 01:14:14,052 INFO SenderThread:628228 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 01:14:14,151 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:14:14,151 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:14:14,253 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:14:14,253 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:14:14,355 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:14:14,355 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:14:14,457 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:14:14,457 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:14:14,483 INFO Thread-12 :628228 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/requirements.txt +2021-07-15 01:14:14,487 INFO Thread-15 :628228 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/wandb-summary.json +2021-07-15 01:14:14,535 INFO Thread-14 :628228 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/config.yaml +2021-07-15 01:14:14,545 INFO Thread-13 :628228 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/files/output.log +2021-07-15 01:14:14,559 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:14:14,559 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:14:14,661 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:14:14,661 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:14:14,745 INFO Thread-7 :628228 [sender.py:transition_state():308] send defer: 7 +2021-07-15 01:14:14,745 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:14:14,746 INFO HandlerThread:628228 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 01:14:14,746 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: defer +2021-07-15 01:14:14,746 INFO SenderThread:628228 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 01:14:14,763 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:14:15,027 INFO SenderThread:628228 [sender.py:transition_state():308] send defer: 8 +2021-07-15 01:14:15,028 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:14:15,028 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:14:15,028 INFO HandlerThread:628228 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 01:14:15,028 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: defer +2021-07-15 01:14:15,028 INFO SenderThread:628228 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 01:14:15,028 INFO SenderThread:628228 [sender.py:transition_state():308] send defer: 9 +2021-07-15 01:14:15,029 DEBUG SenderThread:628228 [sender.py:send():179] send: final +2021-07-15 01:14:15,029 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:14:15,029 DEBUG SenderThread:628228 [sender.py:send():179] send: footer +2021-07-15 01:14:15,029 INFO HandlerThread:628228 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 01:14:15,030 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: defer +2021-07-15 01:14:15,030 INFO SenderThread:628228 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 01:14:15,130 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:14:15,130 DEBUG SenderThread:628228 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:14:15,130 INFO SenderThread:628228 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 01:14:15,131 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 01:14:15,132 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 01:14:15,132 DEBUG HandlerThread:628228 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 01:14:15,133 INFO HandlerThread:628228 [handler.py:finish():638] shutting down handler +2021-07-15 01:14:16,030 INFO WriterThread:628228 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/run-hepp3r83.wandb +2021-07-15 01:14:16,130 INFO SenderThread:628228 [sender.py:finish():945] shutting down sender +2021-07-15 01:14:16,131 INFO SenderThread:628228 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 01:14:16,131 INFO SenderThread:628228 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 01:14:16,133 INFO MainThread:628228 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_010838-hepp3r83/logs/debug.log b/wandb/run-20210715_010838-hepp3r83/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..5912b0777a0df0a2ef49017b8b4eb5451ec330d7 --- /dev/null +++ b/wandb/run-20210715_010838-hepp3r83/logs/debug.log @@ -0,0 +1,119 @@ +2021-07-15 01:08:38,013 INFO MainThread:626982 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 01:08:38,013 INFO MainThread:626982 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 01:08:38,013 INFO MainThread:626982 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/logs/debug.log +2021-07-15 01:08:38,013 INFO MainThread:626982 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_010838-hepp3r83/logs/debug-internal.log +2021-07-15 01:08:38,013 INFO MainThread:626982 [wandb_init.py:init():370] calling init triggers +2021-07-15 01:08:38,013 INFO MainThread:626982 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 01:08:38,014 INFO MainThread:626982 [wandb_init.py:init():419] starting backend +2021-07-15 01:08:38,014 INFO MainThread:626982 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 01:08:38,062 INFO MainThread:626982 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 01:08:38,109 INFO MainThread:626982 [backend.py:ensure_launched():139] started backend process with pid: 628228 +2021-07-15 01:08:38,111 INFO MainThread:626982 [wandb_init.py:init():424] backend started and connected +2021-07-15 01:08:38,114 INFO MainThread:626982 [wandb_init.py:init():472] updated telemetry +2021-07-15 01:08:38,115 INFO MainThread:626982 [wandb_init.py:init():491] communicating current version +2021-07-15 01:08:38,743 INFO MainThread:626982 [wandb_init.py:init():496] got version response +2021-07-15 01:08:38,743 INFO MainThread:626982 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 01:08:38,910 INFO MainThread:626982 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 01:08:40,039 INFO MainThread:626982 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 01:08:40,039 INFO MainThread:626982 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 01:08:40,040 INFO MainThread:626982 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 01:08:40,042 INFO MainThread:626982 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 01:08:40,042 INFO MainThread:626982 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 01:08:40,047 INFO MainThread:626982 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_01-08-29_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 01:08:40,049 INFO MainThread:626982 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-15 01:08:40,051 INFO MainThread:626982 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 1000} +2021-07-15 01:14:11,086 INFO MainThread:626982 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 01:14:11,087 INFO MainThread:626982 [wandb_run.py:_restore():1565] restore +2021-07-15 01:14:13,316 INFO MainThread:626982 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 01:14:13,545 INFO MainThread:626982 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 01:14:14,049 INFO MainThread:626982 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11560 +} + +2021-07-15 01:14:14,151 INFO MainThread:626982 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11562 +} + +2021-07-15 01:14:14,254 INFO MainThread:626982 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 01:14:14,356 INFO MainThread:626982 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 01:14:14,458 INFO MainThread:626982 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 01:14:14,560 INFO MainThread:626982 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 01:14:14,662 INFO MainThread:626982 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 01:14:15,028 INFO MainThread:626982 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 01:14:15,131 INFO MainThread:626982 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11562 + total_bytes: 11562 +} + +2021-07-15 01:14:16,396 INFO MainThread:626982 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_010838-hepp3r83/run-hepp3r83.wandb b/wandb/run-20210715_010838-hepp3r83/run-hepp3r83.wandb new file mode 100644 index 0000000000000000000000000000000000000000..532e338458961840e3e958abbb7e66730b328cd1 Binary files /dev/null and b/wandb/run-20210715_010838-hepp3r83/run-hepp3r83.wandb differ diff --git a/wandb/run-20210715_011557-15p55mwc/files/config.yaml b/wandb/run-20210715_011557-15p55mwc/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e6af938c8ba358da2070e33ab1e5c999eca9581d --- /dev/null +++ b/wandb/run-20210715_011557-15p55mwc/files/config.yaml @@ -0,0 +1,304 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 4 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_01-15-49_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 50 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 1000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_011557-15p55mwc/files/output.log b/wandb/run-20210715_011557-15p55mwc/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..02aebae2d6c6a78caa7ae9bab253e5385a3a6c36 --- /dev/null +++ b/wandb/run-20210715_011557-15p55mwc/files/output.log @@ -0,0 +1,30 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + samples = [tokenized_datasets["train"][int(idx)] for idx in batch_idx] diff --git a/wandb/run-20210715_011557-15p55mwc/files/requirements.txt b/wandb/run-20210715_011557-15p55mwc/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_011557-15p55mwc/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_011557-15p55mwc/files/wandb-metadata.json b/wandb/run-20210715_011557-15p55mwc/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..e2b10c0dabdf98fb7161f6708910a64d689ae5ad --- /dev/null +++ b/wandb/run-20210715_011557-15p55mwc/files/wandb-metadata.json @@ -0,0 +1,45 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T01:15:59.927497", + "startedAt": "2021-07-15T01:15:57.930755", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=5000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=50", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=1000", + "--gradient_accumulation_steps=4" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_011557-15p55mwc/files/wandb-summary.json b/wandb/run-20210715_011557-15p55mwc/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9af2685b9ef8dd0dd2f413a7feae3465018cbb17 --- /dev/null +++ b/wandb/run-20210715_011557-15p55mwc/files/wandb-summary.json @@ -0,0 +1 @@ +{"training_step": 50, "learning_rate": 1.1999873095192015e-07, "train_loss": 10.934200286865234, "_runtime": 167, "_timestamp": 1626311925, "_step": 0} \ No newline at end of file diff --git a/wandb/run-20210715_011557-15p55mwc/logs/debug-internal.log b/wandb/run-20210715_011557-15p55mwc/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..276a95a92b17c23d4cdb3ee98db0db59d90b0dab --- /dev/null +++ b/wandb/run-20210715_011557-15p55mwc/logs/debug-internal.log @@ -0,0 +1,105 @@ +2021-07-15 01:15:58,624 INFO MainThread:629810 [internal.py:wandb_internal():88] W&B internal server running at pid: 629810, started at: 2021-07-15 01:15:58.624293 +2021-07-15 01:15:58,626 DEBUG HandlerThread:629810 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 01:15:58,626 INFO WriterThread:629810 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/run-15p55mwc.wandb +2021-07-15 01:15:58,627 DEBUG SenderThread:629810 [sender.py:send():179] send: header +2021-07-15 01:15:58,627 DEBUG SenderThread:629810 [sender.py:send_request():193] send_request: check_version +2021-07-15 01:15:58,664 DEBUG SenderThread:629810 [sender.py:send():179] send: run +2021-07-15 01:15:58,831 INFO SenderThread:629810 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files +2021-07-15 01:15:58,831 INFO SenderThread:629810 [sender.py:_start_run_threads():716] run started: 15p55mwc with start time 1626311758 +2021-07-15 01:15:58,831 DEBUG SenderThread:629810 [sender.py:send():179] send: summary +2021-07-15 01:15:58,831 INFO SenderThread:629810 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 01:15:58,832 DEBUG HandlerThread:629810 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 01:15:59,834 INFO Thread-8 :629810 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/wandb-summary.json +2021-07-15 01:15:59,927 DEBUG HandlerThread:629810 [meta.py:__init__():39] meta init +2021-07-15 01:15:59,927 DEBUG HandlerThread:629810 [meta.py:__init__():53] meta init done +2021-07-15 01:15:59,927 DEBUG HandlerThread:629810 [meta.py:probe():210] probe +2021-07-15 01:15:59,928 DEBUG HandlerThread:629810 [meta.py:_setup_git():200] setup git +2021-07-15 01:15:59,958 DEBUG HandlerThread:629810 [meta.py:_setup_git():207] setup git done +2021-07-15 01:15:59,958 DEBUG HandlerThread:629810 [meta.py:_save_pip():57] save pip +2021-07-15 01:15:59,959 DEBUG HandlerThread:629810 [meta.py:_save_pip():71] save pip done +2021-07-15 01:15:59,959 DEBUG HandlerThread:629810 [meta.py:probe():252] probe done +2021-07-15 01:15:59,962 DEBUG SenderThread:629810 [sender.py:send():179] send: files +2021-07-15 01:15:59,962 INFO SenderThread:629810 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 01:15:59,968 DEBUG HandlerThread:629810 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:15:59,968 DEBUG SenderThread:629810 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:16:00,097 DEBUG SenderThread:629810 [sender.py:send():179] send: config +2021-07-15 01:16:00,097 DEBUG SenderThread:629810 [sender.py:send():179] send: config +2021-07-15 01:16:00,097 DEBUG SenderThread:629810 [sender.py:send():179] send: config +2021-07-15 01:16:00,392 INFO Thread-11 :629810 [upload_job.py:push():137] Uploaded file /tmp/tmplha_nygjwandb/lrp352pp-wandb-metadata.json +2021-07-15 01:16:00,832 INFO Thread-8 :629810 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/wandb-metadata.json +2021-07-15 01:16:00,833 INFO Thread-8 :629810 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/requirements.txt +2021-07-15 01:16:00,833 INFO Thread-8 :629810 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:16:14,838 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:16:15,098 DEBUG HandlerThread:629810 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:16:15,099 DEBUG SenderThread:629810 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:16:28,011 DEBUG SenderThread:629810 [sender.py:send():179] send: stats +2021-07-15 01:16:29,844 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/config.yaml +2021-07-15 01:16:30,228 DEBUG HandlerThread:629810 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:16:30,228 DEBUG SenderThread:629810 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:16:45,425 DEBUG HandlerThread:629810 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:16:45,425 DEBUG SenderThread:629810 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:16:46,851 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:16:58,093 DEBUG SenderThread:629810 [sender.py:send():179] send: stats +2021-07-15 01:17:00,674 DEBUG HandlerThread:629810 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:17:00,674 DEBUG SenderThread:629810 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:17:15,824 DEBUG HandlerThread:629810 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:17:15,824 DEBUG SenderThread:629810 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:17:28,171 DEBUG SenderThread:629810 [sender.py:send():179] send: stats +2021-07-15 01:17:30,956 DEBUG HandlerThread:629810 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:17:30,956 DEBUG SenderThread:629810 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:17:46,085 DEBUG HandlerThread:629810 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:17:46,086 DEBUG SenderThread:629810 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:17:58,248 DEBUG SenderThread:629810 [sender.py:send():179] send: stats +2021-07-15 01:18:01,230 DEBUG HandlerThread:629810 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:18:01,230 DEBUG SenderThread:629810 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:18:10,885 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:12,886 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:14,887 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:16,451 DEBUG HandlerThread:629810 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:18:16,452 DEBUG SenderThread:629810 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:18:16,888 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:18,889 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:20,890 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:22,890 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:25,892 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:27,893 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:28,326 DEBUG SenderThread:629810 [sender.py:send():179] send: stats +2021-07-15 01:18:29,894 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:31,742 DEBUG HandlerThread:629810 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:18:31,743 DEBUG SenderThread:629810 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:18:31,895 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:33,895 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:45,619 DEBUG SenderThread:629810 [sender.py:send():179] send: history +2021-07-15 01:18:45,620 DEBUG SenderThread:629810 [sender.py:send():179] send: summary +2021-07-15 01:18:45,620 INFO SenderThread:629810 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 01:18:45,901 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/wandb-summary.json +2021-07-15 01:18:46,901 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:47,065 DEBUG HandlerThread:629810 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:18:47,065 DEBUG SenderThread:629810 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:18:47,902 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:49,902 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:51,903 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:53,904 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:55,905 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:57,906 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:58,403 DEBUG SenderThread:629810 [sender.py:send():179] send: stats +2021-07-15 01:18:59,331 WARNING MainThread:629810 [internal.py:wandb_internal():147] Internal process interrupt: 1 +2021-07-15 01:18:59,907 INFO Thread-8 :629810 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:18:59,988 WARNING MainThread:629810 [internal.py:wandb_internal():147] Internal process interrupt: 2 +2021-07-15 01:18:59,988 ERROR MainThread:629810 [internal.py:wandb_internal():150] Internal process interrupted. +2021-07-15 01:19:00,790 INFO SenderThread:629810 [sender.py:finish():945] shutting down sender +2021-07-15 01:19:00,790 INFO WriterThread:629810 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/run-15p55mwc.wandb +2021-07-15 01:19:00,790 INFO SenderThread:629810 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 01:19:00,791 INFO HandlerThread:629810 [handler.py:finish():638] shutting down handler +2021-07-15 01:19:00,908 INFO SenderThread:629810 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files +2021-07-15 01:19:00,908 INFO SenderThread:629810 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/requirements.txt requirements.txt +2021-07-15 01:19:00,908 INFO SenderThread:629810 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log output.log +2021-07-15 01:19:00,908 INFO SenderThread:629810 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/wandb-metadata.json wandb-metadata.json +2021-07-15 01:19:00,908 INFO SenderThread:629810 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/config.yaml config.yaml +2021-07-15 01:19:00,909 INFO SenderThread:629810 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/wandb-summary.json wandb-summary.json +2021-07-15 01:19:00,909 INFO SenderThread:629810 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 01:19:00,909 INFO SenderThread:629810 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 01:19:01,361 INFO Thread-13 :629810 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/output.log +2021-07-15 01:19:01,388 INFO Thread-15 :629810 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/wandb-summary.json +2021-07-15 01:19:01,393 INFO Thread-12 :629810 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/files/requirements.txt +2021-07-15 01:19:01,446 INFO MainThread:629810 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_011557-15p55mwc/logs/debug.log b/wandb/run-20210715_011557-15p55mwc/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..d0e86d69870116822ba70deb03f64b8a324443c6 --- /dev/null +++ b/wandb/run-20210715_011557-15p55mwc/logs/debug.log @@ -0,0 +1,27 @@ +2021-07-15 01:15:57,932 INFO MainThread:628566 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 01:15:57,932 INFO MainThread:628566 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 01:15:57,932 INFO MainThread:628566 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/logs/debug.log +2021-07-15 01:15:57,932 INFO MainThread:628566 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_011557-15p55mwc/logs/debug-internal.log +2021-07-15 01:15:57,932 INFO MainThread:628566 [wandb_init.py:init():370] calling init triggers +2021-07-15 01:15:57,932 INFO MainThread:628566 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 01:15:57,932 INFO MainThread:628566 [wandb_init.py:init():419] starting backend +2021-07-15 01:15:57,933 INFO MainThread:628566 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 01:15:57,979 INFO MainThread:628566 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 01:15:58,026 INFO MainThread:628566 [backend.py:ensure_launched():139] started backend process with pid: 629810 +2021-07-15 01:15:58,027 INFO MainThread:628566 [wandb_init.py:init():424] backend started and connected +2021-07-15 01:15:58,031 INFO MainThread:628566 [wandb_init.py:init():472] updated telemetry +2021-07-15 01:15:58,032 INFO MainThread:628566 [wandb_init.py:init():491] communicating current version +2021-07-15 01:15:58,662 INFO MainThread:628566 [wandb_init.py:init():496] got version response +2021-07-15 01:15:58,662 INFO MainThread:628566 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 01:15:58,831 INFO MainThread:628566 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 01:15:59,965 INFO MainThread:628566 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 01:15:59,966 INFO MainThread:628566 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 01:15:59,966 INFO MainThread:628566 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 01:15:59,968 INFO MainThread:628566 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 01:15:59,969 INFO MainThread:628566 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 01:15:59,975 INFO MainThread:628566 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_01-15-49_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 01:15:59,976 INFO MainThread:628566 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-15 01:15:59,978 INFO MainThread:628566 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 1000} +2021-07-15 01:18:59,334 INFO MainThread:628566 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 255 +2021-07-15 01:18:59,334 INFO MainThread:628566 [wandb_run.py:_restore():1565] restore diff --git a/wandb/run-20210715_011557-15p55mwc/run-15p55mwc.wandb b/wandb/run-20210715_011557-15p55mwc/run-15p55mwc.wandb new file mode 100644 index 0000000000000000000000000000000000000000..bd4771f3a0e656a3096cb946cc4c09bf28d3c7a0 Binary files /dev/null and b/wandb/run-20210715_011557-15p55mwc/run-15p55mwc.wandb differ diff --git a/wandb/run-20210715_012026-kk67hlli/files/config.yaml b/wandb/run-20210715_012026-kk67hlli/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88189bb1393ff167d50574e845e7528decd87e74 --- /dev/null +++ b/wandb/run-20210715_012026-kk67hlli/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 4 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_01-20-17_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 100 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 500 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_012026-kk67hlli/files/output.log b/wandb/run-20210715_012026-kk67hlli/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..2f93bde064bb1439324831a17603a874bae3298a --- /dev/null +++ b/wandb/run-20210715_012026-kk67hlli/files/output.log @@ -0,0 +1,39 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 10.49G free, 0B reserved, and 10.46G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax.py", line 815, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 10.49G free, 0B reserved, and 10.46G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210715_012026-kk67hlli/files/requirements.txt b/wandb/run-20210715_012026-kk67hlli/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_012026-kk67hlli/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_012026-kk67hlli/files/wandb-metadata.json b/wandb/run-20210715_012026-kk67hlli/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..7d03b2ea6c05b6056508f481554ac95c1547e4ac --- /dev/null +++ b/wandb/run-20210715_012026-kk67hlli/files/wandb-metadata.json @@ -0,0 +1,45 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T01:20:28.191847", + "startedAt": "2021-07-15T01:20:26.082406", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=5000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=100", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=500", + "--gradient_accumulation_steps=4" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_012026-kk67hlli/files/wandb-summary.json b/wandb/run-20210715_012026-kk67hlli/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_012026-kk67hlli/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_012026-kk67hlli/logs/debug-internal.log b/wandb/run-20210715_012026-kk67hlli/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..f66c564251fe25da2f79b274fbe9c2ad94eb86dc --- /dev/null +++ b/wandb/run-20210715_012026-kk67hlli/logs/debug-internal.log @@ -0,0 +1,185 @@ +2021-07-15 01:20:26,743 INFO MainThread:631520 [internal.py:wandb_internal():88] W&B internal server running at pid: 631520, started at: 2021-07-15 01:20:26.742952 +2021-07-15 01:20:26,745 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 01:20:26,745 INFO WriterThread:631520 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/run-kk67hlli.wandb +2021-07-15 01:20:26,746 DEBUG SenderThread:631520 [sender.py:send():179] send: header +2021-07-15 01:20:26,746 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: check_version +2021-07-15 01:20:26,781 DEBUG SenderThread:631520 [sender.py:send():179] send: run +2021-07-15 01:20:26,945 INFO SenderThread:631520 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files +2021-07-15 01:20:26,945 INFO SenderThread:631520 [sender.py:_start_run_threads():716] run started: kk67hlli with start time 1626312026 +2021-07-15 01:20:27,089 DEBUG SenderThread:631520 [sender.py:send():179] send: summary +2021-07-15 01:20:27,090 INFO SenderThread:631520 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 01:20:27,091 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 01:20:27,947 INFO Thread-8 :631520 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/wandb-summary.json +2021-07-15 01:20:28,191 DEBUG HandlerThread:631520 [meta.py:__init__():39] meta init +2021-07-15 01:20:28,191 DEBUG HandlerThread:631520 [meta.py:__init__():53] meta init done +2021-07-15 01:20:28,191 DEBUG HandlerThread:631520 [meta.py:probe():210] probe +2021-07-15 01:20:28,192 DEBUG HandlerThread:631520 [meta.py:_setup_git():200] setup git +2021-07-15 01:20:28,220 DEBUG HandlerThread:631520 [meta.py:_setup_git():207] setup git done +2021-07-15 01:20:28,220 DEBUG HandlerThread:631520 [meta.py:_save_pip():57] save pip +2021-07-15 01:20:28,220 DEBUG HandlerThread:631520 [meta.py:_save_pip():71] save pip done +2021-07-15 01:20:28,220 DEBUG HandlerThread:631520 [meta.py:probe():252] probe done +2021-07-15 01:20:28,223 DEBUG SenderThread:631520 [sender.py:send():179] send: files +2021-07-15 01:20:28,224 INFO SenderThread:631520 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 01:20:28,232 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:20:28,232 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:20:28,361 DEBUG SenderThread:631520 [sender.py:send():179] send: config +2021-07-15 01:20:28,361 DEBUG SenderThread:631520 [sender.py:send():179] send: config +2021-07-15 01:20:28,361 DEBUG SenderThread:631520 [sender.py:send():179] send: config +2021-07-15 01:20:28,661 INFO Thread-11 :631520 [upload_job.py:push():137] Uploaded file /tmp/tmp1y9e8uhhwandb/1rrxsmml-wandb-metadata.json +2021-07-15 01:20:28,947 INFO Thread-8 :631520 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/output.log +2021-07-15 01:20:28,947 INFO Thread-8 :631520 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/requirements.txt +2021-07-15 01:20:28,948 INFO Thread-8 :631520 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/wandb-metadata.json +2021-07-15 01:20:42,953 INFO Thread-8 :631520 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/output.log +2021-07-15 01:20:43,363 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:20:43,363 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:20:56,274 DEBUG SenderThread:631520 [sender.py:send():179] send: stats +2021-07-15 01:20:57,959 INFO Thread-8 :631520 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/config.yaml +2021-07-15 01:20:58,495 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:20:58,495 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:21:13,626 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:21:13,626 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:21:26,353 DEBUG SenderThread:631520 [sender.py:send():179] send: stats +2021-07-15 01:21:28,761 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:21:28,761 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:21:43,895 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:21:43,895 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:21:56,429 DEBUG SenderThread:631520 [sender.py:send():179] send: stats +2021-07-15 01:21:59,030 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:21:59,030 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:22:14,166 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:22:14,166 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:22:26,495 DEBUG SenderThread:631520 [sender.py:send():179] send: stats +2021-07-15 01:22:29,298 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:22:29,299 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:22:44,435 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:22:44,435 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:22:56,570 DEBUG SenderThread:631520 [sender.py:send():179] send: stats +2021-07-15 01:22:59,568 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:22:59,568 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:23:14,721 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:23:14,722 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:23:26,644 DEBUG SenderThread:631520 [sender.py:send():179] send: stats +2021-07-15 01:23:29,854 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:23:29,855 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:23:44,986 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:23:44,987 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:23:51,032 INFO Thread-8 :631520 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/output.log +2021-07-15 01:23:56,718 DEBUG SenderThread:631520 [sender.py:send():179] send: stats +2021-07-15 01:24:00,136 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:24:00,136 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:24:15,289 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:24:15,290 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:24:26,798 DEBUG SenderThread:631520 [sender.py:send():179] send: stats +2021-07-15 01:24:30,424 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:24:30,424 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:24:45,556 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:24:45,557 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:24:56,883 DEBUG SenderThread:631520 [sender.py:send():179] send: stats +2021-07-15 01:25:00,743 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:25:00,743 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:25:13,068 INFO Thread-8 :631520 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/output.log +2021-07-15 01:25:14,553 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:25:14,553 DEBUG SenderThread:631520 [sender.py:send():179] send: telemetry +2021-07-15 01:25:14,553 DEBUG SenderThread:631520 [sender.py:send():179] send: exit +2021-07-15 01:25:14,553 INFO SenderThread:631520 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 01:25:14,554 INFO SenderThread:631520 [sender.py:send_exit():295] send defer +2021-07-15 01:25:14,554 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:25:14,554 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:25:14,555 INFO HandlerThread:631520 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 01:25:14,555 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: defer +2021-07-15 01:25:14,555 INFO SenderThread:631520 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 01:25:14,555 INFO SenderThread:631520 [sender.py:transition_state():308] send defer: 1 +2021-07-15 01:25:14,555 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:25:14,555 INFO HandlerThread:631520 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 01:25:14,627 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: defer +2021-07-15 01:25:14,627 INFO SenderThread:631520 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 01:25:14,627 INFO SenderThread:631520 [sender.py:transition_state():308] send defer: 2 +2021-07-15 01:25:14,627 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:25:14,628 INFO HandlerThread:631520 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 01:25:14,628 DEBUG SenderThread:631520 [sender.py:send():179] send: stats +2021-07-15 01:25:14,628 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: defer +2021-07-15 01:25:14,628 INFO SenderThread:631520 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 01:25:14,628 INFO SenderThread:631520 [sender.py:transition_state():308] send defer: 3 +2021-07-15 01:25:14,628 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:25:14,629 INFO HandlerThread:631520 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 01:25:14,629 DEBUG SenderThread:631520 [sender.py:send():179] send: summary +2021-07-15 01:25:14,629 INFO SenderThread:631520 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 01:25:14,629 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: defer +2021-07-15 01:25:14,629 INFO SenderThread:631520 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 01:25:14,629 INFO SenderThread:631520 [sender.py:transition_state():308] send defer: 4 +2021-07-15 01:25:14,630 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:25:14,630 INFO HandlerThread:631520 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 01:25:14,630 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: defer +2021-07-15 01:25:14,630 INFO SenderThread:631520 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 01:25:14,656 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:25:14,810 INFO SenderThread:631520 [sender.py:transition_state():308] send defer: 5 +2021-07-15 01:25:14,810 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:25:14,811 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:25:14,811 INFO HandlerThread:631520 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 01:25:14,811 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: defer +2021-07-15 01:25:14,811 INFO SenderThread:631520 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 01:25:14,811 INFO SenderThread:631520 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 01:25:14,912 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:25:15,069 INFO Thread-8 :631520 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/config.yaml +2021-07-15 01:25:15,070 INFO SenderThread:631520 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/wandb-summary.json +2021-07-15 01:25:15,070 INFO SenderThread:631520 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/output.log +2021-07-15 01:25:15,070 INFO SenderThread:631520 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files +2021-07-15 01:25:15,070 INFO SenderThread:631520 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/requirements.txt requirements.txt +2021-07-15 01:25:15,070 INFO SenderThread:631520 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/output.log output.log +2021-07-15 01:25:15,070 INFO SenderThread:631520 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/wandb-metadata.json wandb-metadata.json +2021-07-15 01:25:15,071 INFO SenderThread:631520 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/config.yaml config.yaml +2021-07-15 01:25:15,071 INFO SenderThread:631520 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/wandb-summary.json wandb-summary.json +2021-07-15 01:25:15,074 INFO SenderThread:631520 [sender.py:transition_state():308] send defer: 6 +2021-07-15 01:25:15,074 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:25:15,081 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:25:15,081 INFO HandlerThread:631520 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 01:25:15,084 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: defer +2021-07-15 01:25:15,085 INFO SenderThread:631520 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 01:25:15,085 INFO SenderThread:631520 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 01:25:15,179 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:25:15,179 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:25:15,281 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:25:15,281 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:25:15,383 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:25:15,383 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:25:15,484 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:25:15,485 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:25:15,527 INFO Thread-12 :631520 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/requirements.txt +2021-07-15 01:25:15,528 INFO Thread-13 :631520 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/output.log +2021-07-15 01:25:15,529 INFO Thread-14 :631520 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/config.yaml +2021-07-15 01:25:15,561 INFO Thread-15 :631520 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/files/wandb-summary.json +2021-07-15 01:25:15,586 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:25:15,587 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:25:15,688 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:25:15,689 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:25:15,762 INFO Thread-7 :631520 [sender.py:transition_state():308] send defer: 7 +2021-07-15 01:25:15,762 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:25:15,762 INFO HandlerThread:631520 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 01:25:15,763 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: defer +2021-07-15 01:25:15,763 INFO SenderThread:631520 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 01:25:15,790 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:25:16,035 INFO SenderThread:631520 [sender.py:transition_state():308] send defer: 8 +2021-07-15 01:25:16,035 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:25:16,036 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:25:16,036 INFO HandlerThread:631520 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 01:25:16,036 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: defer +2021-07-15 01:25:16,036 INFO SenderThread:631520 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 01:25:16,036 INFO SenderThread:631520 [sender.py:transition_state():308] send defer: 9 +2021-07-15 01:25:16,037 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:25:16,037 INFO HandlerThread:631520 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 01:25:16,037 DEBUG SenderThread:631520 [sender.py:send():179] send: final +2021-07-15 01:25:16,037 DEBUG SenderThread:631520 [sender.py:send():179] send: footer +2021-07-15 01:25:16,037 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: defer +2021-07-15 01:25:16,037 INFO SenderThread:631520 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 01:25:16,137 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:25:16,138 DEBUG SenderThread:631520 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:25:16,138 INFO SenderThread:631520 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 01:25:16,139 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 01:25:16,140 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 01:25:16,140 DEBUG HandlerThread:631520 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 01:25:16,140 INFO HandlerThread:631520 [handler.py:finish():638] shutting down handler +2021-07-15 01:25:17,037 INFO WriterThread:631520 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/run-kk67hlli.wandb +2021-07-15 01:25:17,138 INFO SenderThread:631520 [sender.py:finish():945] shutting down sender +2021-07-15 01:25:17,138 INFO SenderThread:631520 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 01:25:17,138 INFO SenderThread:631520 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 01:25:17,141 INFO MainThread:631520 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_012026-kk67hlli/logs/debug.log b/wandb/run-20210715_012026-kk67hlli/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..4a6010dd8bed0be68b2809ab4c39fdd90c6f99a3 --- /dev/null +++ b/wandb/run-20210715_012026-kk67hlli/logs/debug.log @@ -0,0 +1,119 @@ +2021-07-15 01:20:26,084 INFO MainThread:630273 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 01:20:26,084 INFO MainThread:630273 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 01:20:26,084 INFO MainThread:630273 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/logs/debug.log +2021-07-15 01:20:26,084 INFO MainThread:630273 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_012026-kk67hlli/logs/debug-internal.log +2021-07-15 01:20:26,084 INFO MainThread:630273 [wandb_init.py:init():370] calling init triggers +2021-07-15 01:20:26,084 INFO MainThread:630273 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 01:20:26,084 INFO MainThread:630273 [wandb_init.py:init():419] starting backend +2021-07-15 01:20:26,084 INFO MainThread:630273 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 01:20:26,127 INFO MainThread:630273 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 01:20:26,169 INFO MainThread:630273 [backend.py:ensure_launched():139] started backend process with pid: 631520 +2021-07-15 01:20:26,171 INFO MainThread:630273 [wandb_init.py:init():424] backend started and connected +2021-07-15 01:20:26,174 INFO MainThread:630273 [wandb_init.py:init():472] updated telemetry +2021-07-15 01:20:26,175 INFO MainThread:630273 [wandb_init.py:init():491] communicating current version +2021-07-15 01:20:26,780 INFO MainThread:630273 [wandb_init.py:init():496] got version response +2021-07-15 01:20:26,780 INFO MainThread:630273 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 01:20:27,090 INFO MainThread:630273 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 01:20:28,227 INFO MainThread:630273 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 01:20:28,227 INFO MainThread:630273 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 01:20:28,228 INFO MainThread:630273 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 01:20:28,230 INFO MainThread:630273 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 01:20:28,230 INFO MainThread:630273 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 01:20:28,236 INFO MainThread:630273 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_01-20-17_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 100, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 01:20:28,237 INFO MainThread:630273 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-15 01:20:28,239 INFO MainThread:630273 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500} +2021-07-15 01:25:12,329 INFO MainThread:630273 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 01:25:12,330 INFO MainThread:630273 [wandb_run.py:_restore():1565] restore +2021-07-15 01:25:14,554 INFO MainThread:630273 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 01:25:14,811 INFO MainThread:630273 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 01:25:15,078 INFO MainThread:630273 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 3 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 7059 +} + +2021-07-15 01:25:15,180 INFO MainThread:630273 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11671 +} + +2021-07-15 01:25:15,282 INFO MainThread:630273 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11671 + total_bytes: 11671 +} + +2021-07-15 01:25:15,384 INFO MainThread:630273 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11671 + total_bytes: 11671 +} + +2021-07-15 01:25:15,485 INFO MainThread:630273 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11671 + total_bytes: 11671 +} + +2021-07-15 01:25:15,587 INFO MainThread:630273 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11671 + total_bytes: 11671 +} + +2021-07-15 01:25:15,689 INFO MainThread:630273 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11671 + total_bytes: 11671 +} + +2021-07-15 01:25:16,036 INFO MainThread:630273 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11671 + total_bytes: 11671 +} + +2021-07-15 01:25:16,138 INFO MainThread:630273 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11671 + total_bytes: 11671 +} + +2021-07-15 01:25:17,400 INFO MainThread:630273 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_012026-kk67hlli/run-kk67hlli.wandb b/wandb/run-20210715_012026-kk67hlli/run-kk67hlli.wandb new file mode 100644 index 0000000000000000000000000000000000000000..09d3019c23334331d7ca0e1f9b626831e331ad37 Binary files /dev/null and b/wandb/run-20210715_012026-kk67hlli/run-kk67hlli.wandb differ diff --git a/wandb/run-20210715_012543-1l1p8ztt/files/config.yaml b/wandb/run-20210715_012543-1l1p8ztt/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..befc8b00f6a40e6c540466f2ccfc5a8f4f5c20c5 --- /dev/null +++ b/wandb/run-20210715_012543-1l1p8ztt/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 4 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_01-25-35_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 100 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 500 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_012543-1l1p8ztt/files/output.log b/wandb/run-20210715_012543-1l1p8ztt/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..5c6075c954bf854c3c5d986bdeb3a67462f2cd18 --- /dev/null +++ b/wandb/run-20210715_012543-1l1p8ztt/files/output.log @@ -0,0 +1,39 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 11.26G free, 0B reserved, and 11.23G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax.py", line 815, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 11.26G free, 0B reserved, and 11.23G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210715_012543-1l1p8ztt/files/requirements.txt b/wandb/run-20210715_012543-1l1p8ztt/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_012543-1l1p8ztt/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_012543-1l1p8ztt/files/wandb-metadata.json b/wandb/run-20210715_012543-1l1p8ztt/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..3a7858c2a8659390466fde0cdb22b9a223db881c --- /dev/null +++ b/wandb/run-20210715_012543-1l1p8ztt/files/wandb-metadata.json @@ -0,0 +1,45 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T01:25:45.284986", + "startedAt": "2021-07-15T01:25:43.266010", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=5000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=100", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=500", + "--gradient_accumulation_steps=4" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_012543-1l1p8ztt/files/wandb-summary.json b/wandb/run-20210715_012543-1l1p8ztt/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_012543-1l1p8ztt/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_012543-1l1p8ztt/logs/debug-internal.log b/wandb/run-20210715_012543-1l1p8ztt/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..063b4cbbe5f3e49d76b97626de2ee0ea3b3573e4 --- /dev/null +++ b/wandb/run-20210715_012543-1l1p8ztt/logs/debug-internal.log @@ -0,0 +1,178 @@ +2021-07-15 01:25:43,966 INFO MainThread:633085 [internal.py:wandb_internal():88] W&B internal server running at pid: 633085, started at: 2021-07-15 01:25:43.966597 +2021-07-15 01:25:43,969 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 01:25:43,969 INFO WriterThread:633085 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/run-1l1p8ztt.wandb +2021-07-15 01:25:43,970 DEBUG SenderThread:633085 [sender.py:send():179] send: header +2021-07-15 01:25:43,970 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: check_version +2021-07-15 01:25:44,011 DEBUG SenderThread:633085 [sender.py:send():179] send: run +2021-07-15 01:25:44,196 INFO SenderThread:633085 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files +2021-07-15 01:25:44,196 INFO SenderThread:633085 [sender.py:_start_run_threads():716] run started: 1l1p8ztt with start time 1626312343 +2021-07-15 01:25:44,196 DEBUG SenderThread:633085 [sender.py:send():179] send: summary +2021-07-15 01:25:44,196 INFO SenderThread:633085 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 01:25:44,197 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 01:25:45,197 INFO Thread-8 :633085 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/wandb-summary.json +2021-07-15 01:25:45,284 DEBUG HandlerThread:633085 [meta.py:__init__():39] meta init +2021-07-15 01:25:45,284 DEBUG HandlerThread:633085 [meta.py:__init__():53] meta init done +2021-07-15 01:25:45,284 DEBUG HandlerThread:633085 [meta.py:probe():210] probe +2021-07-15 01:25:45,286 DEBUG HandlerThread:633085 [meta.py:_setup_git():200] setup git +2021-07-15 01:25:45,315 DEBUG HandlerThread:633085 [meta.py:_setup_git():207] setup git done +2021-07-15 01:25:45,316 DEBUG HandlerThread:633085 [meta.py:_save_pip():57] save pip +2021-07-15 01:25:45,316 DEBUG HandlerThread:633085 [meta.py:_save_pip():71] save pip done +2021-07-15 01:25:45,316 DEBUG HandlerThread:633085 [meta.py:probe():252] probe done +2021-07-15 01:25:45,319 DEBUG SenderThread:633085 [sender.py:send():179] send: files +2021-07-15 01:25:45,319 INFO SenderThread:633085 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 01:25:45,325 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:25:45,325 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:25:45,453 DEBUG SenderThread:633085 [sender.py:send():179] send: config +2021-07-15 01:25:45,453 DEBUG SenderThread:633085 [sender.py:send():179] send: config +2021-07-15 01:25:45,454 DEBUG SenderThread:633085 [sender.py:send():179] send: config +2021-07-15 01:25:45,748 INFO Thread-11 :633085 [upload_job.py:push():137] Uploaded file /tmp/tmpg7lk1a20wandb/3gmfcmsh-wandb-metadata.json +2021-07-15 01:25:46,197 INFO Thread-8 :633085 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/wandb-metadata.json +2021-07-15 01:25:46,197 INFO Thread-8 :633085 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/requirements.txt +2021-07-15 01:25:46,197 INFO Thread-8 :633085 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/output.log +2021-07-15 01:26:00,202 INFO Thread-8 :633085 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/output.log +2021-07-15 01:26:00,538 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:26:00,538 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:26:02,203 INFO Thread-8 :633085 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/output.log +2021-07-15 01:26:13,387 DEBUG SenderThread:633085 [sender.py:send():179] send: stats +2021-07-15 01:26:15,208 INFO Thread-8 :633085 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/config.yaml +2021-07-15 01:26:15,671 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:26:15,671 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:26:30,802 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:26:30,802 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:26:43,465 DEBUG SenderThread:633085 [sender.py:send():179] send: stats +2021-07-15 01:26:45,946 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:26:45,947 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:27:01,079 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:27:01,079 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:27:13,532 DEBUG SenderThread:633085 [sender.py:send():179] send: stats +2021-07-15 01:27:16,212 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:27:16,213 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:27:31,346 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:27:31,347 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:27:43,597 DEBUG SenderThread:633085 [sender.py:send():179] send: stats +2021-07-15 01:27:46,478 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:27:46,478 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:28:01,609 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:28:01,610 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:28:13,664 DEBUG SenderThread:633085 [sender.py:send():179] send: stats +2021-07-15 01:28:16,741 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:28:16,742 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:28:24,252 INFO Thread-8 :633085 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/output.log +2021-07-15 01:28:31,890 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:28:31,890 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:28:43,734 DEBUG SenderThread:633085 [sender.py:send():179] send: stats +2021-07-15 01:28:47,042 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:28:47,042 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:29:02,173 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:29:02,173 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:29:13,816 DEBUG SenderThread:633085 [sender.py:send():179] send: stats +2021-07-15 01:29:17,304 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:29:17,304 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:29:32,435 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:29:32,435 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:29:43,894 DEBUG SenderThread:633085 [sender.py:send():179] send: stats +2021-07-15 01:29:49,798 DEBUG SenderThread:633085 [sender.py:send():179] send: telemetry +2021-07-15 01:29:49,799 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:29:49,799 DEBUG SenderThread:633085 [sender.py:send():179] send: exit +2021-07-15 01:29:49,799 INFO SenderThread:633085 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 01:29:49,799 INFO SenderThread:633085 [sender.py:send_exit():295] send defer +2021-07-15 01:29:49,799 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:29:49,800 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:29:49,800 INFO HandlerThread:633085 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 01:29:49,800 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: defer +2021-07-15 01:29:49,800 INFO SenderThread:633085 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 01:29:49,800 INFO SenderThread:633085 [sender.py:transition_state():308] send defer: 1 +2021-07-15 01:29:49,800 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:29:49,800 INFO HandlerThread:633085 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 01:29:49,829 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: defer +2021-07-15 01:29:49,829 INFO SenderThread:633085 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 01:29:49,829 INFO SenderThread:633085 [sender.py:transition_state():308] send defer: 2 +2021-07-15 01:29:49,829 DEBUG SenderThread:633085 [sender.py:send():179] send: stats +2021-07-15 01:29:49,830 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:29:49,830 INFO HandlerThread:633085 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 01:29:49,830 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: defer +2021-07-15 01:29:49,830 INFO SenderThread:633085 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 01:29:49,830 INFO SenderThread:633085 [sender.py:transition_state():308] send defer: 3 +2021-07-15 01:29:49,830 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:29:49,830 INFO HandlerThread:633085 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 01:29:49,830 DEBUG SenderThread:633085 [sender.py:send():179] send: summary +2021-07-15 01:29:49,831 INFO SenderThread:633085 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 01:29:49,831 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: defer +2021-07-15 01:29:49,831 INFO SenderThread:633085 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 01:29:49,831 INFO SenderThread:633085 [sender.py:transition_state():308] send defer: 4 +2021-07-15 01:29:49,831 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:29:49,831 INFO HandlerThread:633085 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 01:29:49,831 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: defer +2021-07-15 01:29:49,831 INFO SenderThread:633085 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 01:29:49,901 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:29:50,009 INFO SenderThread:633085 [sender.py:transition_state():308] send defer: 5 +2021-07-15 01:29:50,009 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:29:50,009 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:29:50,009 INFO HandlerThread:633085 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 01:29:50,010 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: defer +2021-07-15 01:29:50,010 INFO SenderThread:633085 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 01:29:50,010 INFO SenderThread:633085 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 01:29:50,111 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:29:50,272 INFO SenderThread:633085 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/config.yaml +2021-07-15 01:29:50,272 INFO SenderThread:633085 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/wandb-summary.json +2021-07-15 01:29:50,272 INFO SenderThread:633085 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/output.log +2021-07-15 01:29:50,272 INFO SenderThread:633085 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files +2021-07-15 01:29:50,272 INFO SenderThread:633085 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/requirements.txt requirements.txt +2021-07-15 01:29:50,272 INFO SenderThread:633085 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/output.log output.log +2021-07-15 01:29:50,273 INFO SenderThread:633085 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/wandb-metadata.json wandb-metadata.json +2021-07-15 01:29:50,273 INFO SenderThread:633085 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/config.yaml config.yaml +2021-07-15 01:29:50,273 INFO SenderThread:633085 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/wandb-summary.json wandb-summary.json +2021-07-15 01:29:50,273 INFO SenderThread:633085 [sender.py:transition_state():308] send defer: 6 +2021-07-15 01:29:50,273 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:29:50,280 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:29:50,280 INFO HandlerThread:633085 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 01:29:50,284 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: defer +2021-07-15 01:29:50,284 INFO SenderThread:633085 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 01:29:50,284 INFO SenderThread:633085 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 01:29:50,379 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:29:50,379 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:29:50,481 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:29:50,481 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:29:50,583 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:29:50,583 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:29:50,685 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:29:50,685 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:29:50,718 INFO Thread-12 :633085 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/requirements.txt +2021-07-15 01:29:50,736 INFO Thread-13 :633085 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/output.log +2021-07-15 01:29:50,749 INFO Thread-15 :633085 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/wandb-summary.json +2021-07-15 01:29:50,755 INFO Thread-14 :633085 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/files/config.yaml +2021-07-15 01:29:50,787 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:29:50,787 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:29:50,888 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:29:50,889 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:29:50,955 INFO Thread-7 :633085 [sender.py:transition_state():308] send defer: 7 +2021-07-15 01:29:50,955 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:29:50,955 INFO HandlerThread:633085 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 01:29:50,956 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: defer +2021-07-15 01:29:50,956 INFO SenderThread:633085 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 01:29:50,990 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:29:51,228 INFO SenderThread:633085 [sender.py:transition_state():308] send defer: 8 +2021-07-15 01:29:51,228 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:29:51,229 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:29:51,229 INFO HandlerThread:633085 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 01:29:51,229 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: defer +2021-07-15 01:29:51,229 INFO SenderThread:633085 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 01:29:51,229 INFO SenderThread:633085 [sender.py:transition_state():308] send defer: 9 +2021-07-15 01:29:51,229 DEBUG SenderThread:633085 [sender.py:send():179] send: final +2021-07-15 01:29:51,230 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:29:51,230 INFO HandlerThread:633085 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 01:29:51,230 DEBUG SenderThread:633085 [sender.py:send():179] send: footer +2021-07-15 01:29:51,230 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: defer +2021-07-15 01:29:51,230 INFO SenderThread:633085 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 01:29:51,330 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:29:51,330 DEBUG SenderThread:633085 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:29:51,331 INFO SenderThread:633085 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 01:29:51,332 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 01:29:51,333 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 01:29:51,333 DEBUG HandlerThread:633085 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 01:29:51,333 INFO HandlerThread:633085 [handler.py:finish():638] shutting down handler +2021-07-15 01:29:52,230 INFO WriterThread:633085 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/run-1l1p8ztt.wandb +2021-07-15 01:29:52,331 INFO SenderThread:633085 [sender.py:finish():945] shutting down sender +2021-07-15 01:29:52,331 INFO SenderThread:633085 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 01:29:52,331 INFO SenderThread:633085 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 01:29:52,333 INFO MainThread:633085 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_012543-1l1p8ztt/logs/debug.log b/wandb/run-20210715_012543-1l1p8ztt/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..ccef51701805f9017e8afe40bbfc421194aa0dc2 --- /dev/null +++ b/wandb/run-20210715_012543-1l1p8ztt/logs/debug.log @@ -0,0 +1,119 @@ +2021-07-15 01:25:43,267 INFO MainThread:631837 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 01:25:43,267 INFO MainThread:631837 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 01:25:43,267 INFO MainThread:631837 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/logs/debug.log +2021-07-15 01:25:43,267 INFO MainThread:631837 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_012543-1l1p8ztt/logs/debug-internal.log +2021-07-15 01:25:43,268 INFO MainThread:631837 [wandb_init.py:init():370] calling init triggers +2021-07-15 01:25:43,268 INFO MainThread:631837 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 01:25:43,268 INFO MainThread:631837 [wandb_init.py:init():419] starting backend +2021-07-15 01:25:43,268 INFO MainThread:631837 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 01:25:43,315 INFO MainThread:631837 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 01:25:43,361 INFO MainThread:631837 [backend.py:ensure_launched():139] started backend process with pid: 633085 +2021-07-15 01:25:43,363 INFO MainThread:631837 [wandb_init.py:init():424] backend started and connected +2021-07-15 01:25:43,366 INFO MainThread:631837 [wandb_init.py:init():472] updated telemetry +2021-07-15 01:25:43,367 INFO MainThread:631837 [wandb_init.py:init():491] communicating current version +2021-07-15 01:25:44,009 INFO MainThread:631837 [wandb_init.py:init():496] got version response +2021-07-15 01:25:44,010 INFO MainThread:631837 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 01:25:44,196 INFO MainThread:631837 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 01:25:45,323 INFO MainThread:631837 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 01:25:45,323 INFO MainThread:631837 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 01:25:45,324 INFO MainThread:631837 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 01:25:45,326 INFO MainThread:631837 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 01:25:45,326 INFO MainThread:631837 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 01:25:45,334 INFO MainThread:631837 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_01-25-35_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 100, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 01:25:45,335 INFO MainThread:631837 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-15 01:25:45,337 INFO MainThread:631837 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500} +2021-07-15 01:29:47,556 INFO MainThread:631837 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 01:29:47,557 INFO MainThread:631837 [wandb_run.py:_restore():1565] restore +2021-07-15 01:29:49,800 INFO MainThread:631837 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 01:29:50,010 INFO MainThread:631837 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 1400 +} + +2021-07-15 01:29:50,278 INFO MainThread:631837 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11669 +} + +2021-07-15 01:29:50,380 INFO MainThread:631837 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1400 + total_bytes: 11671 +} + +2021-07-15 01:29:50,482 INFO MainThread:631837 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11671 + total_bytes: 11671 +} + +2021-07-15 01:29:50,584 INFO MainThread:631837 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11671 + total_bytes: 11671 +} + +2021-07-15 01:29:50,686 INFO MainThread:631837 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11671 + total_bytes: 11671 +} + +2021-07-15 01:29:50,787 INFO MainThread:631837 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11671 + total_bytes: 11671 +} + +2021-07-15 01:29:50,889 INFO MainThread:631837 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11671 + total_bytes: 11671 +} + +2021-07-15 01:29:51,229 INFO MainThread:631837 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11671 + total_bytes: 11671 +} + +2021-07-15 01:29:51,331 INFO MainThread:631837 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11671 + total_bytes: 11671 +} + +2021-07-15 01:29:52,602 INFO MainThread:631837 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_012543-1l1p8ztt/run-1l1p8ztt.wandb b/wandb/run-20210715_012543-1l1p8ztt/run-1l1p8ztt.wandb new file mode 100644 index 0000000000000000000000000000000000000000..04bb2880019fbed9c0f295358dd590e57dd0ddcc Binary files /dev/null and b/wandb/run-20210715_012543-1l1p8ztt/run-1l1p8ztt.wandb differ diff --git a/wandb/run-20210715_013430-22ca2z4i/files/config.yaml b/wandb/run-20210715_013430-22ca2z4i/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aab16205ed98f26826e4d93f8aee9f1af5d7601a --- /dev/null +++ b/wandb/run-20210715_013430-22ca2z4i/files/config.yaml @@ -0,0 +1,304 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 1 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_01-34-22_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 100 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 500 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 1 +per_device_train_batch_size: + desc: null + value: 1 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_013430-22ca2z4i/files/output.log b/wandb/run-20210715_013430-22ca2z4i/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..4a5f49f555488c4ae1c42848c8ff969558b3137e --- /dev/null +++ b/wandb/run-20210715_013430-22ca2z4i/files/output.log @@ -0,0 +1,4 @@ +Traceback (most recent call last): + File "./run_mlm_flax_no_accum.py", line 607, in + optax.clip_grad_by_global_norm(1.), +AttributeError: module 'optax' has no attribute 'clip_grad_by_global_norm' \ No newline at end of file diff --git a/wandb/run-20210715_013430-22ca2z4i/files/requirements.txt b/wandb/run-20210715_013430-22ca2z4i/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_013430-22ca2z4i/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_013430-22ca2z4i/files/wandb-metadata.json b/wandb/run-20210715_013430-22ca2z4i/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..955247efef1e21ac5b0f5c3cfc392ebc4b32b4d3 --- /dev/null +++ b/wandb/run-20210715_013430-22ca2z4i/files/wandb-metadata.json @@ -0,0 +1,44 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T01:34:32.137359", + "startedAt": "2021-07-15T01:34:30.157405", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=5000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=100", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=1", + "--per_device_eval_batch_size=1", + "--save_total_limit=5", + "--max_eval_samples=500" + ], + "state": "running", + "program": "./run_mlm_flax_no_accum.py", + "codePath": "run_mlm_flax_no_accum.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_013430-22ca2z4i/files/wandb-summary.json b/wandb/run-20210715_013430-22ca2z4i/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_013430-22ca2z4i/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_013430-22ca2z4i/logs/debug-internal.log b/wandb/run-20210715_013430-22ca2z4i/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..74c398c34970d71a994d6f018638378f573c8371 --- /dev/null +++ b/wandb/run-20210715_013430-22ca2z4i/logs/debug-internal.log @@ -0,0 +1,148 @@ +2021-07-15 01:34:30,850 INFO MainThread:635489 [internal.py:wandb_internal():88] W&B internal server running at pid: 635489, started at: 2021-07-15 01:34:30.849889 +2021-07-15 01:34:30,852 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 01:34:30,852 INFO WriterThread:635489 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/run-22ca2z4i.wandb +2021-07-15 01:34:30,853 DEBUG SenderThread:635489 [sender.py:send():179] send: header +2021-07-15 01:34:30,853 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: check_version +2021-07-15 01:34:30,891 DEBUG SenderThread:635489 [sender.py:send():179] send: run +2021-07-15 01:34:31,058 INFO SenderThread:635489 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files +2021-07-15 01:34:31,059 INFO SenderThread:635489 [sender.py:_start_run_threads():716] run started: 22ca2z4i with start time 1626312870 +2021-07-15 01:34:31,059 DEBUG SenderThread:635489 [sender.py:send():179] send: summary +2021-07-15 01:34:31,059 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 01:34:31,059 INFO SenderThread:635489 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 01:34:32,061 INFO Thread-8 :635489 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/wandb-summary.json +2021-07-15 01:34:32,137 DEBUG HandlerThread:635489 [meta.py:__init__():39] meta init +2021-07-15 01:34:32,137 DEBUG HandlerThread:635489 [meta.py:__init__():53] meta init done +2021-07-15 01:34:32,137 DEBUG HandlerThread:635489 [meta.py:probe():210] probe +2021-07-15 01:34:32,138 DEBUG HandlerThread:635489 [meta.py:_setup_git():200] setup git +2021-07-15 01:34:32,167 DEBUG HandlerThread:635489 [meta.py:_setup_git():207] setup git done +2021-07-15 01:34:32,167 DEBUG HandlerThread:635489 [meta.py:_save_pip():57] save pip +2021-07-15 01:34:32,168 DEBUG HandlerThread:635489 [meta.py:_save_pip():71] save pip done +2021-07-15 01:34:32,168 DEBUG HandlerThread:635489 [meta.py:probe():252] probe done +2021-07-15 01:34:32,171 DEBUG SenderThread:635489 [sender.py:send():179] send: files +2021-07-15 01:34:32,171 INFO SenderThread:635489 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 01:34:32,177 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:34:32,177 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:34:32,306 DEBUG SenderThread:635489 [sender.py:send():179] send: config +2021-07-15 01:34:32,306 DEBUG SenderThread:635489 [sender.py:send():179] send: config +2021-07-15 01:34:32,306 DEBUG SenderThread:635489 [sender.py:send():179] send: config +2021-07-15 01:34:32,706 INFO Thread-11 :635489 [upload_job.py:push():137] Uploaded file /tmp/tmp08fad2b1wandb/2rjtf8fu-wandb-metadata.json +2021-07-15 01:34:33,059 INFO Thread-8 :635489 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/output.log +2021-07-15 01:34:33,060 INFO Thread-8 :635489 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/wandb-metadata.json +2021-07-15 01:34:33,060 INFO Thread-8 :635489 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/requirements.txt +2021-07-15 01:34:46,699 DEBUG SenderThread:635489 [sender.py:send():179] send: telemetry +2021-07-15 01:34:46,699 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:46,699 DEBUG SenderThread:635489 [sender.py:send():179] send: exit +2021-07-15 01:34:46,699 INFO SenderThread:635489 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 01:34:46,700 INFO SenderThread:635489 [sender.py:send_exit():295] send defer +2021-07-15 01:34:46,700 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:46,700 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:34:46,700 INFO HandlerThread:635489 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 01:34:46,701 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: defer +2021-07-15 01:34:46,701 INFO SenderThread:635489 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 01:34:46,701 INFO SenderThread:635489 [sender.py:transition_state():308] send defer: 1 +2021-07-15 01:34:46,701 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:34:46,701 INFO HandlerThread:635489 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 01:34:46,783 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: defer +2021-07-15 01:34:46,783 INFO SenderThread:635489 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 01:34:46,783 INFO SenderThread:635489 [sender.py:transition_state():308] send defer: 2 +2021-07-15 01:34:46,783 DEBUG SenderThread:635489 [sender.py:send():179] send: stats +2021-07-15 01:34:46,784 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:34:46,784 INFO HandlerThread:635489 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 01:34:46,784 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: defer +2021-07-15 01:34:46,784 INFO SenderThread:635489 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 01:34:46,784 INFO SenderThread:635489 [sender.py:transition_state():308] send defer: 3 +2021-07-15 01:34:46,784 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:34:46,784 INFO HandlerThread:635489 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 01:34:46,784 DEBUG SenderThread:635489 [sender.py:send():179] send: summary +2021-07-15 01:34:46,785 INFO SenderThread:635489 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 01:34:46,785 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: defer +2021-07-15 01:34:46,785 INFO SenderThread:635489 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 01:34:46,785 INFO SenderThread:635489 [sender.py:transition_state():308] send defer: 4 +2021-07-15 01:34:46,785 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:34:46,785 INFO HandlerThread:635489 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 01:34:46,785 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: defer +2021-07-15 01:34:46,785 INFO SenderThread:635489 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 01:34:46,802 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:46,964 INFO SenderThread:635489 [sender.py:transition_state():308] send defer: 5 +2021-07-15 01:34:46,964 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:46,965 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:34:46,965 INFO HandlerThread:635489 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 01:34:46,965 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: defer +2021-07-15 01:34:46,965 INFO SenderThread:635489 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 01:34:46,965 INFO SenderThread:635489 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 01:34:47,063 INFO SenderThread:635489 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/config.yaml +2021-07-15 01:34:47,064 INFO SenderThread:635489 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/output.log +2021-07-15 01:34:47,064 INFO SenderThread:635489 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/wandb-summary.json +2021-07-15 01:34:47,064 INFO SenderThread:635489 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files +2021-07-15 01:34:47,064 INFO SenderThread:635489 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/requirements.txt requirements.txt +2021-07-15 01:34:47,064 INFO SenderThread:635489 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/output.log output.log +2021-07-15 01:34:47,064 INFO SenderThread:635489 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/wandb-metadata.json wandb-metadata.json +2021-07-15 01:34:47,064 INFO SenderThread:635489 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/config.yaml config.yaml +2021-07-15 01:34:47,065 INFO SenderThread:635489 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/wandb-summary.json wandb-summary.json +2021-07-15 01:34:47,065 INFO SenderThread:635489 [sender.py:transition_state():308] send defer: 6 +2021-07-15 01:34:47,065 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:34:47,065 INFO HandlerThread:635489 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 01:34:47,068 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: defer +2021-07-15 01:34:47,068 INFO SenderThread:635489 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 01:34:47,068 INFO SenderThread:635489 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 01:34:47,069 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:47,072 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:47,174 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:47,174 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:47,276 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:47,276 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:47,378 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:47,378 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:47,480 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:47,480 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:47,510 INFO Thread-12 :635489 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/requirements.txt +2021-07-15 01:34:47,515 INFO Thread-15 :635489 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/wandb-summary.json +2021-07-15 01:34:47,533 INFO Thread-13 :635489 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/output.log +2021-07-15 01:34:47,582 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:47,582 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:47,684 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:47,684 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:47,785 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:47,786 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:47,887 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:47,887 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:47,989 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:47,989 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:48,091 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:48,091 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:48,105 INFO Thread-14 :635489 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/files/config.yaml +2021-07-15 01:34:48,193 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:48,193 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:48,295 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:48,295 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:48,305 INFO Thread-7 :635489 [sender.py:transition_state():308] send defer: 7 +2021-07-15 01:34:48,306 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:34:48,306 INFO HandlerThread:635489 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 01:34:48,306 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: defer +2021-07-15 01:34:48,306 INFO SenderThread:635489 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 01:34:48,397 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:48,573 INFO SenderThread:635489 [sender.py:transition_state():308] send defer: 8 +2021-07-15 01:34:48,573 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:48,574 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:34:48,574 INFO HandlerThread:635489 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 01:34:48,574 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: defer +2021-07-15 01:34:48,574 INFO SenderThread:635489 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 01:34:48,574 INFO SenderThread:635489 [sender.py:transition_state():308] send defer: 9 +2021-07-15 01:34:48,574 DEBUG SenderThread:635489 [sender.py:send():179] send: final +2021-07-15 01:34:48,574 DEBUG SenderThread:635489 [sender.py:send():179] send: footer +2021-07-15 01:34:48,575 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:34:48,575 INFO HandlerThread:635489 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 01:34:48,575 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: defer +2021-07-15 01:34:48,575 INFO SenderThread:635489 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 01:34:48,675 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:34:48,675 DEBUG SenderThread:635489 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:34:48,676 INFO SenderThread:635489 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 01:34:48,677 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 01:34:48,678 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 01:34:48,678 DEBUG HandlerThread:635489 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 01:34:48,679 INFO HandlerThread:635489 [handler.py:finish():638] shutting down handler +2021-07-15 01:34:49,575 INFO WriterThread:635489 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/run-22ca2z4i.wandb +2021-07-15 01:34:49,676 INFO SenderThread:635489 [sender.py:finish():945] shutting down sender +2021-07-15 01:34:49,676 INFO SenderThread:635489 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 01:34:49,676 INFO SenderThread:635489 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 01:34:49,678 INFO MainThread:635489 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_013430-22ca2z4i/logs/debug.log b/wandb/run-20210715_013430-22ca2z4i/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..520cf375a9fbdd346aa7e07d1907e4d90a84b68e --- /dev/null +++ b/wandb/run-20210715_013430-22ca2z4i/logs/debug.log @@ -0,0 +1,167 @@ +2021-07-15 01:34:30,159 INFO MainThread:634228 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 01:34:30,159 INFO MainThread:634228 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 01:34:30,159 INFO MainThread:634228 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/logs/debug.log +2021-07-15 01:34:30,159 INFO MainThread:634228 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_013430-22ca2z4i/logs/debug-internal.log +2021-07-15 01:34:30,159 INFO MainThread:634228 [wandb_init.py:init():370] calling init triggers +2021-07-15 01:34:30,159 INFO MainThread:634228 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 01:34:30,159 INFO MainThread:634228 [wandb_init.py:init():419] starting backend +2021-07-15 01:34:30,159 INFO MainThread:634228 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 01:34:30,207 INFO MainThread:634228 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 01:34:30,253 INFO MainThread:634228 [backend.py:ensure_launched():139] started backend process with pid: 635489 +2021-07-15 01:34:30,255 INFO MainThread:634228 [wandb_init.py:init():424] backend started and connected +2021-07-15 01:34:30,258 INFO MainThread:634228 [wandb_init.py:init():472] updated telemetry +2021-07-15 01:34:30,259 INFO MainThread:634228 [wandb_init.py:init():491] communicating current version +2021-07-15 01:34:30,889 INFO MainThread:634228 [wandb_init.py:init():496] got version response +2021-07-15 01:34:30,890 INFO MainThread:634228 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 01:34:31,058 INFO MainThread:634228 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 01:34:32,174 INFO MainThread:634228 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 01:34:32,174 INFO MainThread:634228 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 01:34:32,175 INFO MainThread:634228 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 01:34:32,177 INFO MainThread:634228 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 01:34:32,177 INFO MainThread:634228 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 01:34:32,183 INFO MainThread:634228 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_01-34-22_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 100, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 01:34:32,185 INFO MainThread:634228 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'} +2021-07-15 01:34:32,186 INFO MainThread:634228 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500} +2021-07-15 01:34:44,605 INFO MainThread:634228 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 01:34:44,606 INFO MainThread:634228 [wandb_run.py:_restore():1565] restore +2021-07-15 01:34:46,700 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 1375 +} + +2021-07-15 01:34:46,965 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 1375 +} + +2021-07-15 01:34:47,073 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 7802 +} + +2021-07-15 01:34:47,175 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 7802 +} + +2021-07-15 01:34:47,277 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 3235 + total_bytes: 7802 +} + +2021-07-15 01:34:47,379 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 3235 + total_bytes: 7802 +} + +2021-07-15 01:34:47,481 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 3235 + total_bytes: 7802 +} + +2021-07-15 01:34:47,583 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 3235 + total_bytes: 7802 +} + +2021-07-15 01:34:47,684 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 3235 + total_bytes: 7802 +} + +2021-07-15 01:34:47,786 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 3235 + total_bytes: 7802 +} + +2021-07-15 01:34:47,888 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 7802 + total_bytes: 7802 +} + +2021-07-15 01:34:47,990 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 7802 + total_bytes: 7802 +} + +2021-07-15 01:34:48,092 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 7802 + total_bytes: 7802 +} + +2021-07-15 01:34:48,194 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 7802 + total_bytes: 7802 +} + +2021-07-15 01:34:48,295 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 7802 + total_bytes: 7802 +} + +2021-07-15 01:34:48,574 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 7802 + total_bytes: 7802 +} + +2021-07-15 01:34:48,676 INFO MainThread:634228 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 7802 + total_bytes: 7802 +} + +2021-07-15 01:34:49,949 INFO MainThread:634228 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_013430-22ca2z4i/run-22ca2z4i.wandb b/wandb/run-20210715_013430-22ca2z4i/run-22ca2z4i.wandb new file mode 100644 index 0000000000000000000000000000000000000000..1a9c377667dbb6c482d1e3026a0427c1d94e8627 Binary files /dev/null and b/wandb/run-20210715_013430-22ca2z4i/run-22ca2z4i.wandb differ diff --git a/wandb/run-20210715_013559-2xq1lq7e/files/config.yaml b/wandb/run-20210715_013559-2xq1lq7e/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..647e1edebf2b584c5238897ca226bebc27a19d87 --- /dev/null +++ b/wandb/run-20210715_013559-2xq1lq7e/files/config.yaml @@ -0,0 +1,301 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 1 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_01-35-50_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 100 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 500 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 1 +per_device_train_batch_size: + desc: null + value: 1 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_013559-2xq1lq7e/files/output.log b/wandb/run-20210715_013559-2xq1lq7e/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..a8e5276ca1b1dd09bb0ea6e29816c3647f5f6ffc --- /dev/null +++ b/wandb/run-20210715_013559-2xq1lq7e/files/output.log @@ -0,0 +1,17 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + train_batch_idx = generate_batch_splits(train_samples_idx, train_batch_size) + File "./run_mlm_flax_no_accum.py", line 255, in generate_batch_splits + batch_idx = np.split(samples_idx, sections_split) + File "<__array_function__ internals>", line 5, in split + File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 874, in split + return array_split(ary, indices_or_sections, axis) + File "<__array_function__ internals>", line 5, in array_split + File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 790, in array_split + sub_arys.append(_nx.swapaxes(sary[st:end], axis, 0)) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5009, in _rewriting_take + return _gather(arr, treedef, static_idx, dynamic_idx, indices_are_sorted, + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5028, in _gather + y = lax.gather( + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/lax/lax.py", line 984, in gather + return gather_p.bind( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 264, in bind + out = top_trace.process_primitive(self, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 603, in process_primitive + return primitive.impl(*tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 249, in apply_primitive + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 365, in _execute_compiled_primitive + out_bufs = compiled.execute(input_bufs) +RuntimeError: Resource exhausted: Attempting to allocate 17.0K. That was not possible. There are 48.0K free. Due to fragmentation, the largest contiguous region of free memory is 16.0K.; (0x0x0_HBM0) \ No newline at end of file diff --git a/wandb/run-20210715_014510-2cbystpd/files/requirements.txt b/wandb/run-20210715_014510-2cbystpd/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_014510-2cbystpd/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_014510-2cbystpd/files/wandb-metadata.json b/wandb/run-20210715_014510-2cbystpd/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..c123905e10eeb79f2b64f5f9f80fd10fe3a56208 --- /dev/null +++ b/wandb/run-20210715_014510-2cbystpd/files/wandb-metadata.json @@ -0,0 +1,44 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T01:45:12.578938", + "startedAt": "2021-07-15T01:45:10.556715", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=10000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=100", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=3e-5", + "--per_device_train_batch_size=1", + "--per_device_eval_batch_size=1", + "--save_total_limit=5", + "--max_eval_samples=500" + ], + "state": "running", + "program": "./run_mlm_flax_no_accum.py", + "codePath": "run_mlm_flax_no_accum.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_014510-2cbystpd/files/wandb-summary.json b/wandb/run-20210715_014510-2cbystpd/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_014510-2cbystpd/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_014510-2cbystpd/logs/debug-internal.log b/wandb/run-20210715_014510-2cbystpd/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..48f19d8dc5874003ff0c7dbd59b6689ea8953d41 --- /dev/null +++ b/wandb/run-20210715_014510-2cbystpd/logs/debug-internal.log @@ -0,0 +1,279 @@ +2021-07-15 01:45:11,263 INFO MainThread:639336 [internal.py:wandb_internal():88] W&B internal server running at pid: 639336, started at: 2021-07-15 01:45:11.262939 +2021-07-15 01:45:11,265 INFO WriterThread:639336 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/run-2cbystpd.wandb +2021-07-15 01:45:11,265 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 01:45:11,266 DEBUG SenderThread:639336 [sender.py:send():179] send: header +2021-07-15 01:45:11,266 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: check_version +2021-07-15 01:45:11,304 DEBUG SenderThread:639336 [sender.py:send():179] send: run +2021-07-15 01:45:11,473 INFO SenderThread:639336 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files +2021-07-15 01:45:11,473 INFO SenderThread:639336 [sender.py:_start_run_threads():716] run started: 2cbystpd with start time 1626313510 +2021-07-15 01:45:11,475 DEBUG SenderThread:639336 [sender.py:send():179] send: summary +2021-07-15 01:45:11,475 INFO SenderThread:639336 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 01:45:11,476 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 01:45:12,476 INFO Thread-8 :639336 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/wandb-summary.json +2021-07-15 01:45:12,578 DEBUG HandlerThread:639336 [meta.py:__init__():39] meta init +2021-07-15 01:45:12,578 DEBUG HandlerThread:639336 [meta.py:__init__():53] meta init done +2021-07-15 01:45:12,578 DEBUG HandlerThread:639336 [meta.py:probe():210] probe +2021-07-15 01:45:12,579 DEBUG HandlerThread:639336 [meta.py:_setup_git():200] setup git +2021-07-15 01:45:12,611 DEBUG HandlerThread:639336 [meta.py:_setup_git():207] setup git done +2021-07-15 01:45:12,611 DEBUG HandlerThread:639336 [meta.py:_save_pip():57] save pip +2021-07-15 01:45:12,611 DEBUG HandlerThread:639336 [meta.py:_save_pip():71] save pip done +2021-07-15 01:45:12,611 DEBUG HandlerThread:639336 [meta.py:probe():252] probe done +2021-07-15 01:45:12,615 DEBUG SenderThread:639336 [sender.py:send():179] send: files +2021-07-15 01:45:12,615 INFO SenderThread:639336 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 01:45:12,621 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:45:12,622 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:45:12,750 DEBUG SenderThread:639336 [sender.py:send():179] send: config +2021-07-15 01:45:12,750 DEBUG SenderThread:639336 [sender.py:send():179] send: config +2021-07-15 01:45:12,750 DEBUG SenderThread:639336 [sender.py:send():179] send: config +2021-07-15 01:45:13,092 INFO Thread-11 :639336 [upload_job.py:push():137] Uploaded file /tmp/tmpseil32hrwandb/ao2qw48f-wandb-metadata.json +2021-07-15 01:45:13,475 INFO Thread-8 :639336 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/output.log +2021-07-15 01:45:13,475 INFO Thread-8 :639336 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/requirements.txt +2021-07-15 01:45:13,475 INFO Thread-8 :639336 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/wandb-metadata.json +2021-07-15 01:45:27,481 INFO Thread-8 :639336 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/output.log +2021-07-15 01:45:27,753 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:45:27,753 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:45:40,665 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:45:42,489 INFO Thread-8 :639336 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/config.yaml +2021-07-15 01:45:42,885 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:45:42,885 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:45:58,019 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:45:58,019 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:46:10,732 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:46:13,151 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:46:13,152 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:46:28,283 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:46:28,284 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:46:40,799 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:46:43,415 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:46:43,416 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:46:58,548 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:46:58,548 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:47:10,871 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:47:13,682 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:47:13,683 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:47:28,824 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:47:28,825 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:47:40,949 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:47:43,957 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:47:43,957 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:47:59,089 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:47:59,089 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:48:11,028 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:48:14,220 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:48:14,221 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:48:29,354 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:48:29,354 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:48:41,095 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:48:44,485 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:48:44,486 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:48:59,618 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:48:59,619 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:49:11,173 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:49:14,752 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:49:14,752 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:49:29,888 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:49:29,889 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:49:41,252 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:49:45,019 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:49:45,019 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:50:00,155 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:50:00,155 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:50:11,328 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:50:15,291 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:50:15,291 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:50:30,426 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:50:30,426 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:50:41,405 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:50:45,560 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:50:45,560 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:51:00,696 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:51:00,696 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:51:11,484 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:51:15,830 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:51:15,831 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:51:31,029 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:51:31,030 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:51:41,562 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:51:46,161 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:51:46,161 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:52:01,294 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:52:01,295 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:52:11,635 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:52:16,428 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:52:16,428 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:52:31,560 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:52:31,561 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:52:41,712 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:52:46,693 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:52:46,694 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:53:01,829 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:53:01,830 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:53:11,790 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:53:16,973 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:53:16,974 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:53:32,108 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:53:32,108 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:53:41,867 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:53:47,242 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:53:47,243 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:54:02,378 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:54:02,378 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:54:11,946 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:54:17,514 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:54:17,514 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:54:32,660 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:54:32,660 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:54:42,021 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:54:47,794 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:54:47,794 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:55:02,927 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:55:02,927 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:55:12,096 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:55:18,059 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:55:18,060 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:55:33,193 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:55:33,193 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:55:42,173 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:55:48,323 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:55:48,323 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:56:03,457 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:56:03,458 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:56:12,247 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:56:18,602 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:56:18,602 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:56:33,735 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:56:33,736 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:56:42,324 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:56:48,867 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:56:48,867 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:57:03,998 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:57:03,999 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:57:12,402 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:57:19,134 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:57:19,134 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:57:34,270 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:57:34,271 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:57:42,479 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:57:49,411 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:57:49,411 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:58:04,544 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:58:04,544 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:58:12,557 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:58:19,675 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:58:19,675 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:58:34,810 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:58:34,811 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:58:42,635 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:58:49,942 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:58:49,943 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:59:05,076 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:59:05,076 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:59:12,709 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:59:20,207 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 01:59:20,208 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: stop_status +2021-07-15 01:59:31,823 INFO Thread-8 :639336 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/output.log +2021-07-15 01:59:32,417 DEBUG SenderThread:639336 [sender.py:send():179] send: telemetry +2021-07-15 01:59:32,417 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:59:32,418 DEBUG SenderThread:639336 [sender.py:send():179] send: exit +2021-07-15 01:59:32,418 INFO SenderThread:639336 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 01:59:32,419 INFO SenderThread:639336 [sender.py:send_exit():295] send defer +2021-07-15 01:59:32,419 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:59:32,419 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:59:32,420 INFO HandlerThread:639336 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 01:59:32,420 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: defer +2021-07-15 01:59:32,420 INFO SenderThread:639336 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 01:59:32,420 INFO SenderThread:639336 [sender.py:transition_state():308] send defer: 1 +2021-07-15 01:59:32,420 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:59:32,421 INFO HandlerThread:639336 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 01:59:32,458 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: defer +2021-07-15 01:59:32,458 INFO SenderThread:639336 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 01:59:32,458 INFO SenderThread:639336 [sender.py:transition_state():308] send defer: 2 +2021-07-15 01:59:32,458 DEBUG SenderThread:639336 [sender.py:send():179] send: stats +2021-07-15 01:59:32,459 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:59:32,459 INFO HandlerThread:639336 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 01:59:32,459 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: defer +2021-07-15 01:59:32,459 INFO SenderThread:639336 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 01:59:32,459 INFO SenderThread:639336 [sender.py:transition_state():308] send defer: 3 +2021-07-15 01:59:32,460 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:59:32,460 INFO HandlerThread:639336 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 01:59:32,460 DEBUG SenderThread:639336 [sender.py:send():179] send: summary +2021-07-15 01:59:32,460 INFO SenderThread:639336 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 01:59:32,460 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: defer +2021-07-15 01:59:32,460 INFO SenderThread:639336 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 01:59:32,461 INFO SenderThread:639336 [sender.py:transition_state():308] send defer: 4 +2021-07-15 01:59:32,461 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:59:32,461 INFO HandlerThread:639336 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 01:59:32,461 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: defer +2021-07-15 01:59:32,461 INFO SenderThread:639336 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 01:59:32,522 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:59:32,637 INFO SenderThread:639336 [sender.py:transition_state():308] send defer: 5 +2021-07-15 01:59:32,637 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:59:32,638 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:59:32,638 INFO HandlerThread:639336 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 01:59:32,638 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: defer +2021-07-15 01:59:32,638 INFO SenderThread:639336 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 01:59:32,638 INFO SenderThread:639336 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 01:59:32,739 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:59:32,823 INFO Thread-8 :639336 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/wandb-summary.json +2021-07-15 01:59:32,824 INFO SenderThread:639336 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/output.log +2021-07-15 01:59:32,824 INFO SenderThread:639336 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/config.yaml +2021-07-15 01:59:32,824 INFO SenderThread:639336 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files +2021-07-15 01:59:32,824 INFO SenderThread:639336 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/requirements.txt requirements.txt +2021-07-15 01:59:32,824 INFO SenderThread:639336 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/output.log output.log +2021-07-15 01:59:32,824 INFO SenderThread:639336 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/wandb-metadata.json wandb-metadata.json +2021-07-15 01:59:32,825 INFO SenderThread:639336 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/config.yaml config.yaml +2021-07-15 01:59:32,825 INFO SenderThread:639336 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/wandb-summary.json wandb-summary.json +2021-07-15 01:59:32,825 INFO SenderThread:639336 [sender.py:transition_state():308] send defer: 6 +2021-07-15 01:59:32,825 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:59:32,829 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:59:32,829 INFO HandlerThread:639336 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 01:59:32,834 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: defer +2021-07-15 01:59:32,835 INFO SenderThread:639336 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 01:59:32,835 INFO SenderThread:639336 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 01:59:32,930 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:59:32,931 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:59:33,033 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:59:33,033 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:59:33,135 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:59:33,135 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:59:33,237 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:59:33,237 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:59:33,259 INFO Thread-12 :639336 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/requirements.txt +2021-07-15 01:59:33,296 INFO Thread-14 :639336 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/config.yaml +2021-07-15 01:59:33,298 INFO Thread-15 :639336 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/wandb-summary.json +2021-07-15 01:59:33,300 INFO Thread-13 :639336 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/files/output.log +2021-07-15 01:59:33,339 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:59:33,339 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:59:33,441 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:59:33,441 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:59:33,500 INFO Thread-7 :639336 [sender.py:transition_state():308] send defer: 7 +2021-07-15 01:59:33,501 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:59:33,501 INFO HandlerThread:639336 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 01:59:33,501 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: defer +2021-07-15 01:59:33,501 INFO SenderThread:639336 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 01:59:33,543 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:59:33,775 INFO SenderThread:639336 [sender.py:transition_state():308] send defer: 8 +2021-07-15 01:59:33,775 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:59:33,776 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:59:33,776 INFO HandlerThread:639336 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 01:59:33,776 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: defer +2021-07-15 01:59:33,776 INFO SenderThread:639336 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 01:59:33,777 INFO SenderThread:639336 [sender.py:transition_state():308] send defer: 9 +2021-07-15 01:59:33,777 DEBUG SenderThread:639336 [sender.py:send():179] send: final +2021-07-15 01:59:33,777 DEBUG SenderThread:639336 [sender.py:send():179] send: footer +2021-07-15 01:59:33,777 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: defer +2021-07-15 01:59:33,777 INFO HandlerThread:639336 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 01:59:33,778 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: defer +2021-07-15 01:59:33,778 INFO SenderThread:639336 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 01:59:33,877 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 01:59:33,878 DEBUG SenderThread:639336 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 01:59:33,878 INFO SenderThread:639336 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 01:59:33,879 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 01:59:33,880 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 01:59:33,881 DEBUG HandlerThread:639336 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 01:59:33,881 INFO HandlerThread:639336 [handler.py:finish():638] shutting down handler +2021-07-15 01:59:34,778 INFO WriterThread:639336 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/run-2cbystpd.wandb +2021-07-15 01:59:34,878 INFO SenderThread:639336 [sender.py:finish():945] shutting down sender +2021-07-15 01:59:34,878 INFO SenderThread:639336 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 01:59:34,878 INFO SenderThread:639336 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 01:59:34,881 INFO MainThread:639336 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_014510-2cbystpd/logs/debug.log b/wandb/run-20210715_014510-2cbystpd/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..edfe48d6891b511f10812aaa970501abf416fcca --- /dev/null +++ b/wandb/run-20210715_014510-2cbystpd/logs/debug.log @@ -0,0 +1,119 @@ +2021-07-15 01:45:10,558 INFO MainThread:638079 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 01:45:10,558 INFO MainThread:638079 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 01:45:10,558 INFO MainThread:638079 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/logs/debug.log +2021-07-15 01:45:10,558 INFO MainThread:638079 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_014510-2cbystpd/logs/debug-internal.log +2021-07-15 01:45:10,558 INFO MainThread:638079 [wandb_init.py:init():370] calling init triggers +2021-07-15 01:45:10,558 INFO MainThread:638079 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 01:45:10,558 INFO MainThread:638079 [wandb_init.py:init():419] starting backend +2021-07-15 01:45:10,559 INFO MainThread:638079 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 01:45:10,605 INFO MainThread:638079 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 01:45:10,651 INFO MainThread:638079 [backend.py:ensure_launched():139] started backend process with pid: 639336 +2021-07-15 01:45:10,653 INFO MainThread:638079 [wandb_init.py:init():424] backend started and connected +2021-07-15 01:45:10,656 INFO MainThread:638079 [wandb_init.py:init():472] updated telemetry +2021-07-15 01:45:10,657 INFO MainThread:638079 [wandb_init.py:init():491] communicating current version +2021-07-15 01:45:11,303 INFO MainThread:638079 [wandb_init.py:init():496] got version response +2021-07-15 01:45:11,303 INFO MainThread:638079 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 01:45:11,475 INFO MainThread:638079 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 01:45:12,618 INFO MainThread:638079 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 01:45:12,619 INFO MainThread:638079 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 01:45:12,619 INFO MainThread:638079 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 01:45:12,621 INFO MainThread:638079 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 01:45:12,621 INFO MainThread:638079 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 01:45:12,627 INFO MainThread:638079 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_01-45-02_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 100, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 01:45:12,629 INFO MainThread:638079 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'} +2021-07-15 01:45:12,630 INFO MainThread:638079 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500} +2021-07-15 01:59:30,108 INFO MainThread:638079 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 01:59:30,110 INFO MainThread:638079 [wandb_run.py:_restore():1565] restore +2021-07-15 01:59:32,420 INFO MainThread:638079 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1376 + total_bytes: 1376 +} + +2021-07-15 01:59:32,638 INFO MainThread:638079 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1376 + total_bytes: 1376 +} + +2021-07-15 01:59:32,829 INFO MainThread:638079 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 3 +} +pusher_stats { + uploaded_bytes: 1376 + total_bytes: 5987 +} + +2021-07-15 01:59:32,931 INFO MainThread:638079 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1376 + total_bytes: 10557 +} + +2021-07-15 01:59:33,034 INFO MainThread:638079 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10557 + total_bytes: 10557 +} + +2021-07-15 01:59:33,136 INFO MainThread:638079 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10557 + total_bytes: 10557 +} + +2021-07-15 01:59:33,238 INFO MainThread:638079 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10557 + total_bytes: 10557 +} + +2021-07-15 01:59:33,340 INFO MainThread:638079 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10557 + total_bytes: 10557 +} + +2021-07-15 01:59:33,442 INFO MainThread:638079 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10557 + total_bytes: 10557 +} + +2021-07-15 01:59:33,776 INFO MainThread:638079 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10557 + total_bytes: 10557 +} + +2021-07-15 01:59:33,878 INFO MainThread:638079 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10557 + total_bytes: 10557 +} + +2021-07-15 01:59:35,186 INFO MainThread:638079 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_014510-2cbystpd/run-2cbystpd.wandb b/wandb/run-20210715_014510-2cbystpd/run-2cbystpd.wandb new file mode 100644 index 0000000000000000000000000000000000000000..f29d56072fc68443c7ff2be1294af500cc123b91 Binary files /dev/null and b/wandb/run-20210715_014510-2cbystpd/run-2cbystpd.wandb differ diff --git a/wandb/run-20210715_020018-3i0mvo08/files/config.yaml b/wandb/run-20210715_020018-3i0mvo08/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8911d4e91fc3ec02f42918149f3638a2edae4228 --- /dev/null +++ b/wandb/run-20210715_020018-3i0mvo08/files/config.yaml @@ -0,0 +1,301 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 1 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 3.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_02-00-11_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 50 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 500 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 1 +per_device_train_batch_size: + desc: null + value: 1 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 10000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_020018-3i0mvo08/files/output.log b/wandb/run-20210715_020018-3i0mvo08/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..9d02c29e83bd53becfa75075433bf004ddbe0164 --- /dev/null +++ b/wandb/run-20210715_020018-3i0mvo08/files/output.log @@ -0,0 +1,4 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( diff --git a/wandb/run-20210715_020018-3i0mvo08/files/requirements.txt b/wandb/run-20210715_020018-3i0mvo08/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_020018-3i0mvo08/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_020018-3i0mvo08/files/wandb-metadata.json b/wandb/run-20210715_020018-3i0mvo08/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..87d44ba157778931bb6186d5e63be52bf958760d --- /dev/null +++ b/wandb/run-20210715_020018-3i0mvo08/files/wandb-metadata.json @@ -0,0 +1,44 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T02:00:20.823870", + "startedAt": "2021-07-15T02:00:18.726644", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=10000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=50", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=3e-5", + "--per_device_train_batch_size=1", + "--per_device_eval_batch_size=1", + "--save_total_limit=5", + "--max_eval_samples=500" + ], + "state": "running", + "program": "./run_mlm_flax_no_accum.py", + "codePath": "run_mlm_flax_no_accum.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_020018-3i0mvo08/files/wandb-summary.json b/wandb/run-20210715_020018-3i0mvo08/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_020018-3i0mvo08/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_020018-3i0mvo08/logs/debug-internal.log b/wandb/run-20210715_020018-3i0mvo08/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..7cb42b213576b05f8c963ac6040795e214b15267 --- /dev/null +++ b/wandb/run-20210715_020018-3i0mvo08/logs/debug-internal.log @@ -0,0 +1,156 @@ +2021-07-15 02:00:19,467 INFO MainThread:641950 [internal.py:wandb_internal():88] W&B internal server running at pid: 641950, started at: 2021-07-15 02:00:19.467145 +2021-07-15 02:00:19,469 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 02:00:19,469 INFO WriterThread:641950 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb +2021-07-15 02:00:19,470 DEBUG SenderThread:641950 [sender.py:send():179] send: header +2021-07-15 02:00:19,470 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: check_version +2021-07-15 02:00:19,508 DEBUG SenderThread:641950 [sender.py:send():179] send: run +2021-07-15 02:00:19,678 INFO SenderThread:641950 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files +2021-07-15 02:00:19,678 INFO SenderThread:641950 [sender.py:_start_run_threads():716] run started: 3i0mvo08 with start time 1626314418 +2021-07-15 02:00:19,680 DEBUG SenderThread:641950 [sender.py:send():179] send: summary +2021-07-15 02:00:19,681 INFO SenderThread:641950 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 02:00:19,682 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 02:00:20,680 INFO Thread-8 :641950 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/wandb-summary.json +2021-07-15 02:00:20,823 DEBUG HandlerThread:641950 [meta.py:__init__():39] meta init +2021-07-15 02:00:20,823 DEBUG HandlerThread:641950 [meta.py:__init__():53] meta init done +2021-07-15 02:00:20,823 DEBUG HandlerThread:641950 [meta.py:probe():210] probe +2021-07-15 02:00:20,826 DEBUG HandlerThread:641950 [meta.py:_setup_git():200] setup git +2021-07-15 02:00:20,859 DEBUG HandlerThread:641950 [meta.py:_setup_git():207] setup git done +2021-07-15 02:00:20,860 DEBUG HandlerThread:641950 [meta.py:_save_pip():57] save pip +2021-07-15 02:00:20,860 DEBUG HandlerThread:641950 [meta.py:_save_pip():71] save pip done +2021-07-15 02:00:20,860 DEBUG HandlerThread:641950 [meta.py:probe():252] probe done +2021-07-15 02:00:20,864 DEBUG SenderThread:641950 [sender.py:send():179] send: files +2021-07-15 02:00:20,864 INFO SenderThread:641950 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 02:00:20,871 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:00:20,872 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:00:21,005 DEBUG SenderThread:641950 [sender.py:send():179] send: config +2021-07-15 02:00:21,006 DEBUG SenderThread:641950 [sender.py:send():179] send: config +2021-07-15 02:00:21,006 DEBUG SenderThread:641950 [sender.py:send():179] send: config +2021-07-15 02:00:21,336 INFO Thread-11 :641950 [upload_job.py:push():137] Uploaded file /tmp/tmp6nr5kquswandb/1mm7a1ss-wandb-metadata.json +2021-07-15 02:00:21,680 INFO Thread-8 :641950 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/requirements.txt +2021-07-15 02:00:21,680 INFO Thread-8 :641950 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/wandb-metadata.json +2021-07-15 02:00:21,680 INFO Thread-8 :641950 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/output.log +2021-07-15 02:00:35,686 INFO Thread-8 :641950 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/output.log +2021-07-15 02:00:36,007 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:00:36,008 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:00:37,686 INFO Thread-8 :641950 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/output.log +2021-07-15 02:00:48,911 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:00:50,692 INFO Thread-8 :641950 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/config.yaml +2021-07-15 02:00:51,141 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:00:51,141 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:01:06,276 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:01:06,277 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:01:18,985 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:01:21,409 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:01:21,410 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:01:36,542 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:01:36,542 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:01:49,063 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:01:51,675 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:01:51,675 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:02:06,807 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:02:06,807 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:02:19,139 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:02:21,939 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:02:21,940 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:02:37,095 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:02:37,096 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:02:49,217 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:02:52,228 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:02:52,229 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:03:07,361 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:03:07,361 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:03:19,296 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:03:22,495 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:03:22,496 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:03:37,625 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:03:37,625 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:03:49,373 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:03:52,759 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:03:52,760 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:04:07,891 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:04:07,892 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:04:19,452 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:04:23,028 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:04:23,028 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:04:38,163 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:04:38,164 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:04:49,529 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:04:53,295 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:04:53,296 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:05:08,430 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:05:08,431 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:05:19,608 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:05:23,564 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:05:23,564 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:05:38,696 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:05:38,697 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:05:49,675 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:05:53,829 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:05:53,829 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:06:08,964 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:06:08,965 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:06:19,742 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:06:24,099 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:06:24,099 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:06:39,255 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:06:39,255 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:06:49,819 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:06:54,387 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:06:54,387 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:07:09,520 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:07:09,520 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:07:19,895 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:07:24,654 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:07:24,654 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:07:39,784 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:07:39,785 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:07:49,974 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:07:54,915 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:07:54,916 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:08:10,045 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:08:10,045 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:08:20,053 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:08:25,176 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:08:25,177 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:08:40,316 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:08:40,317 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:08:50,130 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:08:55,452 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:08:55,453 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:09:10,584 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:09:10,584 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:09:20,207 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:09:25,714 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:09:25,714 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:09:40,847 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:09:40,848 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:09:50,285 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:09:55,977 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:09:55,978 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:10:11,108 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:10:11,109 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:10:20,362 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:10:26,244 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:10:26,245 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:10:41,377 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:10:41,378 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:10:50,431 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:10:56,509 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:10:56,509 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:11:11,641 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:11:11,641 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:11:20,504 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:11:26,773 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:11:26,774 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:11:41,906 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:11:41,906 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:11:50,568 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:11:57,039 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:11:57,039 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:12:12,171 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:12:12,171 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:12:20,628 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:12:27,302 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:12:27,302 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:12:42,431 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:12:42,432 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status diff --git a/wandb/run-20210715_020018-3i0mvo08/logs/debug.log b/wandb/run-20210715_020018-3i0mvo08/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..49646987f2fe19daa8cee908011314efd1e2827d --- /dev/null +++ b/wandb/run-20210715_020018-3i0mvo08/logs/debug.log @@ -0,0 +1,25 @@ +2021-07-15 02:00:18,728 INFO MainThread:640692 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 02:00:18,728 INFO MainThread:640692 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 02:00:18,728 INFO MainThread:640692 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/logs/debug.log +2021-07-15 02:00:18,728 INFO MainThread:640692 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/logs/debug-internal.log +2021-07-15 02:00:18,728 INFO MainThread:640692 [wandb_init.py:init():370] calling init triggers +2021-07-15 02:00:18,728 INFO MainThread:640692 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 02:00:18,728 INFO MainThread:640692 [wandb_init.py:init():419] starting backend +2021-07-15 02:00:18,728 INFO MainThread:640692 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 02:00:18,786 INFO MainThread:640692 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 02:00:18,844 INFO MainThread:640692 [backend.py:ensure_launched():139] started backend process with pid: 641950 +2021-07-15 02:00:18,846 INFO MainThread:640692 [wandb_init.py:init():424] backend started and connected +2021-07-15 02:00:18,849 INFO MainThread:640692 [wandb_init.py:init():472] updated telemetry +2021-07-15 02:00:18,850 INFO MainThread:640692 [wandb_init.py:init():491] communicating current version +2021-07-15 02:00:19,506 INFO MainThread:640692 [wandb_init.py:init():496] got version response +2021-07-15 02:00:19,507 INFO MainThread:640692 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 02:00:19,681 INFO MainThread:640692 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 02:00:20,867 INFO MainThread:640692 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 02:00:20,868 INFO MainThread:640692 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 02:00:20,869 INFO MainThread:640692 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 02:00:20,870 INFO MainThread:640692 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 02:00:20,871 INFO MainThread:640692 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 02:00:20,876 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_02-00-11_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 02:00:20,878 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'} +2021-07-15 02:00:20,879 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500} diff --git a/wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb b/wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb new file mode 100644 index 0000000000000000000000000000000000000000..c2e94a6c1df1528c380e16bd5338b819caad49b4 Binary files /dev/null and b/wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb differ