diff --git a/checkpoint_30000 b/checkpoint_30000 new file mode 100644 index 0000000000000000000000000000000000000000..53675b63c93e1995205f5ddebdde18e1f7b5d265 --- /dev/null +++ b/checkpoint_30000 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59518736214a20e8125f1484fe8db260c9384560796a22aa38130472f209af5f +size 1530270447 diff --git a/events.out.tfevents.1626315358.t1v-n-f5c06ea1-w-0.643445.3.v2 b/events.out.tfevents.1626315358.t1v-n-f5c06ea1-w-0.643445.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..0eb7fd68cb6f1e298f8ba66016fc3d63c69b7d7e --- /dev/null +++ b/events.out.tfevents.1626315358.t1v-n-f5c06ea1-w-0.643445.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbbdcfdd6e9c4204719d1205d9986aefaa632a8322127f79c7c2db6721350035 +size 40 diff --git a/events.out.tfevents.1626316431.t1v-n-f5c06ea1-w-0.646155.3.v2 b/events.out.tfevents.1626316431.t1v-n-f5c06ea1-w-0.646155.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..f926781dc1eb7120dd6a5516e7eaeec5ade9a672 --- /dev/null +++ b/events.out.tfevents.1626316431.t1v-n-f5c06ea1-w-0.646155.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d42ca9bdf3e3d3484649f49a885a13ee3f0a5215c95ce545d2555e478ec6d2c3 +size 40 diff --git a/events.out.tfevents.1626317295.t1v-n-f5c06ea1-w-0.648648.3.v2 b/events.out.tfevents.1626317295.t1v-n-f5c06ea1-w-0.648648.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..254def98382b6f094fe6d0421c61334e1972d117 --- /dev/null +++ b/events.out.tfevents.1626317295.t1v-n-f5c06ea1-w-0.648648.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4b9c6db5c2a2231727f871c58176b8c65820459b68583d2c8f45c07f2298c60 +size 40 diff --git a/events.out.tfevents.1626318014.t1v-n-f5c06ea1-w-0.651126.3.v2 b/events.out.tfevents.1626318014.t1v-n-f5c06ea1-w-0.651126.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..9b3f0739478c944488ea8c62e4deeccf955eba10 --- /dev/null +++ b/events.out.tfevents.1626318014.t1v-n-f5c06ea1-w-0.651126.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e65b24a209eecf55c1ee53b6bdce4204f068eff07c0ed31b60a3fc7d1ad7de80 +size 40 diff --git a/events.out.tfevents.1626318666.t1v-n-f5c06ea1-w-0.655476.3.v2 b/events.out.tfevents.1626318666.t1v-n-f5c06ea1-w-0.655476.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..22c193fd39fbe42d2c9ada518ce94920365870ef --- /dev/null +++ b/events.out.tfevents.1626318666.t1v-n-f5c06ea1-w-0.655476.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69b32ab8a1ed5787f81840edb5bf7961c38526a7e7a3785c8559a727406f16a2 +size 4478974 diff --git a/flax_model.msgpack b/flax_model.msgpack index b893cdea9fe9fdf8ff2f35bd601e8d44de3f5d63..8ff79b8816a7cd0cfefea765d47071cb6ae4d11d 100644 --- a/flax_model.msgpack +++ b/flax_model.msgpack @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:920cc52411bb9cee8aec7d54fbccc6e52223b9df8ed791d4fdd90a31831aed15 +oid sha256:888817743e9128cfd6e093b2327a40d34a3acb8bcc7b90b00adfae9b27af28ec size 510090043 diff --git a/run.sh b/run.sh index 26797e2cf9f7578e1509f9d9843cd4cdd85035c8..94c7a2ac2fcdcefa2fc352d72c6bc14d422a2a81 100644 --- a/run.sh +++ b/run.sh @@ -18,7 +18,7 @@ python ./run_mlm_flax_no_accum.py \ --eval_steps="20000" \ --num_train_epochs="5" \ --preprocessing_num_workers="96" \ - --save_steps="20000" \ + --save_steps="30000" \ --learning_rate="3e-5" \ --per_device_train_batch_size="1" \ --per_device_eval_batch_size="1" \ diff --git a/run_mlm_flax_no_accum.py b/run_mlm_flax_no_accum.py index 95004f842878b3c5b3452face9c836bd299cd6fc..0bea7754289a690a3c372a273a400a02e82e3917 100644 --- a/run_mlm_flax_no_accum.py +++ b/run_mlm_flax_no_accum.py @@ -421,7 +421,7 @@ if __name__ == "__main__": tokenized_datasets = DatasetDict.load_from_disk("/data/tokenized_data") logger.info("Setting max validation examples to ") print(f"Number of validation examples {data_args.max_eval_samples}") - #tokenized_datasets["train"]= tokenized_datasets["train"].select(range(int(0.3*len(tokenized_datasets["train"])))) + tokenized_datasets["train"]= tokenized_datasets["train"].select(range(int(0.35*len(tokenized_datasets["train"])))) if data_args.max_eval_samples is not None: tokenized_datasets["validation"] = tokenized_datasets["validation"].select(range(data_args.max_eval_samples)) else: diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log index 436926eb20a98d5ac77e0f29c4908d5ab93f55fd..9ad3b2e59addeab37e6ffcd1c6d27f1315d21916 120000 --- a/wandb/debug-internal.log +++ b/wandb/debug-internal.log @@ -1 +1 @@ -run-20210715_020018-3i0mvo08/logs/debug-internal.log \ No newline at end of file +run-20210715_031107-69jkygz3/logs/debug-internal.log \ No newline at end of file diff --git a/wandb/debug.log b/wandb/debug.log index 3e9479db1a2bcb33dc4f7e6bb4d08bc8a6fd995f..b2ba49810619ce235007a7a9b6570c0f1380de4a 120000 --- a/wandb/debug.log +++ b/wandb/debug.log @@ -1 +1 @@ -run-20210715_020018-3i0mvo08/logs/debug.log \ No newline at end of file +run-20210715_031107-69jkygz3/logs/debug.log \ No newline at end of file diff --git a/wandb/latest-run b/wandb/latest-run index 7f862e9df6a33d2250244dd835ed7820e9276ebd..eacb3173b4b2f0f55ac7e7d91aa8b81f44e8b2b7 120000 --- a/wandb/latest-run +++ b/wandb/latest-run @@ -1 +1 @@ -run-20210715_020018-3i0mvo08 \ No newline at end of file +run-20210715_031107-69jkygz3 \ No newline at end of file diff --git a/wandb/run-20210715_020018-3i0mvo08/files/config.yaml b/wandb/run-20210715_020018-3i0mvo08/files/config.yaml index 8911d4e91fc3ec02f42918149f3638a2edae4228..f1a37961d6289fe8fc315dfce0f456b05a7e141c 100644 --- a/wandb/run-20210715_020018-3i0mvo08/files/config.yaml +++ b/wandb/run-20210715_020018-3i0mvo08/files/config.yaml @@ -13,6 +13,9 @@ _wandb: 1: - 3 - 11 + 2: + - 3 + - 11 4: 3.8.10 5: 0.10.33 6: 4.9.0.dev0 diff --git a/wandb/run-20210715_020018-3i0mvo08/files/output.log b/wandb/run-20210715_020018-3i0mvo08/files/output.log index 9d02c29e83bd53becfa75075433bf004ddbe0164..6c83ac2a929e075cc536ec68cef4adb28bfc46a8 100644 --- a/wandb/run-20210715_020018-3i0mvo08/files/output.log +++ b/wandb/run-20210715_020018-3i0mvo08/files/output.log @@ -2,3 +2,31 @@ warnings.warn( /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + train_batch_idx = generate_batch_splits(train_samples_idx, train_batch_size) + File "./run_mlm_flax_no_accum.py", line 255, in generate_batch_splits + batch_idx = np.split(samples_idx, sections_split) + File "<__array_function__ internals>", line 5, in split + File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 874, in split + return array_split(ary, indices_or_sections, axis) + File "<__array_function__ internals>", line 5, in array_split + File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 790, in array_split + sub_arys.append(_nx.swapaxes(sary[st:end], axis, 0)) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5009, in _rewriting_take + return _gather(arr, treedef, static_idx, dynamic_idx, indices_are_sorted, + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5028, in _gather + y = lax.gather( + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/lax/lax.py", line 984, in gather + return gather_p.bind( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 264, in bind + out = top_trace.process_primitive(self, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 603, in process_primitive + return primitive.impl(*tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 249, in apply_primitive + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 365, in _execute_compiled_primitive + out_bufs = compiled.execute(input_bufs) +RuntimeError: Resource exhausted: Attempting to allocate 17.0K. That was not possible. There are 48.0K free. Due to fragmentation, the largest contiguous region of free memory is 16.0K.; (0x0x0_HBM0) \ No newline at end of file diff --git a/wandb/run-20210715_020018-3i0mvo08/logs/debug-internal.log b/wandb/run-20210715_020018-3i0mvo08/logs/debug-internal.log index 7cb42b213576b05f8c963ac6040795e214b15267..a18c016a5089753005e0d8a49c945fb5a6ddf154 100644 --- a/wandb/run-20210715_020018-3i0mvo08/logs/debug-internal.log +++ b/wandb/run-20210715_020018-3i0mvo08/logs/debug-internal.log @@ -154,3 +154,129 @@ 2021-07-15 02:12:27,302 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status 2021-07-15 02:12:42,431 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status 2021-07-15 02:12:42,432 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:12:50,705 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:12:57,560 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:12:57,561 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:13:12,692 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:13:12,692 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:13:20,785 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:13:27,826 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:13:27,826 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:13:42,962 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:13:42,963 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:13:50,860 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:13:58,097 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:13:58,097 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:14:13,229 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:14:13,229 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:14:20,935 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:14:28,363 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:14:28,363 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:14:43,496 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:14:43,496 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:14:46,031 INFO Thread-8 :641950 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/output.log +2021-07-15 02:14:47,302 DEBUG SenderThread:641950 [sender.py:send():179] send: telemetry +2021-07-15 02:14:47,303 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:14:47,303 DEBUG SenderThread:641950 [sender.py:send():179] send: exit +2021-07-15 02:14:47,303 INFO SenderThread:641950 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 02:14:47,304 INFO SenderThread:641950 [sender.py:send_exit():295] send defer +2021-07-15 02:14:47,305 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:14:47,305 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:14:47,306 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 02:14:47,306 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer +2021-07-15 02:14:47,306 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 02:14:47,306 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 1 +2021-07-15 02:14:47,306 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:14:47,306 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 02:14:47,401 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer +2021-07-15 02:14:47,401 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 02:14:47,401 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 2 +2021-07-15 02:14:47,401 DEBUG SenderThread:641950 [sender.py:send():179] send: stats +2021-07-15 02:14:47,402 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:14:47,402 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 02:14:47,402 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer +2021-07-15 02:14:47,402 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 02:14:47,402 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 3 +2021-07-15 02:14:47,403 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:14:47,403 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 02:14:47,403 DEBUG SenderThread:641950 [sender.py:send():179] send: summary +2021-07-15 02:14:47,403 INFO SenderThread:641950 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 02:14:47,404 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer +2021-07-15 02:14:47,404 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 02:14:47,404 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 4 +2021-07-15 02:14:47,404 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:14:47,404 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 02:14:47,404 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer +2021-07-15 02:14:47,404 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 02:14:47,409 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:14:47,585 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 5 +2021-07-15 02:14:47,586 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:14:47,586 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:14:47,586 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 02:14:47,586 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer +2021-07-15 02:14:47,586 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 02:14:47,586 INFO SenderThread:641950 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 02:14:47,688 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:14:48,032 INFO Thread-8 :641950 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/config.yaml +2021-07-15 02:14:48,033 INFO SenderThread:641950 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/wandb-summary.json +2021-07-15 02:14:48,033 INFO SenderThread:641950 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/output.log +2021-07-15 02:14:48,033 INFO SenderThread:641950 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files +2021-07-15 02:14:48,033 INFO SenderThread:641950 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/requirements.txt requirements.txt +2021-07-15 02:14:48,034 INFO SenderThread:641950 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/output.log output.log +2021-07-15 02:14:48,034 INFO SenderThread:641950 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/wandb-metadata.json wandb-metadata.json +2021-07-15 02:14:48,034 INFO SenderThread:641950 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/config.yaml config.yaml +2021-07-15 02:14:48,034 INFO SenderThread:641950 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/wandb-summary.json wandb-summary.json +2021-07-15 02:14:48,034 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 6 +2021-07-15 02:14:48,034 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:14:48,035 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:14:48,035 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 02:14:48,036 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer +2021-07-15 02:14:48,036 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 02:14:48,036 INFO SenderThread:641950 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 02:14:48,137 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:14:48,137 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:14:48,239 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:14:48,240 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:14:48,342 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:14:48,342 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:14:48,444 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:14:48,444 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:14:48,475 INFO Thread-15 :641950 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/wandb-summary.json +2021-07-15 02:14:48,479 INFO Thread-13 :641950 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/output.log +2021-07-15 02:14:48,493 INFO Thread-14 :641950 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/config.yaml +2021-07-15 02:14:48,547 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:14:48,548 INFO Thread-12 :641950 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/requirements.txt +2021-07-15 02:14:48,548 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:14:48,650 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:14:48,650 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:14:48,749 INFO Thread-7 :641950 [sender.py:transition_state():308] send defer: 7 +2021-07-15 02:14:48,749 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:14:48,749 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 02:14:48,749 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer +2021-07-15 02:14:48,750 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 02:14:48,752 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:14:49,034 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 8 +2021-07-15 02:14:49,034 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:14:49,035 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:14:49,035 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 02:14:49,035 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer +2021-07-15 02:14:49,035 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 02:14:49,035 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 9 +2021-07-15 02:14:49,036 DEBUG SenderThread:641950 [sender.py:send():179] send: final +2021-07-15 02:14:49,036 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:14:49,036 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 02:14:49,036 DEBUG SenderThread:641950 [sender.py:send():179] send: footer +2021-07-15 02:14:49,036 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer +2021-07-15 02:14:49,036 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 02:14:49,137 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:14:49,137 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:14:49,137 INFO SenderThread:641950 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 02:14:49,139 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 02:14:49,139 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 02:14:49,140 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 02:14:49,140 INFO HandlerThread:641950 [handler.py:finish():638] shutting down handler +2021-07-15 02:14:50,037 INFO WriterThread:641950 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb +2021-07-15 02:14:50,138 INFO SenderThread:641950 [sender.py:finish():945] shutting down sender +2021-07-15 02:14:50,138 INFO SenderThread:641950 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 02:14:50,138 INFO SenderThread:641950 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 02:14:50,141 INFO MainThread:641950 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_020018-3i0mvo08/logs/debug.log b/wandb/run-20210715_020018-3i0mvo08/logs/debug.log index 49646987f2fe19daa8cee908011314efd1e2827d..5138925b4e47c3f7375d2f944f5adac6e95aedd5 100644 --- a/wandb/run-20210715_020018-3i0mvo08/logs/debug.log +++ b/wandb/run-20210715_020018-3i0mvo08/logs/debug.log @@ -23,3 +23,97 @@ config: {} 2021-07-15 02:00:20,876 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_02-00-11_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} 2021-07-15 02:00:20,878 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'} 2021-07-15 02:00:20,879 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500} +2021-07-15 02:14:44,909 INFO MainThread:640692 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 02:14:44,910 INFO MainThread:640692 [wandb_run.py:_restore():1565] restore +2021-07-15 02:14:47,306 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 1375 +} + +2021-07-15 02:14:47,586 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 1375 +} + +2021-07-15 02:14:48,036 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 3 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 5986 +} + +2021-07-15 02:14:48,138 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 10555 +} + +2021-07-15 02:14:48,240 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10555 + total_bytes: 10555 +} + +2021-07-15 02:14:48,343 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10555 + total_bytes: 10555 +} + +2021-07-15 02:14:48,445 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10555 + total_bytes: 10555 +} + +2021-07-15 02:14:48,549 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10555 + total_bytes: 10555 +} + +2021-07-15 02:14:48,651 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10555 + total_bytes: 10555 +} + +2021-07-15 02:14:49,035 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10555 + total_bytes: 10555 +} + +2021-07-15 02:14:49,138 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10555 + total_bytes: 10555 +} + +2021-07-15 02:14:50,442 INFO MainThread:640692 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb b/wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb index c2e94a6c1df1528c380e16bd5338b819caad49b4..2bcc1a4d6e858290fac3bcf670b5fe2db093c11c 100644 Binary files a/wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb and b/wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb differ diff --git a/wandb/run-20210715_021559-38yj0n5v/files/config.yaml b/wandb/run-20210715_021559-38yj0n5v/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..162d8f4da081dc59836d4d858f80bbf5e0690e47 --- /dev/null +++ b/wandb/run-20210715_021559-38yj0n5v/files/config.yaml @@ -0,0 +1,304 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 1 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 3.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_02-15-50_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 50 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 500 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 1 +per_device_train_batch_size: + desc: null + value: 1 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 10000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_021559-38yj0n5v/files/output.log b/wandb/run-20210715_021559-38yj0n5v/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..21c55c578d8c3c56ec4dc6011fa8f6302046a016 --- /dev/null +++ b/wandb/run-20210715_021559-38yj0n5v/files/output.log @@ -0,0 +1,37 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 188.47M free, 0B reserved, and 6.75M reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax_no_accum.py", line 699, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 188.47M free, 0B reserved, and 6.75M reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210715_021559-38yj0n5v/files/requirements.txt b/wandb/run-20210715_021559-38yj0n5v/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_021559-38yj0n5v/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_021559-38yj0n5v/files/wandb-metadata.json b/wandb/run-20210715_021559-38yj0n5v/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..641e85571281b47ff511f7f004ed77edcd3189a2 --- /dev/null +++ b/wandb/run-20210715_021559-38yj0n5v/files/wandb-metadata.json @@ -0,0 +1,44 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T02:16:01.117383", + "startedAt": "2021-07-15T02:15:59.045700", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=10000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=50", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=3e-5", + "--per_device_train_batch_size=1", + "--per_device_eval_batch_size=1", + "--save_total_limit=5", + "--max_eval_samples=500" + ], + "state": "running", + "program": "./run_mlm_flax_no_accum.py", + "codePath": "run_mlm_flax_no_accum.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "87e02e7ff8fbaea90c8c4ad1c984f83742432303" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json b/wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_021559-38yj0n5v/logs/debug-internal.log b/wandb/run-20210715_021559-38yj0n5v/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..b94b28e7664c2767ad67892b9650737020ca2c7f --- /dev/null +++ b/wandb/run-20210715_021559-38yj0n5v/logs/debug-internal.log @@ -0,0 +1,298 @@ +2021-07-15 02:15:59,792 INFO MainThread:644701 [internal.py:wandb_internal():88] W&B internal server running at pid: 644701, started at: 2021-07-15 02:15:59.792106 +2021-07-15 02:15:59,795 DEBUG SenderThread:644701 [sender.py:send():179] send: header +2021-07-15 02:15:59,795 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 02:15:59,795 INFO WriterThread:644701 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/run-38yj0n5v.wandb +2021-07-15 02:15:59,796 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: check_version +2021-07-15 02:15:59,833 DEBUG SenderThread:644701 [sender.py:send():179] send: run +2021-07-15 02:16:00,007 INFO SenderThread:644701 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files +2021-07-15 02:16:00,007 INFO SenderThread:644701 [sender.py:_start_run_threads():716] run started: 38yj0n5v with start time 1626315359 +2021-07-15 02:16:00,007 DEBUG SenderThread:644701 [sender.py:send():179] send: summary +2021-07-15 02:16:00,008 INFO SenderThread:644701 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 02:16:00,008 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 02:16:01,010 INFO Thread-8 :644701 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json +2021-07-15 02:16:01,117 DEBUG HandlerThread:644701 [meta.py:__init__():39] meta init +2021-07-15 02:16:01,117 DEBUG HandlerThread:644701 [meta.py:__init__():53] meta init done +2021-07-15 02:16:01,117 DEBUG HandlerThread:644701 [meta.py:probe():210] probe +2021-07-15 02:16:01,118 DEBUG HandlerThread:644701 [meta.py:_setup_git():200] setup git +2021-07-15 02:16:01,149 DEBUG HandlerThread:644701 [meta.py:_setup_git():207] setup git done +2021-07-15 02:16:01,150 DEBUG HandlerThread:644701 [meta.py:_save_pip():57] save pip +2021-07-15 02:16:01,150 DEBUG HandlerThread:644701 [meta.py:_save_pip():71] save pip done +2021-07-15 02:16:01,150 DEBUG HandlerThread:644701 [meta.py:probe():252] probe done +2021-07-15 02:16:01,154 DEBUG SenderThread:644701 [sender.py:send():179] send: files +2021-07-15 02:16:01,154 INFO SenderThread:644701 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 02:16:01,160 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:16:01,161 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:16:01,290 DEBUG SenderThread:644701 [sender.py:send():179] send: config +2021-07-15 02:16:01,291 DEBUG SenderThread:644701 [sender.py:send():179] send: config +2021-07-15 02:16:01,291 DEBUG SenderThread:644701 [sender.py:send():179] send: config +2021-07-15 02:16:01,718 INFO Thread-11 :644701 [upload_job.py:push():137] Uploaded file /tmp/tmp__ipqk3vwandb/1qcixa2k-wandb-metadata.json +2021-07-15 02:16:02,009 INFO Thread-8 :644701 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log +2021-07-15 02:16:02,009 INFO Thread-8 :644701 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/requirements.txt +2021-07-15 02:16:02,009 INFO Thread-8 :644701 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-metadata.json +2021-07-15 02:16:16,015 INFO Thread-8 :644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log +2021-07-15 02:16:16,292 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:16:16,293 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:16:29,202 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:16:31,021 INFO Thread-8 :644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/config.yaml +2021-07-15 02:16:31,425 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:16:31,425 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:16:46,555 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:16:46,555 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:16:59,284 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:17:01,687 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:17:01,687 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:17:16,819 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:17:16,820 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:17:29,359 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:17:31,951 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:17:31,951 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:17:47,083 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:17:47,083 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:17:59,439 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:18:02,215 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:18:02,215 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:18:17,355 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:18:17,356 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:18:29,519 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:18:32,491 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:18:32,492 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:18:47,624 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:18:47,624 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:18:59,595 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:19:02,759 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:19:02,759 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:19:17,890 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:19:17,890 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:19:29,672 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:19:33,021 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:19:33,022 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:19:48,153 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:19:48,154 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:19:59,751 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:20:03,293 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:20:03,294 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:20:18,425 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:20:18,426 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:20:29,828 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:20:33,560 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:20:33,560 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:20:48,726 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:20:48,726 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:20:59,906 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:21:03,857 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:21:03,858 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:21:18,990 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:21:18,991 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:21:29,980 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:21:34,126 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:21:34,126 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:21:49,258 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:21:49,258 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:22:00,053 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:22:04,390 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:22:04,391 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:22:19,527 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:22:19,527 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:22:30,130 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:22:34,658 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:22:34,658 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:22:49,790 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:22:49,790 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:23:00,206 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:23:04,919 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:23:04,920 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:23:20,062 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:23:20,063 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:23:30,267 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:23:35,199 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:23:35,199 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:23:50,332 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:23:50,332 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:24:00,346 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:24:05,465 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:24:05,466 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:24:20,598 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:24:20,598 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:24:30,424 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:24:35,751 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:24:35,751 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:24:50,888 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:24:50,888 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:25:00,500 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:25:06,021 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:25:06,022 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:25:21,156 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:25:21,157 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:25:30,575 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:25:36,290 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:25:36,291 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:25:51,426 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:25:51,426 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:26:00,654 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:26:06,562 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:26:06,562 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:26:21,692 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:26:21,693 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:26:30,729 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:26:36,825 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:26:36,825 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:26:51,959 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:26:51,959 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:27:00,798 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:27:07,091 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:27:07,091 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:27:22,224 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:27:22,224 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:27:30,870 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:27:37,360 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:27:37,360 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:27:52,491 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:27:52,491 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:28:00,938 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:28:07,622 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:28:07,622 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:28:22,754 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:28:22,755 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:28:31,010 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:28:37,888 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:28:37,888 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:28:53,020 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:28:53,021 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:29:01,085 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:29:08,157 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:29:08,157 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:29:23,289 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:29:23,289 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:29:31,158 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:29:38,420 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:29:38,420 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:29:53,553 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:29:53,553 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:29:58,389 INFO Thread-8 :644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log +2021-07-15 02:30:01,235 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:30:08,702 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:30:08,702 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:30:23,843 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:30:23,843 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:30:31,315 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:30:38,973 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:30:38,973 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:30:54,105 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:30:54,106 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:31:01,399 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:31:09,240 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:31:09,240 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:31:24,379 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:31:24,379 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:31:31,480 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:31:39,512 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:31:39,512 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:31:54,644 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:31:54,644 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:32:01,553 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:32:04,443 INFO Thread-8 :644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log +2021-07-15 02:32:04,474 DEBUG SenderThread:644701 [sender.py:send():179] send: telemetry +2021-07-15 02:32:04,474 DEBUG SenderThread:644701 [sender.py:send():179] send: exit +2021-07-15 02:32:04,474 INFO SenderThread:644701 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 02:32:04,476 INFO SenderThread:644701 [sender.py:send_exit():295] send defer +2021-07-15 02:32:04,476 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:32:04,477 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:32:04,477 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:32:04,477 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 02:32:04,478 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer +2021-07-15 02:32:04,478 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 02:32:04,478 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 1 +2021-07-15 02:32:04,478 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:32:04,478 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 02:32:04,561 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer +2021-07-15 02:32:04,561 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 02:32:04,562 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 2 +2021-07-15 02:32:04,562 DEBUG SenderThread:644701 [sender.py:send():179] send: stats +2021-07-15 02:32:04,562 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:32:04,562 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 02:32:04,562 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer +2021-07-15 02:32:04,563 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 02:32:04,563 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 3 +2021-07-15 02:32:04,563 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:32:04,563 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 02:32:04,563 DEBUG SenderThread:644701 [sender.py:send():179] send: summary +2021-07-15 02:32:04,564 INFO SenderThread:644701 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 02:32:04,564 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer +2021-07-15 02:32:04,564 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 02:32:04,564 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 4 +2021-07-15 02:32:04,565 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:32:04,565 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 02:32:04,565 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer +2021-07-15 02:32:04,565 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 02:32:04,580 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:32:04,749 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 5 +2021-07-15 02:32:04,749 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:32:04,749 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:32:04,750 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 02:32:04,750 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer +2021-07-15 02:32:04,750 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 02:32:04,750 INFO SenderThread:644701 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 02:32:04,851 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:32:05,444 INFO Thread-8 :644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log +2021-07-15 02:32:05,445 INFO SenderThread:644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json +2021-07-15 02:32:05,445 INFO SenderThread:644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/config.yaml +2021-07-15 02:32:05,445 INFO SenderThread:644701 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files +2021-07-15 02:32:05,445 INFO SenderThread:644701 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/requirements.txt requirements.txt +2021-07-15 02:32:05,446 INFO SenderThread:644701 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log output.log +2021-07-15 02:32:05,446 INFO SenderThread:644701 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-metadata.json wandb-metadata.json +2021-07-15 02:32:05,446 INFO SenderThread:644701 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/config.yaml config.yaml +2021-07-15 02:32:05,450 INFO SenderThread:644701 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json wandb-summary.json +2021-07-15 02:32:05,453 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 6 +2021-07-15 02:32:05,453 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:32:05,455 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:32:05,457 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 02:32:05,458 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer +2021-07-15 02:32:05,461 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 02:32:05,461 INFO SenderThread:644701 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 02:32:05,556 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:32:05,556 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:32:05,658 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:32:05,659 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:32:05,761 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:32:05,761 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:32:05,863 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:32:05,863 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:32:05,888 INFO Thread-15 :644701 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json +2021-07-15 02:32:05,892 INFO Thread-13 :644701 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log +2021-07-15 02:32:05,894 INFO Thread-14 :644701 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/config.yaml +2021-07-15 02:32:05,895 INFO Thread-12 :644701 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/requirements.txt +2021-07-15 02:32:05,965 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:32:05,965 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:32:06,067 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:32:06,067 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:32:06,096 INFO Thread-7 :644701 [sender.py:transition_state():308] send defer: 7 +2021-07-15 02:32:06,096 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:32:06,096 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 02:32:06,097 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer +2021-07-15 02:32:06,097 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 02:32:06,169 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:32:06,370 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 8 +2021-07-15 02:32:06,370 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:32:06,371 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:32:06,371 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 02:32:06,371 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer +2021-07-15 02:32:06,371 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 02:32:06,371 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 9 +2021-07-15 02:32:06,372 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:32:06,372 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 02:32:06,372 DEBUG SenderThread:644701 [sender.py:send():179] send: final +2021-07-15 02:32:06,372 DEBUG SenderThread:644701 [sender.py:send():179] send: footer +2021-07-15 02:32:06,372 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer +2021-07-15 02:32:06,372 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 02:32:06,472 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:32:06,472 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:32:06,473 INFO SenderThread:644701 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 02:32:06,474 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 02:32:06,475 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 02:32:06,475 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 02:32:06,475 INFO HandlerThread:644701 [handler.py:finish():638] shutting down handler +2021-07-15 02:32:07,372 INFO WriterThread:644701 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/run-38yj0n5v.wandb +2021-07-15 02:32:07,473 INFO SenderThread:644701 [sender.py:finish():945] shutting down sender +2021-07-15 02:32:07,473 INFO SenderThread:644701 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 02:32:07,473 INFO SenderThread:644701 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 02:32:07,477 INFO MainThread:644701 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_021559-38yj0n5v/logs/debug.log b/wandb/run-20210715_021559-38yj0n5v/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e91cdc93b3bf345317b8fdd4cf8068c1cd536cf8 --- /dev/null +++ b/wandb/run-20210715_021559-38yj0n5v/logs/debug.log @@ -0,0 +1,119 @@ +2021-07-15 02:15:59,047 INFO MainThread:643445 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 02:15:59,047 INFO MainThread:643445 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 02:15:59,047 INFO MainThread:643445 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/logs/debug.log +2021-07-15 02:15:59,047 INFO MainThread:643445 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/logs/debug-internal.log +2021-07-15 02:15:59,048 INFO MainThread:643445 [wandb_init.py:init():370] calling init triggers +2021-07-15 02:15:59,048 INFO MainThread:643445 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 02:15:59,048 INFO MainThread:643445 [wandb_init.py:init():419] starting backend +2021-07-15 02:15:59,048 INFO MainThread:643445 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 02:15:59,097 INFO MainThread:643445 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 02:15:59,145 INFO MainThread:643445 [backend.py:ensure_launched():139] started backend process with pid: 644701 +2021-07-15 02:15:59,147 INFO MainThread:643445 [wandb_init.py:init():424] backend started and connected +2021-07-15 02:15:59,150 INFO MainThread:643445 [wandb_init.py:init():472] updated telemetry +2021-07-15 02:15:59,151 INFO MainThread:643445 [wandb_init.py:init():491] communicating current version +2021-07-15 02:15:59,832 INFO MainThread:643445 [wandb_init.py:init():496] got version response +2021-07-15 02:15:59,832 INFO MainThread:643445 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 02:16:00,007 INFO MainThread:643445 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 02:16:01,157 INFO MainThread:643445 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 02:16:01,158 INFO MainThread:643445 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 02:16:01,158 INFO MainThread:643445 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 02:16:01,160 INFO MainThread:643445 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 02:16:01,160 INFO MainThread:643445 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 02:16:01,168 INFO MainThread:643445 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_02-15-50_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 02:16:01,170 INFO MainThread:643445 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'} +2021-07-15 02:16:01,171 INFO MainThread:643445 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500} +2021-07-15 02:32:02,250 INFO MainThread:643445 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 02:32:02,251 INFO MainThread:643445 [wandb_run.py:_restore():1565] restore +2021-07-15 02:32:04,478 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 1375 +} + +2021-07-15 02:32:04,750 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 1375 +} + +2021-07-15 02:32:05,454 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 3 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 6341 +} + +2021-07-15 02:32:05,557 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 10910 +} + +2021-07-15 02:32:05,659 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10910 + total_bytes: 10910 +} + +2021-07-15 02:32:05,761 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10910 + total_bytes: 10910 +} + +2021-07-15 02:32:05,864 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10910 + total_bytes: 10910 +} + +2021-07-15 02:32:05,966 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10910 + total_bytes: 10910 +} + +2021-07-15 02:32:06,068 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10910 + total_bytes: 10910 +} + +2021-07-15 02:32:06,371 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10910 + total_bytes: 10910 +} + +2021-07-15 02:32:06,473 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10910 + total_bytes: 10910 +} + +2021-07-15 02:32:07,796 INFO MainThread:643445 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_021559-38yj0n5v/run-38yj0n5v.wandb b/wandb/run-20210715_021559-38yj0n5v/run-38yj0n5v.wandb new file mode 100644 index 0000000000000000000000000000000000000000..6b02453346ab9329f375c2ab140df11df294fa14 Binary files /dev/null and b/wandb/run-20210715_021559-38yj0n5v/run-38yj0n5v.wandb differ diff --git a/wandb/run-20210715_023352-28io0kfl/files/config.yaml b/wandb/run-20210715_023352-28io0kfl/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a42d721992304ce9bb97246f89ea29b584ebaba0 --- /dev/null +++ b/wandb/run-20210715_023352-28io0kfl/files/config.yaml @@ -0,0 +1,304 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 1 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 3.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_02-33-44_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 50 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 500 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 1 +per_device_train_batch_size: + desc: null + value: 1 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 40000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 10000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_023352-28io0kfl/files/output.log b/wandb/run-20210715_023352-28io0kfl/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..3095588aee53d621908c636ca74b90f1c6df4c7e --- /dev/null +++ b/wandb/run-20210715_023352-28io0kfl/files/output.log @@ -0,0 +1,37 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 3.27G free, 0B reserved, and 3.22G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax_no_accum.py", line 699, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 3.27G free, 0B reserved, and 3.22G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210715_023352-28io0kfl/files/requirements.txt b/wandb/run-20210715_023352-28io0kfl/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_023352-28io0kfl/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_023352-28io0kfl/files/wandb-metadata.json b/wandb/run-20210715_023352-28io0kfl/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..db2b124315a638730382ab7ed56405d4b7c206b2 --- /dev/null +++ b/wandb/run-20210715_023352-28io0kfl/files/wandb-metadata.json @@ -0,0 +1,44 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T02:33:54.743234", + "startedAt": "2021-07-15T02:33:52.730317", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=10000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=50", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=40000", + "--learning_rate=3e-5", + "--per_device_train_batch_size=1", + "--per_device_eval_batch_size=1", + "--save_total_limit=5", + "--max_eval_samples=500" + ], + "state": "running", + "program": "./run_mlm_flax_no_accum.py", + "codePath": "run_mlm_flax_no_accum.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "87e02e7ff8fbaea90c8c4ad1c984f83742432303" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json b/wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_023352-28io0kfl/logs/debug-internal.log b/wandb/run-20210715_023352-28io0kfl/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..f2fc0b65366f0b6b10fdcdd0826c8896e6fff780 --- /dev/null +++ b/wandb/run-20210715_023352-28io0kfl/logs/debug-internal.log @@ -0,0 +1,268 @@ +2021-07-15 02:33:53,426 INFO MainThread:647413 [internal.py:wandb_internal():88] W&B internal server running at pid: 647413, started at: 2021-07-15 02:33:53.426396 +2021-07-15 02:33:53,428 INFO WriterThread:647413 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/run-28io0kfl.wandb +2021-07-15 02:33:53,429 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 02:33:53,430 DEBUG SenderThread:647413 [sender.py:send():179] send: header +2021-07-15 02:33:53,430 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: check_version +2021-07-15 02:33:53,471 DEBUG SenderThread:647413 [sender.py:send():179] send: run +2021-07-15 02:33:53,641 INFO SenderThread:647413 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files +2021-07-15 02:33:53,641 INFO SenderThread:647413 [sender.py:_start_run_threads():716] run started: 28io0kfl with start time 1626316432 +2021-07-15 02:33:53,641 DEBUG SenderThread:647413 [sender.py:send():179] send: summary +2021-07-15 02:33:53,641 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 02:33:53,642 INFO SenderThread:647413 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 02:33:54,643 INFO Thread-8 :647413 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json +2021-07-15 02:33:54,742 DEBUG HandlerThread:647413 [meta.py:__init__():39] meta init +2021-07-15 02:33:54,743 DEBUG HandlerThread:647413 [meta.py:__init__():53] meta init done +2021-07-15 02:33:54,743 DEBUG HandlerThread:647413 [meta.py:probe():210] probe +2021-07-15 02:33:54,744 DEBUG HandlerThread:647413 [meta.py:_setup_git():200] setup git +2021-07-15 02:33:54,775 DEBUG HandlerThread:647413 [meta.py:_setup_git():207] setup git done +2021-07-15 02:33:54,775 DEBUG HandlerThread:647413 [meta.py:_save_pip():57] save pip +2021-07-15 02:33:54,775 DEBUG HandlerThread:647413 [meta.py:_save_pip():71] save pip done +2021-07-15 02:33:54,775 DEBUG HandlerThread:647413 [meta.py:probe():252] probe done +2021-07-15 02:33:54,778 DEBUG SenderThread:647413 [sender.py:send():179] send: files +2021-07-15 02:33:54,779 INFO SenderThread:647413 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 02:33:54,786 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:33:54,787 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:33:54,917 DEBUG SenderThread:647413 [sender.py:send():179] send: config +2021-07-15 02:33:54,917 DEBUG SenderThread:647413 [sender.py:send():179] send: config +2021-07-15 02:33:54,917 DEBUG SenderThread:647413 [sender.py:send():179] send: config +2021-07-15 02:33:55,232 INFO Thread-11 :647413 [upload_job.py:push():137] Uploaded file /tmp/tmp3vyhbjkzwandb/34s07tos-wandb-metadata.json +2021-07-15 02:33:55,643 INFO Thread-8 :647413 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/requirements.txt +2021-07-15 02:33:55,643 INFO Thread-8 :647413 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-metadata.json +2021-07-15 02:33:55,643 INFO Thread-8 :647413 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log +2021-07-15 02:34:09,649 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log +2021-07-15 02:34:09,919 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:34:09,919 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:34:22,827 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:34:24,656 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/config.yaml +2021-07-15 02:34:25,052 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:34:25,052 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:34:40,185 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:34:40,186 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:34:52,904 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:34:55,321 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:34:55,321 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:35:10,455 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:35:10,455 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:35:22,977 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:35:25,587 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:35:25,587 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:35:40,721 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:35:40,722 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:35:53,062 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:35:55,856 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:35:55,856 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:36:10,989 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:36:10,990 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:36:23,136 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:36:26,122 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:36:26,123 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:36:41,256 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:36:41,257 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:36:53,204 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:36:56,393 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:36:56,394 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:37:11,526 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:37:11,526 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:37:23,277 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:37:26,659 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:37:26,659 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:37:41,793 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:37:41,793 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:37:53,344 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:37:56,927 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:37:56,928 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:38:12,060 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:38:12,060 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:38:23,410 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:38:27,194 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:38:27,194 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:38:42,326 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:38:42,326 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:38:53,475 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:38:57,457 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:38:57,457 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:39:12,589 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:39:12,589 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:39:23,542 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:39:27,728 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:39:27,728 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:39:42,860 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:39:42,860 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:39:53,613 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:39:57,993 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:39:57,994 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:40:13,128 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:40:13,128 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:40:23,681 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:40:28,265 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:40:28,266 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:40:43,401 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:40:43,401 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:40:53,753 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:40:58,548 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:40:58,549 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:41:13,683 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:41:13,684 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:41:23,828 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:41:28,827 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:41:28,827 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:41:43,958 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:41:43,958 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:41:53,904 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:41:59,090 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:41:59,091 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:42:14,225 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:42:14,225 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:42:23,978 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:42:31,120 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:42:31,120 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:42:46,253 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:42:46,253 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:42:54,050 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:43:01,385 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:43:01,385 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:43:16,523 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:43:16,524 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:43:24,121 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:43:31,656 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:43:31,657 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:43:46,789 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:43:46,790 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:43:54,190 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:44:01,924 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:44:01,925 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:44:17,056 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:44:17,057 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:44:24,264 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:44:32,190 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:44:32,190 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:44:47,325 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:44:47,326 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:44:51,894 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log +2021-07-15 02:44:54,337 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:45:02,471 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:45:02,472 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:45:17,619 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:45:17,619 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:45:24,415 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:45:32,753 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:45:32,754 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:45:47,896 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:45:47,897 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:45:54,500 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:46:03,028 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:46:03,028 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:46:18,161 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:46:18,162 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:46:24,580 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:46:33,296 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:46:33,297 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:46:48,441 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:46:48,441 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:46:54,662 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:46:57,942 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log +2021-07-15 02:46:58,807 DEBUG SenderThread:647413 [sender.py:send():179] send: telemetry +2021-07-15 02:46:58,807 DEBUG SenderThread:647413 [sender.py:send():179] send: exit +2021-07-15 02:46:58,807 INFO SenderThread:647413 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 02:46:58,809 INFO SenderThread:647413 [sender.py:send_exit():295] send defer +2021-07-15 02:46:58,809 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:46:58,809 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:46:58,809 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 02:46:58,810 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:46:58,810 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer +2021-07-15 02:46:58,810 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 02:46:58,810 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 1 +2021-07-15 02:46:58,811 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:46:58,811 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 02:46:58,873 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer +2021-07-15 02:46:58,873 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 02:46:58,873 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 2 +2021-07-15 02:46:58,874 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:46:58,874 DEBUG SenderThread:647413 [sender.py:send():179] send: stats +2021-07-15 02:46:58,874 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 02:46:58,875 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer +2021-07-15 02:46:58,875 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 02:46:58,875 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 3 +2021-07-15 02:46:58,875 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:46:58,876 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 02:46:58,876 DEBUG SenderThread:647413 [sender.py:send():179] send: summary +2021-07-15 02:46:58,876 INFO SenderThread:647413 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 02:46:58,877 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer +2021-07-15 02:46:58,877 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 02:46:58,877 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 4 +2021-07-15 02:46:58,877 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:46:58,877 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 02:46:58,877 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer +2021-07-15 02:46:58,877 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 02:46:58,913 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:46:58,943 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json +2021-07-15 02:46:58,943 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log +2021-07-15 02:46:59,055 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 5 +2021-07-15 02:46:59,055 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:46:59,055 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:46:59,055 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 02:46:59,056 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer +2021-07-15 02:46:59,056 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 02:46:59,056 INFO SenderThread:647413 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 02:46:59,157 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:46:59,943 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/config.yaml +2021-07-15 02:46:59,944 INFO SenderThread:647413 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files +2021-07-15 02:46:59,944 INFO SenderThread:647413 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/requirements.txt requirements.txt +2021-07-15 02:46:59,944 INFO SenderThread:647413 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log output.log +2021-07-15 02:46:59,945 INFO SenderThread:647413 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-metadata.json wandb-metadata.json +2021-07-15 02:46:59,945 INFO SenderThread:647413 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/config.yaml config.yaml +2021-07-15 02:46:59,948 INFO SenderThread:647413 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json wandb-summary.json +2021-07-15 02:46:59,951 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 6 +2021-07-15 02:46:59,951 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:46:59,952 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:46:59,953 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 02:46:59,956 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer +2021-07-15 02:46:59,956 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 02:46:59,956 INFO SenderThread:647413 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 02:47:00,054 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:47:00,054 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:47:00,157 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:47:00,157 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:47:00,259 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:47:00,259 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:47:00,361 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:47:00,362 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:47:00,377 INFO Thread-14 :647413 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/config.yaml +2021-07-15 02:47:00,382 INFO Thread-12 :647413 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/requirements.txt +2021-07-15 02:47:00,415 INFO Thread-15 :647413 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json +2021-07-15 02:47:00,439 INFO Thread-13 :647413 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log +2021-07-15 02:47:00,464 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:47:00,464 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:47:00,566 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:47:00,566 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:47:00,640 INFO Thread-7 :647413 [sender.py:transition_state():308] send defer: 7 +2021-07-15 02:47:00,640 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:47:00,640 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 02:47:00,641 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer +2021-07-15 02:47:00,641 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 02:47:00,668 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:47:00,919 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 8 +2021-07-15 02:47:00,920 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:47:00,920 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:47:00,920 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 02:47:00,921 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer +2021-07-15 02:47:00,921 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 02:47:00,921 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 9 +2021-07-15 02:47:00,921 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:47:00,921 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 02:47:00,921 DEBUG SenderThread:647413 [sender.py:send():179] send: final +2021-07-15 02:47:00,922 DEBUG SenderThread:647413 [sender.py:send():179] send: footer +2021-07-15 02:47:00,922 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer +2021-07-15 02:47:00,922 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 02:47:01,022 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:47:01,022 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:47:01,022 INFO SenderThread:647413 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 02:47:01,024 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 02:47:01,024 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 02:47:01,025 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 02:47:01,025 INFO HandlerThread:647413 [handler.py:finish():638] shutting down handler +2021-07-15 02:47:01,922 INFO WriterThread:647413 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/run-28io0kfl.wandb +2021-07-15 02:47:02,023 INFO SenderThread:647413 [sender.py:finish():945] shutting down sender +2021-07-15 02:47:02,023 INFO SenderThread:647413 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 02:47:02,023 INFO SenderThread:647413 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 02:47:02,026 INFO MainThread:647413 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_023352-28io0kfl/logs/debug.log b/wandb/run-20210715_023352-28io0kfl/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..69cd55b897cd7f72fd6b8813ca8aa7866a19b8a0 --- /dev/null +++ b/wandb/run-20210715_023352-28io0kfl/logs/debug.log @@ -0,0 +1,119 @@ +2021-07-15 02:33:52,731 INFO MainThread:646155 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/logs/debug.log +2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/logs/debug-internal.log +2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_init.py:init():370] calling init triggers +2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_init.py:init():419] starting backend +2021-07-15 02:33:52,732 INFO MainThread:646155 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 02:33:52,780 INFO MainThread:646155 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 02:33:52,826 INFO MainThread:646155 [backend.py:ensure_launched():139] started backend process with pid: 647413 +2021-07-15 02:33:52,828 INFO MainThread:646155 [wandb_init.py:init():424] backend started and connected +2021-07-15 02:33:52,831 INFO MainThread:646155 [wandb_init.py:init():472] updated telemetry +2021-07-15 02:33:52,832 INFO MainThread:646155 [wandb_init.py:init():491] communicating current version +2021-07-15 02:33:53,470 INFO MainThread:646155 [wandb_init.py:init():496] got version response +2021-07-15 02:33:53,470 INFO MainThread:646155 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 02:33:53,641 INFO MainThread:646155 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 02:33:54,781 INFO MainThread:646155 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 02:33:54,782 INFO MainThread:646155 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 02:33:54,783 INFO MainThread:646155 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 02:33:54,784 INFO MainThread:646155 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 02:33:54,785 INFO MainThread:646155 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 02:33:54,790 INFO MainThread:646155 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_02-33-44_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 40000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 02:33:54,792 INFO MainThread:646155 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'} +2021-07-15 02:33:54,793 INFO MainThread:646155 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500} +2021-07-15 02:46:56,604 INFO MainThread:646155 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 02:46:56,605 INFO MainThread:646155 [wandb_run.py:_restore():1565] restore +2021-07-15 02:46:58,811 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 1375 +} + +2021-07-15 02:46:59,056 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 1375 +} + +2021-07-15 02:46:59,953 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 10904 +} + +2021-07-15 02:47:00,055 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 10906 +} + +2021-07-15 02:47:00,158 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 02:47:00,260 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 02:47:00,362 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 02:47:00,465 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 02:47:00,567 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 02:47:00,920 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 02:47:01,023 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 02:47:02,337 INFO MainThread:646155 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_023352-28io0kfl/run-28io0kfl.wandb b/wandb/run-20210715_023352-28io0kfl/run-28io0kfl.wandb new file mode 100644 index 0000000000000000000000000000000000000000..72581a4af5619a6fddfe3169e4eb0a1898353228 Binary files /dev/null and b/wandb/run-20210715_023352-28io0kfl/run-28io0kfl.wandb differ diff --git a/wandb/run-20210715_024816-39ztwpif/files/config.yaml b/wandb/run-20210715_024816-39ztwpif/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93e0c7cb85b3270375c18b3346179388e2cce28f --- /dev/null +++ b/wandb/run-20210715_024816-39ztwpif/files/config.yaml @@ -0,0 +1,304 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 1 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 3.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_02-48-08_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 50 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 500 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 1 +per_device_train_batch_size: + desc: null + value: 1 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 40000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 10000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_024816-39ztwpif/files/output.log b/wandb/run-20210715_024816-39ztwpif/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..4dbcde4b188825273d8c1abbcf33f7b5fcd6d602 --- /dev/null +++ b/wandb/run-20210715_024816-39ztwpif/files/output.log @@ -0,0 +1,37 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 6.35G free, 0B reserved, and 6.31G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax_no_accum.py", line 699, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 6.35G free, 0B reserved, and 6.31G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210715_024816-39ztwpif/files/requirements.txt b/wandb/run-20210715_024816-39ztwpif/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_024816-39ztwpif/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_024816-39ztwpif/files/wandb-metadata.json b/wandb/run-20210715_024816-39ztwpif/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..1e617110bc59bb11c0d9eda85984045ee8ff0f56 --- /dev/null +++ b/wandb/run-20210715_024816-39ztwpif/files/wandb-metadata.json @@ -0,0 +1,44 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T02:48:18.837710", + "startedAt": "2021-07-15T02:48:16.824799", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=10000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=50", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=40000", + "--learning_rate=3e-5", + "--per_device_train_batch_size=1", + "--per_device_eval_batch_size=1", + "--save_total_limit=5", + "--max_eval_samples=500" + ], + "state": "running", + "program": "./run_mlm_flax_no_accum.py", + "codePath": "run_mlm_flax_no_accum.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "87e02e7ff8fbaea90c8c4ad1c984f83742432303" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json b/wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_024816-39ztwpif/logs/debug-internal.log b/wandb/run-20210715_024816-39ztwpif/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..3f166737d4ebc5608695a7c18a86154b382cd319 --- /dev/null +++ b/wandb/run-20210715_024816-39ztwpif/logs/debug-internal.log @@ -0,0 +1,240 @@ +2021-07-15 02:48:17,521 INFO MainThread:649905 [internal.py:wandb_internal():88] W&B internal server running at pid: 649905, started at: 2021-07-15 02:48:17.521263 +2021-07-15 02:48:17,523 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 02:48:17,524 INFO WriterThread:649905 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/run-39ztwpif.wandb +2021-07-15 02:48:17,524 DEBUG SenderThread:649905 [sender.py:send():179] send: header +2021-07-15 02:48:17,525 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: check_version +2021-07-15 02:48:17,561 DEBUG SenderThread:649905 [sender.py:send():179] send: run +2021-07-15 02:48:17,732 INFO SenderThread:649905 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files +2021-07-15 02:48:17,732 INFO SenderThread:649905 [sender.py:_start_run_threads():716] run started: 39ztwpif with start time 1626317296 +2021-07-15 02:48:17,732 DEBUG SenderThread:649905 [sender.py:send():179] send: summary +2021-07-15 02:48:17,732 INFO SenderThread:649905 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 02:48:17,733 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 02:48:18,733 INFO Thread-8 :649905 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json +2021-07-15 02:48:18,837 DEBUG HandlerThread:649905 [meta.py:__init__():39] meta init +2021-07-15 02:48:18,837 DEBUG HandlerThread:649905 [meta.py:__init__():53] meta init done +2021-07-15 02:48:18,837 DEBUG HandlerThread:649905 [meta.py:probe():210] probe +2021-07-15 02:48:18,838 DEBUG HandlerThread:649905 [meta.py:_setup_git():200] setup git +2021-07-15 02:48:18,868 DEBUG HandlerThread:649905 [meta.py:_setup_git():207] setup git done +2021-07-15 02:48:18,868 DEBUG HandlerThread:649905 [meta.py:_save_pip():57] save pip +2021-07-15 02:48:18,868 DEBUG HandlerThread:649905 [meta.py:_save_pip():71] save pip done +2021-07-15 02:48:18,868 DEBUG HandlerThread:649905 [meta.py:probe():252] probe done +2021-07-15 02:48:18,871 DEBUG SenderThread:649905 [sender.py:send():179] send: files +2021-07-15 02:48:18,872 INFO SenderThread:649905 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 02:48:18,877 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:48:18,878 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:48:19,006 DEBUG SenderThread:649905 [sender.py:send():179] send: config +2021-07-15 02:48:19,006 DEBUG SenderThread:649905 [sender.py:send():179] send: config +2021-07-15 02:48:19,006 DEBUG SenderThread:649905 [sender.py:send():179] send: config +2021-07-15 02:48:19,311 INFO Thread-11 :649905 [upload_job.py:push():137] Uploaded file /tmp/tmpgcnix6scwandb/15nx6xdi-wandb-metadata.json +2021-07-15 02:48:19,733 INFO Thread-8 :649905 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/requirements.txt +2021-07-15 02:48:19,733 INFO Thread-8 :649905 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-metadata.json +2021-07-15 02:48:19,733 INFO Thread-8 :649905 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log +2021-07-15 02:48:33,738 INFO Thread-8 :649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log +2021-07-15 02:48:34,008 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:48:34,009 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:48:46,922 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:48:48,744 INFO Thread-8 :649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/config.yaml +2021-07-15 02:48:49,147 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:48:49,147 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:49:04,279 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:49:04,280 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:49:17,003 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:49:19,412 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:49:19,412 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:49:34,543 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:49:34,543 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:49:47,079 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:49:49,677 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:49:49,677 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:50:04,809 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:50:04,809 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:50:17,143 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:50:19,943 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:50:19,943 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:50:35,077 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:50:35,078 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:50:47,219 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:50:50,223 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:50:50,223 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:51:05,389 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:51:05,389 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:51:17,291 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:51:20,521 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:51:20,521 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:51:35,655 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:51:35,655 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:51:47,368 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:51:50,786 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:51:50,786 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:52:05,917 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:52:05,917 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:52:17,445 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:52:21,058 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:52:21,058 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:52:36,188 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:52:36,189 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:52:47,519 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:52:51,318 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:52:51,318 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:53:06,454 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:53:06,454 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:53:17,587 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:53:21,586 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:53:21,586 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:53:36,717 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:53:36,718 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:53:47,654 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:53:51,851 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:53:51,851 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:54:06,983 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:54:06,983 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:54:17,727 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:54:22,115 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:54:22,115 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:54:37,245 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:54:37,246 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:54:47,796 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:54:52,379 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:54:52,379 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:55:07,511 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:55:07,511 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:55:17,864 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:55:22,641 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:55:22,641 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:55:37,785 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:55:37,786 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:55:47,933 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:55:52,928 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:55:52,929 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:56:08,060 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:56:08,060 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:56:18,007 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:56:23,209 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:56:23,210 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:56:23,919 INFO Thread-8 :649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log +2021-07-15 02:56:38,372 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:56:38,372 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:56:48,082 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:56:53,514 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:56:53,514 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:57:08,654 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:57:08,655 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:57:18,162 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:57:23,787 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:57:23,787 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:57:38,920 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:57:38,920 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:57:48,241 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:57:54,061 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:57:54,061 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:58:09,194 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:58:09,195 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:58:18,311 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:58:24,331 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 02:58:24,331 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status +2021-07-15 02:58:27,972 INFO Thread-8 :649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log +2021-07-15 02:58:29,408 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:58:29,409 DEBUG SenderThread:649905 [sender.py:send():179] send: telemetry +2021-07-15 02:58:29,409 DEBUG SenderThread:649905 [sender.py:send():179] send: exit +2021-07-15 02:58:29,409 INFO SenderThread:649905 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 02:58:29,411 INFO SenderThread:649905 [sender.py:send_exit():295] send defer +2021-07-15 02:58:29,411 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:58:29,412 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:58:29,412 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 02:58:29,412 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer +2021-07-15 02:58:29,412 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 02:58:29,412 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 1 +2021-07-15 02:58:29,412 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:58:29,413 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 02:58:29,440 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer +2021-07-15 02:58:29,440 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 02:58:29,440 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 2 +2021-07-15 02:58:29,440 DEBUG SenderThread:649905 [sender.py:send():179] send: stats +2021-07-15 02:58:29,441 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:58:29,441 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 02:58:29,441 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer +2021-07-15 02:58:29,441 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 02:58:29,441 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 3 +2021-07-15 02:58:29,442 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:58:29,442 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 02:58:29,442 DEBUG SenderThread:649905 [sender.py:send():179] send: summary +2021-07-15 02:58:29,442 INFO SenderThread:649905 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 02:58:29,443 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer +2021-07-15 02:58:29,443 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 02:58:29,443 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 4 +2021-07-15 02:58:29,443 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:58:29,443 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 02:58:29,443 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer +2021-07-15 02:58:29,443 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 02:58:29,513 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:58:29,619 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 5 +2021-07-15 02:58:29,619 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:58:29,620 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:58:29,620 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 02:58:29,620 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer +2021-07-15 02:58:29,620 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 02:58:29,620 INFO SenderThread:649905 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 02:58:29,722 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:58:29,974 INFO SenderThread:649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json +2021-07-15 02:58:29,974 INFO SenderThread:649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log +2021-07-15 02:58:29,974 INFO SenderThread:649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/config.yaml +2021-07-15 02:58:29,974 INFO SenderThread:649905 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files +2021-07-15 02:58:29,974 INFO SenderThread:649905 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/requirements.txt requirements.txt +2021-07-15 02:58:29,975 INFO SenderThread:649905 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log output.log +2021-07-15 02:58:29,975 INFO SenderThread:649905 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-metadata.json wandb-metadata.json +2021-07-15 02:58:29,975 INFO SenderThread:649905 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/config.yaml config.yaml +2021-07-15 02:58:29,975 INFO SenderThread:649905 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json wandb-summary.json +2021-07-15 02:58:29,976 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 6 +2021-07-15 02:58:29,976 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:58:29,977 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:58:29,983 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 02:58:29,986 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer +2021-07-15 02:58:29,989 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 02:58:29,989 INFO SenderThread:649905 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 02:58:30,078 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:58:30,078 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:58:30,181 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:58:30,181 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:58:30,283 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:58:30,283 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:58:30,385 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:58:30,385 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:58:30,418 INFO Thread-13 :649905 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log +2021-07-15 02:58:30,421 INFO Thread-14 :649905 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/config.yaml +2021-07-15 02:58:30,423 INFO Thread-12 :649905 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/requirements.txt +2021-07-15 02:58:30,451 INFO Thread-15 :649905 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json +2021-07-15 02:58:30,487 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:58:30,487 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:58:30,589 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:58:30,589 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:58:30,652 INFO Thread-7 :649905 [sender.py:transition_state():308] send defer: 7 +2021-07-15 02:58:30,652 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:58:30,653 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 02:58:30,653 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer +2021-07-15 02:58:30,653 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 02:58:30,691 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:58:30,941 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 8 +2021-07-15 02:58:30,941 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:58:30,941 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:58:30,941 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 02:58:30,942 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer +2021-07-15 02:58:30,942 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 02:58:30,942 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 9 +2021-07-15 02:58:30,942 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer +2021-07-15 02:58:30,942 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 02:58:30,942 DEBUG SenderThread:649905 [sender.py:send():179] send: final +2021-07-15 02:58:30,943 DEBUG SenderThread:649905 [sender.py:send():179] send: footer +2021-07-15 02:58:30,943 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer +2021-07-15 02:58:30,943 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 02:58:31,043 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 02:58:31,043 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 02:58:31,043 INFO SenderThread:649905 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 02:58:31,045 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 02:58:31,046 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 02:58:31,046 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 02:58:31,046 INFO HandlerThread:649905 [handler.py:finish():638] shutting down handler +2021-07-15 02:58:31,943 INFO WriterThread:649905 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/run-39ztwpif.wandb +2021-07-15 02:58:32,044 INFO SenderThread:649905 [sender.py:finish():945] shutting down sender +2021-07-15 02:58:32,044 INFO SenderThread:649905 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 02:58:32,044 INFO SenderThread:649905 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 02:58:32,047 INFO MainThread:649905 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_024816-39ztwpif/logs/debug.log b/wandb/run-20210715_024816-39ztwpif/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e91dc499ab0f738ea718c048517c0d514956ce6d --- /dev/null +++ b/wandb/run-20210715_024816-39ztwpif/logs/debug.log @@ -0,0 +1,119 @@ +2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/logs/debug.log +2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/logs/debug-internal.log +2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_init.py:init():370] calling init triggers +2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_init.py:init():419] starting backend +2021-07-15 02:48:16,827 INFO MainThread:648648 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 02:48:16,875 INFO MainThread:648648 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 02:48:16,922 INFO MainThread:648648 [backend.py:ensure_launched():139] started backend process with pid: 649905 +2021-07-15 02:48:16,924 INFO MainThread:648648 [wandb_init.py:init():424] backend started and connected +2021-07-15 02:48:16,927 INFO MainThread:648648 [wandb_init.py:init():472] updated telemetry +2021-07-15 02:48:16,928 INFO MainThread:648648 [wandb_init.py:init():491] communicating current version +2021-07-15 02:48:17,560 INFO MainThread:648648 [wandb_init.py:init():496] got version response +2021-07-15 02:48:17,560 INFO MainThread:648648 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 02:48:17,732 INFO MainThread:648648 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 02:48:18,876 INFO MainThread:648648 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 02:48:18,877 INFO MainThread:648648 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 02:48:18,877 INFO MainThread:648648 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 02:48:18,879 INFO MainThread:648648 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 02:48:18,879 INFO MainThread:648648 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 02:48:18,886 INFO MainThread:648648 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_02-48-08_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 40000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 02:48:18,888 INFO MainThread:648648 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'} +2021-07-15 02:48:18,889 INFO MainThread:648648 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500} +2021-07-15 02:58:26,636 INFO MainThread:648648 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 02:58:26,637 INFO MainThread:648648 [wandb_run.py:_restore():1565] restore +2021-07-15 02:58:29,412 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 1375 +} + +2021-07-15 02:58:29,620 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 1375 +} + +2021-07-15 02:58:29,977 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 10904 +} + +2021-07-15 02:58:30,079 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 10906 +} + +2021-07-15 02:58:30,181 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 02:58:30,284 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 02:58:30,386 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 02:58:30,488 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 02:58:30,590 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 02:58:30,942 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 02:58:31,044 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 02:58:32,341 INFO MainThread:648648 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_024816-39ztwpif/run-39ztwpif.wandb b/wandb/run-20210715_024816-39ztwpif/run-39ztwpif.wandb new file mode 100644 index 0000000000000000000000000000000000000000..196d83ae8d7397aac4cedca8003c9fdfbd623148 Binary files /dev/null and b/wandb/run-20210715_024816-39ztwpif/run-39ztwpif.wandb differ diff --git a/wandb/run-20210715_030015-30wihv4o/files/config.yaml b/wandb/run-20210715_030015-30wihv4o/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0bc0590915de7290f7bc43946020fb6fbdf4a3ac --- /dev/null +++ b/wandb/run-20210715_030015-30wihv4o/files/config.yaml @@ -0,0 +1,304 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 1 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 3.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_03-00-07_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 50 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 500 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 1 +per_device_train_batch_size: + desc: null + value: 1 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 30000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 10000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_030015-30wihv4o/files/output.log b/wandb/run-20210715_030015-30wihv4o/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..0e000f352be151fcfcaea2c4ec3a486f02c02d1e --- /dev/null +++ b/wandb/run-20210715_030015-30wihv4o/files/output.log @@ -0,0 +1,37 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 7.12G free, 0B reserved, and 7.08G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax_no_accum.py", line 699, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 7.12G free, 0B reserved, and 7.08G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210715_030015-30wihv4o/files/requirements.txt b/wandb/run-20210715_030015-30wihv4o/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210715_030015-30wihv4o/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210715_030015-30wihv4o/files/wandb-metadata.json b/wandb/run-20210715_030015-30wihv4o/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..3d8433b07fe16f8fd396f81ef4d7c73e1fccdeed --- /dev/null +++ b/wandb/run-20210715_030015-30wihv4o/files/wandb-metadata.json @@ -0,0 +1,44 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-15T03:00:17.537660", + "startedAt": "2021-07-15T03:00:15.443682", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=10000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=50", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=30000", + "--learning_rate=3e-5", + "--per_device_train_batch_size=1", + "--per_device_eval_batch_size=1", + "--save_total_limit=5", + "--max_eval_samples=500" + ], + "state": "running", + "program": "./run_mlm_flax_no_accum.py", + "codePath": "run_mlm_flax_no_accum.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "87e02e7ff8fbaea90c8c4ad1c984f83742432303" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json b/wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210715_030015-30wihv4o/logs/debug-internal.log b/wandb/run-20210715_030015-30wihv4o/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..53d300520baecdbcca5f59c7c33fb91bfd9619b3 --- /dev/null +++ b/wandb/run-20210715_030015-30wihv4o/logs/debug-internal.log @@ -0,0 +1,232 @@ +2021-07-15 03:00:16,154 INFO MainThread:652382 [internal.py:wandb_internal():88] W&B internal server running at pid: 652382, started at: 2021-07-15 03:00:16.153819 +2021-07-15 03:00:16,156 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: check_version +2021-07-15 03:00:16,156 INFO WriterThread:652382 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/run-30wihv4o.wandb +2021-07-15 03:00:16,157 DEBUG SenderThread:652382 [sender.py:send():179] send: header +2021-07-15 03:00:16,157 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: check_version +2021-07-15 03:00:16,194 DEBUG SenderThread:652382 [sender.py:send():179] send: run +2021-07-15 03:00:16,370 INFO SenderThread:652382 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files +2021-07-15 03:00:16,370 INFO SenderThread:652382 [sender.py:_start_run_threads():716] run started: 30wihv4o with start time 1626318015 +2021-07-15 03:00:16,372 DEBUG SenderThread:652382 [sender.py:send():179] send: summary +2021-07-15 03:00:16,373 INFO SenderThread:652382 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 03:00:16,374 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: run_start +2021-07-15 03:00:17,425 INFO Thread-8 :652382 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json +2021-07-15 03:00:17,537 DEBUG HandlerThread:652382 [meta.py:__init__():39] meta init +2021-07-15 03:00:17,537 DEBUG HandlerThread:652382 [meta.py:__init__():53] meta init done +2021-07-15 03:00:17,537 DEBUG HandlerThread:652382 [meta.py:probe():210] probe +2021-07-15 03:00:17,538 DEBUG HandlerThread:652382 [meta.py:_setup_git():200] setup git +2021-07-15 03:00:17,568 DEBUG HandlerThread:652382 [meta.py:_setup_git():207] setup git done +2021-07-15 03:00:17,568 DEBUG HandlerThread:652382 [meta.py:_save_pip():57] save pip +2021-07-15 03:00:17,568 DEBUG HandlerThread:652382 [meta.py:_save_pip():71] save pip done +2021-07-15 03:00:17,568 DEBUG HandlerThread:652382 [meta.py:probe():252] probe done +2021-07-15 03:00:17,572 DEBUG SenderThread:652382 [sender.py:send():179] send: files +2021-07-15 03:00:17,572 INFO SenderThread:652382 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-15 03:00:17,579 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:00:17,580 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:00:17,710 DEBUG SenderThread:652382 [sender.py:send():179] send: config +2021-07-15 03:00:17,710 DEBUG SenderThread:652382 [sender.py:send():179] send: config +2021-07-15 03:00:17,711 DEBUG SenderThread:652382 [sender.py:send():179] send: config +2021-07-15 03:00:18,067 INFO Thread-11 :652382 [upload_job.py:push():137] Uploaded file /tmp/tmpo5adho61wandb/1x3gq8av-wandb-metadata.json +2021-07-15 03:00:18,425 INFO Thread-8 :652382 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/requirements.txt +2021-07-15 03:00:18,425 INFO Thread-8 :652382 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-metadata.json +2021-07-15 03:00:18,425 INFO Thread-8 :652382 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log +2021-07-15 03:00:32,431 INFO Thread-8 :652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log +2021-07-15 03:00:32,712 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:00:32,712 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:00:45,621 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:00:47,437 INFO Thread-8 :652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/config.yaml +2021-07-15 03:00:47,844 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:00:47,844 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:01:02,976 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:01:02,976 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:01:15,700 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:01:18,116 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:01:18,116 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:01:33,254 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:01:33,255 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:01:45,777 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:01:48,387 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:01:48,388 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:02:03,521 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:02:03,522 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:02:15,842 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:02:18,653 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:02:18,654 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:02:33,785 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:02:33,785 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:02:45,911 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:02:48,915 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:02:48,915 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:03:04,048 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:03:04,049 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:03:15,978 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:03:19,181 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:03:19,181 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:03:34,314 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:03:34,314 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:03:46,043 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:03:49,447 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:03:49,448 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:04:04,580 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:04:04,580 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:04:16,110 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:04:19,717 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:04:19,718 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:04:34,849 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:04:34,849 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:04:46,173 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:04:49,981 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:04:49,982 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:05:05,119 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:05:05,120 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:05:16,239 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:05:20,263 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:05:20,264 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:05:35,395 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:05:35,395 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:05:46,312 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:05:50,529 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:05:50,529 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:06:05,662 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:06:05,662 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:06:16,385 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:06:20,794 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:06:20,794 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:06:35,926 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:06:35,926 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:06:46,454 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:06:51,060 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:06:51,060 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:07:06,201 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:07:06,201 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:07:16,531 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:07:21,340 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:07:21,340 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:07:36,473 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:07:36,473 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:07:38,595 INFO Thread-8 :652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log +2021-07-15 03:07:46,605 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:07:51,620 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:07:51,620 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:08:06,767 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:08:06,768 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:08:16,682 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:08:21,898 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:08:21,899 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:08:37,032 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:08:37,032 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:08:46,763 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:08:52,171 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:08:52,172 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:09:07,305 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:09:07,305 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:09:16,837 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:09:22,440 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:09:22,440 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:09:37,575 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status +2021-07-15 03:09:37,576 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status +2021-07-15 03:09:42,648 INFO Thread-8 :652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log +2021-07-15 03:09:44,217 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 03:09:44,217 DEBUG SenderThread:652382 [sender.py:send():179] send: telemetry +2021-07-15 03:09:44,218 DEBUG SenderThread:652382 [sender.py:send():179] send: exit +2021-07-15 03:09:44,218 INFO SenderThread:652382 [sender.py:send_exit():287] handling exit code: 1 +2021-07-15 03:09:44,219 INFO SenderThread:652382 [sender.py:send_exit():295] send defer +2021-07-15 03:09:44,219 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 03:09:44,220 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer +2021-07-15 03:09:44,220 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-15 03:09:44,221 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer +2021-07-15 03:09:44,221 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-15 03:09:44,221 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 1 +2021-07-15 03:09:44,221 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer +2021-07-15 03:09:44,221 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-15 03:09:44,308 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer +2021-07-15 03:09:44,308 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-15 03:09:44,308 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 2 +2021-07-15 03:09:44,308 DEBUG SenderThread:652382 [sender.py:send():179] send: stats +2021-07-15 03:09:44,309 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer +2021-07-15 03:09:44,309 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-15 03:09:44,309 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer +2021-07-15 03:09:44,309 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-15 03:09:44,309 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 3 +2021-07-15 03:09:44,309 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer +2021-07-15 03:09:44,310 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-15 03:09:44,310 DEBUG SenderThread:652382 [sender.py:send():179] send: summary +2021-07-15 03:09:44,311 INFO SenderThread:652382 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-15 03:09:44,311 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer +2021-07-15 03:09:44,311 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-15 03:09:44,311 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 4 +2021-07-15 03:09:44,311 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer +2021-07-15 03:09:44,312 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-15 03:09:44,312 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer +2021-07-15 03:09:44,312 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-15 03:09:44,322 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 03:09:44,490 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 5 +2021-07-15 03:09:44,490 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 03:09:44,491 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer +2021-07-15 03:09:44,491 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-15 03:09:44,491 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer +2021-07-15 03:09:44,491 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-15 03:09:44,491 INFO SenderThread:652382 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-15 03:09:44,592 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 03:09:44,649 INFO Thread-8 :652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json +2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/config.yaml +2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log +2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files +2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/requirements.txt requirements.txt +2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log output.log +2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-metadata.json wandb-metadata.json +2021-07-15 03:09:44,651 INFO SenderThread:652382 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/config.yaml config.yaml +2021-07-15 03:09:44,651 INFO SenderThread:652382 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json wandb-summary.json +2021-07-15 03:09:44,651 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 6 +2021-07-15 03:09:44,651 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 03:09:44,652 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer +2021-07-15 03:09:44,652 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-15 03:09:44,655 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer +2021-07-15 03:09:44,655 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-15 03:09:44,655 INFO SenderThread:652382 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 03:09:44,754 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 03:09:44,754 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 03:09:44,856 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 03:09:44,856 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 03:09:44,958 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 03:09:44,958 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 03:09:45,060 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 03:09:45,060 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 03:09:45,085 INFO Thread-14 :652382 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/config.yaml +2021-07-15 03:09:45,094 INFO Thread-12 :652382 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/requirements.txt +2021-07-15 03:09:45,129 INFO Thread-15 :652382 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json +2021-07-15 03:09:45,144 INFO Thread-13 :652382 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log +2021-07-15 03:09:45,162 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 03:09:45,162 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 03:09:45,264 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 03:09:45,265 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 03:09:45,345 INFO Thread-7 :652382 [sender.py:transition_state():308] send defer: 7 +2021-07-15 03:09:45,345 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer +2021-07-15 03:09:45,345 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-15 03:09:45,346 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer +2021-07-15 03:09:45,346 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-15 03:09:45,366 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 03:09:45,636 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 8 +2021-07-15 03:09:45,636 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 03:09:45,637 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer +2021-07-15 03:09:45,637 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-15 03:09:45,637 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer +2021-07-15 03:09:45,637 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-15 03:09:45,638 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 9 +2021-07-15 03:09:45,638 DEBUG SenderThread:652382 [sender.py:send():179] send: final +2021-07-15 03:09:45,638 DEBUG SenderThread:652382 [sender.py:send():179] send: footer +2021-07-15 03:09:45,639 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer +2021-07-15 03:09:45,639 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-15 03:09:45,639 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer +2021-07-15 03:09:45,639 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-15 03:09:45,738 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-15 03:09:45,739 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit +2021-07-15 03:09:45,739 INFO SenderThread:652382 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 03:09:45,740 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: get_summary +2021-07-15 03:09:45,741 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-15 03:09:45,742 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: shutdown +2021-07-15 03:09:45,742 INFO HandlerThread:652382 [handler.py:finish():638] shutting down handler +2021-07-15 03:09:46,639 INFO WriterThread:652382 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/run-30wihv4o.wandb +2021-07-15 03:09:46,739 INFO SenderThread:652382 [sender.py:finish():945] shutting down sender +2021-07-15 03:09:46,739 INFO SenderThread:652382 [file_pusher.py:finish():177] shutting down file pusher +2021-07-15 03:09:46,739 INFO SenderThread:652382 [file_pusher.py:join():182] waiting for file pusher +2021-07-15 03:09:46,742 INFO MainThread:652382 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210715_030015-30wihv4o/logs/debug.log b/wandb/run-20210715_030015-30wihv4o/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..f3125ae556b360d37faca0cacf21e7fc374f3ecf --- /dev/null +++ b/wandb/run-20210715_030015-30wihv4o/logs/debug.log @@ -0,0 +1,119 @@ +2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_setup.py:_flush():69] setting env: {} +2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/logs/debug.log +2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/logs/debug-internal.log +2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_init.py:init():370] calling init triggers +2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_init.py:init():419] starting backend +2021-07-15 03:00:15,446 INFO MainThread:651126 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-15 03:00:15,492 INFO MainThread:651126 [backend.py:ensure_launched():135] starting backend process... +2021-07-15 03:00:15,539 INFO MainThread:651126 [backend.py:ensure_launched():139] started backend process with pid: 652382 +2021-07-15 03:00:15,541 INFO MainThread:651126 [wandb_init.py:init():424] backend started and connected +2021-07-15 03:00:15,544 INFO MainThread:651126 [wandb_init.py:init():472] updated telemetry +2021-07-15 03:00:15,545 INFO MainThread:651126 [wandb_init.py:init():491] communicating current version +2021-07-15 03:00:16,193 INFO MainThread:651126 [wandb_init.py:init():496] got version response +2021-07-15 03:00:16,193 INFO MainThread:651126 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-15 03:00:16,373 INFO MainThread:651126 [wandb_init.py:init():529] starting run threads in backend +2021-07-15 03:00:17,575 INFO MainThread:651126 [wandb_run.py:_console_start():1623] atexit reg +2021-07-15 03:00:17,575 INFO MainThread:651126 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-15 03:00:17,576 INFO MainThread:651126 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-15 03:00:17,577 INFO MainThread:651126 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-15 03:00:17,578 INFO MainThread:651126 [wandb_init.py:init():554] run started, returning control to user process +2021-07-15 03:00:17,583 INFO MainThread:651126 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_03-00-07_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 30000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-15 03:00:17,585 INFO MainThread:651126 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'} +2021-07-15 03:00:17,586 INFO MainThread:651126 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500} +2021-07-15 03:09:41,805 INFO MainThread:651126 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-15 03:09:41,806 INFO MainThread:651126 [wandb_run.py:_restore():1565] restore +2021-07-15 03:09:44,221 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 1375 +} + +2021-07-15 03:09:44,491 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 1375 +} + +2021-07-15 03:09:44,652 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 10904 +} + +2021-07-15 03:09:44,755 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1375 + total_bytes: 10906 +} + +2021-07-15 03:09:44,857 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 03:09:44,959 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 03:09:45,061 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 03:09:45,163 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 03:09:45,265 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 03:09:45,637 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 03:09:45,739 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 10906 + total_bytes: 10906 +} + +2021-07-15 03:09:47,041 INFO MainThread:651126 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210715_030015-30wihv4o/run-30wihv4o.wandb b/wandb/run-20210715_030015-30wihv4o/run-30wihv4o.wandb new file mode 100644 index 0000000000000000000000000000000000000000..c1938210ec631196b1226cf4d579466594128843 Binary files /dev/null and b/wandb/run-20210715_030015-30wihv4o/run-30wihv4o.wandb differ diff --git a/wandb/run-20210715_031107-69jkygz3/files/config.yaml b/wandb/run-20210715_031107-69jkygz3/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d8bab7008105e70916a2aede3983dc7b9186753 --- /dev/null +++ b/wandb/run-20210715_031107-69jkygz3/files/config.yaml @@ -0,0 +1,301 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 1 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 3.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul15_03-10-59_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 50 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 500 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 1 +per_device_train_batch_size: + desc: null + value: 1 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 30000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 10000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210715_031107-69jkygz3/files/output.log b/wandb/run-20210715_031107-69jkygz3/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..9df312e8c3354f931d9f7fc8f7bf30429bfd5775 --- /dev/null +++ b/wandb/run-20210715_031107-69jkygz3/files/output.log @@ -0,0 +1,4076 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00