dat commited on
Commit
6571862
1 Parent(s): 87e02e7

Saving weights and logs of step 30000

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. checkpoint_30000 +3 -0
  2. events.out.tfevents.1626315358.t1v-n-f5c06ea1-w-0.643445.3.v2 +3 -0
  3. events.out.tfevents.1626316431.t1v-n-f5c06ea1-w-0.646155.3.v2 +3 -0
  4. events.out.tfevents.1626317295.t1v-n-f5c06ea1-w-0.648648.3.v2 +3 -0
  5. events.out.tfevents.1626318014.t1v-n-f5c06ea1-w-0.651126.3.v2 +3 -0
  6. events.out.tfevents.1626318666.t1v-n-f5c06ea1-w-0.655476.3.v2 +3 -0
  7. flax_model.msgpack +1 -1
  8. run.sh +1 -1
  9. run_mlm_flax_no_accum.py +1 -1
  10. wandb/debug-internal.log +1 -1
  11. wandb/debug.log +1 -1
  12. wandb/latest-run +1 -1
  13. wandb/run-20210715_020018-3i0mvo08/files/config.yaml +3 -0
  14. wandb/run-20210715_020018-3i0mvo08/files/output.log +28 -0
  15. wandb/run-20210715_020018-3i0mvo08/logs/debug-internal.log +126 -0
  16. wandb/run-20210715_020018-3i0mvo08/logs/debug.log +94 -0
  17. wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb +0 -0
  18. wandb/run-20210715_021559-38yj0n5v/files/config.yaml +304 -0
  19. wandb/run-20210715_021559-38yj0n5v/files/output.log +37 -0
  20. wandb/run-20210715_021559-38yj0n5v/files/requirements.txt +94 -0
  21. wandb/run-20210715_021559-38yj0n5v/files/wandb-metadata.json +44 -0
  22. wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json +1 -0
  23. wandb/run-20210715_021559-38yj0n5v/logs/debug-internal.log +298 -0
  24. wandb/run-20210715_021559-38yj0n5v/logs/debug.log +119 -0
  25. wandb/run-20210715_021559-38yj0n5v/run-38yj0n5v.wandb +0 -0
  26. wandb/run-20210715_023352-28io0kfl/files/config.yaml +304 -0
  27. wandb/run-20210715_023352-28io0kfl/files/output.log +37 -0
  28. wandb/run-20210715_023352-28io0kfl/files/requirements.txt +94 -0
  29. wandb/run-20210715_023352-28io0kfl/files/wandb-metadata.json +44 -0
  30. wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json +1 -0
  31. wandb/run-20210715_023352-28io0kfl/logs/debug-internal.log +268 -0
  32. wandb/run-20210715_023352-28io0kfl/logs/debug.log +119 -0
  33. wandb/run-20210715_023352-28io0kfl/run-28io0kfl.wandb +0 -0
  34. wandb/run-20210715_024816-39ztwpif/files/config.yaml +304 -0
  35. wandb/run-20210715_024816-39ztwpif/files/output.log +37 -0
  36. wandb/run-20210715_024816-39ztwpif/files/requirements.txt +94 -0
  37. wandb/run-20210715_024816-39ztwpif/files/wandb-metadata.json +44 -0
  38. wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json +1 -0
  39. wandb/run-20210715_024816-39ztwpif/logs/debug-internal.log +240 -0
  40. wandb/run-20210715_024816-39ztwpif/logs/debug.log +119 -0
  41. wandb/run-20210715_024816-39ztwpif/run-39ztwpif.wandb +0 -0
  42. wandb/run-20210715_030015-30wihv4o/files/config.yaml +304 -0
  43. wandb/run-20210715_030015-30wihv4o/files/output.log +37 -0
  44. wandb/run-20210715_030015-30wihv4o/files/requirements.txt +94 -0
  45. wandb/run-20210715_030015-30wihv4o/files/wandb-metadata.json +44 -0
  46. wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json +1 -0
  47. wandb/run-20210715_030015-30wihv4o/logs/debug-internal.log +232 -0
  48. wandb/run-20210715_030015-30wihv4o/logs/debug.log +119 -0
  49. wandb/run-20210715_030015-30wihv4o/run-30wihv4o.wandb +0 -0
  50. wandb/run-20210715_031107-69jkygz3/files/config.yaml +301 -0
checkpoint_30000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59518736214a20e8125f1484fe8db260c9384560796a22aa38130472f209af5f
3
+ size 1530270447
events.out.tfevents.1626315358.t1v-n-f5c06ea1-w-0.643445.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbbdcfdd6e9c4204719d1205d9986aefaa632a8322127f79c7c2db6721350035
3
+ size 40
events.out.tfevents.1626316431.t1v-n-f5c06ea1-w-0.646155.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d42ca9bdf3e3d3484649f49a885a13ee3f0a5215c95ce545d2555e478ec6d2c3
3
+ size 40
events.out.tfevents.1626317295.t1v-n-f5c06ea1-w-0.648648.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4b9c6db5c2a2231727f871c58176b8c65820459b68583d2c8f45c07f2298c60
3
+ size 40
events.out.tfevents.1626318014.t1v-n-f5c06ea1-w-0.651126.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e65b24a209eecf55c1ee53b6bdce4204f068eff07c0ed31b60a3fc7d1ad7de80
3
+ size 40
events.out.tfevents.1626318666.t1v-n-f5c06ea1-w-0.655476.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69b32ab8a1ed5787f81840edb5bf7961c38526a7e7a3785c8559a727406f16a2
3
+ size 4478974
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:920cc52411bb9cee8aec7d54fbccc6e52223b9df8ed791d4fdd90a31831aed15
3
  size 510090043
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:888817743e9128cfd6e093b2327a40d34a3acb8bcc7b90b00adfae9b27af28ec
3
  size 510090043
run.sh CHANGED
@@ -18,7 +18,7 @@ python ./run_mlm_flax_no_accum.py \
18
  --eval_steps="20000" \
19
  --num_train_epochs="5" \
20
  --preprocessing_num_workers="96" \
21
- --save_steps="20000" \
22
  --learning_rate="3e-5" \
23
  --per_device_train_batch_size="1" \
24
  --per_device_eval_batch_size="1" \
 
18
  --eval_steps="20000" \
19
  --num_train_epochs="5" \
20
  --preprocessing_num_workers="96" \
21
+ --save_steps="30000" \
22
  --learning_rate="3e-5" \
23
  --per_device_train_batch_size="1" \
24
  --per_device_eval_batch_size="1" \
run_mlm_flax_no_accum.py CHANGED
@@ -421,7 +421,7 @@ if __name__ == "__main__":
421
  tokenized_datasets = DatasetDict.load_from_disk("/data/tokenized_data")
422
  logger.info("Setting max validation examples to ")
423
  print(f"Number of validation examples {data_args.max_eval_samples}")
424
- #tokenized_datasets["train"]= tokenized_datasets["train"].select(range(int(0.3*len(tokenized_datasets["train"]))))
425
  if data_args.max_eval_samples is not None:
426
  tokenized_datasets["validation"] = tokenized_datasets["validation"].select(range(data_args.max_eval_samples))
427
  else:
 
421
  tokenized_datasets = DatasetDict.load_from_disk("/data/tokenized_data")
422
  logger.info("Setting max validation examples to ")
423
  print(f"Number of validation examples {data_args.max_eval_samples}")
424
+ tokenized_datasets["train"]= tokenized_datasets["train"].select(range(int(0.35*len(tokenized_datasets["train"]))))
425
  if data_args.max_eval_samples is not None:
426
  tokenized_datasets["validation"] = tokenized_datasets["validation"].select(range(data_args.max_eval_samples))
427
  else:
wandb/debug-internal.log CHANGED
@@ -1 +1 @@
1
- run-20210715_020018-3i0mvo08/logs/debug-internal.log
 
1
+ run-20210715_031107-69jkygz3/logs/debug-internal.log
wandb/debug.log CHANGED
@@ -1 +1 @@
1
- run-20210715_020018-3i0mvo08/logs/debug.log
 
1
+ run-20210715_031107-69jkygz3/logs/debug.log
wandb/latest-run CHANGED
@@ -1 +1 @@
1
- run-20210715_020018-3i0mvo08
 
1
+ run-20210715_031107-69jkygz3
wandb/run-20210715_020018-3i0mvo08/files/config.yaml CHANGED
@@ -13,6 +13,9 @@ _wandb:
13
  1:
14
  - 3
15
  - 11
 
 
 
16
  4: 3.8.10
17
  5: 0.10.33
18
  6: 4.9.0.dev0
 
13
  1:
14
  - 3
15
  - 11
16
+ 2:
17
+ - 3
18
+ - 11
19
  4: 3.8.10
20
  5: 0.10.33
21
  6: 4.9.0.dev0
wandb/run-20210715_020018-3i0mvo08/files/output.log CHANGED
@@ -2,3 +2,31 @@
2
  warnings.warn(
3
  /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
4
  warnings.warn(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  warnings.warn(
3
  /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
4
  warnings.warn(
5
+ Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s]2021-07-15 02:14:44.903616: E external/org_tensorflow/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_client.cc:2036] Execution of replica 0 failed: Resource exhausted: Attempting to allocate 17.0K. That was not possible. There are 48.0K free. Due to fragmentation, the largest contiguous region of free memory is 16.0K.; (0x0x0_HBM0)
6
+ Epoch ... (1/5): 0%| | 0/5 [14:10<?, ?it/s]
7
+ Traceback (most recent call last):
8
+ File "./run_mlm_flax_no_accum.py", line 690, in <module>
9
+ train_batch_idx = generate_batch_splits(train_samples_idx, train_batch_size)
10
+ File "./run_mlm_flax_no_accum.py", line 255, in generate_batch_splits
11
+ batch_idx = np.split(samples_idx, sections_split)
12
+ File "<__array_function__ internals>", line 5, in split
13
+ File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 874, in split
14
+ return array_split(ary, indices_or_sections, axis)
15
+ File "<__array_function__ internals>", line 5, in array_split
16
+ File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 790, in array_split
17
+ sub_arys.append(_nx.swapaxes(sary[st:end], axis, 0))
18
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5009, in _rewriting_take
19
+ return _gather(arr, treedef, static_idx, dynamic_idx, indices_are_sorted,
20
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5028, in _gather
21
+ y = lax.gather(
22
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/lax/lax.py", line 984, in gather
23
+ return gather_p.bind(
24
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 264, in bind
25
+ out = top_trace.process_primitive(self, tracers, params)
26
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 603, in process_primitive
27
+ return primitive.impl(*tracers, **params)
28
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 249, in apply_primitive
29
+ return compiled_fun(*args)
30
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 365, in _execute_compiled_primitive
31
+ out_bufs = compiled.execute(input_bufs)
32
+ RuntimeError: Resource exhausted: Attempting to allocate 17.0K. That was not possible. There are 48.0K free. Due to fragmentation, the largest contiguous region of free memory is 16.0K.; (0x0x0_HBM0)
wandb/run-20210715_020018-3i0mvo08/logs/debug-internal.log CHANGED
@@ -154,3 +154,129 @@
154
  2021-07-15 02:12:27,302 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
155
  2021-07-15 02:12:42,431 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
156
  2021-07-15 02:12:42,432 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  2021-07-15 02:12:27,302 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
155
  2021-07-15 02:12:42,431 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
156
  2021-07-15 02:12:42,432 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
157
+ 2021-07-15 02:12:50,705 DEBUG SenderThread:641950 [sender.py:send():179] send: stats
158
+ 2021-07-15 02:12:57,560 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
159
+ 2021-07-15 02:12:57,561 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
160
+ 2021-07-15 02:13:12,692 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
161
+ 2021-07-15 02:13:12,692 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
162
+ 2021-07-15 02:13:20,785 DEBUG SenderThread:641950 [sender.py:send():179] send: stats
163
+ 2021-07-15 02:13:27,826 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
164
+ 2021-07-15 02:13:27,826 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
165
+ 2021-07-15 02:13:42,962 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
166
+ 2021-07-15 02:13:42,963 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
167
+ 2021-07-15 02:13:50,860 DEBUG SenderThread:641950 [sender.py:send():179] send: stats
168
+ 2021-07-15 02:13:58,097 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
169
+ 2021-07-15 02:13:58,097 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
170
+ 2021-07-15 02:14:13,229 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
171
+ 2021-07-15 02:14:13,229 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
172
+ 2021-07-15 02:14:20,935 DEBUG SenderThread:641950 [sender.py:send():179] send: stats
173
+ 2021-07-15 02:14:28,363 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
174
+ 2021-07-15 02:14:28,363 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
175
+ 2021-07-15 02:14:43,496 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
176
+ 2021-07-15 02:14:43,496 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
177
+ 2021-07-15 02:14:46,031 INFO Thread-8 :641950 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/output.log
178
+ 2021-07-15 02:14:47,302 DEBUG SenderThread:641950 [sender.py:send():179] send: telemetry
179
+ 2021-07-15 02:14:47,303 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
180
+ 2021-07-15 02:14:47,303 DEBUG SenderThread:641950 [sender.py:send():179] send: exit
181
+ 2021-07-15 02:14:47,303 INFO SenderThread:641950 [sender.py:send_exit():287] handling exit code: 1
182
+ 2021-07-15 02:14:47,304 INFO SenderThread:641950 [sender.py:send_exit():295] send defer
183
+ 2021-07-15 02:14:47,305 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
184
+ 2021-07-15 02:14:47,305 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
185
+ 2021-07-15 02:14:47,306 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 0
186
+ 2021-07-15 02:14:47,306 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
187
+ 2021-07-15 02:14:47,306 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 0
188
+ 2021-07-15 02:14:47,306 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 1
189
+ 2021-07-15 02:14:47,306 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
190
+ 2021-07-15 02:14:47,306 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 1
191
+ 2021-07-15 02:14:47,401 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
192
+ 2021-07-15 02:14:47,401 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 1
193
+ 2021-07-15 02:14:47,401 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 2
194
+ 2021-07-15 02:14:47,401 DEBUG SenderThread:641950 [sender.py:send():179] send: stats
195
+ 2021-07-15 02:14:47,402 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
196
+ 2021-07-15 02:14:47,402 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 2
197
+ 2021-07-15 02:14:47,402 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
198
+ 2021-07-15 02:14:47,402 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 2
199
+ 2021-07-15 02:14:47,402 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 3
200
+ 2021-07-15 02:14:47,403 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
201
+ 2021-07-15 02:14:47,403 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 3
202
+ 2021-07-15 02:14:47,403 DEBUG SenderThread:641950 [sender.py:send():179] send: summary
203
+ 2021-07-15 02:14:47,403 INFO SenderThread:641950 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
204
+ 2021-07-15 02:14:47,404 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
205
+ 2021-07-15 02:14:47,404 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 3
206
+ 2021-07-15 02:14:47,404 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 4
207
+ 2021-07-15 02:14:47,404 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
208
+ 2021-07-15 02:14:47,404 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 4
209
+ 2021-07-15 02:14:47,404 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
210
+ 2021-07-15 02:14:47,404 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 4
211
+ 2021-07-15 02:14:47,409 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
212
+ 2021-07-15 02:14:47,585 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 5
213
+ 2021-07-15 02:14:47,586 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
214
+ 2021-07-15 02:14:47,586 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
215
+ 2021-07-15 02:14:47,586 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 5
216
+ 2021-07-15 02:14:47,586 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
217
+ 2021-07-15 02:14:47,586 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 5
218
+ 2021-07-15 02:14:47,586 INFO SenderThread:641950 [dir_watcher.py:finish():282] shutting down directory watcher
219
+ 2021-07-15 02:14:47,688 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
220
+ 2021-07-15 02:14:48,032 INFO Thread-8 :641950 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/config.yaml
221
+ 2021-07-15 02:14:48,033 INFO SenderThread:641950 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/wandb-summary.json
222
+ 2021-07-15 02:14:48,033 INFO SenderThread:641950 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/output.log
223
+ 2021-07-15 02:14:48,033 INFO SenderThread:641950 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files
224
+ 2021-07-15 02:14:48,033 INFO SenderThread:641950 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/requirements.txt requirements.txt
225
+ 2021-07-15 02:14:48,034 INFO SenderThread:641950 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/output.log output.log
226
+ 2021-07-15 02:14:48,034 INFO SenderThread:641950 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/wandb-metadata.json wandb-metadata.json
227
+ 2021-07-15 02:14:48,034 INFO SenderThread:641950 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/config.yaml config.yaml
228
+ 2021-07-15 02:14:48,034 INFO SenderThread:641950 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/wandb-summary.json wandb-summary.json
229
+ 2021-07-15 02:14:48,034 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 6
230
+ 2021-07-15 02:14:48,034 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
231
+ 2021-07-15 02:14:48,035 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
232
+ 2021-07-15 02:14:48,035 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 6
233
+ 2021-07-15 02:14:48,036 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
234
+ 2021-07-15 02:14:48,036 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 6
235
+ 2021-07-15 02:14:48,036 INFO SenderThread:641950 [file_pusher.py:finish():177] shutting down file pusher
236
+ 2021-07-15 02:14:48,137 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
237
+ 2021-07-15 02:14:48,137 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
238
+ 2021-07-15 02:14:48,239 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
239
+ 2021-07-15 02:14:48,240 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
240
+ 2021-07-15 02:14:48,342 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
241
+ 2021-07-15 02:14:48,342 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
242
+ 2021-07-15 02:14:48,444 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
243
+ 2021-07-15 02:14:48,444 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
244
+ 2021-07-15 02:14:48,475 INFO Thread-15 :641950 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/wandb-summary.json
245
+ 2021-07-15 02:14:48,479 INFO Thread-13 :641950 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/output.log
246
+ 2021-07-15 02:14:48,493 INFO Thread-14 :641950 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/config.yaml
247
+ 2021-07-15 02:14:48,547 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
248
+ 2021-07-15 02:14:48,548 INFO Thread-12 :641950 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/requirements.txt
249
+ 2021-07-15 02:14:48,548 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
250
+ 2021-07-15 02:14:48,650 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
251
+ 2021-07-15 02:14:48,650 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
252
+ 2021-07-15 02:14:48,749 INFO Thread-7 :641950 [sender.py:transition_state():308] send defer: 7
253
+ 2021-07-15 02:14:48,749 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
254
+ 2021-07-15 02:14:48,749 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 7
255
+ 2021-07-15 02:14:48,749 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
256
+ 2021-07-15 02:14:48,750 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 7
257
+ 2021-07-15 02:14:48,752 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
258
+ 2021-07-15 02:14:49,034 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 8
259
+ 2021-07-15 02:14:49,034 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
260
+ 2021-07-15 02:14:49,035 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
261
+ 2021-07-15 02:14:49,035 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 8
262
+ 2021-07-15 02:14:49,035 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
263
+ 2021-07-15 02:14:49,035 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 8
264
+ 2021-07-15 02:14:49,035 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 9
265
+ 2021-07-15 02:14:49,036 DEBUG SenderThread:641950 [sender.py:send():179] send: final
266
+ 2021-07-15 02:14:49,036 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
267
+ 2021-07-15 02:14:49,036 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 9
268
+ 2021-07-15 02:14:49,036 DEBUG SenderThread:641950 [sender.py:send():179] send: footer
269
+ 2021-07-15 02:14:49,036 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
270
+ 2021-07-15 02:14:49,036 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 9
271
+ 2021-07-15 02:14:49,137 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
272
+ 2021-07-15 02:14:49,137 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
273
+ 2021-07-15 02:14:49,137 INFO SenderThread:641950 [file_pusher.py:join():182] waiting for file pusher
274
+ 2021-07-15 02:14:49,139 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: get_summary
275
+ 2021-07-15 02:14:49,139 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: sampled_history
276
+ 2021-07-15 02:14:49,140 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: shutdown
277
+ 2021-07-15 02:14:49,140 INFO HandlerThread:641950 [handler.py:finish():638] shutting down handler
278
+ 2021-07-15 02:14:50,037 INFO WriterThread:641950 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb
279
+ 2021-07-15 02:14:50,138 INFO SenderThread:641950 [sender.py:finish():945] shutting down sender
280
+ 2021-07-15 02:14:50,138 INFO SenderThread:641950 [file_pusher.py:finish():177] shutting down file pusher
281
+ 2021-07-15 02:14:50,138 INFO SenderThread:641950 [file_pusher.py:join():182] waiting for file pusher
282
+ 2021-07-15 02:14:50,141 INFO MainThread:641950 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210715_020018-3i0mvo08/logs/debug.log CHANGED
@@ -23,3 +23,97 @@ config: {}
23
  2021-07-15 02:00:20,876 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_02-00-11_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
  2021-07-15 02:00:20,878 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
25
  2021-07-15 02:00:20,879 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  2021-07-15 02:00:20,876 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_02-00-11_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
  2021-07-15 02:00:20,878 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
25
  2021-07-15 02:00:20,879 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
26
+ 2021-07-15 02:14:44,909 INFO MainThread:640692 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
27
+ 2021-07-15 02:14:44,910 INFO MainThread:640692 [wandb_run.py:_restore():1565] restore
28
+ 2021-07-15 02:14:47,306 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
29
+ wandb_count: 1
30
+ }
31
+ pusher_stats {
32
+ uploaded_bytes: 1375
33
+ total_bytes: 1375
34
+ }
35
+
36
+ 2021-07-15 02:14:47,586 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
37
+ wandb_count: 1
38
+ }
39
+ pusher_stats {
40
+ uploaded_bytes: 1375
41
+ total_bytes: 1375
42
+ }
43
+
44
+ 2021-07-15 02:14:48,036 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
45
+ wandb_count: 3
46
+ }
47
+ pusher_stats {
48
+ uploaded_bytes: 1375
49
+ total_bytes: 5986
50
+ }
51
+
52
+ 2021-07-15 02:14:48,138 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
53
+ wandb_count: 5
54
+ }
55
+ pusher_stats {
56
+ uploaded_bytes: 1375
57
+ total_bytes: 10555
58
+ }
59
+
60
+ 2021-07-15 02:14:48,240 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
61
+ wandb_count: 5
62
+ }
63
+ pusher_stats {
64
+ uploaded_bytes: 10555
65
+ total_bytes: 10555
66
+ }
67
+
68
+ 2021-07-15 02:14:48,343 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
69
+ wandb_count: 5
70
+ }
71
+ pusher_stats {
72
+ uploaded_bytes: 10555
73
+ total_bytes: 10555
74
+ }
75
+
76
+ 2021-07-15 02:14:48,445 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
77
+ wandb_count: 5
78
+ }
79
+ pusher_stats {
80
+ uploaded_bytes: 10555
81
+ total_bytes: 10555
82
+ }
83
+
84
+ 2021-07-15 02:14:48,549 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
85
+ wandb_count: 5
86
+ }
87
+ pusher_stats {
88
+ uploaded_bytes: 10555
89
+ total_bytes: 10555
90
+ }
91
+
92
+ 2021-07-15 02:14:48,651 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
93
+ wandb_count: 5
94
+ }
95
+ pusher_stats {
96
+ uploaded_bytes: 10555
97
+ total_bytes: 10555
98
+ }
99
+
100
+ 2021-07-15 02:14:49,035 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
101
+ wandb_count: 5
102
+ }
103
+ pusher_stats {
104
+ uploaded_bytes: 10555
105
+ total_bytes: 10555
106
+ }
107
+
108
+ 2021-07-15 02:14:49,138 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
109
+ exit_result {
110
+ }
111
+ file_counts {
112
+ wandb_count: 5
113
+ }
114
+ pusher_stats {
115
+ uploaded_bytes: 10555
116
+ total_bytes: 10555
117
+ }
118
+
119
+ 2021-07-15 02:14:50,442 INFO MainThread:640692 [wandb_run.py:_show_files():1937] logging synced files
wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb CHANGED
Binary files a/wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb and b/wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb differ
 
wandb/run-20210715_021559-38yj0n5v/files/config.yaml ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 2:
17
+ - 3
18
+ - 11
19
+ 4: 3.8.10
20
+ 5: 0.10.33
21
+ 6: 4.9.0.dev0
22
+ 8:
23
+ - 5
24
+ adafactor:
25
+ desc: null
26
+ value: false
27
+ adam_beta1:
28
+ desc: null
29
+ value: 0.9
30
+ adam_beta2:
31
+ desc: null
32
+ value: 0.98
33
+ adam_epsilon:
34
+ desc: null
35
+ value: 1.0e-08
36
+ cache_dir:
37
+ desc: null
38
+ value: null
39
+ config_name:
40
+ desc: null
41
+ value: ./
42
+ dataloader_drop_last:
43
+ desc: null
44
+ value: false
45
+ dataloader_num_workers:
46
+ desc: null
47
+ value: 0
48
+ dataloader_pin_memory:
49
+ desc: null
50
+ value: true
51
+ dataset_config_name:
52
+ desc: null
53
+ value: null
54
+ dataset_name:
55
+ desc: null
56
+ value: null
57
+ ddp_find_unused_parameters:
58
+ desc: null
59
+ value: null
60
+ debug:
61
+ desc: null
62
+ value: []
63
+ deepspeed:
64
+ desc: null
65
+ value: null
66
+ disable_tqdm:
67
+ desc: null
68
+ value: false
69
+ do_eval:
70
+ desc: null
71
+ value: false
72
+ do_predict:
73
+ desc: null
74
+ value: false
75
+ do_train:
76
+ desc: null
77
+ value: false
78
+ dtype:
79
+ desc: null
80
+ value: float32
81
+ eval_accumulation_steps:
82
+ desc: null
83
+ value: null
84
+ eval_steps:
85
+ desc: null
86
+ value: 20000
87
+ evaluation_strategy:
88
+ desc: null
89
+ value: IntervalStrategy.NO
90
+ fp16:
91
+ desc: null
92
+ value: false
93
+ fp16_backend:
94
+ desc: null
95
+ value: auto
96
+ fp16_full_eval:
97
+ desc: null
98
+ value: false
99
+ fp16_opt_level:
100
+ desc: null
101
+ value: O1
102
+ gradient_accumulation_steps:
103
+ desc: null
104
+ value: 1
105
+ greater_is_better:
106
+ desc: null
107
+ value: null
108
+ group_by_length:
109
+ desc: null
110
+ value: false
111
+ ignore_data_skip:
112
+ desc: null
113
+ value: false
114
+ label_names:
115
+ desc: null
116
+ value: null
117
+ label_smoothing_factor:
118
+ desc: null
119
+ value: 0.0
120
+ learning_rate:
121
+ desc: null
122
+ value: 3.0e-05
123
+ length_column_name:
124
+ desc: null
125
+ value: length
126
+ line_by_line:
127
+ desc: null
128
+ value: false
129
+ load_best_model_at_end:
130
+ desc: null
131
+ value: false
132
+ local_rank:
133
+ desc: null
134
+ value: -1
135
+ log_level:
136
+ desc: null
137
+ value: -1
138
+ log_level_replica:
139
+ desc: null
140
+ value: -1
141
+ log_on_each_node:
142
+ desc: null
143
+ value: true
144
+ logging_dir:
145
+ desc: null
146
+ value: ./runs/Jul15_02-15-50_t1v-n-f5c06ea1-w-0
147
+ logging_first_step:
148
+ desc: null
149
+ value: false
150
+ logging_steps:
151
+ desc: null
152
+ value: 50
153
+ logging_strategy:
154
+ desc: null
155
+ value: IntervalStrategy.STEPS
156
+ lr_scheduler_type:
157
+ desc: null
158
+ value: SchedulerType.LINEAR
159
+ max_eval_samples:
160
+ desc: null
161
+ value: 500
162
+ max_grad_norm:
163
+ desc: null
164
+ value: 1.0
165
+ max_seq_length:
166
+ desc: null
167
+ value: 4096
168
+ max_steps:
169
+ desc: null
170
+ value: -1
171
+ metric_for_best_model:
172
+ desc: null
173
+ value: null
174
+ mlm_probability:
175
+ desc: null
176
+ value: 0.15
177
+ model_name_or_path:
178
+ desc: null
179
+ value: null
180
+ model_type:
181
+ desc: null
182
+ value: big_bird
183
+ mp_parameters:
184
+ desc: null
185
+ value: ''
186
+ no_cuda:
187
+ desc: null
188
+ value: false
189
+ num_train_epochs:
190
+ desc: null
191
+ value: 5.0
192
+ output_dir:
193
+ desc: null
194
+ value: ./
195
+ overwrite_cache:
196
+ desc: null
197
+ value: false
198
+ overwrite_output_dir:
199
+ desc: null
200
+ value: true
201
+ pad_to_max_length:
202
+ desc: null
203
+ value: false
204
+ past_index:
205
+ desc: null
206
+ value: -1
207
+ per_device_eval_batch_size:
208
+ desc: null
209
+ value: 1
210
+ per_device_train_batch_size:
211
+ desc: null
212
+ value: 1
213
+ per_gpu_eval_batch_size:
214
+ desc: null
215
+ value: null
216
+ per_gpu_train_batch_size:
217
+ desc: null
218
+ value: null
219
+ prediction_loss_only:
220
+ desc: null
221
+ value: false
222
+ preprocessing_num_workers:
223
+ desc: null
224
+ value: 96
225
+ push_to_hub:
226
+ desc: null
227
+ value: true
228
+ push_to_hub_model_id:
229
+ desc: null
230
+ value: ''
231
+ push_to_hub_organization:
232
+ desc: null
233
+ value: null
234
+ push_to_hub_token:
235
+ desc: null
236
+ value: null
237
+ remove_unused_columns:
238
+ desc: null
239
+ value: true
240
+ report_to:
241
+ desc: null
242
+ value:
243
+ - tensorboard
244
+ - wandb
245
+ resume_from_checkpoint:
246
+ desc: null
247
+ value: null
248
+ run_name:
249
+ desc: null
250
+ value: ./
251
+ save_on_each_node:
252
+ desc: null
253
+ value: false
254
+ save_steps:
255
+ desc: null
256
+ value: 20000
257
+ save_strategy:
258
+ desc: null
259
+ value: IntervalStrategy.STEPS
260
+ save_total_limit:
261
+ desc: null
262
+ value: 5
263
+ seed:
264
+ desc: null
265
+ value: 42
266
+ sharded_ddp:
267
+ desc: null
268
+ value: []
269
+ skip_memory_metrics:
270
+ desc: null
271
+ value: true
272
+ tokenizer_name:
273
+ desc: null
274
+ value: ./
275
+ tpu_metrics_debug:
276
+ desc: null
277
+ value: false
278
+ tpu_num_cores:
279
+ desc: null
280
+ value: null
281
+ train_ref_file:
282
+ desc: null
283
+ value: null
284
+ use_fast_tokenizer:
285
+ desc: null
286
+ value: true
287
+ use_legacy_prediction_loop:
288
+ desc: null
289
+ value: false
290
+ validation_ref_file:
291
+ desc: null
292
+ value: null
293
+ validation_split_percentage:
294
+ desc: null
295
+ value: 5
296
+ warmup_ratio:
297
+ desc: null
298
+ value: 0.0
299
+ warmup_steps:
300
+ desc: null
301
+ value: 10000
302
+ weight_decay:
303
+ desc: null
304
+ value: 0.0095
wandb/run-20210715_021559-38yj0n5v/files/output.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
2
+ warnings.warn(
3
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
4
+ warnings.warn(
5
+ Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s]
6
+ Training...: 0%| | 0/907114 [02:05<?, ?it/s]
7
+ Epoch ... (1/5): 0%| | 0/5 [15:48<?, ?it/s]
8
+ Traceback (most recent call last):
9
+ File "./run_mlm_flax_no_accum.py", line 699, in <module>
10
+ state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
11
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback
12
+ return fun(*args, **kwargs)
13
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped
14
+ out = pxla.xla_pmap(
15
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind
16
+ return call_bind(self, fun, *args, **params)
17
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind
18
+ outs = primitive.process(top_trace, fun, tracers, params)
19
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process
20
+ return trace.process_map(self, fun, tracers, params)
21
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call
22
+ return primitive.impl(f, *tracers, **params)
23
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl
24
+ return compiled_fun(*args)
25
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
26
+ out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
27
+ jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 188.47M free, 0B reserved, and 6.75M reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
28
+ The stack trace below excludes JAX-internal frames.
29
+ The preceding is the original exception that occurred, unmodified.
30
+ --------------------
31
+ The above exception was the direct cause of the following exception:
32
+ Traceback (most recent call last):
33
+ File "./run_mlm_flax_no_accum.py", line 699, in <module>
34
+ state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
35
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
36
+ out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
37
+ RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 188.47M free, 0B reserved, and 6.75M reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
wandb/run-20210715_021559-38yj0n5v/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210715_021559-38yj0n5v/files/wandb-metadata.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-15T02:16:01.117383",
5
+ "startedAt": "2021-07-15T02:15:59.045700",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=10000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=50",
22
+ "--eval_steps=20000",
23
+ "--num_train_epochs=5",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=20000",
26
+ "--learning_rate=3e-5",
27
+ "--per_device_train_batch_size=1",
28
+ "--per_device_eval_batch_size=1",
29
+ "--save_total_limit=5",
30
+ "--max_eval_samples=500"
31
+ ],
32
+ "state": "running",
33
+ "program": "./run_mlm_flax_no_accum.py",
34
+ "codePath": "run_mlm_flax_no_accum.py",
35
+ "git": {
36
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
37
+ "commit": "87e02e7ff8fbaea90c8c4ad1c984f83742432303"
38
+ },
39
+ "email": null,
40
+ "root": "/home/dat/pino-roberta-base",
41
+ "host": "t1v-n-f5c06ea1-w-0",
42
+ "username": "dat",
43
+ "executable": "/home/dat/pino/bin/python"
44
+ }
wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
wandb/run-20210715_021559-38yj0n5v/logs/debug-internal.log ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 02:15:59,792 INFO MainThread:644701 [internal.py:wandb_internal():88] W&B internal server running at pid: 644701, started at: 2021-07-15 02:15:59.792106
2
+ 2021-07-15 02:15:59,795 DEBUG SenderThread:644701 [sender.py:send():179] send: header
3
+ 2021-07-15 02:15:59,795 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: check_version
4
+ 2021-07-15 02:15:59,795 INFO WriterThread:644701 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/run-38yj0n5v.wandb
5
+ 2021-07-15 02:15:59,796 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: check_version
6
+ 2021-07-15 02:15:59,833 DEBUG SenderThread:644701 [sender.py:send():179] send: run
7
+ 2021-07-15 02:16:00,007 INFO SenderThread:644701 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files
8
+ 2021-07-15 02:16:00,007 INFO SenderThread:644701 [sender.py:_start_run_threads():716] run started: 38yj0n5v with start time 1626315359
9
+ 2021-07-15 02:16:00,007 DEBUG SenderThread:644701 [sender.py:send():179] send: summary
10
+ 2021-07-15 02:16:00,008 INFO SenderThread:644701 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
11
+ 2021-07-15 02:16:00,008 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: run_start
12
+ 2021-07-15 02:16:01,010 INFO Thread-8 :644701 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json
13
+ 2021-07-15 02:16:01,117 DEBUG HandlerThread:644701 [meta.py:__init__():39] meta init
14
+ 2021-07-15 02:16:01,117 DEBUG HandlerThread:644701 [meta.py:__init__():53] meta init done
15
+ 2021-07-15 02:16:01,117 DEBUG HandlerThread:644701 [meta.py:probe():210] probe
16
+ 2021-07-15 02:16:01,118 DEBUG HandlerThread:644701 [meta.py:_setup_git():200] setup git
17
+ 2021-07-15 02:16:01,149 DEBUG HandlerThread:644701 [meta.py:_setup_git():207] setup git done
18
+ 2021-07-15 02:16:01,150 DEBUG HandlerThread:644701 [meta.py:_save_pip():57] save pip
19
+ 2021-07-15 02:16:01,150 DEBUG HandlerThread:644701 [meta.py:_save_pip():71] save pip done
20
+ 2021-07-15 02:16:01,150 DEBUG HandlerThread:644701 [meta.py:probe():252] probe done
21
+ 2021-07-15 02:16:01,154 DEBUG SenderThread:644701 [sender.py:send():179] send: files
22
+ 2021-07-15 02:16:01,154 INFO SenderThread:644701 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
23
+ 2021-07-15 02:16:01,160 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
24
+ 2021-07-15 02:16:01,161 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
25
+ 2021-07-15 02:16:01,290 DEBUG SenderThread:644701 [sender.py:send():179] send: config
26
+ 2021-07-15 02:16:01,291 DEBUG SenderThread:644701 [sender.py:send():179] send: config
27
+ 2021-07-15 02:16:01,291 DEBUG SenderThread:644701 [sender.py:send():179] send: config
28
+ 2021-07-15 02:16:01,718 INFO Thread-11 :644701 [upload_job.py:push():137] Uploaded file /tmp/tmp__ipqk3vwandb/1qcixa2k-wandb-metadata.json
29
+ 2021-07-15 02:16:02,009 INFO Thread-8 :644701 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log
30
+ 2021-07-15 02:16:02,009 INFO Thread-8 :644701 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/requirements.txt
31
+ 2021-07-15 02:16:02,009 INFO Thread-8 :644701 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-metadata.json
32
+ 2021-07-15 02:16:16,015 INFO Thread-8 :644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log
33
+ 2021-07-15 02:16:16,292 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
34
+ 2021-07-15 02:16:16,293 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
35
+ 2021-07-15 02:16:29,202 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
36
+ 2021-07-15 02:16:31,021 INFO Thread-8 :644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/config.yaml
37
+ 2021-07-15 02:16:31,425 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
38
+ 2021-07-15 02:16:31,425 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
39
+ 2021-07-15 02:16:46,555 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
40
+ 2021-07-15 02:16:46,555 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
41
+ 2021-07-15 02:16:59,284 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
42
+ 2021-07-15 02:17:01,687 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
43
+ 2021-07-15 02:17:01,687 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
44
+ 2021-07-15 02:17:16,819 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
45
+ 2021-07-15 02:17:16,820 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
46
+ 2021-07-15 02:17:29,359 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
47
+ 2021-07-15 02:17:31,951 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
48
+ 2021-07-15 02:17:31,951 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
49
+ 2021-07-15 02:17:47,083 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
50
+ 2021-07-15 02:17:47,083 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
51
+ 2021-07-15 02:17:59,439 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
52
+ 2021-07-15 02:18:02,215 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
53
+ 2021-07-15 02:18:02,215 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
54
+ 2021-07-15 02:18:17,355 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
55
+ 2021-07-15 02:18:17,356 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
56
+ 2021-07-15 02:18:29,519 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
57
+ 2021-07-15 02:18:32,491 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
58
+ 2021-07-15 02:18:32,492 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
59
+ 2021-07-15 02:18:47,624 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
60
+ 2021-07-15 02:18:47,624 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
61
+ 2021-07-15 02:18:59,595 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
62
+ 2021-07-15 02:19:02,759 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
63
+ 2021-07-15 02:19:02,759 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
64
+ 2021-07-15 02:19:17,890 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
65
+ 2021-07-15 02:19:17,890 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
66
+ 2021-07-15 02:19:29,672 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
67
+ 2021-07-15 02:19:33,021 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
68
+ 2021-07-15 02:19:33,022 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
69
+ 2021-07-15 02:19:48,153 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
70
+ 2021-07-15 02:19:48,154 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
71
+ 2021-07-15 02:19:59,751 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
72
+ 2021-07-15 02:20:03,293 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
73
+ 2021-07-15 02:20:03,294 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
74
+ 2021-07-15 02:20:18,425 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
75
+ 2021-07-15 02:20:18,426 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
76
+ 2021-07-15 02:20:29,828 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
77
+ 2021-07-15 02:20:33,560 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
78
+ 2021-07-15 02:20:33,560 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
79
+ 2021-07-15 02:20:48,726 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
80
+ 2021-07-15 02:20:48,726 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
81
+ 2021-07-15 02:20:59,906 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
82
+ 2021-07-15 02:21:03,857 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
83
+ 2021-07-15 02:21:03,858 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
84
+ 2021-07-15 02:21:18,990 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
85
+ 2021-07-15 02:21:18,991 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
86
+ 2021-07-15 02:21:29,980 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
87
+ 2021-07-15 02:21:34,126 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
88
+ 2021-07-15 02:21:34,126 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
89
+ 2021-07-15 02:21:49,258 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
90
+ 2021-07-15 02:21:49,258 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
91
+ 2021-07-15 02:22:00,053 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
92
+ 2021-07-15 02:22:04,390 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
93
+ 2021-07-15 02:22:04,391 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
94
+ 2021-07-15 02:22:19,527 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
95
+ 2021-07-15 02:22:19,527 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
96
+ 2021-07-15 02:22:30,130 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
97
+ 2021-07-15 02:22:34,658 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
98
+ 2021-07-15 02:22:34,658 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
99
+ 2021-07-15 02:22:49,790 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
100
+ 2021-07-15 02:22:49,790 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
101
+ 2021-07-15 02:23:00,206 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
102
+ 2021-07-15 02:23:04,919 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
103
+ 2021-07-15 02:23:04,920 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
104
+ 2021-07-15 02:23:20,062 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
105
+ 2021-07-15 02:23:20,063 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
106
+ 2021-07-15 02:23:30,267 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
107
+ 2021-07-15 02:23:35,199 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
108
+ 2021-07-15 02:23:35,199 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
109
+ 2021-07-15 02:23:50,332 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
110
+ 2021-07-15 02:23:50,332 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
111
+ 2021-07-15 02:24:00,346 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
112
+ 2021-07-15 02:24:05,465 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
113
+ 2021-07-15 02:24:05,466 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
114
+ 2021-07-15 02:24:20,598 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
115
+ 2021-07-15 02:24:20,598 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
116
+ 2021-07-15 02:24:30,424 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
117
+ 2021-07-15 02:24:35,751 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
118
+ 2021-07-15 02:24:35,751 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
119
+ 2021-07-15 02:24:50,888 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
120
+ 2021-07-15 02:24:50,888 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
121
+ 2021-07-15 02:25:00,500 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
122
+ 2021-07-15 02:25:06,021 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
123
+ 2021-07-15 02:25:06,022 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
124
+ 2021-07-15 02:25:21,156 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
125
+ 2021-07-15 02:25:21,157 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
126
+ 2021-07-15 02:25:30,575 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
127
+ 2021-07-15 02:25:36,290 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
128
+ 2021-07-15 02:25:36,291 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
129
+ 2021-07-15 02:25:51,426 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
130
+ 2021-07-15 02:25:51,426 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
131
+ 2021-07-15 02:26:00,654 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
132
+ 2021-07-15 02:26:06,562 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
133
+ 2021-07-15 02:26:06,562 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
134
+ 2021-07-15 02:26:21,692 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
135
+ 2021-07-15 02:26:21,693 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
136
+ 2021-07-15 02:26:30,729 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
137
+ 2021-07-15 02:26:36,825 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
138
+ 2021-07-15 02:26:36,825 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
139
+ 2021-07-15 02:26:51,959 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
140
+ 2021-07-15 02:26:51,959 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
141
+ 2021-07-15 02:27:00,798 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
142
+ 2021-07-15 02:27:07,091 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
143
+ 2021-07-15 02:27:07,091 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
144
+ 2021-07-15 02:27:22,224 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
145
+ 2021-07-15 02:27:22,224 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
146
+ 2021-07-15 02:27:30,870 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
147
+ 2021-07-15 02:27:37,360 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
148
+ 2021-07-15 02:27:37,360 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
149
+ 2021-07-15 02:27:52,491 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
150
+ 2021-07-15 02:27:52,491 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
151
+ 2021-07-15 02:28:00,938 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
152
+ 2021-07-15 02:28:07,622 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
153
+ 2021-07-15 02:28:07,622 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
154
+ 2021-07-15 02:28:22,754 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
155
+ 2021-07-15 02:28:22,755 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
156
+ 2021-07-15 02:28:31,010 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
157
+ 2021-07-15 02:28:37,888 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
158
+ 2021-07-15 02:28:37,888 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
159
+ 2021-07-15 02:28:53,020 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
160
+ 2021-07-15 02:28:53,021 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
161
+ 2021-07-15 02:29:01,085 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
162
+ 2021-07-15 02:29:08,157 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
163
+ 2021-07-15 02:29:08,157 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
164
+ 2021-07-15 02:29:23,289 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
165
+ 2021-07-15 02:29:23,289 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
166
+ 2021-07-15 02:29:31,158 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
167
+ 2021-07-15 02:29:38,420 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
168
+ 2021-07-15 02:29:38,420 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
169
+ 2021-07-15 02:29:53,553 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
170
+ 2021-07-15 02:29:53,553 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
171
+ 2021-07-15 02:29:58,389 INFO Thread-8 :644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log
172
+ 2021-07-15 02:30:01,235 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
173
+ 2021-07-15 02:30:08,702 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
174
+ 2021-07-15 02:30:08,702 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
175
+ 2021-07-15 02:30:23,843 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
176
+ 2021-07-15 02:30:23,843 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
177
+ 2021-07-15 02:30:31,315 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
178
+ 2021-07-15 02:30:38,973 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
179
+ 2021-07-15 02:30:38,973 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
180
+ 2021-07-15 02:30:54,105 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
181
+ 2021-07-15 02:30:54,106 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
182
+ 2021-07-15 02:31:01,399 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
183
+ 2021-07-15 02:31:09,240 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
184
+ 2021-07-15 02:31:09,240 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
185
+ 2021-07-15 02:31:24,379 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
186
+ 2021-07-15 02:31:24,379 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
187
+ 2021-07-15 02:31:31,480 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
188
+ 2021-07-15 02:31:39,512 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
189
+ 2021-07-15 02:31:39,512 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
190
+ 2021-07-15 02:31:54,644 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
191
+ 2021-07-15 02:31:54,644 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
192
+ 2021-07-15 02:32:01,553 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
193
+ 2021-07-15 02:32:04,443 INFO Thread-8 :644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log
194
+ 2021-07-15 02:32:04,474 DEBUG SenderThread:644701 [sender.py:send():179] send: telemetry
195
+ 2021-07-15 02:32:04,474 DEBUG SenderThread:644701 [sender.py:send():179] send: exit
196
+ 2021-07-15 02:32:04,474 INFO SenderThread:644701 [sender.py:send_exit():287] handling exit code: 1
197
+ 2021-07-15 02:32:04,476 INFO SenderThread:644701 [sender.py:send_exit():295] send defer
198
+ 2021-07-15 02:32:04,476 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
199
+ 2021-07-15 02:32:04,477 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
200
+ 2021-07-15 02:32:04,477 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
201
+ 2021-07-15 02:32:04,477 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 0
202
+ 2021-07-15 02:32:04,478 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
203
+ 2021-07-15 02:32:04,478 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 0
204
+ 2021-07-15 02:32:04,478 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 1
205
+ 2021-07-15 02:32:04,478 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
206
+ 2021-07-15 02:32:04,478 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 1
207
+ 2021-07-15 02:32:04,561 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
208
+ 2021-07-15 02:32:04,561 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 1
209
+ 2021-07-15 02:32:04,562 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 2
210
+ 2021-07-15 02:32:04,562 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
211
+ 2021-07-15 02:32:04,562 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
212
+ 2021-07-15 02:32:04,562 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 2
213
+ 2021-07-15 02:32:04,562 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
214
+ 2021-07-15 02:32:04,563 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 2
215
+ 2021-07-15 02:32:04,563 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 3
216
+ 2021-07-15 02:32:04,563 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
217
+ 2021-07-15 02:32:04,563 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 3
218
+ 2021-07-15 02:32:04,563 DEBUG SenderThread:644701 [sender.py:send():179] send: summary
219
+ 2021-07-15 02:32:04,564 INFO SenderThread:644701 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
220
+ 2021-07-15 02:32:04,564 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
221
+ 2021-07-15 02:32:04,564 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 3
222
+ 2021-07-15 02:32:04,564 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 4
223
+ 2021-07-15 02:32:04,565 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
224
+ 2021-07-15 02:32:04,565 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 4
225
+ 2021-07-15 02:32:04,565 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
226
+ 2021-07-15 02:32:04,565 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 4
227
+ 2021-07-15 02:32:04,580 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
228
+ 2021-07-15 02:32:04,749 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 5
229
+ 2021-07-15 02:32:04,749 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
230
+ 2021-07-15 02:32:04,749 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
231
+ 2021-07-15 02:32:04,750 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 5
232
+ 2021-07-15 02:32:04,750 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
233
+ 2021-07-15 02:32:04,750 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 5
234
+ 2021-07-15 02:32:04,750 INFO SenderThread:644701 [dir_watcher.py:finish():282] shutting down directory watcher
235
+ 2021-07-15 02:32:04,851 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
236
+ 2021-07-15 02:32:05,444 INFO Thread-8 :644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log
237
+ 2021-07-15 02:32:05,445 INFO SenderThread:644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json
238
+ 2021-07-15 02:32:05,445 INFO SenderThread:644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/config.yaml
239
+ 2021-07-15 02:32:05,445 INFO SenderThread:644701 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files
240
+ 2021-07-15 02:32:05,445 INFO SenderThread:644701 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/requirements.txt requirements.txt
241
+ 2021-07-15 02:32:05,446 INFO SenderThread:644701 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log output.log
242
+ 2021-07-15 02:32:05,446 INFO SenderThread:644701 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-metadata.json wandb-metadata.json
243
+ 2021-07-15 02:32:05,446 INFO SenderThread:644701 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/config.yaml config.yaml
244
+ 2021-07-15 02:32:05,450 INFO SenderThread:644701 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json wandb-summary.json
245
+ 2021-07-15 02:32:05,453 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 6
246
+ 2021-07-15 02:32:05,453 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
247
+ 2021-07-15 02:32:05,455 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
248
+ 2021-07-15 02:32:05,457 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 6
249
+ 2021-07-15 02:32:05,458 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
250
+ 2021-07-15 02:32:05,461 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 6
251
+ 2021-07-15 02:32:05,461 INFO SenderThread:644701 [file_pusher.py:finish():177] shutting down file pusher
252
+ 2021-07-15 02:32:05,556 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
253
+ 2021-07-15 02:32:05,556 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
254
+ 2021-07-15 02:32:05,658 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
255
+ 2021-07-15 02:32:05,659 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
256
+ 2021-07-15 02:32:05,761 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
257
+ 2021-07-15 02:32:05,761 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
258
+ 2021-07-15 02:32:05,863 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
259
+ 2021-07-15 02:32:05,863 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
260
+ 2021-07-15 02:32:05,888 INFO Thread-15 :644701 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json
261
+ 2021-07-15 02:32:05,892 INFO Thread-13 :644701 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log
262
+ 2021-07-15 02:32:05,894 INFO Thread-14 :644701 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/config.yaml
263
+ 2021-07-15 02:32:05,895 INFO Thread-12 :644701 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/requirements.txt
264
+ 2021-07-15 02:32:05,965 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
265
+ 2021-07-15 02:32:05,965 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
266
+ 2021-07-15 02:32:06,067 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
267
+ 2021-07-15 02:32:06,067 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
268
+ 2021-07-15 02:32:06,096 INFO Thread-7 :644701 [sender.py:transition_state():308] send defer: 7
269
+ 2021-07-15 02:32:06,096 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
270
+ 2021-07-15 02:32:06,096 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 7
271
+ 2021-07-15 02:32:06,097 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
272
+ 2021-07-15 02:32:06,097 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 7
273
+ 2021-07-15 02:32:06,169 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
274
+ 2021-07-15 02:32:06,370 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 8
275
+ 2021-07-15 02:32:06,370 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
276
+ 2021-07-15 02:32:06,371 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
277
+ 2021-07-15 02:32:06,371 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 8
278
+ 2021-07-15 02:32:06,371 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
279
+ 2021-07-15 02:32:06,371 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 8
280
+ 2021-07-15 02:32:06,371 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 9
281
+ 2021-07-15 02:32:06,372 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
282
+ 2021-07-15 02:32:06,372 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 9
283
+ 2021-07-15 02:32:06,372 DEBUG SenderThread:644701 [sender.py:send():179] send: final
284
+ 2021-07-15 02:32:06,372 DEBUG SenderThread:644701 [sender.py:send():179] send: footer
285
+ 2021-07-15 02:32:06,372 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
286
+ 2021-07-15 02:32:06,372 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 9
287
+ 2021-07-15 02:32:06,472 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
288
+ 2021-07-15 02:32:06,472 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
289
+ 2021-07-15 02:32:06,473 INFO SenderThread:644701 [file_pusher.py:join():182] waiting for file pusher
290
+ 2021-07-15 02:32:06,474 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: get_summary
291
+ 2021-07-15 02:32:06,475 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: sampled_history
292
+ 2021-07-15 02:32:06,475 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: shutdown
293
+ 2021-07-15 02:32:06,475 INFO HandlerThread:644701 [handler.py:finish():638] shutting down handler
294
+ 2021-07-15 02:32:07,372 INFO WriterThread:644701 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/run-38yj0n5v.wandb
295
+ 2021-07-15 02:32:07,473 INFO SenderThread:644701 [sender.py:finish():945] shutting down sender
296
+ 2021-07-15 02:32:07,473 INFO SenderThread:644701 [file_pusher.py:finish():177] shutting down file pusher
297
+ 2021-07-15 02:32:07,473 INFO SenderThread:644701 [file_pusher.py:join():182] waiting for file pusher
298
+ 2021-07-15 02:32:07,477 INFO MainThread:644701 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210715_021559-38yj0n5v/logs/debug.log ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 02:15:59,047 INFO MainThread:643445 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-15 02:15:59,047 INFO MainThread:643445 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-15 02:15:59,047 INFO MainThread:643445 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/logs/debug.log
4
+ 2021-07-15 02:15:59,047 INFO MainThread:643445 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/logs/debug-internal.log
5
+ 2021-07-15 02:15:59,048 INFO MainThread:643445 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-15 02:15:59,048 INFO MainThread:643445 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-15 02:15:59,048 INFO MainThread:643445 [wandb_init.py:init():419] starting backend
9
+ 2021-07-15 02:15:59,048 INFO MainThread:643445 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-15 02:15:59,097 INFO MainThread:643445 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-15 02:15:59,145 INFO MainThread:643445 [backend.py:ensure_launched():139] started backend process with pid: 644701
12
+ 2021-07-15 02:15:59,147 INFO MainThread:643445 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-15 02:15:59,150 INFO MainThread:643445 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-15 02:15:59,151 INFO MainThread:643445 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-15 02:15:59,832 INFO MainThread:643445 [wandb_init.py:init():496] got version response
16
+ 2021-07-15 02:15:59,832 INFO MainThread:643445 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-15 02:16:00,007 INFO MainThread:643445 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-15 02:16:01,157 INFO MainThread:643445 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-15 02:16:01,158 INFO MainThread:643445 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-15 02:16:01,158 INFO MainThread:643445 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-15 02:16:01,160 INFO MainThread:643445 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-15 02:16:01,160 INFO MainThread:643445 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-15 02:16:01,168 INFO MainThread:643445 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_02-15-50_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-15 02:16:01,170 INFO MainThread:643445 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
25
+ 2021-07-15 02:16:01,171 INFO MainThread:643445 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
26
+ 2021-07-15 02:32:02,250 INFO MainThread:643445 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
27
+ 2021-07-15 02:32:02,251 INFO MainThread:643445 [wandb_run.py:_restore():1565] restore
28
+ 2021-07-15 02:32:04,478 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
29
+ wandb_count: 1
30
+ }
31
+ pusher_stats {
32
+ uploaded_bytes: 1375
33
+ total_bytes: 1375
34
+ }
35
+
36
+ 2021-07-15 02:32:04,750 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
37
+ wandb_count: 1
38
+ }
39
+ pusher_stats {
40
+ uploaded_bytes: 1375
41
+ total_bytes: 1375
42
+ }
43
+
44
+ 2021-07-15 02:32:05,454 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
45
+ wandb_count: 3
46
+ }
47
+ pusher_stats {
48
+ uploaded_bytes: 1375
49
+ total_bytes: 6341
50
+ }
51
+
52
+ 2021-07-15 02:32:05,557 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
53
+ wandb_count: 5
54
+ }
55
+ pusher_stats {
56
+ uploaded_bytes: 1375
57
+ total_bytes: 10910
58
+ }
59
+
60
+ 2021-07-15 02:32:05,659 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
61
+ wandb_count: 5
62
+ }
63
+ pusher_stats {
64
+ uploaded_bytes: 10910
65
+ total_bytes: 10910
66
+ }
67
+
68
+ 2021-07-15 02:32:05,761 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
69
+ wandb_count: 5
70
+ }
71
+ pusher_stats {
72
+ uploaded_bytes: 10910
73
+ total_bytes: 10910
74
+ }
75
+
76
+ 2021-07-15 02:32:05,864 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
77
+ wandb_count: 5
78
+ }
79
+ pusher_stats {
80
+ uploaded_bytes: 10910
81
+ total_bytes: 10910
82
+ }
83
+
84
+ 2021-07-15 02:32:05,966 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
85
+ wandb_count: 5
86
+ }
87
+ pusher_stats {
88
+ uploaded_bytes: 10910
89
+ total_bytes: 10910
90
+ }
91
+
92
+ 2021-07-15 02:32:06,068 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
93
+ wandb_count: 5
94
+ }
95
+ pusher_stats {
96
+ uploaded_bytes: 10910
97
+ total_bytes: 10910
98
+ }
99
+
100
+ 2021-07-15 02:32:06,371 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
101
+ wandb_count: 5
102
+ }
103
+ pusher_stats {
104
+ uploaded_bytes: 10910
105
+ total_bytes: 10910
106
+ }
107
+
108
+ 2021-07-15 02:32:06,473 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
109
+ exit_result {
110
+ }
111
+ file_counts {
112
+ wandb_count: 5
113
+ }
114
+ pusher_stats {
115
+ uploaded_bytes: 10910
116
+ total_bytes: 10910
117
+ }
118
+
119
+ 2021-07-15 02:32:07,796 INFO MainThread:643445 [wandb_run.py:_show_files():1937] logging synced files
wandb/run-20210715_021559-38yj0n5v/run-38yj0n5v.wandb ADDED
Binary file (14.5 kB). View file
 
wandb/run-20210715_023352-28io0kfl/files/config.yaml ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 2:
17
+ - 3
18
+ - 11
19
+ 4: 3.8.10
20
+ 5: 0.10.33
21
+ 6: 4.9.0.dev0
22
+ 8:
23
+ - 5
24
+ adafactor:
25
+ desc: null
26
+ value: false
27
+ adam_beta1:
28
+ desc: null
29
+ value: 0.9
30
+ adam_beta2:
31
+ desc: null
32
+ value: 0.98
33
+ adam_epsilon:
34
+ desc: null
35
+ value: 1.0e-08
36
+ cache_dir:
37
+ desc: null
38
+ value: null
39
+ config_name:
40
+ desc: null
41
+ value: ./
42
+ dataloader_drop_last:
43
+ desc: null
44
+ value: false
45
+ dataloader_num_workers:
46
+ desc: null
47
+ value: 0
48
+ dataloader_pin_memory:
49
+ desc: null
50
+ value: true
51
+ dataset_config_name:
52
+ desc: null
53
+ value: null
54
+ dataset_name:
55
+ desc: null
56
+ value: null
57
+ ddp_find_unused_parameters:
58
+ desc: null
59
+ value: null
60
+ debug:
61
+ desc: null
62
+ value: []
63
+ deepspeed:
64
+ desc: null
65
+ value: null
66
+ disable_tqdm:
67
+ desc: null
68
+ value: false
69
+ do_eval:
70
+ desc: null
71
+ value: false
72
+ do_predict:
73
+ desc: null
74
+ value: false
75
+ do_train:
76
+ desc: null
77
+ value: false
78
+ dtype:
79
+ desc: null
80
+ value: float32
81
+ eval_accumulation_steps:
82
+ desc: null
83
+ value: null
84
+ eval_steps:
85
+ desc: null
86
+ value: 20000
87
+ evaluation_strategy:
88
+ desc: null
89
+ value: IntervalStrategy.NO
90
+ fp16:
91
+ desc: null
92
+ value: false
93
+ fp16_backend:
94
+ desc: null
95
+ value: auto
96
+ fp16_full_eval:
97
+ desc: null
98
+ value: false
99
+ fp16_opt_level:
100
+ desc: null
101
+ value: O1
102
+ gradient_accumulation_steps:
103
+ desc: null
104
+ value: 1
105
+ greater_is_better:
106
+ desc: null
107
+ value: null
108
+ group_by_length:
109
+ desc: null
110
+ value: false
111
+ ignore_data_skip:
112
+ desc: null
113
+ value: false
114
+ label_names:
115
+ desc: null
116
+ value: null
117
+ label_smoothing_factor:
118
+ desc: null
119
+ value: 0.0
120
+ learning_rate:
121
+ desc: null
122
+ value: 3.0e-05
123
+ length_column_name:
124
+ desc: null
125
+ value: length
126
+ line_by_line:
127
+ desc: null
128
+ value: false
129
+ load_best_model_at_end:
130
+ desc: null
131
+ value: false
132
+ local_rank:
133
+ desc: null
134
+ value: -1
135
+ log_level:
136
+ desc: null
137
+ value: -1
138
+ log_level_replica:
139
+ desc: null
140
+ value: -1
141
+ log_on_each_node:
142
+ desc: null
143
+ value: true
144
+ logging_dir:
145
+ desc: null
146
+ value: ./runs/Jul15_02-33-44_t1v-n-f5c06ea1-w-0
147
+ logging_first_step:
148
+ desc: null
149
+ value: false
150
+ logging_steps:
151
+ desc: null
152
+ value: 50
153
+ logging_strategy:
154
+ desc: null
155
+ value: IntervalStrategy.STEPS
156
+ lr_scheduler_type:
157
+ desc: null
158
+ value: SchedulerType.LINEAR
159
+ max_eval_samples:
160
+ desc: null
161
+ value: 500
162
+ max_grad_norm:
163
+ desc: null
164
+ value: 1.0
165
+ max_seq_length:
166
+ desc: null
167
+ value: 4096
168
+ max_steps:
169
+ desc: null
170
+ value: -1
171
+ metric_for_best_model:
172
+ desc: null
173
+ value: null
174
+ mlm_probability:
175
+ desc: null
176
+ value: 0.15
177
+ model_name_or_path:
178
+ desc: null
179
+ value: null
180
+ model_type:
181
+ desc: null
182
+ value: big_bird
183
+ mp_parameters:
184
+ desc: null
185
+ value: ''
186
+ no_cuda:
187
+ desc: null
188
+ value: false
189
+ num_train_epochs:
190
+ desc: null
191
+ value: 5.0
192
+ output_dir:
193
+ desc: null
194
+ value: ./
195
+ overwrite_cache:
196
+ desc: null
197
+ value: false
198
+ overwrite_output_dir:
199
+ desc: null
200
+ value: true
201
+ pad_to_max_length:
202
+ desc: null
203
+ value: false
204
+ past_index:
205
+ desc: null
206
+ value: -1
207
+ per_device_eval_batch_size:
208
+ desc: null
209
+ value: 1
210
+ per_device_train_batch_size:
211
+ desc: null
212
+ value: 1
213
+ per_gpu_eval_batch_size:
214
+ desc: null
215
+ value: null
216
+ per_gpu_train_batch_size:
217
+ desc: null
218
+ value: null
219
+ prediction_loss_only:
220
+ desc: null
221
+ value: false
222
+ preprocessing_num_workers:
223
+ desc: null
224
+ value: 96
225
+ push_to_hub:
226
+ desc: null
227
+ value: true
228
+ push_to_hub_model_id:
229
+ desc: null
230
+ value: ''
231
+ push_to_hub_organization:
232
+ desc: null
233
+ value: null
234
+ push_to_hub_token:
235
+ desc: null
236
+ value: null
237
+ remove_unused_columns:
238
+ desc: null
239
+ value: true
240
+ report_to:
241
+ desc: null
242
+ value:
243
+ - tensorboard
244
+ - wandb
245
+ resume_from_checkpoint:
246
+ desc: null
247
+ value: null
248
+ run_name:
249
+ desc: null
250
+ value: ./
251
+ save_on_each_node:
252
+ desc: null
253
+ value: false
254
+ save_steps:
255
+ desc: null
256
+ value: 40000
257
+ save_strategy:
258
+ desc: null
259
+ value: IntervalStrategy.STEPS
260
+ save_total_limit:
261
+ desc: null
262
+ value: 5
263
+ seed:
264
+ desc: null
265
+ value: 42
266
+ sharded_ddp:
267
+ desc: null
268
+ value: []
269
+ skip_memory_metrics:
270
+ desc: null
271
+ value: true
272
+ tokenizer_name:
273
+ desc: null
274
+ value: ./
275
+ tpu_metrics_debug:
276
+ desc: null
277
+ value: false
278
+ tpu_num_cores:
279
+ desc: null
280
+ value: null
281
+ train_ref_file:
282
+ desc: null
283
+ value: null
284
+ use_fast_tokenizer:
285
+ desc: null
286
+ value: true
287
+ use_legacy_prediction_loop:
288
+ desc: null
289
+ value: false
290
+ validation_ref_file:
291
+ desc: null
292
+ value: null
293
+ validation_split_percentage:
294
+ desc: null
295
+ value: 5
296
+ warmup_ratio:
297
+ desc: null
298
+ value: 0.0
299
+ warmup_steps:
300
+ desc: null
301
+ value: 10000
302
+ weight_decay:
303
+ desc: null
304
+ value: 0.0095
wandb/run-20210715_023352-28io0kfl/files/output.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
2
+ warnings.warn(
3
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
4
+ warnings.warn(
5
+ Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s]
6
+ Training...: 0%| | 0/705533 [02:06<?, ?it/s]
7
+ Epoch ... (1/5): 0%| | 0/5 [12:48<?, ?it/s]
8
+ Traceback (most recent call last):
9
+ File "./run_mlm_flax_no_accum.py", line 699, in <module>
10
+ state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
11
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback
12
+ return fun(*args, **kwargs)
13
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped
14
+ out = pxla.xla_pmap(
15
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind
16
+ return call_bind(self, fun, *args, **params)
17
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind
18
+ outs = primitive.process(top_trace, fun, tracers, params)
19
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process
20
+ return trace.process_map(self, fun, tracers, params)
21
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call
22
+ return primitive.impl(f, *tracers, **params)
23
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl
24
+ return compiled_fun(*args)
25
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
26
+ out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
27
+ jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 3.27G free, 0B reserved, and 3.22G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
28
+ The stack trace below excludes JAX-internal frames.
29
+ The preceding is the original exception that occurred, unmodified.
30
+ --------------------
31
+ The above exception was the direct cause of the following exception:
32
+ Traceback (most recent call last):
33
+ File "./run_mlm_flax_no_accum.py", line 699, in <module>
34
+ state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
35
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
36
+ out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
37
+ RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 3.27G free, 0B reserved, and 3.22G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
wandb/run-20210715_023352-28io0kfl/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210715_023352-28io0kfl/files/wandb-metadata.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-15T02:33:54.743234",
5
+ "startedAt": "2021-07-15T02:33:52.730317",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=10000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=50",
22
+ "--eval_steps=20000",
23
+ "--num_train_epochs=5",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=40000",
26
+ "--learning_rate=3e-5",
27
+ "--per_device_train_batch_size=1",
28
+ "--per_device_eval_batch_size=1",
29
+ "--save_total_limit=5",
30
+ "--max_eval_samples=500"
31
+ ],
32
+ "state": "running",
33
+ "program": "./run_mlm_flax_no_accum.py",
34
+ "codePath": "run_mlm_flax_no_accum.py",
35
+ "git": {
36
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
37
+ "commit": "87e02e7ff8fbaea90c8c4ad1c984f83742432303"
38
+ },
39
+ "email": null,
40
+ "root": "/home/dat/pino-roberta-base",
41
+ "host": "t1v-n-f5c06ea1-w-0",
42
+ "username": "dat",
43
+ "executable": "/home/dat/pino/bin/python"
44
+ }
wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
wandb/run-20210715_023352-28io0kfl/logs/debug-internal.log ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 02:33:53,426 INFO MainThread:647413 [internal.py:wandb_internal():88] W&B internal server running at pid: 647413, started at: 2021-07-15 02:33:53.426396
2
+ 2021-07-15 02:33:53,428 INFO WriterThread:647413 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/run-28io0kfl.wandb
3
+ 2021-07-15 02:33:53,429 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: check_version
4
+ 2021-07-15 02:33:53,430 DEBUG SenderThread:647413 [sender.py:send():179] send: header
5
+ 2021-07-15 02:33:53,430 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: check_version
6
+ 2021-07-15 02:33:53,471 DEBUG SenderThread:647413 [sender.py:send():179] send: run
7
+ 2021-07-15 02:33:53,641 INFO SenderThread:647413 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files
8
+ 2021-07-15 02:33:53,641 INFO SenderThread:647413 [sender.py:_start_run_threads():716] run started: 28io0kfl with start time 1626316432
9
+ 2021-07-15 02:33:53,641 DEBUG SenderThread:647413 [sender.py:send():179] send: summary
10
+ 2021-07-15 02:33:53,641 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: run_start
11
+ 2021-07-15 02:33:53,642 INFO SenderThread:647413 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
12
+ 2021-07-15 02:33:54,643 INFO Thread-8 :647413 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json
13
+ 2021-07-15 02:33:54,742 DEBUG HandlerThread:647413 [meta.py:__init__():39] meta init
14
+ 2021-07-15 02:33:54,743 DEBUG HandlerThread:647413 [meta.py:__init__():53] meta init done
15
+ 2021-07-15 02:33:54,743 DEBUG HandlerThread:647413 [meta.py:probe():210] probe
16
+ 2021-07-15 02:33:54,744 DEBUG HandlerThread:647413 [meta.py:_setup_git():200] setup git
17
+ 2021-07-15 02:33:54,775 DEBUG HandlerThread:647413 [meta.py:_setup_git():207] setup git done
18
+ 2021-07-15 02:33:54,775 DEBUG HandlerThread:647413 [meta.py:_save_pip():57] save pip
19
+ 2021-07-15 02:33:54,775 DEBUG HandlerThread:647413 [meta.py:_save_pip():71] save pip done
20
+ 2021-07-15 02:33:54,775 DEBUG HandlerThread:647413 [meta.py:probe():252] probe done
21
+ 2021-07-15 02:33:54,778 DEBUG SenderThread:647413 [sender.py:send():179] send: files
22
+ 2021-07-15 02:33:54,779 INFO SenderThread:647413 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
23
+ 2021-07-15 02:33:54,786 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
24
+ 2021-07-15 02:33:54,787 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
25
+ 2021-07-15 02:33:54,917 DEBUG SenderThread:647413 [sender.py:send():179] send: config
26
+ 2021-07-15 02:33:54,917 DEBUG SenderThread:647413 [sender.py:send():179] send: config
27
+ 2021-07-15 02:33:54,917 DEBUG SenderThread:647413 [sender.py:send():179] send: config
28
+ 2021-07-15 02:33:55,232 INFO Thread-11 :647413 [upload_job.py:push():137] Uploaded file /tmp/tmp3vyhbjkzwandb/34s07tos-wandb-metadata.json
29
+ 2021-07-15 02:33:55,643 INFO Thread-8 :647413 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/requirements.txt
30
+ 2021-07-15 02:33:55,643 INFO Thread-8 :647413 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-metadata.json
31
+ 2021-07-15 02:33:55,643 INFO Thread-8 :647413 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log
32
+ 2021-07-15 02:34:09,649 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log
33
+ 2021-07-15 02:34:09,919 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
34
+ 2021-07-15 02:34:09,919 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
35
+ 2021-07-15 02:34:22,827 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
36
+ 2021-07-15 02:34:24,656 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/config.yaml
37
+ 2021-07-15 02:34:25,052 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
38
+ 2021-07-15 02:34:25,052 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
39
+ 2021-07-15 02:34:40,185 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
40
+ 2021-07-15 02:34:40,186 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
41
+ 2021-07-15 02:34:52,904 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
42
+ 2021-07-15 02:34:55,321 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
43
+ 2021-07-15 02:34:55,321 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
44
+ 2021-07-15 02:35:10,455 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
45
+ 2021-07-15 02:35:10,455 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
46
+ 2021-07-15 02:35:22,977 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
47
+ 2021-07-15 02:35:25,587 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
48
+ 2021-07-15 02:35:25,587 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
49
+ 2021-07-15 02:35:40,721 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
50
+ 2021-07-15 02:35:40,722 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
51
+ 2021-07-15 02:35:53,062 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
52
+ 2021-07-15 02:35:55,856 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
53
+ 2021-07-15 02:35:55,856 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
54
+ 2021-07-15 02:36:10,989 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
55
+ 2021-07-15 02:36:10,990 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
56
+ 2021-07-15 02:36:23,136 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
57
+ 2021-07-15 02:36:26,122 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
58
+ 2021-07-15 02:36:26,123 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
59
+ 2021-07-15 02:36:41,256 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
60
+ 2021-07-15 02:36:41,257 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
61
+ 2021-07-15 02:36:53,204 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
62
+ 2021-07-15 02:36:56,393 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
63
+ 2021-07-15 02:36:56,394 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
64
+ 2021-07-15 02:37:11,526 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
65
+ 2021-07-15 02:37:11,526 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
66
+ 2021-07-15 02:37:23,277 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
67
+ 2021-07-15 02:37:26,659 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
68
+ 2021-07-15 02:37:26,659 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
69
+ 2021-07-15 02:37:41,793 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
70
+ 2021-07-15 02:37:41,793 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
71
+ 2021-07-15 02:37:53,344 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
72
+ 2021-07-15 02:37:56,927 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
73
+ 2021-07-15 02:37:56,928 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
74
+ 2021-07-15 02:38:12,060 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
75
+ 2021-07-15 02:38:12,060 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
76
+ 2021-07-15 02:38:23,410 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
77
+ 2021-07-15 02:38:27,194 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
78
+ 2021-07-15 02:38:27,194 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
79
+ 2021-07-15 02:38:42,326 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
80
+ 2021-07-15 02:38:42,326 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
81
+ 2021-07-15 02:38:53,475 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
82
+ 2021-07-15 02:38:57,457 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
83
+ 2021-07-15 02:38:57,457 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
84
+ 2021-07-15 02:39:12,589 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
85
+ 2021-07-15 02:39:12,589 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
86
+ 2021-07-15 02:39:23,542 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
87
+ 2021-07-15 02:39:27,728 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
88
+ 2021-07-15 02:39:27,728 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
89
+ 2021-07-15 02:39:42,860 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
90
+ 2021-07-15 02:39:42,860 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
91
+ 2021-07-15 02:39:53,613 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
92
+ 2021-07-15 02:39:57,993 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
93
+ 2021-07-15 02:39:57,994 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
94
+ 2021-07-15 02:40:13,128 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
95
+ 2021-07-15 02:40:13,128 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
96
+ 2021-07-15 02:40:23,681 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
97
+ 2021-07-15 02:40:28,265 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
98
+ 2021-07-15 02:40:28,266 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
99
+ 2021-07-15 02:40:43,401 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
100
+ 2021-07-15 02:40:43,401 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
101
+ 2021-07-15 02:40:53,753 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
102
+ 2021-07-15 02:40:58,548 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
103
+ 2021-07-15 02:40:58,549 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
104
+ 2021-07-15 02:41:13,683 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
105
+ 2021-07-15 02:41:13,684 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
106
+ 2021-07-15 02:41:23,828 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
107
+ 2021-07-15 02:41:28,827 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
108
+ 2021-07-15 02:41:28,827 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
109
+ 2021-07-15 02:41:43,958 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
110
+ 2021-07-15 02:41:43,958 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
111
+ 2021-07-15 02:41:53,904 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
112
+ 2021-07-15 02:41:59,090 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
113
+ 2021-07-15 02:41:59,091 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
114
+ 2021-07-15 02:42:14,225 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
115
+ 2021-07-15 02:42:14,225 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
116
+ 2021-07-15 02:42:23,978 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
117
+ 2021-07-15 02:42:31,120 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
118
+ 2021-07-15 02:42:31,120 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
119
+ 2021-07-15 02:42:46,253 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
120
+ 2021-07-15 02:42:46,253 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
121
+ 2021-07-15 02:42:54,050 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
122
+ 2021-07-15 02:43:01,385 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
123
+ 2021-07-15 02:43:01,385 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
124
+ 2021-07-15 02:43:16,523 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
125
+ 2021-07-15 02:43:16,524 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
126
+ 2021-07-15 02:43:24,121 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
127
+ 2021-07-15 02:43:31,656 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
128
+ 2021-07-15 02:43:31,657 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
129
+ 2021-07-15 02:43:46,789 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
130
+ 2021-07-15 02:43:46,790 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
131
+ 2021-07-15 02:43:54,190 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
132
+ 2021-07-15 02:44:01,924 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
133
+ 2021-07-15 02:44:01,925 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
134
+ 2021-07-15 02:44:17,056 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
135
+ 2021-07-15 02:44:17,057 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
136
+ 2021-07-15 02:44:24,264 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
137
+ 2021-07-15 02:44:32,190 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
138
+ 2021-07-15 02:44:32,190 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
139
+ 2021-07-15 02:44:47,325 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
140
+ 2021-07-15 02:44:47,326 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
141
+ 2021-07-15 02:44:51,894 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log
142
+ 2021-07-15 02:44:54,337 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
143
+ 2021-07-15 02:45:02,471 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
144
+ 2021-07-15 02:45:02,472 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
145
+ 2021-07-15 02:45:17,619 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
146
+ 2021-07-15 02:45:17,619 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
147
+ 2021-07-15 02:45:24,415 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
148
+ 2021-07-15 02:45:32,753 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
149
+ 2021-07-15 02:45:32,754 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
150
+ 2021-07-15 02:45:47,896 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
151
+ 2021-07-15 02:45:47,897 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
152
+ 2021-07-15 02:45:54,500 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
153
+ 2021-07-15 02:46:03,028 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
154
+ 2021-07-15 02:46:03,028 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
155
+ 2021-07-15 02:46:18,161 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
156
+ 2021-07-15 02:46:18,162 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
157
+ 2021-07-15 02:46:24,580 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
158
+ 2021-07-15 02:46:33,296 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
159
+ 2021-07-15 02:46:33,297 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
160
+ 2021-07-15 02:46:48,441 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
161
+ 2021-07-15 02:46:48,441 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
162
+ 2021-07-15 02:46:54,662 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
163
+ 2021-07-15 02:46:57,942 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log
164
+ 2021-07-15 02:46:58,807 DEBUG SenderThread:647413 [sender.py:send():179] send: telemetry
165
+ 2021-07-15 02:46:58,807 DEBUG SenderThread:647413 [sender.py:send():179] send: exit
166
+ 2021-07-15 02:46:58,807 INFO SenderThread:647413 [sender.py:send_exit():287] handling exit code: 1
167
+ 2021-07-15 02:46:58,809 INFO SenderThread:647413 [sender.py:send_exit():295] send defer
168
+ 2021-07-15 02:46:58,809 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
169
+ 2021-07-15 02:46:58,809 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
170
+ 2021-07-15 02:46:58,809 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 0
171
+ 2021-07-15 02:46:58,810 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
172
+ 2021-07-15 02:46:58,810 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
173
+ 2021-07-15 02:46:58,810 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 0
174
+ 2021-07-15 02:46:58,810 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 1
175
+ 2021-07-15 02:46:58,811 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
176
+ 2021-07-15 02:46:58,811 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 1
177
+ 2021-07-15 02:46:58,873 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
178
+ 2021-07-15 02:46:58,873 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 1
179
+ 2021-07-15 02:46:58,873 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 2
180
+ 2021-07-15 02:46:58,874 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
181
+ 2021-07-15 02:46:58,874 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
182
+ 2021-07-15 02:46:58,874 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 2
183
+ 2021-07-15 02:46:58,875 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
184
+ 2021-07-15 02:46:58,875 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 2
185
+ 2021-07-15 02:46:58,875 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 3
186
+ 2021-07-15 02:46:58,875 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
187
+ 2021-07-15 02:46:58,876 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 3
188
+ 2021-07-15 02:46:58,876 DEBUG SenderThread:647413 [sender.py:send():179] send: summary
189
+ 2021-07-15 02:46:58,876 INFO SenderThread:647413 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
190
+ 2021-07-15 02:46:58,877 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
191
+ 2021-07-15 02:46:58,877 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 3
192
+ 2021-07-15 02:46:58,877 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 4
193
+ 2021-07-15 02:46:58,877 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
194
+ 2021-07-15 02:46:58,877 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 4
195
+ 2021-07-15 02:46:58,877 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
196
+ 2021-07-15 02:46:58,877 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 4
197
+ 2021-07-15 02:46:58,913 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
198
+ 2021-07-15 02:46:58,943 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json
199
+ 2021-07-15 02:46:58,943 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log
200
+ 2021-07-15 02:46:59,055 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 5
201
+ 2021-07-15 02:46:59,055 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
202
+ 2021-07-15 02:46:59,055 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
203
+ 2021-07-15 02:46:59,055 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 5
204
+ 2021-07-15 02:46:59,056 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
205
+ 2021-07-15 02:46:59,056 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 5
206
+ 2021-07-15 02:46:59,056 INFO SenderThread:647413 [dir_watcher.py:finish():282] shutting down directory watcher
207
+ 2021-07-15 02:46:59,157 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
208
+ 2021-07-15 02:46:59,943 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/config.yaml
209
+ 2021-07-15 02:46:59,944 INFO SenderThread:647413 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files
210
+ 2021-07-15 02:46:59,944 INFO SenderThread:647413 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/requirements.txt requirements.txt
211
+ 2021-07-15 02:46:59,944 INFO SenderThread:647413 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log output.log
212
+ 2021-07-15 02:46:59,945 INFO SenderThread:647413 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-metadata.json wandb-metadata.json
213
+ 2021-07-15 02:46:59,945 INFO SenderThread:647413 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/config.yaml config.yaml
214
+ 2021-07-15 02:46:59,948 INFO SenderThread:647413 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json wandb-summary.json
215
+ 2021-07-15 02:46:59,951 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 6
216
+ 2021-07-15 02:46:59,951 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
217
+ 2021-07-15 02:46:59,952 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
218
+ 2021-07-15 02:46:59,953 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 6
219
+ 2021-07-15 02:46:59,956 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
220
+ 2021-07-15 02:46:59,956 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 6
221
+ 2021-07-15 02:46:59,956 INFO SenderThread:647413 [file_pusher.py:finish():177] shutting down file pusher
222
+ 2021-07-15 02:47:00,054 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
223
+ 2021-07-15 02:47:00,054 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
224
+ 2021-07-15 02:47:00,157 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
225
+ 2021-07-15 02:47:00,157 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
226
+ 2021-07-15 02:47:00,259 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
227
+ 2021-07-15 02:47:00,259 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
228
+ 2021-07-15 02:47:00,361 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
229
+ 2021-07-15 02:47:00,362 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
230
+ 2021-07-15 02:47:00,377 INFO Thread-14 :647413 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/config.yaml
231
+ 2021-07-15 02:47:00,382 INFO Thread-12 :647413 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/requirements.txt
232
+ 2021-07-15 02:47:00,415 INFO Thread-15 :647413 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json
233
+ 2021-07-15 02:47:00,439 INFO Thread-13 :647413 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log
234
+ 2021-07-15 02:47:00,464 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
235
+ 2021-07-15 02:47:00,464 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
236
+ 2021-07-15 02:47:00,566 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
237
+ 2021-07-15 02:47:00,566 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
238
+ 2021-07-15 02:47:00,640 INFO Thread-7 :647413 [sender.py:transition_state():308] send defer: 7
239
+ 2021-07-15 02:47:00,640 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
240
+ 2021-07-15 02:47:00,640 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 7
241
+ 2021-07-15 02:47:00,641 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
242
+ 2021-07-15 02:47:00,641 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 7
243
+ 2021-07-15 02:47:00,668 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
244
+ 2021-07-15 02:47:00,919 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 8
245
+ 2021-07-15 02:47:00,920 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
246
+ 2021-07-15 02:47:00,920 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
247
+ 2021-07-15 02:47:00,920 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 8
248
+ 2021-07-15 02:47:00,921 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
249
+ 2021-07-15 02:47:00,921 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 8
250
+ 2021-07-15 02:47:00,921 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 9
251
+ 2021-07-15 02:47:00,921 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
252
+ 2021-07-15 02:47:00,921 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 9
253
+ 2021-07-15 02:47:00,921 DEBUG SenderThread:647413 [sender.py:send():179] send: final
254
+ 2021-07-15 02:47:00,922 DEBUG SenderThread:647413 [sender.py:send():179] send: footer
255
+ 2021-07-15 02:47:00,922 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
256
+ 2021-07-15 02:47:00,922 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 9
257
+ 2021-07-15 02:47:01,022 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
258
+ 2021-07-15 02:47:01,022 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
259
+ 2021-07-15 02:47:01,022 INFO SenderThread:647413 [file_pusher.py:join():182] waiting for file pusher
260
+ 2021-07-15 02:47:01,024 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: get_summary
261
+ 2021-07-15 02:47:01,024 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: sampled_history
262
+ 2021-07-15 02:47:01,025 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: shutdown
263
+ 2021-07-15 02:47:01,025 INFO HandlerThread:647413 [handler.py:finish():638] shutting down handler
264
+ 2021-07-15 02:47:01,922 INFO WriterThread:647413 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/run-28io0kfl.wandb
265
+ 2021-07-15 02:47:02,023 INFO SenderThread:647413 [sender.py:finish():945] shutting down sender
266
+ 2021-07-15 02:47:02,023 INFO SenderThread:647413 [file_pusher.py:finish():177] shutting down file pusher
267
+ 2021-07-15 02:47:02,023 INFO SenderThread:647413 [file_pusher.py:join():182] waiting for file pusher
268
+ 2021-07-15 02:47:02,026 INFO MainThread:647413 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210715_023352-28io0kfl/logs/debug.log ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 02:33:52,731 INFO MainThread:646155 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/logs/debug.log
4
+ 2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/logs/debug-internal.log
5
+ 2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_init.py:init():419] starting backend
9
+ 2021-07-15 02:33:52,732 INFO MainThread:646155 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-15 02:33:52,780 INFO MainThread:646155 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-15 02:33:52,826 INFO MainThread:646155 [backend.py:ensure_launched():139] started backend process with pid: 647413
12
+ 2021-07-15 02:33:52,828 INFO MainThread:646155 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-15 02:33:52,831 INFO MainThread:646155 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-15 02:33:52,832 INFO MainThread:646155 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-15 02:33:53,470 INFO MainThread:646155 [wandb_init.py:init():496] got version response
16
+ 2021-07-15 02:33:53,470 INFO MainThread:646155 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-15 02:33:53,641 INFO MainThread:646155 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-15 02:33:54,781 INFO MainThread:646155 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-15 02:33:54,782 INFO MainThread:646155 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-15 02:33:54,783 INFO MainThread:646155 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-15 02:33:54,784 INFO MainThread:646155 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-15 02:33:54,785 INFO MainThread:646155 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-15 02:33:54,790 INFO MainThread:646155 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_02-33-44_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 40000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-15 02:33:54,792 INFO MainThread:646155 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
25
+ 2021-07-15 02:33:54,793 INFO MainThread:646155 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
26
+ 2021-07-15 02:46:56,604 INFO MainThread:646155 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
27
+ 2021-07-15 02:46:56,605 INFO MainThread:646155 [wandb_run.py:_restore():1565] restore
28
+ 2021-07-15 02:46:58,811 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
29
+ wandb_count: 1
30
+ }
31
+ pusher_stats {
32
+ uploaded_bytes: 1375
33
+ total_bytes: 1375
34
+ }
35
+
36
+ 2021-07-15 02:46:59,056 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
37
+ wandb_count: 1
38
+ }
39
+ pusher_stats {
40
+ uploaded_bytes: 1375
41
+ total_bytes: 1375
42
+ }
43
+
44
+ 2021-07-15 02:46:59,953 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
45
+ wandb_count: 4
46
+ }
47
+ pusher_stats {
48
+ uploaded_bytes: 1375
49
+ total_bytes: 10904
50
+ }
51
+
52
+ 2021-07-15 02:47:00,055 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
53
+ wandb_count: 5
54
+ }
55
+ pusher_stats {
56
+ uploaded_bytes: 1375
57
+ total_bytes: 10906
58
+ }
59
+
60
+ 2021-07-15 02:47:00,158 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
61
+ wandb_count: 5
62
+ }
63
+ pusher_stats {
64
+ uploaded_bytes: 10906
65
+ total_bytes: 10906
66
+ }
67
+
68
+ 2021-07-15 02:47:00,260 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
69
+ wandb_count: 5
70
+ }
71
+ pusher_stats {
72
+ uploaded_bytes: 10906
73
+ total_bytes: 10906
74
+ }
75
+
76
+ 2021-07-15 02:47:00,362 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
77
+ wandb_count: 5
78
+ }
79
+ pusher_stats {
80
+ uploaded_bytes: 10906
81
+ total_bytes: 10906
82
+ }
83
+
84
+ 2021-07-15 02:47:00,465 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
85
+ wandb_count: 5
86
+ }
87
+ pusher_stats {
88
+ uploaded_bytes: 10906
89
+ total_bytes: 10906
90
+ }
91
+
92
+ 2021-07-15 02:47:00,567 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
93
+ wandb_count: 5
94
+ }
95
+ pusher_stats {
96
+ uploaded_bytes: 10906
97
+ total_bytes: 10906
98
+ }
99
+
100
+ 2021-07-15 02:47:00,920 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
101
+ wandb_count: 5
102
+ }
103
+ pusher_stats {
104
+ uploaded_bytes: 10906
105
+ total_bytes: 10906
106
+ }
107
+
108
+ 2021-07-15 02:47:01,023 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
109
+ exit_result {
110
+ }
111
+ file_counts {
112
+ wandb_count: 5
113
+ }
114
+ pusher_stats {
115
+ uploaded_bytes: 10906
116
+ total_bytes: 10906
117
+ }
118
+
119
+ 2021-07-15 02:47:02,337 INFO MainThread:646155 [wandb_run.py:_show_files():1937] logging synced files
wandb/run-20210715_023352-28io0kfl/run-28io0kfl.wandb ADDED
Binary file (13 kB). View file
 
wandb/run-20210715_024816-39ztwpif/files/config.yaml ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 2:
17
+ - 3
18
+ - 11
19
+ 4: 3.8.10
20
+ 5: 0.10.33
21
+ 6: 4.9.0.dev0
22
+ 8:
23
+ - 5
24
+ adafactor:
25
+ desc: null
26
+ value: false
27
+ adam_beta1:
28
+ desc: null
29
+ value: 0.9
30
+ adam_beta2:
31
+ desc: null
32
+ value: 0.98
33
+ adam_epsilon:
34
+ desc: null
35
+ value: 1.0e-08
36
+ cache_dir:
37
+ desc: null
38
+ value: null
39
+ config_name:
40
+ desc: null
41
+ value: ./
42
+ dataloader_drop_last:
43
+ desc: null
44
+ value: false
45
+ dataloader_num_workers:
46
+ desc: null
47
+ value: 0
48
+ dataloader_pin_memory:
49
+ desc: null
50
+ value: true
51
+ dataset_config_name:
52
+ desc: null
53
+ value: null
54
+ dataset_name:
55
+ desc: null
56
+ value: null
57
+ ddp_find_unused_parameters:
58
+ desc: null
59
+ value: null
60
+ debug:
61
+ desc: null
62
+ value: []
63
+ deepspeed:
64
+ desc: null
65
+ value: null
66
+ disable_tqdm:
67
+ desc: null
68
+ value: false
69
+ do_eval:
70
+ desc: null
71
+ value: false
72
+ do_predict:
73
+ desc: null
74
+ value: false
75
+ do_train:
76
+ desc: null
77
+ value: false
78
+ dtype:
79
+ desc: null
80
+ value: float32
81
+ eval_accumulation_steps:
82
+ desc: null
83
+ value: null
84
+ eval_steps:
85
+ desc: null
86
+ value: 20000
87
+ evaluation_strategy:
88
+ desc: null
89
+ value: IntervalStrategy.NO
90
+ fp16:
91
+ desc: null
92
+ value: false
93
+ fp16_backend:
94
+ desc: null
95
+ value: auto
96
+ fp16_full_eval:
97
+ desc: null
98
+ value: false
99
+ fp16_opt_level:
100
+ desc: null
101
+ value: O1
102
+ gradient_accumulation_steps:
103
+ desc: null
104
+ value: 1
105
+ greater_is_better:
106
+ desc: null
107
+ value: null
108
+ group_by_length:
109
+ desc: null
110
+ value: false
111
+ ignore_data_skip:
112
+ desc: null
113
+ value: false
114
+ label_names:
115
+ desc: null
116
+ value: null
117
+ label_smoothing_factor:
118
+ desc: null
119
+ value: 0.0
120
+ learning_rate:
121
+ desc: null
122
+ value: 3.0e-05
123
+ length_column_name:
124
+ desc: null
125
+ value: length
126
+ line_by_line:
127
+ desc: null
128
+ value: false
129
+ load_best_model_at_end:
130
+ desc: null
131
+ value: false
132
+ local_rank:
133
+ desc: null
134
+ value: -1
135
+ log_level:
136
+ desc: null
137
+ value: -1
138
+ log_level_replica:
139
+ desc: null
140
+ value: -1
141
+ log_on_each_node:
142
+ desc: null
143
+ value: true
144
+ logging_dir:
145
+ desc: null
146
+ value: ./runs/Jul15_02-48-08_t1v-n-f5c06ea1-w-0
147
+ logging_first_step:
148
+ desc: null
149
+ value: false
150
+ logging_steps:
151
+ desc: null
152
+ value: 50
153
+ logging_strategy:
154
+ desc: null
155
+ value: IntervalStrategy.STEPS
156
+ lr_scheduler_type:
157
+ desc: null
158
+ value: SchedulerType.LINEAR
159
+ max_eval_samples:
160
+ desc: null
161
+ value: 500
162
+ max_grad_norm:
163
+ desc: null
164
+ value: 1.0
165
+ max_seq_length:
166
+ desc: null
167
+ value: 4096
168
+ max_steps:
169
+ desc: null
170
+ value: -1
171
+ metric_for_best_model:
172
+ desc: null
173
+ value: null
174
+ mlm_probability:
175
+ desc: null
176
+ value: 0.15
177
+ model_name_or_path:
178
+ desc: null
179
+ value: null
180
+ model_type:
181
+ desc: null
182
+ value: big_bird
183
+ mp_parameters:
184
+ desc: null
185
+ value: ''
186
+ no_cuda:
187
+ desc: null
188
+ value: false
189
+ num_train_epochs:
190
+ desc: null
191
+ value: 5.0
192
+ output_dir:
193
+ desc: null
194
+ value: ./
195
+ overwrite_cache:
196
+ desc: null
197
+ value: false
198
+ overwrite_output_dir:
199
+ desc: null
200
+ value: true
201
+ pad_to_max_length:
202
+ desc: null
203
+ value: false
204
+ past_index:
205
+ desc: null
206
+ value: -1
207
+ per_device_eval_batch_size:
208
+ desc: null
209
+ value: 1
210
+ per_device_train_batch_size:
211
+ desc: null
212
+ value: 1
213
+ per_gpu_eval_batch_size:
214
+ desc: null
215
+ value: null
216
+ per_gpu_train_batch_size:
217
+ desc: null
218
+ value: null
219
+ prediction_loss_only:
220
+ desc: null
221
+ value: false
222
+ preprocessing_num_workers:
223
+ desc: null
224
+ value: 96
225
+ push_to_hub:
226
+ desc: null
227
+ value: true
228
+ push_to_hub_model_id:
229
+ desc: null
230
+ value: ''
231
+ push_to_hub_organization:
232
+ desc: null
233
+ value: null
234
+ push_to_hub_token:
235
+ desc: null
236
+ value: null
237
+ remove_unused_columns:
238
+ desc: null
239
+ value: true
240
+ report_to:
241
+ desc: null
242
+ value:
243
+ - tensorboard
244
+ - wandb
245
+ resume_from_checkpoint:
246
+ desc: null
247
+ value: null
248
+ run_name:
249
+ desc: null
250
+ value: ./
251
+ save_on_each_node:
252
+ desc: null
253
+ value: false
254
+ save_steps:
255
+ desc: null
256
+ value: 40000
257
+ save_strategy:
258
+ desc: null
259
+ value: IntervalStrategy.STEPS
260
+ save_total_limit:
261
+ desc: null
262
+ value: 5
263
+ seed:
264
+ desc: null
265
+ value: 42
266
+ sharded_ddp:
267
+ desc: null
268
+ value: []
269
+ skip_memory_metrics:
270
+ desc: null
271
+ value: true
272
+ tokenizer_name:
273
+ desc: null
274
+ value: ./
275
+ tpu_metrics_debug:
276
+ desc: null
277
+ value: false
278
+ tpu_num_cores:
279
+ desc: null
280
+ value: null
281
+ train_ref_file:
282
+ desc: null
283
+ value: null
284
+ use_fast_tokenizer:
285
+ desc: null
286
+ value: true
287
+ use_legacy_prediction_loop:
288
+ desc: null
289
+ value: false
290
+ validation_ref_file:
291
+ desc: null
292
+ value: null
293
+ validation_split_percentage:
294
+ desc: null
295
+ value: 5
296
+ warmup_ratio:
297
+ desc: null
298
+ value: 0.0
299
+ warmup_steps:
300
+ desc: null
301
+ value: 10000
302
+ weight_decay:
303
+ desc: null
304
+ value: 0.0095
wandb/run-20210715_024816-39ztwpif/files/output.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
2
+ warnings.warn(
3
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
4
+ warnings.warn(
5
+ Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s]
6
+ Training...: 0%| | 0/503952 [02:05<?, ?it/s]
7
+ Epoch ... (1/5): 0%| | 0/5 [09:54<?, ?it/s]
8
+ Traceback (most recent call last):
9
+ File "./run_mlm_flax_no_accum.py", line 699, in <module>
10
+ state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
11
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback
12
+ return fun(*args, **kwargs)
13
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped
14
+ out = pxla.xla_pmap(
15
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind
16
+ return call_bind(self, fun, *args, **params)
17
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind
18
+ outs = primitive.process(top_trace, fun, tracers, params)
19
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process
20
+ return trace.process_map(self, fun, tracers, params)
21
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call
22
+ return primitive.impl(f, *tracers, **params)
23
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl
24
+ return compiled_fun(*args)
25
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
26
+ out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
27
+ jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 6.35G free, 0B reserved, and 6.31G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
28
+ The stack trace below excludes JAX-internal frames.
29
+ The preceding is the original exception that occurred, unmodified.
30
+ --------------------
31
+ The above exception was the direct cause of the following exception:
32
+ Traceback (most recent call last):
33
+ File "./run_mlm_flax_no_accum.py", line 699, in <module>
34
+ state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
35
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
36
+ out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
37
+ RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 6.35G free, 0B reserved, and 6.31G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
wandb/run-20210715_024816-39ztwpif/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210715_024816-39ztwpif/files/wandb-metadata.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-15T02:48:18.837710",
5
+ "startedAt": "2021-07-15T02:48:16.824799",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=10000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=50",
22
+ "--eval_steps=20000",
23
+ "--num_train_epochs=5",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=40000",
26
+ "--learning_rate=3e-5",
27
+ "--per_device_train_batch_size=1",
28
+ "--per_device_eval_batch_size=1",
29
+ "--save_total_limit=5",
30
+ "--max_eval_samples=500"
31
+ ],
32
+ "state": "running",
33
+ "program": "./run_mlm_flax_no_accum.py",
34
+ "codePath": "run_mlm_flax_no_accum.py",
35
+ "git": {
36
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
37
+ "commit": "87e02e7ff8fbaea90c8c4ad1c984f83742432303"
38
+ },
39
+ "email": null,
40
+ "root": "/home/dat/pino-roberta-base",
41
+ "host": "t1v-n-f5c06ea1-w-0",
42
+ "username": "dat",
43
+ "executable": "/home/dat/pino/bin/python"
44
+ }
wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
wandb/run-20210715_024816-39ztwpif/logs/debug-internal.log ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 02:48:17,521 INFO MainThread:649905 [internal.py:wandb_internal():88] W&B internal server running at pid: 649905, started at: 2021-07-15 02:48:17.521263
2
+ 2021-07-15 02:48:17,523 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: check_version
3
+ 2021-07-15 02:48:17,524 INFO WriterThread:649905 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/run-39ztwpif.wandb
4
+ 2021-07-15 02:48:17,524 DEBUG SenderThread:649905 [sender.py:send():179] send: header
5
+ 2021-07-15 02:48:17,525 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: check_version
6
+ 2021-07-15 02:48:17,561 DEBUG SenderThread:649905 [sender.py:send():179] send: run
7
+ 2021-07-15 02:48:17,732 INFO SenderThread:649905 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files
8
+ 2021-07-15 02:48:17,732 INFO SenderThread:649905 [sender.py:_start_run_threads():716] run started: 39ztwpif with start time 1626317296
9
+ 2021-07-15 02:48:17,732 DEBUG SenderThread:649905 [sender.py:send():179] send: summary
10
+ 2021-07-15 02:48:17,732 INFO SenderThread:649905 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
11
+ 2021-07-15 02:48:17,733 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: run_start
12
+ 2021-07-15 02:48:18,733 INFO Thread-8 :649905 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json
13
+ 2021-07-15 02:48:18,837 DEBUG HandlerThread:649905 [meta.py:__init__():39] meta init
14
+ 2021-07-15 02:48:18,837 DEBUG HandlerThread:649905 [meta.py:__init__():53] meta init done
15
+ 2021-07-15 02:48:18,837 DEBUG HandlerThread:649905 [meta.py:probe():210] probe
16
+ 2021-07-15 02:48:18,838 DEBUG HandlerThread:649905 [meta.py:_setup_git():200] setup git
17
+ 2021-07-15 02:48:18,868 DEBUG HandlerThread:649905 [meta.py:_setup_git():207] setup git done
18
+ 2021-07-15 02:48:18,868 DEBUG HandlerThread:649905 [meta.py:_save_pip():57] save pip
19
+ 2021-07-15 02:48:18,868 DEBUG HandlerThread:649905 [meta.py:_save_pip():71] save pip done
20
+ 2021-07-15 02:48:18,868 DEBUG HandlerThread:649905 [meta.py:probe():252] probe done
21
+ 2021-07-15 02:48:18,871 DEBUG SenderThread:649905 [sender.py:send():179] send: files
22
+ 2021-07-15 02:48:18,872 INFO SenderThread:649905 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
23
+ 2021-07-15 02:48:18,877 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
24
+ 2021-07-15 02:48:18,878 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
25
+ 2021-07-15 02:48:19,006 DEBUG SenderThread:649905 [sender.py:send():179] send: config
26
+ 2021-07-15 02:48:19,006 DEBUG SenderThread:649905 [sender.py:send():179] send: config
27
+ 2021-07-15 02:48:19,006 DEBUG SenderThread:649905 [sender.py:send():179] send: config
28
+ 2021-07-15 02:48:19,311 INFO Thread-11 :649905 [upload_job.py:push():137] Uploaded file /tmp/tmpgcnix6scwandb/15nx6xdi-wandb-metadata.json
29
+ 2021-07-15 02:48:19,733 INFO Thread-8 :649905 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/requirements.txt
30
+ 2021-07-15 02:48:19,733 INFO Thread-8 :649905 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-metadata.json
31
+ 2021-07-15 02:48:19,733 INFO Thread-8 :649905 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log
32
+ 2021-07-15 02:48:33,738 INFO Thread-8 :649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log
33
+ 2021-07-15 02:48:34,008 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
34
+ 2021-07-15 02:48:34,009 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
35
+ 2021-07-15 02:48:46,922 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
36
+ 2021-07-15 02:48:48,744 INFO Thread-8 :649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/config.yaml
37
+ 2021-07-15 02:48:49,147 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
38
+ 2021-07-15 02:48:49,147 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
39
+ 2021-07-15 02:49:04,279 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
40
+ 2021-07-15 02:49:04,280 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
41
+ 2021-07-15 02:49:17,003 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
42
+ 2021-07-15 02:49:19,412 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
43
+ 2021-07-15 02:49:19,412 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
44
+ 2021-07-15 02:49:34,543 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
45
+ 2021-07-15 02:49:34,543 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
46
+ 2021-07-15 02:49:47,079 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
47
+ 2021-07-15 02:49:49,677 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
48
+ 2021-07-15 02:49:49,677 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
49
+ 2021-07-15 02:50:04,809 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
50
+ 2021-07-15 02:50:04,809 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
51
+ 2021-07-15 02:50:17,143 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
52
+ 2021-07-15 02:50:19,943 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
53
+ 2021-07-15 02:50:19,943 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
54
+ 2021-07-15 02:50:35,077 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
55
+ 2021-07-15 02:50:35,078 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
56
+ 2021-07-15 02:50:47,219 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
57
+ 2021-07-15 02:50:50,223 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
58
+ 2021-07-15 02:50:50,223 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
59
+ 2021-07-15 02:51:05,389 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
60
+ 2021-07-15 02:51:05,389 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
61
+ 2021-07-15 02:51:17,291 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
62
+ 2021-07-15 02:51:20,521 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
63
+ 2021-07-15 02:51:20,521 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
64
+ 2021-07-15 02:51:35,655 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
65
+ 2021-07-15 02:51:35,655 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
66
+ 2021-07-15 02:51:47,368 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
67
+ 2021-07-15 02:51:50,786 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
68
+ 2021-07-15 02:51:50,786 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
69
+ 2021-07-15 02:52:05,917 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
70
+ 2021-07-15 02:52:05,917 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
71
+ 2021-07-15 02:52:17,445 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
72
+ 2021-07-15 02:52:21,058 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
73
+ 2021-07-15 02:52:21,058 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
74
+ 2021-07-15 02:52:36,188 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
75
+ 2021-07-15 02:52:36,189 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
76
+ 2021-07-15 02:52:47,519 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
77
+ 2021-07-15 02:52:51,318 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
78
+ 2021-07-15 02:52:51,318 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
79
+ 2021-07-15 02:53:06,454 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
80
+ 2021-07-15 02:53:06,454 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
81
+ 2021-07-15 02:53:17,587 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
82
+ 2021-07-15 02:53:21,586 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
83
+ 2021-07-15 02:53:21,586 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
84
+ 2021-07-15 02:53:36,717 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
85
+ 2021-07-15 02:53:36,718 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
86
+ 2021-07-15 02:53:47,654 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
87
+ 2021-07-15 02:53:51,851 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
88
+ 2021-07-15 02:53:51,851 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
89
+ 2021-07-15 02:54:06,983 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
90
+ 2021-07-15 02:54:06,983 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
91
+ 2021-07-15 02:54:17,727 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
92
+ 2021-07-15 02:54:22,115 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
93
+ 2021-07-15 02:54:22,115 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
94
+ 2021-07-15 02:54:37,245 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
95
+ 2021-07-15 02:54:37,246 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
96
+ 2021-07-15 02:54:47,796 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
97
+ 2021-07-15 02:54:52,379 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
98
+ 2021-07-15 02:54:52,379 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
99
+ 2021-07-15 02:55:07,511 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
100
+ 2021-07-15 02:55:07,511 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
101
+ 2021-07-15 02:55:17,864 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
102
+ 2021-07-15 02:55:22,641 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
103
+ 2021-07-15 02:55:22,641 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
104
+ 2021-07-15 02:55:37,785 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
105
+ 2021-07-15 02:55:37,786 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
106
+ 2021-07-15 02:55:47,933 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
107
+ 2021-07-15 02:55:52,928 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
108
+ 2021-07-15 02:55:52,929 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
109
+ 2021-07-15 02:56:08,060 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
110
+ 2021-07-15 02:56:08,060 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
111
+ 2021-07-15 02:56:18,007 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
112
+ 2021-07-15 02:56:23,209 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
113
+ 2021-07-15 02:56:23,210 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
114
+ 2021-07-15 02:56:23,919 INFO Thread-8 :649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log
115
+ 2021-07-15 02:56:38,372 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
116
+ 2021-07-15 02:56:38,372 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
117
+ 2021-07-15 02:56:48,082 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
118
+ 2021-07-15 02:56:53,514 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
119
+ 2021-07-15 02:56:53,514 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
120
+ 2021-07-15 02:57:08,654 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
121
+ 2021-07-15 02:57:08,655 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
122
+ 2021-07-15 02:57:18,162 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
123
+ 2021-07-15 02:57:23,787 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
124
+ 2021-07-15 02:57:23,787 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
125
+ 2021-07-15 02:57:38,920 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
126
+ 2021-07-15 02:57:38,920 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
127
+ 2021-07-15 02:57:48,241 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
128
+ 2021-07-15 02:57:54,061 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
129
+ 2021-07-15 02:57:54,061 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
130
+ 2021-07-15 02:58:09,194 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
131
+ 2021-07-15 02:58:09,195 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
132
+ 2021-07-15 02:58:18,311 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
133
+ 2021-07-15 02:58:24,331 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
134
+ 2021-07-15 02:58:24,331 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
135
+ 2021-07-15 02:58:27,972 INFO Thread-8 :649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log
136
+ 2021-07-15 02:58:29,408 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
137
+ 2021-07-15 02:58:29,409 DEBUG SenderThread:649905 [sender.py:send():179] send: telemetry
138
+ 2021-07-15 02:58:29,409 DEBUG SenderThread:649905 [sender.py:send():179] send: exit
139
+ 2021-07-15 02:58:29,409 INFO SenderThread:649905 [sender.py:send_exit():287] handling exit code: 1
140
+ 2021-07-15 02:58:29,411 INFO SenderThread:649905 [sender.py:send_exit():295] send defer
141
+ 2021-07-15 02:58:29,411 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
142
+ 2021-07-15 02:58:29,412 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
143
+ 2021-07-15 02:58:29,412 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 0
144
+ 2021-07-15 02:58:29,412 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
145
+ 2021-07-15 02:58:29,412 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 0
146
+ 2021-07-15 02:58:29,412 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 1
147
+ 2021-07-15 02:58:29,412 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
148
+ 2021-07-15 02:58:29,413 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 1
149
+ 2021-07-15 02:58:29,440 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
150
+ 2021-07-15 02:58:29,440 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 1
151
+ 2021-07-15 02:58:29,440 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 2
152
+ 2021-07-15 02:58:29,440 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
153
+ 2021-07-15 02:58:29,441 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
154
+ 2021-07-15 02:58:29,441 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 2
155
+ 2021-07-15 02:58:29,441 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
156
+ 2021-07-15 02:58:29,441 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 2
157
+ 2021-07-15 02:58:29,441 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 3
158
+ 2021-07-15 02:58:29,442 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
159
+ 2021-07-15 02:58:29,442 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 3
160
+ 2021-07-15 02:58:29,442 DEBUG SenderThread:649905 [sender.py:send():179] send: summary
161
+ 2021-07-15 02:58:29,442 INFO SenderThread:649905 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
162
+ 2021-07-15 02:58:29,443 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
163
+ 2021-07-15 02:58:29,443 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 3
164
+ 2021-07-15 02:58:29,443 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 4
165
+ 2021-07-15 02:58:29,443 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
166
+ 2021-07-15 02:58:29,443 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 4
167
+ 2021-07-15 02:58:29,443 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
168
+ 2021-07-15 02:58:29,443 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 4
169
+ 2021-07-15 02:58:29,513 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
170
+ 2021-07-15 02:58:29,619 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 5
171
+ 2021-07-15 02:58:29,619 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
172
+ 2021-07-15 02:58:29,620 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
173
+ 2021-07-15 02:58:29,620 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 5
174
+ 2021-07-15 02:58:29,620 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
175
+ 2021-07-15 02:58:29,620 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 5
176
+ 2021-07-15 02:58:29,620 INFO SenderThread:649905 [dir_watcher.py:finish():282] shutting down directory watcher
177
+ 2021-07-15 02:58:29,722 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
178
+ 2021-07-15 02:58:29,974 INFO SenderThread:649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json
179
+ 2021-07-15 02:58:29,974 INFO SenderThread:649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log
180
+ 2021-07-15 02:58:29,974 INFO SenderThread:649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/config.yaml
181
+ 2021-07-15 02:58:29,974 INFO SenderThread:649905 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files
182
+ 2021-07-15 02:58:29,974 INFO SenderThread:649905 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/requirements.txt requirements.txt
183
+ 2021-07-15 02:58:29,975 INFO SenderThread:649905 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log output.log
184
+ 2021-07-15 02:58:29,975 INFO SenderThread:649905 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-metadata.json wandb-metadata.json
185
+ 2021-07-15 02:58:29,975 INFO SenderThread:649905 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/config.yaml config.yaml
186
+ 2021-07-15 02:58:29,975 INFO SenderThread:649905 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json wandb-summary.json
187
+ 2021-07-15 02:58:29,976 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 6
188
+ 2021-07-15 02:58:29,976 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
189
+ 2021-07-15 02:58:29,977 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
190
+ 2021-07-15 02:58:29,983 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 6
191
+ 2021-07-15 02:58:29,986 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
192
+ 2021-07-15 02:58:29,989 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 6
193
+ 2021-07-15 02:58:29,989 INFO SenderThread:649905 [file_pusher.py:finish():177] shutting down file pusher
194
+ 2021-07-15 02:58:30,078 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
195
+ 2021-07-15 02:58:30,078 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
196
+ 2021-07-15 02:58:30,181 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
197
+ 2021-07-15 02:58:30,181 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
198
+ 2021-07-15 02:58:30,283 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
199
+ 2021-07-15 02:58:30,283 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
200
+ 2021-07-15 02:58:30,385 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
201
+ 2021-07-15 02:58:30,385 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
202
+ 2021-07-15 02:58:30,418 INFO Thread-13 :649905 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log
203
+ 2021-07-15 02:58:30,421 INFO Thread-14 :649905 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/config.yaml
204
+ 2021-07-15 02:58:30,423 INFO Thread-12 :649905 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/requirements.txt
205
+ 2021-07-15 02:58:30,451 INFO Thread-15 :649905 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json
206
+ 2021-07-15 02:58:30,487 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
207
+ 2021-07-15 02:58:30,487 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
208
+ 2021-07-15 02:58:30,589 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
209
+ 2021-07-15 02:58:30,589 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
210
+ 2021-07-15 02:58:30,652 INFO Thread-7 :649905 [sender.py:transition_state():308] send defer: 7
211
+ 2021-07-15 02:58:30,652 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
212
+ 2021-07-15 02:58:30,653 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 7
213
+ 2021-07-15 02:58:30,653 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
214
+ 2021-07-15 02:58:30,653 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 7
215
+ 2021-07-15 02:58:30,691 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
216
+ 2021-07-15 02:58:30,941 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 8
217
+ 2021-07-15 02:58:30,941 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
218
+ 2021-07-15 02:58:30,941 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
219
+ 2021-07-15 02:58:30,941 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 8
220
+ 2021-07-15 02:58:30,942 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
221
+ 2021-07-15 02:58:30,942 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 8
222
+ 2021-07-15 02:58:30,942 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 9
223
+ 2021-07-15 02:58:30,942 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
224
+ 2021-07-15 02:58:30,942 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 9
225
+ 2021-07-15 02:58:30,942 DEBUG SenderThread:649905 [sender.py:send():179] send: final
226
+ 2021-07-15 02:58:30,943 DEBUG SenderThread:649905 [sender.py:send():179] send: footer
227
+ 2021-07-15 02:58:30,943 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
228
+ 2021-07-15 02:58:30,943 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 9
229
+ 2021-07-15 02:58:31,043 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
230
+ 2021-07-15 02:58:31,043 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
231
+ 2021-07-15 02:58:31,043 INFO SenderThread:649905 [file_pusher.py:join():182] waiting for file pusher
232
+ 2021-07-15 02:58:31,045 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: get_summary
233
+ 2021-07-15 02:58:31,046 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: sampled_history
234
+ 2021-07-15 02:58:31,046 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: shutdown
235
+ 2021-07-15 02:58:31,046 INFO HandlerThread:649905 [handler.py:finish():638] shutting down handler
236
+ 2021-07-15 02:58:31,943 INFO WriterThread:649905 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/run-39ztwpif.wandb
237
+ 2021-07-15 02:58:32,044 INFO SenderThread:649905 [sender.py:finish():945] shutting down sender
238
+ 2021-07-15 02:58:32,044 INFO SenderThread:649905 [file_pusher.py:finish():177] shutting down file pusher
239
+ 2021-07-15 02:58:32,044 INFO SenderThread:649905 [file_pusher.py:join():182] waiting for file pusher
240
+ 2021-07-15 02:58:32,047 INFO MainThread:649905 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210715_024816-39ztwpif/logs/debug.log ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/logs/debug.log
4
+ 2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/logs/debug-internal.log
5
+ 2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_init.py:init():419] starting backend
9
+ 2021-07-15 02:48:16,827 INFO MainThread:648648 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-15 02:48:16,875 INFO MainThread:648648 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-15 02:48:16,922 INFO MainThread:648648 [backend.py:ensure_launched():139] started backend process with pid: 649905
12
+ 2021-07-15 02:48:16,924 INFO MainThread:648648 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-15 02:48:16,927 INFO MainThread:648648 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-15 02:48:16,928 INFO MainThread:648648 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-15 02:48:17,560 INFO MainThread:648648 [wandb_init.py:init():496] got version response
16
+ 2021-07-15 02:48:17,560 INFO MainThread:648648 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-15 02:48:17,732 INFO MainThread:648648 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-15 02:48:18,876 INFO MainThread:648648 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-15 02:48:18,877 INFO MainThread:648648 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-15 02:48:18,877 INFO MainThread:648648 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-15 02:48:18,879 INFO MainThread:648648 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-15 02:48:18,879 INFO MainThread:648648 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-15 02:48:18,886 INFO MainThread:648648 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_02-48-08_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 40000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-15 02:48:18,888 INFO MainThread:648648 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
25
+ 2021-07-15 02:48:18,889 INFO MainThread:648648 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
26
+ 2021-07-15 02:58:26,636 INFO MainThread:648648 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
27
+ 2021-07-15 02:58:26,637 INFO MainThread:648648 [wandb_run.py:_restore():1565] restore
28
+ 2021-07-15 02:58:29,412 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
29
+ wandb_count: 1
30
+ }
31
+ pusher_stats {
32
+ uploaded_bytes: 1375
33
+ total_bytes: 1375
34
+ }
35
+
36
+ 2021-07-15 02:58:29,620 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
37
+ wandb_count: 1
38
+ }
39
+ pusher_stats {
40
+ uploaded_bytes: 1375
41
+ total_bytes: 1375
42
+ }
43
+
44
+ 2021-07-15 02:58:29,977 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
45
+ wandb_count: 4
46
+ }
47
+ pusher_stats {
48
+ uploaded_bytes: 1375
49
+ total_bytes: 10904
50
+ }
51
+
52
+ 2021-07-15 02:58:30,079 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
53
+ wandb_count: 5
54
+ }
55
+ pusher_stats {
56
+ uploaded_bytes: 1375
57
+ total_bytes: 10906
58
+ }
59
+
60
+ 2021-07-15 02:58:30,181 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
61
+ wandb_count: 5
62
+ }
63
+ pusher_stats {
64
+ uploaded_bytes: 10906
65
+ total_bytes: 10906
66
+ }
67
+
68
+ 2021-07-15 02:58:30,284 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
69
+ wandb_count: 5
70
+ }
71
+ pusher_stats {
72
+ uploaded_bytes: 10906
73
+ total_bytes: 10906
74
+ }
75
+
76
+ 2021-07-15 02:58:30,386 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
77
+ wandb_count: 5
78
+ }
79
+ pusher_stats {
80
+ uploaded_bytes: 10906
81
+ total_bytes: 10906
82
+ }
83
+
84
+ 2021-07-15 02:58:30,488 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
85
+ wandb_count: 5
86
+ }
87
+ pusher_stats {
88
+ uploaded_bytes: 10906
89
+ total_bytes: 10906
90
+ }
91
+
92
+ 2021-07-15 02:58:30,590 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
93
+ wandb_count: 5
94
+ }
95
+ pusher_stats {
96
+ uploaded_bytes: 10906
97
+ total_bytes: 10906
98
+ }
99
+
100
+ 2021-07-15 02:58:30,942 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
101
+ wandb_count: 5
102
+ }
103
+ pusher_stats {
104
+ uploaded_bytes: 10906
105
+ total_bytes: 10906
106
+ }
107
+
108
+ 2021-07-15 02:58:31,044 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
109
+ exit_result {
110
+ }
111
+ file_counts {
112
+ wandb_count: 5
113
+ }
114
+ pusher_stats {
115
+ uploaded_bytes: 10906
116
+ total_bytes: 10906
117
+ }
118
+
119
+ 2021-07-15 02:58:32,341 INFO MainThread:648648 [wandb_run.py:_show_files():1937] logging synced files
wandb/run-20210715_024816-39ztwpif/run-39ztwpif.wandb ADDED
Binary file (11.6 kB). View file
 
wandb/run-20210715_030015-30wihv4o/files/config.yaml ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 2:
17
+ - 3
18
+ - 11
19
+ 4: 3.8.10
20
+ 5: 0.10.33
21
+ 6: 4.9.0.dev0
22
+ 8:
23
+ - 5
24
+ adafactor:
25
+ desc: null
26
+ value: false
27
+ adam_beta1:
28
+ desc: null
29
+ value: 0.9
30
+ adam_beta2:
31
+ desc: null
32
+ value: 0.98
33
+ adam_epsilon:
34
+ desc: null
35
+ value: 1.0e-08
36
+ cache_dir:
37
+ desc: null
38
+ value: null
39
+ config_name:
40
+ desc: null
41
+ value: ./
42
+ dataloader_drop_last:
43
+ desc: null
44
+ value: false
45
+ dataloader_num_workers:
46
+ desc: null
47
+ value: 0
48
+ dataloader_pin_memory:
49
+ desc: null
50
+ value: true
51
+ dataset_config_name:
52
+ desc: null
53
+ value: null
54
+ dataset_name:
55
+ desc: null
56
+ value: null
57
+ ddp_find_unused_parameters:
58
+ desc: null
59
+ value: null
60
+ debug:
61
+ desc: null
62
+ value: []
63
+ deepspeed:
64
+ desc: null
65
+ value: null
66
+ disable_tqdm:
67
+ desc: null
68
+ value: false
69
+ do_eval:
70
+ desc: null
71
+ value: false
72
+ do_predict:
73
+ desc: null
74
+ value: false
75
+ do_train:
76
+ desc: null
77
+ value: false
78
+ dtype:
79
+ desc: null
80
+ value: float32
81
+ eval_accumulation_steps:
82
+ desc: null
83
+ value: null
84
+ eval_steps:
85
+ desc: null
86
+ value: 20000
87
+ evaluation_strategy:
88
+ desc: null
89
+ value: IntervalStrategy.NO
90
+ fp16:
91
+ desc: null
92
+ value: false
93
+ fp16_backend:
94
+ desc: null
95
+ value: auto
96
+ fp16_full_eval:
97
+ desc: null
98
+ value: false
99
+ fp16_opt_level:
100
+ desc: null
101
+ value: O1
102
+ gradient_accumulation_steps:
103
+ desc: null
104
+ value: 1
105
+ greater_is_better:
106
+ desc: null
107
+ value: null
108
+ group_by_length:
109
+ desc: null
110
+ value: false
111
+ ignore_data_skip:
112
+ desc: null
113
+ value: false
114
+ label_names:
115
+ desc: null
116
+ value: null
117
+ label_smoothing_factor:
118
+ desc: null
119
+ value: 0.0
120
+ learning_rate:
121
+ desc: null
122
+ value: 3.0e-05
123
+ length_column_name:
124
+ desc: null
125
+ value: length
126
+ line_by_line:
127
+ desc: null
128
+ value: false
129
+ load_best_model_at_end:
130
+ desc: null
131
+ value: false
132
+ local_rank:
133
+ desc: null
134
+ value: -1
135
+ log_level:
136
+ desc: null
137
+ value: -1
138
+ log_level_replica:
139
+ desc: null
140
+ value: -1
141
+ log_on_each_node:
142
+ desc: null
143
+ value: true
144
+ logging_dir:
145
+ desc: null
146
+ value: ./runs/Jul15_03-00-07_t1v-n-f5c06ea1-w-0
147
+ logging_first_step:
148
+ desc: null
149
+ value: false
150
+ logging_steps:
151
+ desc: null
152
+ value: 50
153
+ logging_strategy:
154
+ desc: null
155
+ value: IntervalStrategy.STEPS
156
+ lr_scheduler_type:
157
+ desc: null
158
+ value: SchedulerType.LINEAR
159
+ max_eval_samples:
160
+ desc: null
161
+ value: 500
162
+ max_grad_norm:
163
+ desc: null
164
+ value: 1.0
165
+ max_seq_length:
166
+ desc: null
167
+ value: 4096
168
+ max_steps:
169
+ desc: null
170
+ value: -1
171
+ metric_for_best_model:
172
+ desc: null
173
+ value: null
174
+ mlm_probability:
175
+ desc: null
176
+ value: 0.15
177
+ model_name_or_path:
178
+ desc: null
179
+ value: null
180
+ model_type:
181
+ desc: null
182
+ value: big_bird
183
+ mp_parameters:
184
+ desc: null
185
+ value: ''
186
+ no_cuda:
187
+ desc: null
188
+ value: false
189
+ num_train_epochs:
190
+ desc: null
191
+ value: 5.0
192
+ output_dir:
193
+ desc: null
194
+ value: ./
195
+ overwrite_cache:
196
+ desc: null
197
+ value: false
198
+ overwrite_output_dir:
199
+ desc: null
200
+ value: true
201
+ pad_to_max_length:
202
+ desc: null
203
+ value: false
204
+ past_index:
205
+ desc: null
206
+ value: -1
207
+ per_device_eval_batch_size:
208
+ desc: null
209
+ value: 1
210
+ per_device_train_batch_size:
211
+ desc: null
212
+ value: 1
213
+ per_gpu_eval_batch_size:
214
+ desc: null
215
+ value: null
216
+ per_gpu_train_batch_size:
217
+ desc: null
218
+ value: null
219
+ prediction_loss_only:
220
+ desc: null
221
+ value: false
222
+ preprocessing_num_workers:
223
+ desc: null
224
+ value: 96
225
+ push_to_hub:
226
+ desc: null
227
+ value: true
228
+ push_to_hub_model_id:
229
+ desc: null
230
+ value: ''
231
+ push_to_hub_organization:
232
+ desc: null
233
+ value: null
234
+ push_to_hub_token:
235
+ desc: null
236
+ value: null
237
+ remove_unused_columns:
238
+ desc: null
239
+ value: true
240
+ report_to:
241
+ desc: null
242
+ value:
243
+ - tensorboard
244
+ - wandb
245
+ resume_from_checkpoint:
246
+ desc: null
247
+ value: null
248
+ run_name:
249
+ desc: null
250
+ value: ./
251
+ save_on_each_node:
252
+ desc: null
253
+ value: false
254
+ save_steps:
255
+ desc: null
256
+ value: 30000
257
+ save_strategy:
258
+ desc: null
259
+ value: IntervalStrategy.STEPS
260
+ save_total_limit:
261
+ desc: null
262
+ value: 5
263
+ seed:
264
+ desc: null
265
+ value: 42
266
+ sharded_ddp:
267
+ desc: null
268
+ value: []
269
+ skip_memory_metrics:
270
+ desc: null
271
+ value: true
272
+ tokenizer_name:
273
+ desc: null
274
+ value: ./
275
+ tpu_metrics_debug:
276
+ desc: null
277
+ value: false
278
+ tpu_num_cores:
279
+ desc: null
280
+ value: null
281
+ train_ref_file:
282
+ desc: null
283
+ value: null
284
+ use_fast_tokenizer:
285
+ desc: null
286
+ value: true
287
+ use_legacy_prediction_loop:
288
+ desc: null
289
+ value: false
290
+ validation_ref_file:
291
+ desc: null
292
+ value: null
293
+ validation_split_percentage:
294
+ desc: null
295
+ value: 5
296
+ warmup_ratio:
297
+ desc: null
298
+ value: 0.0
299
+ warmup_steps:
300
+ desc: null
301
+ value: 10000
302
+ weight_decay:
303
+ desc: null
304
+ value: 0.0095
wandb/run-20210715_030015-30wihv4o/files/output.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
2
+ warnings.warn(
3
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
4
+ warnings.warn(
5
+ Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s]
6
+ Training...: 0%| | 0/453557 [02:04<?, ?it/s]
7
+ Epoch ... (1/5): 0%| | 0/5 [09:10<?, ?it/s]
8
+ Traceback (most recent call last):
9
+ File "./run_mlm_flax_no_accum.py", line 699, in <module>
10
+ state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
11
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback
12
+ return fun(*args, **kwargs)
13
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped
14
+ out = pxla.xla_pmap(
15
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind
16
+ return call_bind(self, fun, *args, **params)
17
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind
18
+ outs = primitive.process(top_trace, fun, tracers, params)
19
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process
20
+ return trace.process_map(self, fun, tracers, params)
21
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call
22
+ return primitive.impl(f, *tracers, **params)
23
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl
24
+ return compiled_fun(*args)
25
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
26
+ out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
27
+ jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 7.12G free, 0B reserved, and 7.08G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
28
+ The stack trace below excludes JAX-internal frames.
29
+ The preceding is the original exception that occurred, unmodified.
30
+ --------------------
31
+ The above exception was the direct cause of the following exception:
32
+ Traceback (most recent call last):
33
+ File "./run_mlm_flax_no_accum.py", line 699, in <module>
34
+ state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
35
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
36
+ out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
37
+ RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 7.12G free, 0B reserved, and 7.08G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
wandb/run-20210715_030015-30wihv4o/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210715_030015-30wihv4o/files/wandb-metadata.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-15T03:00:17.537660",
5
+ "startedAt": "2021-07-15T03:00:15.443682",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=10000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=50",
22
+ "--eval_steps=20000",
23
+ "--num_train_epochs=5",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=30000",
26
+ "--learning_rate=3e-5",
27
+ "--per_device_train_batch_size=1",
28
+ "--per_device_eval_batch_size=1",
29
+ "--save_total_limit=5",
30
+ "--max_eval_samples=500"
31
+ ],
32
+ "state": "running",
33
+ "program": "./run_mlm_flax_no_accum.py",
34
+ "codePath": "run_mlm_flax_no_accum.py",
35
+ "git": {
36
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
37
+ "commit": "87e02e7ff8fbaea90c8c4ad1c984f83742432303"
38
+ },
39
+ "email": null,
40
+ "root": "/home/dat/pino-roberta-base",
41
+ "host": "t1v-n-f5c06ea1-w-0",
42
+ "username": "dat",
43
+ "executable": "/home/dat/pino/bin/python"
44
+ }
wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
wandb/run-20210715_030015-30wihv4o/logs/debug-internal.log ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 03:00:16,154 INFO MainThread:652382 [internal.py:wandb_internal():88] W&B internal server running at pid: 652382, started at: 2021-07-15 03:00:16.153819
2
+ 2021-07-15 03:00:16,156 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: check_version
3
+ 2021-07-15 03:00:16,156 INFO WriterThread:652382 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/run-30wihv4o.wandb
4
+ 2021-07-15 03:00:16,157 DEBUG SenderThread:652382 [sender.py:send():179] send: header
5
+ 2021-07-15 03:00:16,157 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: check_version
6
+ 2021-07-15 03:00:16,194 DEBUG SenderThread:652382 [sender.py:send():179] send: run
7
+ 2021-07-15 03:00:16,370 INFO SenderThread:652382 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files
8
+ 2021-07-15 03:00:16,370 INFO SenderThread:652382 [sender.py:_start_run_threads():716] run started: 30wihv4o with start time 1626318015
9
+ 2021-07-15 03:00:16,372 DEBUG SenderThread:652382 [sender.py:send():179] send: summary
10
+ 2021-07-15 03:00:16,373 INFO SenderThread:652382 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
11
+ 2021-07-15 03:00:16,374 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: run_start
12
+ 2021-07-15 03:00:17,425 INFO Thread-8 :652382 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json
13
+ 2021-07-15 03:00:17,537 DEBUG HandlerThread:652382 [meta.py:__init__():39] meta init
14
+ 2021-07-15 03:00:17,537 DEBUG HandlerThread:652382 [meta.py:__init__():53] meta init done
15
+ 2021-07-15 03:00:17,537 DEBUG HandlerThread:652382 [meta.py:probe():210] probe
16
+ 2021-07-15 03:00:17,538 DEBUG HandlerThread:652382 [meta.py:_setup_git():200] setup git
17
+ 2021-07-15 03:00:17,568 DEBUG HandlerThread:652382 [meta.py:_setup_git():207] setup git done
18
+ 2021-07-15 03:00:17,568 DEBUG HandlerThread:652382 [meta.py:_save_pip():57] save pip
19
+ 2021-07-15 03:00:17,568 DEBUG HandlerThread:652382 [meta.py:_save_pip():71] save pip done
20
+ 2021-07-15 03:00:17,568 DEBUG HandlerThread:652382 [meta.py:probe():252] probe done
21
+ 2021-07-15 03:00:17,572 DEBUG SenderThread:652382 [sender.py:send():179] send: files
22
+ 2021-07-15 03:00:17,572 INFO SenderThread:652382 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
23
+ 2021-07-15 03:00:17,579 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
24
+ 2021-07-15 03:00:17,580 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
25
+ 2021-07-15 03:00:17,710 DEBUG SenderThread:652382 [sender.py:send():179] send: config
26
+ 2021-07-15 03:00:17,710 DEBUG SenderThread:652382 [sender.py:send():179] send: config
27
+ 2021-07-15 03:00:17,711 DEBUG SenderThread:652382 [sender.py:send():179] send: config
28
+ 2021-07-15 03:00:18,067 INFO Thread-11 :652382 [upload_job.py:push():137] Uploaded file /tmp/tmpo5adho61wandb/1x3gq8av-wandb-metadata.json
29
+ 2021-07-15 03:00:18,425 INFO Thread-8 :652382 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/requirements.txt
30
+ 2021-07-15 03:00:18,425 INFO Thread-8 :652382 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-metadata.json
31
+ 2021-07-15 03:00:18,425 INFO Thread-8 :652382 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log
32
+ 2021-07-15 03:00:32,431 INFO Thread-8 :652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log
33
+ 2021-07-15 03:00:32,712 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
34
+ 2021-07-15 03:00:32,712 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
35
+ 2021-07-15 03:00:45,621 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
36
+ 2021-07-15 03:00:47,437 INFO Thread-8 :652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/config.yaml
37
+ 2021-07-15 03:00:47,844 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
38
+ 2021-07-15 03:00:47,844 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
39
+ 2021-07-15 03:01:02,976 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
40
+ 2021-07-15 03:01:02,976 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
41
+ 2021-07-15 03:01:15,700 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
42
+ 2021-07-15 03:01:18,116 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
43
+ 2021-07-15 03:01:18,116 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
44
+ 2021-07-15 03:01:33,254 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
45
+ 2021-07-15 03:01:33,255 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
46
+ 2021-07-15 03:01:45,777 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
47
+ 2021-07-15 03:01:48,387 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
48
+ 2021-07-15 03:01:48,388 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
49
+ 2021-07-15 03:02:03,521 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
50
+ 2021-07-15 03:02:03,522 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
51
+ 2021-07-15 03:02:15,842 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
52
+ 2021-07-15 03:02:18,653 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
53
+ 2021-07-15 03:02:18,654 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
54
+ 2021-07-15 03:02:33,785 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
55
+ 2021-07-15 03:02:33,785 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
56
+ 2021-07-15 03:02:45,911 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
57
+ 2021-07-15 03:02:48,915 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
58
+ 2021-07-15 03:02:48,915 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
59
+ 2021-07-15 03:03:04,048 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
60
+ 2021-07-15 03:03:04,049 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
61
+ 2021-07-15 03:03:15,978 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
62
+ 2021-07-15 03:03:19,181 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
63
+ 2021-07-15 03:03:19,181 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
64
+ 2021-07-15 03:03:34,314 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
65
+ 2021-07-15 03:03:34,314 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
66
+ 2021-07-15 03:03:46,043 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
67
+ 2021-07-15 03:03:49,447 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
68
+ 2021-07-15 03:03:49,448 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
69
+ 2021-07-15 03:04:04,580 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
70
+ 2021-07-15 03:04:04,580 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
71
+ 2021-07-15 03:04:16,110 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
72
+ 2021-07-15 03:04:19,717 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
73
+ 2021-07-15 03:04:19,718 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
74
+ 2021-07-15 03:04:34,849 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
75
+ 2021-07-15 03:04:34,849 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
76
+ 2021-07-15 03:04:46,173 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
77
+ 2021-07-15 03:04:49,981 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
78
+ 2021-07-15 03:04:49,982 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
79
+ 2021-07-15 03:05:05,119 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
80
+ 2021-07-15 03:05:05,120 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
81
+ 2021-07-15 03:05:16,239 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
82
+ 2021-07-15 03:05:20,263 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
83
+ 2021-07-15 03:05:20,264 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
84
+ 2021-07-15 03:05:35,395 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
85
+ 2021-07-15 03:05:35,395 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
86
+ 2021-07-15 03:05:46,312 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
87
+ 2021-07-15 03:05:50,529 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
88
+ 2021-07-15 03:05:50,529 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
89
+ 2021-07-15 03:06:05,662 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
90
+ 2021-07-15 03:06:05,662 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
91
+ 2021-07-15 03:06:16,385 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
92
+ 2021-07-15 03:06:20,794 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
93
+ 2021-07-15 03:06:20,794 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
94
+ 2021-07-15 03:06:35,926 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
95
+ 2021-07-15 03:06:35,926 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
96
+ 2021-07-15 03:06:46,454 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
97
+ 2021-07-15 03:06:51,060 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
98
+ 2021-07-15 03:06:51,060 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
99
+ 2021-07-15 03:07:06,201 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
100
+ 2021-07-15 03:07:06,201 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
101
+ 2021-07-15 03:07:16,531 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
102
+ 2021-07-15 03:07:21,340 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
103
+ 2021-07-15 03:07:21,340 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
104
+ 2021-07-15 03:07:36,473 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
105
+ 2021-07-15 03:07:36,473 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
106
+ 2021-07-15 03:07:38,595 INFO Thread-8 :652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log
107
+ 2021-07-15 03:07:46,605 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
108
+ 2021-07-15 03:07:51,620 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
109
+ 2021-07-15 03:07:51,620 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
110
+ 2021-07-15 03:08:06,767 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
111
+ 2021-07-15 03:08:06,768 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
112
+ 2021-07-15 03:08:16,682 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
113
+ 2021-07-15 03:08:21,898 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
114
+ 2021-07-15 03:08:21,899 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
115
+ 2021-07-15 03:08:37,032 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
116
+ 2021-07-15 03:08:37,032 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
117
+ 2021-07-15 03:08:46,763 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
118
+ 2021-07-15 03:08:52,171 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
119
+ 2021-07-15 03:08:52,172 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
120
+ 2021-07-15 03:09:07,305 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
121
+ 2021-07-15 03:09:07,305 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
122
+ 2021-07-15 03:09:16,837 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
123
+ 2021-07-15 03:09:22,440 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
124
+ 2021-07-15 03:09:22,440 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
125
+ 2021-07-15 03:09:37,575 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
126
+ 2021-07-15 03:09:37,576 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
127
+ 2021-07-15 03:09:42,648 INFO Thread-8 :652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log
128
+ 2021-07-15 03:09:44,217 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
129
+ 2021-07-15 03:09:44,217 DEBUG SenderThread:652382 [sender.py:send():179] send: telemetry
130
+ 2021-07-15 03:09:44,218 DEBUG SenderThread:652382 [sender.py:send():179] send: exit
131
+ 2021-07-15 03:09:44,218 INFO SenderThread:652382 [sender.py:send_exit():287] handling exit code: 1
132
+ 2021-07-15 03:09:44,219 INFO SenderThread:652382 [sender.py:send_exit():295] send defer
133
+ 2021-07-15 03:09:44,219 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
134
+ 2021-07-15 03:09:44,220 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
135
+ 2021-07-15 03:09:44,220 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 0
136
+ 2021-07-15 03:09:44,221 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
137
+ 2021-07-15 03:09:44,221 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 0
138
+ 2021-07-15 03:09:44,221 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 1
139
+ 2021-07-15 03:09:44,221 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
140
+ 2021-07-15 03:09:44,221 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 1
141
+ 2021-07-15 03:09:44,308 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
142
+ 2021-07-15 03:09:44,308 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 1
143
+ 2021-07-15 03:09:44,308 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 2
144
+ 2021-07-15 03:09:44,308 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
145
+ 2021-07-15 03:09:44,309 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
146
+ 2021-07-15 03:09:44,309 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 2
147
+ 2021-07-15 03:09:44,309 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
148
+ 2021-07-15 03:09:44,309 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 2
149
+ 2021-07-15 03:09:44,309 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 3
150
+ 2021-07-15 03:09:44,309 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
151
+ 2021-07-15 03:09:44,310 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 3
152
+ 2021-07-15 03:09:44,310 DEBUG SenderThread:652382 [sender.py:send():179] send: summary
153
+ 2021-07-15 03:09:44,311 INFO SenderThread:652382 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
154
+ 2021-07-15 03:09:44,311 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
155
+ 2021-07-15 03:09:44,311 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 3
156
+ 2021-07-15 03:09:44,311 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 4
157
+ 2021-07-15 03:09:44,311 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
158
+ 2021-07-15 03:09:44,312 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 4
159
+ 2021-07-15 03:09:44,312 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
160
+ 2021-07-15 03:09:44,312 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 4
161
+ 2021-07-15 03:09:44,322 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
162
+ 2021-07-15 03:09:44,490 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 5
163
+ 2021-07-15 03:09:44,490 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
164
+ 2021-07-15 03:09:44,491 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
165
+ 2021-07-15 03:09:44,491 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 5
166
+ 2021-07-15 03:09:44,491 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
167
+ 2021-07-15 03:09:44,491 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 5
168
+ 2021-07-15 03:09:44,491 INFO SenderThread:652382 [dir_watcher.py:finish():282] shutting down directory watcher
169
+ 2021-07-15 03:09:44,592 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
170
+ 2021-07-15 03:09:44,649 INFO Thread-8 :652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json
171
+ 2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/config.yaml
172
+ 2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log
173
+ 2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files
174
+ 2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/requirements.txt requirements.txt
175
+ 2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log output.log
176
+ 2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-metadata.json wandb-metadata.json
177
+ 2021-07-15 03:09:44,651 INFO SenderThread:652382 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/config.yaml config.yaml
178
+ 2021-07-15 03:09:44,651 INFO SenderThread:652382 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json wandb-summary.json
179
+ 2021-07-15 03:09:44,651 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 6
180
+ 2021-07-15 03:09:44,651 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
181
+ 2021-07-15 03:09:44,652 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
182
+ 2021-07-15 03:09:44,652 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 6
183
+ 2021-07-15 03:09:44,655 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
184
+ 2021-07-15 03:09:44,655 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 6
185
+ 2021-07-15 03:09:44,655 INFO SenderThread:652382 [file_pusher.py:finish():177] shutting down file pusher
186
+ 2021-07-15 03:09:44,754 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
187
+ 2021-07-15 03:09:44,754 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
188
+ 2021-07-15 03:09:44,856 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
189
+ 2021-07-15 03:09:44,856 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
190
+ 2021-07-15 03:09:44,958 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
191
+ 2021-07-15 03:09:44,958 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
192
+ 2021-07-15 03:09:45,060 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
193
+ 2021-07-15 03:09:45,060 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
194
+ 2021-07-15 03:09:45,085 INFO Thread-14 :652382 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/config.yaml
195
+ 2021-07-15 03:09:45,094 INFO Thread-12 :652382 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/requirements.txt
196
+ 2021-07-15 03:09:45,129 INFO Thread-15 :652382 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json
197
+ 2021-07-15 03:09:45,144 INFO Thread-13 :652382 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log
198
+ 2021-07-15 03:09:45,162 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
199
+ 2021-07-15 03:09:45,162 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
200
+ 2021-07-15 03:09:45,264 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
201
+ 2021-07-15 03:09:45,265 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
202
+ 2021-07-15 03:09:45,345 INFO Thread-7 :652382 [sender.py:transition_state():308] send defer: 7
203
+ 2021-07-15 03:09:45,345 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
204
+ 2021-07-15 03:09:45,345 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 7
205
+ 2021-07-15 03:09:45,346 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
206
+ 2021-07-15 03:09:45,346 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 7
207
+ 2021-07-15 03:09:45,366 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
208
+ 2021-07-15 03:09:45,636 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 8
209
+ 2021-07-15 03:09:45,636 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
210
+ 2021-07-15 03:09:45,637 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
211
+ 2021-07-15 03:09:45,637 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 8
212
+ 2021-07-15 03:09:45,637 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
213
+ 2021-07-15 03:09:45,637 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 8
214
+ 2021-07-15 03:09:45,638 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 9
215
+ 2021-07-15 03:09:45,638 DEBUG SenderThread:652382 [sender.py:send():179] send: final
216
+ 2021-07-15 03:09:45,638 DEBUG SenderThread:652382 [sender.py:send():179] send: footer
217
+ 2021-07-15 03:09:45,639 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
218
+ 2021-07-15 03:09:45,639 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 9
219
+ 2021-07-15 03:09:45,639 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
220
+ 2021-07-15 03:09:45,639 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 9
221
+ 2021-07-15 03:09:45,738 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
222
+ 2021-07-15 03:09:45,739 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
223
+ 2021-07-15 03:09:45,739 INFO SenderThread:652382 [file_pusher.py:join():182] waiting for file pusher
224
+ 2021-07-15 03:09:45,740 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: get_summary
225
+ 2021-07-15 03:09:45,741 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: sampled_history
226
+ 2021-07-15 03:09:45,742 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: shutdown
227
+ 2021-07-15 03:09:45,742 INFO HandlerThread:652382 [handler.py:finish():638] shutting down handler
228
+ 2021-07-15 03:09:46,639 INFO WriterThread:652382 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/run-30wihv4o.wandb
229
+ 2021-07-15 03:09:46,739 INFO SenderThread:652382 [sender.py:finish():945] shutting down sender
230
+ 2021-07-15 03:09:46,739 INFO SenderThread:652382 [file_pusher.py:finish():177] shutting down file pusher
231
+ 2021-07-15 03:09:46,739 INFO SenderThread:652382 [file_pusher.py:join():182] waiting for file pusher
232
+ 2021-07-15 03:09:46,742 INFO MainThread:652382 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210715_030015-30wihv4o/logs/debug.log ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/logs/debug.log
4
+ 2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/logs/debug-internal.log
5
+ 2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_init.py:init():419] starting backend
9
+ 2021-07-15 03:00:15,446 INFO MainThread:651126 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-15 03:00:15,492 INFO MainThread:651126 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-15 03:00:15,539 INFO MainThread:651126 [backend.py:ensure_launched():139] started backend process with pid: 652382
12
+ 2021-07-15 03:00:15,541 INFO MainThread:651126 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-15 03:00:15,544 INFO MainThread:651126 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-15 03:00:15,545 INFO MainThread:651126 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-15 03:00:16,193 INFO MainThread:651126 [wandb_init.py:init():496] got version response
16
+ 2021-07-15 03:00:16,193 INFO MainThread:651126 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-15 03:00:16,373 INFO MainThread:651126 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-15 03:00:17,575 INFO MainThread:651126 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-15 03:00:17,575 INFO MainThread:651126 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-15 03:00:17,576 INFO MainThread:651126 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-15 03:00:17,577 INFO MainThread:651126 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-15 03:00:17,578 INFO MainThread:651126 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-15 03:00:17,583 INFO MainThread:651126 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_03-00-07_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 30000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-15 03:00:17,585 INFO MainThread:651126 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
25
+ 2021-07-15 03:00:17,586 INFO MainThread:651126 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
26
+ 2021-07-15 03:09:41,805 INFO MainThread:651126 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
27
+ 2021-07-15 03:09:41,806 INFO MainThread:651126 [wandb_run.py:_restore():1565] restore
28
+ 2021-07-15 03:09:44,221 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
29
+ wandb_count: 1
30
+ }
31
+ pusher_stats {
32
+ uploaded_bytes: 1375
33
+ total_bytes: 1375
34
+ }
35
+
36
+ 2021-07-15 03:09:44,491 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
37
+ wandb_count: 1
38
+ }
39
+ pusher_stats {
40
+ uploaded_bytes: 1375
41
+ total_bytes: 1375
42
+ }
43
+
44
+ 2021-07-15 03:09:44,652 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
45
+ wandb_count: 4
46
+ }
47
+ pusher_stats {
48
+ uploaded_bytes: 1375
49
+ total_bytes: 10904
50
+ }
51
+
52
+ 2021-07-15 03:09:44,755 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
53
+ wandb_count: 5
54
+ }
55
+ pusher_stats {
56
+ uploaded_bytes: 1375
57
+ total_bytes: 10906
58
+ }
59
+
60
+ 2021-07-15 03:09:44,857 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
61
+ wandb_count: 5
62
+ }
63
+ pusher_stats {
64
+ uploaded_bytes: 10906
65
+ total_bytes: 10906
66
+ }
67
+
68
+ 2021-07-15 03:09:44,959 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
69
+ wandb_count: 5
70
+ }
71
+ pusher_stats {
72
+ uploaded_bytes: 10906
73
+ total_bytes: 10906
74
+ }
75
+
76
+ 2021-07-15 03:09:45,061 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
77
+ wandb_count: 5
78
+ }
79
+ pusher_stats {
80
+ uploaded_bytes: 10906
81
+ total_bytes: 10906
82
+ }
83
+
84
+ 2021-07-15 03:09:45,163 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
85
+ wandb_count: 5
86
+ }
87
+ pusher_stats {
88
+ uploaded_bytes: 10906
89
+ total_bytes: 10906
90
+ }
91
+
92
+ 2021-07-15 03:09:45,265 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
93
+ wandb_count: 5
94
+ }
95
+ pusher_stats {
96
+ uploaded_bytes: 10906
97
+ total_bytes: 10906
98
+ }
99
+
100
+ 2021-07-15 03:09:45,637 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
101
+ wandb_count: 5
102
+ }
103
+ pusher_stats {
104
+ uploaded_bytes: 10906
105
+ total_bytes: 10906
106
+ }
107
+
108
+ 2021-07-15 03:09:45,739 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
109
+ exit_result {
110
+ }
111
+ file_counts {
112
+ wandb_count: 5
113
+ }
114
+ pusher_stats {
115
+ uploaded_bytes: 10906
116
+ total_bytes: 10906
117
+ }
118
+
119
+ 2021-07-15 03:09:47,041 INFO MainThread:651126 [wandb_run.py:_show_files():1937] logging synced files
wandb/run-20210715_030015-30wihv4o/run-30wihv4o.wandb ADDED
Binary file (11.1 kB). View file
 
wandb/run-20210715_031107-69jkygz3/files/config.yaml ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 4: 3.8.10
17
+ 5: 0.10.33
18
+ 6: 4.9.0.dev0
19
+ 8:
20
+ - 5
21
+ adafactor:
22
+ desc: null
23
+ value: false
24
+ adam_beta1:
25
+ desc: null
26
+ value: 0.9
27
+ adam_beta2:
28
+ desc: null
29
+ value: 0.98
30
+ adam_epsilon:
31
+ desc: null
32
+ value: 1.0e-08
33
+ cache_dir:
34
+ desc: null
35
+ value: null
36
+ config_name:
37
+ desc: null
38
+ value: ./
39
+ dataloader_drop_last:
40
+ desc: null
41
+ value: false
42
+ dataloader_num_workers:
43
+ desc: null
44
+ value: 0
45
+ dataloader_pin_memory:
46
+ desc: null
47
+ value: true
48
+ dataset_config_name:
49
+ desc: null
50
+ value: null
51
+ dataset_name:
52
+ desc: null
53
+ value: null
54
+ ddp_find_unused_parameters:
55
+ desc: null
56
+ value: null
57
+ debug:
58
+ desc: null
59
+ value: []
60
+ deepspeed:
61
+ desc: null
62
+ value: null
63
+ disable_tqdm:
64
+ desc: null
65
+ value: false
66
+ do_eval:
67
+ desc: null
68
+ value: false
69
+ do_predict:
70
+ desc: null
71
+ value: false
72
+ do_train:
73
+ desc: null
74
+ value: false
75
+ dtype:
76
+ desc: null
77
+ value: float32
78
+ eval_accumulation_steps:
79
+ desc: null
80
+ value: null
81
+ eval_steps:
82
+ desc: null
83
+ value: 20000
84
+ evaluation_strategy:
85
+ desc: null
86
+ value: IntervalStrategy.NO
87
+ fp16:
88
+ desc: null
89
+ value: false
90
+ fp16_backend:
91
+ desc: null
92
+ value: auto
93
+ fp16_full_eval:
94
+ desc: null
95
+ value: false
96
+ fp16_opt_level:
97
+ desc: null
98
+ value: O1
99
+ gradient_accumulation_steps:
100
+ desc: null
101
+ value: 1
102
+ greater_is_better:
103
+ desc: null
104
+ value: null
105
+ group_by_length:
106
+ desc: null
107
+ value: false
108
+ ignore_data_skip:
109
+ desc: null
110
+ value: false
111
+ label_names:
112
+ desc: null
113
+ value: null
114
+ label_smoothing_factor:
115
+ desc: null
116
+ value: 0.0
117
+ learning_rate:
118
+ desc: null
119
+ value: 3.0e-05
120
+ length_column_name:
121
+ desc: null
122
+ value: length
123
+ line_by_line:
124
+ desc: null
125
+ value: false
126
+ load_best_model_at_end:
127
+ desc: null
128
+ value: false
129
+ local_rank:
130
+ desc: null
131
+ value: -1
132
+ log_level:
133
+ desc: null
134
+ value: -1
135
+ log_level_replica:
136
+ desc: null
137
+ value: -1
138
+ log_on_each_node:
139
+ desc: null
140
+ value: true
141
+ logging_dir:
142
+ desc: null
143
+ value: ./runs/Jul15_03-10-59_t1v-n-f5c06ea1-w-0
144
+ logging_first_step:
145
+ desc: null
146
+ value: false
147
+ logging_steps:
148
+ desc: null
149
+ value: 50
150
+ logging_strategy:
151
+ desc: null
152
+ value: IntervalStrategy.STEPS
153
+ lr_scheduler_type:
154
+ desc: null
155
+ value: SchedulerType.LINEAR
156
+ max_eval_samples:
157
+ desc: null
158
+ value: 500
159
+ max_grad_norm:
160
+ desc: null
161
+ value: 1.0
162
+ max_seq_length:
163
+ desc: null
164
+ value: 4096
165
+ max_steps:
166
+ desc: null
167
+ value: -1
168
+ metric_for_best_model:
169
+ desc: null
170
+ value: null
171
+ mlm_probability:
172
+ desc: null
173
+ value: 0.15
174
+ model_name_or_path:
175
+ desc: null
176
+ value: null
177
+ model_type:
178
+ desc: null
179
+ value: big_bird
180
+ mp_parameters:
181
+ desc: null
182
+ value: ''
183
+ no_cuda:
184
+ desc: null
185
+ value: false
186
+ num_train_epochs:
187
+ desc: null
188
+ value: 5.0
189
+ output_dir:
190
+ desc: null
191
+ value: ./
192
+ overwrite_cache:
193
+ desc: null
194
+ value: false
195
+ overwrite_output_dir:
196
+ desc: null
197
+ value: true
198
+ pad_to_max_length:
199
+ desc: null
200
+ value: false
201
+ past_index:
202
+ desc: null
203
+ value: -1
204
+ per_device_eval_batch_size:
205
+ desc: null
206
+ value: 1
207
+ per_device_train_batch_size:
208
+ desc: null
209
+ value: 1
210
+ per_gpu_eval_batch_size:
211
+ desc: null
212
+ value: null
213
+ per_gpu_train_batch_size:
214
+ desc: null
215
+ value: null
216
+ prediction_loss_only:
217
+ desc: null
218
+ value: false
219
+ preprocessing_num_workers:
220
+ desc: null
221
+ value: 96
222
+ push_to_hub:
223
+ desc: null
224
+ value: true
225
+ push_to_hub_model_id:
226
+ desc: null
227
+ value: ''
228
+ push_to_hub_organization:
229
+ desc: null
230
+ value: null
231
+ push_to_hub_token:
232
+ desc: null
233
+ value: null
234
+ remove_unused_columns:
235
+ desc: null
236
+ value: true
237
+ report_to:
238
+ desc: null
239
+ value:
240
+ - tensorboard
241
+ - wandb
242
+ resume_from_checkpoint:
243
+ desc: null
244
+ value: null
245
+ run_name:
246
+ desc: null
247
+ value: ./
248
+ save_on_each_node:
249
+ desc: null
250
+ value: false
251
+ save_steps:
252
+ desc: null
253
+ value: 30000
254
+ save_strategy:
255
+ desc: null
256
+ value: IntervalStrategy.STEPS
257
+ save_total_limit:
258
+ desc: null
259
+ value: 5
260
+ seed:
261
+ desc: null
262
+ value: 42
263
+ sharded_ddp:
264
+ desc: null
265
+ value: []
266
+ skip_memory_metrics:
267
+ desc: null
268
+ value: true
269
+ tokenizer_name:
270
+ desc: null
271
+ value: ./
272
+ tpu_metrics_debug:
273
+ desc: null
274
+ value: false
275
+ tpu_num_cores:
276
+ desc: null
277
+ value: null
278
+ train_ref_file:
279
+ desc: null
280
+ value: null
281
+ use_fast_tokenizer:
282
+ desc: null
283
+ value: true
284
+ use_legacy_prediction_loop:
285
+ desc: null
286
+ value: false
287
+ validation_ref_file:
288
+ desc: null
289
+ value: null
290
+ validation_split_percentage:
291
+ desc: null
292
+ value: 5
293
+ warmup_ratio:
294
+ desc: null
295
+ value: 0.0
296
+ warmup_steps:
297
+ desc: null
298
+ value: 10000
299
+ weight_decay:
300
+ desc: null
301
+ value: 0.0095