dat
committed on
Commit
•
5575ac1
1
Parent(s):
cc569ae
Saving weights and logs of step 105000
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- checkpoint_105000 +3 -0
- events.out.tfevents.1626362977.t1v-n-f5c06ea1-w-0.707091.3.v2 +2 -2
- events.out.tfevents.1626368154.t1v-n-f5c06ea1-w-0.715071.3.v2 +3 -0
- events.out.tfevents.1626369005.t1v-n-f5c06ea1-w-0.717656.3.v2 +3 -0
- events.out.tfevents.1626370906.t1v-n-f5c06ea1-w-0.721922.3.v2 +3 -0
- events.out.tfevents.1626371506.t1v-n-f5c06ea1-w-0.724375.3.v2 +3 -0
- events.out.tfevents.1626372294.t1v-n-f5c06ea1-w-0.727475.3.v2 +3 -0
- events.out.tfevents.1626374797.t1v-n-f5c06ea1-w-0.731696.3.v2 +3 -0
- events.out.tfevents.1626375524.t1v-n-f5c06ea1-w-0.734136.3.v2 +3 -0
- flax_model.msgpack +1 -1
- run.sh +3 -3
- run_mlm_flax_no_accum.py +5 -3
- wandb/debug-internal.log +1 -1
- wandb/debug.log +1 -1
- wandb/latest-run +1 -1
- wandb/run-20210715_152938-8qznp93p/files/output.log +32 -0
- wandb/run-20210715_152938-8qznp93p/files/wandb-summary.json +1 -1
- wandb/run-20210715_152938-8qznp93p/logs/debug-internal.log +49 -0
- wandb/run-20210715_152938-8qznp93p/logs/debug.log +2 -0
- wandb/run-20210715_152938-8qznp93p/run-8qznp93p.wandb +0 -0
- wandb/run-20210715_165555-25rtfw59/files/config.yaml +301 -0
- wandb/run-20210715_165555-25rtfw59/files/output.log +48 -0
- wandb/run-20210715_165555-25rtfw59/files/requirements.txt +94 -0
- wandb/run-20210715_165555-25rtfw59/files/wandb-metadata.json +45 -0
- wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json +1 -0
- wandb/run-20210715_165555-25rtfw59/logs/debug-internal.log +236 -0
- wandb/run-20210715_165555-25rtfw59/logs/debug.log +27 -0
- wandb/run-20210715_165555-25rtfw59/run-25rtfw59.wandb +0 -0
- wandb/run-20210715_171007-1mu5szt1/files/config.yaml +301 -0
- wandb/run-20210715_171007-1mu5szt1/files/output.log +307 -0
- wandb/run-20210715_171007-1mu5szt1/files/requirements.txt +94 -0
- wandb/run-20210715_171007-1mu5szt1/files/wandb-metadata.json +45 -0
- wandb/run-20210715_171007-1mu5szt1/files/wandb-summary.json +1 -0
- wandb/run-20210715_171007-1mu5szt1/logs/debug-internal.log +0 -0
- wandb/run-20210715_171007-1mu5szt1/logs/debug.log +27 -0
- wandb/run-20210715_171007-1mu5szt1/run-1mu5szt1.wandb +0 -0
- wandb/run-20210715_174147-3nkn7hxg/files/config.yaml +304 -0
- wandb/run-20210715_174147-3nkn7hxg/files/output.log +14 -0
- wandb/run-20210715_174147-3nkn7hxg/files/requirements.txt +94 -0
- wandb/run-20210715_174147-3nkn7hxg/files/wandb-metadata.json +45 -0
- wandb/run-20210715_174147-3nkn7hxg/files/wandb-summary.json +1 -0
- wandb/run-20210715_174147-3nkn7hxg/logs/debug-internal.log +195 -0
- wandb/run-20210715_174147-3nkn7hxg/logs/debug.log +119 -0
- wandb/run-20210715_174147-3nkn7hxg/run-3nkn7hxg.wandb +0 -0
- wandb/run-20210715_175147-3lygnexi/files/config.yaml +301 -0
- wandb/run-20210715_175147-3lygnexi/files/output.log +26 -0
- wandb/run-20210715_175147-3lygnexi/files/requirements.txt +94 -0
- wandb/run-20210715_175147-3lygnexi/files/wandb-metadata.json +45 -0
- wandb/run-20210715_175147-3lygnexi/files/wandb-summary.json +1 -0
- wandb/run-20210715_175147-3lygnexi/logs/debug-internal.log +170 -0
checkpoint_105000
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd3729e2e9d09e233e2f4dfecac9dc1888f374b3614c8a092c1bc958fdab2ccf
|
3 |
+
size 1530270447
|
events.out.tfevents.1626362977.t1v-n-f5c06ea1-w-0.707091.3.v2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff143b1f5efcedbe6cb99342667f8e6e2c855c4b038a867b719689a9052d49f9
|
3 |
+
size 1491633
|
events.out.tfevents.1626368154.t1v-n-f5c06ea1-w-0.715071.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9278085ac62dbd79723fd51fc60c6044bcc9f3786ff81b54b11a5f5e9cdd4a8d
|
3 |
+
size 44508
|
events.out.tfevents.1626369005.t1v-n-f5c06ea1-w-0.717656.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11918a96e9d622df1b0d2f1b7999d21bf87fd9b7b7c937dcecd680857aacdd68
|
3 |
+
size 437715
|
events.out.tfevents.1626370906.t1v-n-f5c06ea1-w-0.721922.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5eebb8a50d7001a4d23af995647a2f73df34f476a7489826b8dee64940fa1597
|
3 |
+
size 40
|
events.out.tfevents.1626371506.t1v-n-f5c06ea1-w-0.724375.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e90de36468e161351a0990822bb3945c86e791570c1c95ee000e62bcdd64e7d6
|
3 |
+
size 14886
|
events.out.tfevents.1626372294.t1v-n-f5c06ea1-w-0.727475.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b6b8f2fe86e2ccf16382846366ee00347b75c8b502be2e423a846260c6bae4f
|
3 |
+
size 40
|
events.out.tfevents.1626374797.t1v-n-f5c06ea1-w-0.731696.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f583930fdc45029a3ba0585014533fd8ef9a4c87a21651e280f8759447faca8f
|
3 |
+
size 37561
|
events.out.tfevents.1626375524.t1v-n-f5c06ea1-w-0.734136.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42934a5a9a64204b53646673742f1b60cb101307e03f942f13e21a364e9e5ac7
|
3 |
+
size 752033
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510090043
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e40ad9ce0279ffbd95b20a8774cb1469a1b0a3a27fe354529522addece034982
|
3 |
size 510090043
|
run.sh
CHANGED
@@ -15,14 +15,14 @@ python ./run_mlm_flax_no_accum.py \
|
|
15 |
--adam_beta1="0.9" \
|
16 |
--adam_beta2="0.98" \
|
17 |
--logging_steps="50" \
|
18 |
-
--eval_steps="
|
19 |
--num_train_epochs="2"\
|
20 |
--preprocessing_num_workers="96" \
|
21 |
-
--save_steps="
|
22 |
--learning_rate="3e-5" \
|
23 |
--per_device_train_batch_size="1" \
|
24 |
--per_device_eval_batch_size="1" \
|
25 |
-
--save_total_limit="
|
26 |
--max_eval_samples="4000"\
|
27 |
--resume_from_checkpoint="./"\
|
28 |
#--gradient_accumulation_steps="4"\
|
|
|
15 |
--adam_beta1="0.9" \
|
16 |
--adam_beta2="0.98" \
|
17 |
--logging_steps="50" \
|
18 |
+
--eval_steps="6000" \
|
19 |
--num_train_epochs="2"\
|
20 |
--preprocessing_num_workers="96" \
|
21 |
+
--save_steps="15000" \
|
22 |
--learning_rate="3e-5" \
|
23 |
--per_device_train_batch_size="1" \
|
24 |
--per_device_eval_batch_size="1" \
|
25 |
+
--save_total_limit="20"\
|
26 |
--max_eval_samples="4000"\
|
27 |
--resume_from_checkpoint="./"\
|
28 |
#--gradient_accumulation_steps="4"\
|
run_mlm_flax_no_accum.py
CHANGED
@@ -689,9 +689,9 @@ if __name__ == "__main__":
|
|
689 |
num_train_samples = len(tokenized_datasets["train"])
|
690 |
train_samples_idx = jax.random.permutation(input_rng, jnp.arange(num_train_samples))
|
691 |
train_batch_idx = generate_batch_splits(train_samples_idx, train_batch_size)
|
692 |
-
|
693 |
# Gather the indexes for creating the batch and do a training step
|
694 |
-
for step, batch_idx in enumerate(
|
695 |
samples = [tokenized_datasets["train"][int(idx)] for idx in batch_idx]
|
696 |
model_inputs = data_collator(samples, pad_to_multiple_of=16)
|
697 |
|
@@ -699,8 +699,10 @@ if __name__ == "__main__":
|
|
699 |
model_inputs = shard(model_inputs.data)
|
700 |
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
701 |
train_metrics.append(train_metric)
|
702 |
-
|
703 |
cur_step = epoch * (num_train_samples // train_batch_size) + step
|
|
|
|
|
704 |
#if cur_step < resume_step:
|
705 |
# continue
|
706 |
|
|
|
689 |
num_train_samples = len(tokenized_datasets["train"])
|
690 |
train_samples_idx = jax.random.permutation(input_rng, jnp.arange(num_train_samples))
|
691 |
train_batch_idx = generate_batch_splits(train_samples_idx, train_batch_size)
|
692 |
+
hooks = []
|
693 |
# Gather the indexes for creating the batch and do a training step
|
694 |
+
for step, batch_idx in tqdm(enumerate(train_batch_idx,start=resume_step), desc="Training...", position=1):
|
695 |
samples = [tokenized_datasets["train"][int(idx)] for idx in batch_idx]
|
696 |
model_inputs = data_collator(samples, pad_to_multiple_of=16)
|
697 |
|
|
|
699 |
model_inputs = shard(model_inputs.data)
|
700 |
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
701 |
train_metrics.append(train_metric)
|
702 |
+
|
703 |
cur_step = epoch * (num_train_samples // train_batch_size) + step
|
704 |
+
if cur_step == resume_step:
|
705 |
+
logging.info('Initial compilation completed.')
|
706 |
#if cur_step < resume_step:
|
707 |
# continue
|
708 |
|
wandb/debug-internal.log
CHANGED
@@ -1 +1 @@
|
|
1 |
-
run-
|
|
|
1 |
+
run-20210715_185845-dq8uirtg/logs/debug-internal.log
|
wandb/debug.log
CHANGED
@@ -1 +1 @@
|
|
1 |
-
run-
|
|
|
1 |
+
run-20210715_185845-dq8uirtg/logs/debug.log
|
wandb/latest-run
CHANGED
@@ -1 +1 @@
|
|
1 |
-
run-
|
|
|
1 |
+
run-20210715_185845-dq8uirtg
|
wandb/run-20210715_152938-8qznp93p/files/output.log
CHANGED
@@ -1220,3 +1220,35 @@ tcmalloc: large alloc 1530273792 bytes == 0x31850a000 @ 0x7f3586844680 0x7f3586
|
|
1220 |
[16:50:06] - INFO - absl - Saved checkpoint at checkpoint_10000
|
1221 |
[16:50:07] - INFO - huggingface_hub.repository - git version 2.25.1
|
1222 |
git-lfs/2.9.2 (GitHub; linux amd64; go 1.13.5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1220 |
[16:50:06] - INFO - absl - Saved checkpoint at checkpoint_10000
|
1221 |
[16:50:07] - INFO - huggingface_hub.repository - git version 2.25.1
|
1222 |
git-lfs/2.9.2 (GitHub; linux amd64; go 1.13.5)
|
1223 |
+
[16:50:07] - DEBUG - huggingface_hub.repository - [Repository] is a valid git repo
|
1224 |
+
[16:51:22] - INFO - huggingface_hub.repository - Uploading LFS objects: 100% (5/5), 2.0 GB | 43 MB/s, done.
|
1225 |
+
|
1226 |
+
|
1227 |
+
|
1228 |
+
Training...: 28%|███████████████████████████████████████▉ | 100051/352766 [1:16:06<25:42:07, 2.73it/s]
|
1229 |
+
|
1230 |
+
|
1231 |
+
|
1232 |
+
|
1233 |
+
Step... (9000 | Loss: 2.3799679279327393, Acc: 0.5589502453804016): 0%| | 0/2 [1:22:05<?, ?it/s]
|
1234 |
+
Traceback (most recent call last):
|
1235 |
+
File "./run_mlm_flax_no_accum.py", line 712, in <module>
|
1236 |
+
write_train_metric(summary_writer, train_metrics, train_time, cur_step)
|
1237 |
+
File "./run_mlm_flax_no_accum.py", line 263, in write_train_metric
|
1238 |
+
train_metrics = get_metrics(train_metrics)
|
1239 |
+
File "/home/dat/pino/lib/python3.8/site-packages/flax/training/common_utils.py", line 52, in get_metrics
|
1240 |
+
metrics_np = jax.device_get(device_metrics)
|
1241 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 2337, in device_get
|
1242 |
+
return tree_map(_device_get, x)
|
1243 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/tree_util.py", line 168, in tree_map
|
1244 |
+
return treedef.unflatten(f(*xs) for xs in zip(*all_leaves))
|
1245 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/tree_util.py", line 168, in <genexpr>
|
1246 |
+
return treedef.unflatten(f(*xs) for xs in zip(*all_leaves))
|
1247 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 2329, in _device_get
|
1248 |
+
return copy()
|
1249 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 1221, in copy
|
1250 |
+
return np.asarray(self)
|
1251 |
+
File "/home/dat/pino/lib/python3.8/site-packages/numpy/core/_asarray.py", line 83, in asarray
|
1252 |
+
return array(a, dtype, copy=False, order=order)
|
1253 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 1286, in __array__
|
1254 |
+
return np.asarray(self._value, dtype=dtype)
|
wandb/run-20210715_152938-8qznp93p/files/wandb-summary.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"training_step":
|
|
|
1 |
+
{"training_step": 10050, "learning_rate": 2.6115878426935524e-05, "train_loss": 2.5157060623168945, "_runtime": 4924, "_timestamp": 1626367902, "_step": 203, "eval_step": 9000, "eval_accuracy": 0.5589502453804016, "eval_loss": 2.3799679279327393}
|
wandb/run-20210715_152938-8qznp93p/logs/debug-internal.log
CHANGED
@@ -2791,3 +2791,52 @@
|
|
2791 |
2021-07-15 16:50:16,636 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
|
2792 |
2021-07-15 16:50:16,637 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
|
2793 |
2021-07-15 16:50:20,859 DEBUG SenderThread:708348 [sender.py:send():179] send: stats
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2791 |
2021-07-15 16:50:16,636 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
|
2792 |
2021-07-15 16:50:16,637 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
|
2793 |
2021-07-15 16:50:20,859 DEBUG SenderThread:708348 [sender.py:send():179] send: stats
|
2794 |
+
2021-07-15 16:50:31,769 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
|
2795 |
+
2021-07-15 16:50:31,770 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
|
2796 |
+
2021-07-15 16:50:46,904 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
|
2797 |
+
2021-07-15 16:50:46,904 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
|
2798 |
+
2021-07-15 16:50:50,940 DEBUG SenderThread:708348 [sender.py:send():179] send: stats
|
2799 |
+
2021-07-15 16:51:02,034 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
|
2800 |
+
2021-07-15 16:51:02,035 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
|
2801 |
+
2021-07-15 16:51:17,167 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
|
2802 |
+
2021-07-15 16:51:17,167 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
|
2803 |
+
2021-07-15 16:51:21,017 DEBUG SenderThread:708348 [sender.py:send():179] send: stats
|
2804 |
+
2021-07-15 16:51:23,329 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
|
2805 |
+
2021-07-15 16:51:25,330 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
|
2806 |
+
2021-07-15 16:51:27,330 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
|
2807 |
+
2021-07-15 16:51:29,331 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
|
2808 |
+
2021-07-15 16:51:31,332 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
|
2809 |
+
2021-07-15 16:51:32,298 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
|
2810 |
+
2021-07-15 16:51:32,298 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
|
2811 |
+
2021-07-15 16:51:42,885 DEBUG SenderThread:708348 [sender.py:send():179] send: history
|
2812 |
+
2021-07-15 16:51:42,886 DEBUG SenderThread:708348 [sender.py:send():179] send: summary
|
2813 |
+
2021-07-15 16:51:42,886 INFO SenderThread:708348 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
2814 |
+
2021-07-15 16:51:43,337 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/wandb-summary.json
|
2815 |
+
2021-07-15 16:51:45,338 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
|
2816 |
+
2021-07-15 16:51:47,339 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
|
2817 |
+
2021-07-15 16:51:47,437 DEBUG HandlerThread:708348 [handler.py:handle_request():124] handle_request: stop_status
|
2818 |
+
2021-07-15 16:51:47,438 DEBUG SenderThread:708348 [sender.py:send_request():193] send_request: stop_status
|
2819 |
+
2021-07-15 16:51:49,339 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
|
2820 |
+
2021-07-15 16:51:51,095 DEBUG SenderThread:708348 [sender.py:send():179] send: stats
|
2821 |
+
2021-07-15 16:51:51,340 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
|
2822 |
+
2021-07-15 16:52:01,993 WARNING MainThread:708348 [internal.py:wandb_internal():147] Internal process interrupt: 1
|
2823 |
+
2021-07-15 16:52:03,090 WARNING MainThread:708348 [internal.py:wandb_internal():147] Internal process interrupt: 2
|
2824 |
+
2021-07-15 16:52:03,091 ERROR MainThread:708348 [internal.py:wandb_internal():150] Internal process interrupted.
|
2825 |
+
2021-07-15 16:52:03,345 INFO Thread-8 :708348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
|
2826 |
+
2021-07-15 16:52:03,763 INFO SenderThread:708348 [sender.py:finish():945] shutting down sender
|
2827 |
+
2021-07-15 16:52:03,763 INFO WriterThread:708348 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/run-8qznp93p.wandb
|
2828 |
+
2021-07-15 16:52:03,763 INFO SenderThread:708348 [dir_watcher.py:finish():282] shutting down directory watcher
|
2829 |
+
2021-07-15 16:52:03,764 INFO HandlerThread:708348 [handler.py:finish():638] shutting down handler
|
2830 |
+
2021-07-15 16:52:04,346 INFO SenderThread:708348 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files
|
2831 |
+
2021-07-15 16:52:04,346 INFO SenderThread:708348 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/requirements.txt requirements.txt
|
2832 |
+
2021-07-15 16:52:04,346 INFO SenderThread:708348 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log output.log
|
2833 |
+
2021-07-15 16:52:04,346 INFO SenderThread:708348 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/wandb-metadata.json wandb-metadata.json
|
2834 |
+
2021-07-15 16:52:04,346 INFO SenderThread:708348 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/config.yaml config.yaml
|
2835 |
+
2021-07-15 16:52:04,346 INFO SenderThread:708348 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/wandb-summary.json wandb-summary.json
|
2836 |
+
2021-07-15 16:52:04,347 INFO SenderThread:708348 [file_pusher.py:finish():177] shutting down file pusher
|
2837 |
+
2021-07-15 16:52:04,347 INFO SenderThread:708348 [file_pusher.py:join():182] waiting for file pusher
|
2838 |
+
2021-07-15 16:52:04,799 INFO Thread-15 :708348 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/wandb-summary.json
|
2839 |
+
2021-07-15 16:52:04,811 INFO Thread-14 :708348 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/config.yaml
|
2840 |
+
2021-07-15 16:52:04,820 INFO Thread-13 :708348 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/output.log
|
2841 |
+
2021-07-15 16:52:04,835 INFO Thread-12 :708348 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_152938-8qznp93p/files/requirements.txt
|
2842 |
+
2021-07-15 16:52:05,617 INFO MainThread:708348 [internal.py:handle_exit():78] Internal process exited
|
wandb/run-20210715_152938-8qznp93p/logs/debug.log
CHANGED
@@ -23,3 +23,5 @@ config: {}
|
|
23 |
2021-07-15 15:29:40,498 INFO MainThread:707091 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_15-29-30_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 10000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 3000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
2021-07-15 15:29:40,500 INFO MainThread:707091 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
|
25 |
2021-07-15 15:29:40,501 INFO MainThread:707091 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 4000}
|
|
|
|
|
|
23 |
2021-07-15 15:29:40,498 INFO MainThread:707091 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_15-29-30_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 10000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 3000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
2021-07-15 15:29:40,500 INFO MainThread:707091 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
|
25 |
2021-07-15 15:29:40,501 INFO MainThread:707091 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 4000}
|
26 |
+
2021-07-15 16:52:02,189 INFO MainThread:707091 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 255
|
27 |
+
2021-07-15 16:52:02,189 INFO MainThread:707091 [wandb_run.py:_restore():1565] restore
|
wandb/run-20210715_152938-8qznp93p/run-8qznp93p.wandb
CHANGED
Binary files a/wandb/run-20210715_152938-8qznp93p/run-8qznp93p.wandb and b/wandb/run-20210715_152938-8qznp93p/run-8qznp93p.wandb differ
|
|
wandb/run-20210715_165555-25rtfw59/files/config.yaml
ADDED
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
4: 3.8.10
|
17 |
+
5: 0.10.33
|
18 |
+
6: 4.9.0.dev0
|
19 |
+
8:
|
20 |
+
- 5
|
21 |
+
adafactor:
|
22 |
+
desc: null
|
23 |
+
value: false
|
24 |
+
adam_beta1:
|
25 |
+
desc: null
|
26 |
+
value: 0.9
|
27 |
+
adam_beta2:
|
28 |
+
desc: null
|
29 |
+
value: 0.98
|
30 |
+
adam_epsilon:
|
31 |
+
desc: null
|
32 |
+
value: 1.0e-08
|
33 |
+
cache_dir:
|
34 |
+
desc: null
|
35 |
+
value: null
|
36 |
+
config_name:
|
37 |
+
desc: null
|
38 |
+
value: ./
|
39 |
+
dataloader_drop_last:
|
40 |
+
desc: null
|
41 |
+
value: false
|
42 |
+
dataloader_num_workers:
|
43 |
+
desc: null
|
44 |
+
value: 0
|
45 |
+
dataloader_pin_memory:
|
46 |
+
desc: null
|
47 |
+
value: true
|
48 |
+
dataset_config_name:
|
49 |
+
desc: null
|
50 |
+
value: null
|
51 |
+
dataset_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
ddp_find_unused_parameters:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
debug:
|
58 |
+
desc: null
|
59 |
+
value: []
|
60 |
+
deepspeed:
|
61 |
+
desc: null
|
62 |
+
value: null
|
63 |
+
disable_tqdm:
|
64 |
+
desc: null
|
65 |
+
value: false
|
66 |
+
do_eval:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_predict:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_train:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
dtype:
|
76 |
+
desc: null
|
77 |
+
value: float32
|
78 |
+
eval_accumulation_steps:
|
79 |
+
desc: null
|
80 |
+
value: null
|
81 |
+
eval_steps:
|
82 |
+
desc: null
|
83 |
+
value: 6000
|
84 |
+
evaluation_strategy:
|
85 |
+
desc: null
|
86 |
+
value: IntervalStrategy.NO
|
87 |
+
fp16:
|
88 |
+
desc: null
|
89 |
+
value: false
|
90 |
+
fp16_backend:
|
91 |
+
desc: null
|
92 |
+
value: auto
|
93 |
+
fp16_full_eval:
|
94 |
+
desc: null
|
95 |
+
value: false
|
96 |
+
fp16_opt_level:
|
97 |
+
desc: null
|
98 |
+
value: O1
|
99 |
+
gradient_accumulation_steps:
|
100 |
+
desc: null
|
101 |
+
value: 1
|
102 |
+
greater_is_better:
|
103 |
+
desc: null
|
104 |
+
value: null
|
105 |
+
group_by_length:
|
106 |
+
desc: null
|
107 |
+
value: false
|
108 |
+
ignore_data_skip:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
label_names:
|
112 |
+
desc: null
|
113 |
+
value: null
|
114 |
+
label_smoothing_factor:
|
115 |
+
desc: null
|
116 |
+
value: 0.0
|
117 |
+
learning_rate:
|
118 |
+
desc: null
|
119 |
+
value: 3.0e-05
|
120 |
+
length_column_name:
|
121 |
+
desc: null
|
122 |
+
value: length
|
123 |
+
line_by_line:
|
124 |
+
desc: null
|
125 |
+
value: false
|
126 |
+
load_best_model_at_end:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
local_rank:
|
130 |
+
desc: null
|
131 |
+
value: -1
|
132 |
+
log_level:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level_replica:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_on_each_node:
|
139 |
+
desc: null
|
140 |
+
value: true
|
141 |
+
logging_dir:
|
142 |
+
desc: null
|
143 |
+
value: ./runs/Jul15_16-55-47_t1v-n-f5c06ea1-w-0
|
144 |
+
logging_first_step:
|
145 |
+
desc: null
|
146 |
+
value: false
|
147 |
+
logging_steps:
|
148 |
+
desc: null
|
149 |
+
value: 50
|
150 |
+
logging_strategy:
|
151 |
+
desc: null
|
152 |
+
value: IntervalStrategy.STEPS
|
153 |
+
lr_scheduler_type:
|
154 |
+
desc: null
|
155 |
+
value: SchedulerType.LINEAR
|
156 |
+
max_eval_samples:
|
157 |
+
desc: null
|
158 |
+
value: 4000
|
159 |
+
max_grad_norm:
|
160 |
+
desc: null
|
161 |
+
value: 1.0
|
162 |
+
max_seq_length:
|
163 |
+
desc: null
|
164 |
+
value: 4096
|
165 |
+
max_steps:
|
166 |
+
desc: null
|
167 |
+
value: -1
|
168 |
+
metric_for_best_model:
|
169 |
+
desc: null
|
170 |
+
value: null
|
171 |
+
mlm_probability:
|
172 |
+
desc: null
|
173 |
+
value: 0.15
|
174 |
+
model_name_or_path:
|
175 |
+
desc: null
|
176 |
+
value: null
|
177 |
+
model_type:
|
178 |
+
desc: null
|
179 |
+
value: big_bird
|
180 |
+
mp_parameters:
|
181 |
+
desc: null
|
182 |
+
value: ''
|
183 |
+
no_cuda:
|
184 |
+
desc: null
|
185 |
+
value: false
|
186 |
+
num_train_epochs:
|
187 |
+
desc: null
|
188 |
+
value: 2.0
|
189 |
+
output_dir:
|
190 |
+
desc: null
|
191 |
+
value: ./
|
192 |
+
overwrite_cache:
|
193 |
+
desc: null
|
194 |
+
value: false
|
195 |
+
overwrite_output_dir:
|
196 |
+
desc: null
|
197 |
+
value: true
|
198 |
+
pad_to_max_length:
|
199 |
+
desc: null
|
200 |
+
value: false
|
201 |
+
past_index:
|
202 |
+
desc: null
|
203 |
+
value: -1
|
204 |
+
per_device_eval_batch_size:
|
205 |
+
desc: null
|
206 |
+
value: 1
|
207 |
+
per_device_train_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 1
|
210 |
+
per_gpu_eval_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: null
|
213 |
+
per_gpu_train_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
prediction_loss_only:
|
217 |
+
desc: null
|
218 |
+
value: false
|
219 |
+
preprocessing_num_workers:
|
220 |
+
desc: null
|
221 |
+
value: 96
|
222 |
+
push_to_hub:
|
223 |
+
desc: null
|
224 |
+
value: true
|
225 |
+
push_to_hub_model_id:
|
226 |
+
desc: null
|
227 |
+
value: ''
|
228 |
+
push_to_hub_organization:
|
229 |
+
desc: null
|
230 |
+
value: null
|
231 |
+
push_to_hub_token:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
remove_unused_columns:
|
235 |
+
desc: null
|
236 |
+
value: true
|
237 |
+
report_to:
|
238 |
+
desc: null
|
239 |
+
value:
|
240 |
+
- tensorboard
|
241 |
+
- wandb
|
242 |
+
resume_from_checkpoint:
|
243 |
+
desc: null
|
244 |
+
value: ./
|
245 |
+
run_name:
|
246 |
+
desc: null
|
247 |
+
value: ./
|
248 |
+
save_on_each_node:
|
249 |
+
desc: null
|
250 |
+
value: false
|
251 |
+
save_steps:
|
252 |
+
desc: null
|
253 |
+
value: 15000
|
254 |
+
save_strategy:
|
255 |
+
desc: null
|
256 |
+
value: IntervalStrategy.STEPS
|
257 |
+
save_total_limit:
|
258 |
+
desc: null
|
259 |
+
value: 20
|
260 |
+
seed:
|
261 |
+
desc: null
|
262 |
+
value: 42
|
263 |
+
sharded_ddp:
|
264 |
+
desc: null
|
265 |
+
value: []
|
266 |
+
skip_memory_metrics:
|
267 |
+
desc: null
|
268 |
+
value: true
|
269 |
+
tokenizer_name:
|
270 |
+
desc: null
|
271 |
+
value: ./
|
272 |
+
tpu_metrics_debug:
|
273 |
+
desc: null
|
274 |
+
value: false
|
275 |
+
tpu_num_cores:
|
276 |
+
desc: null
|
277 |
+
value: null
|
278 |
+
train_ref_file:
|
279 |
+
desc: null
|
280 |
+
value: null
|
281 |
+
use_fast_tokenizer:
|
282 |
+
desc: null
|
283 |
+
value: true
|
284 |
+
use_legacy_prediction_loop:
|
285 |
+
desc: null
|
286 |
+
value: false
|
287 |
+
validation_ref_file:
|
288 |
+
desc: null
|
289 |
+
value: null
|
290 |
+
validation_split_percentage:
|
291 |
+
desc: null
|
292 |
+
value: 5
|
293 |
+
warmup_ratio:
|
294 |
+
desc: null
|
295 |
+
value: 0.0
|
296 |
+
warmup_steps:
|
297 |
+
desc: null
|
298 |
+
value: 10000
|
299 |
+
weight_decay:
|
300 |
+
desc: null
|
301 |
+
value: 0.0095
|
wandb/run-20210715_165555-25rtfw59/files/output.log
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[16:56:11] - INFO - absl - Restoring checkpoint from ./checkpoint_10000
|
2 |
+
tcmalloc: large alloc 1530273792 bytes == 0x9b046000 @ 0x7f018a0fa680 0x7f018a11b824 0x5b9a14 0x50b2ae 0x50cb1b 0x5a6f17 0x5f3010 0x56fd36 0x568d9a 0x5f5b33 0x56aadf 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7f0189f0f0b3 0x5f96de
|
3 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
|
4 |
+
warnings.warn(
|
5 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
6 |
+
warnings.warn(
|
7 |
+
Epoch ... (1/2): 0%| | 0/2 [00:00<?, ?it/s]
|
8 |
+
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
|
13 |
+
|
14 |
+
|
15 |
+
Training...: 28%|██████████████████████████████████████▌ | 100059/352766 [04:37<64:29:15, 1.09it/s]
|
16 |
+
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
Training...: 28%|██████████████████████████████████████▌ | 100102/352766 [04:43<26:15:09, 2.67it/s]
|
21 |
+
|
22 |
+
|
23 |
+
|
24 |
+
|
25 |
+
Training...: 28%|██████████████████████████████████████▌ | 100152/352766 [05:03<26:05:11, 2.69it/s]
|
26 |
+
|
27 |
+
|
28 |
+
|
29 |
+
|
30 |
+
|
31 |
+
Training...: 28%|██████████████████████████████████████▋ | 100202/352766 [05:23<26:12:27, 2.68it/s]
|
32 |
+
|
33 |
+
|
34 |
+
|
35 |
+
|
36 |
+
|
37 |
+
Training...: 28%|██████████████████████████████████████▋ | 100252/352766 [05:43<25:55:44, 2.71it/s]
|
38 |
+
|
39 |
+
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
Training...: 28%|██████████████████████████████████████▋ | 100302/352766 [06:03<26:14:51, 2.67it/s]
|
44 |
+
|
45 |
+
|
46 |
+
|
47 |
+
|
48 |
+
|
wandb/run-20210715_165555-25rtfw59/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|
wandb/run-20210715_165555-25rtfw59/files/wandb-metadata.json
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2021-07-15T16:55:57.916568",
|
5 |
+
"startedAt": "2021-07-15T16:55:55.783375",
|
6 |
+
"docker": null,
|
7 |
+
"cpu_count": 96,
|
8 |
+
"cuda": null,
|
9 |
+
"args": [
|
10 |
+
"--push_to_hub",
|
11 |
+
"--output_dir=./",
|
12 |
+
"--model_type=big_bird",
|
13 |
+
"--config_name=./",
|
14 |
+
"--tokenizer_name=./",
|
15 |
+
"--max_seq_length=4096",
|
16 |
+
"--weight_decay=0.0095",
|
17 |
+
"--warmup_steps=10000",
|
18 |
+
"--overwrite_output_dir",
|
19 |
+
"--adam_beta1=0.9",
|
20 |
+
"--adam_beta2=0.98",
|
21 |
+
"--logging_steps=50",
|
22 |
+
"--eval_steps=6000",
|
23 |
+
"--num_train_epochs=2",
|
24 |
+
"--preprocessing_num_workers=96",
|
25 |
+
"--save_steps=15000",
|
26 |
+
"--learning_rate=3e-5",
|
27 |
+
"--per_device_train_batch_size=1",
|
28 |
+
"--per_device_eval_batch_size=1",
|
29 |
+
"--save_total_limit=20",
|
30 |
+
"--max_eval_samples=4000",
|
31 |
+
"--resume_from_checkpoint=./"
|
32 |
+
],
|
33 |
+
"state": "running",
|
34 |
+
"program": "./run_mlm_flax_no_accum.py",
|
35 |
+
"codePath": "run_mlm_flax_no_accum.py",
|
36 |
+
"git": {
|
37 |
+
"remote": "https://huggingface.co/flax-community/pino-roberta-base",
|
38 |
+
"commit": "cc569aecf5e26454416d7a13c7876ad9111120cf"
|
39 |
+
},
|
40 |
+
"email": null,
|
41 |
+
"root": "/home/dat/pino-roberta-base",
|
42 |
+
"host": "t1v-n-f5c06ea1-w-0",
|
43 |
+
"username": "dat",
|
44 |
+
"executable": "/home/dat/pino/bin/python"
|
45 |
+
}
|
wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"training_step": 300, "learning_rate": 2.6105053620995022e-05, "train_loss": 2.366305351257324, "_runtime": 722, "_timestamp": 1626368877, "_step": 5}
|
wandb/run-20210715_165555-25rtfw59/logs/debug-internal.log
ADDED
@@ -0,0 +1,236 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-15 16:55:56,515 INFO MainThread:716328 [internal.py:wandb_internal():88] W&B internal server running at pid: 716328, started at: 2021-07-15 16:55:56.514842
|
2 |
+
2021-07-15 16:55:56,517 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: check_version
|
3 |
+
2021-07-15 16:55:56,517 INFO WriterThread:716328 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/run-25rtfw59.wandb
|
4 |
+
2021-07-15 16:55:56,518 DEBUG SenderThread:716328 [sender.py:send():179] send: header
|
5 |
+
2021-07-15 16:55:56,518 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: check_version
|
6 |
+
2021-07-15 16:55:56,561 DEBUG SenderThread:716328 [sender.py:send():179] send: run
|
7 |
+
2021-07-15 16:55:56,732 INFO SenderThread:716328 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files
|
8 |
+
2021-07-15 16:55:56,732 INFO SenderThread:716328 [sender.py:_start_run_threads():716] run started: 25rtfw59 with start time 1626368155
|
9 |
+
2021-07-15 16:55:56,732 DEBUG SenderThread:716328 [sender.py:send():179] send: summary
|
10 |
+
2021-07-15 16:55:56,733 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
11 |
+
2021-07-15 16:55:56,733 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: run_start
|
12 |
+
2021-07-15 16:55:57,738 INFO Thread-8 :716328 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
|
13 |
+
2021-07-15 16:55:57,916 DEBUG HandlerThread:716328 [meta.py:__init__():39] meta init
|
14 |
+
2021-07-15 16:55:57,916 DEBUG HandlerThread:716328 [meta.py:__init__():53] meta init done
|
15 |
+
2021-07-15 16:55:57,916 DEBUG HandlerThread:716328 [meta.py:probe():210] probe
|
16 |
+
2021-07-15 16:55:57,917 DEBUG HandlerThread:716328 [meta.py:_setup_git():200] setup git
|
17 |
+
2021-07-15 16:55:57,953 DEBUG HandlerThread:716328 [meta.py:_setup_git():207] setup git done
|
18 |
+
2021-07-15 16:55:57,953 DEBUG HandlerThread:716328 [meta.py:_save_pip():57] save pip
|
19 |
+
2021-07-15 16:55:57,953 DEBUG HandlerThread:716328 [meta.py:_save_pip():71] save pip done
|
20 |
+
2021-07-15 16:55:57,953 DEBUG HandlerThread:716328 [meta.py:probe():252] probe done
|
21 |
+
2021-07-15 16:55:57,957 DEBUG SenderThread:716328 [sender.py:send():179] send: files
|
22 |
+
2021-07-15 16:55:57,957 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
|
23 |
+
2021-07-15 16:55:57,963 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
24 |
+
2021-07-15 16:55:57,963 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
25 |
+
2021-07-15 16:55:58,096 DEBUG SenderThread:716328 [sender.py:send():179] send: config
|
26 |
+
2021-07-15 16:55:58,097 DEBUG SenderThread:716328 [sender.py:send():179] send: config
|
27 |
+
2021-07-15 16:55:58,097 DEBUG SenderThread:716328 [sender.py:send():179] send: config
|
28 |
+
2021-07-15 16:55:58,393 INFO Thread-11 :716328 [upload_job.py:push():137] Uploaded file /tmp/tmpbbcxwsn2wandb/2zsx43w4-wandb-metadata.json
|
29 |
+
2021-07-15 16:55:58,736 INFO Thread-8 :716328 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
30 |
+
2021-07-15 16:55:58,737 INFO Thread-8 :716328 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-metadata.json
|
31 |
+
2021-07-15 16:55:58,737 INFO Thread-8 :716328 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/requirements.txt
|
32 |
+
2021-07-15 16:56:13,662 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
33 |
+
2021-07-15 16:56:13,662 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
34 |
+
2021-07-15 16:56:14,743 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
35 |
+
2021-07-15 16:56:16,744 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
36 |
+
2021-07-15 16:56:26,001 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
37 |
+
2021-07-15 16:56:27,749 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/config.yaml
|
38 |
+
2021-07-15 16:56:28,821 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
39 |
+
2021-07-15 16:56:28,821 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
40 |
+
2021-07-15 16:56:43,953 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
41 |
+
2021-07-15 16:56:43,953 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
42 |
+
2021-07-15 16:56:56,082 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
43 |
+
2021-07-15 16:56:59,099 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
44 |
+
2021-07-15 16:56:59,099 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
45 |
+
2021-07-15 16:57:14,230 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
46 |
+
2021-07-15 16:57:14,230 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
47 |
+
2021-07-15 16:57:26,150 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
48 |
+
2021-07-15 16:57:29,366 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
49 |
+
2021-07-15 16:57:29,366 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
50 |
+
2021-07-15 16:57:44,497 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
51 |
+
2021-07-15 16:57:44,498 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
52 |
+
2021-07-15 16:57:56,224 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
53 |
+
2021-07-15 16:57:59,628 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
54 |
+
2021-07-15 16:57:59,628 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
55 |
+
2021-07-15 16:58:14,771 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
56 |
+
2021-07-15 16:58:14,771 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
57 |
+
2021-07-15 16:58:26,290 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
58 |
+
2021-07-15 16:58:29,901 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
59 |
+
2021-07-15 16:58:29,901 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
60 |
+
2021-07-15 16:58:45,036 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
61 |
+
2021-07-15 16:58:45,037 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
62 |
+
2021-07-15 16:58:56,353 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
63 |
+
2021-07-15 16:59:00,169 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
64 |
+
2021-07-15 16:59:00,169 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
65 |
+
2021-07-15 16:59:15,303 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
66 |
+
2021-07-15 16:59:15,303 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
67 |
+
2021-07-15 16:59:26,416 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
68 |
+
2021-07-15 16:59:30,437 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
69 |
+
2021-07-15 16:59:30,438 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
70 |
+
2021-07-15 16:59:45,570 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
71 |
+
2021-07-15 16:59:45,570 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
72 |
+
2021-07-15 16:59:56,482 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
73 |
+
2021-07-15 17:00:00,706 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
74 |
+
2021-07-15 17:00:00,706 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
75 |
+
2021-07-15 17:00:15,845 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
76 |
+
2021-07-15 17:00:15,846 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
77 |
+
2021-07-15 17:00:26,546 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
78 |
+
2021-07-15 17:00:30,981 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
79 |
+
2021-07-15 17:00:30,982 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
80 |
+
2021-07-15 17:00:46,115 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
81 |
+
2021-07-15 17:00:46,116 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
82 |
+
2021-07-15 17:00:56,610 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
83 |
+
2021-07-15 17:01:01,251 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
84 |
+
2021-07-15 17:01:01,251 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
85 |
+
2021-07-15 17:01:16,388 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
86 |
+
2021-07-15 17:01:16,388 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
87 |
+
2021-07-15 17:01:26,676 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
88 |
+
2021-07-15 17:01:31,521 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
89 |
+
2021-07-15 17:01:31,522 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
90 |
+
2021-07-15 17:01:40,875 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
91 |
+
2021-07-15 17:01:46,668 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
92 |
+
2021-07-15 17:01:46,668 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
93 |
+
2021-07-15 17:01:56,742 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
94 |
+
2021-07-15 17:02:01,854 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
95 |
+
2021-07-15 17:02:01,855 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
96 |
+
2021-07-15 17:02:17,001 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
97 |
+
2021-07-15 17:02:17,002 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
98 |
+
2021-07-15 17:02:26,812 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
99 |
+
2021-07-15 17:02:32,137 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
100 |
+
2021-07-15 17:02:32,138 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
101 |
+
2021-07-15 17:02:47,271 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
102 |
+
2021-07-15 17:02:47,272 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
103 |
+
2021-07-15 17:02:56,881 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
104 |
+
2021-07-15 17:03:02,406 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
105 |
+
2021-07-15 17:03:02,406 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
106 |
+
2021-07-15 17:03:17,544 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
107 |
+
2021-07-15 17:03:17,544 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
108 |
+
2021-07-15 17:03:26,959 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
109 |
+
2021-07-15 17:03:32,679 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
110 |
+
2021-07-15 17:03:32,679 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
111 |
+
2021-07-15 17:03:47,834 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
112 |
+
2021-07-15 17:03:47,835 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
113 |
+
2021-07-15 17:03:49,928 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
114 |
+
2021-07-15 17:03:57,035 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
115 |
+
2021-07-15 17:04:02,987 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
116 |
+
2021-07-15 17:04:02,988 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
117 |
+
2021-07-15 17:04:18,133 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
118 |
+
2021-07-15 17:04:18,133 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
119 |
+
2021-07-15 17:04:27,107 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
120 |
+
2021-07-15 17:04:33,265 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
121 |
+
2021-07-15 17:04:33,266 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
122 |
+
2021-07-15 17:04:48,399 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
123 |
+
2021-07-15 17:04:48,399 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
124 |
+
2021-07-15 17:04:57,177 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
125 |
+
2021-07-15 17:05:03,543 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
126 |
+
2021-07-15 17:05:03,543 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
127 |
+
2021-07-15 17:05:18,688 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
128 |
+
2021-07-15 17:05:18,688 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
129 |
+
2021-07-15 17:05:27,251 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
130 |
+
2021-07-15 17:05:33,820 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
131 |
+
2021-07-15 17:05:33,820 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
132 |
+
2021-07-15 17:05:48,955 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
133 |
+
2021-07-15 17:05:48,955 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
134 |
+
2021-07-15 17:05:57,351 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
135 |
+
2021-07-15 17:05:58,008 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
136 |
+
2021-07-15 17:06:00,008 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
137 |
+
2021-07-15 17:06:02,009 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
138 |
+
2021-07-15 17:06:04,010 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
139 |
+
2021-07-15 17:06:04,086 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
140 |
+
2021-07-15 17:06:04,086 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
141 |
+
2021-07-15 17:06:06,011 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
142 |
+
2021-07-15 17:06:17,423 DEBUG SenderThread:716328 [sender.py:send():179] send: history
|
143 |
+
2021-07-15 17:06:17,424 DEBUG SenderThread:716328 [sender.py:send():179] send: summary
|
144 |
+
2021-07-15 17:06:17,424 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
145 |
+
2021-07-15 17:06:18,015 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
146 |
+
2021-07-15 17:06:18,016 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
|
147 |
+
2021-07-15 17:06:19,240 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
148 |
+
2021-07-15 17:06:19,241 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
149 |
+
2021-07-15 17:06:20,016 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
150 |
+
2021-07-15 17:06:22,017 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
151 |
+
2021-07-15 17:06:24,018 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
152 |
+
2021-07-15 17:06:26,018 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
153 |
+
2021-07-15 17:06:27,428 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
154 |
+
2021-07-15 17:06:34,378 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
155 |
+
2021-07-15 17:06:34,378 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
156 |
+
2021-07-15 17:06:37,459 DEBUG SenderThread:716328 [sender.py:send():179] send: history
|
157 |
+
2021-07-15 17:06:37,460 DEBUG SenderThread:716328 [sender.py:send():179] send: summary
|
158 |
+
2021-07-15 17:06:37,460 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
159 |
+
2021-07-15 17:06:38,023 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
|
160 |
+
2021-07-15 17:06:40,024 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
161 |
+
2021-07-15 17:06:42,025 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
162 |
+
2021-07-15 17:06:44,025 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
163 |
+
2021-07-15 17:06:46,026 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
164 |
+
2021-07-15 17:06:49,510 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
165 |
+
2021-07-15 17:06:49,511 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
166 |
+
2021-07-15 17:06:57,494 DEBUG SenderThread:716328 [sender.py:send():179] send: history
|
167 |
+
2021-07-15 17:06:57,494 DEBUG SenderThread:716328 [sender.py:send():179] send: summary
|
168 |
+
2021-07-15 17:06:57,496 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
169 |
+
2021-07-15 17:06:57,506 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
170 |
+
2021-07-15 17:06:58,031 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
171 |
+
2021-07-15 17:06:58,031 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
|
172 |
+
2021-07-15 17:06:59,031 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
173 |
+
2021-07-15 17:07:01,032 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
174 |
+
2021-07-15 17:07:03,033 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
175 |
+
2021-07-15 17:07:04,647 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
176 |
+
2021-07-15 17:07:04,647 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
177 |
+
2021-07-15 17:07:05,034 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
178 |
+
2021-07-15 17:07:07,034 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
179 |
+
2021-07-15 17:07:17,535 DEBUG SenderThread:716328 [sender.py:send():179] send: history
|
180 |
+
2021-07-15 17:07:17,535 DEBUG SenderThread:716328 [sender.py:send():179] send: summary
|
181 |
+
2021-07-15 17:07:17,535 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
182 |
+
2021-07-15 17:07:18,039 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
183 |
+
2021-07-15 17:07:18,039 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
|
184 |
+
2021-07-15 17:07:19,039 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
185 |
+
2021-07-15 17:07:19,784 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
186 |
+
2021-07-15 17:07:19,784 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
187 |
+
2021-07-15 17:07:21,040 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
188 |
+
2021-07-15 17:07:23,041 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
189 |
+
2021-07-15 17:07:25,042 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
190 |
+
2021-07-15 17:07:27,042 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
191 |
+
2021-07-15 17:07:27,581 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
192 |
+
2021-07-15 17:07:34,923 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
193 |
+
2021-07-15 17:07:34,924 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
194 |
+
2021-07-15 17:07:37,578 DEBUG SenderThread:716328 [sender.py:send():179] send: history
|
195 |
+
2021-07-15 17:07:37,579 DEBUG SenderThread:716328 [sender.py:send():179] send: summary
|
196 |
+
2021-07-15 17:07:37,579 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
197 |
+
2021-07-15 17:07:38,047 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
|
198 |
+
2021-07-15 17:07:39,047 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
199 |
+
2021-07-15 17:07:41,048 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
200 |
+
2021-07-15 17:07:43,049 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
201 |
+
2021-07-15 17:07:45,049 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
202 |
+
2021-07-15 17:07:47,050 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
203 |
+
2021-07-15 17:07:50,056 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
204 |
+
2021-07-15 17:07:50,056 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
205 |
+
2021-07-15 17:07:57,615 DEBUG SenderThread:716328 [sender.py:send():179] send: history
|
206 |
+
2021-07-15 17:07:57,615 DEBUG SenderThread:716328 [sender.py:send():179] send: summary
|
207 |
+
2021-07-15 17:07:57,617 INFO SenderThread:716328 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
208 |
+
2021-07-15 17:07:57,658 DEBUG SenderThread:716328 [sender.py:send():179] send: stats
|
209 |
+
2021-07-15 17:07:58,055 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
|
210 |
+
2021-07-15 17:07:59,055 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
211 |
+
2021-07-15 17:08:01,056 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
212 |
+
2021-07-15 17:08:03,057 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
213 |
+
2021-07-15 17:08:05,058 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
214 |
+
2021-07-15 17:08:05,190 DEBUG HandlerThread:716328 [handler.py:handle_request():124] handle_request: stop_status
|
215 |
+
2021-07-15 17:08:05,191 DEBUG SenderThread:716328 [sender.py:send_request():193] send_request: stop_status
|
216 |
+
2021-07-15 17:08:07,058 INFO Thread-8 :716328 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
217 |
+
2021-07-15 17:08:09,176 WARNING MainThread:716328 [internal.py:wandb_internal():147] Internal process interrupt: 1
|
218 |
+
2021-07-15 17:08:09,428 WARNING MainThread:716328 [internal.py:wandb_internal():147] Internal process interrupt: 2
|
219 |
+
2021-07-15 17:08:09,428 ERROR MainThread:716328 [internal.py:wandb_internal():150] Internal process interrupted.
|
220 |
+
2021-07-15 17:08:10,189 INFO SenderThread:716328 [sender.py:finish():945] shutting down sender
|
221 |
+
2021-07-15 17:08:10,190 INFO WriterThread:716328 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/run-25rtfw59.wandb
|
222 |
+
2021-07-15 17:08:10,190 INFO SenderThread:716328 [dir_watcher.py:finish():282] shutting down directory watcher
|
223 |
+
2021-07-15 17:08:10,193 INFO HandlerThread:716328 [handler.py:finish():638] shutting down handler
|
224 |
+
2021-07-15 17:08:11,060 INFO SenderThread:716328 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files
|
225 |
+
2021-07-15 17:08:11,061 INFO SenderThread:716328 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/requirements.txt requirements.txt
|
226 |
+
2021-07-15 17:08:11,061 INFO SenderThread:716328 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log output.log
|
227 |
+
2021-07-15 17:08:11,061 INFO SenderThread:716328 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-metadata.json wandb-metadata.json
|
228 |
+
2021-07-15 17:08:11,061 INFO SenderThread:716328 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/config.yaml config.yaml
|
229 |
+
2021-07-15 17:08:11,061 INFO SenderThread:716328 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json wandb-summary.json
|
230 |
+
2021-07-15 17:08:11,062 INFO SenderThread:716328 [file_pusher.py:finish():177] shutting down file pusher
|
231 |
+
2021-07-15 17:08:11,062 INFO SenderThread:716328 [file_pusher.py:join():182] waiting for file pusher
|
232 |
+
2021-07-15 17:08:11,509 INFO Thread-14 :716328 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/config.yaml
|
233 |
+
2021-07-15 17:08:11,528 INFO Thread-13 :716328 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/output.log
|
234 |
+
2021-07-15 17:08:11,529 INFO Thread-12 :716328 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/requirements.txt
|
235 |
+
2021-07-15 17:08:11,574 INFO Thread-15 :716328 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/files/wandb-summary.json
|
236 |
+
2021-07-15 17:08:12,362 INFO MainThread:716328 [internal.py:handle_exit():78] Internal process exited
|
wandb/run-20210715_165555-25rtfw59/logs/debug.log
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-15 16:55:55,785 INFO MainThread:715071 [wandb_setup.py:_flush():69] setting env: {}
|
2 |
+
2021-07-15 16:55:55,785 INFO MainThread:715071 [wandb_setup.py:_flush():69] setting login settings: {}
|
3 |
+
2021-07-15 16:55:55,785 INFO MainThread:715071 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/logs/debug.log
|
4 |
+
2021-07-15 16:55:55,785 INFO MainThread:715071 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_165555-25rtfw59/logs/debug-internal.log
|
5 |
+
2021-07-15 16:55:55,785 INFO MainThread:715071 [wandb_init.py:init():370] calling init triggers
|
6 |
+
2021-07-15 16:55:55,785 INFO MainThread:715071 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
|
7 |
+
config: {}
|
8 |
+
2021-07-15 16:55:55,785 INFO MainThread:715071 [wandb_init.py:init():419] starting backend
|
9 |
+
2021-07-15 16:55:55,785 INFO MainThread:715071 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
10 |
+
2021-07-15 16:55:55,838 INFO MainThread:715071 [backend.py:ensure_launched():135] starting backend process...
|
11 |
+
2021-07-15 16:55:55,890 INFO MainThread:715071 [backend.py:ensure_launched():139] started backend process with pid: 716328
|
12 |
+
2021-07-15 16:55:55,893 INFO MainThread:715071 [wandb_init.py:init():424] backend started and connected
|
13 |
+
2021-07-15 16:55:55,896 INFO MainThread:715071 [wandb_init.py:init():472] updated telemetry
|
14 |
+
2021-07-15 16:55:55,897 INFO MainThread:715071 [wandb_init.py:init():491] communicating current version
|
15 |
+
2021-07-15 16:55:56,559 INFO MainThread:715071 [wandb_init.py:init():496] got version response
|
16 |
+
2021-07-15 16:55:56,560 INFO MainThread:715071 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
|
17 |
+
2021-07-15 16:55:56,733 INFO MainThread:715071 [wandb_init.py:init():529] starting run threads in backend
|
18 |
+
2021-07-15 16:55:57,960 INFO MainThread:715071 [wandb_run.py:_console_start():1623] atexit reg
|
19 |
+
2021-07-15 16:55:57,961 INFO MainThread:715071 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
|
20 |
+
2021-07-15 16:55:57,961 INFO MainThread:715071 [wandb_run.py:_redirect():1502] Redirecting console.
|
21 |
+
2021-07-15 16:55:57,964 INFO MainThread:715071 [wandb_run.py:_redirect():1558] Redirects installed.
|
22 |
+
2021-07-15 16:55:57,964 INFO MainThread:715071 [wandb_init.py:init():554] run started, returning control to user process
|
23 |
+
2021-07-15 16:55:57,970 INFO MainThread:715071 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_16-55-47_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 15000, 'save_total_limit': 20, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 6000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
+
2021-07-15 16:55:57,972 INFO MainThread:715071 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
|
25 |
+
2021-07-15 16:55:57,974 INFO MainThread:715071 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 4000}
|
26 |
+
2021-07-15 17:08:09,232 INFO MainThread:715071 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 255
|
27 |
+
2021-07-15 17:08:09,232 INFO MainThread:715071 [wandb_run.py:_restore():1565] restore
|
wandb/run-20210715_165555-25rtfw59/run-25rtfw59.wandb
ADDED
Binary file (22.3 kB). View file
|
|
wandb/run-20210715_171007-1mu5szt1/files/config.yaml
ADDED
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
4: 3.8.10
|
17 |
+
5: 0.10.33
|
18 |
+
6: 4.9.0.dev0
|
19 |
+
8:
|
20 |
+
- 5
|
21 |
+
adafactor:
|
22 |
+
desc: null
|
23 |
+
value: false
|
24 |
+
adam_beta1:
|
25 |
+
desc: null
|
26 |
+
value: 0.9
|
27 |
+
adam_beta2:
|
28 |
+
desc: null
|
29 |
+
value: 0.98
|
30 |
+
adam_epsilon:
|
31 |
+
desc: null
|
32 |
+
value: 1.0e-08
|
33 |
+
cache_dir:
|
34 |
+
desc: null
|
35 |
+
value: null
|
36 |
+
config_name:
|
37 |
+
desc: null
|
38 |
+
value: ./
|
39 |
+
dataloader_drop_last:
|
40 |
+
desc: null
|
41 |
+
value: false
|
42 |
+
dataloader_num_workers:
|
43 |
+
desc: null
|
44 |
+
value: 0
|
45 |
+
dataloader_pin_memory:
|
46 |
+
desc: null
|
47 |
+
value: true
|
48 |
+
dataset_config_name:
|
49 |
+
desc: null
|
50 |
+
value: null
|
51 |
+
dataset_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
ddp_find_unused_parameters:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
debug:
|
58 |
+
desc: null
|
59 |
+
value: []
|
60 |
+
deepspeed:
|
61 |
+
desc: null
|
62 |
+
value: null
|
63 |
+
disable_tqdm:
|
64 |
+
desc: null
|
65 |
+
value: false
|
66 |
+
do_eval:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_predict:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_train:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
dtype:
|
76 |
+
desc: null
|
77 |
+
value: float32
|
78 |
+
eval_accumulation_steps:
|
79 |
+
desc: null
|
80 |
+
value: null
|
81 |
+
eval_steps:
|
82 |
+
desc: null
|
83 |
+
value: 6000
|
84 |
+
evaluation_strategy:
|
85 |
+
desc: null
|
86 |
+
value: IntervalStrategy.NO
|
87 |
+
fp16:
|
88 |
+
desc: null
|
89 |
+
value: false
|
90 |
+
fp16_backend:
|
91 |
+
desc: null
|
92 |
+
value: auto
|
93 |
+
fp16_full_eval:
|
94 |
+
desc: null
|
95 |
+
value: false
|
96 |
+
fp16_opt_level:
|
97 |
+
desc: null
|
98 |
+
value: O1
|
99 |
+
gradient_accumulation_steps:
|
100 |
+
desc: null
|
101 |
+
value: 1
|
102 |
+
greater_is_better:
|
103 |
+
desc: null
|
104 |
+
value: null
|
105 |
+
group_by_length:
|
106 |
+
desc: null
|
107 |
+
value: false
|
108 |
+
ignore_data_skip:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
label_names:
|
112 |
+
desc: null
|
113 |
+
value: null
|
114 |
+
label_smoothing_factor:
|
115 |
+
desc: null
|
116 |
+
value: 0.0
|
117 |
+
learning_rate:
|
118 |
+
desc: null
|
119 |
+
value: 3.0e-05
|
120 |
+
length_column_name:
|
121 |
+
desc: null
|
122 |
+
value: length
|
123 |
+
line_by_line:
|
124 |
+
desc: null
|
125 |
+
value: false
|
126 |
+
load_best_model_at_end:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
local_rank:
|
130 |
+
desc: null
|
131 |
+
value: -1
|
132 |
+
log_level:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level_replica:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_on_each_node:
|
139 |
+
desc: null
|
140 |
+
value: true
|
141 |
+
logging_dir:
|
142 |
+
desc: null
|
143 |
+
value: ./runs/Jul15_17-09-58_t1v-n-f5c06ea1-w-0
|
144 |
+
logging_first_step:
|
145 |
+
desc: null
|
146 |
+
value: false
|
147 |
+
logging_steps:
|
148 |
+
desc: null
|
149 |
+
value: 50
|
150 |
+
logging_strategy:
|
151 |
+
desc: null
|
152 |
+
value: IntervalStrategy.STEPS
|
153 |
+
lr_scheduler_type:
|
154 |
+
desc: null
|
155 |
+
value: SchedulerType.LINEAR
|
156 |
+
max_eval_samples:
|
157 |
+
desc: null
|
158 |
+
value: 4000
|
159 |
+
max_grad_norm:
|
160 |
+
desc: null
|
161 |
+
value: 1.0
|
162 |
+
max_seq_length:
|
163 |
+
desc: null
|
164 |
+
value: 4096
|
165 |
+
max_steps:
|
166 |
+
desc: null
|
167 |
+
value: -1
|
168 |
+
metric_for_best_model:
|
169 |
+
desc: null
|
170 |
+
value: null
|
171 |
+
mlm_probability:
|
172 |
+
desc: null
|
173 |
+
value: 0.15
|
174 |
+
model_name_or_path:
|
175 |
+
desc: null
|
176 |
+
value: null
|
177 |
+
model_type:
|
178 |
+
desc: null
|
179 |
+
value: big_bird
|
180 |
+
mp_parameters:
|
181 |
+
desc: null
|
182 |
+
value: ''
|
183 |
+
no_cuda:
|
184 |
+
desc: null
|
185 |
+
value: false
|
186 |
+
num_train_epochs:
|
187 |
+
desc: null
|
188 |
+
value: 2.0
|
189 |
+
output_dir:
|
190 |
+
desc: null
|
191 |
+
value: ./
|
192 |
+
overwrite_cache:
|
193 |
+
desc: null
|
194 |
+
value: false
|
195 |
+
overwrite_output_dir:
|
196 |
+
desc: null
|
197 |
+
value: true
|
198 |
+
pad_to_max_length:
|
199 |
+
desc: null
|
200 |
+
value: false
|
201 |
+
past_index:
|
202 |
+
desc: null
|
203 |
+
value: -1
|
204 |
+
per_device_eval_batch_size:
|
205 |
+
desc: null
|
206 |
+
value: 1
|
207 |
+
per_device_train_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 1
|
210 |
+
per_gpu_eval_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: null
|
213 |
+
per_gpu_train_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
prediction_loss_only:
|
217 |
+
desc: null
|
218 |
+
value: false
|
219 |
+
preprocessing_num_workers:
|
220 |
+
desc: null
|
221 |
+
value: 96
|
222 |
+
push_to_hub:
|
223 |
+
desc: null
|
224 |
+
value: true
|
225 |
+
push_to_hub_model_id:
|
226 |
+
desc: null
|
227 |
+
value: ''
|
228 |
+
push_to_hub_organization:
|
229 |
+
desc: null
|
230 |
+
value: null
|
231 |
+
push_to_hub_token:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
remove_unused_columns:
|
235 |
+
desc: null
|
236 |
+
value: true
|
237 |
+
report_to:
|
238 |
+
desc: null
|
239 |
+
value:
|
240 |
+
- tensorboard
|
241 |
+
- wandb
|
242 |
+
resume_from_checkpoint:
|
243 |
+
desc: null
|
244 |
+
value: ./
|
245 |
+
run_name:
|
246 |
+
desc: null
|
247 |
+
value: ./
|
248 |
+
save_on_each_node:
|
249 |
+
desc: null
|
250 |
+
value: false
|
251 |
+
save_steps:
|
252 |
+
desc: null
|
253 |
+
value: 15000
|
254 |
+
save_strategy:
|
255 |
+
desc: null
|
256 |
+
value: IntervalStrategy.STEPS
|
257 |
+
save_total_limit:
|
258 |
+
desc: null
|
259 |
+
value: 20
|
260 |
+
seed:
|
261 |
+
desc: null
|
262 |
+
value: 42
|
263 |
+
sharded_ddp:
|
264 |
+
desc: null
|
265 |
+
value: []
|
266 |
+
skip_memory_metrics:
|
267 |
+
desc: null
|
268 |
+
value: true
|
269 |
+
tokenizer_name:
|
270 |
+
desc: null
|
271 |
+
value: ./
|
272 |
+
tpu_metrics_debug:
|
273 |
+
desc: null
|
274 |
+
value: false
|
275 |
+
tpu_num_cores:
|
276 |
+
desc: null
|
277 |
+
value: null
|
278 |
+
train_ref_file:
|
279 |
+
desc: null
|
280 |
+
value: null
|
281 |
+
use_fast_tokenizer:
|
282 |
+
desc: null
|
283 |
+
value: true
|
284 |
+
use_legacy_prediction_loop:
|
285 |
+
desc: null
|
286 |
+
value: false
|
287 |
+
validation_ref_file:
|
288 |
+
desc: null
|
289 |
+
value: null
|
290 |
+
validation_split_percentage:
|
291 |
+
desc: null
|
292 |
+
value: 5
|
293 |
+
warmup_ratio:
|
294 |
+
desc: null
|
295 |
+
value: 0.0
|
296 |
+
warmup_steps:
|
297 |
+
desc: null
|
298 |
+
value: 10000
|
299 |
+
weight_decay:
|
300 |
+
desc: null
|
301 |
+
value: 0.0095
|
wandb/run-20210715_171007-1mu5szt1/files/output.log
ADDED
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[17:10:21] - INFO - absl - Restoring checkpoint from ./checkpoint_10000
|
2 |
+
tcmalloc: large alloc 1530273792 bytes == 0x9b524000 @ 0x7f5b75dd5680 0x7f5b75df6824 0x5b9a14 0x50b2ae 0x50cb1b 0x5a6f17 0x5f3010 0x56fd36 0x568d9a 0x5f5b33 0x56aadf 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7f5b75bea0b3 0x5f96de
|
3 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
|
4 |
+
warnings.warn(
|
5 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
6 |
+
warnings.warn(
|
7 |
+
Epoch ... (1/2): 0%| | 0/2 [00:00<?, ?it/s]
|
8 |
+
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
|
13 |
+
|
14 |
+
Training...: 28%|██████████████████████████████████████▌ | 100067/352766 [04:37<16:57:35, 4.14it/s]
|
15 |
+
|
16 |
+
|
17 |
+
|
18 |
+
|
19 |
+
Training...: 28%|██████████████████████████████████████▌ | 100119/352766 [04:57<13:06:22, 5.35it/s]
|
20 |
+
|
21 |
+
|
22 |
+
|
23 |
+
Training...: 28%|██████████████████████████████████████▌ | 100152/352766 [05:03<26:17:15, 2.67it/s]
|
24 |
+
|
25 |
+
|
26 |
+
|
27 |
+
|
28 |
+
Training...: 28%|██████████████████████████████████████▋ | 100202/352766 [05:23<26:02:15, 2.69it/s]
|
29 |
+
|
30 |
+
|
31 |
+
|
32 |
+
|
33 |
+
Training...: 28%|██████████████████████████████████████▋ | 100252/352766 [05:43<26:10:16, 2.68it/s]
|
34 |
+
|
35 |
+
|
36 |
+
|
37 |
+
|
38 |
+
|
39 |
+
Training...: 28%|██████████████████████████████████████▋ | 100317/352766 [06:17<17:05:12, 4.10it/s]
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
Training...: 28%|██████████████████████████████████████▋ | 100352/352766 [06:23<26:12:05, 2.68it/s]
|
44 |
+
|
45 |
+
|
46 |
+
|
47 |
+
|
48 |
+
Training...: 28%|██████████████████████████████████████▋ | 100402/352766 [06:43<26:11:04, 2.68it/s]
|
49 |
+
|
50 |
+
|
51 |
+
|
52 |
+
|
53 |
+
|
54 |
+
Training...: 28%|██████████████████████████████████████▋ | 100469/352766 [07:17<13:12:02, 5.31it/s]
|
55 |
+
|
56 |
+
|
57 |
+
|
58 |
+
Training...: 28%|██████████████████████████████████████▋ | 100502/352766 [07:23<26:09:06, 2.68it/s]
|
59 |
+
|
60 |
+
|
61 |
+
|
62 |
+
|
63 |
+
|
64 |
+
Training...: 29%|██████████████████████████████████████▊ | 100571/352766 [07:58<10:30:33, 6.67it/s]
|
65 |
+
|
66 |
+
|
67 |
+
|
68 |
+
|
69 |
+
Training...: 29%|██████████████████████████████████████▊ | 100619/352766 [08:18<13:30:57, 5.18it/s]
|
70 |
+
|
71 |
+
|
72 |
+
|
73 |
+
Training...: 29%|██████████████████████████████████████▊ | 100652/352766 [08:23<25:56:56, 2.70it/s]
|
74 |
+
|
75 |
+
|
76 |
+
|
77 |
+
|
78 |
+
Training...: 29%|██████████████████████████████████████▊ | 100702/352766 [08:43<26:11:10, 2.67it/s]
|
79 |
+
|
80 |
+
|
81 |
+
|
82 |
+
|
83 |
+
|
84 |
+
Training...: 29%|██████████████████████████████████████▊ | 100771/352766 [09:18<10:27:46, 6.69it/s]
|
85 |
+
|
86 |
+
|
87 |
+
|
88 |
+
|
89 |
+
Training...: 29%|██████████████████████████████████████▊ | 100821/352766 [09:38<10:21:37, 6.75it/s]
|
90 |
+
|
91 |
+
|
92 |
+
|
93 |
+
|
94 |
+
Training...: 29%|███████████████████████████████████████▏ | 100873/352766 [09:58<8:37:24, 8.11it/s]
|
95 |
+
|
96 |
+
|
97 |
+
|
98 |
+
|
99 |
+
Training...: 29%|███████████████████████████████████████▏ | 100923/352766 [10:18<8:39:16, 8.08it/s]
|
100 |
+
|
101 |
+
|
102 |
+
|
103 |
+
Training...: 29%|██████████████████████████████████████▉ | 100952/352766 [10:24<25:57:41, 2.69it/s]
|
104 |
+
|
105 |
+
|
106 |
+
|
107 |
+
|
108 |
+
Training...: 29%|██████████████████████████████████████▉ | 101002/352766 [10:44<26:08:39, 2.67it/s]
|
109 |
+
|
110 |
+
|
111 |
+
|
112 |
+
|
113 |
+
Training...: 29%|██████████████████████████████████████▉ | 101052/352766 [11:04<26:34:55, 2.63it/s]
|
114 |
+
|
115 |
+
|
116 |
+
|
117 |
+
|
118 |
+
Training...: 29%|██████████████████████████████████████▉ | 101102/352766 [11:24<26:07:20, 2.68it/s]
|
119 |
+
|
120 |
+
|
121 |
+
|
122 |
+
|
123 |
+
Training...: 29%|██████████████████████████████████████▉ | 101152/352766 [11:44<25:00:47, 2.79it/s]
|
124 |
+
|
125 |
+
|
126 |
+
|
127 |
+
|
128 |
+
Training...: 29%|███████████████████████████████████████ | 101202/352766 [12:04<26:07:29, 2.67it/s]
|
129 |
+
|
130 |
+
|
131 |
+
|
132 |
+
|
133 |
+
|
134 |
+
Training...: 29%|███████████████████████████████████████ | 101271/352766 [12:38<10:35:05, 6.60it/s]
|
135 |
+
|
136 |
+
|
137 |
+
|
138 |
+
Training...: 29%|███████████████████████████████████████ | 101302/352766 [12:44<25:54:10, 2.70it/s]
|
139 |
+
|
140 |
+
|
141 |
+
|
142 |
+
|
143 |
+
Training...: 29%|███████████████████████████████████████ | 101352/352766 [13:04<26:04:56, 2.68it/s]
|
144 |
+
|
145 |
+
|
146 |
+
|
147 |
+
|
148 |
+
Training...: 29%|███████████████████████████████████████ | 101402/352766 [13:24<25:52:37, 2.70it/s]
|
149 |
+
|
150 |
+
|
151 |
+
|
152 |
+
|
153 |
+
Training...: 29%|███████████████████████████████████████ | 101452/352766 [13:44<26:05:47, 2.68it/s]
|
154 |
+
|
155 |
+
|
156 |
+
|
157 |
+
|
158 |
+
Training...: 29%|███████████████████████████████████████▏ | 101502/352766 [14:04<25:53:30, 2.70it/s]
|
159 |
+
|
160 |
+
|
161 |
+
|
162 |
+
|
163 |
+
Training...: 29%|███████████████████████████████████████▏ | 101552/352766 [14:24<26:05:49, 2.67it/s]
|
164 |
+
|
165 |
+
|
166 |
+
|
167 |
+
|
168 |
+
Training...: 29%|███████████████████████████████████████▏ | 101602/352766 [14:44<25:48:27, 2.70it/s]
|
169 |
+
|
170 |
+
|
171 |
+
|
172 |
+
|
173 |
+
Training...: 29%|███████████████████████████████████████▏ | 101652/352766 [15:04<26:04:33, 2.68it/s]
|
174 |
+
|
175 |
+
|
176 |
+
|
177 |
+
|
178 |
+
Training...: 29%|███████████████████████████████████████▏ | 101702/352766 [15:24<25:57:03, 2.69it/s]
|
179 |
+
|
180 |
+
|
181 |
+
|
182 |
+
|
183 |
+
Training...: 29%|███████████████████████████████████████▏ | 101752/352766 [15:44<26:05:09, 2.67it/s]
|
184 |
+
|
185 |
+
|
186 |
+
|
187 |
+
|
188 |
+
Training...: 29%|███████████████████████████████████████▏ | 101802/352766 [16:04<26:04:34, 2.67it/s]
|
189 |
+
|
190 |
+
|
191 |
+
|
192 |
+
|
193 |
+
Training...: 29%|███████████████████████████████████████▎ | 101852/352766 [16:24<26:03:08, 2.68it/s]
|
194 |
+
|
195 |
+
|
196 |
+
|
197 |
+
|
198 |
+
Training...: 29%|███████████████████████████████████████▎ | 101902/352766 [16:44<26:01:53, 2.68it/s]
|
199 |
+
|
200 |
+
|
201 |
+
|
202 |
+
|
203 |
+
Training...: 29%|███████████████████████████████████████▎ | 101952/352766 [17:04<26:08:22, 2.67it/s]
|
204 |
+
|
205 |
+
|
206 |
+
|
207 |
+
|
208 |
+
Training...: 29%|███████████████████████████████████████▎ | 102002/352766 [17:24<26:02:13, 2.68it/s]
|
209 |
+
|
210 |
+
|
211 |
+
|
212 |
+
|
213 |
+
Training...: 29%|███████████████████████████████████████▎ | 102052/352766 [17:44<25:49:15, 2.70it/s]
|
214 |
+
|
215 |
+
|
216 |
+
|
217 |
+
|
218 |
+
Training...: 29%|███████████████████████████████████████▎ | 102102/352766 [18:05<26:01:33, 2.68it/s]
|
219 |
+
|
220 |
+
|
221 |
+
|
222 |
+
|
223 |
+
Training...: 29%|███████████████████████████████████████▍ | 102152/352766 [18:25<25:46:31, 2.70it/s]
|
224 |
+
|
225 |
+
|
226 |
+
|
227 |
+
|
228 |
+
Training...: 29%|███████████████████████████████████████▍ | 102202/352766 [18:45<26:00:54, 2.68it/s]
|
229 |
+
|
230 |
+
|
231 |
+
|
232 |
+
|
233 |
+
Training...: 29%|███████████████████████████████████████▍ | 102252/352766 [19:05<25:47:08, 2.70it/s]
|
234 |
+
|
235 |
+
|
236 |
+
|
237 |
+
|
238 |
+
Training...: 29%|███████████████████████████████████████▍ | 102302/352766 [19:25<26:00:27, 2.68it/s]
|
239 |
+
|
240 |
+
|
241 |
+
|
242 |
+
|
243 |
+
Training...: 29%|███████████████████████████████████████▍ | 102352/352766 [19:45<25:53:04, 2.69it/s]
|
244 |
+
|
245 |
+
|
246 |
+
|
247 |
+
|
248 |
+
Training...: 29%|███████████████████████████████████████▍ | 102402/352766 [20:05<26:01:48, 2.67it/s]
|
249 |
+
|
250 |
+
|
251 |
+
|
252 |
+
|
253 |
+
Training...: 29%|███████████████████████████████████████▍ | 102452/352766 [20:25<26:00:49, 2.67it/s]
|
254 |
+
|
255 |
+
|
256 |
+
|
257 |
+
|
258 |
+
Training...: 29%|███████████████████████████████████████▌ | 102502/352766 [20:45<25:58:45, 2.68it/s]
|
259 |
+
|
260 |
+
|
261 |
+
|
262 |
+
|
263 |
+
Training...: 29%|██████████████��████████████████████████▌ | 102552/352766 [21:05<25:58:02, 2.68it/s]
|
264 |
+
|
265 |
+
|
266 |
+
|
267 |
+
|
268 |
+
Training...: 29%|███████████████████████████████████████▌ | 102602/352766 [21:25<25:58:39, 2.67it/s]
|
269 |
+
|
270 |
+
|
271 |
+
|
272 |
+
|
273 |
+
Training...: 29%|███████████████████████████████████████▌ | 102652/352766 [21:45<25:59:04, 2.67it/s]
|
274 |
+
|
275 |
+
|
276 |
+
|
277 |
+
|
278 |
+
Training...: 29%|███████████████████████████████████████▌ | 102702/352766 [22:05<25:43:17, 2.70it/s]
|
279 |
+
|
280 |
+
|
281 |
+
|
282 |
+
|
283 |
+
|
284 |
+
Training...: 29%|███████████████████████████████████████▉ | 102777/352766 [22:40<6:58:15, 9.96it/s]
|
285 |
+
|
286 |
+
|
287 |
+
|
288 |
+
|
289 |
+
Training...: 29%|███████████████████████████████████████▉ | 102831/352766 [23:00<5:31:01, 12.58it/s]
|
290 |
+
|
291 |
+
|
292 |
+
|
293 |
+
Training...: 29%|███████████████████████████████████████▋ | 102852/352766 [23:05<25:58:26, 2.67it/s]
|
294 |
+
|
295 |
+
|
296 |
+
|
297 |
+
|
298 |
+
Training...: 29%|███████████████████████████████████████▋ | 102902/352766 [23:25<25:41:12, 2.70it/s]
|
299 |
+
|
300 |
+
|
301 |
+
|
302 |
+
|
303 |
+
|
304 |
+
Training...: 29%|███████████████████████████████████████▉ | 102977/352766 [24:00<6:26:06, 10.78it/s]
|
305 |
+
|
306 |
+
|
307 |
+
|
wandb/run-20210715_171007-1mu5szt1/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|
wandb/run-20210715_171007-1mu5szt1/files/wandb-metadata.json
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2021-07-15T17:10:09.049159",
|
5 |
+
"startedAt": "2021-07-15T17:10:07.007216",
|
6 |
+
"docker": null,
|
7 |
+
"cpu_count": 96,
|
8 |
+
"cuda": null,
|
9 |
+
"args": [
|
10 |
+
"--push_to_hub",
|
11 |
+
"--output_dir=./",
|
12 |
+
"--model_type=big_bird",
|
13 |
+
"--config_name=./",
|
14 |
+
"--tokenizer_name=./",
|
15 |
+
"--max_seq_length=4096",
|
16 |
+
"--weight_decay=0.0095",
|
17 |
+
"--warmup_steps=10000",
|
18 |
+
"--overwrite_output_dir",
|
19 |
+
"--adam_beta1=0.9",
|
20 |
+
"--adam_beta2=0.98",
|
21 |
+
"--logging_steps=50",
|
22 |
+
"--eval_steps=6000",
|
23 |
+
"--num_train_epochs=2",
|
24 |
+
"--preprocessing_num_workers=96",
|
25 |
+
"--save_steps=15000",
|
26 |
+
"--learning_rate=3e-5",
|
27 |
+
"--per_device_train_batch_size=1",
|
28 |
+
"--per_device_eval_batch_size=1",
|
29 |
+
"--save_total_limit=20",
|
30 |
+
"--max_eval_samples=4000",
|
31 |
+
"--resume_from_checkpoint=./"
|
32 |
+
],
|
33 |
+
"state": "running",
|
34 |
+
"program": "./run_mlm_flax_no_accum.py",
|
35 |
+
"codePath": "run_mlm_flax_no_accum.py",
|
36 |
+
"git": {
|
37 |
+
"remote": "https://huggingface.co/flax-community/pino-roberta-base",
|
38 |
+
"commit": "cc569aecf5e26454416d7a13c7876ad9111120cf"
|
39 |
+
},
|
40 |
+
"email": null,
|
41 |
+
"root": "/home/dat/pino-roberta-base",
|
42 |
+
"host": "t1v-n-f5c06ea1-w-0",
|
43 |
+
"username": "dat",
|
44 |
+
"executable": "/home/dat/pino/bin/python"
|
45 |
+
}
|
wandb/run-20210715_171007-1mu5szt1/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"training_step": 2950, "learning_rate": 2.5990751964855008e-05, "train_loss": 2.4761266708374023, "_runtime": 1780, "_timestamp": 1626370787, "_step": 58}
|
wandb/run-20210715_171007-1mu5szt1/logs/debug-internal.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
wandb/run-20210715_171007-1mu5szt1/logs/debug.log
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-15 17:10:07,008 INFO MainThread:717656 [wandb_setup.py:_flush():69] setting env: {}
|
2 |
+
2021-07-15 17:10:07,008 INFO MainThread:717656 [wandb_setup.py:_flush():69] setting login settings: {}
|
3 |
+
2021-07-15 17:10:07,008 INFO MainThread:717656 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_171007-1mu5szt1/logs/debug.log
|
4 |
+
2021-07-15 17:10:07,009 INFO MainThread:717656 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_171007-1mu5szt1/logs/debug-internal.log
|
5 |
+
2021-07-15 17:10:07,009 INFO MainThread:717656 [wandb_init.py:init():370] calling init triggers
|
6 |
+
2021-07-15 17:10:07,009 INFO MainThread:717656 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
|
7 |
+
config: {}
|
8 |
+
2021-07-15 17:10:07,009 INFO MainThread:717656 [wandb_init.py:init():419] starting backend
|
9 |
+
2021-07-15 17:10:07,009 INFO MainThread:717656 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
10 |
+
2021-07-15 17:10:07,057 INFO MainThread:717656 [backend.py:ensure_launched():135] starting backend process...
|
11 |
+
2021-07-15 17:10:07,104 INFO MainThread:717656 [backend.py:ensure_launched():139] started backend process with pid: 719053
|
12 |
+
2021-07-15 17:10:07,106 INFO MainThread:717656 [wandb_init.py:init():424] backend started and connected
|
13 |
+
2021-07-15 17:10:07,109 INFO MainThread:717656 [wandb_init.py:init():472] updated telemetry
|
14 |
+
2021-07-15 17:10:07,110 INFO MainThread:717656 [wandb_init.py:init():491] communicating current version
|
15 |
+
2021-07-15 17:10:07,747 INFO MainThread:717656 [wandb_init.py:init():496] got version response
|
16 |
+
2021-07-15 17:10:07,747 INFO MainThread:717656 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
|
17 |
+
2021-07-15 17:10:07,930 INFO MainThread:717656 [wandb_init.py:init():529] starting run threads in backend
|
18 |
+
2021-07-15 17:10:09,087 INFO MainThread:717656 [wandb_run.py:_console_start():1623] atexit reg
|
19 |
+
2021-07-15 17:10:09,088 INFO MainThread:717656 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
|
20 |
+
2021-07-15 17:10:09,088 INFO MainThread:717656 [wandb_run.py:_redirect():1502] Redirecting console.
|
21 |
+
2021-07-15 17:10:09,091 INFO MainThread:717656 [wandb_run.py:_redirect():1558] Redirects installed.
|
22 |
+
2021-07-15 17:10:09,091 INFO MainThread:717656 [wandb_init.py:init():554] run started, returning control to user process
|
23 |
+
2021-07-15 17:10:09,097 INFO MainThread:717656 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_17-09-58_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 15000, 'save_total_limit': 20, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 6000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
+
2021-07-15 17:10:09,098 INFO MainThread:717656 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
|
25 |
+
2021-07-15 17:10:09,100 INFO MainThread:717656 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 4000}
|
26 |
+
2021-07-15 17:40:07,255 INFO MainThread:717656 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 255
|
27 |
+
2021-07-15 17:40:07,255 INFO MainThread:717656 [wandb_run.py:_restore():1565] restore
|
wandb/run-20210715_171007-1mu5szt1/run-1mu5szt1.wandb
ADDED
Binary file (108 kB). View file
|
|
wandb/run-20210715_174147-3nkn7hxg/files/config.yaml
ADDED
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
2:
|
17 |
+
- 3
|
18 |
+
- 11
|
19 |
+
4: 3.8.10
|
20 |
+
5: 0.10.33
|
21 |
+
6: 4.9.0.dev0
|
22 |
+
8:
|
23 |
+
- 5
|
24 |
+
adafactor:
|
25 |
+
desc: null
|
26 |
+
value: false
|
27 |
+
adam_beta1:
|
28 |
+
desc: null
|
29 |
+
value: 0.9
|
30 |
+
adam_beta2:
|
31 |
+
desc: null
|
32 |
+
value: 0.98
|
33 |
+
adam_epsilon:
|
34 |
+
desc: null
|
35 |
+
value: 1.0e-08
|
36 |
+
cache_dir:
|
37 |
+
desc: null
|
38 |
+
value: null
|
39 |
+
config_name:
|
40 |
+
desc: null
|
41 |
+
value: ./
|
42 |
+
dataloader_drop_last:
|
43 |
+
desc: null
|
44 |
+
value: false
|
45 |
+
dataloader_num_workers:
|
46 |
+
desc: null
|
47 |
+
value: 0
|
48 |
+
dataloader_pin_memory:
|
49 |
+
desc: null
|
50 |
+
value: true
|
51 |
+
dataset_config_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
dataset_name:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
ddp_find_unused_parameters:
|
58 |
+
desc: null
|
59 |
+
value: null
|
60 |
+
debug:
|
61 |
+
desc: null
|
62 |
+
value: []
|
63 |
+
deepspeed:
|
64 |
+
desc: null
|
65 |
+
value: null
|
66 |
+
disable_tqdm:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_eval:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_predict:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
do_train:
|
76 |
+
desc: null
|
77 |
+
value: false
|
78 |
+
dtype:
|
79 |
+
desc: null
|
80 |
+
value: float32
|
81 |
+
eval_accumulation_steps:
|
82 |
+
desc: null
|
83 |
+
value: null
|
84 |
+
eval_steps:
|
85 |
+
desc: null
|
86 |
+
value: 6000
|
87 |
+
evaluation_strategy:
|
88 |
+
desc: null
|
89 |
+
value: IntervalStrategy.NO
|
90 |
+
fp16:
|
91 |
+
desc: null
|
92 |
+
value: false
|
93 |
+
fp16_backend:
|
94 |
+
desc: null
|
95 |
+
value: auto
|
96 |
+
fp16_full_eval:
|
97 |
+
desc: null
|
98 |
+
value: false
|
99 |
+
fp16_opt_level:
|
100 |
+
desc: null
|
101 |
+
value: O1
|
102 |
+
gradient_accumulation_steps:
|
103 |
+
desc: null
|
104 |
+
value: 1
|
105 |
+
greater_is_better:
|
106 |
+
desc: null
|
107 |
+
value: null
|
108 |
+
group_by_length:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
ignore_data_skip:
|
112 |
+
desc: null
|
113 |
+
value: false
|
114 |
+
label_names:
|
115 |
+
desc: null
|
116 |
+
value: null
|
117 |
+
label_smoothing_factor:
|
118 |
+
desc: null
|
119 |
+
value: 0.0
|
120 |
+
learning_rate:
|
121 |
+
desc: null
|
122 |
+
value: 3.0e-05
|
123 |
+
length_column_name:
|
124 |
+
desc: null
|
125 |
+
value: length
|
126 |
+
line_by_line:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
load_best_model_at_end:
|
130 |
+
desc: null
|
131 |
+
value: false
|
132 |
+
local_rank:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_level_replica:
|
139 |
+
desc: null
|
140 |
+
value: -1
|
141 |
+
log_on_each_node:
|
142 |
+
desc: null
|
143 |
+
value: true
|
144 |
+
logging_dir:
|
145 |
+
desc: null
|
146 |
+
value: ./runs/Jul15_17-41-39_t1v-n-f5c06ea1-w-0
|
147 |
+
logging_first_step:
|
148 |
+
desc: null
|
149 |
+
value: false
|
150 |
+
logging_steps:
|
151 |
+
desc: null
|
152 |
+
value: 50
|
153 |
+
logging_strategy:
|
154 |
+
desc: null
|
155 |
+
value: IntervalStrategy.STEPS
|
156 |
+
lr_scheduler_type:
|
157 |
+
desc: null
|
158 |
+
value: SchedulerType.LINEAR
|
159 |
+
max_eval_samples:
|
160 |
+
desc: null
|
161 |
+
value: 4000
|
162 |
+
max_grad_norm:
|
163 |
+
desc: null
|
164 |
+
value: 1.0
|
165 |
+
max_seq_length:
|
166 |
+
desc: null
|
167 |
+
value: 4096
|
168 |
+
max_steps:
|
169 |
+
desc: null
|
170 |
+
value: -1
|
171 |
+
metric_for_best_model:
|
172 |
+
desc: null
|
173 |
+
value: null
|
174 |
+
mlm_probability:
|
175 |
+
desc: null
|
176 |
+
value: 0.15
|
177 |
+
model_name_or_path:
|
178 |
+
desc: null
|
179 |
+
value: null
|
180 |
+
model_type:
|
181 |
+
desc: null
|
182 |
+
value: big_bird
|
183 |
+
mp_parameters:
|
184 |
+
desc: null
|
185 |
+
value: ''
|
186 |
+
no_cuda:
|
187 |
+
desc: null
|
188 |
+
value: false
|
189 |
+
num_train_epochs:
|
190 |
+
desc: null
|
191 |
+
value: 2.0
|
192 |
+
output_dir:
|
193 |
+
desc: null
|
194 |
+
value: ./
|
195 |
+
overwrite_cache:
|
196 |
+
desc: null
|
197 |
+
value: false
|
198 |
+
overwrite_output_dir:
|
199 |
+
desc: null
|
200 |
+
value: true
|
201 |
+
pad_to_max_length:
|
202 |
+
desc: null
|
203 |
+
value: false
|
204 |
+
past_index:
|
205 |
+
desc: null
|
206 |
+
value: -1
|
207 |
+
per_device_eval_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 1
|
210 |
+
per_device_train_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: 1
|
213 |
+
per_gpu_eval_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
per_gpu_train_batch_size:
|
217 |
+
desc: null
|
218 |
+
value: null
|
219 |
+
prediction_loss_only:
|
220 |
+
desc: null
|
221 |
+
value: false
|
222 |
+
preprocessing_num_workers:
|
223 |
+
desc: null
|
224 |
+
value: 96
|
225 |
+
push_to_hub:
|
226 |
+
desc: null
|
227 |
+
value: true
|
228 |
+
push_to_hub_model_id:
|
229 |
+
desc: null
|
230 |
+
value: ''
|
231 |
+
push_to_hub_organization:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
push_to_hub_token:
|
235 |
+
desc: null
|
236 |
+
value: null
|
237 |
+
remove_unused_columns:
|
238 |
+
desc: null
|
239 |
+
value: true
|
240 |
+
report_to:
|
241 |
+
desc: null
|
242 |
+
value:
|
243 |
+
- tensorboard
|
244 |
+
- wandb
|
245 |
+
resume_from_checkpoint:
|
246 |
+
desc: null
|
247 |
+
value: ./
|
248 |
+
run_name:
|
249 |
+
desc: null
|
250 |
+
value: ./
|
251 |
+
save_on_each_node:
|
252 |
+
desc: null
|
253 |
+
value: false
|
254 |
+
save_steps:
|
255 |
+
desc: null
|
256 |
+
value: 15000
|
257 |
+
save_strategy:
|
258 |
+
desc: null
|
259 |
+
value: IntervalStrategy.STEPS
|
260 |
+
save_total_limit:
|
261 |
+
desc: null
|
262 |
+
value: 20
|
263 |
+
seed:
|
264 |
+
desc: null
|
265 |
+
value: 42
|
266 |
+
sharded_ddp:
|
267 |
+
desc: null
|
268 |
+
value: []
|
269 |
+
skip_memory_metrics:
|
270 |
+
desc: null
|
271 |
+
value: true
|
272 |
+
tokenizer_name:
|
273 |
+
desc: null
|
274 |
+
value: ./
|
275 |
+
tpu_metrics_debug:
|
276 |
+
desc: null
|
277 |
+
value: false
|
278 |
+
tpu_num_cores:
|
279 |
+
desc: null
|
280 |
+
value: null
|
281 |
+
train_ref_file:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
use_fast_tokenizer:
|
285 |
+
desc: null
|
286 |
+
value: true
|
287 |
+
use_legacy_prediction_loop:
|
288 |
+
desc: null
|
289 |
+
value: false
|
290 |
+
validation_ref_file:
|
291 |
+
desc: null
|
292 |
+
value: null
|
293 |
+
validation_split_percentage:
|
294 |
+
desc: null
|
295 |
+
value: 5
|
296 |
+
warmup_ratio:
|
297 |
+
desc: null
|
298 |
+
value: 0.0
|
299 |
+
warmup_steps:
|
300 |
+
desc: null
|
301 |
+
value: 10000
|
302 |
+
weight_decay:
|
303 |
+
desc: null
|
304 |
+
value: 0.0095
|
wandb/run-20210715_174147-3nkn7hxg/files/output.log
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
[17:42:02] - INFO - absl - Restoring checkpoint from ./checkpoint_10000
|
3 |
+
tcmalloc: large alloc 1530273792 bytes == 0x9a3bc000 @ 0x7ffb11761680 0x7ffb11782824 0x5b9a14 0x50b2ae 0x50cb1b 0x5a6f17 0x5f3010 0x56fd36 0x568d9a 0x5f5b33 0x56aadf 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7ffb115760b3 0x5f96de
|
4 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
|
5 |
+
warnings.warn(
|
6 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
7 |
+
warnings.warn(
|
8 |
+
Epoch ... (1/2): 0%| | 0/2 [05:22<?, ?it/s]
|
9 |
+
Traceback (most recent call last):
|
10 |
+
File "./run_mlm_flax_no_accum.py", line 694, in <module>
|
11 |
+
for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1,start=resume_step)):
|
12 |
+
File "/home/dat/pino/lib/python3.8/site-packages/tqdm/std.py", line 1015, in __init__
|
13 |
+
raise (
|
14 |
+
tqdm.std.TqdmKeyError: "Unknown argument(s): {'start': 100002}"
|
wandb/run-20210715_174147-3nkn7hxg/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|
wandb/run-20210715_174147-3nkn7hxg/files/wandb-metadata.json
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2021-07-15T17:41:49.930191",
|
5 |
+
"startedAt": "2021-07-15T17:41:47.904489",
|
6 |
+
"docker": null,
|
7 |
+
"cpu_count": 96,
|
8 |
+
"cuda": null,
|
9 |
+
"args": [
|
10 |
+
"--push_to_hub",
|
11 |
+
"--output_dir=./",
|
12 |
+
"--model_type=big_bird",
|
13 |
+
"--config_name=./",
|
14 |
+
"--tokenizer_name=./",
|
15 |
+
"--max_seq_length=4096",
|
16 |
+
"--weight_decay=0.0095",
|
17 |
+
"--warmup_steps=10000",
|
18 |
+
"--overwrite_output_dir",
|
19 |
+
"--adam_beta1=0.9",
|
20 |
+
"--adam_beta2=0.98",
|
21 |
+
"--logging_steps=50",
|
22 |
+
"--eval_steps=6000",
|
23 |
+
"--num_train_epochs=2",
|
24 |
+
"--preprocessing_num_workers=96",
|
25 |
+
"--save_steps=15000",
|
26 |
+
"--learning_rate=3e-5",
|
27 |
+
"--per_device_train_batch_size=1",
|
28 |
+
"--per_device_eval_batch_size=1",
|
29 |
+
"--save_total_limit=20",
|
30 |
+
"--max_eval_samples=4000",
|
31 |
+
"--resume_from_checkpoint=./"
|
32 |
+
],
|
33 |
+
"state": "running",
|
34 |
+
"program": "./run_mlm_flax_no_accum.py",
|
35 |
+
"codePath": "run_mlm_flax_no_accum.py",
|
36 |
+
"git": {
|
37 |
+
"remote": "https://huggingface.co/flax-community/pino-roberta-base",
|
38 |
+
"commit": "cc569aecf5e26454416d7a13c7876ad9111120cf"
|
39 |
+
},
|
40 |
+
"email": null,
|
41 |
+
"root": "/home/dat/pino-roberta-base",
|
42 |
+
"host": "t1v-n-f5c06ea1-w-0",
|
43 |
+
"username": "dat",
|
44 |
+
"executable": "/home/dat/pino/bin/python"
|
45 |
+
}
|
wandb/run-20210715_174147-3nkn7hxg/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
wandb/run-20210715_174147-3nkn7hxg/logs/debug-internal.log
ADDED
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-15 17:41:48,594 INFO MainThread:723175 [internal.py:wandb_internal():88] W&B internal server running at pid: 723175, started at: 2021-07-15 17:41:48.594473
|
2 |
+
2021-07-15 17:41:48,596 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: check_version
|
3 |
+
2021-07-15 17:41:48,597 INFO WriterThread:723175 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/run-3nkn7hxg.wandb
|
4 |
+
2021-07-15 17:41:48,598 DEBUG SenderThread:723175 [sender.py:send():179] send: header
|
5 |
+
2021-07-15 17:41:48,598 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: check_version
|
6 |
+
2021-07-15 17:41:48,634 DEBUG SenderThread:723175 [sender.py:send():179] send: run
|
7 |
+
2021-07-15 17:41:48,804 INFO SenderThread:723175 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files
|
8 |
+
2021-07-15 17:41:48,805 INFO SenderThread:723175 [sender.py:_start_run_threads():716] run started: 3nkn7hxg with start time 1626370908
|
9 |
+
2021-07-15 17:41:48,805 DEBUG SenderThread:723175 [sender.py:send():179] send: summary
|
10 |
+
2021-07-15 17:41:48,805 INFO SenderThread:723175 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
11 |
+
2021-07-15 17:41:48,805 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: run_start
|
12 |
+
2021-07-15 17:41:49,817 INFO Thread-8 :723175 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/wandb-summary.json
|
13 |
+
2021-07-15 17:41:49,929 DEBUG HandlerThread:723175 [meta.py:__init__():39] meta init
|
14 |
+
2021-07-15 17:41:49,930 DEBUG HandlerThread:723175 [meta.py:__init__():53] meta init done
|
15 |
+
2021-07-15 17:41:49,930 DEBUG HandlerThread:723175 [meta.py:probe():210] probe
|
16 |
+
2021-07-15 17:41:49,931 DEBUG HandlerThread:723175 [meta.py:_setup_git():200] setup git
|
17 |
+
2021-07-15 17:41:49,960 DEBUG HandlerThread:723175 [meta.py:_setup_git():207] setup git done
|
18 |
+
2021-07-15 17:41:49,961 DEBUG HandlerThread:723175 [meta.py:_save_pip():57] save pip
|
19 |
+
2021-07-15 17:41:49,961 DEBUG HandlerThread:723175 [meta.py:_save_pip():71] save pip done
|
20 |
+
2021-07-15 17:41:49,961 DEBUG HandlerThread:723175 [meta.py:probe():252] probe done
|
21 |
+
2021-07-15 17:41:49,964 DEBUG SenderThread:723175 [sender.py:send():179] send: files
|
22 |
+
2021-07-15 17:41:49,964 INFO SenderThread:723175 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
|
23 |
+
2021-07-15 17:41:49,970 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
24 |
+
2021-07-15 17:41:49,971 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
25 |
+
2021-07-15 17:41:50,099 DEBUG SenderThread:723175 [sender.py:send():179] send: config
|
26 |
+
2021-07-15 17:41:50,100 DEBUG SenderThread:723175 [sender.py:send():179] send: config
|
27 |
+
2021-07-15 17:41:50,100 DEBUG SenderThread:723175 [sender.py:send():179] send: config
|
28 |
+
2021-07-15 17:41:50,404 INFO Thread-11 :723175 [upload_job.py:push():137] Uploaded file /tmp/tmplksa3t0ywandb/5lvlwhj5-wandb-metadata.json
|
29 |
+
2021-07-15 17:41:50,815 INFO Thread-8 :723175 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/requirements.txt
|
30 |
+
2021-07-15 17:41:50,816 INFO Thread-8 :723175 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/output.log
|
31 |
+
2021-07-15 17:41:50,816 INFO Thread-8 :723175 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/wandb-metadata.json
|
32 |
+
2021-07-15 17:42:04,821 INFO Thread-8 :723175 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/output.log
|
33 |
+
2021-07-15 17:42:05,202 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
34 |
+
2021-07-15 17:42:05,203 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
35 |
+
2021-07-15 17:42:06,821 INFO Thread-8 :723175 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/output.log
|
36 |
+
2021-07-15 17:42:08,822 INFO Thread-8 :723175 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/output.log
|
37 |
+
2021-07-15 17:42:18,016 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
|
38 |
+
2021-07-15 17:42:19,827 INFO Thread-8 :723175 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/config.yaml
|
39 |
+
2021-07-15 17:42:20,347 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
40 |
+
2021-07-15 17:42:20,347 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
41 |
+
2021-07-15 17:42:35,479 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
42 |
+
2021-07-15 17:42:35,480 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
43 |
+
2021-07-15 17:42:48,096 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
|
44 |
+
2021-07-15 17:42:50,612 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
45 |
+
2021-07-15 17:42:50,612 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
46 |
+
2021-07-15 17:43:05,746 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
47 |
+
2021-07-15 17:43:05,747 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
48 |
+
2021-07-15 17:43:18,173 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
|
49 |
+
2021-07-15 17:43:20,877 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
50 |
+
2021-07-15 17:43:20,878 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
51 |
+
2021-07-15 17:43:36,007 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
52 |
+
2021-07-15 17:43:36,008 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
53 |
+
2021-07-15 17:43:48,239 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
|
54 |
+
2021-07-15 17:43:51,139 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
55 |
+
2021-07-15 17:43:51,139 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
56 |
+
2021-07-15 17:44:06,269 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
57 |
+
2021-07-15 17:44:06,269 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
58 |
+
2021-07-15 17:44:18,311 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
|
59 |
+
2021-07-15 17:44:21,400 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
60 |
+
2021-07-15 17:44:21,400 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
61 |
+
2021-07-15 17:44:36,530 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
62 |
+
2021-07-15 17:44:36,530 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
63 |
+
2021-07-15 17:44:48,385 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
|
64 |
+
2021-07-15 17:44:51,661 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
65 |
+
2021-07-15 17:44:51,662 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
66 |
+
2021-07-15 17:45:06,796 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
67 |
+
2021-07-15 17:45:06,796 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
68 |
+
2021-07-15 17:45:18,460 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
|
69 |
+
2021-07-15 17:45:21,928 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
70 |
+
2021-07-15 17:45:21,928 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
71 |
+
2021-07-15 17:45:37,059 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
72 |
+
2021-07-15 17:45:37,059 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
73 |
+
2021-07-15 17:45:48,530 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
|
74 |
+
2021-07-15 17:45:52,197 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
75 |
+
2021-07-15 17:45:52,198 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
76 |
+
2021-07-15 17:46:07,329 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
77 |
+
2021-07-15 17:46:07,329 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
78 |
+
2021-07-15 17:46:18,600 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
|
79 |
+
2021-07-15 17:46:22,464 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
80 |
+
2021-07-15 17:46:22,464 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
81 |
+
2021-07-15 17:46:37,595 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
82 |
+
2021-07-15 17:46:37,595 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
83 |
+
2021-07-15 17:46:48,677 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
|
84 |
+
2021-07-15 17:46:52,729 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
85 |
+
2021-07-15 17:46:52,729 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
86 |
+
2021-07-15 17:47:07,861 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
87 |
+
2021-07-15 17:47:07,862 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
88 |
+
2021-07-15 17:47:18,753 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
|
89 |
+
2021-07-15 17:47:22,995 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: stop_status
|
90 |
+
2021-07-15 17:47:22,995 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: stop_status
|
91 |
+
2021-07-15 17:47:31,673 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
|
92 |
+
2021-07-15 17:47:31,673 DEBUG SenderThread:723175 [sender.py:send():179] send: telemetry
|
93 |
+
2021-07-15 17:47:31,673 DEBUG SenderThread:723175 [sender.py:send():179] send: exit
|
94 |
+
2021-07-15 17:47:31,673 INFO SenderThread:723175 [sender.py:send_exit():287] handling exit code: 1
|
95 |
+
2021-07-15 17:47:31,674 INFO SenderThread:723175 [sender.py:send_exit():295] send defer
|
96 |
+
2021-07-15 17:47:31,674 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
|
97 |
+
2021-07-15 17:47:31,674 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
|
98 |
+
2021-07-15 17:47:31,674 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 0
|
99 |
+
2021-07-15 17:47:31,674 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
|
100 |
+
2021-07-15 17:47:31,674 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 0
|
101 |
+
2021-07-15 17:47:31,675 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 1
|
102 |
+
2021-07-15 17:47:31,675 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
|
103 |
+
2021-07-15 17:47:31,675 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 1
|
104 |
+
2021-07-15 17:47:31,686 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
|
105 |
+
2021-07-15 17:47:31,686 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 1
|
106 |
+
2021-07-15 17:47:31,686 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 2
|
107 |
+
2021-07-15 17:47:31,686 DEBUG SenderThread:723175 [sender.py:send():179] send: stats
|
108 |
+
2021-07-15 17:47:31,687 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
|
109 |
+
2021-07-15 17:47:31,687 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 2
|
110 |
+
2021-07-15 17:47:31,687 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
|
111 |
+
2021-07-15 17:47:31,687 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 2
|
112 |
+
2021-07-15 17:47:31,687 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 3
|
113 |
+
2021-07-15 17:47:31,687 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
|
114 |
+
2021-07-15 17:47:31,687 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 3
|
115 |
+
2021-07-15 17:47:31,687 DEBUG SenderThread:723175 [sender.py:send():179] send: summary
|
116 |
+
2021-07-15 17:47:31,688 INFO SenderThread:723175 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
117 |
+
2021-07-15 17:47:31,688 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
|
118 |
+
2021-07-15 17:47:31,688 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 3
|
119 |
+
2021-07-15 17:47:31,688 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 4
|
120 |
+
2021-07-15 17:47:31,688 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
|
121 |
+
2021-07-15 17:47:31,688 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 4
|
122 |
+
2021-07-15 17:47:31,688 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
|
123 |
+
2021-07-15 17:47:31,688 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 4
|
124 |
+
2021-07-15 17:47:31,776 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
|
125 |
+
2021-07-15 17:47:31,881 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 5
|
126 |
+
2021-07-15 17:47:31,881 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
|
127 |
+
2021-07-15 17:47:31,881 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
|
128 |
+
2021-07-15 17:47:31,881 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 5
|
129 |
+
2021-07-15 17:47:31,882 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
|
130 |
+
2021-07-15 17:47:31,882 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 5
|
131 |
+
2021-07-15 17:47:31,882 INFO SenderThread:723175 [dir_watcher.py:finish():282] shutting down directory watcher
|
132 |
+
2021-07-15 17:47:31,891 INFO SenderThread:723175 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/config.yaml
|
133 |
+
2021-07-15 17:47:31,891 INFO SenderThread:723175 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/wandb-summary.json
|
134 |
+
2021-07-15 17:47:31,891 INFO SenderThread:723175 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/output.log
|
135 |
+
2021-07-15 17:47:31,891 INFO SenderThread:723175 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files
|
136 |
+
2021-07-15 17:47:31,891 INFO SenderThread:723175 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/requirements.txt requirements.txt
|
137 |
+
2021-07-15 17:47:31,892 INFO SenderThread:723175 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/output.log output.log
|
138 |
+
2021-07-15 17:47:31,892 INFO SenderThread:723175 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/wandb-metadata.json wandb-metadata.json
|
139 |
+
2021-07-15 17:47:31,892 INFO SenderThread:723175 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/config.yaml config.yaml
|
140 |
+
2021-07-15 17:47:31,892 INFO SenderThread:723175 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/wandb-summary.json wandb-summary.json
|
141 |
+
2021-07-15 17:47:31,895 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 6
|
142 |
+
2021-07-15 17:47:31,896 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
|
143 |
+
2021-07-15 17:47:31,896 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 6
|
144 |
+
2021-07-15 17:47:31,896 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
|
145 |
+
2021-07-15 17:47:31,896 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 6
|
146 |
+
2021-07-15 17:47:31,896 INFO SenderThread:723175 [file_pusher.py:finish():177] shutting down file pusher
|
147 |
+
2021-07-15 17:47:31,983 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
|
148 |
+
2021-07-15 17:47:31,983 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
|
149 |
+
2021-07-15 17:47:32,085 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
|
150 |
+
2021-07-15 17:47:32,085 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
|
151 |
+
2021-07-15 17:47:32,187 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
|
152 |
+
2021-07-15 17:47:32,187 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
|
153 |
+
2021-07-15 17:47:32,289 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
|
154 |
+
2021-07-15 17:47:32,289 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
|
155 |
+
2021-07-15 17:47:32,333 INFO Thread-12 :723175 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/requirements.txt
|
156 |
+
2021-07-15 17:47:32,347 INFO Thread-13 :723175 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/output.log
|
157 |
+
2021-07-15 17:47:32,391 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
|
158 |
+
2021-07-15 17:47:32,391 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
|
159 |
+
2021-07-15 17:47:32,408 INFO Thread-14 :723175 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/config.yaml
|
160 |
+
2021-07-15 17:47:32,444 INFO Thread-15 :723175 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/files/wandb-summary.json
|
161 |
+
2021-07-15 17:47:32,493 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
|
162 |
+
2021-07-15 17:47:32,493 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
|
163 |
+
2021-07-15 17:47:32,595 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
|
164 |
+
2021-07-15 17:47:32,595 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
|
165 |
+
2021-07-15 17:47:32,645 INFO Thread-7 :723175 [sender.py:transition_state():308] send defer: 7
|
166 |
+
2021-07-15 17:47:32,645 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
|
167 |
+
2021-07-15 17:47:32,645 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 7
|
168 |
+
2021-07-15 17:47:32,646 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
|
169 |
+
2021-07-15 17:47:32,646 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 7
|
170 |
+
2021-07-15 17:47:32,697 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
|
171 |
+
2021-07-15 17:47:32,951 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 8
|
172 |
+
2021-07-15 17:47:32,951 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
|
173 |
+
2021-07-15 17:47:32,952 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
|
174 |
+
2021-07-15 17:47:32,952 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 8
|
175 |
+
2021-07-15 17:47:32,952 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
|
176 |
+
2021-07-15 17:47:32,952 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 8
|
177 |
+
2021-07-15 17:47:32,952 INFO SenderThread:723175 [sender.py:transition_state():308] send defer: 9
|
178 |
+
2021-07-15 17:47:32,953 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: defer
|
179 |
+
2021-07-15 17:47:32,953 INFO HandlerThread:723175 [handler.py:handle_request_defer():141] handle defer: 9
|
180 |
+
2021-07-15 17:47:32,953 DEBUG SenderThread:723175 [sender.py:send():179] send: final
|
181 |
+
2021-07-15 17:47:32,953 DEBUG SenderThread:723175 [sender.py:send():179] send: footer
|
182 |
+
2021-07-15 17:47:32,953 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: defer
|
183 |
+
2021-07-15 17:47:32,953 INFO SenderThread:723175 [sender.py:send_request_defer():304] handle sender defer: 9
|
184 |
+
2021-07-15 17:47:33,053 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: poll_exit
|
185 |
+
2021-07-15 17:47:33,053 DEBUG SenderThread:723175 [sender.py:send_request():193] send_request: poll_exit
|
186 |
+
2021-07-15 17:47:33,053 INFO SenderThread:723175 [file_pusher.py:join():182] waiting for file pusher
|
187 |
+
2021-07-15 17:47:33,055 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: get_summary
|
188 |
+
2021-07-15 17:47:33,055 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: sampled_history
|
189 |
+
2021-07-15 17:47:33,056 DEBUG HandlerThread:723175 [handler.py:handle_request():124] handle_request: shutdown
|
190 |
+
2021-07-15 17:47:33,056 INFO HandlerThread:723175 [handler.py:finish():638] shutting down handler
|
191 |
+
2021-07-15 17:47:33,953 INFO WriterThread:723175 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/run-3nkn7hxg.wandb
|
192 |
+
2021-07-15 17:47:34,054 INFO SenderThread:723175 [sender.py:finish():945] shutting down sender
|
193 |
+
2021-07-15 17:47:34,054 INFO SenderThread:723175 [file_pusher.py:finish():177] shutting down file pusher
|
194 |
+
2021-07-15 17:47:34,054 INFO SenderThread:723175 [file_pusher.py:join():182] waiting for file pusher
|
195 |
+
2021-07-15 17:47:34,056 INFO MainThread:723175 [internal.py:handle_exit():78] Internal process exited
|
wandb/run-20210715_174147-3nkn7hxg/logs/debug.log
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-15 17:41:47,906 INFO MainThread:721922 [wandb_setup.py:_flush():69] setting env: {}
|
2 |
+
2021-07-15 17:41:47,906 INFO MainThread:721922 [wandb_setup.py:_flush():69] setting login settings: {}
|
3 |
+
2021-07-15 17:41:47,906 INFO MainThread:721922 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/logs/debug.log
|
4 |
+
2021-07-15 17:41:47,906 INFO MainThread:721922 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_174147-3nkn7hxg/logs/debug-internal.log
|
5 |
+
2021-07-15 17:41:47,906 INFO MainThread:721922 [wandb_init.py:init():370] calling init triggers
|
6 |
+
2021-07-15 17:41:47,906 INFO MainThread:721922 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
|
7 |
+
config: {}
|
8 |
+
2021-07-15 17:41:47,906 INFO MainThread:721922 [wandb_init.py:init():419] starting backend
|
9 |
+
2021-07-15 17:41:47,906 INFO MainThread:721922 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
10 |
+
2021-07-15 17:41:47,954 INFO MainThread:721922 [backend.py:ensure_launched():135] starting backend process...
|
11 |
+
2021-07-15 17:41:48,000 INFO MainThread:721922 [backend.py:ensure_launched():139] started backend process with pid: 723175
|
12 |
+
2021-07-15 17:41:48,002 INFO MainThread:721922 [wandb_init.py:init():424] backend started and connected
|
13 |
+
2021-07-15 17:41:48,005 INFO MainThread:721922 [wandb_init.py:init():472] updated telemetry
|
14 |
+
2021-07-15 17:41:48,006 INFO MainThread:721922 [wandb_init.py:init():491] communicating current version
|
15 |
+
2021-07-15 17:41:48,633 INFO MainThread:721922 [wandb_init.py:init():496] got version response
|
16 |
+
2021-07-15 17:41:48,633 INFO MainThread:721922 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
|
17 |
+
2021-07-15 17:41:48,805 INFO MainThread:721922 [wandb_init.py:init():529] starting run threads in backend
|
18 |
+
2021-07-15 17:41:49,968 INFO MainThread:721922 [wandb_run.py:_console_start():1623] atexit reg
|
19 |
+
2021-07-15 17:41:49,968 INFO MainThread:721922 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
|
20 |
+
2021-07-15 17:41:49,969 INFO MainThread:721922 [wandb_run.py:_redirect():1502] Redirecting console.
|
21 |
+
2021-07-15 17:41:49,971 INFO MainThread:721922 [wandb_run.py:_redirect():1558] Redirects installed.
|
22 |
+
2021-07-15 17:41:49,971 INFO MainThread:721922 [wandb_init.py:init():554] run started, returning control to user process
|
23 |
+
2021-07-15 17:41:49,977 INFO MainThread:721922 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_17-41-39_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 15000, 'save_total_limit': 20, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 6000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
+
2021-07-15 17:41:49,978 INFO MainThread:721922 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
|
25 |
+
2021-07-15 17:41:49,980 INFO MainThread:721922 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 4000}
|
26 |
+
2021-07-15 17:47:28,749 INFO MainThread:721922 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
|
27 |
+
2021-07-15 17:47:28,750 INFO MainThread:721922 [wandb_run.py:_restore():1565] restore
|
28 |
+
2021-07-15 17:47:31,674 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
29 |
+
wandb_count: 1
|
30 |
+
}
|
31 |
+
pusher_stats {
|
32 |
+
uploaded_bytes: 1415
|
33 |
+
total_bytes: 1415
|
34 |
+
}
|
35 |
+
|
36 |
+
2021-07-15 17:47:31,882 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
37 |
+
wandb_count: 1
|
38 |
+
}
|
39 |
+
pusher_stats {
|
40 |
+
uploaded_bytes: 1415
|
41 |
+
total_bytes: 1415
|
42 |
+
}
|
43 |
+
|
44 |
+
2021-07-15 17:47:31,984 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
45 |
+
wandb_count: 5
|
46 |
+
}
|
47 |
+
pusher_stats {
|
48 |
+
uploaded_bytes: 1415
|
49 |
+
total_bytes: 8975
|
50 |
+
}
|
51 |
+
|
52 |
+
2021-07-15 17:47:32,086 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
53 |
+
wandb_count: 5
|
54 |
+
}
|
55 |
+
pusher_stats {
|
56 |
+
uploaded_bytes: 8975
|
57 |
+
total_bytes: 8975
|
58 |
+
}
|
59 |
+
|
60 |
+
2021-07-15 17:47:32,188 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
61 |
+
wandb_count: 5
|
62 |
+
}
|
63 |
+
pusher_stats {
|
64 |
+
uploaded_bytes: 8975
|
65 |
+
total_bytes: 8975
|
66 |
+
}
|
67 |
+
|
68 |
+
2021-07-15 17:47:32,290 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
69 |
+
wandb_count: 5
|
70 |
+
}
|
71 |
+
pusher_stats {
|
72 |
+
uploaded_bytes: 8975
|
73 |
+
total_bytes: 8975
|
74 |
+
}
|
75 |
+
|
76 |
+
2021-07-15 17:47:32,392 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
77 |
+
wandb_count: 5
|
78 |
+
}
|
79 |
+
pusher_stats {
|
80 |
+
uploaded_bytes: 8975
|
81 |
+
total_bytes: 8975
|
82 |
+
}
|
83 |
+
|
84 |
+
2021-07-15 17:47:32,494 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
85 |
+
wandb_count: 5
|
86 |
+
}
|
87 |
+
pusher_stats {
|
88 |
+
uploaded_bytes: 8975
|
89 |
+
total_bytes: 8975
|
90 |
+
}
|
91 |
+
|
92 |
+
2021-07-15 17:47:32,596 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
93 |
+
wandb_count: 5
|
94 |
+
}
|
95 |
+
pusher_stats {
|
96 |
+
uploaded_bytes: 8975
|
97 |
+
total_bytes: 8975
|
98 |
+
}
|
99 |
+
|
100 |
+
2021-07-15 17:47:32,952 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
101 |
+
wandb_count: 5
|
102 |
+
}
|
103 |
+
pusher_stats {
|
104 |
+
uploaded_bytes: 8975
|
105 |
+
total_bytes: 8975
|
106 |
+
}
|
107 |
+
|
108 |
+
2021-07-15 17:47:33,054 INFO MainThread:721922 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
|
109 |
+
exit_result {
|
110 |
+
}
|
111 |
+
file_counts {
|
112 |
+
wandb_count: 5
|
113 |
+
}
|
114 |
+
pusher_stats {
|
115 |
+
uploaded_bytes: 8975
|
116 |
+
total_bytes: 8975
|
117 |
+
}
|
118 |
+
|
119 |
+
2021-07-15 17:47:34,335 INFO MainThread:721922 [wandb_run.py:_show_files():1937] logging synced files
|
wandb/run-20210715_174147-3nkn7hxg/run-3nkn7hxg.wandb
ADDED
Binary file (7.49 kB). View file
|
|
wandb/run-20210715_175147-3lygnexi/files/config.yaml
ADDED
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
4: 3.8.10
|
17 |
+
5: 0.10.33
|
18 |
+
6: 4.9.0.dev0
|
19 |
+
8:
|
20 |
+
- 5
|
21 |
+
adafactor:
|
22 |
+
desc: null
|
23 |
+
value: false
|
24 |
+
adam_beta1:
|
25 |
+
desc: null
|
26 |
+
value: 0.9
|
27 |
+
adam_beta2:
|
28 |
+
desc: null
|
29 |
+
value: 0.98
|
30 |
+
adam_epsilon:
|
31 |
+
desc: null
|
32 |
+
value: 1.0e-08
|
33 |
+
cache_dir:
|
34 |
+
desc: null
|
35 |
+
value: null
|
36 |
+
config_name:
|
37 |
+
desc: null
|
38 |
+
value: ./
|
39 |
+
dataloader_drop_last:
|
40 |
+
desc: null
|
41 |
+
value: false
|
42 |
+
dataloader_num_workers:
|
43 |
+
desc: null
|
44 |
+
value: 0
|
45 |
+
dataloader_pin_memory:
|
46 |
+
desc: null
|
47 |
+
value: true
|
48 |
+
dataset_config_name:
|
49 |
+
desc: null
|
50 |
+
value: null
|
51 |
+
dataset_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
ddp_find_unused_parameters:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
debug:
|
58 |
+
desc: null
|
59 |
+
value: []
|
60 |
+
deepspeed:
|
61 |
+
desc: null
|
62 |
+
value: null
|
63 |
+
disable_tqdm:
|
64 |
+
desc: null
|
65 |
+
value: false
|
66 |
+
do_eval:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_predict:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_train:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
dtype:
|
76 |
+
desc: null
|
77 |
+
value: float32
|
78 |
+
eval_accumulation_steps:
|
79 |
+
desc: null
|
80 |
+
value: null
|
81 |
+
eval_steps:
|
82 |
+
desc: null
|
83 |
+
value: 6000
|
84 |
+
evaluation_strategy:
|
85 |
+
desc: null
|
86 |
+
value: IntervalStrategy.NO
|
87 |
+
fp16:
|
88 |
+
desc: null
|
89 |
+
value: false
|
90 |
+
fp16_backend:
|
91 |
+
desc: null
|
92 |
+
value: auto
|
93 |
+
fp16_full_eval:
|
94 |
+
desc: null
|
95 |
+
value: false
|
96 |
+
fp16_opt_level:
|
97 |
+
desc: null
|
98 |
+
value: O1
|
99 |
+
gradient_accumulation_steps:
|
100 |
+
desc: null
|
101 |
+
value: 1
|
102 |
+
greater_is_better:
|
103 |
+
desc: null
|
104 |
+
value: null
|
105 |
+
group_by_length:
|
106 |
+
desc: null
|
107 |
+
value: false
|
108 |
+
ignore_data_skip:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
label_names:
|
112 |
+
desc: null
|
113 |
+
value: null
|
114 |
+
label_smoothing_factor:
|
115 |
+
desc: null
|
116 |
+
value: 0.0
|
117 |
+
learning_rate:
|
118 |
+
desc: null
|
119 |
+
value: 3.0e-05
|
120 |
+
length_column_name:
|
121 |
+
desc: null
|
122 |
+
value: length
|
123 |
+
line_by_line:
|
124 |
+
desc: null
|
125 |
+
value: false
|
126 |
+
load_best_model_at_end:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
local_rank:
|
130 |
+
desc: null
|
131 |
+
value: -1
|
132 |
+
log_level:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level_replica:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_on_each_node:
|
139 |
+
desc: null
|
140 |
+
value: true
|
141 |
+
logging_dir:
|
142 |
+
desc: null
|
143 |
+
value: ./runs/Jul15_17-51-39_t1v-n-f5c06ea1-w-0
|
144 |
+
logging_first_step:
|
145 |
+
desc: null
|
146 |
+
value: false
|
147 |
+
logging_steps:
|
148 |
+
desc: null
|
149 |
+
value: 50
|
150 |
+
logging_strategy:
|
151 |
+
desc: null
|
152 |
+
value: IntervalStrategy.STEPS
|
153 |
+
lr_scheduler_type:
|
154 |
+
desc: null
|
155 |
+
value: SchedulerType.LINEAR
|
156 |
+
max_eval_samples:
|
157 |
+
desc: null
|
158 |
+
value: 4000
|
159 |
+
max_grad_norm:
|
160 |
+
desc: null
|
161 |
+
value: 1.0
|
162 |
+
max_seq_length:
|
163 |
+
desc: null
|
164 |
+
value: 4096
|
165 |
+
max_steps:
|
166 |
+
desc: null
|
167 |
+
value: -1
|
168 |
+
metric_for_best_model:
|
169 |
+
desc: null
|
170 |
+
value: null
|
171 |
+
mlm_probability:
|
172 |
+
desc: null
|
173 |
+
value: 0.15
|
174 |
+
model_name_or_path:
|
175 |
+
desc: null
|
176 |
+
value: null
|
177 |
+
model_type:
|
178 |
+
desc: null
|
179 |
+
value: big_bird
|
180 |
+
mp_parameters:
|
181 |
+
desc: null
|
182 |
+
value: ''
|
183 |
+
no_cuda:
|
184 |
+
desc: null
|
185 |
+
value: false
|
186 |
+
num_train_epochs:
|
187 |
+
desc: null
|
188 |
+
value: 2.0
|
189 |
+
output_dir:
|
190 |
+
desc: null
|
191 |
+
value: ./
|
192 |
+
overwrite_cache:
|
193 |
+
desc: null
|
194 |
+
value: false
|
195 |
+
overwrite_output_dir:
|
196 |
+
desc: null
|
197 |
+
value: true
|
198 |
+
pad_to_max_length:
|
199 |
+
desc: null
|
200 |
+
value: false
|
201 |
+
past_index:
|
202 |
+
desc: null
|
203 |
+
value: -1
|
204 |
+
per_device_eval_batch_size:
|
205 |
+
desc: null
|
206 |
+
value: 1
|
207 |
+
per_device_train_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 1
|
210 |
+
per_gpu_eval_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: null
|
213 |
+
per_gpu_train_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
prediction_loss_only:
|
217 |
+
desc: null
|
218 |
+
value: false
|
219 |
+
preprocessing_num_workers:
|
220 |
+
desc: null
|
221 |
+
value: 96
|
222 |
+
push_to_hub:
|
223 |
+
desc: null
|
224 |
+
value: true
|
225 |
+
push_to_hub_model_id:
|
226 |
+
desc: null
|
227 |
+
value: ''
|
228 |
+
push_to_hub_organization:
|
229 |
+
desc: null
|
230 |
+
value: null
|
231 |
+
push_to_hub_token:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
remove_unused_columns:
|
235 |
+
desc: null
|
236 |
+
value: true
|
237 |
+
report_to:
|
238 |
+
desc: null
|
239 |
+
value:
|
240 |
+
- tensorboard
|
241 |
+
- wandb
|
242 |
+
resume_from_checkpoint:
|
243 |
+
desc: null
|
244 |
+
value: ./
|
245 |
+
run_name:
|
246 |
+
desc: null
|
247 |
+
value: ./
|
248 |
+
save_on_each_node:
|
249 |
+
desc: null
|
250 |
+
value: false
|
251 |
+
save_steps:
|
252 |
+
desc: null
|
253 |
+
value: 15000
|
254 |
+
save_strategy:
|
255 |
+
desc: null
|
256 |
+
value: IntervalStrategy.STEPS
|
257 |
+
save_total_limit:
|
258 |
+
desc: null
|
259 |
+
value: 20
|
260 |
+
seed:
|
261 |
+
desc: null
|
262 |
+
value: 42
|
263 |
+
sharded_ddp:
|
264 |
+
desc: null
|
265 |
+
value: []
|
266 |
+
skip_memory_metrics:
|
267 |
+
desc: null
|
268 |
+
value: true
|
269 |
+
tokenizer_name:
|
270 |
+
desc: null
|
271 |
+
value: ./
|
272 |
+
tpu_metrics_debug:
|
273 |
+
desc: null
|
274 |
+
value: false
|
275 |
+
tpu_num_cores:
|
276 |
+
desc: null
|
277 |
+
value: null
|
278 |
+
train_ref_file:
|
279 |
+
desc: null
|
280 |
+
value: null
|
281 |
+
use_fast_tokenizer:
|
282 |
+
desc: null
|
283 |
+
value: true
|
284 |
+
use_legacy_prediction_loop:
|
285 |
+
desc: null
|
286 |
+
value: false
|
287 |
+
validation_ref_file:
|
288 |
+
desc: null
|
289 |
+
value: null
|
290 |
+
validation_split_percentage:
|
291 |
+
desc: null
|
292 |
+
value: 5
|
293 |
+
warmup_ratio:
|
294 |
+
desc: null
|
295 |
+
value: 0.0
|
296 |
+
warmup_steps:
|
297 |
+
desc: null
|
298 |
+
value: 10000
|
299 |
+
weight_decay:
|
300 |
+
desc: null
|
301 |
+
value: 0.0095
|
wandb/run-20210715_175147-3lygnexi/files/output.log
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
[17:52:01] - INFO - absl - Restoring checkpoint from ./checkpoint_10000
|
3 |
+
tcmalloc: large alloc 1530273792 bytes == 0x9b410000 @ 0x7f6acebf3680 0x7f6acec14824 0x5b9a14 0x50b2ae 0x50cb1b 0x5a6f17 0x5f3010 0x56fd36 0x568d9a 0x5f5b33 0x56aadf 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7f6acea080b3 0x5f96de
|
4 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
|
5 |
+
warnings.warn(
|
6 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
7 |
+
warnings.warn(
|
8 |
+
Epoch ... (1/2): 0%| | 0/2 [00:00<?, ?it/s]
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
|
13 |
+
|
14 |
+
|
15 |
+
|
16 |
+
Training...: 28%|█████████████████▌ | 100059/352766 [04:35<64:36:13, 1.09it/s]
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
|
21 |
+
|
22 |
+
Training...: 28%|█████████████████▌ | 100113/352766 [04:55<31:00:36, 2.26it/s]
|
23 |
+
|
24 |
+
|
25 |
+
|
26 |
+
|
wandb/run-20210715_175147-3lygnexi/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|
wandb/run-20210715_175147-3lygnexi/files/wandb-metadata.json
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2021-07-15T17:51:49.050579",
|
5 |
+
"startedAt": "2021-07-15T17:51:47.009391",
|
6 |
+
"docker": null,
|
7 |
+
"cpu_count": 96,
|
8 |
+
"cuda": null,
|
9 |
+
"args": [
|
10 |
+
"--push_to_hub",
|
11 |
+
"--output_dir=./",
|
12 |
+
"--model_type=big_bird",
|
13 |
+
"--config_name=./",
|
14 |
+
"--tokenizer_name=./",
|
15 |
+
"--max_seq_length=4096",
|
16 |
+
"--weight_decay=0.0095",
|
17 |
+
"--warmup_steps=10000",
|
18 |
+
"--overwrite_output_dir",
|
19 |
+
"--adam_beta1=0.9",
|
20 |
+
"--adam_beta2=0.98",
|
21 |
+
"--logging_steps=50",
|
22 |
+
"--eval_steps=6000",
|
23 |
+
"--num_train_epochs=2",
|
24 |
+
"--preprocessing_num_workers=96",
|
25 |
+
"--save_steps=15000",
|
26 |
+
"--learning_rate=3e-5",
|
27 |
+
"--per_device_train_batch_size=1",
|
28 |
+
"--per_device_eval_batch_size=1",
|
29 |
+
"--save_total_limit=20",
|
30 |
+
"--max_eval_samples=4000",
|
31 |
+
"--resume_from_checkpoint=./"
|
32 |
+
],
|
33 |
+
"state": "running",
|
34 |
+
"program": "./run_mlm_flax_no_accum.py",
|
35 |
+
"codePath": "run_mlm_flax_no_accum.py",
|
36 |
+
"git": {
|
37 |
+
"remote": "https://huggingface.co/flax-community/pino-roberta-base",
|
38 |
+
"commit": "cc569aecf5e26454416d7a13c7876ad9111120cf"
|
39 |
+
},
|
40 |
+
"email": null,
|
41 |
+
"root": "/home/dat/pino-roberta-base",
|
42 |
+
"host": "t1v-n-f5c06ea1-w-0",
|
43 |
+
"username": "dat",
|
44 |
+
"executable": "/home/dat/pino/bin/python"
|
45 |
+
}
|
wandb/run-20210715_175147-3lygnexi/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"training_step": 100, "learning_rate": 2.6113679268746637e-05, "train_loss": 2.3843164443969727, "_runtime": 635, "_timestamp": 1626372142, "_step": 1}
|
wandb/run-20210715_175147-3lygnexi/logs/debug-internal.log
ADDED
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-15 17:51:47,712 INFO MainThread:725632 [internal.py:wandb_internal():88] W&B internal server running at pid: 725632, started at: 2021-07-15 17:51:47.712407
|
2 |
+
2021-07-15 17:51:47,714 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: check_version
|
3 |
+
2021-07-15 17:51:47,714 INFO WriterThread:725632 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/run-3lygnexi.wandb
|
4 |
+
2021-07-15 17:51:47,715 DEBUG SenderThread:725632 [sender.py:send():179] send: header
|
5 |
+
2021-07-15 17:51:47,716 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: check_version
|
6 |
+
2021-07-15 17:51:47,753 DEBUG SenderThread:725632 [sender.py:send():179] send: run
|
7 |
+
2021-07-15 17:51:47,932 INFO SenderThread:725632 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files
|
8 |
+
2021-07-15 17:51:47,933 INFO SenderThread:725632 [sender.py:_start_run_threads():716] run started: 3lygnexi with start time 1626371507
|
9 |
+
2021-07-15 17:51:47,933 DEBUG SenderThread:725632 [sender.py:send():179] send: summary
|
10 |
+
2021-07-15 17:51:47,933 INFO SenderThread:725632 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
11 |
+
2021-07-15 17:51:47,933 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: run_start
|
12 |
+
2021-07-15 17:51:48,936 INFO Thread-8 :725632 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/wandb-summary.json
|
13 |
+
2021-07-15 17:51:49,050 DEBUG HandlerThread:725632 [meta.py:__init__():39] meta init
|
14 |
+
2021-07-15 17:51:49,050 DEBUG HandlerThread:725632 [meta.py:__init__():53] meta init done
|
15 |
+
2021-07-15 17:51:49,050 DEBUG HandlerThread:725632 [meta.py:probe():210] probe
|
16 |
+
2021-07-15 17:51:49,051 DEBUG HandlerThread:725632 [meta.py:_setup_git():200] setup git
|
17 |
+
2021-07-15 17:51:49,081 DEBUG HandlerThread:725632 [meta.py:_setup_git():207] setup git done
|
18 |
+
2021-07-15 17:51:49,081 DEBUG HandlerThread:725632 [meta.py:_save_pip():57] save pip
|
19 |
+
2021-07-15 17:51:49,082 DEBUG HandlerThread:725632 [meta.py:_save_pip():71] save pip done
|
20 |
+
2021-07-15 17:51:49,082 DEBUG HandlerThread:725632 [meta.py:probe():252] probe done
|
21 |
+
2021-07-15 17:51:49,085 DEBUG SenderThread:725632 [sender.py:send():179] send: files
|
22 |
+
2021-07-15 17:51:49,085 INFO SenderThread:725632 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
|
23 |
+
2021-07-15 17:51:49,091 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
24 |
+
2021-07-15 17:51:49,092 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
25 |
+
2021-07-15 17:51:49,220 DEBUG SenderThread:725632 [sender.py:send():179] send: config
|
26 |
+
2021-07-15 17:51:49,221 DEBUG SenderThread:725632 [sender.py:send():179] send: config
|
27 |
+
2021-07-15 17:51:49,221 DEBUG SenderThread:725632 [sender.py:send():179] send: config
|
28 |
+
2021-07-15 17:51:49,586 INFO Thread-11 :725632 [upload_job.py:push():137] Uploaded file /tmp/tmprw9uts3owandb/626w6hbr-wandb-metadata.json
|
29 |
+
2021-07-15 17:51:49,934 INFO Thread-8 :725632 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
30 |
+
2021-07-15 17:51:49,935 INFO Thread-8 :725632 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/requirements.txt
|
31 |
+
2021-07-15 17:51:49,935 INFO Thread-8 :725632 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/wandb-metadata.json
|
32 |
+
2021-07-15 17:52:04,352 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
33 |
+
2021-07-15 17:52:04,352 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
34 |
+
2021-07-15 17:52:04,940 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
35 |
+
2021-07-15 17:52:06,941 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
36 |
+
2021-07-15 17:52:17,133 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
37 |
+
2021-07-15 17:52:18,946 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/config.yaml
|
38 |
+
2021-07-15 17:52:19,500 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
39 |
+
2021-07-15 17:52:19,500 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
40 |
+
2021-07-15 17:52:34,650 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
41 |
+
2021-07-15 17:52:34,651 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
42 |
+
2021-07-15 17:52:47,211 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
43 |
+
2021-07-15 17:52:49,782 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
44 |
+
2021-07-15 17:52:49,782 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
45 |
+
2021-07-15 17:53:04,916 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
46 |
+
2021-07-15 17:53:04,916 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
47 |
+
2021-07-15 17:53:17,287 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
48 |
+
2021-07-15 17:53:20,049 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
49 |
+
2021-07-15 17:53:20,049 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
50 |
+
2021-07-15 17:53:35,180 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
51 |
+
2021-07-15 17:53:35,180 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
52 |
+
2021-07-15 17:53:47,359 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
53 |
+
2021-07-15 17:53:50,321 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
54 |
+
2021-07-15 17:53:50,322 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
55 |
+
2021-07-15 17:54:05,456 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
56 |
+
2021-07-15 17:54:05,456 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
57 |
+
2021-07-15 17:54:17,434 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
58 |
+
2021-07-15 17:54:20,594 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
59 |
+
2021-07-15 17:54:20,595 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
60 |
+
2021-07-15 17:54:35,726 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
61 |
+
2021-07-15 17:54:35,726 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
62 |
+
2021-07-15 17:54:47,513 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
63 |
+
2021-07-15 17:54:50,859 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
64 |
+
2021-07-15 17:54:50,859 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
65 |
+
2021-07-15 17:55:05,995 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
66 |
+
2021-07-15 17:55:05,995 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
67 |
+
2021-07-15 17:55:17,578 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
68 |
+
2021-07-15 17:55:21,130 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
69 |
+
2021-07-15 17:55:21,131 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
70 |
+
2021-07-15 17:55:36,263 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
71 |
+
2021-07-15 17:55:36,263 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
72 |
+
2021-07-15 17:55:47,636 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
73 |
+
2021-07-15 17:55:51,395 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
74 |
+
2021-07-15 17:55:51,396 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
75 |
+
2021-07-15 17:56:06,527 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
76 |
+
2021-07-15 17:56:06,528 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
77 |
+
2021-07-15 17:56:17,705 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
78 |
+
2021-07-15 17:56:21,658 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
79 |
+
2021-07-15 17:56:21,658 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
80 |
+
2021-07-15 17:56:36,787 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
81 |
+
2021-07-15 17:56:36,787 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
82 |
+
2021-07-15 17:56:47,777 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
83 |
+
2021-07-15 17:56:51,920 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
84 |
+
2021-07-15 17:56:51,920 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
85 |
+
2021-07-15 17:57:07,051 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
86 |
+
2021-07-15 17:57:07,051 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
87 |
+
2021-07-15 17:57:17,846 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
88 |
+
2021-07-15 17:57:22,182 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
89 |
+
2021-07-15 17:57:22,182 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
90 |
+
2021-07-15 17:57:29,057 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
91 |
+
2021-07-15 17:57:37,329 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
92 |
+
2021-07-15 17:57:37,330 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
93 |
+
2021-07-15 17:57:47,918 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
94 |
+
2021-07-15 17:57:52,478 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
95 |
+
2021-07-15 17:57:52,478 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
96 |
+
2021-07-15 17:58:07,606 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
97 |
+
2021-07-15 17:58:07,607 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
98 |
+
2021-07-15 17:58:17,984 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
99 |
+
2021-07-15 17:58:22,739 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
100 |
+
2021-07-15 17:58:22,739 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
101 |
+
2021-07-15 17:58:37,872 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
102 |
+
2021-07-15 17:58:37,872 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
103 |
+
2021-07-15 17:58:48,054 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
104 |
+
2021-07-15 17:58:53,004 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
105 |
+
2021-07-15 17:58:53,005 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
106 |
+
2021-07-15 17:59:08,136 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
107 |
+
2021-07-15 17:59:08,136 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
108 |
+
2021-07-15 17:59:18,132 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
109 |
+
2021-07-15 17:59:23,266 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
110 |
+
2021-07-15 17:59:23,266 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
111 |
+
2021-07-15 17:59:35,104 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
112 |
+
2021-07-15 17:59:38,608 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
113 |
+
2021-07-15 17:59:38,608 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
114 |
+
2021-07-15 17:59:48,205 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
115 |
+
2021-07-15 17:59:53,754 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
116 |
+
2021-07-15 17:59:53,755 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
117 |
+
2021-07-15 18:00:08,886 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
118 |
+
2021-07-15 18:00:08,887 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
119 |
+
2021-07-15 18:00:18,277 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
120 |
+
2021-07-15 18:00:24,017 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
121 |
+
2021-07-15 18:00:24,018 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
122 |
+
2021-07-15 18:00:39,149 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
123 |
+
2021-07-15 18:00:39,149 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
124 |
+
2021-07-15 18:00:48,356 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
125 |
+
2021-07-15 18:00:54,282 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
126 |
+
2021-07-15 18:00:54,283 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
127 |
+
2021-07-15 18:01:09,424 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
128 |
+
2021-07-15 18:01:09,424 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
129 |
+
2021-07-15 18:01:18,438 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
130 |
+
2021-07-15 18:01:24,555 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
131 |
+
2021-07-15 18:01:24,555 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
132 |
+
2021-07-15 18:01:39,687 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
133 |
+
2021-07-15 18:01:39,688 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
134 |
+
2021-07-15 18:01:44,155 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
135 |
+
2021-07-15 18:01:46,156 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
136 |
+
2021-07-15 18:01:48,157 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
137 |
+
2021-07-15 18:01:48,515 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
138 |
+
2021-07-15 18:01:50,158 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
139 |
+
2021-07-15 18:01:52,159 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
140 |
+
2021-07-15 18:01:54,817 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
141 |
+
2021-07-15 18:01:54,818 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
142 |
+
2021-07-15 18:02:02,507 DEBUG SenderThread:725632 [sender.py:send():179] send: history
|
143 |
+
2021-07-15 18:02:02,507 DEBUG SenderThread:725632 [sender.py:send():179] send: summary
|
144 |
+
2021-07-15 18:02:02,507 INFO SenderThread:725632 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
145 |
+
2021-07-15 18:02:03,163 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/wandb-summary.json
|
146 |
+
2021-07-15 18:02:04,163 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
147 |
+
2021-07-15 18:02:05,164 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
148 |
+
2021-07-15 18:02:06,164 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
149 |
+
2021-07-15 18:02:08,165 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
150 |
+
2021-07-15 18:02:09,950 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
151 |
+
2021-07-15 18:02:09,950 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
152 |
+
2021-07-15 18:02:10,166 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
153 |
+
2021-07-15 18:02:12,167 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
154 |
+
2021-07-15 18:02:18,592 DEBUG SenderThread:725632 [sender.py:send():179] send: stats
|
155 |
+
2021-07-15 18:02:22,541 DEBUG SenderThread:725632 [sender.py:send():179] send: history
|
156 |
+
2021-07-15 18:02:22,542 DEBUG SenderThread:725632 [sender.py:send():179] send: summary
|
157 |
+
2021-07-15 18:02:22,542 INFO SenderThread:725632 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
158 |
+
2021-07-15 18:02:23,171 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/wandb-summary.json
|
159 |
+
2021-07-15 18:02:24,172 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
160 |
+
2021-07-15 18:02:25,082 DEBUG HandlerThread:725632 [handler.py:handle_request():124] handle_request: stop_status
|
161 |
+
2021-07-15 18:02:25,082 DEBUG SenderThread:725632 [sender.py:send_request():193] send_request: stop_status
|
162 |
+
2021-07-15 18:02:25,172 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
163 |
+
2021-07-15 18:02:26,173 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
164 |
+
2021-07-15 18:02:28,174 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
165 |
+
2021-07-15 18:02:30,174 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
166 |
+
2021-07-15 18:02:32,175 INFO Thread-8 :725632 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_175147-3lygnexi/files/output.log
|
167 |
+
2021-07-15 18:02:33,395 WARNING MainThread:725632 [internal.py:wandb_internal():147] Internal process interrupt: 1
|
168 |
+
2021-07-15 18:02:33,592 WARNING MainThread:725632 [internal.py:wandb_internal():147] Internal process interrupt: 2
|
169 |
+
2021-07-15 18:02:33,592 ERROR MainThread:725632 [internal.py:wandb_internal():150] Internal process interrupted.
|
170 |
+
2021-07-15 18:02:34,020 INFO MainThread:725632 [internal.py:handle_exit():78] Internal process exited
|