dat
commited on
Commit
•
731244f
1
Parent(s):
9915204
update all
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- config.json +1 -1
- events.out.tfevents.1626304306.t1v-n-f5c06ea1-w-0.602807.3.v2 +3 -0
- events.out.tfevents.1626305223.t1v-n-f5c06ea1-w-0.605532.3.v2 +3 -0
- events.out.tfevents.1626306374.t1v-n-f5c06ea1-w-0.608526.3.v2 +3 -0
- events.out.tfevents.1626306537.t1v-n-f5c06ea1-w-0.610166.3.v2 +3 -0
- events.out.tfevents.1626306954.t1v-n-f5c06ea1-w-0.612049.3.v2 +3 -0
- events.out.tfevents.1626307676.t1v-n-f5c06ea1-w-0.614342.3.v2 +3 -0
- events.out.tfevents.1626308255.t1v-n-f5c06ea1-w-0.616592.3.v2 +3 -0
- events.out.tfevents.1626308889.t1v-n-f5c06ea1-w-0.618785.3.v2 +3 -0
- events.out.tfevents.1626309457.t1v-n-f5c06ea1-w-0.620917.3.v2 +3 -0
- events.out.tfevents.1626310347.t1v-n-f5c06ea1-w-0.623339.3.v2 +3 -0
- events.out.tfevents.1626310837.t1v-n-f5c06ea1-w-0.625421.3.v2 +3 -0
- events.out.tfevents.1626311317.t1v-n-f5c06ea1-w-0.626982.3.v2 +3 -0
- events.out.tfevents.1626311757.t1v-n-f5c06ea1-w-0.628566.3.v2 +3 -0
- events.out.tfevents.1626312025.t1v-n-f5c06ea1-w-0.630273.3.v2 +3 -0
- events.out.tfevents.1626312342.t1v-n-f5c06ea1-w-0.631837.3.v2 +3 -0
- events.out.tfevents.1626312869.t1v-n-f5c06ea1-w-0.634228.3.v2 +3 -0
- events.out.tfevents.1626312958.t1v-n-f5c06ea1-w-0.635913.3.v2 +3 -0
- events.out.tfevents.1626313509.t1v-n-f5c06ea1-w-0.638079.3.v2 +3 -0
- events.out.tfevents.1626314417.t1v-n-f5c06ea1-w-0.640692.3.v2 +3 -0
- run.sh +7 -8
- run_mlm_flax.py +3 -3
- run_mlm_flax_no_accum.py +3 -3
- wandb/debug-internal.log +1 -1
- wandb/debug.log +1 -1
- wandb/latest-run +1 -1
- wandb/run-20210714_225820-1dpoijkp/files/config.yaml +3 -0
- wandb/run-20210714_225820-1dpoijkp/files/output.log +33 -0
- wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log +195 -0
- wandb/run-20210714_225820-1dpoijkp/logs/debug.log +110 -0
- wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb +0 -0
- wandb/run-20210714_231147-gkn68kcy/files/config.yaml +307 -0
- wandb/run-20210714_231147-gkn68kcy/files/output.log +32 -0
- wandb/run-20210714_231147-gkn68kcy/files/requirements.txt +94 -0
- wandb/run-20210714_231147-gkn68kcy/files/wandb-metadata.json +46 -0
- wandb/run-20210714_231147-gkn68kcy/files/wandb-summary.json +1 -0
- wandb/run-20210714_231147-gkn68kcy/logs/debug-internal.log +279 -0
- wandb/run-20210714_231147-gkn68kcy/logs/debug.log +119 -0
- wandb/run-20210714_231147-gkn68kcy/run-gkn68kcy.wandb +0 -0
- wandb/run-20210714_232703-1jijl27o/files/config.yaml +307 -0
- wandb/run-20210714_232703-1jijl27o/files/output.log +34 -0
- wandb/run-20210714_232703-1jijl27o/files/requirements.txt +94 -0
- wandb/run-20210714_232703-1jijl27o/files/wandb-metadata.json +47 -0
- wandb/run-20210714_232703-1jijl27o/files/wandb-summary.json +1 -0
- wandb/run-20210714_232703-1jijl27o/logs/debug-internal.log +275 -0
- wandb/run-20210714_232703-1jijl27o/logs/debug.log +127 -0
- wandb/run-20210714_232703-1jijl27o/run-1jijl27o.wandb +0 -0
- wandb/run-20210714_234615-3p6vlfc3/files/config.yaml +304 -0
- wandb/run-20210714_234615-3p6vlfc3/files/output.log +6 -0
- wandb/run-20210714_234615-3p6vlfc3/files/requirements.txt +94 -0
config.json
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
],
|
5 |
"attention_probs_dropout_prob": 0.1,
|
6 |
"attention_type": "block_sparse",
|
7 |
-
"block_size":
|
8 |
"bos_token_id": 1,
|
9 |
"eos_token_id": 2,
|
10 |
"gradient_checkpointing": false,
|
|
|
4 |
],
|
5 |
"attention_probs_dropout_prob": 0.1,
|
6 |
"attention_type": "block_sparse",
|
7 |
+
"block_size": 128,
|
8 |
"bos_token_id": 1,
|
9 |
"eos_token_id": 2,
|
10 |
"gradient_checkpointing": false,
|
events.out.tfevents.1626304306.t1v-n-f5c06ea1-w-0.602807.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5af1d6e48b81f0b9d7a35be9c91ffce03eee366544729e50b2f6158dda353896
|
3 |
+
size 40
|
events.out.tfevents.1626305223.t1v-n-f5c06ea1-w-0.605532.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50736e35dba095f7e44007a17a7f2702b81215a45256c8a92304137fb1352920
|
3 |
+
size 40
|
events.out.tfevents.1626306374.t1v-n-f5c06ea1-w-0.608526.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c754bf3ea8fba0ce37ab52a99fc32dbd0370af24af9c99ff82c5d66f750ccf9
|
3 |
+
size 40
|
events.out.tfevents.1626306537.t1v-n-f5c06ea1-w-0.610166.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a0c43d6c44047fae8000715c934e7f0ecbdcde1074ad33da1da321c3b27e0fa
|
3 |
+
size 40
|
events.out.tfevents.1626306954.t1v-n-f5c06ea1-w-0.612049.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:399e413d5125b81deb4821e6cf2d48594b856250dc9a25b5b5a640e53b05dff1
|
3 |
+
size 40
|
events.out.tfevents.1626307676.t1v-n-f5c06ea1-w-0.614342.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14bf286b75218519d67d5dfc8dd5186890bde2fc12fad6c71348d0dc35a5c0d8
|
3 |
+
size 40
|
events.out.tfevents.1626308255.t1v-n-f5c06ea1-w-0.616592.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eba6714d53d12785a3f81e899889244c67dcbc04cb9fad8562008dcf8302a212
|
3 |
+
size 40
|
events.out.tfevents.1626308889.t1v-n-f5c06ea1-w-0.618785.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3b826e6696483c676eb0da8b48cd3811887b854b324027f4c4f58af92933d17
|
3 |
+
size 40
|
events.out.tfevents.1626309457.t1v-n-f5c06ea1-w-0.620917.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d144d24c7f4699737d4dcd0b3619027b85af5478c55be0142c8ba091253568fc
|
3 |
+
size 40
|
events.out.tfevents.1626310347.t1v-n-f5c06ea1-w-0.623339.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83fbcbad2d35506b956452a251f6ed447335e90407b067c149ba3127b303632a
|
3 |
+
size 40
|
events.out.tfevents.1626310837.t1v-n-f5c06ea1-w-0.625421.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b99cff29b8871cd8db3f8fa198969b985fea2ffc47e79089d3d725ef9f29080
|
3 |
+
size 40
|
events.out.tfevents.1626311317.t1v-n-f5c06ea1-w-0.626982.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f09f06cc74a2b6e25df6452b382ba19720ee66dbaf384048c15058d496280a8
|
3 |
+
size 40
|
events.out.tfevents.1626311757.t1v-n-f5c06ea1-w-0.628566.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7465c4dbbaee983519373fb924605fb6aacc585352185e9d2950814da4100c3b
|
3 |
+
size 7499
|
events.out.tfevents.1626312025.t1v-n-f5c06ea1-w-0.630273.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:635dcd8f0941e3cc687e76b31e5803cfcf80f17f9d50615fe8e93c2a0768e08c
|
3 |
+
size 40
|
events.out.tfevents.1626312342.t1v-n-f5c06ea1-w-0.631837.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a47b8e795ffa4867c8acf33eb5022d1bdaad588280c75b3bdebf47dda704101a
|
3 |
+
size 40
|
events.out.tfevents.1626312869.t1v-n-f5c06ea1-w-0.634228.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8228686009da130e2b1ae2918f1b8afc864f280b17fc48e479c5342dc466bec2
|
3 |
+
size 40
|
events.out.tfevents.1626312958.t1v-n-f5c06ea1-w-0.635913.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c7381fc3fe2646dc9fa2397819b0db0d19f859ddbd6061a67f237b2e1dd61e5
|
3 |
+
size 40
|
events.out.tfevents.1626313509.t1v-n-f5c06ea1-w-0.638079.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c70c504b2ddc63c6d6ffb5846f09dd37ed05743155ecae6469ce433583aca0bc
|
3 |
+
size 40
|
events.out.tfevents.1626314417.t1v-n-f5c06ea1-w-0.640692.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a907482f1b09a2b5e4e62b66f168db6d65bc1c98b9a140174cfe918b8039be5e
|
3 |
+
size 40
|
run.sh
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
|
3 |
#export TOKENIZERS_PARALLELISM=0
|
4 |
|
5 |
-
python ./
|
6 |
--push_to_hub \
|
7 |
--output_dir="./" \
|
8 |
--model_type="big_bird" \
|
@@ -14,18 +14,17 @@ python ./run_mlm_flax.py \
|
|
14 |
--overwrite_output_dir \
|
15 |
--adam_beta1="0.9" \
|
16 |
--adam_beta2="0.98" \
|
17 |
-
--logging_steps="
|
18 |
--eval_steps="20000" \
|
19 |
--num_train_epochs="5" \
|
20 |
--preprocessing_num_workers="96" \
|
21 |
--save_steps="20000" \
|
22 |
-
--learning_rate="
|
23 |
-
--per_device_train_batch_size="
|
24 |
-
--per_device_eval_batch_size="
|
25 |
--save_total_limit="5"\
|
26 |
-
--max_eval_samples="
|
27 |
-
|
28 |
-
--gradient_accumulation_steps="8" \
|
29 |
#--resume_from_checkpoint="./"\
|
30 |
#--adafactor \
|
31 |
#--dtype="bfloat16" \
|
|
|
2 |
|
3 |
#export TOKENIZERS_PARALLELISM=0
|
4 |
|
5 |
+
python ./run_mlm_flax_no_accum.py \
|
6 |
--push_to_hub \
|
7 |
--output_dir="./" \
|
8 |
--model_type="big_bird" \
|
|
|
14 |
--overwrite_output_dir \
|
15 |
--adam_beta1="0.9" \
|
16 |
--adam_beta2="0.98" \
|
17 |
+
--logging_steps="50" \
|
18 |
--eval_steps="20000" \
|
19 |
--num_train_epochs="5" \
|
20 |
--preprocessing_num_workers="96" \
|
21 |
--save_steps="20000" \
|
22 |
+
--learning_rate="3e-5" \
|
23 |
+
--per_device_train_batch_size="1" \
|
24 |
+
--per_device_eval_batch_size="1" \
|
25 |
--save_total_limit="5"\
|
26 |
+
--max_eval_samples="500"\
|
27 |
+
#--gradient_accumulation_steps="4"\
|
|
|
28 |
#--resume_from_checkpoint="./"\
|
29 |
#--adafactor \
|
30 |
#--dtype="bfloat16" \
|
run_mlm_flax.py
CHANGED
@@ -525,10 +525,10 @@ if __name__ == "__main__":
|
|
525 |
if load_grouped:
|
526 |
logger.info("Loading tokenized and grouped dataset")
|
527 |
tokenized_datasets = DatasetDict.load_from_disk("/data/tokenized_data")
|
528 |
-
|
529 |
-
print(f"Number of validation examples {data_args.max_eval_samples}")
|
530 |
-
#tokenized_datasets["train"]= tokenized_datasets["train"].select(range(20000))
|
531 |
if data_args.max_eval_samples is not None:
|
|
|
|
|
532 |
tokenized_datasets["validation"] = tokenized_datasets["validation"].select(range(data_args.max_eval_samples))
|
533 |
else:
|
534 |
if training_args.do_train:
|
|
|
525 |
if load_grouped:
|
526 |
logger.info("Loading tokenized and grouped dataset")
|
527 |
tokenized_datasets = DatasetDict.load_from_disk("/data/tokenized_data")
|
528 |
+
tokenized_datasets["train"]= tokenized_datasets["train"].select(range(int(0.3*len(tokenized_datasets["train"]))))
|
|
|
|
|
529 |
if data_args.max_eval_samples is not None:
|
530 |
+
logger.info("Setting max validation examples to ")
|
531 |
+
print(f"Number of validation examples {data_args.max_eval_samples}")
|
532 |
tokenized_datasets["validation"] = tokenized_datasets["validation"].select(range(data_args.max_eval_samples))
|
533 |
else:
|
534 |
if training_args.do_train:
|
run_mlm_flax_no_accum.py
CHANGED
@@ -25,7 +25,7 @@ import os
|
|
25 |
import sys
|
26 |
import time
|
27 |
from dataclasses import dataclass, field
|
28 |
-
|
29 |
# You can also adapt this script on your own masked language modeling task. Pointers for this are left as comments.
|
30 |
from pathlib import Path
|
31 |
from typing import Dict, List, Optional, Tuple
|
@@ -421,7 +421,7 @@ if __name__ == "__main__":
|
|
421 |
tokenized_datasets = DatasetDict.load_from_disk("/data/tokenized_data")
|
422 |
logger.info("Setting max validation examples to ")
|
423 |
print(f"Number of validation examples {data_args.max_eval_samples}")
|
424 |
-
tokenized_datasets["train"]= tokenized_datasets["train"].select(range(
|
425 |
if data_args.max_eval_samples is not None:
|
426 |
tokenized_datasets["validation"] = tokenized_datasets["validation"].select(range(data_args.max_eval_samples))
|
427 |
else:
|
@@ -604,7 +604,7 @@ if __name__ == "__main__":
|
|
604 |
mask=decay_mask_fn,
|
605 |
)
|
606 |
optimizer = optax.chain(
|
607 |
-
optax.
|
608 |
optimizer
|
609 |
)
|
610 |
|
|
|
25 |
import sys
|
26 |
import time
|
27 |
from dataclasses import dataclass, field
|
28 |
+
from optax import clip_by_global_norm
|
29 |
# You can also adapt this script on your own masked language modeling task. Pointers for this are left as comments.
|
30 |
from pathlib import Path
|
31 |
from typing import Dict, List, Optional, Tuple
|
|
|
421 |
tokenized_datasets = DatasetDict.load_from_disk("/data/tokenized_data")
|
422 |
logger.info("Setting max validation examples to ")
|
423 |
print(f"Number of validation examples {data_args.max_eval_samples}")
|
424 |
+
#tokenized_datasets["train"]= tokenized_datasets["train"].select(range(int(0.3*len(tokenized_datasets["train"]))))
|
425 |
if data_args.max_eval_samples is not None:
|
426 |
tokenized_datasets["validation"] = tokenized_datasets["validation"].select(range(data_args.max_eval_samples))
|
427 |
else:
|
|
|
604 |
mask=decay_mask_fn,
|
605 |
)
|
606 |
optimizer = optax.chain(
|
607 |
+
optax.clip_by_global_norm(1.),
|
608 |
optimizer
|
609 |
)
|
610 |
|
wandb/debug-internal.log
CHANGED
@@ -1 +1 @@
|
|
1 |
-
run-
|
|
|
1 |
+
run-20210715_020018-3i0mvo08/logs/debug-internal.log
|
wandb/debug.log
CHANGED
@@ -1 +1 @@
|
|
1 |
-
run-
|
|
|
1 |
+
run-20210715_020018-3i0mvo08/logs/debug.log
|
wandb/latest-run
CHANGED
@@ -1 +1 @@
|
|
1 |
-
run-
|
|
|
1 |
+
run-20210715_020018-3i0mvo08
|
wandb/run-20210714_225820-1dpoijkp/files/config.yaml
CHANGED
@@ -13,6 +13,9 @@ _wandb:
|
|
13 |
1:
|
14 |
- 3
|
15 |
- 11
|
|
|
|
|
|
|
16 |
4: 3.8.10
|
17 |
5: 0.10.33
|
18 |
6: 4.9.0.dev0
|
|
|
13 |
1:
|
14 |
- 3
|
15 |
- 11
|
16 |
+
2:
|
17 |
+
- 3
|
18 |
+
- 11
|
19 |
4: 3.8.10
|
20 |
5: 0.10.33
|
21 |
6: 4.9.0.dev0
|
wandb/run-20210714_225820-1dpoijkp/files/output.log
CHANGED
@@ -4,3 +4,36 @@
|
|
4 |
warnings.warn(
|
5 |
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
6 |
warnings.warn(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
warnings.warn(
|
5 |
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
6 |
warnings.warn(
|
7 |
+
Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s]
|
8 |
+
Training...: 0%| | 0/503952 [01:28<?, ?it/s]
|
9 |
+
Epoch ... (1/5): 0%| | 0/5 [09:12<?, ?it/s]
|
10 |
+
Traceback (most recent call last):
|
11 |
+
File "./run_mlm_flax.py", line 815, in <module>
|
12 |
+
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
13 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback
|
14 |
+
return fun(*args, **kwargs)
|
15 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped
|
16 |
+
out = pxla.xla_pmap(
|
17 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind
|
18 |
+
return call_bind(self, fun, *args, **params)
|
19 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind
|
20 |
+
outs = primitive.process(top_trace, fun, tracers, params)
|
21 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process
|
22 |
+
return trace.process_map(self, fun, tracers, params)
|
23 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call
|
24 |
+
return primitive.impl(f, *tracers, **params)
|
25 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl
|
26 |
+
return compiled_fun(*args)
|
27 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
28 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
29 |
+
jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.58G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.66G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
30 |
+
The stack trace below excludes JAX-internal frames.
|
31 |
+
The preceding is the original exception that occurred, unmodified.
|
32 |
+
--------------------
|
33 |
+
The above exception was the direct cause of the following exception:
|
34 |
+
Traceback (most recent call last):
|
35 |
+
File "./run_mlm_flax.py", line 815, in <module>
|
36 |
+
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
37 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
38 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
39 |
+
RuntimeError: Resource exhausted: Attempting to reserve 12.58G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.66G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log
CHANGED
@@ -38,3 +38,198 @@
|
|
38 |
2021-07-14 22:58:53,006 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
39 |
2021-07-14 22:59:08,141 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
40 |
2021-07-14 22:59:08,141 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
2021-07-14 22:58:53,006 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
39 |
2021-07-14 22:59:08,141 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
40 |
2021-07-14 22:59:08,141 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
41 |
+
2021-07-14 22:59:20,866 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
42 |
+
2021-07-14 22:59:23,276 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
43 |
+
2021-07-14 22:59:23,277 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
44 |
+
2021-07-14 22:59:38,415 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
45 |
+
2021-07-14 22:59:38,415 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
46 |
+
2021-07-14 22:59:50,943 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
47 |
+
2021-07-14 22:59:53,547 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
48 |
+
2021-07-14 22:59:53,547 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
49 |
+
2021-07-14 23:00:08,680 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
50 |
+
2021-07-14 23:00:08,680 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
51 |
+
2021-07-14 23:00:21,020 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
52 |
+
2021-07-14 23:00:23,810 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
53 |
+
2021-07-14 23:00:23,811 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
54 |
+
2021-07-14 23:00:38,944 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
55 |
+
2021-07-14 23:00:38,945 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
56 |
+
2021-07-14 23:00:51,098 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
57 |
+
2021-07-14 23:00:54,080 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
58 |
+
2021-07-14 23:00:54,080 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
59 |
+
2021-07-14 23:01:09,212 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
60 |
+
2021-07-14 23:01:09,212 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
61 |
+
2021-07-14 23:01:21,176 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
62 |
+
2021-07-14 23:01:24,345 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
63 |
+
2021-07-14 23:01:24,346 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
64 |
+
2021-07-14 23:01:39,477 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
65 |
+
2021-07-14 23:01:39,478 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
66 |
+
2021-07-14 23:01:51,254 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
67 |
+
2021-07-14 23:01:54,612 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
68 |
+
2021-07-14 23:01:54,612 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
69 |
+
2021-07-14 23:02:09,744 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
70 |
+
2021-07-14 23:02:09,744 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
71 |
+
2021-07-14 23:02:21,332 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
72 |
+
2021-07-14 23:02:24,932 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
73 |
+
2021-07-14 23:02:24,932 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
74 |
+
2021-07-14 23:02:40,066 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
75 |
+
2021-07-14 23:02:40,067 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
76 |
+
2021-07-14 23:02:51,409 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
77 |
+
2021-07-14 23:02:55,209 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
78 |
+
2021-07-14 23:02:55,209 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
79 |
+
2021-07-14 23:03:10,341 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
80 |
+
2021-07-14 23:03:10,341 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
81 |
+
2021-07-14 23:03:21,484 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
82 |
+
2021-07-14 23:03:25,483 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
83 |
+
2021-07-14 23:03:25,483 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
84 |
+
2021-07-14 23:03:40,615 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
85 |
+
2021-07-14 23:03:40,615 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
86 |
+
2021-07-14 23:03:51,547 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
87 |
+
2021-07-14 23:03:56,377 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
88 |
+
2021-07-14 23:03:56,377 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
89 |
+
2021-07-14 23:04:11,520 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
90 |
+
2021-07-14 23:04:11,520 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
91 |
+
2021-07-14 23:04:21,610 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
92 |
+
2021-07-14 23:04:26,652 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
93 |
+
2021-07-14 23:04:26,652 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
94 |
+
2021-07-14 23:04:41,781 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
95 |
+
2021-07-14 23:04:41,781 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
96 |
+
2021-07-14 23:04:51,676 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
97 |
+
2021-07-14 23:04:56,913 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
98 |
+
2021-07-14 23:04:56,913 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
99 |
+
2021-07-14 23:05:13,026 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
100 |
+
2021-07-14 23:05:13,027 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
101 |
+
2021-07-14 23:05:21,744 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
102 |
+
2021-07-14 23:05:28,237 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
103 |
+
2021-07-14 23:05:28,238 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
104 |
+
2021-07-14 23:05:44,049 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
105 |
+
2021-07-14 23:05:44,049 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
106 |
+
2021-07-14 23:05:51,809 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
107 |
+
2021-07-14 23:05:59,179 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
108 |
+
2021-07-14 23:05:59,180 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
109 |
+
2021-07-14 23:06:14,311 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
110 |
+
2021-07-14 23:06:14,311 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
111 |
+
2021-07-14 23:06:21,764 INFO Thread-8 :601574 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/output.log
|
112 |
+
2021-07-14 23:06:21,877 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
113 |
+
2021-07-14 23:06:29,456 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
114 |
+
2021-07-14 23:06:29,457 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
115 |
+
2021-07-14 23:06:44,749 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
116 |
+
2021-07-14 23:06:44,749 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
117 |
+
2021-07-14 23:06:51,950 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
118 |
+
2021-07-14 23:06:59,941 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
119 |
+
2021-07-14 23:06:59,941 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
120 |
+
2021-07-14 23:07:15,073 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
121 |
+
2021-07-14 23:07:15,077 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
122 |
+
2021-07-14 23:07:22,025 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
123 |
+
2021-07-14 23:07:30,221 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
124 |
+
2021-07-14 23:07:30,221 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
125 |
+
2021-07-14 23:07:45,363 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
126 |
+
2021-07-14 23:07:45,364 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
127 |
+
2021-07-14 23:07:50,438 DEBUG SenderThread:601574 [sender.py:send():179] send: telemetry
|
128 |
+
2021-07-14 23:07:50,439 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit
|
129 |
+
2021-07-14 23:07:50,439 DEBUG SenderThread:601574 [sender.py:send():179] send: exit
|
130 |
+
2021-07-14 23:07:50,439 INFO SenderThread:601574 [sender.py:send_exit():287] handling exit code: 1
|
131 |
+
2021-07-14 23:07:50,440 INFO SenderThread:601574 [sender.py:send_exit():295] send defer
|
132 |
+
2021-07-14 23:07:50,440 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit
|
133 |
+
2021-07-14 23:07:50,441 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer
|
134 |
+
2021-07-14 23:07:50,441 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 0
|
135 |
+
2021-07-14 23:07:50,441 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer
|
136 |
+
2021-07-14 23:07:50,441 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 0
|
137 |
+
2021-07-14 23:07:50,441 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 1
|
138 |
+
2021-07-14 23:07:50,442 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer
|
139 |
+
2021-07-14 23:07:50,442 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 1
|
140 |
+
2021-07-14 23:07:50,497 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer
|
141 |
+
2021-07-14 23:07:50,497 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 1
|
142 |
+
2021-07-14 23:07:50,497 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 2
|
143 |
+
2021-07-14 23:07:50,497 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
144 |
+
2021-07-14 23:07:50,498 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer
|
145 |
+
2021-07-14 23:07:50,498 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 2
|
146 |
+
2021-07-14 23:07:50,498 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer
|
147 |
+
2021-07-14 23:07:50,498 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 2
|
148 |
+
2021-07-14 23:07:50,498 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 3
|
149 |
+
2021-07-14 23:07:50,499 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer
|
150 |
+
2021-07-14 23:07:50,499 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 3
|
151 |
+
2021-07-14 23:07:50,499 DEBUG SenderThread:601574 [sender.py:send():179] send: summary
|
152 |
+
2021-07-14 23:07:50,499 INFO SenderThread:601574 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
153 |
+
2021-07-14 23:07:50,499 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer
|
154 |
+
2021-07-14 23:07:50,499 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 3
|
155 |
+
2021-07-14 23:07:50,499 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 4
|
156 |
+
2021-07-14 23:07:50,500 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer
|
157 |
+
2021-07-14 23:07:50,500 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 4
|
158 |
+
2021-07-14 23:07:50,500 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer
|
159 |
+
2021-07-14 23:07:50,500 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 4
|
160 |
+
2021-07-14 23:07:50,543 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit
|
161 |
+
2021-07-14 23:07:50,746 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 5
|
162 |
+
2021-07-14 23:07:50,746 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit
|
163 |
+
2021-07-14 23:07:50,747 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer
|
164 |
+
2021-07-14 23:07:50,747 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 5
|
165 |
+
2021-07-14 23:07:50,747 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer
|
166 |
+
2021-07-14 23:07:50,747 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 5
|
167 |
+
2021-07-14 23:07:50,747 INFO SenderThread:601574 [dir_watcher.py:finish():282] shutting down directory watcher
|
168 |
+
2021-07-14 23:07:50,787 INFO SenderThread:601574 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/output.log
|
169 |
+
2021-07-14 23:07:50,787 INFO SenderThread:601574 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/config.yaml
|
170 |
+
2021-07-14 23:07:50,787 INFO SenderThread:601574 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json
|
171 |
+
2021-07-14 23:07:50,788 INFO SenderThread:601574 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files
|
172 |
+
2021-07-14 23:07:50,788 INFO SenderThread:601574 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/requirements.txt requirements.txt
|
173 |
+
2021-07-14 23:07:50,788 INFO SenderThread:601574 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/output.log output.log
|
174 |
+
2021-07-14 23:07:50,788 INFO SenderThread:601574 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/wandb-metadata.json wandb-metadata.json
|
175 |
+
2021-07-14 23:07:50,788 INFO SenderThread:601574 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/config.yaml config.yaml
|
176 |
+
2021-07-14 23:07:50,789 INFO SenderThread:601574 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json wandb-summary.json
|
177 |
+
2021-07-14 23:07:50,792 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 6
|
178 |
+
2021-07-14 23:07:50,792 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer
|
179 |
+
2021-07-14 23:07:50,792 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 6
|
180 |
+
2021-07-14 23:07:50,795 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer
|
181 |
+
2021-07-14 23:07:50,795 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 6
|
182 |
+
2021-07-14 23:07:50,795 INFO SenderThread:601574 [file_pusher.py:finish():177] shutting down file pusher
|
183 |
+
2021-07-14 23:07:50,848 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit
|
184 |
+
2021-07-14 23:07:50,849 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit
|
185 |
+
2021-07-14 23:07:50,951 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit
|
186 |
+
2021-07-14 23:07:50,951 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit
|
187 |
+
2021-07-14 23:07:51,053 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit
|
188 |
+
2021-07-14 23:07:51,053 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit
|
189 |
+
2021-07-14 23:07:51,155 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit
|
190 |
+
2021-07-14 23:07:51,155 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit
|
191 |
+
2021-07-14 23:07:51,257 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit
|
192 |
+
2021-07-14 23:07:51,257 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit
|
193 |
+
2021-07-14 23:07:51,284 INFO Thread-12 :601574 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/requirements.txt
|
194 |
+
2021-07-14 23:07:51,311 INFO Thread-14 :601574 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/config.yaml
|
195 |
+
2021-07-14 23:07:51,322 INFO Thread-13 :601574 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/output.log
|
196 |
+
2021-07-14 23:07:51,359 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit
|
197 |
+
2021-07-14 23:07:51,359 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit
|
198 |
+
2021-07-14 23:07:51,461 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit
|
199 |
+
2021-07-14 23:07:51,461 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit
|
200 |
+
2021-07-14 23:07:51,466 INFO Thread-15 :601574 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json
|
201 |
+
2021-07-14 23:07:51,563 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit
|
202 |
+
2021-07-14 23:07:51,563 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit
|
203 |
+
2021-07-14 23:07:51,665 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit
|
204 |
+
2021-07-14 23:07:51,665 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit
|
205 |
+
2021-07-14 23:07:51,666 INFO Thread-7 :601574 [sender.py:transition_state():308] send defer: 7
|
206 |
+
2021-07-14 23:07:51,667 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer
|
207 |
+
2021-07-14 23:07:51,667 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 7
|
208 |
+
2021-07-14 23:07:51,667 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer
|
209 |
+
2021-07-14 23:07:51,667 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 7
|
210 |
+
2021-07-14 23:07:51,766 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit
|
211 |
+
2021-07-14 23:07:51,941 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 8
|
212 |
+
2021-07-14 23:07:51,941 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit
|
213 |
+
2021-07-14 23:07:51,942 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer
|
214 |
+
2021-07-14 23:07:51,942 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 8
|
215 |
+
2021-07-14 23:07:51,942 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer
|
216 |
+
2021-07-14 23:07:51,942 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 8
|
217 |
+
2021-07-14 23:07:51,942 INFO SenderThread:601574 [sender.py:transition_state():308] send defer: 9
|
218 |
+
2021-07-14 23:07:51,942 DEBUG SenderThread:601574 [sender.py:send():179] send: final
|
219 |
+
2021-07-14 23:07:51,942 DEBUG SenderThread:601574 [sender.py:send():179] send: footer
|
220 |
+
2021-07-14 23:07:51,943 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: defer
|
221 |
+
2021-07-14 23:07:51,943 INFO HandlerThread:601574 [handler.py:handle_request_defer():141] handle defer: 9
|
222 |
+
2021-07-14 23:07:51,943 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: defer
|
223 |
+
2021-07-14 23:07:51,943 INFO SenderThread:601574 [sender.py:send_request_defer():304] handle sender defer: 9
|
224 |
+
2021-07-14 23:07:52,043 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: poll_exit
|
225 |
+
2021-07-14 23:07:52,043 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: poll_exit
|
226 |
+
2021-07-14 23:07:52,044 INFO SenderThread:601574 [file_pusher.py:join():182] waiting for file pusher
|
227 |
+
2021-07-14 23:07:52,045 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: get_summary
|
228 |
+
2021-07-14 23:07:52,045 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: sampled_history
|
229 |
+
2021-07-14 23:07:52,046 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: shutdown
|
230 |
+
2021-07-14 23:07:52,046 INFO HandlerThread:601574 [handler.py:finish():638] shutting down handler
|
231 |
+
2021-07-14 23:07:52,943 INFO WriterThread:601574 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb
|
232 |
+
2021-07-14 23:07:53,044 INFO SenderThread:601574 [sender.py:finish():945] shutting down sender
|
233 |
+
2021-07-14 23:07:53,044 INFO SenderThread:601574 [file_pusher.py:finish():177] shutting down file pusher
|
234 |
+
2021-07-14 23:07:53,044 INFO SenderThread:601574 [file_pusher.py:join():182] waiting for file pusher
|
235 |
+
2021-07-14 23:07:53,047 INFO MainThread:601574 [internal.py:handle_exit():78] Internal process exited
|
wandb/run-20210714_225820-1dpoijkp/logs/debug.log
CHANGED
@@ -23,3 +23,113 @@ config: {}
|
|
23 |
2021-07-14 22:58:22,750 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_22-58-13_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
2021-07-14 22:58:22,752 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
|
25 |
2021-07-14 22:58:22,753 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
2021-07-14 22:58:22,750 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_22-58-13_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
2021-07-14 22:58:22,752 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
|
25 |
2021-07-14 22:58:22,753 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000}
|
26 |
+
2021-07-14 23:07:48,138 INFO MainThread:600323 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
|
27 |
+
2021-07-14 23:07:48,139 INFO MainThread:600323 [wandb_run.py:_restore():1565] restore
|
28 |
+
2021-07-14 23:07:50,441 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
29 |
+
wandb_count: 1
|
30 |
+
}
|
31 |
+
pusher_stats {
|
32 |
+
uploaded_bytes: 1448
|
33 |
+
total_bytes: 1448
|
34 |
+
}
|
35 |
+
|
36 |
+
2021-07-14 23:07:50,747 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
37 |
+
wandb_count: 1
|
38 |
+
}
|
39 |
+
pusher_stats {
|
40 |
+
uploaded_bytes: 1448
|
41 |
+
total_bytes: 1448
|
42 |
+
}
|
43 |
+
|
44 |
+
2021-07-14 23:07:50,849 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
45 |
+
wandb_count: 5
|
46 |
+
}
|
47 |
+
pusher_stats {
|
48 |
+
uploaded_bytes: 1448
|
49 |
+
total_bytes: 11717
|
50 |
+
}
|
51 |
+
|
52 |
+
2021-07-14 23:07:50,951 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
53 |
+
wandb_count: 5
|
54 |
+
}
|
55 |
+
pusher_stats {
|
56 |
+
uploaded_bytes: 1448
|
57 |
+
total_bytes: 11717
|
58 |
+
}
|
59 |
+
|
60 |
+
2021-07-14 23:07:51,054 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
61 |
+
wandb_count: 5
|
62 |
+
}
|
63 |
+
pusher_stats {
|
64 |
+
uploaded_bytes: 11717
|
65 |
+
total_bytes: 11717
|
66 |
+
}
|
67 |
+
|
68 |
+
2021-07-14 23:07:51,156 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
69 |
+
wandb_count: 5
|
70 |
+
}
|
71 |
+
pusher_stats {
|
72 |
+
uploaded_bytes: 11717
|
73 |
+
total_bytes: 11717
|
74 |
+
}
|
75 |
+
|
76 |
+
2021-07-14 23:07:51,258 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
77 |
+
wandb_count: 5
|
78 |
+
}
|
79 |
+
pusher_stats {
|
80 |
+
uploaded_bytes: 11717
|
81 |
+
total_bytes: 11717
|
82 |
+
}
|
83 |
+
|
84 |
+
2021-07-14 23:07:51,360 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
85 |
+
wandb_count: 5
|
86 |
+
}
|
87 |
+
pusher_stats {
|
88 |
+
uploaded_bytes: 11717
|
89 |
+
total_bytes: 11717
|
90 |
+
}
|
91 |
+
|
92 |
+
2021-07-14 23:07:51,462 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
93 |
+
wandb_count: 5
|
94 |
+
}
|
95 |
+
pusher_stats {
|
96 |
+
uploaded_bytes: 11717
|
97 |
+
total_bytes: 11717
|
98 |
+
}
|
99 |
+
|
100 |
+
2021-07-14 23:07:51,564 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
101 |
+
wandb_count: 5
|
102 |
+
}
|
103 |
+
pusher_stats {
|
104 |
+
uploaded_bytes: 11717
|
105 |
+
total_bytes: 11717
|
106 |
+
}
|
107 |
+
|
108 |
+
2021-07-14 23:07:51,665 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
109 |
+
wandb_count: 5
|
110 |
+
}
|
111 |
+
pusher_stats {
|
112 |
+
uploaded_bytes: 11717
|
113 |
+
total_bytes: 11717
|
114 |
+
}
|
115 |
+
|
116 |
+
2021-07-14 23:07:51,942 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
117 |
+
wandb_count: 5
|
118 |
+
}
|
119 |
+
pusher_stats {
|
120 |
+
uploaded_bytes: 11717
|
121 |
+
total_bytes: 11717
|
122 |
+
}
|
123 |
+
|
124 |
+
2021-07-14 23:07:52,044 INFO MainThread:600323 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
|
125 |
+
exit_result {
|
126 |
+
}
|
127 |
+
file_counts {
|
128 |
+
wandb_count: 5
|
129 |
+
}
|
130 |
+
pusher_stats {
|
131 |
+
uploaded_bytes: 11717
|
132 |
+
total_bytes: 11717
|
133 |
+
}
|
134 |
+
|
135 |
+
2021-07-14 23:07:53,341 INFO MainThread:600323 [wandb_run.py:_show_files():1937] logging synced files
|
wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb
CHANGED
Binary files a/wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb and b/wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb differ
|
|
wandb/run-20210714_231147-gkn68kcy/files/config.yaml
ADDED
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
2:
|
17 |
+
- 3
|
18 |
+
- 11
|
19 |
+
4: 3.8.10
|
20 |
+
5: 0.10.33
|
21 |
+
6: 4.9.0.dev0
|
22 |
+
8:
|
23 |
+
- 5
|
24 |
+
adafactor:
|
25 |
+
desc: null
|
26 |
+
value: false
|
27 |
+
adam_beta1:
|
28 |
+
desc: null
|
29 |
+
value: 0.9
|
30 |
+
adam_beta2:
|
31 |
+
desc: null
|
32 |
+
value: 0.98
|
33 |
+
adam_epsilon:
|
34 |
+
desc: null
|
35 |
+
value: 1.0e-08
|
36 |
+
cache_dir:
|
37 |
+
desc: null
|
38 |
+
value: null
|
39 |
+
config_name:
|
40 |
+
desc: null
|
41 |
+
value: ./
|
42 |
+
dataloader_drop_last:
|
43 |
+
desc: null
|
44 |
+
value: false
|
45 |
+
dataloader_num_workers:
|
46 |
+
desc: null
|
47 |
+
value: 0
|
48 |
+
dataloader_pin_memory:
|
49 |
+
desc: null
|
50 |
+
value: true
|
51 |
+
dataset_config_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
dataset_name:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
ddp_find_unused_parameters:
|
58 |
+
desc: null
|
59 |
+
value: null
|
60 |
+
debug:
|
61 |
+
desc: null
|
62 |
+
value: []
|
63 |
+
deepspeed:
|
64 |
+
desc: null
|
65 |
+
value: null
|
66 |
+
disable_tqdm:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_eval:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_predict:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
do_train:
|
76 |
+
desc: null
|
77 |
+
value: false
|
78 |
+
dtype:
|
79 |
+
desc: null
|
80 |
+
value: float32
|
81 |
+
eval_accumulation_steps:
|
82 |
+
desc: null
|
83 |
+
value: null
|
84 |
+
eval_steps:
|
85 |
+
desc: null
|
86 |
+
value: 20000
|
87 |
+
evaluation_strategy:
|
88 |
+
desc: null
|
89 |
+
value: IntervalStrategy.NO
|
90 |
+
fp16:
|
91 |
+
desc: null
|
92 |
+
value: false
|
93 |
+
fp16_backend:
|
94 |
+
desc: null
|
95 |
+
value: auto
|
96 |
+
fp16_full_eval:
|
97 |
+
desc: null
|
98 |
+
value: false
|
99 |
+
fp16_opt_level:
|
100 |
+
desc: null
|
101 |
+
value: O1
|
102 |
+
gradient_accumulation_steps:
|
103 |
+
desc: null
|
104 |
+
value: 1
|
105 |
+
greater_is_better:
|
106 |
+
desc: null
|
107 |
+
value: null
|
108 |
+
group_by_length:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
ignore_data_skip:
|
112 |
+
desc: null
|
113 |
+
value: false
|
114 |
+
label_names:
|
115 |
+
desc: null
|
116 |
+
value: null
|
117 |
+
label_smoothing_factor:
|
118 |
+
desc: null
|
119 |
+
value: 0.0
|
120 |
+
learning_rate:
|
121 |
+
desc: null
|
122 |
+
value: 5.0e-05
|
123 |
+
length_column_name:
|
124 |
+
desc: null
|
125 |
+
value: length
|
126 |
+
line_by_line:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
load_best_model_at_end:
|
130 |
+
desc: null
|
131 |
+
value: false
|
132 |
+
local_rank:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_level_replica:
|
139 |
+
desc: null
|
140 |
+
value: -1
|
141 |
+
log_on_each_node:
|
142 |
+
desc: null
|
143 |
+
value: true
|
144 |
+
logging_dir:
|
145 |
+
desc: null
|
146 |
+
value: ./runs/Jul14_23-11-40_t1v-n-f5c06ea1-w-0
|
147 |
+
logging_first_step:
|
148 |
+
desc: null
|
149 |
+
value: false
|
150 |
+
logging_steps:
|
151 |
+
desc: null
|
152 |
+
value: 500
|
153 |
+
logging_strategy:
|
154 |
+
desc: null
|
155 |
+
value: IntervalStrategy.STEPS
|
156 |
+
lr_scheduler_type:
|
157 |
+
desc: null
|
158 |
+
value: SchedulerType.LINEAR
|
159 |
+
max_eval_samples:
|
160 |
+
desc: null
|
161 |
+
value: 2000
|
162 |
+
max_grad_norm:
|
163 |
+
desc: null
|
164 |
+
value: 1.0
|
165 |
+
max_seq_length:
|
166 |
+
desc: null
|
167 |
+
value: 4096
|
168 |
+
max_steps:
|
169 |
+
desc: null
|
170 |
+
value: -1
|
171 |
+
metric_for_best_model:
|
172 |
+
desc: null
|
173 |
+
value: null
|
174 |
+
mlm_probability:
|
175 |
+
desc: null
|
176 |
+
value: 0.15
|
177 |
+
model_name_or_path:
|
178 |
+
desc: null
|
179 |
+
value: null
|
180 |
+
model_type:
|
181 |
+
desc: null
|
182 |
+
value: big_bird
|
183 |
+
mp_parameters:
|
184 |
+
desc: null
|
185 |
+
value: ''
|
186 |
+
no_cuda:
|
187 |
+
desc: null
|
188 |
+
value: false
|
189 |
+
num_train_epochs:
|
190 |
+
desc: null
|
191 |
+
value: 5.0
|
192 |
+
output_dir:
|
193 |
+
desc: null
|
194 |
+
value: ./
|
195 |
+
overwrite_cache:
|
196 |
+
desc: null
|
197 |
+
value: false
|
198 |
+
overwrite_output_dir:
|
199 |
+
desc: null
|
200 |
+
value: true
|
201 |
+
pad_to_max_length:
|
202 |
+
desc: null
|
203 |
+
value: false
|
204 |
+
past_index:
|
205 |
+
desc: null
|
206 |
+
value: -1
|
207 |
+
per_device_eval_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 1
|
210 |
+
per_device_train_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: 1
|
213 |
+
per_gpu_eval_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
per_gpu_train_batch_size:
|
217 |
+
desc: null
|
218 |
+
value: null
|
219 |
+
prediction_loss_only:
|
220 |
+
desc: null
|
221 |
+
value: false
|
222 |
+
preprocessing_num_workers:
|
223 |
+
desc: null
|
224 |
+
value: 96
|
225 |
+
push_to_hub:
|
226 |
+
desc: null
|
227 |
+
value: true
|
228 |
+
push_to_hub_model_id:
|
229 |
+
desc: null
|
230 |
+
value: ''
|
231 |
+
push_to_hub_organization:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
push_to_hub_token:
|
235 |
+
desc: null
|
236 |
+
value: null
|
237 |
+
remove_unused_columns:
|
238 |
+
desc: null
|
239 |
+
value: true
|
240 |
+
report_to:
|
241 |
+
desc: null
|
242 |
+
value:
|
243 |
+
- tensorboard
|
244 |
+
- wandb
|
245 |
+
resume_from_checkpoint:
|
246 |
+
desc: null
|
247 |
+
value: null
|
248 |
+
run_name:
|
249 |
+
desc: null
|
250 |
+
value: ./
|
251 |
+
save_on_each_node:
|
252 |
+
desc: null
|
253 |
+
value: false
|
254 |
+
save_optimizer:
|
255 |
+
desc: null
|
256 |
+
value: true
|
257 |
+
save_steps:
|
258 |
+
desc: null
|
259 |
+
value: 20000
|
260 |
+
save_strategy:
|
261 |
+
desc: null
|
262 |
+
value: IntervalStrategy.STEPS
|
263 |
+
save_total_limit:
|
264 |
+
desc: null
|
265 |
+
value: 5
|
266 |
+
seed:
|
267 |
+
desc: null
|
268 |
+
value: 42
|
269 |
+
sharded_ddp:
|
270 |
+
desc: null
|
271 |
+
value: []
|
272 |
+
skip_memory_metrics:
|
273 |
+
desc: null
|
274 |
+
value: true
|
275 |
+
tokenizer_name:
|
276 |
+
desc: null
|
277 |
+
value: ./
|
278 |
+
tpu_metrics_debug:
|
279 |
+
desc: null
|
280 |
+
value: false
|
281 |
+
tpu_num_cores:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
train_ref_file:
|
285 |
+
desc: null
|
286 |
+
value: null
|
287 |
+
use_fast_tokenizer:
|
288 |
+
desc: null
|
289 |
+
value: true
|
290 |
+
use_legacy_prediction_loop:
|
291 |
+
desc: null
|
292 |
+
value: false
|
293 |
+
validation_ref_file:
|
294 |
+
desc: null
|
295 |
+
value: null
|
296 |
+
validation_split_percentage:
|
297 |
+
desc: null
|
298 |
+
value: 5
|
299 |
+
warmup_ratio:
|
300 |
+
desc: null
|
301 |
+
value: 0.0
|
302 |
+
warmup_steps:
|
303 |
+
desc: null
|
304 |
+
value: 10000
|
305 |
+
weight_decay:
|
306 |
+
desc: null
|
307 |
+
value: 0.0095
|
wandb/run-20210714_231147-gkn68kcy/files/output.log
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
|
2 |
+
warnings.warn(
|
3 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
4 |
+
warnings.warn(
|
5 |
+
Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s]2021-07-14 23:26:04.701487: E external/org_tensorflow/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_client.cc:2036] Execution of replica 0 failed: Resource exhausted: Attempting to allocate 17.0K. That was not possible. There are 48.0K free. Due to fragmentation, the largest contiguous region of free memory is 16.0K.; (0x0x0_HBM0)
|
6 |
+
Epoch ... (1/5): 0%| | 0/5 [14:02<?, ?it/s]
|
7 |
+
Traceback (most recent call last):
|
8 |
+
File "./run_mlm_flax.py", line 806, in <module>
|
9 |
+
train_batch_idx = generate_batch_splits(train_samples_idx, train_batch_size // grad_accum_steps)
|
10 |
+
File "./run_mlm_flax.py", line 263, in generate_batch_splits
|
11 |
+
batch_idx = np.split(samples_idx, sections_split)
|
12 |
+
File "<__array_function__ internals>", line 5, in split
|
13 |
+
File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 874, in split
|
14 |
+
return array_split(ary, indices_or_sections, axis)
|
15 |
+
File "<__array_function__ internals>", line 5, in array_split
|
16 |
+
File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 790, in array_split
|
17 |
+
sub_arys.append(_nx.swapaxes(sary[st:end], axis, 0))
|
18 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5009, in _rewriting_take
|
19 |
+
return _gather(arr, treedef, static_idx, dynamic_idx, indices_are_sorted,
|
20 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5028, in _gather
|
21 |
+
y = lax.gather(
|
22 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/lax/lax.py", line 984, in gather
|
23 |
+
return gather_p.bind(
|
24 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 264, in bind
|
25 |
+
out = top_trace.process_primitive(self, tracers, params)
|
26 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 603, in process_primitive
|
27 |
+
return primitive.impl(*tracers, **params)
|
28 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 249, in apply_primitive
|
29 |
+
return compiled_fun(*args)
|
30 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 365, in _execute_compiled_primitive
|
31 |
+
out_bufs = compiled.execute(input_bufs)
|
32 |
+
RuntimeError: Resource exhausted: Attempting to allocate 17.0K. That was not possible. There are 48.0K free. Due to fragmentation, the largest contiguous region of free memory is 16.0K.; (0x0x0_HBM0)
|
wandb/run-20210714_231147-gkn68kcy/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|
wandb/run-20210714_231147-gkn68kcy/files/wandb-metadata.json
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2021-07-14T23:11:49.155779",
|
5 |
+
"startedAt": "2021-07-14T23:11:47.117291",
|
6 |
+
"docker": null,
|
7 |
+
"cpu_count": 96,
|
8 |
+
"cuda": null,
|
9 |
+
"args": [
|
10 |
+
"--push_to_hub",
|
11 |
+
"--output_dir=./",
|
12 |
+
"--model_type=big_bird",
|
13 |
+
"--config_name=./",
|
14 |
+
"--tokenizer_name=./",
|
15 |
+
"--max_seq_length=4096",
|
16 |
+
"--weight_decay=0.0095",
|
17 |
+
"--warmup_steps=10000",
|
18 |
+
"--overwrite_output_dir",
|
19 |
+
"--adam_beta1=0.9",
|
20 |
+
"--adam_beta2=0.98",
|
21 |
+
"--logging_steps=500",
|
22 |
+
"--eval_steps=20000",
|
23 |
+
"--num_train_epochs=5",
|
24 |
+
"--preprocessing_num_workers=96",
|
25 |
+
"--save_steps=20000",
|
26 |
+
"--learning_rate=5e-5",
|
27 |
+
"--per_device_train_batch_size=1",
|
28 |
+
"--per_device_eval_batch_size=1",
|
29 |
+
"--save_total_limit=5",
|
30 |
+
"--max_eval_samples=2000",
|
31 |
+
"--overwrite_cache",
|
32 |
+
"False"
|
33 |
+
],
|
34 |
+
"state": "running",
|
35 |
+
"program": "./run_mlm_flax.py",
|
36 |
+
"codePath": "run_mlm_flax.py",
|
37 |
+
"git": {
|
38 |
+
"remote": "https://huggingface.co/flax-community/pino-roberta-base",
|
39 |
+
"commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6"
|
40 |
+
},
|
41 |
+
"email": null,
|
42 |
+
"root": "/home/dat/pino-roberta-base",
|
43 |
+
"host": "t1v-n-f5c06ea1-w-0",
|
44 |
+
"username": "dat",
|
45 |
+
"executable": "/home/dat/pino/bin/python"
|
46 |
+
}
|
wandb/run-20210714_231147-gkn68kcy/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
wandb/run-20210714_231147-gkn68kcy/logs/debug-internal.log
ADDED
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-14 23:11:47,821 INFO MainThread:604064 [internal.py:wandb_internal():88] W&B internal server running at pid: 604064, started at: 2021-07-14 23:11:47.821366
|
2 |
+
2021-07-14 23:11:47,823 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: check_version
|
3 |
+
2021-07-14 23:11:47,824 INFO WriterThread:604064 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/run-gkn68kcy.wandb
|
4 |
+
2021-07-14 23:11:47,824 DEBUG SenderThread:604064 [sender.py:send():179] send: header
|
5 |
+
2021-07-14 23:11:47,825 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: check_version
|
6 |
+
2021-07-14 23:11:47,862 DEBUG SenderThread:604064 [sender.py:send():179] send: run
|
7 |
+
2021-07-14 23:11:48,044 INFO SenderThread:604064 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files
|
8 |
+
2021-07-14 23:11:48,044 INFO SenderThread:604064 [sender.py:_start_run_threads():716] run started: gkn68kcy with start time 1626304307
|
9 |
+
2021-07-14 23:11:48,044 DEBUG SenderThread:604064 [sender.py:send():179] send: summary
|
10 |
+
2021-07-14 23:11:48,044 INFO SenderThread:604064 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
11 |
+
2021-07-14 23:11:48,045 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: run_start
|
12 |
+
2021-07-14 23:11:49,047 INFO Thread-8 :604064 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/wandb-summary.json
|
13 |
+
2021-07-14 23:11:49,155 DEBUG HandlerThread:604064 [meta.py:__init__():39] meta init
|
14 |
+
2021-07-14 23:11:49,155 DEBUG HandlerThread:604064 [meta.py:__init__():53] meta init done
|
15 |
+
2021-07-14 23:11:49,155 DEBUG HandlerThread:604064 [meta.py:probe():210] probe
|
16 |
+
2021-07-14 23:11:49,156 DEBUG HandlerThread:604064 [meta.py:_setup_git():200] setup git
|
17 |
+
2021-07-14 23:11:49,187 DEBUG HandlerThread:604064 [meta.py:_setup_git():207] setup git done
|
18 |
+
2021-07-14 23:11:49,188 DEBUG HandlerThread:604064 [meta.py:_save_pip():57] save pip
|
19 |
+
2021-07-14 23:11:49,188 DEBUG HandlerThread:604064 [meta.py:_save_pip():71] save pip done
|
20 |
+
2021-07-14 23:11:49,188 DEBUG HandlerThread:604064 [meta.py:probe():252] probe done
|
21 |
+
2021-07-14 23:11:49,192 DEBUG SenderThread:604064 [sender.py:send():179] send: files
|
22 |
+
2021-07-14 23:11:49,192 INFO SenderThread:604064 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
|
23 |
+
2021-07-14 23:11:49,199 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
24 |
+
2021-07-14 23:11:49,200 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
25 |
+
2021-07-14 23:11:49,336 DEBUG SenderThread:604064 [sender.py:send():179] send: config
|
26 |
+
2021-07-14 23:11:49,336 DEBUG SenderThread:604064 [sender.py:send():179] send: config
|
27 |
+
2021-07-14 23:11:49,337 DEBUG SenderThread:604064 [sender.py:send():179] send: config
|
28 |
+
2021-07-14 23:11:49,685 INFO Thread-11 :604064 [upload_job.py:push():137] Uploaded file /tmp/tmpnh56hsgtwandb/3jt8aozu-wandb-metadata.json
|
29 |
+
2021-07-14 23:11:50,046 INFO Thread-8 :604064 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/output.log
|
30 |
+
2021-07-14 23:11:50,046 INFO Thread-8 :604064 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/requirements.txt
|
31 |
+
2021-07-14 23:11:50,046 INFO Thread-8 :604064 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/wandb-metadata.json
|
32 |
+
2021-07-14 23:12:04,051 INFO Thread-8 :604064 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/output.log
|
33 |
+
2021-07-14 23:12:04,338 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
34 |
+
2021-07-14 23:12:04,339 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
35 |
+
2021-07-14 23:12:17,233 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
36 |
+
2021-07-14 23:12:19,058 INFO Thread-8 :604064 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/config.yaml
|
37 |
+
2021-07-14 23:12:19,470 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
38 |
+
2021-07-14 23:12:19,471 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
39 |
+
2021-07-14 23:12:34,607 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
40 |
+
2021-07-14 23:12:34,607 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
41 |
+
2021-07-14 23:12:47,304 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
42 |
+
2021-07-14 23:12:49,759 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
43 |
+
2021-07-14 23:12:49,760 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
44 |
+
2021-07-14 23:13:04,899 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
45 |
+
2021-07-14 23:13:04,899 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
46 |
+
2021-07-14 23:13:17,375 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
47 |
+
2021-07-14 23:13:20,032 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
48 |
+
2021-07-14 23:13:20,032 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
49 |
+
2021-07-14 23:13:35,167 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
50 |
+
2021-07-14 23:13:35,168 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
51 |
+
2021-07-14 23:13:47,450 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
52 |
+
2021-07-14 23:13:50,301 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
53 |
+
2021-07-14 23:13:50,301 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
54 |
+
2021-07-14 23:14:05,433 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
55 |
+
2021-07-14 23:14:05,434 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
56 |
+
2021-07-14 23:14:17,527 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
57 |
+
2021-07-14 23:14:20,564 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
58 |
+
2021-07-14 23:14:20,564 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
59 |
+
2021-07-14 23:14:35,694 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
60 |
+
2021-07-14 23:14:35,695 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
61 |
+
2021-07-14 23:14:47,605 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
62 |
+
2021-07-14 23:14:50,827 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
63 |
+
2021-07-14 23:14:50,827 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
64 |
+
2021-07-14 23:15:05,965 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
65 |
+
2021-07-14 23:15:05,965 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
66 |
+
2021-07-14 23:15:17,682 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
67 |
+
2021-07-14 23:15:21,099 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
68 |
+
2021-07-14 23:15:21,100 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
69 |
+
2021-07-14 23:15:36,236 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
70 |
+
2021-07-14 23:15:36,237 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
71 |
+
2021-07-14 23:15:47,752 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
72 |
+
2021-07-14 23:15:51,383 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
73 |
+
2021-07-14 23:15:51,384 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
74 |
+
2021-07-14 23:16:06,514 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
75 |
+
2021-07-14 23:16:06,515 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
76 |
+
2021-07-14 23:16:17,832 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
77 |
+
2021-07-14 23:16:21,647 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
78 |
+
2021-07-14 23:16:21,647 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
79 |
+
2021-07-14 23:16:36,777 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
80 |
+
2021-07-14 23:16:36,778 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
81 |
+
2021-07-14 23:16:47,906 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
82 |
+
2021-07-14 23:16:51,911 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
83 |
+
2021-07-14 23:16:51,911 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
84 |
+
2021-07-14 23:17:07,045 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
85 |
+
2021-07-14 23:17:07,045 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
86 |
+
2021-07-14 23:17:17,984 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
87 |
+
2021-07-14 23:17:22,180 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
88 |
+
2021-07-14 23:17:22,180 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
89 |
+
2021-07-14 23:17:37,312 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
90 |
+
2021-07-14 23:17:37,313 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
91 |
+
2021-07-14 23:17:48,061 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
92 |
+
2021-07-14 23:17:52,447 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
93 |
+
2021-07-14 23:17:52,447 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
94 |
+
2021-07-14 23:18:07,584 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
95 |
+
2021-07-14 23:18:07,584 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
96 |
+
2021-07-14 23:18:18,134 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
97 |
+
2021-07-14 23:18:22,718 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
98 |
+
2021-07-14 23:18:22,718 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
99 |
+
2021-07-14 23:18:37,852 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
100 |
+
2021-07-14 23:18:37,852 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
101 |
+
2021-07-14 23:18:48,202 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
102 |
+
2021-07-14 23:18:52,983 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
103 |
+
2021-07-14 23:18:52,984 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
104 |
+
2021-07-14 23:19:08,118 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
105 |
+
2021-07-14 23:19:08,118 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
106 |
+
2021-07-14 23:19:18,280 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
107 |
+
2021-07-14 23:19:23,249 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
108 |
+
2021-07-14 23:19:23,249 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
109 |
+
2021-07-14 23:19:38,406 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
110 |
+
2021-07-14 23:19:38,407 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
111 |
+
2021-07-14 23:19:48,357 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
112 |
+
2021-07-14 23:19:53,538 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
113 |
+
2021-07-14 23:19:53,539 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
114 |
+
2021-07-14 23:20:08,668 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
115 |
+
2021-07-14 23:20:08,668 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
116 |
+
2021-07-14 23:20:18,431 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
117 |
+
2021-07-14 23:20:23,808 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
118 |
+
2021-07-14 23:20:23,809 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
119 |
+
2021-07-14 23:20:38,943 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
120 |
+
2021-07-14 23:20:38,943 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
121 |
+
2021-07-14 23:20:48,507 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
122 |
+
2021-07-14 23:20:54,079 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
123 |
+
2021-07-14 23:20:54,079 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
124 |
+
2021-07-14 23:21:09,222 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
125 |
+
2021-07-14 23:21:09,223 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
126 |
+
2021-07-14 23:21:18,586 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
127 |
+
2021-07-14 23:21:24,353 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
128 |
+
2021-07-14 23:21:24,354 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
129 |
+
2021-07-14 23:21:39,483 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
130 |
+
2021-07-14 23:21:39,484 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
131 |
+
2021-07-14 23:21:48,663 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
132 |
+
2021-07-14 23:21:55,394 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
133 |
+
2021-07-14 23:21:55,394 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
134 |
+
2021-07-14 23:22:10,527 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
135 |
+
2021-07-14 23:22:10,528 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
136 |
+
2021-07-14 23:22:18,741 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
137 |
+
2021-07-14 23:22:25,659 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
138 |
+
2021-07-14 23:22:25,659 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
139 |
+
2021-07-14 23:22:40,790 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
140 |
+
2021-07-14 23:22:40,790 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
141 |
+
2021-07-14 23:22:48,820 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
142 |
+
2021-07-14 23:22:55,922 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
143 |
+
2021-07-14 23:22:55,923 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
144 |
+
2021-07-14 23:23:11,058 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
145 |
+
2021-07-14 23:23:11,059 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
146 |
+
2021-07-14 23:23:18,897 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
147 |
+
2021-07-14 23:23:26,197 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
148 |
+
2021-07-14 23:23:26,198 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
149 |
+
2021-07-14 23:23:41,329 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
150 |
+
2021-07-14 23:23:41,329 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
151 |
+
2021-07-14 23:23:48,974 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
152 |
+
2021-07-14 23:23:56,463 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
153 |
+
2021-07-14 23:23:56,463 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
154 |
+
2021-07-14 23:24:11,593 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
155 |
+
2021-07-14 23:24:11,593 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
156 |
+
2021-07-14 23:24:19,051 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
157 |
+
2021-07-14 23:24:26,724 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
158 |
+
2021-07-14 23:24:26,724 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
159 |
+
2021-07-14 23:24:41,858 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
160 |
+
2021-07-14 23:24:41,858 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
161 |
+
2021-07-14 23:24:49,130 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
162 |
+
2021-07-14 23:24:56,991 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
163 |
+
2021-07-14 23:24:56,991 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
164 |
+
2021-07-14 23:25:12,121 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
165 |
+
2021-07-14 23:25:12,122 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
166 |
+
2021-07-14 23:25:19,207 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
167 |
+
2021-07-14 23:25:27,253 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
168 |
+
2021-07-14 23:25:27,254 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
169 |
+
2021-07-14 23:25:42,385 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
170 |
+
2021-07-14 23:25:42,386 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
171 |
+
2021-07-14 23:25:49,284 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
172 |
+
2021-07-14 23:25:57,527 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: stop_status
|
173 |
+
2021-07-14 23:25:57,527 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: stop_status
|
174 |
+
2021-07-14 23:26:06,351 INFO Thread-8 :604064 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/output.log
|
175 |
+
2021-07-14 23:26:07,081 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit
|
176 |
+
2021-07-14 23:26:07,082 DEBUG SenderThread:604064 [sender.py:send():179] send: telemetry
|
177 |
+
2021-07-14 23:26:07,082 DEBUG SenderThread:604064 [sender.py:send():179] send: exit
|
178 |
+
2021-07-14 23:26:07,082 INFO SenderThread:604064 [sender.py:send_exit():287] handling exit code: 1
|
179 |
+
2021-07-14 23:26:07,083 INFO SenderThread:604064 [sender.py:send_exit():295] send defer
|
180 |
+
2021-07-14 23:26:07,084 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit
|
181 |
+
2021-07-14 23:26:07,084 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer
|
182 |
+
2021-07-14 23:26:07,085 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 0
|
183 |
+
2021-07-14 23:26:07,085 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer
|
184 |
+
2021-07-14 23:26:07,085 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 0
|
185 |
+
2021-07-14 23:26:07,085 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 1
|
186 |
+
2021-07-14 23:26:07,085 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer
|
187 |
+
2021-07-14 23:26:07,085 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 1
|
188 |
+
2021-07-14 23:26:07,130 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer
|
189 |
+
2021-07-14 23:26:07,130 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 1
|
190 |
+
2021-07-14 23:26:07,130 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 2
|
191 |
+
2021-07-14 23:26:07,131 DEBUG SenderThread:604064 [sender.py:send():179] send: stats
|
192 |
+
2021-07-14 23:26:07,131 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer
|
193 |
+
2021-07-14 23:26:07,131 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 2
|
194 |
+
2021-07-14 23:26:07,132 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer
|
195 |
+
2021-07-14 23:26:07,132 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 2
|
196 |
+
2021-07-14 23:26:07,132 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 3
|
197 |
+
2021-07-14 23:26:07,132 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer
|
198 |
+
2021-07-14 23:26:07,132 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 3
|
199 |
+
2021-07-14 23:26:07,132 DEBUG SenderThread:604064 [sender.py:send():179] send: summary
|
200 |
+
2021-07-14 23:26:07,133 INFO SenderThread:604064 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
201 |
+
2021-07-14 23:26:07,133 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer
|
202 |
+
2021-07-14 23:26:07,133 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 3
|
203 |
+
2021-07-14 23:26:07,133 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 4
|
204 |
+
2021-07-14 23:26:07,134 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer
|
205 |
+
2021-07-14 23:26:07,134 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 4
|
206 |
+
2021-07-14 23:26:07,134 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer
|
207 |
+
2021-07-14 23:26:07,134 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 4
|
208 |
+
2021-07-14 23:26:07,228 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit
|
209 |
+
2021-07-14 23:26:07,305 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 5
|
210 |
+
2021-07-14 23:26:07,305 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit
|
211 |
+
2021-07-14 23:26:07,306 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer
|
212 |
+
2021-07-14 23:26:07,306 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 5
|
213 |
+
2021-07-14 23:26:07,306 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer
|
214 |
+
2021-07-14 23:26:07,306 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 5
|
215 |
+
2021-07-14 23:26:07,306 INFO SenderThread:604064 [dir_watcher.py:finish():282] shutting down directory watcher
|
216 |
+
2021-07-14 23:26:07,351 INFO Thread-8 :604064 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/config.yaml
|
217 |
+
2021-07-14 23:26:07,351 INFO SenderThread:604064 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/output.log
|
218 |
+
2021-07-14 23:26:07,352 INFO SenderThread:604064 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/wandb-summary.json
|
219 |
+
2021-07-14 23:26:07,352 INFO SenderThread:604064 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files
|
220 |
+
2021-07-14 23:26:07,352 INFO SenderThread:604064 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/requirements.txt requirements.txt
|
221 |
+
2021-07-14 23:26:07,352 INFO SenderThread:604064 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/output.log output.log
|
222 |
+
2021-07-14 23:26:07,352 INFO SenderThread:604064 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/wandb-metadata.json wandb-metadata.json
|
223 |
+
2021-07-14 23:26:07,352 INFO SenderThread:604064 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/config.yaml config.yaml
|
224 |
+
2021-07-14 23:26:07,356 INFO SenderThread:604064 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/wandb-summary.json wandb-summary.json
|
225 |
+
2021-07-14 23:26:07,359 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 6
|
226 |
+
2021-07-14 23:26:07,360 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer
|
227 |
+
2021-07-14 23:26:07,360 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 6
|
228 |
+
2021-07-14 23:26:07,360 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer
|
229 |
+
2021-07-14 23:26:07,361 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 6
|
230 |
+
2021-07-14 23:26:07,361 INFO SenderThread:604064 [file_pusher.py:finish():177] shutting down file pusher
|
231 |
+
2021-07-14 23:26:07,407 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit
|
232 |
+
2021-07-14 23:26:07,408 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit
|
233 |
+
2021-07-14 23:26:07,513 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit
|
234 |
+
2021-07-14 23:26:07,514 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit
|
235 |
+
2021-07-14 23:26:07,616 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit
|
236 |
+
2021-07-14 23:26:07,616 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit
|
237 |
+
2021-07-14 23:26:07,718 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit
|
238 |
+
2021-07-14 23:26:07,718 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit
|
239 |
+
2021-07-14 23:26:07,820 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit
|
240 |
+
2021-07-14 23:26:07,820 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit
|
241 |
+
2021-07-14 23:26:07,840 INFO Thread-14 :604064 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/config.yaml
|
242 |
+
2021-07-14 23:26:07,841 INFO Thread-13 :604064 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/output.log
|
243 |
+
2021-07-14 23:26:07,874 INFO Thread-12 :604064 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/requirements.txt
|
244 |
+
2021-07-14 23:26:07,875 INFO Thread-15 :604064 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/files/wandb-summary.json
|
245 |
+
2021-07-14 23:26:07,922 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit
|
246 |
+
2021-07-14 23:26:07,922 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit
|
247 |
+
2021-07-14 23:26:08,024 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit
|
248 |
+
2021-07-14 23:26:08,024 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit
|
249 |
+
2021-07-14 23:26:08,076 INFO Thread-7 :604064 [sender.py:transition_state():308] send defer: 7
|
250 |
+
2021-07-14 23:26:08,077 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer
|
251 |
+
2021-07-14 23:26:08,077 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 7
|
252 |
+
2021-07-14 23:26:08,077 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer
|
253 |
+
2021-07-14 23:26:08,077 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 7
|
254 |
+
2021-07-14 23:26:08,126 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit
|
255 |
+
2021-07-14 23:26:08,360 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 8
|
256 |
+
2021-07-14 23:26:08,360 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit
|
257 |
+
2021-07-14 23:26:08,361 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer
|
258 |
+
2021-07-14 23:26:08,361 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 8
|
259 |
+
2021-07-14 23:26:08,361 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer
|
260 |
+
2021-07-14 23:26:08,361 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 8
|
261 |
+
2021-07-14 23:26:08,361 INFO SenderThread:604064 [sender.py:transition_state():308] send defer: 9
|
262 |
+
2021-07-14 23:26:08,362 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: defer
|
263 |
+
2021-07-14 23:26:08,362 INFO HandlerThread:604064 [handler.py:handle_request_defer():141] handle defer: 9
|
264 |
+
2021-07-14 23:26:08,362 DEBUG SenderThread:604064 [sender.py:send():179] send: final
|
265 |
+
2021-07-14 23:26:08,362 DEBUG SenderThread:604064 [sender.py:send():179] send: footer
|
266 |
+
2021-07-14 23:26:08,362 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: defer
|
267 |
+
2021-07-14 23:26:08,362 INFO SenderThread:604064 [sender.py:send_request_defer():304] handle sender defer: 9
|
268 |
+
2021-07-14 23:26:08,462 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: poll_exit
|
269 |
+
2021-07-14 23:26:08,462 DEBUG SenderThread:604064 [sender.py:send_request():193] send_request: poll_exit
|
270 |
+
2021-07-14 23:26:08,463 INFO SenderThread:604064 [file_pusher.py:join():182] waiting for file pusher
|
271 |
+
2021-07-14 23:26:08,464 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: get_summary
|
272 |
+
2021-07-14 23:26:08,465 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: sampled_history
|
273 |
+
2021-07-14 23:26:08,465 DEBUG HandlerThread:604064 [handler.py:handle_request():124] handle_request: shutdown
|
274 |
+
2021-07-14 23:26:08,465 INFO HandlerThread:604064 [handler.py:finish():638] shutting down handler
|
275 |
+
2021-07-14 23:26:09,363 INFO WriterThread:604064 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/run-gkn68kcy.wandb
|
276 |
+
2021-07-14 23:26:09,463 INFO SenderThread:604064 [sender.py:finish():945] shutting down sender
|
277 |
+
2021-07-14 23:26:09,463 INFO SenderThread:604064 [file_pusher.py:finish():177] shutting down file pusher
|
278 |
+
2021-07-14 23:26:09,463 INFO SenderThread:604064 [file_pusher.py:join():182] waiting for file pusher
|
279 |
+
2021-07-14 23:26:09,466 INFO MainThread:604064 [internal.py:handle_exit():78] Internal process exited
|
wandb/run-20210714_231147-gkn68kcy/logs/debug.log
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-14 23:11:47,119 INFO MainThread:602807 [wandb_setup.py:_flush():69] setting env: {}
|
2 |
+
2021-07-14 23:11:47,119 INFO MainThread:602807 [wandb_setup.py:_flush():69] setting login settings: {}
|
3 |
+
2021-07-14 23:11:47,119 INFO MainThread:602807 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/logs/debug.log
|
4 |
+
2021-07-14 23:11:47,119 INFO MainThread:602807 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_231147-gkn68kcy/logs/debug-internal.log
|
5 |
+
2021-07-14 23:11:47,119 INFO MainThread:602807 [wandb_init.py:init():370] calling init triggers
|
6 |
+
2021-07-14 23:11:47,119 INFO MainThread:602807 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
|
7 |
+
config: {}
|
8 |
+
2021-07-14 23:11:47,119 INFO MainThread:602807 [wandb_init.py:init():419] starting backend
|
9 |
+
2021-07-14 23:11:47,120 INFO MainThread:602807 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
10 |
+
2021-07-14 23:11:47,167 INFO MainThread:602807 [backend.py:ensure_launched():135] starting backend process...
|
11 |
+
2021-07-14 23:11:47,212 INFO MainThread:602807 [backend.py:ensure_launched():139] started backend process with pid: 604064
|
12 |
+
2021-07-14 23:11:47,214 INFO MainThread:602807 [wandb_init.py:init():424] backend started and connected
|
13 |
+
2021-07-14 23:11:47,217 INFO MainThread:602807 [wandb_init.py:init():472] updated telemetry
|
14 |
+
2021-07-14 23:11:47,218 INFO MainThread:602807 [wandb_init.py:init():491] communicating current version
|
15 |
+
2021-07-14 23:11:47,860 INFO MainThread:602807 [wandb_init.py:init():496] got version response
|
16 |
+
2021-07-14 23:11:47,860 INFO MainThread:602807 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
|
17 |
+
2021-07-14 23:11:48,044 INFO MainThread:602807 [wandb_init.py:init():529] starting run threads in backend
|
18 |
+
2021-07-14 23:11:49,195 INFO MainThread:602807 [wandb_run.py:_console_start():1623] atexit reg
|
19 |
+
2021-07-14 23:11:49,196 INFO MainThread:602807 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
|
20 |
+
2021-07-14 23:11:49,196 INFO MainThread:602807 [wandb_run.py:_redirect():1502] Redirecting console.
|
21 |
+
2021-07-14 23:11:49,199 INFO MainThread:602807 [wandb_run.py:_redirect():1558] Redirects installed.
|
22 |
+
2021-07-14 23:11:49,199 INFO MainThread:602807 [wandb_init.py:init():554] run started, returning control to user process
|
23 |
+
2021-07-14 23:11:49,207 INFO MainThread:602807 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_23-11-40_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
+
2021-07-14 23:11:49,209 INFO MainThread:602807 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
|
25 |
+
2021-07-14 23:11:49,210 INFO MainThread:602807 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000}
|
26 |
+
2021-07-14 23:26:04,706 INFO MainThread:602807 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
|
27 |
+
2021-07-14 23:26:04,707 INFO MainThread:602807 [wandb_run.py:_restore():1565] restore
|
28 |
+
2021-07-14 23:26:07,085 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
29 |
+
wandb_count: 1
|
30 |
+
}
|
31 |
+
pusher_stats {
|
32 |
+
uploaded_bytes: 1405
|
33 |
+
total_bytes: 1405
|
34 |
+
}
|
35 |
+
|
36 |
+
2021-07-14 23:26:07,306 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
37 |
+
wandb_count: 1
|
38 |
+
}
|
39 |
+
pusher_stats {
|
40 |
+
uploaded_bytes: 1405
|
41 |
+
total_bytes: 1405
|
42 |
+
}
|
43 |
+
|
44 |
+
2021-07-14 23:26:07,408 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
45 |
+
wandb_count: 5
|
46 |
+
}
|
47 |
+
pusher_stats {
|
48 |
+
uploaded_bytes: 1405
|
49 |
+
total_bytes: 10798
|
50 |
+
}
|
51 |
+
|
52 |
+
2021-07-14 23:26:07,515 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
53 |
+
wandb_count: 5
|
54 |
+
}
|
55 |
+
pusher_stats {
|
56 |
+
uploaded_bytes: 1405
|
57 |
+
total_bytes: 10798
|
58 |
+
}
|
59 |
+
|
60 |
+
2021-07-14 23:26:07,617 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
61 |
+
wandb_count: 5
|
62 |
+
}
|
63 |
+
pusher_stats {
|
64 |
+
uploaded_bytes: 10798
|
65 |
+
total_bytes: 10798
|
66 |
+
}
|
67 |
+
|
68 |
+
2021-07-14 23:26:07,719 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
69 |
+
wandb_count: 5
|
70 |
+
}
|
71 |
+
pusher_stats {
|
72 |
+
uploaded_bytes: 10798
|
73 |
+
total_bytes: 10798
|
74 |
+
}
|
75 |
+
|
76 |
+
2021-07-14 23:26:07,821 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
77 |
+
wandb_count: 5
|
78 |
+
}
|
79 |
+
pusher_stats {
|
80 |
+
uploaded_bytes: 10798
|
81 |
+
total_bytes: 10798
|
82 |
+
}
|
83 |
+
|
84 |
+
2021-07-14 23:26:07,923 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
85 |
+
wandb_count: 5
|
86 |
+
}
|
87 |
+
pusher_stats {
|
88 |
+
uploaded_bytes: 10798
|
89 |
+
total_bytes: 10798
|
90 |
+
}
|
91 |
+
|
92 |
+
2021-07-14 23:26:08,025 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
93 |
+
wandb_count: 5
|
94 |
+
}
|
95 |
+
pusher_stats {
|
96 |
+
uploaded_bytes: 10798
|
97 |
+
total_bytes: 10798
|
98 |
+
}
|
99 |
+
|
100 |
+
2021-07-14 23:26:08,361 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
101 |
+
wandb_count: 5
|
102 |
+
}
|
103 |
+
pusher_stats {
|
104 |
+
uploaded_bytes: 10798
|
105 |
+
total_bytes: 10798
|
106 |
+
}
|
107 |
+
|
108 |
+
2021-07-14 23:26:08,463 INFO MainThread:602807 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
|
109 |
+
exit_result {
|
110 |
+
}
|
111 |
+
file_counts {
|
112 |
+
wandb_count: 5
|
113 |
+
}
|
114 |
+
pusher_stats {
|
115 |
+
uploaded_bytes: 10798
|
116 |
+
total_bytes: 10798
|
117 |
+
}
|
118 |
+
|
119 |
+
2021-07-14 23:26:09,799 INFO MainThread:602807 [wandb_run.py:_show_files():1937] logging synced files
|
wandb/run-20210714_231147-gkn68kcy/run-gkn68kcy.wandb
ADDED
Binary file (13.4 kB). View file
|
|
wandb/run-20210714_232703-1jijl27o/files/config.yaml
ADDED
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
2:
|
17 |
+
- 3
|
18 |
+
- 11
|
19 |
+
4: 3.8.10
|
20 |
+
5: 0.10.33
|
21 |
+
6: 4.9.0.dev0
|
22 |
+
8:
|
23 |
+
- 5
|
24 |
+
adafactor:
|
25 |
+
desc: null
|
26 |
+
value: false
|
27 |
+
adam_beta1:
|
28 |
+
desc: null
|
29 |
+
value: 0.9
|
30 |
+
adam_beta2:
|
31 |
+
desc: null
|
32 |
+
value: 0.98
|
33 |
+
adam_epsilon:
|
34 |
+
desc: null
|
35 |
+
value: 1.0e-08
|
36 |
+
cache_dir:
|
37 |
+
desc: null
|
38 |
+
value: null
|
39 |
+
config_name:
|
40 |
+
desc: null
|
41 |
+
value: ./
|
42 |
+
dataloader_drop_last:
|
43 |
+
desc: null
|
44 |
+
value: false
|
45 |
+
dataloader_num_workers:
|
46 |
+
desc: null
|
47 |
+
value: 0
|
48 |
+
dataloader_pin_memory:
|
49 |
+
desc: null
|
50 |
+
value: true
|
51 |
+
dataset_config_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
dataset_name:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
ddp_find_unused_parameters:
|
58 |
+
desc: null
|
59 |
+
value: null
|
60 |
+
debug:
|
61 |
+
desc: null
|
62 |
+
value: []
|
63 |
+
deepspeed:
|
64 |
+
desc: null
|
65 |
+
value: null
|
66 |
+
disable_tqdm:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_eval:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_predict:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
do_train:
|
76 |
+
desc: null
|
77 |
+
value: false
|
78 |
+
dtype:
|
79 |
+
desc: null
|
80 |
+
value: float32
|
81 |
+
eval_accumulation_steps:
|
82 |
+
desc: null
|
83 |
+
value: null
|
84 |
+
eval_steps:
|
85 |
+
desc: null
|
86 |
+
value: 20000
|
87 |
+
evaluation_strategy:
|
88 |
+
desc: null
|
89 |
+
value: IntervalStrategy.NO
|
90 |
+
fp16:
|
91 |
+
desc: null
|
92 |
+
value: false
|
93 |
+
fp16_backend:
|
94 |
+
desc: null
|
95 |
+
value: auto
|
96 |
+
fp16_full_eval:
|
97 |
+
desc: null
|
98 |
+
value: false
|
99 |
+
fp16_opt_level:
|
100 |
+
desc: null
|
101 |
+
value: O1
|
102 |
+
gradient_accumulation_steps:
|
103 |
+
desc: null
|
104 |
+
value: 4
|
105 |
+
greater_is_better:
|
106 |
+
desc: null
|
107 |
+
value: null
|
108 |
+
group_by_length:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
ignore_data_skip:
|
112 |
+
desc: null
|
113 |
+
value: false
|
114 |
+
label_names:
|
115 |
+
desc: null
|
116 |
+
value: null
|
117 |
+
label_smoothing_factor:
|
118 |
+
desc: null
|
119 |
+
value: 0.0
|
120 |
+
learning_rate:
|
121 |
+
desc: null
|
122 |
+
value: 5.0e-05
|
123 |
+
length_column_name:
|
124 |
+
desc: null
|
125 |
+
value: length
|
126 |
+
line_by_line:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
load_best_model_at_end:
|
130 |
+
desc: null
|
131 |
+
value: false
|
132 |
+
local_rank:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_level_replica:
|
139 |
+
desc: null
|
140 |
+
value: -1
|
141 |
+
log_on_each_node:
|
142 |
+
desc: null
|
143 |
+
value: true
|
144 |
+
logging_dir:
|
145 |
+
desc: null
|
146 |
+
value: ./runs/Jul14_23-26-56_t1v-n-f5c06ea1-w-0
|
147 |
+
logging_first_step:
|
148 |
+
desc: null
|
149 |
+
value: false
|
150 |
+
logging_steps:
|
151 |
+
desc: null
|
152 |
+
value: 500
|
153 |
+
logging_strategy:
|
154 |
+
desc: null
|
155 |
+
value: IntervalStrategy.STEPS
|
156 |
+
lr_scheduler_type:
|
157 |
+
desc: null
|
158 |
+
value: SchedulerType.LINEAR
|
159 |
+
max_eval_samples:
|
160 |
+
desc: null
|
161 |
+
value: 2000
|
162 |
+
max_grad_norm:
|
163 |
+
desc: null
|
164 |
+
value: 1.0
|
165 |
+
max_seq_length:
|
166 |
+
desc: null
|
167 |
+
value: 4096
|
168 |
+
max_steps:
|
169 |
+
desc: null
|
170 |
+
value: -1
|
171 |
+
metric_for_best_model:
|
172 |
+
desc: null
|
173 |
+
value: null
|
174 |
+
mlm_probability:
|
175 |
+
desc: null
|
176 |
+
value: 0.15
|
177 |
+
model_name_or_path:
|
178 |
+
desc: null
|
179 |
+
value: null
|
180 |
+
model_type:
|
181 |
+
desc: null
|
182 |
+
value: big_bird
|
183 |
+
mp_parameters:
|
184 |
+
desc: null
|
185 |
+
value: ''
|
186 |
+
no_cuda:
|
187 |
+
desc: null
|
188 |
+
value: false
|
189 |
+
num_train_epochs:
|
190 |
+
desc: null
|
191 |
+
value: 5.0
|
192 |
+
output_dir:
|
193 |
+
desc: null
|
194 |
+
value: ./
|
195 |
+
overwrite_cache:
|
196 |
+
desc: null
|
197 |
+
value: false
|
198 |
+
overwrite_output_dir:
|
199 |
+
desc: null
|
200 |
+
value: true
|
201 |
+
pad_to_max_length:
|
202 |
+
desc: null
|
203 |
+
value: false
|
204 |
+
past_index:
|
205 |
+
desc: null
|
206 |
+
value: -1
|
207 |
+
per_device_eval_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 1
|
210 |
+
per_device_train_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: 1
|
213 |
+
per_gpu_eval_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
per_gpu_train_batch_size:
|
217 |
+
desc: null
|
218 |
+
value: null
|
219 |
+
prediction_loss_only:
|
220 |
+
desc: null
|
221 |
+
value: false
|
222 |
+
preprocessing_num_workers:
|
223 |
+
desc: null
|
224 |
+
value: 96
|
225 |
+
push_to_hub:
|
226 |
+
desc: null
|
227 |
+
value: true
|
228 |
+
push_to_hub_model_id:
|
229 |
+
desc: null
|
230 |
+
value: ''
|
231 |
+
push_to_hub_organization:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
push_to_hub_token:
|
235 |
+
desc: null
|
236 |
+
value: null
|
237 |
+
remove_unused_columns:
|
238 |
+
desc: null
|
239 |
+
value: true
|
240 |
+
report_to:
|
241 |
+
desc: null
|
242 |
+
value:
|
243 |
+
- tensorboard
|
244 |
+
- wandb
|
245 |
+
resume_from_checkpoint:
|
246 |
+
desc: null
|
247 |
+
value: null
|
248 |
+
run_name:
|
249 |
+
desc: null
|
250 |
+
value: ./
|
251 |
+
save_on_each_node:
|
252 |
+
desc: null
|
253 |
+
value: false
|
254 |
+
save_optimizer:
|
255 |
+
desc: null
|
256 |
+
value: true
|
257 |
+
save_steps:
|
258 |
+
desc: null
|
259 |
+
value: 20000
|
260 |
+
save_strategy:
|
261 |
+
desc: null
|
262 |
+
value: IntervalStrategy.STEPS
|
263 |
+
save_total_limit:
|
264 |
+
desc: null
|
265 |
+
value: 5
|
266 |
+
seed:
|
267 |
+
desc: null
|
268 |
+
value: 42
|
269 |
+
sharded_ddp:
|
270 |
+
desc: null
|
271 |
+
value: []
|
272 |
+
skip_memory_metrics:
|
273 |
+
desc: null
|
274 |
+
value: true
|
275 |
+
tokenizer_name:
|
276 |
+
desc: null
|
277 |
+
value: ./
|
278 |
+
tpu_metrics_debug:
|
279 |
+
desc: null
|
280 |
+
value: false
|
281 |
+
tpu_num_cores:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
train_ref_file:
|
285 |
+
desc: null
|
286 |
+
value: null
|
287 |
+
use_fast_tokenizer:
|
288 |
+
desc: null
|
289 |
+
value: true
|
290 |
+
use_legacy_prediction_loop:
|
291 |
+
desc: null
|
292 |
+
value: false
|
293 |
+
validation_ref_file:
|
294 |
+
desc: null
|
295 |
+
value: null
|
296 |
+
validation_split_percentage:
|
297 |
+
desc: null
|
298 |
+
value: 5
|
299 |
+
warmup_ratio:
|
300 |
+
desc: null
|
301 |
+
value: 0.0
|
302 |
+
warmup_steps:
|
303 |
+
desc: null
|
304 |
+
value: 10000
|
305 |
+
weight_decay:
|
306 |
+
desc: null
|
307 |
+
value: 0.0095
|
wandb/run-20210714_232703-1jijl27o/files/output.log
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype <class 'jax._src.numpy.lax_numpy.int64'> requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more.
|
2 |
+
lax._check_user_dtype_supported(dtype, "zeros")
|
3 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
|
4 |
+
warnings.warn(
|
5 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
6 |
+
warnings.warn(
|
7 |
+
Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s]2021-07-14 23:40:46.044618: E external/org_tensorflow/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_client.cc:2036] Execution of replica 0 failed: Resource exhausted: Attempting to allocate 17.0K. That was not possible. There are 48.0K free. Due to fragmentation, the largest contiguous region of free memory is 16.0K.; (0x0x0_HBM0)
|
8 |
+
Epoch ... (1/5): 0%| | 0/5 [13:26<?, ?it/s]
|
9 |
+
Traceback (most recent call last):
|
10 |
+
File "./run_mlm_flax.py", line 806, in <module>
|
11 |
+
train_batch_idx = generate_batch_splits(train_samples_idx, train_batch_size // grad_accum_steps)
|
12 |
+
File "./run_mlm_flax.py", line 263, in generate_batch_splits
|
13 |
+
batch_idx = np.split(samples_idx, sections_split)
|
14 |
+
File "<__array_function__ internals>", line 5, in split
|
15 |
+
File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 874, in split
|
16 |
+
return array_split(ary, indices_or_sections, axis)
|
17 |
+
File "<__array_function__ internals>", line 5, in array_split
|
18 |
+
File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 790, in array_split
|
19 |
+
sub_arys.append(_nx.swapaxes(sary[st:end], axis, 0))
|
20 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5009, in _rewriting_take
|
21 |
+
return _gather(arr, treedef, static_idx, dynamic_idx, indices_are_sorted,
|
22 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5028, in _gather
|
23 |
+
y = lax.gather(
|
24 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/lax/lax.py", line 984, in gather
|
25 |
+
return gather_p.bind(
|
26 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 264, in bind
|
27 |
+
out = top_trace.process_primitive(self, tracers, params)
|
28 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 603, in process_primitive
|
29 |
+
return primitive.impl(*tracers, **params)
|
30 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 249, in apply_primitive
|
31 |
+
return compiled_fun(*args)
|
32 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 365, in _execute_compiled_primitive
|
33 |
+
out_bufs = compiled.execute(input_bufs)
|
34 |
+
RuntimeError: Resource exhausted: Attempting to allocate 17.0K. That was not possible. There are 48.0K free. Due to fragmentation, the largest contiguous region of free memory is 16.0K.; (0x0x0_HBM0)
|
wandb/run-20210714_232703-1jijl27o/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|
wandb/run-20210714_232703-1jijl27o/files/wandb-metadata.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2021-07-14T23:27:05.989594",
|
5 |
+
"startedAt": "2021-07-14T23:27:03.993085",
|
6 |
+
"docker": null,
|
7 |
+
"cpu_count": 96,
|
8 |
+
"cuda": null,
|
9 |
+
"args": [
|
10 |
+
"--push_to_hub",
|
11 |
+
"--output_dir=./",
|
12 |
+
"--model_type=big_bird",
|
13 |
+
"--config_name=./",
|
14 |
+
"--tokenizer_name=./",
|
15 |
+
"--max_seq_length=4096",
|
16 |
+
"--weight_decay=0.0095",
|
17 |
+
"--warmup_steps=10000",
|
18 |
+
"--overwrite_output_dir",
|
19 |
+
"--adam_beta1=0.9",
|
20 |
+
"--adam_beta2=0.98",
|
21 |
+
"--logging_steps=500",
|
22 |
+
"--eval_steps=20000",
|
23 |
+
"--num_train_epochs=5",
|
24 |
+
"--preprocessing_num_workers=96",
|
25 |
+
"--save_steps=20000",
|
26 |
+
"--learning_rate=5e-5",
|
27 |
+
"--per_device_train_batch_size=1",
|
28 |
+
"--per_device_eval_batch_size=1",
|
29 |
+
"--save_total_limit=5",
|
30 |
+
"--max_eval_samples=2000",
|
31 |
+
"--overwrite_cache",
|
32 |
+
"False",
|
33 |
+
"--gradient_accumulation_steps=4"
|
34 |
+
],
|
35 |
+
"state": "running",
|
36 |
+
"program": "./run_mlm_flax.py",
|
37 |
+
"codePath": "run_mlm_flax.py",
|
38 |
+
"git": {
|
39 |
+
"remote": "https://huggingface.co/flax-community/pino-roberta-base",
|
40 |
+
"commit": "9915204ae8501d6ae8425aa8f0b4b290cbc7e5b6"
|
41 |
+
},
|
42 |
+
"email": null,
|
43 |
+
"root": "/home/dat/pino-roberta-base",
|
44 |
+
"host": "t1v-n-f5c06ea1-w-0",
|
45 |
+
"username": "dat",
|
46 |
+
"executable": "/home/dat/pino/bin/python"
|
47 |
+
}
|
wandb/run-20210714_232703-1jijl27o/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
wandb/run-20210714_232703-1jijl27o/logs/debug-internal.log
ADDED
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-14 23:27:04,669 INFO MainThread:606786 [internal.py:wandb_internal():88] W&B internal server running at pid: 606786, started at: 2021-07-14 23:27:04.669257
|
2 |
+
2021-07-14 23:27:04,671 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: check_version
|
3 |
+
2021-07-14 23:27:04,671 INFO WriterThread:606786 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/run-1jijl27o.wandb
|
4 |
+
2021-07-14 23:27:04,672 DEBUG SenderThread:606786 [sender.py:send():179] send: header
|
5 |
+
2021-07-14 23:27:04,672 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: check_version
|
6 |
+
2021-07-14 23:27:04,709 DEBUG SenderThread:606786 [sender.py:send():179] send: run
|
7 |
+
2021-07-14 23:27:04,879 INFO SenderThread:606786 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files
|
8 |
+
2021-07-14 23:27:04,880 INFO SenderThread:606786 [sender.py:_start_run_threads():716] run started: 1jijl27o with start time 1626305224
|
9 |
+
2021-07-14 23:27:04,880 DEBUG SenderThread:606786 [sender.py:send():179] send: summary
|
10 |
+
2021-07-14 23:27:04,880 INFO SenderThread:606786 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
11 |
+
2021-07-14 23:27:04,880 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: run_start
|
12 |
+
2021-07-14 23:27:05,883 INFO Thread-8 :606786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/wandb-summary.json
|
13 |
+
2021-07-14 23:27:05,989 DEBUG HandlerThread:606786 [meta.py:__init__():39] meta init
|
14 |
+
2021-07-14 23:27:05,989 DEBUG HandlerThread:606786 [meta.py:__init__():53] meta init done
|
15 |
+
2021-07-14 23:27:05,989 DEBUG HandlerThread:606786 [meta.py:probe():210] probe
|
16 |
+
2021-07-14 23:27:05,990 DEBUG HandlerThread:606786 [meta.py:_setup_git():200] setup git
|
17 |
+
2021-07-14 23:27:06,020 DEBUG HandlerThread:606786 [meta.py:_setup_git():207] setup git done
|
18 |
+
2021-07-14 23:27:06,020 DEBUG HandlerThread:606786 [meta.py:_save_pip():57] save pip
|
19 |
+
2021-07-14 23:27:06,021 DEBUG HandlerThread:606786 [meta.py:_save_pip():71] save pip done
|
20 |
+
2021-07-14 23:27:06,021 DEBUG HandlerThread:606786 [meta.py:probe():252] probe done
|
21 |
+
2021-07-14 23:27:06,024 DEBUG SenderThread:606786 [sender.py:send():179] send: files
|
22 |
+
2021-07-14 23:27:06,024 INFO SenderThread:606786 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
|
23 |
+
2021-07-14 23:27:06,032 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
24 |
+
2021-07-14 23:27:06,033 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
25 |
+
2021-07-14 23:27:06,166 DEBUG SenderThread:606786 [sender.py:send():179] send: config
|
26 |
+
2021-07-14 23:27:06,166 DEBUG SenderThread:606786 [sender.py:send():179] send: config
|
27 |
+
2021-07-14 23:27:06,166 DEBUG SenderThread:606786 [sender.py:send():179] send: config
|
28 |
+
2021-07-14 23:27:06,504 INFO Thread-11 :606786 [upload_job.py:push():137] Uploaded file /tmp/tmpf02i07o_wandb/gludn8x0-wandb-metadata.json
|
29 |
+
2021-07-14 23:27:06,881 INFO Thread-8 :606786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/requirements.txt
|
30 |
+
2021-07-14 23:27:06,881 INFO Thread-8 :606786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/wandb-metadata.json
|
31 |
+
2021-07-14 23:27:06,881 INFO Thread-8 :606786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/output.log
|
32 |
+
2021-07-14 23:27:20,887 INFO Thread-8 :606786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/output.log
|
33 |
+
2021-07-14 23:27:21,168 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
34 |
+
2021-07-14 23:27:21,168 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
35 |
+
2021-07-14 23:27:22,888 INFO Thread-8 :606786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/output.log
|
36 |
+
2021-07-14 23:27:34,069 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
37 |
+
2021-07-14 23:27:35,894 INFO Thread-8 :606786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/config.yaml
|
38 |
+
2021-07-14 23:27:36,301 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
39 |
+
2021-07-14 23:27:36,302 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
40 |
+
2021-07-14 23:27:51,435 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
41 |
+
2021-07-14 23:27:51,435 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
42 |
+
2021-07-14 23:28:04,145 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
43 |
+
2021-07-14 23:28:06,571 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
44 |
+
2021-07-14 23:28:06,571 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
45 |
+
2021-07-14 23:28:21,704 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
46 |
+
2021-07-14 23:28:21,704 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
47 |
+
2021-07-14 23:28:34,210 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
48 |
+
2021-07-14 23:28:36,838 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
49 |
+
2021-07-14 23:28:36,838 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
50 |
+
2021-07-14 23:28:51,969 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
51 |
+
2021-07-14 23:28:51,970 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
52 |
+
2021-07-14 23:29:04,276 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
53 |
+
2021-07-14 23:29:11,606 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
54 |
+
2021-07-14 23:29:11,606 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
55 |
+
2021-07-14 23:29:26,738 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
56 |
+
2021-07-14 23:29:26,739 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
57 |
+
2021-07-14 23:29:34,341 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
58 |
+
2021-07-14 23:29:41,875 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
59 |
+
2021-07-14 23:29:41,875 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
60 |
+
2021-07-14 23:29:57,008 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
61 |
+
2021-07-14 23:29:57,009 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
62 |
+
2021-07-14 23:30:04,407 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
63 |
+
2021-07-14 23:30:12,141 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
64 |
+
2021-07-14 23:30:12,142 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
65 |
+
2021-07-14 23:30:27,275 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
66 |
+
2021-07-14 23:30:27,276 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
67 |
+
2021-07-14 23:30:34,475 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
68 |
+
2021-07-14 23:30:42,407 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
69 |
+
2021-07-14 23:30:42,407 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
70 |
+
2021-07-14 23:30:57,540 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
71 |
+
2021-07-14 23:30:57,541 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
72 |
+
2021-07-14 23:31:04,540 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
73 |
+
2021-07-14 23:31:12,674 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
74 |
+
2021-07-14 23:31:12,674 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
75 |
+
2021-07-14 23:31:27,806 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
76 |
+
2021-07-14 23:31:27,807 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
77 |
+
2021-07-14 23:31:34,612 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
78 |
+
2021-07-14 23:31:42,940 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
79 |
+
2021-07-14 23:31:42,940 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
80 |
+
2021-07-14 23:31:58,074 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
81 |
+
2021-07-14 23:31:58,075 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
82 |
+
2021-07-14 23:32:04,688 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
83 |
+
2021-07-14 23:32:13,208 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
84 |
+
2021-07-14 23:32:13,208 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
85 |
+
2021-07-14 23:32:28,339 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
86 |
+
2021-07-14 23:32:28,340 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
87 |
+
2021-07-14 23:32:34,767 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
88 |
+
2021-07-14 23:32:43,471 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
89 |
+
2021-07-14 23:32:43,472 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
90 |
+
2021-07-14 23:32:58,604 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
91 |
+
2021-07-14 23:32:58,604 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
92 |
+
2021-07-14 23:33:04,843 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
93 |
+
2021-07-14 23:33:13,739 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
94 |
+
2021-07-14 23:33:13,739 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
95 |
+
2021-07-14 23:33:28,873 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
96 |
+
2021-07-14 23:33:28,874 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
97 |
+
2021-07-14 23:33:34,917 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
98 |
+
2021-07-14 23:33:44,007 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
99 |
+
2021-07-14 23:33:44,007 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
100 |
+
2021-07-14 23:33:59,140 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
101 |
+
2021-07-14 23:33:59,141 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
102 |
+
2021-07-14 23:34:04,989 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
103 |
+
2021-07-14 23:34:14,274 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
104 |
+
2021-07-14 23:34:14,275 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
105 |
+
2021-07-14 23:34:29,406 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
106 |
+
2021-07-14 23:34:29,406 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
107 |
+
2021-07-14 23:34:35,063 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
108 |
+
2021-07-14 23:34:44,538 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
109 |
+
2021-07-14 23:34:44,538 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
110 |
+
2021-07-14 23:34:59,670 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
111 |
+
2021-07-14 23:34:59,671 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
112 |
+
2021-07-14 23:35:05,136 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
113 |
+
2021-07-14 23:35:14,806 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
114 |
+
2021-07-14 23:35:14,806 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
115 |
+
2021-07-14 23:35:29,937 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
116 |
+
2021-07-14 23:35:29,938 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
117 |
+
2021-07-14 23:35:35,212 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
118 |
+
2021-07-14 23:35:45,083 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
119 |
+
2021-07-14 23:35:45,084 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
120 |
+
2021-07-14 23:36:00,215 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
121 |
+
2021-07-14 23:36:00,215 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
122 |
+
2021-07-14 23:36:05,289 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
123 |
+
2021-07-14 23:36:15,359 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
124 |
+
2021-07-14 23:36:15,359 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
125 |
+
2021-07-14 23:36:30,491 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
126 |
+
2021-07-14 23:36:30,492 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
127 |
+
2021-07-14 23:36:35,354 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
128 |
+
2021-07-14 23:36:45,626 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
129 |
+
2021-07-14 23:36:45,626 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
130 |
+
2021-07-14 23:37:00,758 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
131 |
+
2021-07-14 23:37:00,758 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
132 |
+
2021-07-14 23:37:05,418 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
133 |
+
2021-07-14 23:37:15,896 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
134 |
+
2021-07-14 23:37:15,896 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
135 |
+
2021-07-14 23:37:31,030 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
136 |
+
2021-07-14 23:37:31,031 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
137 |
+
2021-07-14 23:37:35,484 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
138 |
+
2021-07-14 23:37:46,162 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
139 |
+
2021-07-14 23:37:46,162 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
140 |
+
2021-07-14 23:38:01,293 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
141 |
+
2021-07-14 23:38:01,294 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
142 |
+
2021-07-14 23:38:05,552 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
143 |
+
2021-07-14 23:38:16,425 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
144 |
+
2021-07-14 23:38:16,425 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
145 |
+
2021-07-14 23:38:31,557 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
146 |
+
2021-07-14 23:38:31,557 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
147 |
+
2021-07-14 23:38:35,624 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
148 |
+
2021-07-14 23:38:46,691 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
149 |
+
2021-07-14 23:38:46,691 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
150 |
+
2021-07-14 23:39:01,823 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
151 |
+
2021-07-14 23:39:01,824 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
152 |
+
2021-07-14 23:39:05,695 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
153 |
+
2021-07-14 23:39:16,955 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
154 |
+
2021-07-14 23:39:16,955 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
155 |
+
2021-07-14 23:39:32,087 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
156 |
+
2021-07-14 23:39:32,088 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
157 |
+
2021-07-14 23:39:35,769 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
158 |
+
2021-07-14 23:39:47,220 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
159 |
+
2021-07-14 23:39:47,221 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
160 |
+
2021-07-14 23:40:02,350 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
161 |
+
2021-07-14 23:40:02,351 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
162 |
+
2021-07-14 23:40:05,843 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
163 |
+
2021-07-14 23:40:17,481 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
164 |
+
2021-07-14 23:40:17,482 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
165 |
+
2021-07-14 23:40:32,615 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: stop_status
|
166 |
+
2021-07-14 23:40:32,616 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: stop_status
|
167 |
+
2021-07-14 23:40:35,918 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
168 |
+
2021-07-14 23:40:47,205 INFO Thread-8 :606786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/output.log
|
169 |
+
2021-07-14 23:40:48,363 DEBUG SenderThread:606786 [sender.py:send():179] send: telemetry
|
170 |
+
2021-07-14 23:40:48,364 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit
|
171 |
+
2021-07-14 23:40:48,364 DEBUG SenderThread:606786 [sender.py:send():179] send: exit
|
172 |
+
2021-07-14 23:40:48,364 INFO SenderThread:606786 [sender.py:send_exit():287] handling exit code: 1
|
173 |
+
2021-07-14 23:40:48,366 INFO SenderThread:606786 [sender.py:send_exit():295] send defer
|
174 |
+
2021-07-14 23:40:48,366 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit
|
175 |
+
2021-07-14 23:40:48,367 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer
|
176 |
+
2021-07-14 23:40:48,367 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 0
|
177 |
+
2021-07-14 23:40:48,367 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer
|
178 |
+
2021-07-14 23:40:48,367 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 0
|
179 |
+
2021-07-14 23:40:48,367 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 1
|
180 |
+
2021-07-14 23:40:48,368 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer
|
181 |
+
2021-07-14 23:40:48,368 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 1
|
182 |
+
2021-07-14 23:40:48,446 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer
|
183 |
+
2021-07-14 23:40:48,446 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 1
|
184 |
+
2021-07-14 23:40:48,446 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 2
|
185 |
+
2021-07-14 23:40:48,446 DEBUG SenderThread:606786 [sender.py:send():179] send: stats
|
186 |
+
2021-07-14 23:40:48,447 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer
|
187 |
+
2021-07-14 23:40:48,447 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 2
|
188 |
+
2021-07-14 23:40:48,447 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer
|
189 |
+
2021-07-14 23:40:48,447 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 2
|
190 |
+
2021-07-14 23:40:48,447 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 3
|
191 |
+
2021-07-14 23:40:48,447 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer
|
192 |
+
2021-07-14 23:40:48,448 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 3
|
193 |
+
2021-07-14 23:40:48,448 DEBUG SenderThread:606786 [sender.py:send():179] send: summary
|
194 |
+
2021-07-14 23:40:48,448 INFO SenderThread:606786 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
195 |
+
2021-07-14 23:40:48,449 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer
|
196 |
+
2021-07-14 23:40:48,449 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 3
|
197 |
+
2021-07-14 23:40:48,449 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 4
|
198 |
+
2021-07-14 23:40:48,449 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer
|
199 |
+
2021-07-14 23:40:48,449 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 4
|
200 |
+
2021-07-14 23:40:48,449 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer
|
201 |
+
2021-07-14 23:40:48,449 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 4
|
202 |
+
2021-07-14 23:40:48,469 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit
|
203 |
+
2021-07-14 23:40:48,629 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 5
|
204 |
+
2021-07-14 23:40:48,629 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit
|
205 |
+
2021-07-14 23:40:48,630 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer
|
206 |
+
2021-07-14 23:40:48,630 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 5
|
207 |
+
2021-07-14 23:40:48,630 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer
|
208 |
+
2021-07-14 23:40:48,630 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 5
|
209 |
+
2021-07-14 23:40:48,630 INFO SenderThread:606786 [dir_watcher.py:finish():282] shutting down directory watcher
|
210 |
+
2021-07-14 23:40:48,732 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit
|
211 |
+
2021-07-14 23:40:49,206 INFO Thread-8 :606786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/config.yaml
|
212 |
+
2021-07-14 23:40:49,207 INFO SenderThread:606786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/output.log
|
213 |
+
2021-07-14 23:40:49,207 INFO SenderThread:606786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/wandb-summary.json
|
214 |
+
2021-07-14 23:40:49,207 INFO SenderThread:606786 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files
|
215 |
+
2021-07-14 23:40:49,207 INFO SenderThread:606786 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/requirements.txt requirements.txt
|
216 |
+
2021-07-14 23:40:49,208 INFO SenderThread:606786 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/output.log output.log
|
217 |
+
2021-07-14 23:40:49,208 INFO SenderThread:606786 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/wandb-metadata.json wandb-metadata.json
|
218 |
+
2021-07-14 23:40:49,208 INFO SenderThread:606786 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/config.yaml config.yaml
|
219 |
+
2021-07-14 23:40:49,208 INFO SenderThread:606786 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/wandb-summary.json wandb-summary.json
|
220 |
+
2021-07-14 23:40:49,209 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 6
|
221 |
+
2021-07-14 23:40:49,209 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit
|
222 |
+
2021-07-14 23:40:49,210 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer
|
223 |
+
2021-07-14 23:40:49,210 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 6
|
224 |
+
2021-07-14 23:40:49,215 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer
|
225 |
+
2021-07-14 23:40:49,216 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 6
|
226 |
+
2021-07-14 23:40:49,216 INFO SenderThread:606786 [file_pusher.py:finish():177] shutting down file pusher
|
227 |
+
2021-07-14 23:40:49,311 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit
|
228 |
+
2021-07-14 23:40:49,311 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit
|
229 |
+
2021-07-14 23:40:49,413 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit
|
230 |
+
2021-07-14 23:40:49,413 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit
|
231 |
+
2021-07-14 23:40:49,515 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit
|
232 |
+
2021-07-14 23:40:49,515 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit
|
233 |
+
2021-07-14 23:40:49,617 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit
|
234 |
+
2021-07-14 23:40:49,618 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit
|
235 |
+
2021-07-14 23:40:49,652 INFO Thread-12 :606786 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/requirements.txt
|
236 |
+
2021-07-14 23:40:49,653 INFO Thread-14 :606786 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/config.yaml
|
237 |
+
2021-07-14 23:40:49,698 INFO Thread-15 :606786 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/wandb-summary.json
|
238 |
+
2021-07-14 23:40:49,719 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit
|
239 |
+
2021-07-14 23:40:49,720 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit
|
240 |
+
2021-07-14 23:40:49,739 INFO Thread-13 :606786 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/files/output.log
|
241 |
+
2021-07-14 23:40:49,821 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit
|
242 |
+
2021-07-14 23:40:49,822 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit
|
243 |
+
2021-07-14 23:40:49,923 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit
|
244 |
+
2021-07-14 23:40:49,923 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit
|
245 |
+
2021-07-14 23:40:49,940 INFO Thread-7 :606786 [sender.py:transition_state():308] send defer: 7
|
246 |
+
2021-07-14 23:40:49,940 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer
|
247 |
+
2021-07-14 23:40:49,940 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 7
|
248 |
+
2021-07-14 23:40:49,940 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer
|
249 |
+
2021-07-14 23:40:49,940 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 7
|
250 |
+
2021-07-14 23:40:50,025 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit
|
251 |
+
2021-07-14 23:40:50,227 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 8
|
252 |
+
2021-07-14 23:40:50,228 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit
|
253 |
+
2021-07-14 23:40:50,228 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer
|
254 |
+
2021-07-14 23:40:50,228 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 8
|
255 |
+
2021-07-14 23:40:50,229 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer
|
256 |
+
2021-07-14 23:40:50,229 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 8
|
257 |
+
2021-07-14 23:40:50,229 INFO SenderThread:606786 [sender.py:transition_state():308] send defer: 9
|
258 |
+
2021-07-14 23:40:50,229 DEBUG SenderThread:606786 [sender.py:send():179] send: final
|
259 |
+
2021-07-14 23:40:50,229 DEBUG SenderThread:606786 [sender.py:send():179] send: footer
|
260 |
+
2021-07-14 23:40:50,230 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: defer
|
261 |
+
2021-07-14 23:40:50,230 INFO HandlerThread:606786 [handler.py:handle_request_defer():141] handle defer: 9
|
262 |
+
2021-07-14 23:40:50,230 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: defer
|
263 |
+
2021-07-14 23:40:50,230 INFO SenderThread:606786 [sender.py:send_request_defer():304] handle sender defer: 9
|
264 |
+
2021-07-14 23:40:50,330 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: poll_exit
|
265 |
+
2021-07-14 23:40:50,330 DEBUG SenderThread:606786 [sender.py:send_request():193] send_request: poll_exit
|
266 |
+
2021-07-14 23:40:50,330 INFO SenderThread:606786 [file_pusher.py:join():182] waiting for file pusher
|
267 |
+
2021-07-14 23:40:50,332 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: get_summary
|
268 |
+
2021-07-14 23:40:50,332 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: sampled_history
|
269 |
+
2021-07-14 23:40:50,333 DEBUG HandlerThread:606786 [handler.py:handle_request():124] handle_request: shutdown
|
270 |
+
2021-07-14 23:40:50,333 INFO HandlerThread:606786 [handler.py:finish():638] shutting down handler
|
271 |
+
2021-07-14 23:40:51,230 INFO WriterThread:606786 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/run-1jijl27o.wandb
|
272 |
+
2021-07-14 23:40:51,330 INFO SenderThread:606786 [sender.py:finish():945] shutting down sender
|
273 |
+
2021-07-14 23:40:51,331 INFO SenderThread:606786 [file_pusher.py:finish():177] shutting down file pusher
|
274 |
+
2021-07-14 23:40:51,331 INFO SenderThread:606786 [file_pusher.py:join():182] waiting for file pusher
|
275 |
+
2021-07-14 23:40:51,334 INFO MainThread:606786 [internal.py:handle_exit():78] Internal process exited
|
wandb/run-20210714_232703-1jijl27o/logs/debug.log
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-14 23:27:03,994 INFO MainThread:605532 [wandb_setup.py:_flush():69] setting env: {}
|
2 |
+
2021-07-14 23:27:03,994 INFO MainThread:605532 [wandb_setup.py:_flush():69] setting login settings: {}
|
3 |
+
2021-07-14 23:27:03,994 INFO MainThread:605532 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/logs/debug.log
|
4 |
+
2021-07-14 23:27:03,994 INFO MainThread:605532 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_232703-1jijl27o/logs/debug-internal.log
|
5 |
+
2021-07-14 23:27:03,995 INFO MainThread:605532 [wandb_init.py:init():370] calling init triggers
|
6 |
+
2021-07-14 23:27:03,995 INFO MainThread:605532 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
|
7 |
+
config: {}
|
8 |
+
2021-07-14 23:27:03,995 INFO MainThread:605532 [wandb_init.py:init():419] starting backend
|
9 |
+
2021-07-14 23:27:03,995 INFO MainThread:605532 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
10 |
+
2021-07-14 23:27:04,041 INFO MainThread:605532 [backend.py:ensure_launched():135] starting backend process...
|
11 |
+
2021-07-14 23:27:04,085 INFO MainThread:605532 [backend.py:ensure_launched():139] started backend process with pid: 606786
|
12 |
+
2021-07-14 23:27:04,087 INFO MainThread:605532 [wandb_init.py:init():424] backend started and connected
|
13 |
+
2021-07-14 23:27:04,090 INFO MainThread:605532 [wandb_init.py:init():472] updated telemetry
|
14 |
+
2021-07-14 23:27:04,091 INFO MainThread:605532 [wandb_init.py:init():491] communicating current version
|
15 |
+
2021-07-14 23:27:04,708 INFO MainThread:605532 [wandb_init.py:init():496] got version response
|
16 |
+
2021-07-14 23:27:04,708 INFO MainThread:605532 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
|
17 |
+
2021-07-14 23:27:04,879 INFO MainThread:605532 [wandb_init.py:init():529] starting run threads in backend
|
18 |
+
2021-07-14 23:27:06,027 INFO MainThread:605532 [wandb_run.py:_console_start():1623] atexit reg
|
19 |
+
2021-07-14 23:27:06,028 INFO MainThread:605532 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
|
20 |
+
2021-07-14 23:27:06,028 INFO MainThread:605532 [wandb_run.py:_redirect():1502] Redirecting console.
|
21 |
+
2021-07-14 23:27:06,030 INFO MainThread:605532 [wandb_run.py:_redirect():1558] Redirects installed.
|
22 |
+
2021-07-14 23:27:06,030 INFO MainThread:605532 [wandb_init.py:init():554] run started, returning control to user process
|
23 |
+
2021-07-14 23:27:06,038 INFO MainThread:605532 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_23-26-56_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
+
2021-07-14 23:27:06,040 INFO MainThread:605532 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
|
25 |
+
2021-07-14 23:27:06,041 INFO MainThread:605532 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000}
|
26 |
+
2021-07-14 23:40:46,049 INFO MainThread:605532 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
|
27 |
+
2021-07-14 23:40:46,050 INFO MainThread:605532 [wandb_run.py:_restore():1565] restore
|
28 |
+
2021-07-14 23:40:48,367 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
29 |
+
wandb_count: 1
|
30 |
+
}
|
31 |
+
pusher_stats {
|
32 |
+
uploaded_bytes: 1448
|
33 |
+
total_bytes: 1448
|
34 |
+
}
|
35 |
+
|
36 |
+
2021-07-14 23:40:48,630 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
37 |
+
wandb_count: 1
|
38 |
+
}
|
39 |
+
pusher_stats {
|
40 |
+
uploaded_bytes: 1448
|
41 |
+
total_bytes: 1448
|
42 |
+
}
|
43 |
+
|
44 |
+
2021-07-14 23:40:49,210 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
45 |
+
wandb_count: 4
|
46 |
+
}
|
47 |
+
pusher_stats {
|
48 |
+
uploaded_bytes: 1448
|
49 |
+
total_bytes: 11299
|
50 |
+
}
|
51 |
+
|
52 |
+
2021-07-14 23:40:49,312 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
53 |
+
wandb_count: 5
|
54 |
+
}
|
55 |
+
pusher_stats {
|
56 |
+
uploaded_bytes: 1448
|
57 |
+
total_bytes: 11301
|
58 |
+
}
|
59 |
+
|
60 |
+
2021-07-14 23:40:49,414 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
61 |
+
wandb_count: 5
|
62 |
+
}
|
63 |
+
pusher_stats {
|
64 |
+
uploaded_bytes: 11301
|
65 |
+
total_bytes: 11301
|
66 |
+
}
|
67 |
+
|
68 |
+
2021-07-14 23:40:49,516 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
69 |
+
wandb_count: 5
|
70 |
+
}
|
71 |
+
pusher_stats {
|
72 |
+
uploaded_bytes: 11301
|
73 |
+
total_bytes: 11301
|
74 |
+
}
|
75 |
+
|
76 |
+
2021-07-14 23:40:49,618 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
77 |
+
wandb_count: 5
|
78 |
+
}
|
79 |
+
pusher_stats {
|
80 |
+
uploaded_bytes: 11301
|
81 |
+
total_bytes: 11301
|
82 |
+
}
|
83 |
+
|
84 |
+
2021-07-14 23:40:49,720 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
85 |
+
wandb_count: 5
|
86 |
+
}
|
87 |
+
pusher_stats {
|
88 |
+
uploaded_bytes: 11301
|
89 |
+
total_bytes: 11301
|
90 |
+
}
|
91 |
+
|
92 |
+
2021-07-14 23:40:49,822 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
93 |
+
wandb_count: 5
|
94 |
+
}
|
95 |
+
pusher_stats {
|
96 |
+
uploaded_bytes: 11301
|
97 |
+
total_bytes: 11301
|
98 |
+
}
|
99 |
+
|
100 |
+
2021-07-14 23:40:49,924 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
101 |
+
wandb_count: 5
|
102 |
+
}
|
103 |
+
pusher_stats {
|
104 |
+
uploaded_bytes: 11301
|
105 |
+
total_bytes: 11301
|
106 |
+
}
|
107 |
+
|
108 |
+
2021-07-14 23:40:50,228 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
109 |
+
wandb_count: 5
|
110 |
+
}
|
111 |
+
pusher_stats {
|
112 |
+
uploaded_bytes: 11301
|
113 |
+
total_bytes: 11301
|
114 |
+
}
|
115 |
+
|
116 |
+
2021-07-14 23:40:50,331 INFO MainThread:605532 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
|
117 |
+
exit_result {
|
118 |
+
}
|
119 |
+
file_counts {
|
120 |
+
wandb_count: 5
|
121 |
+
}
|
122 |
+
pusher_stats {
|
123 |
+
uploaded_bytes: 11301
|
124 |
+
total_bytes: 11301
|
125 |
+
}
|
126 |
+
|
127 |
+
2021-07-14 23:40:51,653 INFO MainThread:605532 [wandb_run.py:_show_files():1937] logging synced files
|
wandb/run-20210714_232703-1jijl27o/run-1jijl27o.wandb
ADDED
Binary file (13.6 kB). View file
|
|
wandb/run-20210714_234615-3p6vlfc3/files/config.yaml
ADDED
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
4: 3.8.10
|
17 |
+
5: 0.10.33
|
18 |
+
6: 4.9.0.dev0
|
19 |
+
8:
|
20 |
+
- 5
|
21 |
+
adafactor:
|
22 |
+
desc: null
|
23 |
+
value: false
|
24 |
+
adam_beta1:
|
25 |
+
desc: null
|
26 |
+
value: 0.9
|
27 |
+
adam_beta2:
|
28 |
+
desc: null
|
29 |
+
value: 0.98
|
30 |
+
adam_epsilon:
|
31 |
+
desc: null
|
32 |
+
value: 1.0e-08
|
33 |
+
cache_dir:
|
34 |
+
desc: null
|
35 |
+
value: null
|
36 |
+
config_name:
|
37 |
+
desc: null
|
38 |
+
value: ./
|
39 |
+
dataloader_drop_last:
|
40 |
+
desc: null
|
41 |
+
value: false
|
42 |
+
dataloader_num_workers:
|
43 |
+
desc: null
|
44 |
+
value: 0
|
45 |
+
dataloader_pin_memory:
|
46 |
+
desc: null
|
47 |
+
value: true
|
48 |
+
dataset_config_name:
|
49 |
+
desc: null
|
50 |
+
value: null
|
51 |
+
dataset_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
ddp_find_unused_parameters:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
debug:
|
58 |
+
desc: null
|
59 |
+
value: []
|
60 |
+
deepspeed:
|
61 |
+
desc: null
|
62 |
+
value: null
|
63 |
+
disable_tqdm:
|
64 |
+
desc: null
|
65 |
+
value: false
|
66 |
+
do_eval:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_predict:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_train:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
dtype:
|
76 |
+
desc: null
|
77 |
+
value: float32
|
78 |
+
eval_accumulation_steps:
|
79 |
+
desc: null
|
80 |
+
value: null
|
81 |
+
eval_steps:
|
82 |
+
desc: null
|
83 |
+
value: 20000
|
84 |
+
evaluation_strategy:
|
85 |
+
desc: null
|
86 |
+
value: IntervalStrategy.NO
|
87 |
+
fp16:
|
88 |
+
desc: null
|
89 |
+
value: false
|
90 |
+
fp16_backend:
|
91 |
+
desc: null
|
92 |
+
value: auto
|
93 |
+
fp16_full_eval:
|
94 |
+
desc: null
|
95 |
+
value: false
|
96 |
+
fp16_opt_level:
|
97 |
+
desc: null
|
98 |
+
value: O1
|
99 |
+
gradient_accumulation_steps:
|
100 |
+
desc: null
|
101 |
+
value: 4
|
102 |
+
greater_is_better:
|
103 |
+
desc: null
|
104 |
+
value: null
|
105 |
+
group_by_length:
|
106 |
+
desc: null
|
107 |
+
value: false
|
108 |
+
ignore_data_skip:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
label_names:
|
112 |
+
desc: null
|
113 |
+
value: null
|
114 |
+
label_smoothing_factor:
|
115 |
+
desc: null
|
116 |
+
value: 0.0
|
117 |
+
learning_rate:
|
118 |
+
desc: null
|
119 |
+
value: 5.0e-05
|
120 |
+
length_column_name:
|
121 |
+
desc: null
|
122 |
+
value: length
|
123 |
+
line_by_line:
|
124 |
+
desc: null
|
125 |
+
value: false
|
126 |
+
load_best_model_at_end:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
local_rank:
|
130 |
+
desc: null
|
131 |
+
value: -1
|
132 |
+
log_level:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level_replica:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_on_each_node:
|
139 |
+
desc: null
|
140 |
+
value: true
|
141 |
+
logging_dir:
|
142 |
+
desc: null
|
143 |
+
value: ./runs/Jul14_23-46-07_t1v-n-f5c06ea1-w-0
|
144 |
+
logging_first_step:
|
145 |
+
desc: null
|
146 |
+
value: false
|
147 |
+
logging_steps:
|
148 |
+
desc: null
|
149 |
+
value: 250
|
150 |
+
logging_strategy:
|
151 |
+
desc: null
|
152 |
+
value: IntervalStrategy.STEPS
|
153 |
+
lr_scheduler_type:
|
154 |
+
desc: null
|
155 |
+
value: SchedulerType.LINEAR
|
156 |
+
max_eval_samples:
|
157 |
+
desc: null
|
158 |
+
value: 2000
|
159 |
+
max_grad_norm:
|
160 |
+
desc: null
|
161 |
+
value: 1.0
|
162 |
+
max_seq_length:
|
163 |
+
desc: null
|
164 |
+
value: 4096
|
165 |
+
max_steps:
|
166 |
+
desc: null
|
167 |
+
value: -1
|
168 |
+
metric_for_best_model:
|
169 |
+
desc: null
|
170 |
+
value: null
|
171 |
+
mlm_probability:
|
172 |
+
desc: null
|
173 |
+
value: 0.15
|
174 |
+
model_name_or_path:
|
175 |
+
desc: null
|
176 |
+
value: null
|
177 |
+
model_type:
|
178 |
+
desc: null
|
179 |
+
value: big_bird
|
180 |
+
mp_parameters:
|
181 |
+
desc: null
|
182 |
+
value: ''
|
183 |
+
no_cuda:
|
184 |
+
desc: null
|
185 |
+
value: false
|
186 |
+
num_train_epochs:
|
187 |
+
desc: null
|
188 |
+
value: 5.0
|
189 |
+
output_dir:
|
190 |
+
desc: null
|
191 |
+
value: ./
|
192 |
+
overwrite_cache:
|
193 |
+
desc: null
|
194 |
+
value: false
|
195 |
+
overwrite_output_dir:
|
196 |
+
desc: null
|
197 |
+
value: true
|
198 |
+
pad_to_max_length:
|
199 |
+
desc: null
|
200 |
+
value: false
|
201 |
+
past_index:
|
202 |
+
desc: null
|
203 |
+
value: -1
|
204 |
+
per_device_eval_batch_size:
|
205 |
+
desc: null
|
206 |
+
value: 2
|
207 |
+
per_device_train_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 2
|
210 |
+
per_gpu_eval_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: null
|
213 |
+
per_gpu_train_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
prediction_loss_only:
|
217 |
+
desc: null
|
218 |
+
value: false
|
219 |
+
preprocessing_num_workers:
|
220 |
+
desc: null
|
221 |
+
value: 96
|
222 |
+
push_to_hub:
|
223 |
+
desc: null
|
224 |
+
value: true
|
225 |
+
push_to_hub_model_id:
|
226 |
+
desc: null
|
227 |
+
value: ''
|
228 |
+
push_to_hub_organization:
|
229 |
+
desc: null
|
230 |
+
value: null
|
231 |
+
push_to_hub_token:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
remove_unused_columns:
|
235 |
+
desc: null
|
236 |
+
value: true
|
237 |
+
report_to:
|
238 |
+
desc: null
|
239 |
+
value:
|
240 |
+
- tensorboard
|
241 |
+
- wandb
|
242 |
+
resume_from_checkpoint:
|
243 |
+
desc: null
|
244 |
+
value: null
|
245 |
+
run_name:
|
246 |
+
desc: null
|
247 |
+
value: ./
|
248 |
+
save_on_each_node:
|
249 |
+
desc: null
|
250 |
+
value: false
|
251 |
+
save_optimizer:
|
252 |
+
desc: null
|
253 |
+
value: true
|
254 |
+
save_steps:
|
255 |
+
desc: null
|
256 |
+
value: 20000
|
257 |
+
save_strategy:
|
258 |
+
desc: null
|
259 |
+
value: IntervalStrategy.STEPS
|
260 |
+
save_total_limit:
|
261 |
+
desc: null
|
262 |
+
value: 5
|
263 |
+
seed:
|
264 |
+
desc: null
|
265 |
+
value: 42
|
266 |
+
sharded_ddp:
|
267 |
+
desc: null
|
268 |
+
value: []
|
269 |
+
skip_memory_metrics:
|
270 |
+
desc: null
|
271 |
+
value: true
|
272 |
+
tokenizer_name:
|
273 |
+
desc: null
|
274 |
+
value: ./
|
275 |
+
tpu_metrics_debug:
|
276 |
+
desc: null
|
277 |
+
value: false
|
278 |
+
tpu_num_cores:
|
279 |
+
desc: null
|
280 |
+
value: null
|
281 |
+
train_ref_file:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
use_fast_tokenizer:
|
285 |
+
desc: null
|
286 |
+
value: true
|
287 |
+
use_legacy_prediction_loop:
|
288 |
+
desc: null
|
289 |
+
value: false
|
290 |
+
validation_ref_file:
|
291 |
+
desc: null
|
292 |
+
value: null
|
293 |
+
validation_split_percentage:
|
294 |
+
desc: null
|
295 |
+
value: 5
|
296 |
+
warmup_ratio:
|
297 |
+
desc: null
|
298 |
+
value: 0.0
|
299 |
+
warmup_steps:
|
300 |
+
desc: null
|
301 |
+
value: 10000
|
302 |
+
weight_decay:
|
303 |
+
desc: null
|
304 |
+
value: 0.0095
|
wandb/run-20210714_234615-3p6vlfc3/files/output.log
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype <class 'jax._src.numpy.lax_numpy.int64'> requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more.
|
2 |
+
lax._check_user_dtype_supported(dtype, "zeros")
|
3 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
|
4 |
+
warnings.warn(
|
5 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
6 |
+
warnings.warn(
|
wandb/run-20210714_234615-3p6vlfc3/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|