dat
committed on
Commit
·
9915204
1
Parent(s):
de71755
update all
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- config.json +1 -1
- events.out.tfevents.1626299647.t1v-n-f5c06ea1-w-0.587396.3.v2 +3 -0
- events.out.tfevents.1626301159.t1v-n-f5c06ea1-w-0.592040.3.v2 +3 -0
- events.out.tfevents.1626301759.t1v-n-f5c06ea1-w-0.595290.3.v2 +3 -0
- events.out.tfevents.1626302399.t1v-n-f5c06ea1-w-0.597542.3.v2 +3 -0
- events.out.tfevents.1626303499.t1v-n-f5c06ea1-w-0.600323.3.v2 +3 -0
- run.sh +5 -5
- run_mlm_flax.py +42 -28
- wandb/debug-internal.log +1 -1
- wandb/debug.log +1 -1
- wandb/latest-run +1 -1
- wandb/run-20210714_215408-3kpvz8se/files/config.yaml +307 -0
- wandb/run-20210714_215408-3kpvz8se/files/output.log +15 -0
- wandb/run-20210714_215408-3kpvz8se/files/requirements.txt +94 -0
- wandb/run-20210714_215408-3kpvz8se/files/wandb-metadata.json +48 -0
- wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json +1 -0
- wandb/run-20210714_215408-3kpvz8se/logs/debug-internal.log +142 -0
- wandb/run-20210714_215408-3kpvz8se/logs/debug.log +127 -0
- wandb/run-20210714_215408-3kpvz8se/run-3kpvz8se.wandb +0 -0
- wandb/run-20210714_221920-s091gfok/files/config.yaml +307 -0
- wandb/run-20210714_221920-s091gfok/files/output.log +39 -0
- wandb/run-20210714_221920-s091gfok/files/requirements.txt +94 -0
- wandb/run-20210714_221920-s091gfok/files/wandb-metadata.json +47 -0
- wandb/run-20210714_221920-s091gfok/files/wandb-summary.json +1 -0
- wandb/run-20210714_221920-s091gfok/logs/debug-internal.log +233 -0
- wandb/run-20210714_221920-s091gfok/logs/debug.log +119 -0
- wandb/run-20210714_221920-s091gfok/run-s091gfok.wandb +0 -0
- wandb/run-20210714_222920-2p7mu4rm/files/config.yaml +307 -0
- wandb/run-20210714_222920-2p7mu4rm/files/output.log +39 -0
- wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt +94 -0
- wandb/run-20210714_222920-2p7mu4rm/files/wandb-metadata.json +47 -0
- wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json +1 -0
- wandb/run-20210714_222920-2p7mu4rm/logs/debug-internal.log +232 -0
- wandb/run-20210714_222920-2p7mu4rm/logs/debug.log +119 -0
- wandb/run-20210714_222920-2p7mu4rm/run-2p7mu4rm.wandb +0 -0
- wandb/run-20210714_224000-1jvvynqa/files/config.yaml +307 -0
- wandb/run-20210714_224000-1jvvynqa/files/output.log +39 -0
- wandb/run-20210714_224000-1jvvynqa/files/requirements.txt +94 -0
- wandb/run-20210714_224000-1jvvynqa/files/wandb-metadata.json +47 -0
- wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json +1 -0
- wandb/run-20210714_224000-1jvvynqa/logs/debug-internal.log +236 -0
- wandb/run-20210714_224000-1jvvynqa/logs/debug.log +127 -0
- wandb/run-20210714_224000-1jvvynqa/run-1jvvynqa.wandb +0 -0
- wandb/run-20210714_225820-1dpoijkp/files/config.yaml +304 -0
- wandb/run-20210714_225820-1dpoijkp/files/output.log +6 -0
- wandb/run-20210714_225820-1dpoijkp/files/requirements.txt +94 -0
- wandb/run-20210714_225820-1dpoijkp/files/wandb-metadata.json +47 -0
- wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json +1 -0
- wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log +40 -0
- wandb/run-20210714_225820-1dpoijkp/logs/debug.log +25 -0
config.json
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
],
|
5 |
"attention_probs_dropout_prob": 0.1,
|
6 |
"attention_type": "block_sparse",
|
7 |
-
"block_size":
|
8 |
"bos_token_id": 1,
|
9 |
"eos_token_id": 2,
|
10 |
"gradient_checkpointing": false,
|
|
|
4 |
],
|
5 |
"attention_probs_dropout_prob": 0.1,
|
6 |
"attention_type": "block_sparse",
|
7 |
+
"block_size": 64,
|
8 |
"bos_token_id": 1,
|
9 |
"eos_token_id": 2,
|
10 |
"gradient_checkpointing": false,
|
events.out.tfevents.1626299647.t1v-n-f5c06ea1-w-0.587396.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84890ea5bd3c73af594c93d00f787106810c8227126a97c436a63ef86502b93f
|
3 |
+
size 40
|
events.out.tfevents.1626301159.t1v-n-f5c06ea1-w-0.592040.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b87389b3d84fa4c5e66f3f568af52dc3799c8e93ecd1c17d2757563eadf4b8a
|
3 |
+
size 40
|
events.out.tfevents.1626301759.t1v-n-f5c06ea1-w-0.595290.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7151f4506f8d7e2790068211ea6067e529caadb072896f77702e4e393e69bc8b
|
3 |
+
size 40
|
events.out.tfevents.1626302399.t1v-n-f5c06ea1-w-0.597542.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c317581746aa1f2878b10cfdd0724ead65d7384dc0003909dc09986654f0ca6d
|
3 |
+
size 40
|
events.out.tfevents.1626303499.t1v-n-f5c06ea1-w-0.600323.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:610b4725be7ba71faaadd24b5aa839e67235b0b211a2bf6c4d1da48931051a45
|
3 |
+
size 40
|
run.sh
CHANGED
@@ -10,12 +10,12 @@ python ./run_mlm_flax.py \
|
|
10 |
--tokenizer_name="./" \
|
11 |
--max_seq_length="4096" \
|
12 |
--weight_decay="0.0095" \
|
13 |
-
--warmup_steps="
|
14 |
--overwrite_output_dir \
|
15 |
--adam_beta1="0.9" \
|
16 |
--adam_beta2="0.98" \
|
17 |
-
--logging_steps="
|
18 |
-
--eval_steps="
|
19 |
--num_train_epochs="5" \
|
20 |
--preprocessing_num_workers="96" \
|
21 |
--save_steps="20000" \
|
@@ -23,9 +23,9 @@ python ./run_mlm_flax.py \
|
|
23 |
--per_device_train_batch_size="2" \
|
24 |
--per_device_eval_batch_size="2" \
|
25 |
--save_total_limit="5"\
|
26 |
-
--max_eval_samples="
|
27 |
--overwrite_cache False \
|
28 |
-
--gradient_accumulation_steps="
|
29 |
#--resume_from_checkpoint="./"\
|
30 |
#--adafactor \
|
31 |
#--dtype="bfloat16" \
|
|
|
10 |
--tokenizer_name="./" \
|
11 |
--max_seq_length="4096" \
|
12 |
--weight_decay="0.0095" \
|
13 |
+
--warmup_steps="10000" \
|
14 |
--overwrite_output_dir \
|
15 |
--adam_beta1="0.9" \
|
16 |
--adam_beta2="0.98" \
|
17 |
+
--logging_steps="500" \
|
18 |
+
--eval_steps="20000" \
|
19 |
--num_train_epochs="5" \
|
20 |
--preprocessing_num_workers="96" \
|
21 |
--save_steps="20000" \
|
|
|
23 |
--per_device_train_batch_size="2" \
|
24 |
--per_device_eval_batch_size="2" \
|
25 |
--save_total_limit="5"\
|
26 |
+
--max_eval_samples="2000"\
|
27 |
--overwrite_cache False \
|
28 |
+
--gradient_accumulation_steps="8" \
|
29 |
#--resume_from_checkpoint="./"\
|
30 |
#--adafactor \
|
31 |
#--dtype="bfloat16" \
|
run_mlm_flax.py
CHANGED
@@ -33,6 +33,8 @@ from typing import Dict, List, Optional, Tuple
|
|
33 |
import numpy as np
|
34 |
from datasets import load_dataset, DatasetDict
|
35 |
from tqdm import tqdm
|
|
|
|
|
36 |
|
37 |
import flax
|
38 |
import jax
|
@@ -55,11 +57,13 @@ from transformers import (
|
|
55 |
set_seed,
|
56 |
)
|
57 |
import json
|
58 |
-
|
|
|
59 |
from flax.jax_utils import unreplicate
|
60 |
from flax.training.checkpoints import save_checkpoint, restore_checkpoint
|
61 |
from importlib.util import find_spec
|
62 |
from flax.serialization import to_bytes, from_bytes
|
|
|
63 |
|
64 |
MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_MASKED_LM_MAPPING.keys())
|
65 |
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
|
@@ -275,6 +279,35 @@ def write_eval_metric(summary_writer, eval_metrics, step):
|
|
275 |
summary_writer.scalar(f"eval_{metric_name}", value, step)
|
276 |
|
277 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
278 |
def _zeros_tree_like(inp_tree):
|
279 |
return jax.tree_map(jnp.zeros_like, inp_tree)
|
280 |
|
@@ -291,8 +324,11 @@ def fake_update(state):
|
|
291 |
def reinstantiate_states(opt_state):
|
292 |
new_state = []
|
293 |
for state in opt_state:
|
294 |
-
|
295 |
-
|
|
|
|
|
|
|
296 |
return new_state
|
297 |
|
298 |
def restore_model_checkpoint(save_dir, state):
|
@@ -318,27 +354,6 @@ def restore_model_checkpoint(save_dir, state):
|
|
318 |
|
319 |
return state.replace(step=step, params=params, opt_state=opt_state)
|
320 |
|
321 |
-
def save_model_checkpoint(model, save_dir, state, with_opt:bool=True, push_to_hub:bool=False):
|
322 |
-
"""
|
323 |
-
If `push_to_hub` is True, will save to `save_dir`. Otherwise will save to `save_dir/ckpt-{step}`.
|
324 |
-
"""
|
325 |
-
state = jax_utils.unreplicate(state)
|
326 |
-
logger.info(f"SAVING CHECKPOINT IN {save_dir}...")
|
327 |
-
if not push_to_hub:
|
328 |
-
save_dir = f"{save_dir}/ckpt-{mb_item(state.step)-1}"
|
329 |
-
model.save_pretrained(
|
330 |
-
save_dir,
|
331 |
-
params=state.params,
|
332 |
-
push_to_hub=push_to_hub,
|
333 |
-
commit_message=f"Saving weights and logs at step {mb_item(state.step)-1}",
|
334 |
-
)
|
335 |
-
if with_opt:
|
336 |
-
with open(os.path.join(save_dir, "opt_state.msgpack"), "wb") as f:
|
337 |
-
f.write(to_bytes(state.opt_state))
|
338 |
-
with open(os.path.join(save_dir, "training_state.json"), "w") as f:
|
339 |
-
json.dump({"step": state.step.item()}, f)
|
340 |
-
logger.info("checkpoint saved")
|
341 |
-
|
342 |
def rotate_checkpoints(ckpt_dir:str, save_total_limit:int):
|
343 |
"Removes older checkpoints so that `save_total_limit` checkpoints are kept"
|
344 |
# TODO: what to remove is decided using step number only, we might want to improve that
|
@@ -351,7 +366,6 @@ def rotate_checkpoints(ckpt_dir:str, save_total_limit:int):
|
|
351 |
shutil.rmtree(ckpt)
|
352 |
|
353 |
|
354 |
-
|
355 |
if __name__ == "__main__":
|
356 |
# See all possible arguments in src/transformers/training_args.py
|
357 |
# or by passing the --help flag to this script.
|
@@ -513,7 +527,7 @@ if __name__ == "__main__":
|
|
513 |
tokenized_datasets = DatasetDict.load_from_disk("/data/tokenized_data")
|
514 |
logger.info("Setting max validation examples to ")
|
515 |
print(f"Number of validation examples {data_args.max_eval_samples}")
|
516 |
-
tokenized_datasets["train"]= tokenized_datasets["train"].select(range(20000))
|
517 |
if data_args.max_eval_samples is not None:
|
518 |
tokenized_datasets["validation"] = tokenized_datasets["validation"].select(range(data_args.max_eval_samples))
|
519 |
else:
|
@@ -687,7 +701,6 @@ if __name__ == "__main__":
|
|
687 |
learning_rate=linear_decay_lr_schedule_fn,
|
688 |
)
|
689 |
else:
|
690 |
-
from optax import clip_by_global_norm
|
691 |
optimizer = optax.adamw(
|
692 |
learning_rate=linear_decay_lr_schedule_fn,
|
693 |
b1=training_args.adam_beta1,
|
@@ -777,7 +790,8 @@ if __name__ == "__main__":
|
|
777 |
steps_per_epoch = len(tokenized_datasets["train"]) // train_batch_size
|
778 |
resume_epoch = resume_step // (steps_per_epoch * grad_accum_steps)
|
779 |
epochs = tqdm(range(num_epochs), desc=f"Epoch ... ({resume_epoch+1}/{num_epochs})", position=0)
|
780 |
-
|
|
|
781 |
for epoch in epochs:
|
782 |
# ======================== Training ================================
|
783 |
train_start = time.time()
|
|
|
33 |
import numpy as np
|
34 |
from datasets import load_dataset, DatasetDict
|
35 |
from tqdm import tqdm
|
36 |
+
from optax import clip_by_global_norm
|
37 |
+
|
38 |
|
39 |
import flax
|
40 |
import jax
|
|
|
57 |
set_seed,
|
58 |
)
|
59 |
import json
|
60 |
+
import shutil
|
61 |
+
|
62 |
from flax.jax_utils import unreplicate
|
63 |
from flax.training.checkpoints import save_checkpoint, restore_checkpoint
|
64 |
from importlib.util import find_spec
|
65 |
from flax.serialization import to_bytes, from_bytes
|
66 |
+
import jax.profiler
|
67 |
|
68 |
MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_MASKED_LM_MAPPING.keys())
|
69 |
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
|
|
|
279 |
summary_writer.scalar(f"eval_{metric_name}", value, step)
|
280 |
|
281 |
|
282 |
+
# utils
|
283 |
+
def mb_item(x):
|
284 |
+
return x.item() if hasattr(x, "item") else x
|
285 |
+
|
286 |
+
#checkpoint functions
|
287 |
+
def save_model_checkpoint(model, save_dir, state, with_opt:bool=True, push_to_hub:bool=False):
|
288 |
+
"""
|
289 |
+
If `push_to_hub` is True, will save to `save_dir`. Otherwise will save to `save_dir/ckpt-{step}`.
|
290 |
+
"""
|
291 |
+
state = jax_utils.unreplicate(state)
|
292 |
+
logger.info(f"SAVING CHECKPOINT IN {save_dir}...")
|
293 |
+
if not push_to_hub:
|
294 |
+
save_dir = f"{save_dir}/ckpt-{mb_item(state.step)-1}"
|
295 |
+
model.save_pretrained(
|
296 |
+
save_dir,
|
297 |
+
params=state.params,
|
298 |
+
push_to_hub=push_to_hub,
|
299 |
+
commit_message=f"Saving weights and logs at step {mb_item(state.step)-1}",
|
300 |
+
)
|
301 |
+
if with_opt:
|
302 |
+
with open(os.path.join(save_dir, "opt_state.msgpack"), "wb") as f:
|
303 |
+
f.write(to_bytes(state.opt_state))
|
304 |
+
with open(os.path.join(save_dir, "training_state.json"), "w") as f:
|
305 |
+
json.dump({"step": state.step.item()}, f)
|
306 |
+
logger.info("checkpoint saved")
|
307 |
+
|
308 |
+
# this is added to make resuming from checkpoint to work with adafactor
|
309 |
+
# to be removed when issue is fixed
|
310 |
+
# notice that adafactor state is perturbed by fake_update
|
311 |
def _zeros_tree_like(inp_tree):
|
312 |
return jax.tree_map(jnp.zeros_like, inp_tree)
|
313 |
|
|
|
324 |
def reinstantiate_states(opt_state):
|
325 |
new_state = []
|
326 |
for state in opt_state:
|
327 |
+
if isinstance(state, list):
|
328 |
+
new_state.append(reinstantiate_states(state))
|
329 |
+
else:
|
330 |
+
cls = getattr(optax, type(state).__name__)
|
331 |
+
new_state.append(cls(**{k:getattr(state, k) for k in state._fields}))
|
332 |
return new_state
|
333 |
|
334 |
def restore_model_checkpoint(save_dir, state):
|
|
|
354 |
|
355 |
return state.replace(step=step, params=params, opt_state=opt_state)
|
356 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
357 |
def rotate_checkpoints(ckpt_dir:str, save_total_limit:int):
|
358 |
"Removes older checkpoints so that `save_total_limit` checkpoints are kept"
|
359 |
# TODO: what to remove is decided using step number only, we might want to improve that
|
|
|
366 |
shutil.rmtree(ckpt)
|
367 |
|
368 |
|
|
|
369 |
if __name__ == "__main__":
|
370 |
# See all possible arguments in src/transformers/training_args.py
|
371 |
# or by passing the --help flag to this script.
|
|
|
527 |
tokenized_datasets = DatasetDict.load_from_disk("/data/tokenized_data")
|
528 |
logger.info("Setting max validation examples to ")
|
529 |
print(f"Number of validation examples {data_args.max_eval_samples}")
|
530 |
+
#tokenized_datasets["train"]= tokenized_datasets["train"].select(range(20000))
|
531 |
if data_args.max_eval_samples is not None:
|
532 |
tokenized_datasets["validation"] = tokenized_datasets["validation"].select(range(data_args.max_eval_samples))
|
533 |
else:
|
|
|
701 |
learning_rate=linear_decay_lr_schedule_fn,
|
702 |
)
|
703 |
else:
|
|
|
704 |
optimizer = optax.adamw(
|
705 |
learning_rate=linear_decay_lr_schedule_fn,
|
706 |
b1=training_args.adam_beta1,
|
|
|
790 |
steps_per_epoch = len(tokenized_datasets["train"]) // train_batch_size
|
791 |
resume_epoch = resume_step // (steps_per_epoch * grad_accum_steps)
|
792 |
epochs = tqdm(range(num_epochs), desc=f"Epoch ... ({resume_epoch+1}/{num_epochs})", position=0)
|
793 |
+
if resume_step != 0:
|
794 |
+
logger.info(f"Skipping to epoch {resume_epoch} step {resume_step // grad_accum_steps}")
|
795 |
for epoch in epochs:
|
796 |
# ======================== Training ================================
|
797 |
train_start = time.time()
|
wandb/debug-internal.log
CHANGED
@@ -1 +1 @@
|
|
1 |
-
run-
|
|
|
1 |
+
run-20210714_225820-1dpoijkp/logs/debug-internal.log
|
wandb/debug.log
CHANGED
@@ -1 +1 @@
|
|
1 |
-
run-
|
|
|
1 |
+
run-20210714_225820-1dpoijkp/logs/debug.log
|
wandb/latest-run
CHANGED
@@ -1 +1 @@
|
|
1 |
-
run-
|
|
|
1 |
+
run-20210714_225820-1dpoijkp
|
wandb/run-20210714_215408-3kpvz8se/files/config.yaml
ADDED
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
2:
|
17 |
+
- 3
|
18 |
+
- 11
|
19 |
+
4: 3.8.10
|
20 |
+
5: 0.10.33
|
21 |
+
6: 4.9.0.dev0
|
22 |
+
8:
|
23 |
+
- 5
|
24 |
+
adafactor:
|
25 |
+
desc: null
|
26 |
+
value: false
|
27 |
+
adam_beta1:
|
28 |
+
desc: null
|
29 |
+
value: 0.9
|
30 |
+
adam_beta2:
|
31 |
+
desc: null
|
32 |
+
value: 0.98
|
33 |
+
adam_epsilon:
|
34 |
+
desc: null
|
35 |
+
value: 1.0e-08
|
36 |
+
cache_dir:
|
37 |
+
desc: null
|
38 |
+
value: null
|
39 |
+
config_name:
|
40 |
+
desc: null
|
41 |
+
value: ./
|
42 |
+
dataloader_drop_last:
|
43 |
+
desc: null
|
44 |
+
value: false
|
45 |
+
dataloader_num_workers:
|
46 |
+
desc: null
|
47 |
+
value: 0
|
48 |
+
dataloader_pin_memory:
|
49 |
+
desc: null
|
50 |
+
value: true
|
51 |
+
dataset_config_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
dataset_name:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
ddp_find_unused_parameters:
|
58 |
+
desc: null
|
59 |
+
value: null
|
60 |
+
debug:
|
61 |
+
desc: null
|
62 |
+
value: []
|
63 |
+
deepspeed:
|
64 |
+
desc: null
|
65 |
+
value: null
|
66 |
+
disable_tqdm:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_eval:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_predict:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
do_train:
|
76 |
+
desc: null
|
77 |
+
value: false
|
78 |
+
dtype:
|
79 |
+
desc: null
|
80 |
+
value: float32
|
81 |
+
eval_accumulation_steps:
|
82 |
+
desc: null
|
83 |
+
value: null
|
84 |
+
eval_steps:
|
85 |
+
desc: null
|
86 |
+
value: 500
|
87 |
+
evaluation_strategy:
|
88 |
+
desc: null
|
89 |
+
value: IntervalStrategy.NO
|
90 |
+
fp16:
|
91 |
+
desc: null
|
92 |
+
value: false
|
93 |
+
fp16_backend:
|
94 |
+
desc: null
|
95 |
+
value: auto
|
96 |
+
fp16_full_eval:
|
97 |
+
desc: null
|
98 |
+
value: false
|
99 |
+
fp16_opt_level:
|
100 |
+
desc: null
|
101 |
+
value: O1
|
102 |
+
gradient_accumulation_steps:
|
103 |
+
desc: null
|
104 |
+
value: 2
|
105 |
+
greater_is_better:
|
106 |
+
desc: null
|
107 |
+
value: null
|
108 |
+
group_by_length:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
ignore_data_skip:
|
112 |
+
desc: null
|
113 |
+
value: false
|
114 |
+
label_names:
|
115 |
+
desc: null
|
116 |
+
value: null
|
117 |
+
label_smoothing_factor:
|
118 |
+
desc: null
|
119 |
+
value: 0.0
|
120 |
+
learning_rate:
|
121 |
+
desc: null
|
122 |
+
value: 5.0e-05
|
123 |
+
length_column_name:
|
124 |
+
desc: null
|
125 |
+
value: length
|
126 |
+
line_by_line:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
load_best_model_at_end:
|
130 |
+
desc: null
|
131 |
+
value: false
|
132 |
+
local_rank:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_level_replica:
|
139 |
+
desc: null
|
140 |
+
value: -1
|
141 |
+
log_on_each_node:
|
142 |
+
desc: null
|
143 |
+
value: true
|
144 |
+
logging_dir:
|
145 |
+
desc: null
|
146 |
+
value: ./runs/Jul14_21-54-01_t1v-n-f5c06ea1-w-0
|
147 |
+
logging_first_step:
|
148 |
+
desc: null
|
149 |
+
value: false
|
150 |
+
logging_steps:
|
151 |
+
desc: null
|
152 |
+
value: 250
|
153 |
+
logging_strategy:
|
154 |
+
desc: null
|
155 |
+
value: IntervalStrategy.STEPS
|
156 |
+
lr_scheduler_type:
|
157 |
+
desc: null
|
158 |
+
value: SchedulerType.LINEAR
|
159 |
+
max_eval_samples:
|
160 |
+
desc: null
|
161 |
+
value: 500
|
162 |
+
max_grad_norm:
|
163 |
+
desc: null
|
164 |
+
value: 1.0
|
165 |
+
max_seq_length:
|
166 |
+
desc: null
|
167 |
+
value: 4096
|
168 |
+
max_steps:
|
169 |
+
desc: null
|
170 |
+
value: -1
|
171 |
+
metric_for_best_model:
|
172 |
+
desc: null
|
173 |
+
value: null
|
174 |
+
mlm_probability:
|
175 |
+
desc: null
|
176 |
+
value: 0.15
|
177 |
+
model_name_or_path:
|
178 |
+
desc: null
|
179 |
+
value: null
|
180 |
+
model_type:
|
181 |
+
desc: null
|
182 |
+
value: big_bird
|
183 |
+
mp_parameters:
|
184 |
+
desc: null
|
185 |
+
value: ''
|
186 |
+
no_cuda:
|
187 |
+
desc: null
|
188 |
+
value: false
|
189 |
+
num_train_epochs:
|
190 |
+
desc: null
|
191 |
+
value: 5.0
|
192 |
+
output_dir:
|
193 |
+
desc: null
|
194 |
+
value: ./
|
195 |
+
overwrite_cache:
|
196 |
+
desc: null
|
197 |
+
value: false
|
198 |
+
overwrite_output_dir:
|
199 |
+
desc: null
|
200 |
+
value: true
|
201 |
+
pad_to_max_length:
|
202 |
+
desc: null
|
203 |
+
value: false
|
204 |
+
past_index:
|
205 |
+
desc: null
|
206 |
+
value: -1
|
207 |
+
per_device_eval_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 2
|
210 |
+
per_device_train_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: 2
|
213 |
+
per_gpu_eval_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
per_gpu_train_batch_size:
|
217 |
+
desc: null
|
218 |
+
value: null
|
219 |
+
prediction_loss_only:
|
220 |
+
desc: null
|
221 |
+
value: false
|
222 |
+
preprocessing_num_workers:
|
223 |
+
desc: null
|
224 |
+
value: 96
|
225 |
+
push_to_hub:
|
226 |
+
desc: null
|
227 |
+
value: true
|
228 |
+
push_to_hub_model_id:
|
229 |
+
desc: null
|
230 |
+
value: ''
|
231 |
+
push_to_hub_organization:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
push_to_hub_token:
|
235 |
+
desc: null
|
236 |
+
value: null
|
237 |
+
remove_unused_columns:
|
238 |
+
desc: null
|
239 |
+
value: true
|
240 |
+
report_to:
|
241 |
+
desc: null
|
242 |
+
value:
|
243 |
+
- tensorboard
|
244 |
+
- wandb
|
245 |
+
resume_from_checkpoint:
|
246 |
+
desc: null
|
247 |
+
value: ./
|
248 |
+
run_name:
|
249 |
+
desc: null
|
250 |
+
value: ./
|
251 |
+
save_on_each_node:
|
252 |
+
desc: null
|
253 |
+
value: false
|
254 |
+
save_optimizer:
|
255 |
+
desc: null
|
256 |
+
value: true
|
257 |
+
save_steps:
|
258 |
+
desc: null
|
259 |
+
value: 20000
|
260 |
+
save_strategy:
|
261 |
+
desc: null
|
262 |
+
value: IntervalStrategy.STEPS
|
263 |
+
save_total_limit:
|
264 |
+
desc: null
|
265 |
+
value: 5
|
266 |
+
seed:
|
267 |
+
desc: null
|
268 |
+
value: 42
|
269 |
+
sharded_ddp:
|
270 |
+
desc: null
|
271 |
+
value: []
|
272 |
+
skip_memory_metrics:
|
273 |
+
desc: null
|
274 |
+
value: true
|
275 |
+
tokenizer_name:
|
276 |
+
desc: null
|
277 |
+
value: ./
|
278 |
+
tpu_metrics_debug:
|
279 |
+
desc: null
|
280 |
+
value: false
|
281 |
+
tpu_num_cores:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
train_ref_file:
|
285 |
+
desc: null
|
286 |
+
value: null
|
287 |
+
use_fast_tokenizer:
|
288 |
+
desc: null
|
289 |
+
value: true
|
290 |
+
use_legacy_prediction_loop:
|
291 |
+
desc: null
|
292 |
+
value: false
|
293 |
+
validation_ref_file:
|
294 |
+
desc: null
|
295 |
+
value: null
|
296 |
+
validation_split_percentage:
|
297 |
+
desc: null
|
298 |
+
value: 5
|
299 |
+
warmup_ratio:
|
300 |
+
desc: null
|
301 |
+
value: 0.0
|
302 |
+
warmup_steps:
|
303 |
+
desc: null
|
304 |
+
value: 5000
|
305 |
+
weight_decay:
|
306 |
+
desc: null
|
307 |
+
value: 0.0095
|
wandb/run-20210714_215408-3kpvz8se/files/output.log
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[21:54:22] - INFO - absl - A polynomial schedule was set with a non-positive `transition_steps` value; this results in a constant schedule with value `init_value`.
|
2 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype <class 'jax._src.numpy.lax_numpy.int64'> requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more.
|
3 |
+
lax._check_user_dtype_supported(dtype, "zeros")
|
4 |
+
[21:54:23] - INFO - __main__ - RESTORING CHECKPOINT FROM ./...
|
5 |
+
tcmalloc: large alloc 1530273792 bytes == 0xd9eda000 @ 0x7f6c0ba41680 0x7f6c0ba62824 0x5f7b11 0x648631 0x5c38e6 0x4f30e6 0x64ee88 0x505653 0x56acb6 0x568d9a 0x5f5b33 0x56aadf 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7f6c0b8560b3 0x5f96de
|
6 |
+
restoring state of multisteps optimizer
|
7 |
+
[21:54:26] - INFO - __main__ - checkpoint restored
|
8 |
+
Traceback (most recent call last):
|
9 |
+
File "./run_mlm_flax.py", line 712, in <module>
|
10 |
+
state = restore_model_checkpoint(training_args.resume_from_checkpoint, state)
|
11 |
+
File "./run_mlm_flax.py", line 314, in restore_model_checkpoint
|
12 |
+
inner_opt_state = reinstantiate_states(opt_state.inner_opt_state)
|
13 |
+
File "./run_mlm_flax.py", line 294, in reinstantiate_states
|
14 |
+
cls = getattr(optax, type(state).__name__)
|
15 |
+
AttributeError: module 'optax' has no attribute 'list'
|
wandb/run-20210714_215408-3kpvz8se/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|
wandb/run-20210714_215408-3kpvz8se/files/wandb-metadata.json
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2021-07-14T21:54:10.349764",
|
5 |
+
"startedAt": "2021-07-14T21:54:08.359450",
|
6 |
+
"docker": null,
|
7 |
+
"cpu_count": 96,
|
8 |
+
"cuda": null,
|
9 |
+
"args": [
|
10 |
+
"--push_to_hub",
|
11 |
+
"--output_dir=./",
|
12 |
+
"--model_type=big_bird",
|
13 |
+
"--config_name=./",
|
14 |
+
"--tokenizer_name=./",
|
15 |
+
"--max_seq_length=4096",
|
16 |
+
"--weight_decay=0.0095",
|
17 |
+
"--warmup_steps=5000",
|
18 |
+
"--overwrite_output_dir",
|
19 |
+
"--adam_beta1=0.9",
|
20 |
+
"--adam_beta2=0.98",
|
21 |
+
"--logging_steps=250",
|
22 |
+
"--eval_steps=500",
|
23 |
+
"--num_train_epochs=5",
|
24 |
+
"--preprocessing_num_workers=96",
|
25 |
+
"--save_steps=20000",
|
26 |
+
"--learning_rate=5e-5",
|
27 |
+
"--per_device_train_batch_size=2",
|
28 |
+
"--per_device_eval_batch_size=2",
|
29 |
+
"--save_total_limit=5",
|
30 |
+
"--max_eval_samples=500",
|
31 |
+
"--overwrite_cache",
|
32 |
+
"False",
|
33 |
+
"--gradient_accumulation_steps=2",
|
34 |
+
"--resume_from_checkpoint=./"
|
35 |
+
],
|
36 |
+
"state": "running",
|
37 |
+
"program": "./run_mlm_flax.py",
|
38 |
+
"codePath": "run_mlm_flax.py",
|
39 |
+
"git": {
|
40 |
+
"remote": "https://huggingface.co/flax-community/pino-roberta-base",
|
41 |
+
"commit": "de71755314d4aa3ab182072c6f0be3de9798a66b"
|
42 |
+
},
|
43 |
+
"email": null,
|
44 |
+
"root": "/home/dat/pino-roberta-base",
|
45 |
+
"host": "t1v-n-f5c06ea1-w-0",
|
46 |
+
"username": "dat",
|
47 |
+
"executable": "/home/dat/pino/bin/python"
|
48 |
+
}
|
wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
wandb/run-20210714_215408-3kpvz8se/logs/debug-internal.log
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-14 21:54:09,008 INFO MainThread:588654 [internal.py:wandb_internal():88] W&B internal server running at pid: 588654, started at: 2021-07-14 21:54:09.008494
|
2 |
+
2021-07-14 21:54:09,011 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: check_version
|
3 |
+
2021-07-14 21:54:09,011 INFO WriterThread:588654 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/run-3kpvz8se.wandb
|
4 |
+
2021-07-14 21:54:09,012 DEBUG SenderThread:588654 [sender.py:send():179] send: header
|
5 |
+
2021-07-14 21:54:09,012 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: check_version
|
6 |
+
2021-07-14 21:54:09,050 DEBUG SenderThread:588654 [sender.py:send():179] send: run
|
7 |
+
2021-07-14 21:54:09,234 INFO SenderThread:588654 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files
|
8 |
+
2021-07-14 21:54:09,234 INFO SenderThread:588654 [sender.py:_start_run_threads():716] run started: 3kpvz8se with start time 1626299648
|
9 |
+
2021-07-14 21:54:09,234 DEBUG SenderThread:588654 [sender.py:send():179] send: summary
|
10 |
+
2021-07-14 21:54:09,234 INFO SenderThread:588654 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
11 |
+
2021-07-14 21:54:09,235 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: run_start
|
12 |
+
2021-07-14 21:54:10,238 INFO Thread-8 :588654 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json
|
13 |
+
2021-07-14 21:54:10,349 DEBUG HandlerThread:588654 [meta.py:__init__():39] meta init
|
14 |
+
2021-07-14 21:54:10,349 DEBUG HandlerThread:588654 [meta.py:__init__():53] meta init done
|
15 |
+
2021-07-14 21:54:10,349 DEBUG HandlerThread:588654 [meta.py:probe():210] probe
|
16 |
+
2021-07-14 21:54:10,351 DEBUG HandlerThread:588654 [meta.py:_setup_git():200] setup git
|
17 |
+
2021-07-14 21:54:10,382 DEBUG HandlerThread:588654 [meta.py:_setup_git():207] setup git done
|
18 |
+
2021-07-14 21:54:10,382 DEBUG HandlerThread:588654 [meta.py:_save_pip():57] save pip
|
19 |
+
2021-07-14 21:54:10,382 DEBUG HandlerThread:588654 [meta.py:_save_pip():71] save pip done
|
20 |
+
2021-07-14 21:54:10,382 DEBUG HandlerThread:588654 [meta.py:probe():252] probe done
|
21 |
+
2021-07-14 21:54:10,386 DEBUG SenderThread:588654 [sender.py:send():179] send: files
|
22 |
+
2021-07-14 21:54:10,387 INFO SenderThread:588654 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
|
23 |
+
2021-07-14 21:54:10,394 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: stop_status
|
24 |
+
2021-07-14 21:54:10,394 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: stop_status
|
25 |
+
2021-07-14 21:54:10,527 DEBUG SenderThread:588654 [sender.py:send():179] send: config
|
26 |
+
2021-07-14 21:54:10,527 DEBUG SenderThread:588654 [sender.py:send():179] send: config
|
27 |
+
2021-07-14 21:54:10,527 DEBUG SenderThread:588654 [sender.py:send():179] send: config
|
28 |
+
2021-07-14 21:54:11,237 INFO Thread-8 :588654 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/requirements.txt
|
29 |
+
2021-07-14 21:54:11,237 INFO Thread-8 :588654 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-metadata.json
|
30 |
+
2021-07-14 21:54:11,237 INFO Thread-8 :588654 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log
|
31 |
+
2021-07-14 21:54:11,254 INFO Thread-11 :588654 [upload_job.py:push():137] Uploaded file /tmp/tmp43phob9nwandb/1nzximp3-wandb-metadata.json
|
32 |
+
2021-07-14 21:54:25,243 INFO Thread-8 :588654 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log
|
33 |
+
2021-07-14 21:54:25,572 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: stop_status
|
34 |
+
2021-07-14 21:54:25,573 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: stop_status
|
35 |
+
2021-07-14 21:54:28,245 INFO Thread-8 :588654 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log
|
36 |
+
2021-07-14 21:54:29,079 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
|
37 |
+
2021-07-14 21:54:29,079 DEBUG SenderThread:588654 [sender.py:send():179] send: telemetry
|
38 |
+
2021-07-14 21:54:29,079 DEBUG SenderThread:588654 [sender.py:send():179] send: exit
|
39 |
+
2021-07-14 21:54:29,079 INFO SenderThread:588654 [sender.py:send_exit():287] handling exit code: 1
|
40 |
+
2021-07-14 21:54:29,080 INFO SenderThread:588654 [sender.py:send_exit():295] send defer
|
41 |
+
2021-07-14 21:54:29,080 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
|
42 |
+
2021-07-14 21:54:29,081 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
|
43 |
+
2021-07-14 21:54:29,081 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 0
|
44 |
+
2021-07-14 21:54:29,081 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
|
45 |
+
2021-07-14 21:54:29,081 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 0
|
46 |
+
2021-07-14 21:54:29,081 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 1
|
47 |
+
2021-07-14 21:54:29,081 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
|
48 |
+
2021-07-14 21:54:29,081 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 1
|
49 |
+
2021-07-14 21:54:29,111 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
|
50 |
+
2021-07-14 21:54:29,111 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 1
|
51 |
+
2021-07-14 21:54:29,111 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 2
|
52 |
+
2021-07-14 21:54:29,111 DEBUG SenderThread:588654 [sender.py:send():179] send: stats
|
53 |
+
2021-07-14 21:54:29,112 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
|
54 |
+
2021-07-14 21:54:29,112 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 2
|
55 |
+
2021-07-14 21:54:29,112 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
|
56 |
+
2021-07-14 21:54:29,112 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 2
|
57 |
+
2021-07-14 21:54:29,112 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 3
|
58 |
+
2021-07-14 21:54:29,112 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
|
59 |
+
2021-07-14 21:54:29,112 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 3
|
60 |
+
2021-07-14 21:54:29,113 DEBUG SenderThread:588654 [sender.py:send():179] send: summary
|
61 |
+
2021-07-14 21:54:29,113 INFO SenderThread:588654 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
62 |
+
2021-07-14 21:54:29,113 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
|
63 |
+
2021-07-14 21:54:29,113 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 3
|
64 |
+
2021-07-14 21:54:29,113 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 4
|
65 |
+
2021-07-14 21:54:29,113 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
|
66 |
+
2021-07-14 21:54:29,113 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 4
|
67 |
+
2021-07-14 21:54:29,114 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
|
68 |
+
2021-07-14 21:54:29,114 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 4
|
69 |
+
2021-07-14 21:54:29,182 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
|
70 |
+
2021-07-14 21:54:29,245 INFO Thread-8 :588654 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json
|
71 |
+
2021-07-14 21:54:29,245 INFO Thread-8 :588654 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log
|
72 |
+
2021-07-14 21:54:29,299 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 5
|
73 |
+
2021-07-14 21:54:29,299 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
|
74 |
+
2021-07-14 21:54:29,300 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
|
75 |
+
2021-07-14 21:54:29,300 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 5
|
76 |
+
2021-07-14 21:54:29,300 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
|
77 |
+
2021-07-14 21:54:29,300 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 5
|
78 |
+
2021-07-14 21:54:29,300 INFO SenderThread:588654 [dir_watcher.py:finish():282] shutting down directory watcher
|
79 |
+
2021-07-14 21:54:29,401 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
|
80 |
+
2021-07-14 21:54:30,246 INFO Thread-8 :588654 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/config.yaml
|
81 |
+
2021-07-14 21:54:30,246 INFO SenderThread:588654 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files
|
82 |
+
2021-07-14 21:54:30,247 INFO SenderThread:588654 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/requirements.txt requirements.txt
|
83 |
+
2021-07-14 21:54:30,247 INFO SenderThread:588654 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log output.log
|
84 |
+
2021-07-14 21:54:30,247 INFO SenderThread:588654 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-metadata.json wandb-metadata.json
|
85 |
+
2021-07-14 21:54:30,247 INFO SenderThread:588654 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/config.yaml config.yaml
|
86 |
+
2021-07-14 21:54:30,247 INFO SenderThread:588654 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json wandb-summary.json
|
87 |
+
2021-07-14 21:54:30,248 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 6
|
88 |
+
2021-07-14 21:54:30,248 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
|
89 |
+
2021-07-14 21:54:30,252 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
|
90 |
+
2021-07-14 21:54:30,252 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 6
|
91 |
+
2021-07-14 21:54:30,252 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
|
92 |
+
2021-07-14 21:54:30,252 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 6
|
93 |
+
2021-07-14 21:54:30,252 INFO SenderThread:588654 [file_pusher.py:finish():177] shutting down file pusher
|
94 |
+
2021-07-14 21:54:30,350 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
|
95 |
+
2021-07-14 21:54:30,350 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
|
96 |
+
2021-07-14 21:54:30,452 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
|
97 |
+
2021-07-14 21:54:30,452 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
|
98 |
+
2021-07-14 21:54:30,553 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
|
99 |
+
2021-07-14 21:54:30,554 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
|
100 |
+
2021-07-14 21:54:30,655 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
|
101 |
+
2021-07-14 21:54:30,655 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
|
102 |
+
2021-07-14 21:54:30,699 INFO Thread-13 :588654 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log
|
103 |
+
2021-07-14 21:54:30,707 INFO Thread-12 :588654 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/requirements.txt
|
104 |
+
2021-07-14 21:54:30,708 INFO Thread-15 :588654 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json
|
105 |
+
2021-07-14 21:54:30,757 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
|
106 |
+
2021-07-14 21:54:30,757 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
|
107 |
+
2021-07-14 21:54:30,760 INFO Thread-14 :588654 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/config.yaml
|
108 |
+
2021-07-14 21:54:30,858 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
|
109 |
+
2021-07-14 21:54:30,858 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
|
110 |
+
2021-07-14 21:54:30,960 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
|
111 |
+
2021-07-14 21:54:30,960 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
|
112 |
+
2021-07-14 21:54:30,960 INFO Thread-7 :588654 [sender.py:transition_state():308] send defer: 7
|
113 |
+
2021-07-14 21:54:30,961 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
|
114 |
+
2021-07-14 21:54:30,961 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 7
|
115 |
+
2021-07-14 21:54:30,961 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
|
116 |
+
2021-07-14 21:54:30,961 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 7
|
117 |
+
2021-07-14 21:54:31,062 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
|
118 |
+
2021-07-14 21:54:31,093 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 8
|
119 |
+
2021-07-14 21:54:31,093 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
|
120 |
+
2021-07-14 21:54:31,093 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
|
121 |
+
2021-07-14 21:54:31,093 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 8
|
122 |
+
2021-07-14 21:54:31,093 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
|
123 |
+
2021-07-14 21:54:31,094 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 8
|
124 |
+
2021-07-14 21:54:31,094 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 9
|
125 |
+
2021-07-14 21:54:31,094 DEBUG SenderThread:588654 [sender.py:send():179] send: final
|
126 |
+
2021-07-14 21:54:31,094 DEBUG SenderThread:588654 [sender.py:send():179] send: footer
|
127 |
+
2021-07-14 21:54:31,094 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
|
128 |
+
2021-07-14 21:54:31,095 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 9
|
129 |
+
2021-07-14 21:54:31,095 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
|
130 |
+
2021-07-14 21:54:31,095 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 9
|
131 |
+
2021-07-14 21:54:31,195 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
|
132 |
+
2021-07-14 21:54:31,195 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
|
133 |
+
2021-07-14 21:54:31,195 INFO SenderThread:588654 [file_pusher.py:join():182] waiting for file pusher
|
134 |
+
2021-07-14 21:54:31,196 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: get_summary
|
135 |
+
2021-07-14 21:54:31,197 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: sampled_history
|
136 |
+
2021-07-14 21:54:31,197 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: shutdown
|
137 |
+
2021-07-14 21:54:31,197 INFO HandlerThread:588654 [handler.py:finish():638] shutting down handler
|
138 |
+
2021-07-14 21:54:32,095 INFO WriterThread:588654 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/run-3kpvz8se.wandb
|
139 |
+
2021-07-14 21:54:32,195 INFO SenderThread:588654 [sender.py:finish():945] shutting down sender
|
140 |
+
2021-07-14 21:54:32,196 INFO SenderThread:588654 [file_pusher.py:finish():177] shutting down file pusher
|
141 |
+
2021-07-14 21:54:32,196 INFO SenderThread:588654 [file_pusher.py:join():182] waiting for file pusher
|
142 |
+
2021-07-14 21:54:32,198 INFO MainThread:588654 [internal.py:handle_exit():78] Internal process exited
|
wandb/run-20210714_215408-3kpvz8se/logs/debug.log
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-14 21:54:08,360 INFO MainThread:587396 [wandb_setup.py:_flush():69] setting env: {}
|
2 |
+
2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_setup.py:_flush():69] setting login settings: {}
|
3 |
+
2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/logs/debug.log
|
4 |
+
2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/logs/debug-internal.log
|
5 |
+
2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_init.py:init():370] calling init triggers
|
6 |
+
2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
|
7 |
+
config: {}
|
8 |
+
2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_init.py:init():419] starting backend
|
9 |
+
2021-07-14 21:54:08,361 INFO MainThread:587396 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
10 |
+
2021-07-14 21:54:08,399 INFO MainThread:587396 [backend.py:ensure_launched():135] starting backend process...
|
11 |
+
2021-07-14 21:54:08,437 INFO MainThread:587396 [backend.py:ensure_launched():139] started backend process with pid: 588654
|
12 |
+
2021-07-14 21:54:08,438 INFO MainThread:587396 [wandb_init.py:init():424] backend started and connected
|
13 |
+
2021-07-14 21:54:08,441 INFO MainThread:587396 [wandb_init.py:init():472] updated telemetry
|
14 |
+
2021-07-14 21:54:08,442 INFO MainThread:587396 [wandb_init.py:init():491] communicating current version
|
15 |
+
2021-07-14 21:54:09,049 INFO MainThread:587396 [wandb_init.py:init():496] got version response
|
16 |
+
2021-07-14 21:54:09,049 INFO MainThread:587396 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
|
17 |
+
2021-07-14 21:54:09,234 INFO MainThread:587396 [wandb_init.py:init():529] starting run threads in backend
|
18 |
+
2021-07-14 21:54:10,390 INFO MainThread:587396 [wandb_run.py:_console_start():1623] atexit reg
|
19 |
+
2021-07-14 21:54:10,390 INFO MainThread:587396 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
|
20 |
+
2021-07-14 21:54:10,391 INFO MainThread:587396 [wandb_run.py:_redirect():1502] Redirecting console.
|
21 |
+
2021-07-14 21:54:10,393 INFO MainThread:587396 [wandb_run.py:_redirect():1558] Redirects installed.
|
22 |
+
2021-07-14 21:54:10,393 INFO MainThread:587396 [wandb_init.py:init():554] run started, returning control to user process
|
23 |
+
2021-07-14 21:54:10,399 INFO MainThread:587396 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_21-54-01_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 250, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
+
2021-07-14 21:54:10,401 INFO MainThread:587396 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
|
25 |
+
2021-07-14 21:54:10,402 INFO MainThread:587396 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
|
26 |
+
2021-07-14 21:54:26,346 INFO MainThread:587396 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
|
27 |
+
2021-07-14 21:54:26,347 INFO MainThread:587396 [wandb_run.py:_restore():1565] restore
|
28 |
+
2021-07-14 21:54:29,081 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
29 |
+
wandb_count: 1
|
30 |
+
}
|
31 |
+
pusher_stats {
|
32 |
+
uploaded_bytes: 1483
|
33 |
+
total_bytes: 1483
|
34 |
+
}
|
35 |
+
|
36 |
+
2021-07-14 21:54:29,300 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
37 |
+
wandb_count: 1
|
38 |
+
}
|
39 |
+
pusher_stats {
|
40 |
+
uploaded_bytes: 1483
|
41 |
+
total_bytes: 1483
|
42 |
+
}
|
43 |
+
|
44 |
+
2021-07-14 21:54:30,248 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
45 |
+
wandb_count: 2
|
46 |
+
}
|
47 |
+
pusher_stats {
|
48 |
+
uploaded_bytes: 1483
|
49 |
+
total_bytes: 3133
|
50 |
+
}
|
51 |
+
|
52 |
+
2021-07-14 21:54:30,351 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
53 |
+
wandb_count: 5
|
54 |
+
}
|
55 |
+
pusher_stats {
|
56 |
+
uploaded_bytes: 1483
|
57 |
+
total_bytes: 9257
|
58 |
+
}
|
59 |
+
|
60 |
+
2021-07-14 21:54:30,452 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
61 |
+
wandb_count: 5
|
62 |
+
}
|
63 |
+
pusher_stats {
|
64 |
+
uploaded_bytes: 9257
|
65 |
+
total_bytes: 9257
|
66 |
+
}
|
67 |
+
|
68 |
+
2021-07-14 21:54:30,554 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
69 |
+
wandb_count: 5
|
70 |
+
}
|
71 |
+
pusher_stats {
|
72 |
+
uploaded_bytes: 9257
|
73 |
+
total_bytes: 9257
|
74 |
+
}
|
75 |
+
|
76 |
+
2021-07-14 21:54:30,656 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
77 |
+
wandb_count: 5
|
78 |
+
}
|
79 |
+
pusher_stats {
|
80 |
+
uploaded_bytes: 9257
|
81 |
+
total_bytes: 9257
|
82 |
+
}
|
83 |
+
|
84 |
+
2021-07-14 21:54:30,757 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
85 |
+
wandb_count: 5
|
86 |
+
}
|
87 |
+
pusher_stats {
|
88 |
+
uploaded_bytes: 9257
|
89 |
+
total_bytes: 9257
|
90 |
+
}
|
91 |
+
|
92 |
+
2021-07-14 21:54:30,859 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
93 |
+
wandb_count: 5
|
94 |
+
}
|
95 |
+
pusher_stats {
|
96 |
+
uploaded_bytes: 9257
|
97 |
+
total_bytes: 9257
|
98 |
+
}
|
99 |
+
|
100 |
+
2021-07-14 21:54:30,961 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
101 |
+
wandb_count: 5
|
102 |
+
}
|
103 |
+
pusher_stats {
|
104 |
+
uploaded_bytes: 9257
|
105 |
+
total_bytes: 9257
|
106 |
+
}
|
107 |
+
|
108 |
+
2021-07-14 21:54:31,093 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
109 |
+
wandb_count: 5
|
110 |
+
}
|
111 |
+
pusher_stats {
|
112 |
+
uploaded_bytes: 9257
|
113 |
+
total_bytes: 9257
|
114 |
+
}
|
115 |
+
|
116 |
+
2021-07-14 21:54:31,196 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
|
117 |
+
exit_result {
|
118 |
+
}
|
119 |
+
file_counts {
|
120 |
+
wandb_count: 5
|
121 |
+
}
|
122 |
+
pusher_stats {
|
123 |
+
uploaded_bytes: 9257
|
124 |
+
total_bytes: 9257
|
125 |
+
}
|
126 |
+
|
127 |
+
2021-07-14 21:54:32,452 INFO MainThread:587396 [wandb_run.py:_show_files():1937] logging synced files
|
wandb/run-20210714_215408-3kpvz8se/run-3kpvz8se.wandb
ADDED
Binary file (4.77 kB). View file
|
|
wandb/run-20210714_221920-s091gfok/files/config.yaml
ADDED
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
2:
|
17 |
+
- 3
|
18 |
+
- 11
|
19 |
+
4: 3.8.10
|
20 |
+
5: 0.10.33
|
21 |
+
6: 4.9.0.dev0
|
22 |
+
8:
|
23 |
+
- 5
|
24 |
+
adafactor:
|
25 |
+
desc: null
|
26 |
+
value: false
|
27 |
+
adam_beta1:
|
28 |
+
desc: null
|
29 |
+
value: 0.9
|
30 |
+
adam_beta2:
|
31 |
+
desc: null
|
32 |
+
value: 0.98
|
33 |
+
adam_epsilon:
|
34 |
+
desc: null
|
35 |
+
value: 1.0e-08
|
36 |
+
cache_dir:
|
37 |
+
desc: null
|
38 |
+
value: null
|
39 |
+
config_name:
|
40 |
+
desc: null
|
41 |
+
value: ./
|
42 |
+
dataloader_drop_last:
|
43 |
+
desc: null
|
44 |
+
value: false
|
45 |
+
dataloader_num_workers:
|
46 |
+
desc: null
|
47 |
+
value: 0
|
48 |
+
dataloader_pin_memory:
|
49 |
+
desc: null
|
50 |
+
value: true
|
51 |
+
dataset_config_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
dataset_name:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
ddp_find_unused_parameters:
|
58 |
+
desc: null
|
59 |
+
value: null
|
60 |
+
debug:
|
61 |
+
desc: null
|
62 |
+
value: []
|
63 |
+
deepspeed:
|
64 |
+
desc: null
|
65 |
+
value: null
|
66 |
+
disable_tqdm:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_eval:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_predict:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
do_train:
|
76 |
+
desc: null
|
77 |
+
value: false
|
78 |
+
dtype:
|
79 |
+
desc: null
|
80 |
+
value: float32
|
81 |
+
eval_accumulation_steps:
|
82 |
+
desc: null
|
83 |
+
value: null
|
84 |
+
eval_steps:
|
85 |
+
desc: null
|
86 |
+
value: 20000
|
87 |
+
evaluation_strategy:
|
88 |
+
desc: null
|
89 |
+
value: IntervalStrategy.NO
|
90 |
+
fp16:
|
91 |
+
desc: null
|
92 |
+
value: false
|
93 |
+
fp16_backend:
|
94 |
+
desc: null
|
95 |
+
value: auto
|
96 |
+
fp16_full_eval:
|
97 |
+
desc: null
|
98 |
+
value: false
|
99 |
+
fp16_opt_level:
|
100 |
+
desc: null
|
101 |
+
value: O1
|
102 |
+
gradient_accumulation_steps:
|
103 |
+
desc: null
|
104 |
+
value: 2
|
105 |
+
greater_is_better:
|
106 |
+
desc: null
|
107 |
+
value: null
|
108 |
+
group_by_length:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
ignore_data_skip:
|
112 |
+
desc: null
|
113 |
+
value: false
|
114 |
+
label_names:
|
115 |
+
desc: null
|
116 |
+
value: null
|
117 |
+
label_smoothing_factor:
|
118 |
+
desc: null
|
119 |
+
value: 0.0
|
120 |
+
learning_rate:
|
121 |
+
desc: null
|
122 |
+
value: 5.0e-05
|
123 |
+
length_column_name:
|
124 |
+
desc: null
|
125 |
+
value: length
|
126 |
+
line_by_line:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
load_best_model_at_end:
|
130 |
+
desc: null
|
131 |
+
value: false
|
132 |
+
local_rank:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_level_replica:
|
139 |
+
desc: null
|
140 |
+
value: -1
|
141 |
+
log_on_each_node:
|
142 |
+
desc: null
|
143 |
+
value: true
|
144 |
+
logging_dir:
|
145 |
+
desc: null
|
146 |
+
value: ./runs/Jul14_22-19-13_t1v-n-f5c06ea1-w-0
|
147 |
+
logging_first_step:
|
148 |
+
desc: null
|
149 |
+
value: false
|
150 |
+
logging_steps:
|
151 |
+
desc: null
|
152 |
+
value: 500
|
153 |
+
logging_strategy:
|
154 |
+
desc: null
|
155 |
+
value: IntervalStrategy.STEPS
|
156 |
+
lr_scheduler_type:
|
157 |
+
desc: null
|
158 |
+
value: SchedulerType.LINEAR
|
159 |
+
max_eval_samples:
|
160 |
+
desc: null
|
161 |
+
value: 2000
|
162 |
+
max_grad_norm:
|
163 |
+
desc: null
|
164 |
+
value: 1.0
|
165 |
+
max_seq_length:
|
166 |
+
desc: null
|
167 |
+
value: 4096
|
168 |
+
max_steps:
|
169 |
+
desc: null
|
170 |
+
value: -1
|
171 |
+
metric_for_best_model:
|
172 |
+
desc: null
|
173 |
+
value: null
|
174 |
+
mlm_probability:
|
175 |
+
desc: null
|
176 |
+
value: 0.15
|
177 |
+
model_name_or_path:
|
178 |
+
desc: null
|
179 |
+
value: null
|
180 |
+
model_type:
|
181 |
+
desc: null
|
182 |
+
value: big_bird
|
183 |
+
mp_parameters:
|
184 |
+
desc: null
|
185 |
+
value: ''
|
186 |
+
no_cuda:
|
187 |
+
desc: null
|
188 |
+
value: false
|
189 |
+
num_train_epochs:
|
190 |
+
desc: null
|
191 |
+
value: 5.0
|
192 |
+
output_dir:
|
193 |
+
desc: null
|
194 |
+
value: ./
|
195 |
+
overwrite_cache:
|
196 |
+
desc: null
|
197 |
+
value: false
|
198 |
+
overwrite_output_dir:
|
199 |
+
desc: null
|
200 |
+
value: true
|
201 |
+
pad_to_max_length:
|
202 |
+
desc: null
|
203 |
+
value: false
|
204 |
+
past_index:
|
205 |
+
desc: null
|
206 |
+
value: -1
|
207 |
+
per_device_eval_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 2
|
210 |
+
per_device_train_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: 2
|
213 |
+
per_gpu_eval_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
per_gpu_train_batch_size:
|
217 |
+
desc: null
|
218 |
+
value: null
|
219 |
+
prediction_loss_only:
|
220 |
+
desc: null
|
221 |
+
value: false
|
222 |
+
preprocessing_num_workers:
|
223 |
+
desc: null
|
224 |
+
value: 96
|
225 |
+
push_to_hub:
|
226 |
+
desc: null
|
227 |
+
value: true
|
228 |
+
push_to_hub_model_id:
|
229 |
+
desc: null
|
230 |
+
value: ''
|
231 |
+
push_to_hub_organization:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
push_to_hub_token:
|
235 |
+
desc: null
|
236 |
+
value: null
|
237 |
+
remove_unused_columns:
|
238 |
+
desc: null
|
239 |
+
value: true
|
240 |
+
report_to:
|
241 |
+
desc: null
|
242 |
+
value:
|
243 |
+
- tensorboard
|
244 |
+
- wandb
|
245 |
+
resume_from_checkpoint:
|
246 |
+
desc: null
|
247 |
+
value: null
|
248 |
+
run_name:
|
249 |
+
desc: null
|
250 |
+
value: ./
|
251 |
+
save_on_each_node:
|
252 |
+
desc: null
|
253 |
+
value: false
|
254 |
+
save_optimizer:
|
255 |
+
desc: null
|
256 |
+
value: true
|
257 |
+
save_steps:
|
258 |
+
desc: null
|
259 |
+
value: 20000
|
260 |
+
save_strategy:
|
261 |
+
desc: null
|
262 |
+
value: IntervalStrategy.STEPS
|
263 |
+
save_total_limit:
|
264 |
+
desc: null
|
265 |
+
value: 5
|
266 |
+
seed:
|
267 |
+
desc: null
|
268 |
+
value: 42
|
269 |
+
sharded_ddp:
|
270 |
+
desc: null
|
271 |
+
value: []
|
272 |
+
skip_memory_metrics:
|
273 |
+
desc: null
|
274 |
+
value: true
|
275 |
+
tokenizer_name:
|
276 |
+
desc: null
|
277 |
+
value: ./
|
278 |
+
tpu_metrics_debug:
|
279 |
+
desc: null
|
280 |
+
value: false
|
281 |
+
tpu_num_cores:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
train_ref_file:
|
285 |
+
desc: null
|
286 |
+
value: null
|
287 |
+
use_fast_tokenizer:
|
288 |
+
desc: null
|
289 |
+
value: true
|
290 |
+
use_legacy_prediction_loop:
|
291 |
+
desc: null
|
292 |
+
value: false
|
293 |
+
validation_ref_file:
|
294 |
+
desc: null
|
295 |
+
value: null
|
296 |
+
validation_split_percentage:
|
297 |
+
desc: null
|
298 |
+
value: 5
|
299 |
+
warmup_ratio:
|
300 |
+
desc: null
|
301 |
+
value: 0.0
|
302 |
+
warmup_steps:
|
303 |
+
desc: null
|
304 |
+
value: 5000
|
305 |
+
weight_decay:
|
306 |
+
desc: null
|
307 |
+
value: 0.0095
|
wandb/run-20210714_221920-s091gfok/files/output.log
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype <class 'jax._src.numpy.lax_numpy.int64'> requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more.
|
2 |
+
lax._check_user_dtype_supported(dtype, "zeros")
|
3 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
|
4 |
+
warnings.warn(
|
5 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
6 |
+
warnings.warn(
|
7 |
+
Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s][22:19:36] - INFO - __main__ - Skipping to epoch 0 step 0
|
8 |
+
Training...: 0%| | 0/503952 [01:21<?, ?it/s]
|
9 |
+
Epoch ... (1/5): 0%| | 0/5 [09:12<?, ?it/s]
|
10 |
+
Traceback (most recent call last):
|
11 |
+
File "./run_mlm_flax.py", line 804, in <module>
|
12 |
+
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
13 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback
|
14 |
+
return fun(*args, **kwargs)
|
15 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped
|
16 |
+
out = pxla.xla_pmap(
|
17 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind
|
18 |
+
return call_bind(self, fun, *args, **params)
|
19 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind
|
20 |
+
outs = primitive.process(top_trace, fun, tracers, params)
|
21 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process
|
22 |
+
return trace.process_map(self, fun, tracers, params)
|
23 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call
|
24 |
+
return primitive.impl(f, *tracers, **params)
|
25 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl
|
26 |
+
return compiled_fun(*args)
|
27 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
28 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
29 |
+
jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
30 |
+
The stack trace below excludes JAX-internal frames.
|
31 |
+
The preceding is the original exception that occurred, unmodified.
|
32 |
+
--------------------
|
33 |
+
The above exception was the direct cause of the following exception:
|
34 |
+
Traceback (most recent call last):
|
35 |
+
File "./run_mlm_flax.py", line 804, in <module>
|
36 |
+
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
37 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
38 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
39 |
+
RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
wandb/run-20210714_221920-s091gfok/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|
wandb/run-20210714_221920-s091gfok/files/wandb-metadata.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2021-07-14T22:19:22.632871",
|
5 |
+
"startedAt": "2021-07-14T22:19:20.670815",
|
6 |
+
"docker": null,
|
7 |
+
"cpu_count": 96,
|
8 |
+
"cuda": null,
|
9 |
+
"args": [
|
10 |
+
"--push_to_hub",
|
11 |
+
"--output_dir=./",
|
12 |
+
"--model_type=big_bird",
|
13 |
+
"--config_name=./",
|
14 |
+
"--tokenizer_name=./",
|
15 |
+
"--max_seq_length=4096",
|
16 |
+
"--weight_decay=0.0095",
|
17 |
+
"--warmup_steps=5000",
|
18 |
+
"--overwrite_output_dir",
|
19 |
+
"--adam_beta1=0.9",
|
20 |
+
"--adam_beta2=0.98",
|
21 |
+
"--logging_steps=500",
|
22 |
+
"--eval_steps=20000",
|
23 |
+
"--num_train_epochs=5",
|
24 |
+
"--preprocessing_num_workers=96",
|
25 |
+
"--save_steps=20000",
|
26 |
+
"--learning_rate=5e-5",
|
27 |
+
"--per_device_train_batch_size=2",
|
28 |
+
"--per_device_eval_batch_size=2",
|
29 |
+
"--save_total_limit=5",
|
30 |
+
"--max_eval_samples=2000",
|
31 |
+
"--overwrite_cache",
|
32 |
+
"False",
|
33 |
+
"--gradient_accumulation_steps=2"
|
34 |
+
],
|
35 |
+
"state": "running",
|
36 |
+
"program": "./run_mlm_flax.py",
|
37 |
+
"codePath": "run_mlm_flax.py",
|
38 |
+
"git": {
|
39 |
+
"remote": "https://huggingface.co/flax-community/pino-roberta-base",
|
40 |
+
"commit": "de71755314d4aa3ab182072c6f0be3de9798a66b"
|
41 |
+
},
|
42 |
+
"email": null,
|
43 |
+
"root": "/home/dat/pino-roberta-base",
|
44 |
+
"host": "t1v-n-f5c06ea1-w-0",
|
45 |
+
"username": "dat",
|
46 |
+
"executable": "/home/dat/pino/bin/python"
|
47 |
+
}
|
wandb/run-20210714_221920-s091gfok/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
wandb/run-20210714_221920-s091gfok/logs/debug-internal.log
ADDED
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-14 22:19:21,314 INFO MainThread:593294 [internal.py:wandb_internal():88] W&B internal server running at pid: 593294, started at: 2021-07-14 22:19:21.314432
|
2 |
+
2021-07-14 22:19:21,317 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: check_version
|
3 |
+
2021-07-14 22:19:21,317 INFO WriterThread:593294 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/run-s091gfok.wandb
|
4 |
+
2021-07-14 22:19:21,318 DEBUG SenderThread:593294 [sender.py:send():179] send: header
|
5 |
+
2021-07-14 22:19:21,318 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: check_version
|
6 |
+
2021-07-14 22:19:21,357 DEBUG SenderThread:593294 [sender.py:send():179] send: run
|
7 |
+
2021-07-14 22:19:21,536 INFO SenderThread:593294 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files
|
8 |
+
2021-07-14 22:19:21,536 INFO SenderThread:593294 [sender.py:_start_run_threads():716] run started: s091gfok with start time 1626301160
|
9 |
+
2021-07-14 22:19:21,536 DEBUG SenderThread:593294 [sender.py:send():179] send: summary
|
10 |
+
2021-07-14 22:19:21,537 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: run_start
|
11 |
+
2021-07-14 22:19:21,537 INFO SenderThread:593294 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
12 |
+
2021-07-14 22:19:22,539 INFO Thread-8 :593294 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-summary.json
|
13 |
+
2021-07-14 22:19:22,632 DEBUG HandlerThread:593294 [meta.py:__init__():39] meta init
|
14 |
+
2021-07-14 22:19:22,632 DEBUG HandlerThread:593294 [meta.py:__init__():53] meta init done
|
15 |
+
2021-07-14 22:19:22,632 DEBUG HandlerThread:593294 [meta.py:probe():210] probe
|
16 |
+
2021-07-14 22:19:22,634 DEBUG HandlerThread:593294 [meta.py:_setup_git():200] setup git
|
17 |
+
2021-07-14 22:19:22,663 DEBUG HandlerThread:593294 [meta.py:_setup_git():207] setup git done
|
18 |
+
2021-07-14 22:19:22,663 DEBUG HandlerThread:593294 [meta.py:_save_pip():57] save pip
|
19 |
+
2021-07-14 22:19:22,664 DEBUG HandlerThread:593294 [meta.py:_save_pip():71] save pip done
|
20 |
+
2021-07-14 22:19:22,664 DEBUG HandlerThread:593294 [meta.py:probe():252] probe done
|
21 |
+
2021-07-14 22:19:22,667 DEBUG SenderThread:593294 [sender.py:send():179] send: files
|
22 |
+
2021-07-14 22:19:22,667 INFO SenderThread:593294 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
|
23 |
+
2021-07-14 22:19:22,674 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
24 |
+
2021-07-14 22:19:22,674 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
25 |
+
2021-07-14 22:19:22,802 DEBUG SenderThread:593294 [sender.py:send():179] send: config
|
26 |
+
2021-07-14 22:19:22,803 DEBUG SenderThread:593294 [sender.py:send():179] send: config
|
27 |
+
2021-07-14 22:19:22,803 DEBUG SenderThread:593294 [sender.py:send():179] send: config
|
28 |
+
2021-07-14 22:19:23,119 INFO Thread-11 :593294 [upload_job.py:push():137] Uploaded file /tmp/tmpn0n6xzzmwandb/2vhpic31-wandb-metadata.json
|
29 |
+
2021-07-14 22:19:23,537 INFO Thread-8 :593294 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-metadata.json
|
30 |
+
2021-07-14 22:19:23,537 INFO Thread-8 :593294 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/requirements.txt
|
31 |
+
2021-07-14 22:19:23,538 INFO Thread-8 :593294 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log
|
32 |
+
2021-07-14 22:19:37,543 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log
|
33 |
+
2021-07-14 22:19:37,804 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
34 |
+
2021-07-14 22:19:37,804 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
35 |
+
2021-07-14 22:19:39,545 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log
|
36 |
+
2021-07-14 22:19:50,715 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
37 |
+
2021-07-14 22:19:52,550 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/config.yaml
|
38 |
+
2021-07-14 22:19:52,936 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
39 |
+
2021-07-14 22:19:52,936 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
40 |
+
2021-07-14 22:20:08,079 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
41 |
+
2021-07-14 22:20:08,080 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
42 |
+
2021-07-14 22:20:20,789 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
43 |
+
2021-07-14 22:20:23,215 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
44 |
+
2021-07-14 22:20:23,215 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
45 |
+
2021-07-14 22:20:38,362 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
46 |
+
2021-07-14 22:20:38,363 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
47 |
+
2021-07-14 22:20:50,861 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
48 |
+
2021-07-14 22:20:53,496 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
49 |
+
2021-07-14 22:20:53,496 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
50 |
+
2021-07-14 22:21:08,625 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
51 |
+
2021-07-14 22:21:08,625 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
52 |
+
2021-07-14 22:21:20,932 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
53 |
+
2021-07-14 22:21:23,756 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
54 |
+
2021-07-14 22:21:23,757 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
55 |
+
2021-07-14 22:21:38,885 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
56 |
+
2021-07-14 22:21:38,886 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
57 |
+
2021-07-14 22:21:50,997 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
58 |
+
2021-07-14 22:21:54,016 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
59 |
+
2021-07-14 22:21:54,016 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
60 |
+
2021-07-14 22:22:09,146 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
61 |
+
2021-07-14 22:22:09,147 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
62 |
+
2021-07-14 22:22:21,114 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
63 |
+
2021-07-14 22:22:24,279 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
64 |
+
2021-07-14 22:22:24,279 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
65 |
+
2021-07-14 22:22:39,412 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
66 |
+
2021-07-14 22:22:39,413 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
67 |
+
2021-07-14 22:22:51,192 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
68 |
+
2021-07-14 22:22:54,548 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
69 |
+
2021-07-14 22:22:54,548 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
70 |
+
2021-07-14 22:23:09,678 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
71 |
+
2021-07-14 22:23:09,678 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
72 |
+
2021-07-14 22:23:21,267 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
73 |
+
2021-07-14 22:23:24,814 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
74 |
+
2021-07-14 22:23:24,814 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
75 |
+
2021-07-14 22:23:39,949 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
76 |
+
2021-07-14 22:23:39,949 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
77 |
+
2021-07-14 22:23:51,337 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
78 |
+
2021-07-14 22:23:55,081 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
79 |
+
2021-07-14 22:23:55,082 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
80 |
+
2021-07-14 22:24:10,212 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
81 |
+
2021-07-14 22:24:10,212 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
82 |
+
2021-07-14 22:24:21,405 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
83 |
+
2021-07-14 22:24:25,345 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
84 |
+
2021-07-14 22:24:25,346 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
85 |
+
2021-07-14 22:24:40,483 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
86 |
+
2021-07-14 22:24:40,483 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
87 |
+
2021-07-14 22:24:51,475 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
88 |
+
2021-07-14 22:24:55,615 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
89 |
+
2021-07-14 22:24:55,615 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
90 |
+
2021-07-14 22:25:10,746 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
91 |
+
2021-07-14 22:25:10,746 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
92 |
+
2021-07-14 22:25:21,548 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
93 |
+
2021-07-14 22:25:25,876 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
94 |
+
2021-07-14 22:25:25,876 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
95 |
+
2021-07-14 22:25:41,015 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
96 |
+
2021-07-14 22:25:41,016 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
97 |
+
2021-07-14 22:25:51,619 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
98 |
+
2021-07-14 22:25:56,148 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
99 |
+
2021-07-14 22:25:56,148 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
100 |
+
2021-07-14 22:26:11,280 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
101 |
+
2021-07-14 22:26:11,280 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
102 |
+
2021-07-14 22:26:21,695 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
103 |
+
2021-07-14 22:26:26,412 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
104 |
+
2021-07-14 22:26:26,413 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
105 |
+
2021-07-14 22:26:41,546 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
106 |
+
2021-07-14 22:26:41,547 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
107 |
+
2021-07-14 22:26:51,772 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
108 |
+
2021-07-14 22:26:56,683 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
109 |
+
2021-07-14 22:26:56,683 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
110 |
+
2021-07-14 22:27:11,816 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
111 |
+
2021-07-14 22:27:11,816 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
112 |
+
2021-07-14 22:27:21,849 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
113 |
+
2021-07-14 22:27:26,950 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
114 |
+
2021-07-14 22:27:26,950 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
115 |
+
2021-07-14 22:27:29,710 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log
|
116 |
+
2021-07-14 22:27:42,097 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
117 |
+
2021-07-14 22:27:42,097 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
118 |
+
2021-07-14 22:27:51,925 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
119 |
+
2021-07-14 22:27:57,249 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
120 |
+
2021-07-14 22:27:57,250 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
121 |
+
2021-07-14 22:28:12,383 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
122 |
+
2021-07-14 22:28:12,384 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
123 |
+
2021-07-14 22:28:22,007 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
124 |
+
2021-07-14 22:28:27,521 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
125 |
+
2021-07-14 22:28:27,522 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
126 |
+
2021-07-14 22:28:42,658 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
|
127 |
+
2021-07-14 22:28:42,658 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
|
128 |
+
2021-07-14 22:28:49,741 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log
|
129 |
+
2021-07-14 22:28:51,116 DEBUG SenderThread:593294 [sender.py:send():179] send: telemetry
|
130 |
+
2021-07-14 22:28:51,116 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
|
131 |
+
2021-07-14 22:28:51,116 DEBUG SenderThread:593294 [sender.py:send():179] send: exit
|
132 |
+
2021-07-14 22:28:51,116 INFO SenderThread:593294 [sender.py:send_exit():287] handling exit code: 1
|
133 |
+
2021-07-14 22:28:51,117 INFO SenderThread:593294 [sender.py:send_exit():295] send defer
|
134 |
+
2021-07-14 22:28:51,117 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
|
135 |
+
2021-07-14 22:28:51,118 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
|
136 |
+
2021-07-14 22:28:51,118 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 0
|
137 |
+
2021-07-14 22:28:51,118 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
|
138 |
+
2021-07-14 22:28:51,118 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 0
|
139 |
+
2021-07-14 22:28:51,118 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 1
|
140 |
+
2021-07-14 22:28:51,118 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
|
141 |
+
2021-07-14 22:28:51,118 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 1
|
142 |
+
2021-07-14 22:28:51,182 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
|
143 |
+
2021-07-14 22:28:51,182 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 1
|
144 |
+
2021-07-14 22:28:51,183 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 2
|
145 |
+
2021-07-14 22:28:51,183 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
|
146 |
+
2021-07-14 22:28:51,183 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
|
147 |
+
2021-07-14 22:28:51,183 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 2
|
148 |
+
2021-07-14 22:28:51,184 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
|
149 |
+
2021-07-14 22:28:51,184 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 2
|
150 |
+
2021-07-14 22:28:51,184 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 3
|
151 |
+
2021-07-14 22:28:51,184 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
|
152 |
+
2021-07-14 22:28:51,184 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 3
|
153 |
+
2021-07-14 22:28:51,184 DEBUG SenderThread:593294 [sender.py:send():179] send: summary
|
154 |
+
2021-07-14 22:28:51,185 INFO SenderThread:593294 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
155 |
+
2021-07-14 22:28:51,185 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
|
156 |
+
2021-07-14 22:28:51,185 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 3
|
157 |
+
2021-07-14 22:28:51,185 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 4
|
158 |
+
2021-07-14 22:28:51,185 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
|
159 |
+
2021-07-14 22:28:51,185 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 4
|
160 |
+
2021-07-14 22:28:51,185 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
|
161 |
+
2021-07-14 22:28:51,185 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 4
|
162 |
+
2021-07-14 22:28:51,220 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
|
163 |
+
2021-07-14 22:28:51,361 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 5
|
164 |
+
2021-07-14 22:28:51,361 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
|
165 |
+
2021-07-14 22:28:51,361 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
|
166 |
+
2021-07-14 22:28:51,362 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 5
|
167 |
+
2021-07-14 22:28:51,362 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
|
168 |
+
2021-07-14 22:28:51,362 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 5
|
169 |
+
2021-07-14 22:28:51,362 INFO SenderThread:593294 [dir_watcher.py:finish():282] shutting down directory watcher
|
170 |
+
2021-07-14 22:28:51,463 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
|
171 |
+
2021-07-14 22:28:51,742 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-summary.json
|
172 |
+
2021-07-14 22:28:51,743 INFO SenderThread:593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/config.yaml
|
173 |
+
2021-07-14 22:28:51,743 INFO SenderThread:593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log
|
174 |
+
2021-07-14 22:28:51,743 INFO SenderThread:593294 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files
|
175 |
+
2021-07-14 22:28:51,743 INFO SenderThread:593294 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/requirements.txt requirements.txt
|
176 |
+
2021-07-14 22:28:51,744 INFO SenderThread:593294 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log output.log
|
177 |
+
2021-07-14 22:28:51,744 INFO SenderThread:593294 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-metadata.json wandb-metadata.json
|
178 |
+
2021-07-14 22:28:51,744 INFO SenderThread:593294 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/config.yaml config.yaml
|
179 |
+
2021-07-14 22:28:51,744 INFO SenderThread:593294 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-summary.json wandb-summary.json
|
180 |
+
2021-07-14 22:28:51,750 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 6
|
181 |
+
2021-07-14 22:28:51,750 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
|
182 |
+
2021-07-14 22:28:51,751 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
|
183 |
+
2021-07-14 22:28:51,751 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 6
|
184 |
+
2021-07-14 22:28:51,754 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
|
185 |
+
2021-07-14 22:28:51,754 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 6
|
186 |
+
2021-07-14 22:28:51,754 INFO SenderThread:593294 [file_pusher.py:finish():177] shutting down file pusher
|
187 |
+
2021-07-14 22:28:51,856 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
|
188 |
+
2021-07-14 22:28:51,856 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
|
189 |
+
2021-07-14 22:28:51,958 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
|
190 |
+
2021-07-14 22:28:51,958 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
|
191 |
+
2021-07-14 22:28:52,060 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
|
192 |
+
2021-07-14 22:28:52,061 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
|
193 |
+
2021-07-14 22:28:52,162 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
|
194 |
+
2021-07-14 22:28:52,163 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
|
195 |
+
2021-07-14 22:28:52,191 INFO Thread-13 :593294 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log
|
196 |
+
2021-07-14 22:28:52,198 INFO Thread-12 :593294 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/requirements.txt
|
197 |
+
2021-07-14 22:28:52,200 INFO Thread-14 :593294 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/config.yaml
|
198 |
+
2021-07-14 22:28:52,212 INFO Thread-15 :593294 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-summary.json
|
199 |
+
2021-07-14 22:28:52,264 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
|
200 |
+
2021-07-14 22:28:52,265 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
|
201 |
+
2021-07-14 22:28:52,366 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
|
202 |
+
2021-07-14 22:28:52,366 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
|
203 |
+
2021-07-14 22:28:52,413 INFO Thread-7 :593294 [sender.py:transition_state():308] send defer: 7
|
204 |
+
2021-07-14 22:28:52,413 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
|
205 |
+
2021-07-14 22:28:52,413 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 7
|
206 |
+
2021-07-14 22:28:52,413 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
|
207 |
+
2021-07-14 22:28:52,414 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 7
|
208 |
+
2021-07-14 22:28:52,468 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
|
209 |
+
2021-07-14 22:28:52,536 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 8
|
210 |
+
2021-07-14 22:28:52,536 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
|
211 |
+
2021-07-14 22:28:52,537 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
|
212 |
+
2021-07-14 22:28:52,537 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 8
|
213 |
+
2021-07-14 22:28:52,537 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
|
214 |
+
2021-07-14 22:28:52,537 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 8
|
215 |
+
2021-07-14 22:28:52,537 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 9
|
216 |
+
2021-07-14 22:28:52,538 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
|
217 |
+
2021-07-14 22:28:52,538 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 9
|
218 |
+
2021-07-14 22:28:52,538 DEBUG SenderThread:593294 [sender.py:send():179] send: final
|
219 |
+
2021-07-14 22:28:52,538 DEBUG SenderThread:593294 [sender.py:send():179] send: footer
|
220 |
+
2021-07-14 22:28:52,538 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
|
221 |
+
2021-07-14 22:28:52,538 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 9
|
222 |
+
2021-07-14 22:28:52,638 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
|
223 |
+
2021-07-14 22:28:52,638 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
|
224 |
+
2021-07-14 22:28:52,638 INFO SenderThread:593294 [file_pusher.py:join():182] waiting for file pusher
|
225 |
+
2021-07-14 22:28:52,640 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: get_summary
|
226 |
+
2021-07-14 22:28:52,640 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: sampled_history
|
227 |
+
2021-07-14 22:28:52,641 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: shutdown
|
228 |
+
2021-07-14 22:28:52,641 INFO HandlerThread:593294 [handler.py:finish():638] shutting down handler
|
229 |
+
2021-07-14 22:28:53,538 INFO WriterThread:593294 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/run-s091gfok.wandb
|
230 |
+
2021-07-14 22:28:53,639 INFO SenderThread:593294 [sender.py:finish():945] shutting down sender
|
231 |
+
2021-07-14 22:28:53,639 INFO SenderThread:593294 [file_pusher.py:finish():177] shutting down file pusher
|
232 |
+
2021-07-14 22:28:53,639 INFO SenderThread:593294 [file_pusher.py:join():182] waiting for file pusher
|
233 |
+
2021-07-14 22:28:53,641 INFO MainThread:593294 [internal.py:handle_exit():78] Internal process exited
|
wandb/run-20210714_221920-s091gfok/logs/debug.log
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_setup.py:_flush():69] setting env: {}
|
2 |
+
2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_setup.py:_flush():69] setting login settings: {}
|
3 |
+
2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/logs/debug.log
|
4 |
+
2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/logs/debug-internal.log
|
5 |
+
2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_init.py:init():370] calling init triggers
|
6 |
+
2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
|
7 |
+
config: {}
|
8 |
+
2021-07-14 22:19:20,673 INFO MainThread:592040 [wandb_init.py:init():419] starting backend
|
9 |
+
2021-07-14 22:19:20,673 INFO MainThread:592040 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
10 |
+
2021-07-14 22:19:20,717 INFO MainThread:592040 [backend.py:ensure_launched():135] starting backend process...
|
11 |
+
2021-07-14 22:19:20,761 INFO MainThread:592040 [backend.py:ensure_launched():139] started backend process with pid: 593294
|
12 |
+
2021-07-14 22:19:20,763 INFO MainThread:592040 [wandb_init.py:init():424] backend started and connected
|
13 |
+
2021-07-14 22:19:20,766 INFO MainThread:592040 [wandb_init.py:init():472] updated telemetry
|
14 |
+
2021-07-14 22:19:20,766 INFO MainThread:592040 [wandb_init.py:init():491] communicating current version
|
15 |
+
2021-07-14 22:19:21,355 INFO MainThread:592040 [wandb_init.py:init():496] got version response
|
16 |
+
2021-07-14 22:19:21,356 INFO MainThread:592040 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
|
17 |
+
2021-07-14 22:19:21,536 INFO MainThread:592040 [wandb_init.py:init():529] starting run threads in backend
|
18 |
+
2021-07-14 22:19:22,670 INFO MainThread:592040 [wandb_run.py:_console_start():1623] atexit reg
|
19 |
+
2021-07-14 22:19:22,671 INFO MainThread:592040 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
|
20 |
+
2021-07-14 22:19:22,672 INFO MainThread:592040 [wandb_run.py:_redirect():1502] Redirecting console.
|
21 |
+
2021-07-14 22:19:22,674 INFO MainThread:592040 [wandb_run.py:_redirect():1558] Redirects installed.
|
22 |
+
2021-07-14 22:19:22,674 INFO MainThread:592040 [wandb_init.py:init():554] run started, returning control to user process
|
23 |
+
2021-07-14 22:19:22,681 INFO MainThread:592040 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_22-19-13_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
+
2021-07-14 22:19:22,683 INFO MainThread:592040 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
|
25 |
+
2021-07-14 22:19:22,685 INFO MainThread:592040 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000}
|
26 |
+
2021-07-14 22:28:48,857 INFO MainThread:592040 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
|
27 |
+
2021-07-14 22:28:48,859 INFO MainThread:592040 [wandb_run.py:_restore():1565] restore
|
28 |
+
2021-07-14 22:28:51,118 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
29 |
+
wandb_count: 1
|
30 |
+
}
|
31 |
+
pusher_stats {
|
32 |
+
uploaded_bytes: 1447
|
33 |
+
total_bytes: 1447
|
34 |
+
}
|
35 |
+
|
36 |
+
2021-07-14 22:28:51,362 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
37 |
+
wandb_count: 1
|
38 |
+
}
|
39 |
+
pusher_stats {
|
40 |
+
uploaded_bytes: 1447
|
41 |
+
total_bytes: 1447
|
42 |
+
}
|
43 |
+
|
44 |
+
2021-07-14 22:28:51,754 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
45 |
+
wandb_count: 4
|
46 |
+
}
|
47 |
+
pusher_stats {
|
48 |
+
uploaded_bytes: 1447
|
49 |
+
total_bytes: 11398
|
50 |
+
}
|
51 |
+
|
52 |
+
2021-07-14 22:28:51,857 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
53 |
+
wandb_count: 5
|
54 |
+
}
|
55 |
+
pusher_stats {
|
56 |
+
uploaded_bytes: 1447
|
57 |
+
total_bytes: 11400
|
58 |
+
}
|
59 |
+
|
60 |
+
2021-07-14 22:28:51,959 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
61 |
+
wandb_count: 5
|
62 |
+
}
|
63 |
+
pusher_stats {
|
64 |
+
uploaded_bytes: 11400
|
65 |
+
total_bytes: 11400
|
66 |
+
}
|
67 |
+
|
68 |
+
2021-07-14 22:28:52,061 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
69 |
+
wandb_count: 5
|
70 |
+
}
|
71 |
+
pusher_stats {
|
72 |
+
uploaded_bytes: 11400
|
73 |
+
total_bytes: 11400
|
74 |
+
}
|
75 |
+
|
76 |
+
2021-07-14 22:28:52,163 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
77 |
+
wandb_count: 5
|
78 |
+
}
|
79 |
+
pusher_stats {
|
80 |
+
uploaded_bytes: 11400
|
81 |
+
total_bytes: 11400
|
82 |
+
}
|
83 |
+
|
84 |
+
2021-07-14 22:28:52,265 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
85 |
+
wandb_count: 5
|
86 |
+
}
|
87 |
+
pusher_stats {
|
88 |
+
uploaded_bytes: 11400
|
89 |
+
total_bytes: 11400
|
90 |
+
}
|
91 |
+
|
92 |
+
2021-07-14 22:28:52,367 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
93 |
+
wandb_count: 5
|
94 |
+
}
|
95 |
+
pusher_stats {
|
96 |
+
uploaded_bytes: 11400
|
97 |
+
total_bytes: 11400
|
98 |
+
}
|
99 |
+
|
100 |
+
2021-07-14 22:28:52,537 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
101 |
+
wandb_count: 5
|
102 |
+
}
|
103 |
+
pusher_stats {
|
104 |
+
uploaded_bytes: 11400
|
105 |
+
total_bytes: 11400
|
106 |
+
}
|
107 |
+
|
108 |
+
2021-07-14 22:28:52,639 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
|
109 |
+
exit_result {
|
110 |
+
}
|
111 |
+
file_counts {
|
112 |
+
wandb_count: 5
|
113 |
+
}
|
114 |
+
pusher_stats {
|
115 |
+
uploaded_bytes: 11400
|
116 |
+
total_bytes: 11400
|
117 |
+
}
|
118 |
+
|
119 |
+
2021-07-14 22:28:53,943 INFO MainThread:592040 [wandb_run.py:_show_files():1937] logging synced files
|
wandb/run-20210714_221920-s091gfok/run-s091gfok.wandb
ADDED
Binary file (11.5 kB). View file
|
|
wandb/run-20210714_222920-2p7mu4rm/files/config.yaml
ADDED
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
2:
|
17 |
+
- 3
|
18 |
+
- 11
|
19 |
+
4: 3.8.10
|
20 |
+
5: 0.10.33
|
21 |
+
6: 4.9.0.dev0
|
22 |
+
8:
|
23 |
+
- 5
|
24 |
+
adafactor:
|
25 |
+
desc: null
|
26 |
+
value: false
|
27 |
+
adam_beta1:
|
28 |
+
desc: null
|
29 |
+
value: 0.9
|
30 |
+
adam_beta2:
|
31 |
+
desc: null
|
32 |
+
value: 0.98
|
33 |
+
adam_epsilon:
|
34 |
+
desc: null
|
35 |
+
value: 1.0e-08
|
36 |
+
cache_dir:
|
37 |
+
desc: null
|
38 |
+
value: null
|
39 |
+
config_name:
|
40 |
+
desc: null
|
41 |
+
value: ./
|
42 |
+
dataloader_drop_last:
|
43 |
+
desc: null
|
44 |
+
value: false
|
45 |
+
dataloader_num_workers:
|
46 |
+
desc: null
|
47 |
+
value: 0
|
48 |
+
dataloader_pin_memory:
|
49 |
+
desc: null
|
50 |
+
value: true
|
51 |
+
dataset_config_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
dataset_name:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
ddp_find_unused_parameters:
|
58 |
+
desc: null
|
59 |
+
value: null
|
60 |
+
debug:
|
61 |
+
desc: null
|
62 |
+
value: []
|
63 |
+
deepspeed:
|
64 |
+
desc: null
|
65 |
+
value: null
|
66 |
+
disable_tqdm:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_eval:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_predict:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
do_train:
|
76 |
+
desc: null
|
77 |
+
value: false
|
78 |
+
dtype:
|
79 |
+
desc: null
|
80 |
+
value: float32
|
81 |
+
eval_accumulation_steps:
|
82 |
+
desc: null
|
83 |
+
value: null
|
84 |
+
eval_steps:
|
85 |
+
desc: null
|
86 |
+
value: 20000
|
87 |
+
evaluation_strategy:
|
88 |
+
desc: null
|
89 |
+
value: IntervalStrategy.NO
|
90 |
+
fp16:
|
91 |
+
desc: null
|
92 |
+
value: false
|
93 |
+
fp16_backend:
|
94 |
+
desc: null
|
95 |
+
value: auto
|
96 |
+
fp16_full_eval:
|
97 |
+
desc: null
|
98 |
+
value: false
|
99 |
+
fp16_opt_level:
|
100 |
+
desc: null
|
101 |
+
value: O1
|
102 |
+
gradient_accumulation_steps:
|
103 |
+
desc: null
|
104 |
+
value: 4
|
105 |
+
greater_is_better:
|
106 |
+
desc: null
|
107 |
+
value: null
|
108 |
+
group_by_length:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
ignore_data_skip:
|
112 |
+
desc: null
|
113 |
+
value: false
|
114 |
+
label_names:
|
115 |
+
desc: null
|
116 |
+
value: null
|
117 |
+
label_smoothing_factor:
|
118 |
+
desc: null
|
119 |
+
value: 0.0
|
120 |
+
learning_rate:
|
121 |
+
desc: null
|
122 |
+
value: 5.0e-05
|
123 |
+
length_column_name:
|
124 |
+
desc: null
|
125 |
+
value: length
|
126 |
+
line_by_line:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
load_best_model_at_end:
|
130 |
+
desc: null
|
131 |
+
value: false
|
132 |
+
local_rank:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_level_replica:
|
139 |
+
desc: null
|
140 |
+
value: -1
|
141 |
+
log_on_each_node:
|
142 |
+
desc: null
|
143 |
+
value: true
|
144 |
+
logging_dir:
|
145 |
+
desc: null
|
146 |
+
value: ./runs/Jul14_22-29-13_t1v-n-f5c06ea1-w-0
|
147 |
+
logging_first_step:
|
148 |
+
desc: null
|
149 |
+
value: false
|
150 |
+
logging_steps:
|
151 |
+
desc: null
|
152 |
+
value: 500
|
153 |
+
logging_strategy:
|
154 |
+
desc: null
|
155 |
+
value: IntervalStrategy.STEPS
|
156 |
+
lr_scheduler_type:
|
157 |
+
desc: null
|
158 |
+
value: SchedulerType.LINEAR
|
159 |
+
max_eval_samples:
|
160 |
+
desc: null
|
161 |
+
value: 2000
|
162 |
+
max_grad_norm:
|
163 |
+
desc: null
|
164 |
+
value: 1.0
|
165 |
+
max_seq_length:
|
166 |
+
desc: null
|
167 |
+
value: 4096
|
168 |
+
max_steps:
|
169 |
+
desc: null
|
170 |
+
value: -1
|
171 |
+
metric_for_best_model:
|
172 |
+
desc: null
|
173 |
+
value: null
|
174 |
+
mlm_probability:
|
175 |
+
desc: null
|
176 |
+
value: 0.15
|
177 |
+
model_name_or_path:
|
178 |
+
desc: null
|
179 |
+
value: null
|
180 |
+
model_type:
|
181 |
+
desc: null
|
182 |
+
value: big_bird
|
183 |
+
mp_parameters:
|
184 |
+
desc: null
|
185 |
+
value: ''
|
186 |
+
no_cuda:
|
187 |
+
desc: null
|
188 |
+
value: false
|
189 |
+
num_train_epochs:
|
190 |
+
desc: null
|
191 |
+
value: 5.0
|
192 |
+
output_dir:
|
193 |
+
desc: null
|
194 |
+
value: ./
|
195 |
+
overwrite_cache:
|
196 |
+
desc: null
|
197 |
+
value: false
|
198 |
+
overwrite_output_dir:
|
199 |
+
desc: null
|
200 |
+
value: true
|
201 |
+
pad_to_max_length:
|
202 |
+
desc: null
|
203 |
+
value: false
|
204 |
+
past_index:
|
205 |
+
desc: null
|
206 |
+
value: -1
|
207 |
+
per_device_eval_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 2
|
210 |
+
per_device_train_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: 2
|
213 |
+
per_gpu_eval_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
per_gpu_train_batch_size:
|
217 |
+
desc: null
|
218 |
+
value: null
|
219 |
+
prediction_loss_only:
|
220 |
+
desc: null
|
221 |
+
value: false
|
222 |
+
preprocessing_num_workers:
|
223 |
+
desc: null
|
224 |
+
value: 96
|
225 |
+
push_to_hub:
|
226 |
+
desc: null
|
227 |
+
value: true
|
228 |
+
push_to_hub_model_id:
|
229 |
+
desc: null
|
230 |
+
value: ''
|
231 |
+
push_to_hub_organization:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
push_to_hub_token:
|
235 |
+
desc: null
|
236 |
+
value: null
|
237 |
+
remove_unused_columns:
|
238 |
+
desc: null
|
239 |
+
value: true
|
240 |
+
report_to:
|
241 |
+
desc: null
|
242 |
+
value:
|
243 |
+
- tensorboard
|
244 |
+
- wandb
|
245 |
+
resume_from_checkpoint:
|
246 |
+
desc: null
|
247 |
+
value: null
|
248 |
+
run_name:
|
249 |
+
desc: null
|
250 |
+
value: ./
|
251 |
+
save_on_each_node:
|
252 |
+
desc: null
|
253 |
+
value: false
|
254 |
+
save_optimizer:
|
255 |
+
desc: null
|
256 |
+
value: true
|
257 |
+
save_steps:
|
258 |
+
desc: null
|
259 |
+
value: 20000
|
260 |
+
save_strategy:
|
261 |
+
desc: null
|
262 |
+
value: IntervalStrategy.STEPS
|
263 |
+
save_total_limit:
|
264 |
+
desc: null
|
265 |
+
value: 5
|
266 |
+
seed:
|
267 |
+
desc: null
|
268 |
+
value: 42
|
269 |
+
sharded_ddp:
|
270 |
+
desc: null
|
271 |
+
value: []
|
272 |
+
skip_memory_metrics:
|
273 |
+
desc: null
|
274 |
+
value: true
|
275 |
+
tokenizer_name:
|
276 |
+
desc: null
|
277 |
+
value: ./
|
278 |
+
tpu_metrics_debug:
|
279 |
+
desc: null
|
280 |
+
value: false
|
281 |
+
tpu_num_cores:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
train_ref_file:
|
285 |
+
desc: null
|
286 |
+
value: null
|
287 |
+
use_fast_tokenizer:
|
288 |
+
desc: null
|
289 |
+
value: true
|
290 |
+
use_legacy_prediction_loop:
|
291 |
+
desc: null
|
292 |
+
value: false
|
293 |
+
validation_ref_file:
|
294 |
+
desc: null
|
295 |
+
value: null
|
296 |
+
validation_split_percentage:
|
297 |
+
desc: null
|
298 |
+
value: 5
|
299 |
+
warmup_ratio:
|
300 |
+
desc: null
|
301 |
+
value: 0.0
|
302 |
+
warmup_steps:
|
303 |
+
desc: null
|
304 |
+
value: 5000
|
305 |
+
weight_decay:
|
306 |
+
desc: null
|
307 |
+
value: 0.0095
|
wandb/run-20210714_222920-2p7mu4rm/files/output.log
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype <class 'jax._src.numpy.lax_numpy.int64'> requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more.
|
2 |
+
lax._check_user_dtype_supported(dtype, "zeros")
|
3 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
|
4 |
+
warnings.warn(
|
5 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
6 |
+
warnings.warn(
|
7 |
+
Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s][22:29:35] - INFO - __main__ - Skipping to epoch 0 step 0
|
8 |
+
Training...: 0%| | 0/503952 [01:24<?, ?it/s]
|
9 |
+
Epoch ... (1/5): 0%| | 0/5 [09:07<?, ?it/s]
|
10 |
+
Traceback (most recent call last):
|
11 |
+
File "./run_mlm_flax.py", line 804, in <module>
|
12 |
+
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
13 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback
|
14 |
+
return fun(*args, **kwargs)
|
15 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped
|
16 |
+
out = pxla.xla_pmap(
|
17 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind
|
18 |
+
return call_bind(self, fun, *args, **params)
|
19 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind
|
20 |
+
outs = primitive.process(top_trace, fun, tracers, params)
|
21 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process
|
22 |
+
return trace.process_map(self, fun, tracers, params)
|
23 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call
|
24 |
+
return primitive.impl(f, *tracers, **params)
|
25 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl
|
26 |
+
return compiled_fun(*args)
|
27 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
28 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
29 |
+
jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
30 |
+
The stack trace below excludes JAX-internal frames.
|
31 |
+
The preceding is the original exception that occurred, unmodified.
|
32 |
+
--------------------
|
33 |
+
The above exception was the direct cause of the following exception:
|
34 |
+
Traceback (most recent call last):
|
35 |
+
File "./run_mlm_flax.py", line 804, in <module>
|
36 |
+
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
37 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
38 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
39 |
+
RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|
wandb/run-20210714_222920-2p7mu4rm/files/wandb-metadata.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2021-07-14T22:29:22.512026",
|
5 |
+
"startedAt": "2021-07-14T22:29:20.509023",
|
6 |
+
"docker": null,
|
7 |
+
"cpu_count": 96,
|
8 |
+
"cuda": null,
|
9 |
+
"args": [
|
10 |
+
"--push_to_hub",
|
11 |
+
"--output_dir=./",
|
12 |
+
"--model_type=big_bird",
|
13 |
+
"--config_name=./",
|
14 |
+
"--tokenizer_name=./",
|
15 |
+
"--max_seq_length=4096",
|
16 |
+
"--weight_decay=0.0095",
|
17 |
+
"--warmup_steps=5000",
|
18 |
+
"--overwrite_output_dir",
|
19 |
+
"--adam_beta1=0.9",
|
20 |
+
"--adam_beta2=0.98",
|
21 |
+
"--logging_steps=500",
|
22 |
+
"--eval_steps=20000",
|
23 |
+
"--num_train_epochs=5",
|
24 |
+
"--preprocessing_num_workers=96",
|
25 |
+
"--save_steps=20000",
|
26 |
+
"--learning_rate=5e-5",
|
27 |
+
"--per_device_train_batch_size=2",
|
28 |
+
"--per_device_eval_batch_size=2",
|
29 |
+
"--save_total_limit=5",
|
30 |
+
"--max_eval_samples=2000",
|
31 |
+
"--overwrite_cache",
|
32 |
+
"False",
|
33 |
+
"--gradient_accumulation_steps=4"
|
34 |
+
],
|
35 |
+
"state": "running",
|
36 |
+
"program": "./run_mlm_flax.py",
|
37 |
+
"codePath": "run_mlm_flax.py",
|
38 |
+
"git": {
|
39 |
+
"remote": "https://huggingface.co/flax-community/pino-roberta-base",
|
40 |
+
"commit": "de71755314d4aa3ab182072c6f0be3de9798a66b"
|
41 |
+
},
|
42 |
+
"email": null,
|
43 |
+
"root": "/home/dat/pino-roberta-base",
|
44 |
+
"host": "t1v-n-f5c06ea1-w-0",
|
45 |
+
"username": "dat",
|
46 |
+
"executable": "/home/dat/pino/bin/python"
|
47 |
+
}
|
wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
wandb/run-20210714_222920-2p7mu4rm/logs/debug-internal.log
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-14 22:29:21,187 INFO MainThread:596546 [internal.py:wandb_internal():88] W&B internal server running at pid: 596546, started at: 2021-07-14 22:29:21.187444
|
2 |
+
2021-07-14 22:29:21,189 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: check_version
|
3 |
+
2021-07-14 22:29:21,189 INFO WriterThread:596546 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/run-2p7mu4rm.wandb
|
4 |
+
2021-07-14 22:29:21,191 DEBUG SenderThread:596546 [sender.py:send():179] send: header
|
5 |
+
2021-07-14 22:29:21,191 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: check_version
|
6 |
+
2021-07-14 22:29:21,226 DEBUG SenderThread:596546 [sender.py:send():179] send: run
|
7 |
+
2021-07-14 22:29:21,391 INFO SenderThread:596546 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files
|
8 |
+
2021-07-14 22:29:21,391 INFO SenderThread:596546 [sender.py:_start_run_threads():716] run started: 2p7mu4rm with start time 1626301760
|
9 |
+
2021-07-14 22:29:21,391 DEBUG SenderThread:596546 [sender.py:send():179] send: summary
|
10 |
+
2021-07-14 22:29:21,391 INFO SenderThread:596546 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
11 |
+
2021-07-14 22:29:21,392 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: run_start
|
12 |
+
2021-07-14 22:29:22,397 INFO Thread-8 :596546 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json
|
13 |
+
2021-07-14 22:29:22,511 DEBUG HandlerThread:596546 [meta.py:__init__():39] meta init
|
14 |
+
2021-07-14 22:29:22,511 DEBUG HandlerThread:596546 [meta.py:__init__():53] meta init done
|
15 |
+
2021-07-14 22:29:22,511 DEBUG HandlerThread:596546 [meta.py:probe():210] probe
|
16 |
+
2021-07-14 22:29:22,513 DEBUG HandlerThread:596546 [meta.py:_setup_git():200] setup git
|
17 |
+
2021-07-14 22:29:22,541 DEBUG HandlerThread:596546 [meta.py:_setup_git():207] setup git done
|
18 |
+
2021-07-14 22:29:22,541 DEBUG HandlerThread:596546 [meta.py:_save_pip():57] save pip
|
19 |
+
2021-07-14 22:29:22,542 DEBUG HandlerThread:596546 [meta.py:_save_pip():71] save pip done
|
20 |
+
2021-07-14 22:29:22,542 DEBUG HandlerThread:596546 [meta.py:probe():252] probe done
|
21 |
+
2021-07-14 22:29:22,545 DEBUG SenderThread:596546 [sender.py:send():179] send: files
|
22 |
+
2021-07-14 22:29:22,545 INFO SenderThread:596546 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
|
23 |
+
2021-07-14 22:29:22,551 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
24 |
+
2021-07-14 22:29:22,551 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
25 |
+
2021-07-14 22:29:22,678 DEBUG SenderThread:596546 [sender.py:send():179] send: config
|
26 |
+
2021-07-14 22:29:22,678 DEBUG SenderThread:596546 [sender.py:send():179] send: config
|
27 |
+
2021-07-14 22:29:22,679 DEBUG SenderThread:596546 [sender.py:send():179] send: config
|
28 |
+
2021-07-14 22:29:22,981 INFO Thread-11 :596546 [upload_job.py:push():137] Uploaded file /tmp/tmpkw6g32phwandb/2nns5d67-wandb-metadata.json
|
29 |
+
2021-07-14 22:29:23,396 INFO Thread-8 :596546 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt
|
30 |
+
2021-07-14 22:29:23,396 INFO Thread-8 :596546 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-metadata.json
|
31 |
+
2021-07-14 22:29:23,397 INFO Thread-8 :596546 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log
|
32 |
+
2021-07-14 22:29:37,401 INFO Thread-8 :596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log
|
33 |
+
2021-07-14 22:29:37,681 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
34 |
+
2021-07-14 22:29:37,681 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
35 |
+
2021-07-14 22:29:50,595 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
36 |
+
2021-07-14 22:29:52,407 INFO Thread-8 :596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/config.yaml
|
37 |
+
2021-07-14 22:29:52,815 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
38 |
+
2021-07-14 22:29:52,815 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
39 |
+
2021-07-14 22:30:07,946 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
40 |
+
2021-07-14 22:30:07,947 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
41 |
+
2021-07-14 22:30:20,679 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
42 |
+
2021-07-14 22:30:23,081 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
43 |
+
2021-07-14 22:30:23,081 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
44 |
+
2021-07-14 22:30:38,211 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
45 |
+
2021-07-14 22:30:38,212 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
46 |
+
2021-07-14 22:30:50,744 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
47 |
+
2021-07-14 22:30:53,343 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
48 |
+
2021-07-14 22:30:53,344 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
49 |
+
2021-07-14 22:31:08,475 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
50 |
+
2021-07-14 22:31:08,476 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
51 |
+
2021-07-14 22:31:20,817 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
52 |
+
2021-07-14 22:31:23,611 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
53 |
+
2021-07-14 22:31:23,611 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
54 |
+
2021-07-14 22:31:38,742 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
55 |
+
2021-07-14 22:31:38,742 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
56 |
+
2021-07-14 22:31:50,892 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
57 |
+
2021-07-14 22:31:53,876 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
58 |
+
2021-07-14 22:31:53,876 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
59 |
+
2021-07-14 22:32:09,009 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
60 |
+
2021-07-14 22:32:09,010 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
61 |
+
2021-07-14 22:32:20,968 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
62 |
+
2021-07-14 22:32:24,154 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
63 |
+
2021-07-14 22:32:24,154 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
64 |
+
2021-07-14 22:32:39,289 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
65 |
+
2021-07-14 22:32:39,289 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
66 |
+
2021-07-14 22:32:51,042 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
67 |
+
2021-07-14 22:32:54,420 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
68 |
+
2021-07-14 22:32:54,420 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
69 |
+
2021-07-14 22:33:09,552 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
70 |
+
2021-07-14 22:33:09,552 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
71 |
+
2021-07-14 22:33:21,119 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
72 |
+
2021-07-14 22:33:24,688 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
73 |
+
2021-07-14 22:33:24,689 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
74 |
+
2021-07-14 22:33:39,824 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
75 |
+
2021-07-14 22:33:39,825 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
76 |
+
2021-07-14 22:33:51,197 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
77 |
+
2021-07-14 22:33:54,955 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
78 |
+
2021-07-14 22:33:54,955 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
79 |
+
2021-07-14 22:34:10,085 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
80 |
+
2021-07-14 22:34:10,086 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
81 |
+
2021-07-14 22:34:21,275 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
82 |
+
2021-07-14 22:34:25,221 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
83 |
+
2021-07-14 22:34:25,221 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
84 |
+
2021-07-14 22:34:40,360 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
85 |
+
2021-07-14 22:34:40,360 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
86 |
+
2021-07-14 22:34:51,349 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
87 |
+
2021-07-14 22:34:55,491 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
88 |
+
2021-07-14 22:34:55,491 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
89 |
+
2021-07-14 22:35:10,620 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
90 |
+
2021-07-14 22:35:10,621 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
91 |
+
2021-07-14 22:35:21,421 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
92 |
+
2021-07-14 22:35:25,755 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
93 |
+
2021-07-14 22:35:25,755 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
94 |
+
2021-07-14 22:35:40,915 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
95 |
+
2021-07-14 22:35:40,916 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
96 |
+
2021-07-14 22:35:51,496 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
97 |
+
2021-07-14 22:35:56,049 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
98 |
+
2021-07-14 22:35:56,049 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
99 |
+
2021-07-14 22:36:11,183 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
100 |
+
2021-07-14 22:36:11,184 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
101 |
+
2021-07-14 22:36:21,575 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
102 |
+
2021-07-14 22:36:26,315 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
103 |
+
2021-07-14 22:36:26,316 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
104 |
+
2021-07-14 22:36:41,448 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
105 |
+
2021-07-14 22:36:41,448 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
106 |
+
2021-07-14 22:36:51,652 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
107 |
+
2021-07-14 22:36:56,580 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
108 |
+
2021-07-14 22:36:56,581 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
109 |
+
2021-07-14 22:37:11,712 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
110 |
+
2021-07-14 22:37:11,712 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
111 |
+
2021-07-14 22:37:21,566 INFO Thread-8 :596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log
|
112 |
+
2021-07-14 22:37:21,727 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
113 |
+
2021-07-14 22:37:27,049 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
114 |
+
2021-07-14 22:37:27,050 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
115 |
+
2021-07-14 22:37:42,194 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
116 |
+
2021-07-14 22:37:42,194 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
117 |
+
2021-07-14 22:37:51,805 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
118 |
+
2021-07-14 22:37:57,327 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
119 |
+
2021-07-14 22:37:57,327 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
120 |
+
2021-07-14 22:38:12,463 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
121 |
+
2021-07-14 22:38:12,464 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
122 |
+
2021-07-14 22:38:21,882 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
123 |
+
2021-07-14 22:38:27,596 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
124 |
+
2021-07-14 22:38:27,596 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
125 |
+
2021-07-14 22:38:42,728 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
|
126 |
+
2021-07-14 22:38:42,728 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
|
127 |
+
2021-07-14 22:38:45,598 INFO Thread-8 :596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log
|
128 |
+
2021-07-14 22:38:45,823 DEBUG SenderThread:596546 [sender.py:send():179] send: telemetry
|
129 |
+
2021-07-14 22:38:45,823 DEBUG SenderThread:596546 [sender.py:send():179] send: exit
|
130 |
+
2021-07-14 22:38:45,823 INFO SenderThread:596546 [sender.py:send_exit():287] handling exit code: 1
|
131 |
+
2021-07-14 22:38:45,824 INFO SenderThread:596546 [sender.py:send_exit():295] send defer
|
132 |
+
2021-07-14 22:38:45,824 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
|
133 |
+
2021-07-14 22:38:45,825 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
|
134 |
+
2021-07-14 22:38:45,825 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
|
135 |
+
2021-07-14 22:38:45,825 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 0
|
136 |
+
2021-07-14 22:38:45,825 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
|
137 |
+
2021-07-14 22:38:45,825 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 0
|
138 |
+
2021-07-14 22:38:45,825 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 1
|
139 |
+
2021-07-14 22:38:45,826 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
|
140 |
+
2021-07-14 22:38:45,826 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 1
|
141 |
+
2021-07-14 22:38:45,857 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
|
142 |
+
2021-07-14 22:38:45,857 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 1
|
143 |
+
2021-07-14 22:38:45,857 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 2
|
144 |
+
2021-07-14 22:38:45,857 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
|
145 |
+
2021-07-14 22:38:45,857 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
|
146 |
+
2021-07-14 22:38:45,858 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 2
|
147 |
+
2021-07-14 22:38:45,858 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
|
148 |
+
2021-07-14 22:38:45,858 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 2
|
149 |
+
2021-07-14 22:38:45,858 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 3
|
150 |
+
2021-07-14 22:38:45,859 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
|
151 |
+
2021-07-14 22:38:45,859 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 3
|
152 |
+
2021-07-14 22:38:45,859 DEBUG SenderThread:596546 [sender.py:send():179] send: summary
|
153 |
+
2021-07-14 22:38:45,859 INFO SenderThread:596546 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
154 |
+
2021-07-14 22:38:45,859 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
|
155 |
+
2021-07-14 22:38:45,859 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 3
|
156 |
+
2021-07-14 22:38:45,859 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 4
|
157 |
+
2021-07-14 22:38:45,860 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
|
158 |
+
2021-07-14 22:38:45,860 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 4
|
159 |
+
2021-07-14 22:38:45,860 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
|
160 |
+
2021-07-14 22:38:45,860 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 4
|
161 |
+
2021-07-14 22:38:45,927 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
|
162 |
+
2021-07-14 22:38:46,024 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 5
|
163 |
+
2021-07-14 22:38:46,024 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
|
164 |
+
2021-07-14 22:38:46,024 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
|
165 |
+
2021-07-14 22:38:46,024 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 5
|
166 |
+
2021-07-14 22:38:46,025 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
|
167 |
+
2021-07-14 22:38:46,025 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 5
|
168 |
+
2021-07-14 22:38:46,025 INFO SenderThread:596546 [dir_watcher.py:finish():282] shutting down directory watcher
|
169 |
+
2021-07-14 22:38:46,126 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
|
170 |
+
2021-07-14 22:38:46,598 INFO Thread-8 :596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json
|
171 |
+
2021-07-14 22:38:46,599 INFO SenderThread:596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/config.yaml
|
172 |
+
2021-07-14 22:38:46,599 INFO SenderThread:596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log
|
173 |
+
2021-07-14 22:38:46,599 INFO SenderThread:596546 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files
|
174 |
+
2021-07-14 22:38:46,599 INFO SenderThread:596546 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt requirements.txt
|
175 |
+
2021-07-14 22:38:46,600 INFO SenderThread:596546 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log output.log
|
176 |
+
2021-07-14 22:38:46,600 INFO SenderThread:596546 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-metadata.json wandb-metadata.json
|
177 |
+
2021-07-14 22:38:46,600 INFO SenderThread:596546 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/config.yaml config.yaml
|
178 |
+
2021-07-14 22:38:46,600 INFO SenderThread:596546 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json wandb-summary.json
|
179 |
+
2021-07-14 22:38:46,603 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 6
|
180 |
+
2021-07-14 22:38:46,604 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
|
181 |
+
2021-07-14 22:38:46,607 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
|
182 |
+
2021-07-14 22:38:46,607 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 6
|
183 |
+
2021-07-14 22:38:46,608 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
|
184 |
+
2021-07-14 22:38:46,610 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 6
|
185 |
+
2021-07-14 22:38:46,610 INFO SenderThread:596546 [file_pusher.py:finish():177] shutting down file pusher
|
186 |
+
2021-07-14 22:38:46,708 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
|
187 |
+
2021-07-14 22:38:46,709 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
|
188 |
+
2021-07-14 22:38:46,811 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
|
189 |
+
2021-07-14 22:38:46,811 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
|
190 |
+
2021-07-14 22:38:46,913 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
|
191 |
+
2021-07-14 22:38:46,913 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
|
192 |
+
2021-07-14 22:38:47,015 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
|
193 |
+
2021-07-14 22:38:47,015 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
|
194 |
+
2021-07-14 22:38:47,054 INFO Thread-14 :596546 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/config.yaml
|
195 |
+
2021-07-14 22:38:47,063 INFO Thread-12 :596546 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt
|
196 |
+
2021-07-14 22:38:47,074 INFO Thread-13 :596546 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log
|
197 |
+
2021-07-14 22:38:47,095 INFO Thread-15 :596546 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json
|
198 |
+
2021-07-14 22:38:47,117 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
|
199 |
+
2021-07-14 22:38:47,117 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
|
200 |
+
2021-07-14 22:38:47,219 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
|
201 |
+
2021-07-14 22:38:47,219 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
|
202 |
+
2021-07-14 22:38:47,295 INFO Thread-7 :596546 [sender.py:transition_state():308] send defer: 7
|
203 |
+
2021-07-14 22:38:47,295 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
|
204 |
+
2021-07-14 22:38:47,296 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 7
|
205 |
+
2021-07-14 22:38:47,296 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
|
206 |
+
2021-07-14 22:38:47,296 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 7
|
207 |
+
2021-07-14 22:38:47,321 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
|
208 |
+
2021-07-14 22:38:47,939 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 8
|
209 |
+
2021-07-14 22:38:47,940 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
|
210 |
+
2021-07-14 22:38:47,940 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
|
211 |
+
2021-07-14 22:38:47,940 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 8
|
212 |
+
2021-07-14 22:38:47,940 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
|
213 |
+
2021-07-14 22:38:47,941 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 8
|
214 |
+
2021-07-14 22:38:47,941 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 9
|
215 |
+
2021-07-14 22:38:47,941 DEBUG SenderThread:596546 [sender.py:send():179] send: final
|
216 |
+
2021-07-14 22:38:47,941 DEBUG SenderThread:596546 [sender.py:send():179] send: footer
|
217 |
+
2021-07-14 22:38:47,942 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
|
218 |
+
2021-07-14 22:38:47,942 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 9
|
219 |
+
2021-07-14 22:38:47,942 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
|
220 |
+
2021-07-14 22:38:47,942 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 9
|
221 |
+
2021-07-14 22:38:48,042 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
|
222 |
+
2021-07-14 22:38:48,042 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
|
223 |
+
2021-07-14 22:38:48,042 INFO SenderThread:596546 [file_pusher.py:join():182] waiting for file pusher
|
224 |
+
2021-07-14 22:38:48,044 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: get_summary
|
225 |
+
2021-07-14 22:38:48,044 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: sampled_history
|
226 |
+
2021-07-14 22:38:48,045 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: shutdown
|
227 |
+
2021-07-14 22:38:48,045 INFO HandlerThread:596546 [handler.py:finish():638] shutting down handler
|
228 |
+
2021-07-14 22:38:48,942 INFO WriterThread:596546 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/run-2p7mu4rm.wandb
|
229 |
+
2021-07-14 22:38:49,042 INFO SenderThread:596546 [sender.py:finish():945] shutting down sender
|
230 |
+
2021-07-14 22:38:49,043 INFO SenderThread:596546 [file_pusher.py:finish():177] shutting down file pusher
|
231 |
+
2021-07-14 22:38:49,043 INFO SenderThread:596546 [file_pusher.py:join():182] waiting for file pusher
|
232 |
+
2021-07-14 22:38:49,045 INFO MainThread:596546 [internal.py:handle_exit():78] Internal process exited
|
wandb/run-20210714_222920-2p7mu4rm/logs/debug.log
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-14 22:29:20,510 INFO MainThread:595290 [wandb_setup.py:_flush():69] setting env: {}
|
2 |
+
2021-07-14 22:29:20,510 INFO MainThread:595290 [wandb_setup.py:_flush():69] setting login settings: {}
|
3 |
+
2021-07-14 22:29:20,510 INFO MainThread:595290 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/logs/debug.log
|
4 |
+
2021-07-14 22:29:20,510 INFO MainThread:595290 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/logs/debug-internal.log
|
5 |
+
2021-07-14 22:29:20,511 INFO MainThread:595290 [wandb_init.py:init():370] calling init triggers
|
6 |
+
2021-07-14 22:29:20,511 INFO MainThread:595290 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
|
7 |
+
config: {}
|
8 |
+
2021-07-14 22:29:20,511 INFO MainThread:595290 [wandb_init.py:init():419] starting backend
|
9 |
+
2021-07-14 22:29:20,511 INFO MainThread:595290 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
10 |
+
2021-07-14 22:29:20,555 INFO MainThread:595290 [backend.py:ensure_launched():135] starting backend process...
|
11 |
+
2021-07-14 22:29:20,597 INFO MainThread:595290 [backend.py:ensure_launched():139] started backend process with pid: 596546
|
12 |
+
2021-07-14 22:29:20,599 INFO MainThread:595290 [wandb_init.py:init():424] backend started and connected
|
13 |
+
2021-07-14 22:29:20,602 INFO MainThread:595290 [wandb_init.py:init():472] updated telemetry
|
14 |
+
2021-07-14 22:29:20,603 INFO MainThread:595290 [wandb_init.py:init():491] communicating current version
|
15 |
+
2021-07-14 22:29:21,225 INFO MainThread:595290 [wandb_init.py:init():496] got version response
|
16 |
+
2021-07-14 22:29:21,226 INFO MainThread:595290 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
|
17 |
+
2021-07-14 22:29:21,391 INFO MainThread:595290 [wandb_init.py:init():529] starting run threads in backend
|
18 |
+
2021-07-14 22:29:22,548 INFO MainThread:595290 [wandb_run.py:_console_start():1623] atexit reg
|
19 |
+
2021-07-14 22:29:22,548 INFO MainThread:595290 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
|
20 |
+
2021-07-14 22:29:22,549 INFO MainThread:595290 [wandb_run.py:_redirect():1502] Redirecting console.
|
21 |
+
2021-07-14 22:29:22,551 INFO MainThread:595290 [wandb_run.py:_redirect():1558] Redirects installed.
|
22 |
+
2021-07-14 22:29:22,551 INFO MainThread:595290 [wandb_init.py:init():554] run started, returning control to user process
|
23 |
+
2021-07-14 22:29:22,559 INFO MainThread:595290 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_22-29-13_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
+
2021-07-14 22:29:22,561 INFO MainThread:595290 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
|
25 |
+
2021-07-14 22:29:22,562 INFO MainThread:595290 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000}
|
26 |
+
2021-07-14 22:38:43,366 INFO MainThread:595290 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
|
27 |
+
2021-07-14 22:38:43,368 INFO MainThread:595290 [wandb_run.py:_restore():1565] restore
|
28 |
+
2021-07-14 22:38:45,826 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
29 |
+
wandb_count: 1
|
30 |
+
}
|
31 |
+
pusher_stats {
|
32 |
+
uploaded_bytes: 1447
|
33 |
+
total_bytes: 1447
|
34 |
+
}
|
35 |
+
|
36 |
+
2021-07-14 22:38:46,025 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
37 |
+
wandb_count: 1
|
38 |
+
}
|
39 |
+
pusher_stats {
|
40 |
+
uploaded_bytes: 1447
|
41 |
+
total_bytes: 1447
|
42 |
+
}
|
43 |
+
|
44 |
+
2021-07-14 22:38:46,607 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
45 |
+
wandb_count: 4
|
46 |
+
}
|
47 |
+
pusher_stats {
|
48 |
+
uploaded_bytes: 1447
|
49 |
+
total_bytes: 11500
|
50 |
+
}
|
51 |
+
|
52 |
+
2021-07-14 22:38:46,709 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
53 |
+
wandb_count: 5
|
54 |
+
}
|
55 |
+
pusher_stats {
|
56 |
+
uploaded_bytes: 1447
|
57 |
+
total_bytes: 11502
|
58 |
+
}
|
59 |
+
|
60 |
+
2021-07-14 22:38:46,812 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
61 |
+
wandb_count: 5
|
62 |
+
}
|
63 |
+
pusher_stats {
|
64 |
+
uploaded_bytes: 11502
|
65 |
+
total_bytes: 11502
|
66 |
+
}
|
67 |
+
|
68 |
+
2021-07-14 22:38:46,914 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
69 |
+
wandb_count: 5
|
70 |
+
}
|
71 |
+
pusher_stats {
|
72 |
+
uploaded_bytes: 11502
|
73 |
+
total_bytes: 11502
|
74 |
+
}
|
75 |
+
|
76 |
+
2021-07-14 22:38:47,016 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
77 |
+
wandb_count: 5
|
78 |
+
}
|
79 |
+
pusher_stats {
|
80 |
+
uploaded_bytes: 11502
|
81 |
+
total_bytes: 11502
|
82 |
+
}
|
83 |
+
|
84 |
+
2021-07-14 22:38:47,118 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
85 |
+
wandb_count: 5
|
86 |
+
}
|
87 |
+
pusher_stats {
|
88 |
+
uploaded_bytes: 11502
|
89 |
+
total_bytes: 11502
|
90 |
+
}
|
91 |
+
|
92 |
+
2021-07-14 22:38:47,220 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
93 |
+
wandb_count: 5
|
94 |
+
}
|
95 |
+
pusher_stats {
|
96 |
+
uploaded_bytes: 11502
|
97 |
+
total_bytes: 11502
|
98 |
+
}
|
99 |
+
|
100 |
+
2021-07-14 22:38:47,940 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
101 |
+
wandb_count: 5
|
102 |
+
}
|
103 |
+
pusher_stats {
|
104 |
+
uploaded_bytes: 11502
|
105 |
+
total_bytes: 11502
|
106 |
+
}
|
107 |
+
|
108 |
+
2021-07-14 22:38:48,043 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
|
109 |
+
exit_result {
|
110 |
+
}
|
111 |
+
file_counts {
|
112 |
+
wandb_count: 5
|
113 |
+
}
|
114 |
+
pusher_stats {
|
115 |
+
uploaded_bytes: 11502
|
116 |
+
total_bytes: 11502
|
117 |
+
}
|
118 |
+
|
119 |
+
2021-07-14 22:38:49,338 INFO MainThread:595290 [wandb_run.py:_show_files():1937] logging synced files
|
wandb/run-20210714_222920-2p7mu4rm/run-2p7mu4rm.wandb
ADDED
Binary file (11.6 kB). View file
|
|
wandb/run-20210714_224000-1jvvynqa/files/config.yaml
ADDED
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
2:
|
17 |
+
- 3
|
18 |
+
- 11
|
19 |
+
4: 3.8.10
|
20 |
+
5: 0.10.33
|
21 |
+
6: 4.9.0.dev0
|
22 |
+
8:
|
23 |
+
- 5
|
24 |
+
adafactor:
|
25 |
+
desc: null
|
26 |
+
value: false
|
27 |
+
adam_beta1:
|
28 |
+
desc: null
|
29 |
+
value: 0.9
|
30 |
+
adam_beta2:
|
31 |
+
desc: null
|
32 |
+
value: 0.98
|
33 |
+
adam_epsilon:
|
34 |
+
desc: null
|
35 |
+
value: 1.0e-08
|
36 |
+
cache_dir:
|
37 |
+
desc: null
|
38 |
+
value: null
|
39 |
+
config_name:
|
40 |
+
desc: null
|
41 |
+
value: ./
|
42 |
+
dataloader_drop_last:
|
43 |
+
desc: null
|
44 |
+
value: false
|
45 |
+
dataloader_num_workers:
|
46 |
+
desc: null
|
47 |
+
value: 0
|
48 |
+
dataloader_pin_memory:
|
49 |
+
desc: null
|
50 |
+
value: true
|
51 |
+
dataset_config_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
dataset_name:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
ddp_find_unused_parameters:
|
58 |
+
desc: null
|
59 |
+
value: null
|
60 |
+
debug:
|
61 |
+
desc: null
|
62 |
+
value: []
|
63 |
+
deepspeed:
|
64 |
+
desc: null
|
65 |
+
value: null
|
66 |
+
disable_tqdm:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_eval:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_predict:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
do_train:
|
76 |
+
desc: null
|
77 |
+
value: false
|
78 |
+
dtype:
|
79 |
+
desc: null
|
80 |
+
value: float32
|
81 |
+
eval_accumulation_steps:
|
82 |
+
desc: null
|
83 |
+
value: null
|
84 |
+
eval_steps:
|
85 |
+
desc: null
|
86 |
+
value: 20000
|
87 |
+
evaluation_strategy:
|
88 |
+
desc: null
|
89 |
+
value: IntervalStrategy.NO
|
90 |
+
fp16:
|
91 |
+
desc: null
|
92 |
+
value: false
|
93 |
+
fp16_backend:
|
94 |
+
desc: null
|
95 |
+
value: auto
|
96 |
+
fp16_full_eval:
|
97 |
+
desc: null
|
98 |
+
value: false
|
99 |
+
fp16_opt_level:
|
100 |
+
desc: null
|
101 |
+
value: O1
|
102 |
+
gradient_accumulation_steps:
|
103 |
+
desc: null
|
104 |
+
value: 8
|
105 |
+
greater_is_better:
|
106 |
+
desc: null
|
107 |
+
value: null
|
108 |
+
group_by_length:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
ignore_data_skip:
|
112 |
+
desc: null
|
113 |
+
value: false
|
114 |
+
label_names:
|
115 |
+
desc: null
|
116 |
+
value: null
|
117 |
+
label_smoothing_factor:
|
118 |
+
desc: null
|
119 |
+
value: 0.0
|
120 |
+
learning_rate:
|
121 |
+
desc: null
|
122 |
+
value: 5.0e-05
|
123 |
+
length_column_name:
|
124 |
+
desc: null
|
125 |
+
value: length
|
126 |
+
line_by_line:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
load_best_model_at_end:
|
130 |
+
desc: null
|
131 |
+
value: false
|
132 |
+
local_rank:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_level_replica:
|
139 |
+
desc: null
|
140 |
+
value: -1
|
141 |
+
log_on_each_node:
|
142 |
+
desc: null
|
143 |
+
value: true
|
144 |
+
logging_dir:
|
145 |
+
desc: null
|
146 |
+
value: ./runs/Jul14_22-39-51_t1v-n-f5c06ea1-w-0
|
147 |
+
logging_first_step:
|
148 |
+
desc: null
|
149 |
+
value: false
|
150 |
+
logging_steps:
|
151 |
+
desc: null
|
152 |
+
value: 500
|
153 |
+
logging_strategy:
|
154 |
+
desc: null
|
155 |
+
value: IntervalStrategy.STEPS
|
156 |
+
lr_scheduler_type:
|
157 |
+
desc: null
|
158 |
+
value: SchedulerType.LINEAR
|
159 |
+
max_eval_samples:
|
160 |
+
desc: null
|
161 |
+
value: 2000
|
162 |
+
max_grad_norm:
|
163 |
+
desc: null
|
164 |
+
value: 1.0
|
165 |
+
max_seq_length:
|
166 |
+
desc: null
|
167 |
+
value: 4096
|
168 |
+
max_steps:
|
169 |
+
desc: null
|
170 |
+
value: -1
|
171 |
+
metric_for_best_model:
|
172 |
+
desc: null
|
173 |
+
value: null
|
174 |
+
mlm_probability:
|
175 |
+
desc: null
|
176 |
+
value: 0.15
|
177 |
+
model_name_or_path:
|
178 |
+
desc: null
|
179 |
+
value: null
|
180 |
+
model_type:
|
181 |
+
desc: null
|
182 |
+
value: big_bird
|
183 |
+
mp_parameters:
|
184 |
+
desc: null
|
185 |
+
value: ''
|
186 |
+
no_cuda:
|
187 |
+
desc: null
|
188 |
+
value: false
|
189 |
+
num_train_epochs:
|
190 |
+
desc: null
|
191 |
+
value: 5.0
|
192 |
+
output_dir:
|
193 |
+
desc: null
|
194 |
+
value: ./
|
195 |
+
overwrite_cache:
|
196 |
+
desc: null
|
197 |
+
value: false
|
198 |
+
overwrite_output_dir:
|
199 |
+
desc: null
|
200 |
+
value: true
|
201 |
+
pad_to_max_length:
|
202 |
+
desc: null
|
203 |
+
value: false
|
204 |
+
past_index:
|
205 |
+
desc: null
|
206 |
+
value: -1
|
207 |
+
per_device_eval_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 2
|
210 |
+
per_device_train_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: 2
|
213 |
+
per_gpu_eval_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
per_gpu_train_batch_size:
|
217 |
+
desc: null
|
218 |
+
value: null
|
219 |
+
prediction_loss_only:
|
220 |
+
desc: null
|
221 |
+
value: false
|
222 |
+
preprocessing_num_workers:
|
223 |
+
desc: null
|
224 |
+
value: 96
|
225 |
+
push_to_hub:
|
226 |
+
desc: null
|
227 |
+
value: true
|
228 |
+
push_to_hub_model_id:
|
229 |
+
desc: null
|
230 |
+
value: ''
|
231 |
+
push_to_hub_organization:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
push_to_hub_token:
|
235 |
+
desc: null
|
236 |
+
value: null
|
237 |
+
remove_unused_columns:
|
238 |
+
desc: null
|
239 |
+
value: true
|
240 |
+
report_to:
|
241 |
+
desc: null
|
242 |
+
value:
|
243 |
+
- tensorboard
|
244 |
+
- wandb
|
245 |
+
resume_from_checkpoint:
|
246 |
+
desc: null
|
247 |
+
value: null
|
248 |
+
run_name:
|
249 |
+
desc: null
|
250 |
+
value: ./
|
251 |
+
save_on_each_node:
|
252 |
+
desc: null
|
253 |
+
value: false
|
254 |
+
save_optimizer:
|
255 |
+
desc: null
|
256 |
+
value: true
|
257 |
+
save_steps:
|
258 |
+
desc: null
|
259 |
+
value: 20000
|
260 |
+
save_strategy:
|
261 |
+
desc: null
|
262 |
+
value: IntervalStrategy.STEPS
|
263 |
+
save_total_limit:
|
264 |
+
desc: null
|
265 |
+
value: 5
|
266 |
+
seed:
|
267 |
+
desc: null
|
268 |
+
value: 42
|
269 |
+
sharded_ddp:
|
270 |
+
desc: null
|
271 |
+
value: []
|
272 |
+
skip_memory_metrics:
|
273 |
+
desc: null
|
274 |
+
value: true
|
275 |
+
tokenizer_name:
|
276 |
+
desc: null
|
277 |
+
value: ./
|
278 |
+
tpu_metrics_debug:
|
279 |
+
desc: null
|
280 |
+
value: false
|
281 |
+
tpu_num_cores:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
train_ref_file:
|
285 |
+
desc: null
|
286 |
+
value: null
|
287 |
+
use_fast_tokenizer:
|
288 |
+
desc: null
|
289 |
+
value: true
|
290 |
+
use_legacy_prediction_loop:
|
291 |
+
desc: null
|
292 |
+
value: false
|
293 |
+
validation_ref_file:
|
294 |
+
desc: null
|
295 |
+
value: null
|
296 |
+
validation_split_percentage:
|
297 |
+
desc: null
|
298 |
+
value: 5
|
299 |
+
warmup_ratio:
|
300 |
+
desc: null
|
301 |
+
value: 0.0
|
302 |
+
warmup_steps:
|
303 |
+
desc: null
|
304 |
+
value: 10000
|
305 |
+
weight_decay:
|
306 |
+
desc: null
|
307 |
+
value: 0.0095
|
wandb/run-20210714_224000-1jvvynqa/files/output.log
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype <class 'jax._src.numpy.lax_numpy.int64'> requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more.
|
2 |
+
lax._check_user_dtype_supported(dtype, "zeros")
|
3 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
|
4 |
+
warnings.warn(
|
5 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
6 |
+
warnings.warn(
|
7 |
+
Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s][22:40:16] - INFO - __main__ - Skipping to epoch 0 step 0
|
8 |
+
Training...: 0%| | 0/503952 [01:24<?, ?it/s]
|
9 |
+
Epoch ... (1/5): 0%| | 0/5 [09:14<?, ?it/s]
|
10 |
+
Traceback (most recent call last):
|
11 |
+
File "./run_mlm_flax.py", line 804, in <module>
|
12 |
+
num_train_samples = len(tokenized_datasets["train"])
|
13 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback
|
14 |
+
return fun(*args, **kwargs)
|
15 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped
|
16 |
+
out = pxla.xla_pmap(
|
17 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind
|
18 |
+
return call_bind(self, fun, *args, **params)
|
19 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind
|
20 |
+
outs = primitive.process(top_trace, fun, tracers, params)
|
21 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process
|
22 |
+
return trace.process_map(self, fun, tracers, params)
|
23 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call
|
24 |
+
return primitive.impl(f, *tracers, **params)
|
25 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl
|
26 |
+
return compiled_fun(*args)
|
27 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
28 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
29 |
+
jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
30 |
+
The stack trace below excludes JAX-internal frames.
|
31 |
+
The preceding is the original exception that occurred, unmodified.
|
32 |
+
--------------------
|
33 |
+
The above exception was the direct cause of the following exception:
|
34 |
+
Traceback (most recent call last):
|
35 |
+
File "./run_mlm_flax.py", line 804, in <module>
|
36 |
+
num_train_samples = len(tokenized_datasets["train"])
|
37 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
38 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
39 |
+
RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
wandb/run-20210714_224000-1jvvynqa/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|
wandb/run-20210714_224000-1jvvynqa/files/wandb-metadata.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2021-07-14T22:40:02.351628",
|
5 |
+
"startedAt": "2021-07-14T22:40:00.340218",
|
6 |
+
"docker": null,
|
7 |
+
"cpu_count": 96,
|
8 |
+
"cuda": null,
|
9 |
+
"args": [
|
10 |
+
"--push_to_hub",
|
11 |
+
"--output_dir=./",
|
12 |
+
"--model_type=big_bird",
|
13 |
+
"--config_name=./",
|
14 |
+
"--tokenizer_name=./",
|
15 |
+
"--max_seq_length=4096",
|
16 |
+
"--weight_decay=0.0095",
|
17 |
+
"--warmup_steps=10000",
|
18 |
+
"--overwrite_output_dir",
|
19 |
+
"--adam_beta1=0.9",
|
20 |
+
"--adam_beta2=0.98",
|
21 |
+
"--logging_steps=500",
|
22 |
+
"--eval_steps=20000",
|
23 |
+
"--num_train_epochs=5",
|
24 |
+
"--preprocessing_num_workers=96",
|
25 |
+
"--save_steps=20000",
|
26 |
+
"--learning_rate=5e-5",
|
27 |
+
"--per_device_train_batch_size=2",
|
28 |
+
"--per_device_eval_batch_size=2",
|
29 |
+
"--save_total_limit=5",
|
30 |
+
"--max_eval_samples=2000",
|
31 |
+
"--overwrite_cache",
|
32 |
+
"False",
|
33 |
+
"--gradient_accumulation_steps=8"
|
34 |
+
],
|
35 |
+
"state": "running",
|
36 |
+
"program": "./run_mlm_flax.py",
|
37 |
+
"codePath": "run_mlm_flax.py",
|
38 |
+
"git": {
|
39 |
+
"remote": "https://huggingface.co/flax-community/pino-roberta-base",
|
40 |
+
"commit": "de71755314d4aa3ab182072c6f0be3de9798a66b"
|
41 |
+
},
|
42 |
+
"email": null,
|
43 |
+
"root": "/home/dat/pino-roberta-base",
|
44 |
+
"host": "t1v-n-f5c06ea1-w-0",
|
45 |
+
"username": "dat",
|
46 |
+
"executable": "/home/dat/pino/bin/python"
|
47 |
+
}
|
wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
wandb/run-20210714_224000-1jvvynqa/logs/debug-internal.log
ADDED
@@ -0,0 +1,236 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-14 22:40:01,029 INFO MainThread:598803 [internal.py:wandb_internal():88] W&B internal server running at pid: 598803, started at: 2021-07-14 22:40:01.029595
|
2 |
+
2021-07-14 22:40:01,031 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: check_version
|
3 |
+
2021-07-14 22:40:01,032 INFO WriterThread:598803 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/run-1jvvynqa.wandb
|
4 |
+
2021-07-14 22:40:01,033 DEBUG SenderThread:598803 [sender.py:send():179] send: header
|
5 |
+
2021-07-14 22:40:01,033 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: check_version
|
6 |
+
2021-07-14 22:40:01,069 DEBUG SenderThread:598803 [sender.py:send():179] send: run
|
7 |
+
2021-07-14 22:40:01,239 INFO SenderThread:598803 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files
|
8 |
+
2021-07-14 22:40:01,240 INFO SenderThread:598803 [sender.py:_start_run_threads():716] run started: 1jvvynqa with start time 1626302400
|
9 |
+
2021-07-14 22:40:01,240 DEBUG SenderThread:598803 [sender.py:send():179] send: summary
|
10 |
+
2021-07-14 22:40:01,240 INFO SenderThread:598803 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
11 |
+
2021-07-14 22:40:01,240 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: run_start
|
12 |
+
2021-07-14 22:40:02,242 INFO Thread-8 :598803 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json
|
13 |
+
2021-07-14 22:40:02,351 DEBUG HandlerThread:598803 [meta.py:__init__():39] meta init
|
14 |
+
2021-07-14 22:40:02,351 DEBUG HandlerThread:598803 [meta.py:__init__():53] meta init done
|
15 |
+
2021-07-14 22:40:02,351 DEBUG HandlerThread:598803 [meta.py:probe():210] probe
|
16 |
+
2021-07-14 22:40:02,352 DEBUG HandlerThread:598803 [meta.py:_setup_git():200] setup git
|
17 |
+
2021-07-14 22:40:02,381 DEBUG HandlerThread:598803 [meta.py:_setup_git():207] setup git done
|
18 |
+
2021-07-14 22:40:02,381 DEBUG HandlerThread:598803 [meta.py:_save_pip():57] save pip
|
19 |
+
2021-07-14 22:40:02,382 DEBUG HandlerThread:598803 [meta.py:_save_pip():71] save pip done
|
20 |
+
2021-07-14 22:40:02,382 DEBUG HandlerThread:598803 [meta.py:probe():252] probe done
|
21 |
+
2021-07-14 22:40:02,385 DEBUG SenderThread:598803 [sender.py:send():179] send: files
|
22 |
+
2021-07-14 22:40:02,385 INFO SenderThread:598803 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
|
23 |
+
2021-07-14 22:40:02,390 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
24 |
+
2021-07-14 22:40:02,391 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
25 |
+
2021-07-14 22:40:02,517 DEBUG SenderThread:598803 [sender.py:send():179] send: config
|
26 |
+
2021-07-14 22:40:02,517 DEBUG SenderThread:598803 [sender.py:send():179] send: config
|
27 |
+
2021-07-14 22:40:02,517 DEBUG SenderThread:598803 [sender.py:send():179] send: config
|
28 |
+
2021-07-14 22:40:02,814 INFO Thread-11 :598803 [upload_job.py:push():137] Uploaded file /tmp/tmp43so6xcswandb/116losze-wandb-metadata.json
|
29 |
+
2021-07-14 22:40:03,240 INFO Thread-8 :598803 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/requirements.txt
|
30 |
+
2021-07-14 22:40:03,241 INFO Thread-8 :598803 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-metadata.json
|
31 |
+
2021-07-14 22:40:03,241 INFO Thread-8 :598803 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log
|
32 |
+
2021-07-14 22:40:17,246 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log
|
33 |
+
2021-07-14 22:40:17,518 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
34 |
+
2021-07-14 22:40:17,519 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
35 |
+
2021-07-14 22:40:19,247 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log
|
36 |
+
2021-07-14 22:40:30,436 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
37 |
+
2021-07-14 22:40:32,253 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/config.yaml
|
38 |
+
2021-07-14 22:40:32,650 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
39 |
+
2021-07-14 22:40:32,650 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
40 |
+
2021-07-14 22:40:47,784 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
41 |
+
2021-07-14 22:40:47,784 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
42 |
+
2021-07-14 22:41:00,509 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
43 |
+
2021-07-14 22:41:02,914 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
44 |
+
2021-07-14 22:41:02,914 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
45 |
+
2021-07-14 22:41:18,045 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
46 |
+
2021-07-14 22:41:18,045 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
47 |
+
2021-07-14 22:41:30,568 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
48 |
+
2021-07-14 22:41:33,175 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
49 |
+
2021-07-14 22:41:33,175 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
50 |
+
2021-07-14 22:41:48,307 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
51 |
+
2021-07-14 22:41:48,307 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
52 |
+
2021-07-14 22:42:00,641 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
53 |
+
2021-07-14 22:42:03,441 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
54 |
+
2021-07-14 22:42:03,442 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
55 |
+
2021-07-14 22:42:18,571 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
56 |
+
2021-07-14 22:42:18,572 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
57 |
+
2021-07-14 22:42:30,706 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
58 |
+
2021-07-14 22:42:33,702 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
59 |
+
2021-07-14 22:42:33,702 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
60 |
+
2021-07-14 22:42:48,848 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
61 |
+
2021-07-14 22:42:48,848 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
62 |
+
2021-07-14 22:43:00,777 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
63 |
+
2021-07-14 22:43:03,978 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
64 |
+
2021-07-14 22:43:03,979 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
65 |
+
2021-07-14 22:43:19,111 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
66 |
+
2021-07-14 22:43:19,111 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
67 |
+
2021-07-14 22:43:30,850 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
68 |
+
2021-07-14 22:43:34,242 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
69 |
+
2021-07-14 22:43:34,242 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
70 |
+
2021-07-14 22:43:49,373 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
71 |
+
2021-07-14 22:43:49,374 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
72 |
+
2021-07-14 22:44:00,923 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
73 |
+
2021-07-14 22:44:04,513 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
74 |
+
2021-07-14 22:44:04,513 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
75 |
+
2021-07-14 22:44:19,644 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
76 |
+
2021-07-14 22:44:19,644 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
77 |
+
2021-07-14 22:44:30,999 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
78 |
+
2021-07-14 22:44:34,774 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
79 |
+
2021-07-14 22:44:34,774 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
80 |
+
2021-07-14 22:44:49,906 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
81 |
+
2021-07-14 22:44:49,906 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
82 |
+
2021-07-14 22:45:01,074 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
83 |
+
2021-07-14 22:45:05,077 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
84 |
+
2021-07-14 22:45:05,077 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
85 |
+
2021-07-14 22:45:20,207 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
86 |
+
2021-07-14 22:45:20,208 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
87 |
+
2021-07-14 22:45:31,140 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
88 |
+
2021-07-14 22:45:35,338 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
89 |
+
2021-07-14 22:45:35,339 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
90 |
+
2021-07-14 22:45:50,469 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
91 |
+
2021-07-14 22:45:50,470 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
92 |
+
2021-07-14 22:46:01,203 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
93 |
+
2021-07-14 22:46:05,601 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
94 |
+
2021-07-14 22:46:05,601 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
95 |
+
2021-07-14 22:46:20,734 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
96 |
+
2021-07-14 22:46:20,734 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
97 |
+
2021-07-14 22:46:31,276 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
98 |
+
2021-07-14 22:46:35,865 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
99 |
+
2021-07-14 22:46:35,865 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
100 |
+
2021-07-14 22:46:51,019 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
101 |
+
2021-07-14 22:46:51,020 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
102 |
+
2021-07-14 22:47:01,353 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
103 |
+
2021-07-14 22:47:06,154 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
104 |
+
2021-07-14 22:47:06,154 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
105 |
+
2021-07-14 22:47:21,290 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
106 |
+
2021-07-14 22:47:21,290 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
107 |
+
2021-07-14 22:47:31,428 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
108 |
+
2021-07-14 22:47:36,424 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
109 |
+
2021-07-14 22:47:36,424 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
110 |
+
2021-07-14 22:47:51,555 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
111 |
+
2021-07-14 22:47:51,555 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
112 |
+
2021-07-14 22:48:01,502 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
113 |
+
2021-07-14 22:48:06,777 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
114 |
+
2021-07-14 22:48:06,777 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
115 |
+
2021-07-14 22:48:07,431 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log
|
116 |
+
2021-07-14 22:48:21,934 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
117 |
+
2021-07-14 22:48:21,935 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
118 |
+
2021-07-14 22:48:31,579 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
119 |
+
2021-07-14 22:48:37,091 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
120 |
+
2021-07-14 22:48:37,091 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
121 |
+
2021-07-14 22:48:52,233 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
122 |
+
2021-07-14 22:48:52,234 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
123 |
+
2021-07-14 22:49:01,665 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
124 |
+
2021-07-14 22:49:07,381 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
125 |
+
2021-07-14 22:49:07,382 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
126 |
+
2021-07-14 22:49:22,521 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
|
127 |
+
2021-07-14 22:49:22,521 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
|
128 |
+
2021-07-14 22:49:31,465 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log
|
129 |
+
2021-07-14 22:49:31,743 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
130 |
+
2021-07-14 22:49:32,262 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
|
131 |
+
2021-07-14 22:49:32,263 DEBUG SenderThread:598803 [sender.py:send():179] send: telemetry
|
132 |
+
2021-07-14 22:49:32,263 DEBUG SenderThread:598803 [sender.py:send():179] send: exit
|
133 |
+
2021-07-14 22:49:32,263 INFO SenderThread:598803 [sender.py:send_exit():287] handling exit code: 1
|
134 |
+
2021-07-14 22:49:32,263 INFO SenderThread:598803 [sender.py:send_exit():295] send defer
|
135 |
+
2021-07-14 22:49:32,263 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
|
136 |
+
2021-07-14 22:49:32,264 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
|
137 |
+
2021-07-14 22:49:32,264 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 0
|
138 |
+
2021-07-14 22:49:32,264 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
|
139 |
+
2021-07-14 22:49:32,264 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 0
|
140 |
+
2021-07-14 22:49:32,264 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 1
|
141 |
+
2021-07-14 22:49:32,265 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
|
142 |
+
2021-07-14 22:49:32,265 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 1
|
143 |
+
2021-07-14 22:49:32,345 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
|
144 |
+
2021-07-14 22:49:32,345 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 1
|
145 |
+
2021-07-14 22:49:32,346 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 2
|
146 |
+
2021-07-14 22:49:32,346 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
|
147 |
+
2021-07-14 22:49:32,346 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
|
148 |
+
2021-07-14 22:49:32,346 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 2
|
149 |
+
2021-07-14 22:49:32,346 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
|
150 |
+
2021-07-14 22:49:32,347 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 2
|
151 |
+
2021-07-14 22:49:32,347 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 3
|
152 |
+
2021-07-14 22:49:32,347 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
|
153 |
+
2021-07-14 22:49:32,347 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 3
|
154 |
+
2021-07-14 22:49:32,347 DEBUG SenderThread:598803 [sender.py:send():179] send: summary
|
155 |
+
2021-07-14 22:49:32,347 INFO SenderThread:598803 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
156 |
+
2021-07-14 22:49:32,347 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
|
157 |
+
2021-07-14 22:49:32,347 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 3
|
158 |
+
2021-07-14 22:49:32,347 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 4
|
159 |
+
2021-07-14 22:49:32,348 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
|
160 |
+
2021-07-14 22:49:32,348 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 4
|
161 |
+
2021-07-14 22:49:32,348 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
|
162 |
+
2021-07-14 22:49:32,348 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 4
|
163 |
+
2021-07-14 22:49:32,366 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
|
164 |
+
2021-07-14 22:49:32,466 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json
|
165 |
+
2021-07-14 22:49:32,466 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log
|
166 |
+
2021-07-14 22:49:32,534 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 5
|
167 |
+
2021-07-14 22:49:32,534 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
|
168 |
+
2021-07-14 22:49:32,535 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
|
169 |
+
2021-07-14 22:49:32,535 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 5
|
170 |
+
2021-07-14 22:49:32,535 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
|
171 |
+
2021-07-14 22:49:32,535 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 5
|
172 |
+
2021-07-14 22:49:32,535 INFO SenderThread:598803 [dir_watcher.py:finish():282] shutting down directory watcher
|
173 |
+
2021-07-14 22:49:32,636 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
|
174 |
+
2021-07-14 22:49:33,466 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/config.yaml
|
175 |
+
2021-07-14 22:49:33,467 INFO SenderThread:598803 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files
|
176 |
+
2021-07-14 22:49:33,467 INFO SenderThread:598803 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/requirements.txt requirements.txt
|
177 |
+
2021-07-14 22:49:33,467 INFO SenderThread:598803 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log output.log
|
178 |
+
2021-07-14 22:49:33,467 INFO SenderThread:598803 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-metadata.json wandb-metadata.json
|
179 |
+
2021-07-14 22:49:33,468 INFO SenderThread:598803 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/config.yaml config.yaml
|
180 |
+
2021-07-14 22:49:33,468 INFO SenderThread:598803 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json wandb-summary.json
|
181 |
+
2021-07-14 22:49:33,468 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 6
|
182 |
+
2021-07-14 22:49:33,468 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
|
183 |
+
2021-07-14 22:49:33,472 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
|
184 |
+
2021-07-14 22:49:33,472 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 6
|
185 |
+
2021-07-14 22:49:33,474 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
|
186 |
+
2021-07-14 22:49:33,474 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 6
|
187 |
+
2021-07-14 22:49:33,475 INFO SenderThread:598803 [file_pusher.py:finish():177] shutting down file pusher
|
188 |
+
2021-07-14 22:49:33,574 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
|
189 |
+
2021-07-14 22:49:33,574 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
|
190 |
+
2021-07-14 22:49:33,676 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
|
191 |
+
2021-07-14 22:49:33,676 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
|
192 |
+
2021-07-14 22:49:33,778 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
|
193 |
+
2021-07-14 22:49:33,778 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
|
194 |
+
2021-07-14 22:49:33,880 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
|
195 |
+
2021-07-14 22:49:33,880 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
|
196 |
+
2021-07-14 22:49:33,982 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
|
197 |
+
2021-07-14 22:49:33,982 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
|
198 |
+
2021-07-14 22:49:33,989 INFO Thread-15 :598803 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json
|
199 |
+
2021-07-14 22:49:33,994 INFO Thread-14 :598803 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/config.yaml
|
200 |
+
2021-07-14 22:49:33,995 INFO Thread-13 :598803 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log
|
201 |
+
2021-07-14 22:49:33,997 INFO Thread-12 :598803 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/requirements.txt
|
202 |
+
2021-07-14 22:49:34,084 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
|
203 |
+
2021-07-14 22:49:34,085 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
|
204 |
+
2021-07-14 22:49:34,186 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
|
205 |
+
2021-07-14 22:49:34,187 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
|
206 |
+
2021-07-14 22:49:34,198 INFO Thread-7 :598803 [sender.py:transition_state():308] send defer: 7
|
207 |
+
2021-07-14 22:49:34,198 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
|
208 |
+
2021-07-14 22:49:34,198 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 7
|
209 |
+
2021-07-14 22:49:34,198 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
|
210 |
+
2021-07-14 22:49:34,198 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 7
|
211 |
+
2021-07-14 22:49:34,288 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
|
212 |
+
2021-07-14 22:49:34,464 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 8
|
213 |
+
2021-07-14 22:49:34,464 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
|
214 |
+
2021-07-14 22:49:34,465 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
|
215 |
+
2021-07-14 22:49:34,465 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 8
|
216 |
+
2021-07-14 22:49:34,465 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
|
217 |
+
2021-07-14 22:49:34,465 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 8
|
218 |
+
2021-07-14 22:49:34,466 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 9
|
219 |
+
2021-07-14 22:49:34,466 DEBUG SenderThread:598803 [sender.py:send():179] send: final
|
220 |
+
2021-07-14 22:49:34,466 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
|
221 |
+
2021-07-14 22:49:34,466 DEBUG SenderThread:598803 [sender.py:send():179] send: footer
|
222 |
+
2021-07-14 22:49:34,466 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 9
|
223 |
+
2021-07-14 22:49:34,467 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
|
224 |
+
2021-07-14 22:49:34,467 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 9
|
225 |
+
2021-07-14 22:49:34,567 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
|
226 |
+
2021-07-14 22:49:34,567 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
|
227 |
+
2021-07-14 22:49:34,567 INFO SenderThread:598803 [file_pusher.py:join():182] waiting for file pusher
|
228 |
+
2021-07-14 22:49:34,569 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: get_summary
|
229 |
+
2021-07-14 22:49:34,570 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: sampled_history
|
230 |
+
2021-07-14 22:49:34,570 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: shutdown
|
231 |
+
2021-07-14 22:49:34,570 INFO HandlerThread:598803 [handler.py:finish():638] shutting down handler
|
232 |
+
2021-07-14 22:49:35,467 INFO WriterThread:598803 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/run-1jvvynqa.wandb
|
233 |
+
2021-07-14 22:49:35,568 INFO SenderThread:598803 [sender.py:finish():945] shutting down sender
|
234 |
+
2021-07-14 22:49:35,568 INFO SenderThread:598803 [file_pusher.py:finish():177] shutting down file pusher
|
235 |
+
2021-07-14 22:49:35,568 INFO SenderThread:598803 [file_pusher.py:join():182] waiting for file pusher
|
236 |
+
2021-07-14 22:49:35,570 INFO MainThread:598803 [internal.py:handle_exit():78] Internal process exited
|
wandb/run-20210714_224000-1jvvynqa/logs/debug.log
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-14 22:40:00,341 INFO MainThread:597542 [wandb_setup.py:_flush():69] setting env: {}
|
2 |
+
2021-07-14 22:40:00,341 INFO MainThread:597542 [wandb_setup.py:_flush():69] setting login settings: {}
|
3 |
+
2021-07-14 22:40:00,341 INFO MainThread:597542 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/logs/debug.log
|
4 |
+
2021-07-14 22:40:00,341 INFO MainThread:597542 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/logs/debug-internal.log
|
5 |
+
2021-07-14 22:40:00,342 INFO MainThread:597542 [wandb_init.py:init():370] calling init triggers
|
6 |
+
2021-07-14 22:40:00,342 INFO MainThread:597542 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
|
7 |
+
config: {}
|
8 |
+
2021-07-14 22:40:00,342 INFO MainThread:597542 [wandb_init.py:init():419] starting backend
|
9 |
+
2021-07-14 22:40:00,342 INFO MainThread:597542 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
10 |
+
2021-07-14 22:40:00,388 INFO MainThread:597542 [backend.py:ensure_launched():135] starting backend process...
|
11 |
+
2021-07-14 22:40:00,433 INFO MainThread:597542 [backend.py:ensure_launched():139] started backend process with pid: 598803
|
12 |
+
2021-07-14 22:40:00,435 INFO MainThread:597542 [wandb_init.py:init():424] backend started and connected
|
13 |
+
2021-07-14 22:40:00,438 INFO MainThread:597542 [wandb_init.py:init():472] updated telemetry
|
14 |
+
2021-07-14 22:40:00,439 INFO MainThread:597542 [wandb_init.py:init():491] communicating current version
|
15 |
+
2021-07-14 22:40:01,068 INFO MainThread:597542 [wandb_init.py:init():496] got version response
|
16 |
+
2021-07-14 22:40:01,068 INFO MainThread:597542 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
|
17 |
+
2021-07-14 22:40:01,239 INFO MainThread:597542 [wandb_init.py:init():529] starting run threads in backend
|
18 |
+
2021-07-14 22:40:02,388 INFO MainThread:597542 [wandb_run.py:_console_start():1623] atexit reg
|
19 |
+
2021-07-14 22:40:02,389 INFO MainThread:597542 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
|
20 |
+
2021-07-14 22:40:02,389 INFO MainThread:597542 [wandb_run.py:_redirect():1502] Redirecting console.
|
21 |
+
2021-07-14 22:40:02,391 INFO MainThread:597542 [wandb_run.py:_redirect():1558] Redirects installed.
|
22 |
+
2021-07-14 22:40:02,391 INFO MainThread:597542 [wandb_init.py:init():554] run started, returning control to user process
|
23 |
+
2021-07-14 22:40:02,399 INFO MainThread:597542 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_22-39-51_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
+
2021-07-14 22:40:02,400 INFO MainThread:597542 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
|
25 |
+
2021-07-14 22:40:02,402 INFO MainThread:597542 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000}
|
26 |
+
2021-07-14 22:49:30,065 INFO MainThread:597542 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
|
27 |
+
2021-07-14 22:49:30,066 INFO MainThread:597542 [wandb_run.py:_restore():1565] restore
|
28 |
+
2021-07-14 22:49:32,264 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
29 |
+
wandb_count: 1
|
30 |
+
}
|
31 |
+
pusher_stats {
|
32 |
+
uploaded_bytes: 1448
|
33 |
+
total_bytes: 1448
|
34 |
+
}
|
35 |
+
|
36 |
+
2021-07-14 22:49:32,535 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
37 |
+
wandb_count: 1
|
38 |
+
}
|
39 |
+
pusher_stats {
|
40 |
+
uploaded_bytes: 1448
|
41 |
+
total_bytes: 1448
|
42 |
+
}
|
43 |
+
|
44 |
+
2021-07-14 22:49:33,472 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
45 |
+
wandb_count: 3
|
46 |
+
}
|
47 |
+
pusher_stats {
|
48 |
+
uploaded_bytes: 1448
|
49 |
+
total_bytes: 6873
|
50 |
+
}
|
51 |
+
|
52 |
+
2021-07-14 22:49:33,575 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
53 |
+
wandb_count: 5
|
54 |
+
}
|
55 |
+
pusher_stats {
|
56 |
+
uploaded_bytes: 1448
|
57 |
+
total_bytes: 11487
|
58 |
+
}
|
59 |
+
|
60 |
+
2021-07-14 22:49:33,677 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
61 |
+
wandb_count: 5
|
62 |
+
}
|
63 |
+
pusher_stats {
|
64 |
+
uploaded_bytes: 11487
|
65 |
+
total_bytes: 11487
|
66 |
+
}
|
67 |
+
|
68 |
+
2021-07-14 22:49:33,779 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
69 |
+
wandb_count: 5
|
70 |
+
}
|
71 |
+
pusher_stats {
|
72 |
+
uploaded_bytes: 11487
|
73 |
+
total_bytes: 11487
|
74 |
+
}
|
75 |
+
|
76 |
+
2021-07-14 22:49:33,881 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
77 |
+
wandb_count: 5
|
78 |
+
}
|
79 |
+
pusher_stats {
|
80 |
+
uploaded_bytes: 11487
|
81 |
+
total_bytes: 11487
|
82 |
+
}
|
83 |
+
|
84 |
+
2021-07-14 22:49:33,983 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
85 |
+
wandb_count: 5
|
86 |
+
}
|
87 |
+
pusher_stats {
|
88 |
+
uploaded_bytes: 11487
|
89 |
+
total_bytes: 11487
|
90 |
+
}
|
91 |
+
|
92 |
+
2021-07-14 22:49:34,085 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
93 |
+
wandb_count: 5
|
94 |
+
}
|
95 |
+
pusher_stats {
|
96 |
+
uploaded_bytes: 11487
|
97 |
+
total_bytes: 11487
|
98 |
+
}
|
99 |
+
|
100 |
+
2021-07-14 22:49:34,187 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
101 |
+
wandb_count: 5
|
102 |
+
}
|
103 |
+
pusher_stats {
|
104 |
+
uploaded_bytes: 11487
|
105 |
+
total_bytes: 11487
|
106 |
+
}
|
107 |
+
|
108 |
+
2021-07-14 22:49:34,466 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
109 |
+
wandb_count: 5
|
110 |
+
}
|
111 |
+
pusher_stats {
|
112 |
+
uploaded_bytes: 11487
|
113 |
+
total_bytes: 11487
|
114 |
+
}
|
115 |
+
|
116 |
+
2021-07-14 22:49:34,568 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
|
117 |
+
exit_result {
|
118 |
+
}
|
119 |
+
file_counts {
|
120 |
+
wandb_count: 5
|
121 |
+
}
|
122 |
+
pusher_stats {
|
123 |
+
uploaded_bytes: 11487
|
124 |
+
total_bytes: 11487
|
125 |
+
}
|
126 |
+
|
127 |
+
2021-07-14 22:49:35,856 INFO MainThread:597542 [wandb_run.py:_show_files():1937] logging synced files
|
wandb/run-20210714_224000-1jvvynqa/run-1jvvynqa.wandb
ADDED
Binary file (11.8 kB). View file
|
|
wandb/run-20210714_225820-1dpoijkp/files/config.yaml
ADDED
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
4: 3.8.10
|
17 |
+
5: 0.10.33
|
18 |
+
6: 4.9.0.dev0
|
19 |
+
8:
|
20 |
+
- 5
|
21 |
+
adafactor:
|
22 |
+
desc: null
|
23 |
+
value: false
|
24 |
+
adam_beta1:
|
25 |
+
desc: null
|
26 |
+
value: 0.9
|
27 |
+
adam_beta2:
|
28 |
+
desc: null
|
29 |
+
value: 0.98
|
30 |
+
adam_epsilon:
|
31 |
+
desc: null
|
32 |
+
value: 1.0e-08
|
33 |
+
cache_dir:
|
34 |
+
desc: null
|
35 |
+
value: null
|
36 |
+
config_name:
|
37 |
+
desc: null
|
38 |
+
value: ./
|
39 |
+
dataloader_drop_last:
|
40 |
+
desc: null
|
41 |
+
value: false
|
42 |
+
dataloader_num_workers:
|
43 |
+
desc: null
|
44 |
+
value: 0
|
45 |
+
dataloader_pin_memory:
|
46 |
+
desc: null
|
47 |
+
value: true
|
48 |
+
dataset_config_name:
|
49 |
+
desc: null
|
50 |
+
value: null
|
51 |
+
dataset_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
ddp_find_unused_parameters:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
debug:
|
58 |
+
desc: null
|
59 |
+
value: []
|
60 |
+
deepspeed:
|
61 |
+
desc: null
|
62 |
+
value: null
|
63 |
+
disable_tqdm:
|
64 |
+
desc: null
|
65 |
+
value: false
|
66 |
+
do_eval:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_predict:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_train:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
dtype:
|
76 |
+
desc: null
|
77 |
+
value: float32
|
78 |
+
eval_accumulation_steps:
|
79 |
+
desc: null
|
80 |
+
value: null
|
81 |
+
eval_steps:
|
82 |
+
desc: null
|
83 |
+
value: 20000
|
84 |
+
evaluation_strategy:
|
85 |
+
desc: null
|
86 |
+
value: IntervalStrategy.NO
|
87 |
+
fp16:
|
88 |
+
desc: null
|
89 |
+
value: false
|
90 |
+
fp16_backend:
|
91 |
+
desc: null
|
92 |
+
value: auto
|
93 |
+
fp16_full_eval:
|
94 |
+
desc: null
|
95 |
+
value: false
|
96 |
+
fp16_opt_level:
|
97 |
+
desc: null
|
98 |
+
value: O1
|
99 |
+
gradient_accumulation_steps:
|
100 |
+
desc: null
|
101 |
+
value: 8
|
102 |
+
greater_is_better:
|
103 |
+
desc: null
|
104 |
+
value: null
|
105 |
+
group_by_length:
|
106 |
+
desc: null
|
107 |
+
value: false
|
108 |
+
ignore_data_skip:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
label_names:
|
112 |
+
desc: null
|
113 |
+
value: null
|
114 |
+
label_smoothing_factor:
|
115 |
+
desc: null
|
116 |
+
value: 0.0
|
117 |
+
learning_rate:
|
118 |
+
desc: null
|
119 |
+
value: 5.0e-05
|
120 |
+
length_column_name:
|
121 |
+
desc: null
|
122 |
+
value: length
|
123 |
+
line_by_line:
|
124 |
+
desc: null
|
125 |
+
value: false
|
126 |
+
load_best_model_at_end:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
local_rank:
|
130 |
+
desc: null
|
131 |
+
value: -1
|
132 |
+
log_level:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level_replica:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_on_each_node:
|
139 |
+
desc: null
|
140 |
+
value: true
|
141 |
+
logging_dir:
|
142 |
+
desc: null
|
143 |
+
value: ./runs/Jul14_22-58-13_t1v-n-f5c06ea1-w-0
|
144 |
+
logging_first_step:
|
145 |
+
desc: null
|
146 |
+
value: false
|
147 |
+
logging_steps:
|
148 |
+
desc: null
|
149 |
+
value: 500
|
150 |
+
logging_strategy:
|
151 |
+
desc: null
|
152 |
+
value: IntervalStrategy.STEPS
|
153 |
+
lr_scheduler_type:
|
154 |
+
desc: null
|
155 |
+
value: SchedulerType.LINEAR
|
156 |
+
max_eval_samples:
|
157 |
+
desc: null
|
158 |
+
value: 2000
|
159 |
+
max_grad_norm:
|
160 |
+
desc: null
|
161 |
+
value: 1.0
|
162 |
+
max_seq_length:
|
163 |
+
desc: null
|
164 |
+
value: 4096
|
165 |
+
max_steps:
|
166 |
+
desc: null
|
167 |
+
value: -1
|
168 |
+
metric_for_best_model:
|
169 |
+
desc: null
|
170 |
+
value: null
|
171 |
+
mlm_probability:
|
172 |
+
desc: null
|
173 |
+
value: 0.15
|
174 |
+
model_name_or_path:
|
175 |
+
desc: null
|
176 |
+
value: null
|
177 |
+
model_type:
|
178 |
+
desc: null
|
179 |
+
value: big_bird
|
180 |
+
mp_parameters:
|
181 |
+
desc: null
|
182 |
+
value: ''
|
183 |
+
no_cuda:
|
184 |
+
desc: null
|
185 |
+
value: false
|
186 |
+
num_train_epochs:
|
187 |
+
desc: null
|
188 |
+
value: 5.0
|
189 |
+
output_dir:
|
190 |
+
desc: null
|
191 |
+
value: ./
|
192 |
+
overwrite_cache:
|
193 |
+
desc: null
|
194 |
+
value: false
|
195 |
+
overwrite_output_dir:
|
196 |
+
desc: null
|
197 |
+
value: true
|
198 |
+
pad_to_max_length:
|
199 |
+
desc: null
|
200 |
+
value: false
|
201 |
+
past_index:
|
202 |
+
desc: null
|
203 |
+
value: -1
|
204 |
+
per_device_eval_batch_size:
|
205 |
+
desc: null
|
206 |
+
value: 2
|
207 |
+
per_device_train_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 2
|
210 |
+
per_gpu_eval_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: null
|
213 |
+
per_gpu_train_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
prediction_loss_only:
|
217 |
+
desc: null
|
218 |
+
value: false
|
219 |
+
preprocessing_num_workers:
|
220 |
+
desc: null
|
221 |
+
value: 96
|
222 |
+
push_to_hub:
|
223 |
+
desc: null
|
224 |
+
value: true
|
225 |
+
push_to_hub_model_id:
|
226 |
+
desc: null
|
227 |
+
value: ''
|
228 |
+
push_to_hub_organization:
|
229 |
+
desc: null
|
230 |
+
value: null
|
231 |
+
push_to_hub_token:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
remove_unused_columns:
|
235 |
+
desc: null
|
236 |
+
value: true
|
237 |
+
report_to:
|
238 |
+
desc: null
|
239 |
+
value:
|
240 |
+
- tensorboard
|
241 |
+
- wandb
|
242 |
+
resume_from_checkpoint:
|
243 |
+
desc: null
|
244 |
+
value: null
|
245 |
+
run_name:
|
246 |
+
desc: null
|
247 |
+
value: ./
|
248 |
+
save_on_each_node:
|
249 |
+
desc: null
|
250 |
+
value: false
|
251 |
+
save_optimizer:
|
252 |
+
desc: null
|
253 |
+
value: true
|
254 |
+
save_steps:
|
255 |
+
desc: null
|
256 |
+
value: 20000
|
257 |
+
save_strategy:
|
258 |
+
desc: null
|
259 |
+
value: IntervalStrategy.STEPS
|
260 |
+
save_total_limit:
|
261 |
+
desc: null
|
262 |
+
value: 5
|
263 |
+
seed:
|
264 |
+
desc: null
|
265 |
+
value: 42
|
266 |
+
sharded_ddp:
|
267 |
+
desc: null
|
268 |
+
value: []
|
269 |
+
skip_memory_metrics:
|
270 |
+
desc: null
|
271 |
+
value: true
|
272 |
+
tokenizer_name:
|
273 |
+
desc: null
|
274 |
+
value: ./
|
275 |
+
tpu_metrics_debug:
|
276 |
+
desc: null
|
277 |
+
value: false
|
278 |
+
tpu_num_cores:
|
279 |
+
desc: null
|
280 |
+
value: null
|
281 |
+
train_ref_file:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
use_fast_tokenizer:
|
285 |
+
desc: null
|
286 |
+
value: true
|
287 |
+
use_legacy_prediction_loop:
|
288 |
+
desc: null
|
289 |
+
value: false
|
290 |
+
validation_ref_file:
|
291 |
+
desc: null
|
292 |
+
value: null
|
293 |
+
validation_split_percentage:
|
294 |
+
desc: null
|
295 |
+
value: 5
|
296 |
+
warmup_ratio:
|
297 |
+
desc: null
|
298 |
+
value: 0.0
|
299 |
+
warmup_steps:
|
300 |
+
desc: null
|
301 |
+
value: 10000
|
302 |
+
weight_decay:
|
303 |
+
desc: null
|
304 |
+
value: 0.0095
|
wandb/run-20210714_225820-1dpoijkp/files/output.log
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype <class 'jax._src.numpy.lax_numpy.int64'> requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more.
|
2 |
+
lax._check_user_dtype_supported(dtype, "zeros")
|
3 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
|
4 |
+
warnings.warn(
|
5 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
6 |
+
warnings.warn(
|
wandb/run-20210714_225820-1dpoijkp/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|
wandb/run-20210714_225820-1dpoijkp/files/wandb-metadata.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2021-07-14T22:58:22.701262",
|
5 |
+
"startedAt": "2021-07-14T22:58:20.641335",
|
6 |
+
"docker": null,
|
7 |
+
"cpu_count": 96,
|
8 |
+
"cuda": null,
|
9 |
+
"args": [
|
10 |
+
"--push_to_hub",
|
11 |
+
"--output_dir=./",
|
12 |
+
"--model_type=big_bird",
|
13 |
+
"--config_name=./",
|
14 |
+
"--tokenizer_name=./",
|
15 |
+
"--max_seq_length=4096",
|
16 |
+
"--weight_decay=0.0095",
|
17 |
+
"--warmup_steps=10000",
|
18 |
+
"--overwrite_output_dir",
|
19 |
+
"--adam_beta1=0.9",
|
20 |
+
"--adam_beta2=0.98",
|
21 |
+
"--logging_steps=500",
|
22 |
+
"--eval_steps=20000",
|
23 |
+
"--num_train_epochs=5",
|
24 |
+
"--preprocessing_num_workers=96",
|
25 |
+
"--save_steps=20000",
|
26 |
+
"--learning_rate=5e-5",
|
27 |
+
"--per_device_train_batch_size=2",
|
28 |
+
"--per_device_eval_batch_size=2",
|
29 |
+
"--save_total_limit=5",
|
30 |
+
"--max_eval_samples=2000",
|
31 |
+
"--overwrite_cache",
|
32 |
+
"False",
|
33 |
+
"--gradient_accumulation_steps=8"
|
34 |
+
],
|
35 |
+
"state": "running",
|
36 |
+
"program": "./run_mlm_flax.py",
|
37 |
+
"codePath": "run_mlm_flax.py",
|
38 |
+
"git": {
|
39 |
+
"remote": "https://huggingface.co/flax-community/pino-roberta-base",
|
40 |
+
"commit": "de71755314d4aa3ab182072c6f0be3de9798a66b"
|
41 |
+
},
|
42 |
+
"email": null,
|
43 |
+
"root": "/home/dat/pino-roberta-base",
|
44 |
+
"host": "t1v-n-f5c06ea1-w-0",
|
45 |
+
"username": "dat",
|
46 |
+
"executable": "/home/dat/pino/bin/python"
|
47 |
+
}
|
wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-14 22:58:21,337 INFO MainThread:601574 [internal.py:wandb_internal():88] W&B internal server running at pid: 601574, started at: 2021-07-14 22:58:21.336704
|
2 |
+
2021-07-14 22:58:21,339 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: check_version
|
3 |
+
2021-07-14 22:58:21,339 INFO WriterThread:601574 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb
|
4 |
+
2021-07-14 22:58:21,340 DEBUG SenderThread:601574 [sender.py:send():179] send: header
|
5 |
+
2021-07-14 22:58:21,340 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: check_version
|
6 |
+
2021-07-14 22:58:21,377 DEBUG SenderThread:601574 [sender.py:send():179] send: run
|
7 |
+
2021-07-14 22:58:21,602 INFO SenderThread:601574 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files
|
8 |
+
2021-07-14 22:58:21,602 INFO SenderThread:601574 [sender.py:_start_run_threads():716] run started: 1dpoijkp with start time 1626303500
|
9 |
+
2021-07-14 22:58:21,602 DEBUG SenderThread:601574 [sender.py:send():179] send: summary
|
10 |
+
2021-07-14 22:58:21,603 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: run_start
|
11 |
+
2021-07-14 22:58:21,603 INFO SenderThread:601574 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
12 |
+
2021-07-14 22:58:22,605 INFO Thread-8 :601574 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json
|
13 |
+
2021-07-14 22:58:22,701 DEBUG HandlerThread:601574 [meta.py:__init__():39] meta init
|
14 |
+
2021-07-14 22:58:22,701 DEBUG HandlerThread:601574 [meta.py:__init__():53] meta init done
|
15 |
+
2021-07-14 22:58:22,701 DEBUG HandlerThread:601574 [meta.py:probe():210] probe
|
16 |
+
2021-07-14 22:58:22,702 DEBUG HandlerThread:601574 [meta.py:_setup_git():200] setup git
|
17 |
+
2021-07-14 22:58:22,732 DEBUG HandlerThread:601574 [meta.py:_setup_git():207] setup git done
|
18 |
+
2021-07-14 22:58:22,732 DEBUG HandlerThread:601574 [meta.py:_save_pip():57] save pip
|
19 |
+
2021-07-14 22:58:22,732 DEBUG HandlerThread:601574 [meta.py:_save_pip():71] save pip done
|
20 |
+
2021-07-14 22:58:22,733 DEBUG HandlerThread:601574 [meta.py:probe():252] probe done
|
21 |
+
2021-07-14 22:58:22,736 DEBUG SenderThread:601574 [sender.py:send():179] send: files
|
22 |
+
2021-07-14 22:58:22,736 INFO SenderThread:601574 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
|
23 |
+
2021-07-14 22:58:22,742 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
24 |
+
2021-07-14 22:58:22,742 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
25 |
+
2021-07-14 22:58:22,871 DEBUG SenderThread:601574 [sender.py:send():179] send: config
|
26 |
+
2021-07-14 22:58:22,872 DEBUG SenderThread:601574 [sender.py:send():179] send: config
|
27 |
+
2021-07-14 22:58:22,872 DEBUG SenderThread:601574 [sender.py:send():179] send: config
|
28 |
+
2021-07-14 22:58:23,214 INFO Thread-11 :601574 [upload_job.py:push():137] Uploaded file /tmp/tmpg5fs3m8gwandb/35jlequ6-wandb-metadata.json
|
29 |
+
2021-07-14 22:58:23,603 INFO Thread-8 :601574 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/output.log
|
30 |
+
2021-07-14 22:58:23,604 INFO Thread-8 :601574 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/requirements.txt
|
31 |
+
2021-07-14 22:58:23,604 INFO Thread-8 :601574 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/wandb-metadata.json
|
32 |
+
2021-07-14 22:58:37,609 INFO Thread-8 :601574 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/output.log
|
33 |
+
2021-07-14 22:58:37,873 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
34 |
+
2021-07-14 22:58:37,874 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
35 |
+
2021-07-14 22:58:50,784 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
|
36 |
+
2021-07-14 22:58:52,614 INFO Thread-8 :601574 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/config.yaml
|
37 |
+
2021-07-14 22:58:53,006 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
38 |
+
2021-07-14 22:58:53,006 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
39 |
+
2021-07-14 22:59:08,141 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
|
40 |
+
2021-07-14 22:59:08,141 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
|
wandb/run-20210714_225820-1dpoijkp/logs/debug.log
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_setup.py:_flush():69] setting env: {}
|
2 |
+
2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_setup.py:_flush():69] setting login settings: {}
|
3 |
+
2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/logs/debug.log
|
4 |
+
2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log
|
5 |
+
2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_init.py:init():370] calling init triggers
|
6 |
+
2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
|
7 |
+
config: {}
|
8 |
+
2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_init.py:init():419] starting backend
|
9 |
+
2021-07-14 22:58:20,643 INFO MainThread:600323 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
10 |
+
2021-07-14 22:58:20,687 INFO MainThread:600323 [backend.py:ensure_launched():135] starting backend process...
|
11 |
+
2021-07-14 22:58:20,729 INFO MainThread:600323 [backend.py:ensure_launched():139] started backend process with pid: 601574
|
12 |
+
2021-07-14 22:58:20,731 INFO MainThread:600323 [wandb_init.py:init():424] backend started and connected
|
13 |
+
2021-07-14 22:58:20,734 INFO MainThread:600323 [wandb_init.py:init():472] updated telemetry
|
14 |
+
2021-07-14 22:58:20,735 INFO MainThread:600323 [wandb_init.py:init():491] communicating current version
|
15 |
+
2021-07-14 22:58:21,375 INFO MainThread:600323 [wandb_init.py:init():496] got version response
|
16 |
+
2021-07-14 22:58:21,375 INFO MainThread:600323 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
|
17 |
+
2021-07-14 22:58:21,602 INFO MainThread:600323 [wandb_init.py:init():529] starting run threads in backend
|
18 |
+
2021-07-14 22:58:22,739 INFO MainThread:600323 [wandb_run.py:_console_start():1623] atexit reg
|
19 |
+
2021-07-14 22:58:22,740 INFO MainThread:600323 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
|
20 |
+
2021-07-14 22:58:22,740 INFO MainThread:600323 [wandb_run.py:_redirect():1502] Redirecting console.
|
21 |
+
2021-07-14 22:58:22,742 INFO MainThread:600323 [wandb_run.py:_redirect():1558] Redirects installed.
|
22 |
+
2021-07-14 22:58:22,742 INFO MainThread:600323 [wandb_init.py:init():554] run started, returning control to user process
|
23 |
+
2021-07-14 22:58:22,750 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_22-58-13_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
+
2021-07-14 22:58:22,752 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
|
25 |
+
2021-07-14 22:58:22,753 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000}
|