diff --git a/config.json b/config.json index 849f38f31170912b81fb7116a08702ee35828544..d983e12af73121af5a17c4c69ffb6ece637889a7 100644 --- a/config.json +++ b/config.json @@ -4,7 +4,7 @@ ], "attention_probs_dropout_prob": 0.1, "attention_type": "block_sparse", - "block_size": 128, + "block_size": 64, "bos_token_id": 1, "eos_token_id": 2, "gradient_checkpointing": false, diff --git a/events.out.tfevents.1626299647.t1v-n-f5c06ea1-w-0.587396.3.v2 b/events.out.tfevents.1626299647.t1v-n-f5c06ea1-w-0.587396.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..a06992a7a7ada3ddc1854004f7f5e973c7c4cf19 --- /dev/null +++ b/events.out.tfevents.1626299647.t1v-n-f5c06ea1-w-0.587396.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84890ea5bd3c73af594c93d00f787106810c8227126a97c436a63ef86502b93f +size 40 diff --git a/events.out.tfevents.1626301159.t1v-n-f5c06ea1-w-0.592040.3.v2 b/events.out.tfevents.1626301159.t1v-n-f5c06ea1-w-0.592040.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..1aecd6723afb236ecc1eb4e732293b80691706aa --- /dev/null +++ b/events.out.tfevents.1626301159.t1v-n-f5c06ea1-w-0.592040.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b87389b3d84fa4c5e66f3f568af52dc3799c8e93ecd1c17d2757563eadf4b8a +size 40 diff --git a/events.out.tfevents.1626301759.t1v-n-f5c06ea1-w-0.595290.3.v2 b/events.out.tfevents.1626301759.t1v-n-f5c06ea1-w-0.595290.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..3dec41a45c76fd756035904af5c473fa5d30c4bc --- /dev/null +++ b/events.out.tfevents.1626301759.t1v-n-f5c06ea1-w-0.595290.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7151f4506f8d7e2790068211ea6067e529caadb072896f77702e4e393e69bc8b +size 40 diff --git a/events.out.tfevents.1626302399.t1v-n-f5c06ea1-w-0.597542.3.v2 b/events.out.tfevents.1626302399.t1v-n-f5c06ea1-w-0.597542.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..0bfc4ba5ffe187d75914e7094927eaefcbaacd16 --- /dev/null +++ b/events.out.tfevents.1626302399.t1v-n-f5c06ea1-w-0.597542.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c317581746aa1f2878b10cfdd0724ead65d7384dc0003909dc09986654f0ca6d +size 40 diff --git a/events.out.tfevents.1626303499.t1v-n-f5c06ea1-w-0.600323.3.v2 b/events.out.tfevents.1626303499.t1v-n-f5c06ea1-w-0.600323.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..084f131eb6763e52a420b6a12d50441cf3e45685 --- /dev/null +++ b/events.out.tfevents.1626303499.t1v-n-f5c06ea1-w-0.600323.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:610b4725be7ba71faaadd24b5aa839e67235b0b211a2bf6c4d1da48931051a45 +size 40 diff --git a/run.sh b/run.sh index 84c5ab824de3caa720d507060cc1be3afdecb577..cc2e94eb7c8ff2c871cb3366b24e0dbf926ef228 100644 --- a/run.sh +++ b/run.sh @@ -10,12 +10,12 @@ python ./run_mlm_flax.py \ --tokenizer_name="./" \ --max_seq_length="4096" \ --weight_decay="0.0095" \ - --warmup_steps="5000" \ + --warmup_steps="10000" \ --overwrite_output_dir \ --adam_beta1="0.9" \ --adam_beta2="0.98" \ - --logging_steps="250" \ - --eval_steps="500" \ + --logging_steps="500" \ + --eval_steps="20000" \ --num_train_epochs="5" \ --preprocessing_num_workers="96" \ --save_steps="20000" \ @@ -23,9 +23,9 @@ python ./run_mlm_flax.py \ --per_device_train_batch_size="2" \ --per_device_eval_batch_size="2" \ --save_total_limit="5"\ - --max_eval_samples="500"\ + --max_eval_samples="2000"\ --overwrite_cache False \ - --gradient_accumulation_steps="4" \ + --gradient_accumulation_steps="8" \ #--resume_from_checkpoint="./"\ #--adafactor \ #--dtype="bfloat16" \ diff --git a/run_mlm_flax.py b/run_mlm_flax.py index fa03d6a3b1ee931defb281b5cc469a1df21f3e64..f96f248e40bb6d31de97c8fbc499c44c735f54a9 100644 --- a/run_mlm_flax.py +++ b/run_mlm_flax.py @@ -33,6 +33,8 @@ from typing import Dict, List, Optional, Tuple import numpy as np from datasets import load_dataset, DatasetDict from tqdm import tqdm +from optax import clip_by_global_norm + import flax import jax @@ -55,11 +57,13 @@ from transformers import ( set_seed, ) import json -from flax.training import checkpoints +import shutil + from flax.jax_utils import unreplicate from flax.training.checkpoints import save_checkpoint, restore_checkpoint from importlib.util import find_spec from flax.serialization import to_bytes, from_bytes +import jax.profiler MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_MASKED_LM_MAPPING.keys()) MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES) @@ -275,6 +279,35 @@ def write_eval_metric(summary_writer, eval_metrics, step): summary_writer.scalar(f"eval_{metric_name}", value, step) +# utils +def mb_item(x): + return x.item() if hasattr(x, "item") else x + +#checkpoint functions +def save_model_checkpoint(model, save_dir, state, with_opt:bool=True, push_to_hub:bool=False): + """ + If `push_to_hub` is True, will save to `save_dir`. Otherwise will save to `save_dir/ckpt-{step}`. + """ + state = jax_utils.unreplicate(state) + logger.info(f"SAVING CHECKPOINT IN {save_dir}...") + if not push_to_hub: + save_dir = f"{save_dir}/ckpt-{mb_item(state.step)-1}" + model.save_pretrained( + save_dir, + params=state.params, + push_to_hub=push_to_hub, + commit_message=f"Saving weights and logs at step {mb_item(state.step)-1}", + ) + if with_opt: + with open(os.path.join(save_dir, "opt_state.msgpack"), "wb") as f: + f.write(to_bytes(state.opt_state)) + with open(os.path.join(save_dir, "training_state.json"), "w") as f: + json.dump({"step": state.step.item()}, f) + logger.info("checkpoint saved") + +# this is added to make resuming from checkpoint to work with adafactor +# to be removed when issue is fixed +# notice that adafactor state is perturbed by fake_update def _zeros_tree_like(inp_tree): return jax.tree_map(jnp.zeros_like, inp_tree) @@ -291,8 +324,11 @@ def fake_update(state): def reinstantiate_states(opt_state): new_state = [] for state in opt_state: - cls = getattr(optax, type(state).__name__) - new_state.append(cls(**{k:getattr(state, k) for k in state._fields})) + if isinstance(state, list): + new_state.append(reinstantiate_states(state)) + else: + cls = getattr(optax, type(state).__name__) + new_state.append(cls(**{k:getattr(state, k) for k in state._fields})) return new_state def restore_model_checkpoint(save_dir, state): @@ -318,27 +354,6 @@ def restore_model_checkpoint(save_dir, state): return state.replace(step=step, params=params, opt_state=opt_state) -def save_model_checkpoint(model, save_dir, state, with_opt:bool=True, push_to_hub:bool=False): - """ - If `push_to_hub` is True, will save to `save_dir`. Otherwise will save to `save_dir/ckpt-{step}`. - """ - state = jax_utils.unreplicate(state) - logger.info(f"SAVING CHECKPOINT IN {save_dir}...") - if not push_to_hub: - save_dir = f"{save_dir}/ckpt-{mb_item(state.step)-1}" - model.save_pretrained( - save_dir, - params=state.params, - push_to_hub=push_to_hub, - commit_message=f"Saving weights and logs at step {mb_item(state.step)-1}", - ) - if with_opt: - with open(os.path.join(save_dir, "opt_state.msgpack"), "wb") as f: - f.write(to_bytes(state.opt_state)) - with open(os.path.join(save_dir, "training_state.json"), "w") as f: - json.dump({"step": state.step.item()}, f) - logger.info("checkpoint saved") - def rotate_checkpoints(ckpt_dir:str, save_total_limit:int): "Removes older checkpoints so that `save_total_limit` checkpoints are kept" # TODO: what to remove is decided using step number only, we might want to improve that @@ -351,7 +366,6 @@ def rotate_checkpoints(ckpt_dir:str, save_total_limit:int): shutil.rmtree(ckpt) - if __name__ == "__main__": # See all possible arguments in src/transformers/training_args.py # or by passing the --help flag to this script. @@ -513,7 +527,7 @@ if __name__ == "__main__": tokenized_datasets = DatasetDict.load_from_disk("/data/tokenized_data") logger.info("Setting max validation examples to ") print(f"Number of validation examples {data_args.max_eval_samples}") - tokenized_datasets["train"]= tokenized_datasets["train"].select(range(20000)) + #tokenized_datasets["train"]= tokenized_datasets["train"].select(range(20000)) if data_args.max_eval_samples is not None: tokenized_datasets["validation"] = tokenized_datasets["validation"].select(range(data_args.max_eval_samples)) else: @@ -687,7 +701,6 @@ if __name__ == "__main__": learning_rate=linear_decay_lr_schedule_fn, ) else: - from optax import clip_by_global_norm optimizer = optax.adamw( learning_rate=linear_decay_lr_schedule_fn, b1=training_args.adam_beta1, @@ -777,7 +790,8 @@ if __name__ == "__main__": steps_per_epoch = len(tokenized_datasets["train"]) // train_batch_size resume_epoch = resume_step // (steps_per_epoch * grad_accum_steps) epochs = tqdm(range(num_epochs), desc=f"Epoch ... ({resume_epoch+1}/{num_epochs})", position=0) - logger.info(f"Skipping to epoch {resume_epoch} step {resume_step // grad_accum_steps}") + if resume_step != 0: + logger.info(f"Skipping to epoch {resume_epoch} step {resume_step // grad_accum_steps}") for epoch in epochs: # ======================== Training ================================ train_start = time.time() diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log index 2f6d00a781cc9c32cf29b9f98c5e76bd28b1e0d3..0d918acb88e38282460bcd0b5a158f53bd6e7609 120000 --- a/wandb/debug-internal.log +++ b/wandb/debug-internal.log @@ -1 +1 @@ -run-20210714_213944-3j6d3fy2/logs/debug-internal.log \ No newline at end of file +run-20210714_225820-1dpoijkp/logs/debug-internal.log \ No newline at end of file diff --git a/wandb/debug.log b/wandb/debug.log index 45bcbcfacef708917c60d9ddf15accaea986a26b..e8517a240a8369bb78195e44a363d7213a488ab3 120000 --- a/wandb/debug.log +++ b/wandb/debug.log @@ -1 +1 @@ -run-20210714_213944-3j6d3fy2/logs/debug.log \ No newline at end of file +run-20210714_225820-1dpoijkp/logs/debug.log \ No newline at end of file diff --git a/wandb/latest-run b/wandb/latest-run index 4a6d8550545cd8bb5e78602e06fe9f18f56d8138..1dbf35e208de011d8d308816d17498c2c987b2fa 120000 --- a/wandb/latest-run +++ b/wandb/latest-run @@ -1 +1 @@ -run-20210714_213944-3j6d3fy2 \ No newline at end of file +run-20210714_225820-1dpoijkp \ No newline at end of file diff --git a/wandb/run-20210714_215408-3kpvz8se/files/config.yaml b/wandb/run-20210714_215408-3kpvz8se/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e4f8c9eb1df3c3ab4c447f491abb8e10925bce1 --- /dev/null +++ b/wandb/run-20210714_215408-3kpvz8se/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 2 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul14_21-54-01_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 250 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 500 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: ./ +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210714_215408-3kpvz8se/files/output.log b/wandb/run-20210714_215408-3kpvz8se/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..81e24cc0e2316f8dbea7f40f5913602ccd2d07ac --- /dev/null +++ b/wandb/run-20210714_215408-3kpvz8se/files/output.log @@ -0,0 +1,15 @@ +[21:54:22] - INFO - absl - A polynomial schedule was set with a non-positive `transition_steps` value; this results in a constant schedule with value `init_value`. +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +[21:54:23] - INFO - __main__ - RESTORING CHECKPOINT FROM ./... +tcmalloc: large alloc 1530273792 bytes == 0xd9eda000 @ 0x7f6c0ba41680 0x7f6c0ba62824 0x5f7b11 0x648631 0x5c38e6 0x4f30e6 0x64ee88 0x505653 0x56acb6 0x568d9a 0x5f5b33 0x56aadf 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7f6c0b8560b3 0x5f96de +restoring state of multisteps optimizer +[21:54:26] - INFO - __main__ - checkpoint restored +Traceback (most recent call last): + File "./run_mlm_flax.py", line 712, in + state = restore_model_checkpoint(training_args.resume_from_checkpoint, state) + File "./run_mlm_flax.py", line 314, in restore_model_checkpoint + inner_opt_state = reinstantiate_states(opt_state.inner_opt_state) + File "./run_mlm_flax.py", line 294, in reinstantiate_states + cls = getattr(optax, type(state).__name__) +AttributeError: module 'optax' has no attribute 'list' \ No newline at end of file diff --git a/wandb/run-20210714_215408-3kpvz8se/files/requirements.txt b/wandb/run-20210714_215408-3kpvz8se/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210714_215408-3kpvz8se/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210714_215408-3kpvz8se/files/wandb-metadata.json b/wandb/run-20210714_215408-3kpvz8se/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8143e2e01c1b02341caf74a62f4ef3be25518079 --- /dev/null +++ b/wandb/run-20210714_215408-3kpvz8se/files/wandb-metadata.json @@ -0,0 +1,48 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-14T21:54:10.349764", + "startedAt": "2021-07-14T21:54:08.359450", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=5000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=250", + "--eval_steps=500", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=500", + "--overwrite_cache", + "False", + "--gradient_accumulation_steps=2", + "--resume_from_checkpoint=./" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "de71755314d4aa3ab182072c6f0be3de9798a66b" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json b/wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210714_215408-3kpvz8se/logs/debug-internal.log b/wandb/run-20210714_215408-3kpvz8se/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..da0d3865d081cc5d018fcf3b3f2a165eb6148a31 --- /dev/null +++ b/wandb/run-20210714_215408-3kpvz8se/logs/debug-internal.log @@ -0,0 +1,142 @@ +2021-07-14 21:54:09,008 INFO MainThread:588654 [internal.py:wandb_internal():88] W&B internal server running at pid: 588654, started at: 2021-07-14 21:54:09.008494 +2021-07-14 21:54:09,011 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: check_version +2021-07-14 21:54:09,011 INFO WriterThread:588654 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/run-3kpvz8se.wandb +2021-07-14 21:54:09,012 DEBUG SenderThread:588654 [sender.py:send():179] send: header +2021-07-14 21:54:09,012 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: check_version +2021-07-14 21:54:09,050 DEBUG SenderThread:588654 [sender.py:send():179] send: run +2021-07-14 21:54:09,234 INFO SenderThread:588654 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files +2021-07-14 21:54:09,234 INFO SenderThread:588654 [sender.py:_start_run_threads():716] run started: 3kpvz8se with start time 1626299648 +2021-07-14 21:54:09,234 DEBUG SenderThread:588654 [sender.py:send():179] send: summary +2021-07-14 21:54:09,234 INFO SenderThread:588654 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 21:54:09,235 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: run_start +2021-07-14 21:54:10,238 INFO Thread-8 :588654 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json +2021-07-14 21:54:10,349 DEBUG HandlerThread:588654 [meta.py:__init__():39] meta init +2021-07-14 21:54:10,349 DEBUG HandlerThread:588654 [meta.py:__init__():53] meta init done +2021-07-14 21:54:10,349 DEBUG HandlerThread:588654 [meta.py:probe():210] probe +2021-07-14 21:54:10,351 DEBUG HandlerThread:588654 [meta.py:_setup_git():200] setup git +2021-07-14 21:54:10,382 DEBUG HandlerThread:588654 [meta.py:_setup_git():207] setup git done +2021-07-14 21:54:10,382 DEBUG HandlerThread:588654 [meta.py:_save_pip():57] save pip +2021-07-14 21:54:10,382 DEBUG HandlerThread:588654 [meta.py:_save_pip():71] save pip done +2021-07-14 21:54:10,382 DEBUG HandlerThread:588654 [meta.py:probe():252] probe done +2021-07-14 21:54:10,386 DEBUG SenderThread:588654 [sender.py:send():179] send: files +2021-07-14 21:54:10,387 INFO SenderThread:588654 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-14 21:54:10,394 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 21:54:10,394 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: stop_status +2021-07-14 21:54:10,527 DEBUG SenderThread:588654 [sender.py:send():179] send: config +2021-07-14 21:54:10,527 DEBUG SenderThread:588654 [sender.py:send():179] send: config +2021-07-14 21:54:10,527 DEBUG SenderThread:588654 [sender.py:send():179] send: config +2021-07-14 21:54:11,237 INFO Thread-8 :588654 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/requirements.txt +2021-07-14 21:54:11,237 INFO Thread-8 :588654 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-metadata.json +2021-07-14 21:54:11,237 INFO Thread-8 :588654 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log +2021-07-14 21:54:11,254 INFO Thread-11 :588654 [upload_job.py:push():137] Uploaded file /tmp/tmp43phob9nwandb/1nzximp3-wandb-metadata.json +2021-07-14 21:54:25,243 INFO Thread-8 :588654 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log +2021-07-14 21:54:25,572 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 21:54:25,573 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: stop_status +2021-07-14 21:54:28,245 INFO Thread-8 :588654 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log +2021-07-14 21:54:29,079 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 21:54:29,079 DEBUG SenderThread:588654 [sender.py:send():179] send: telemetry +2021-07-14 21:54:29,079 DEBUG SenderThread:588654 [sender.py:send():179] send: exit +2021-07-14 21:54:29,079 INFO SenderThread:588654 [sender.py:send_exit():287] handling exit code: 1 +2021-07-14 21:54:29,080 INFO SenderThread:588654 [sender.py:send_exit():295] send defer +2021-07-14 21:54:29,080 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 21:54:29,081 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer +2021-07-14 21:54:29,081 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-14 21:54:29,081 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer +2021-07-14 21:54:29,081 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-14 21:54:29,081 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 1 +2021-07-14 21:54:29,081 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer +2021-07-14 21:54:29,081 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-14 21:54:29,111 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer +2021-07-14 21:54:29,111 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-14 21:54:29,111 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 2 +2021-07-14 21:54:29,111 DEBUG SenderThread:588654 [sender.py:send():179] send: stats +2021-07-14 21:54:29,112 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer +2021-07-14 21:54:29,112 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-14 21:54:29,112 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer +2021-07-14 21:54:29,112 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-14 21:54:29,112 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 3 +2021-07-14 21:54:29,112 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer +2021-07-14 21:54:29,112 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-14 21:54:29,113 DEBUG SenderThread:588654 [sender.py:send():179] send: summary +2021-07-14 21:54:29,113 INFO SenderThread:588654 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 21:54:29,113 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer +2021-07-14 21:54:29,113 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-14 21:54:29,113 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 4 +2021-07-14 21:54:29,113 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer +2021-07-14 21:54:29,113 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-14 21:54:29,114 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer +2021-07-14 21:54:29,114 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-14 21:54:29,182 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 21:54:29,245 INFO Thread-8 :588654 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json +2021-07-14 21:54:29,245 INFO Thread-8 :588654 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log +2021-07-14 21:54:29,299 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 5 +2021-07-14 21:54:29,299 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 21:54:29,300 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer +2021-07-14 21:54:29,300 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-14 21:54:29,300 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer +2021-07-14 21:54:29,300 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-14 21:54:29,300 INFO SenderThread:588654 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-14 21:54:29,401 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 21:54:30,246 INFO Thread-8 :588654 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/config.yaml +2021-07-14 21:54:30,246 INFO SenderThread:588654 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files +2021-07-14 21:54:30,247 INFO SenderThread:588654 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/requirements.txt requirements.txt +2021-07-14 21:54:30,247 INFO SenderThread:588654 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log output.log +2021-07-14 21:54:30,247 INFO SenderThread:588654 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-metadata.json wandb-metadata.json +2021-07-14 21:54:30,247 INFO SenderThread:588654 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/config.yaml config.yaml +2021-07-14 21:54:30,247 INFO SenderThread:588654 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json wandb-summary.json +2021-07-14 21:54:30,248 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 6 +2021-07-14 21:54:30,248 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 21:54:30,252 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer +2021-07-14 21:54:30,252 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-14 21:54:30,252 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer +2021-07-14 21:54:30,252 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-14 21:54:30,252 INFO SenderThread:588654 [file_pusher.py:finish():177] shutting down file pusher +2021-07-14 21:54:30,350 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 21:54:30,350 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 21:54:30,452 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 21:54:30,452 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 21:54:30,553 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 21:54:30,554 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 21:54:30,655 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 21:54:30,655 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 21:54:30,699 INFO Thread-13 :588654 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log +2021-07-14 21:54:30,707 INFO Thread-12 :588654 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/requirements.txt +2021-07-14 21:54:30,708 INFO Thread-15 :588654 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json +2021-07-14 21:54:30,757 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 21:54:30,757 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 21:54:30,760 INFO Thread-14 :588654 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/config.yaml +2021-07-14 21:54:30,858 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 21:54:30,858 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 21:54:30,960 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 21:54:30,960 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 21:54:30,960 INFO Thread-7 :588654 [sender.py:transition_state():308] send defer: 7 +2021-07-14 21:54:30,961 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer +2021-07-14 21:54:30,961 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-14 21:54:30,961 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer +2021-07-14 21:54:30,961 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-14 21:54:31,062 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 21:54:31,093 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 8 +2021-07-14 21:54:31,093 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 21:54:31,093 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer +2021-07-14 21:54:31,093 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-14 21:54:31,093 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer +2021-07-14 21:54:31,094 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-14 21:54:31,094 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 9 +2021-07-14 21:54:31,094 DEBUG SenderThread:588654 [sender.py:send():179] send: final +2021-07-14 21:54:31,094 DEBUG SenderThread:588654 [sender.py:send():179] send: footer +2021-07-14 21:54:31,094 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer +2021-07-14 21:54:31,095 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-14 21:54:31,095 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer +2021-07-14 21:54:31,095 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-14 21:54:31,195 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 21:54:31,195 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 21:54:31,195 INFO SenderThread:588654 [file_pusher.py:join():182] waiting for file pusher +2021-07-14 21:54:31,196 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: get_summary +2021-07-14 21:54:31,197 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-14 21:54:31,197 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: shutdown +2021-07-14 21:54:31,197 INFO HandlerThread:588654 [handler.py:finish():638] shutting down handler +2021-07-14 21:54:32,095 INFO WriterThread:588654 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/run-3kpvz8se.wandb +2021-07-14 21:54:32,195 INFO SenderThread:588654 [sender.py:finish():945] shutting down sender +2021-07-14 21:54:32,196 INFO SenderThread:588654 [file_pusher.py:finish():177] shutting down file pusher +2021-07-14 21:54:32,196 INFO SenderThread:588654 [file_pusher.py:join():182] waiting for file pusher +2021-07-14 21:54:32,198 INFO MainThread:588654 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210714_215408-3kpvz8se/logs/debug.log b/wandb/run-20210714_215408-3kpvz8se/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e357f51f4a7d198fdc96e3784e3f49f0f8ff66dc --- /dev/null +++ b/wandb/run-20210714_215408-3kpvz8se/logs/debug.log @@ -0,0 +1,127 @@ +2021-07-14 21:54:08,360 INFO MainThread:587396 [wandb_setup.py:_flush():69] setting env: {} +2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/logs/debug.log +2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/logs/debug-internal.log +2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_init.py:init():370] calling init triggers +2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_init.py:init():419] starting backend +2021-07-14 21:54:08,361 INFO MainThread:587396 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-14 21:54:08,399 INFO MainThread:587396 [backend.py:ensure_launched():135] starting backend process... +2021-07-14 21:54:08,437 INFO MainThread:587396 [backend.py:ensure_launched():139] started backend process with pid: 588654 +2021-07-14 21:54:08,438 INFO MainThread:587396 [wandb_init.py:init():424] backend started and connected +2021-07-14 21:54:08,441 INFO MainThread:587396 [wandb_init.py:init():472] updated telemetry +2021-07-14 21:54:08,442 INFO MainThread:587396 [wandb_init.py:init():491] communicating current version +2021-07-14 21:54:09,049 INFO MainThread:587396 [wandb_init.py:init():496] got version response +2021-07-14 21:54:09,049 INFO MainThread:587396 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-14 21:54:09,234 INFO MainThread:587396 [wandb_init.py:init():529] starting run threads in backend +2021-07-14 21:54:10,390 INFO MainThread:587396 [wandb_run.py:_console_start():1623] atexit reg +2021-07-14 21:54:10,390 INFO MainThread:587396 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-14 21:54:10,391 INFO MainThread:587396 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-14 21:54:10,393 INFO MainThread:587396 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-14 21:54:10,393 INFO MainThread:587396 [wandb_init.py:init():554] run started, returning control to user process +2021-07-14 21:54:10,399 INFO MainThread:587396 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_21-54-01_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 250, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-14 21:54:10,401 INFO MainThread:587396 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-14 21:54:10,402 INFO MainThread:587396 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500} +2021-07-14 21:54:26,346 INFO MainThread:587396 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-14 21:54:26,347 INFO MainThread:587396 [wandb_run.py:_restore():1565] restore +2021-07-14 21:54:29,081 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1483 + total_bytes: 1483 +} + +2021-07-14 21:54:29,300 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1483 + total_bytes: 1483 +} + +2021-07-14 21:54:30,248 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 2 +} +pusher_stats { + uploaded_bytes: 1483 + total_bytes: 3133 +} + +2021-07-14 21:54:30,351 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1483 + total_bytes: 9257 +} + +2021-07-14 21:54:30,452 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 9257 + total_bytes: 9257 +} + +2021-07-14 21:54:30,554 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 9257 + total_bytes: 9257 +} + +2021-07-14 21:54:30,656 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 9257 + total_bytes: 9257 +} + +2021-07-14 21:54:30,757 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 9257 + total_bytes: 9257 +} + +2021-07-14 21:54:30,859 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 9257 + total_bytes: 9257 +} + +2021-07-14 21:54:30,961 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 9257 + total_bytes: 9257 +} + +2021-07-14 21:54:31,093 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 9257 + total_bytes: 9257 +} + +2021-07-14 21:54:31,196 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 9257 + total_bytes: 9257 +} + +2021-07-14 21:54:32,452 INFO MainThread:587396 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210714_215408-3kpvz8se/run-3kpvz8se.wandb b/wandb/run-20210714_215408-3kpvz8se/run-3kpvz8se.wandb new file mode 100644 index 0000000000000000000000000000000000000000..0bdbc7745e71c9d08bd57e16c7d19f292b7ade57 Binary files /dev/null and b/wandb/run-20210714_215408-3kpvz8se/run-3kpvz8se.wandb differ diff --git a/wandb/run-20210714_221920-s091gfok/files/config.yaml b/wandb/run-20210714_221920-s091gfok/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5f38e62d7c045b21f3971aa3cf82bdeed351f01 --- /dev/null +++ b/wandb/run-20210714_221920-s091gfok/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 2 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul14_22-19-13_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 500 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 2000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210714_221920-s091gfok/files/output.log b/wandb/run-20210714_221920-s091gfok/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e441205529ca0ffa15924508036060d0b6bdcea5 --- /dev/null +++ b/wandb/run-20210714_221920-s091gfok/files/output.log @@ -0,0 +1,39 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax.py", line 804, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210714_221920-s091gfok/files/requirements.txt b/wandb/run-20210714_221920-s091gfok/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210714_221920-s091gfok/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210714_221920-s091gfok/files/wandb-metadata.json b/wandb/run-20210714_221920-s091gfok/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..e9dbc82a8c8cfa1046b18ec5cbde74915d1ae8d6 --- /dev/null +++ b/wandb/run-20210714_221920-s091gfok/files/wandb-metadata.json @@ -0,0 +1,47 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-14T22:19:22.632871", + "startedAt": "2021-07-14T22:19:20.670815", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=5000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=500", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=2000", + "--overwrite_cache", + "False", + "--gradient_accumulation_steps=2" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "de71755314d4aa3ab182072c6f0be3de9798a66b" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210714_221920-s091gfok/files/wandb-summary.json b/wandb/run-20210714_221920-s091gfok/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210714_221920-s091gfok/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210714_221920-s091gfok/logs/debug-internal.log b/wandb/run-20210714_221920-s091gfok/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..fb225af51e086e160b2d9f7fb200f11f86a60b7e --- /dev/null +++ b/wandb/run-20210714_221920-s091gfok/logs/debug-internal.log @@ -0,0 +1,233 @@ +2021-07-14 22:19:21,314 INFO MainThread:593294 [internal.py:wandb_internal():88] W&B internal server running at pid: 593294, started at: 2021-07-14 22:19:21.314432 +2021-07-14 22:19:21,317 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: check_version +2021-07-14 22:19:21,317 INFO WriterThread:593294 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/run-s091gfok.wandb +2021-07-14 22:19:21,318 DEBUG SenderThread:593294 [sender.py:send():179] send: header +2021-07-14 22:19:21,318 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: check_version +2021-07-14 22:19:21,357 DEBUG SenderThread:593294 [sender.py:send():179] send: run +2021-07-14 22:19:21,536 INFO SenderThread:593294 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files +2021-07-14 22:19:21,536 INFO SenderThread:593294 [sender.py:_start_run_threads():716] run started: s091gfok with start time 1626301160 +2021-07-14 22:19:21,536 DEBUG SenderThread:593294 [sender.py:send():179] send: summary +2021-07-14 22:19:21,537 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: run_start +2021-07-14 22:19:21,537 INFO SenderThread:593294 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 22:19:22,539 INFO Thread-8 :593294 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-summary.json +2021-07-14 22:19:22,632 DEBUG HandlerThread:593294 [meta.py:__init__():39] meta init +2021-07-14 22:19:22,632 DEBUG HandlerThread:593294 [meta.py:__init__():53] meta init done +2021-07-14 22:19:22,632 DEBUG HandlerThread:593294 [meta.py:probe():210] probe +2021-07-14 22:19:22,634 DEBUG HandlerThread:593294 [meta.py:_setup_git():200] setup git +2021-07-14 22:19:22,663 DEBUG HandlerThread:593294 [meta.py:_setup_git():207] setup git done +2021-07-14 22:19:22,663 DEBUG HandlerThread:593294 [meta.py:_save_pip():57] save pip +2021-07-14 22:19:22,664 DEBUG HandlerThread:593294 [meta.py:_save_pip():71] save pip done +2021-07-14 22:19:22,664 DEBUG HandlerThread:593294 [meta.py:probe():252] probe done +2021-07-14 22:19:22,667 DEBUG SenderThread:593294 [sender.py:send():179] send: files +2021-07-14 22:19:22,667 INFO SenderThread:593294 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-14 22:19:22,674 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:19:22,674 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:19:22,802 DEBUG SenderThread:593294 [sender.py:send():179] send: config +2021-07-14 22:19:22,803 DEBUG SenderThread:593294 [sender.py:send():179] send: config +2021-07-14 22:19:22,803 DEBUG SenderThread:593294 [sender.py:send():179] send: config +2021-07-14 22:19:23,119 INFO Thread-11 :593294 [upload_job.py:push():137] Uploaded file /tmp/tmpn0n6xzzmwandb/2vhpic31-wandb-metadata.json +2021-07-14 22:19:23,537 INFO Thread-8 :593294 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-metadata.json +2021-07-14 22:19:23,537 INFO Thread-8 :593294 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/requirements.txt +2021-07-14 22:19:23,538 INFO Thread-8 :593294 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log +2021-07-14 22:19:37,543 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log +2021-07-14 22:19:37,804 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:19:37,804 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:19:39,545 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log +2021-07-14 22:19:50,715 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:19:52,550 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/config.yaml +2021-07-14 22:19:52,936 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:19:52,936 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:20:08,079 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:20:08,080 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:20:20,789 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:20:23,215 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:20:23,215 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:20:38,362 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:20:38,363 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:20:50,861 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:20:53,496 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:20:53,496 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:21:08,625 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:21:08,625 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:21:20,932 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:21:23,756 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:21:23,757 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:21:38,885 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:21:38,886 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:21:50,997 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:21:54,016 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:21:54,016 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:22:09,146 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:22:09,147 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:22:21,114 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:22:24,279 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:22:24,279 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:22:39,412 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:22:39,413 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:22:51,192 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:22:54,548 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:22:54,548 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:23:09,678 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:23:09,678 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:23:21,267 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:23:24,814 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:23:24,814 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:23:39,949 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:23:39,949 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:23:51,337 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:23:55,081 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:23:55,082 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:24:10,212 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:24:10,212 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:24:21,405 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:24:25,345 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:24:25,346 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:24:40,483 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:24:40,483 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:24:51,475 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:24:55,615 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:24:55,615 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:25:10,746 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:25:10,746 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:25:21,548 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:25:25,876 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:25:25,876 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:25:41,015 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:25:41,016 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:25:51,619 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:25:56,148 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:25:56,148 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:26:11,280 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:26:11,280 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:26:21,695 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:26:26,412 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:26:26,413 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:26:41,546 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:26:41,547 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:26:51,772 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:26:56,683 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:26:56,683 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:27:11,816 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:27:11,816 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:27:21,849 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:27:26,950 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:27:26,950 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:27:29,710 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log +2021-07-14 22:27:42,097 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:27:42,097 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:27:51,925 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:27:57,249 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:27:57,250 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:28:12,383 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:28:12,384 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:28:22,007 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:28:27,521 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:28:27,522 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:28:42,658 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:28:42,658 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:28:49,741 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log +2021-07-14 22:28:51,116 DEBUG SenderThread:593294 [sender.py:send():179] send: telemetry +2021-07-14 22:28:51,116 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:28:51,116 DEBUG SenderThread:593294 [sender.py:send():179] send: exit +2021-07-14 22:28:51,116 INFO SenderThread:593294 [sender.py:send_exit():287] handling exit code: 1 +2021-07-14 22:28:51,117 INFO SenderThread:593294 [sender.py:send_exit():295] send defer +2021-07-14 22:28:51,117 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:28:51,118 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:28:51,118 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-14 22:28:51,118 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer +2021-07-14 22:28:51,118 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-14 22:28:51,118 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 1 +2021-07-14 22:28:51,118 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:28:51,118 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-14 22:28:51,182 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer +2021-07-14 22:28:51,182 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-14 22:28:51,183 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 2 +2021-07-14 22:28:51,183 DEBUG SenderThread:593294 [sender.py:send():179] send: stats +2021-07-14 22:28:51,183 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:28:51,183 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-14 22:28:51,184 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer +2021-07-14 22:28:51,184 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-14 22:28:51,184 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 3 +2021-07-14 22:28:51,184 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:28:51,184 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-14 22:28:51,184 DEBUG SenderThread:593294 [sender.py:send():179] send: summary +2021-07-14 22:28:51,185 INFO SenderThread:593294 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 22:28:51,185 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer +2021-07-14 22:28:51,185 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-14 22:28:51,185 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 4 +2021-07-14 22:28:51,185 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:28:51,185 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-14 22:28:51,185 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer +2021-07-14 22:28:51,185 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-14 22:28:51,220 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:28:51,361 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 5 +2021-07-14 22:28:51,361 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:28:51,361 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:28:51,362 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-14 22:28:51,362 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer +2021-07-14 22:28:51,362 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-14 22:28:51,362 INFO SenderThread:593294 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-14 22:28:51,463 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:28:51,742 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-summary.json +2021-07-14 22:28:51,743 INFO SenderThread:593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/config.yaml +2021-07-14 22:28:51,743 INFO SenderThread:593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log +2021-07-14 22:28:51,743 INFO SenderThread:593294 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files +2021-07-14 22:28:51,743 INFO SenderThread:593294 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/requirements.txt requirements.txt +2021-07-14 22:28:51,744 INFO SenderThread:593294 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log output.log +2021-07-14 22:28:51,744 INFO SenderThread:593294 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-metadata.json wandb-metadata.json +2021-07-14 22:28:51,744 INFO SenderThread:593294 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/config.yaml config.yaml +2021-07-14 22:28:51,744 INFO SenderThread:593294 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-summary.json wandb-summary.json +2021-07-14 22:28:51,750 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 6 +2021-07-14 22:28:51,750 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:28:51,751 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:28:51,751 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-14 22:28:51,754 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer +2021-07-14 22:28:51,754 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-14 22:28:51,754 INFO SenderThread:593294 [file_pusher.py:finish():177] shutting down file pusher +2021-07-14 22:28:51,856 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:28:51,856 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:28:51,958 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:28:51,958 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:28:52,060 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:28:52,061 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:28:52,162 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:28:52,163 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:28:52,191 INFO Thread-13 :593294 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log +2021-07-14 22:28:52,198 INFO Thread-12 :593294 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/requirements.txt +2021-07-14 22:28:52,200 INFO Thread-14 :593294 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/config.yaml +2021-07-14 22:28:52,212 INFO Thread-15 :593294 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-summary.json +2021-07-14 22:28:52,264 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:28:52,265 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:28:52,366 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:28:52,366 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:28:52,413 INFO Thread-7 :593294 [sender.py:transition_state():308] send defer: 7 +2021-07-14 22:28:52,413 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:28:52,413 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-14 22:28:52,413 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer +2021-07-14 22:28:52,414 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-14 22:28:52,468 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:28:52,536 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 8 +2021-07-14 22:28:52,536 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:28:52,537 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:28:52,537 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-14 22:28:52,537 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer +2021-07-14 22:28:52,537 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-14 22:28:52,537 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 9 +2021-07-14 22:28:52,538 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:28:52,538 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-14 22:28:52,538 DEBUG SenderThread:593294 [sender.py:send():179] send: final +2021-07-14 22:28:52,538 DEBUG SenderThread:593294 [sender.py:send():179] send: footer +2021-07-14 22:28:52,538 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer +2021-07-14 22:28:52,538 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-14 22:28:52,638 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:28:52,638 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:28:52,638 INFO SenderThread:593294 [file_pusher.py:join():182] waiting for file pusher +2021-07-14 22:28:52,640 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: get_summary +2021-07-14 22:28:52,640 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-14 22:28:52,641 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: shutdown +2021-07-14 22:28:52,641 INFO HandlerThread:593294 [handler.py:finish():638] shutting down handler +2021-07-14 22:28:53,538 INFO WriterThread:593294 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/run-s091gfok.wandb +2021-07-14 22:28:53,639 INFO SenderThread:593294 [sender.py:finish():945] shutting down sender +2021-07-14 22:28:53,639 INFO SenderThread:593294 [file_pusher.py:finish():177] shutting down file pusher +2021-07-14 22:28:53,639 INFO SenderThread:593294 [file_pusher.py:join():182] waiting for file pusher +2021-07-14 22:28:53,641 INFO MainThread:593294 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210714_221920-s091gfok/logs/debug.log b/wandb/run-20210714_221920-s091gfok/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..ee756842fd03b6c4a42f7393e4072bd8bfe5cbb9 --- /dev/null +++ b/wandb/run-20210714_221920-s091gfok/logs/debug.log @@ -0,0 +1,119 @@ +2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_setup.py:_flush():69] setting env: {} +2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/logs/debug.log +2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/logs/debug-internal.log +2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_init.py:init():370] calling init triggers +2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-14 22:19:20,673 INFO MainThread:592040 [wandb_init.py:init():419] starting backend +2021-07-14 22:19:20,673 INFO MainThread:592040 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-14 22:19:20,717 INFO MainThread:592040 [backend.py:ensure_launched():135] starting backend process... +2021-07-14 22:19:20,761 INFO MainThread:592040 [backend.py:ensure_launched():139] started backend process with pid: 593294 +2021-07-14 22:19:20,763 INFO MainThread:592040 [wandb_init.py:init():424] backend started and connected +2021-07-14 22:19:20,766 INFO MainThread:592040 [wandb_init.py:init():472] updated telemetry +2021-07-14 22:19:20,766 INFO MainThread:592040 [wandb_init.py:init():491] communicating current version +2021-07-14 22:19:21,355 INFO MainThread:592040 [wandb_init.py:init():496] got version response +2021-07-14 22:19:21,356 INFO MainThread:592040 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-14 22:19:21,536 INFO MainThread:592040 [wandb_init.py:init():529] starting run threads in backend +2021-07-14 22:19:22,670 INFO MainThread:592040 [wandb_run.py:_console_start():1623] atexit reg +2021-07-14 22:19:22,671 INFO MainThread:592040 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-14 22:19:22,672 INFO MainThread:592040 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-14 22:19:22,674 INFO MainThread:592040 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-14 22:19:22,674 INFO MainThread:592040 [wandb_init.py:init():554] run started, returning control to user process +2021-07-14 22:19:22,681 INFO MainThread:592040 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_22-19-13_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-14 22:19:22,683 INFO MainThread:592040 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-14 22:19:22,685 INFO MainThread:592040 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000} +2021-07-14 22:28:48,857 INFO MainThread:592040 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-14 22:28:48,859 INFO MainThread:592040 [wandb_run.py:_restore():1565] restore +2021-07-14 22:28:51,118 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1447 + total_bytes: 1447 +} + +2021-07-14 22:28:51,362 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1447 + total_bytes: 1447 +} + +2021-07-14 22:28:51,754 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 1447 + total_bytes: 11398 +} + +2021-07-14 22:28:51,857 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1447 + total_bytes: 11400 +} + +2021-07-14 22:28:51,959 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11400 + total_bytes: 11400 +} + +2021-07-14 22:28:52,061 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11400 + total_bytes: 11400 +} + +2021-07-14 22:28:52,163 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11400 + total_bytes: 11400 +} + +2021-07-14 22:28:52,265 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11400 + total_bytes: 11400 +} + +2021-07-14 22:28:52,367 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11400 + total_bytes: 11400 +} + +2021-07-14 22:28:52,537 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11400 + total_bytes: 11400 +} + +2021-07-14 22:28:52,639 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11400 + total_bytes: 11400 +} + +2021-07-14 22:28:53,943 INFO MainThread:592040 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210714_221920-s091gfok/run-s091gfok.wandb b/wandb/run-20210714_221920-s091gfok/run-s091gfok.wandb new file mode 100644 index 0000000000000000000000000000000000000000..8330c661c9e701216b5072d86e85d936d6a30a69 Binary files /dev/null and b/wandb/run-20210714_221920-s091gfok/run-s091gfok.wandb differ diff --git a/wandb/run-20210714_222920-2p7mu4rm/files/config.yaml b/wandb/run-20210714_222920-2p7mu4rm/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c6a2e57d92d12d73fbeafa0807d73078fcfd480e --- /dev/null +++ b/wandb/run-20210714_222920-2p7mu4rm/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 4 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul14_22-29-13_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 500 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 2000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 5000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210714_222920-2p7mu4rm/files/output.log b/wandb/run-20210714_222920-2p7mu4rm/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..0cecd285673f912e2342e440a3c12a6536831629 --- /dev/null +++ b/wandb/run-20210714_222920-2p7mu4rm/files/output.log @@ -0,0 +1,39 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax.py", line 804, in + state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt b/wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210714_222920-2p7mu4rm/files/wandb-metadata.json b/wandb/run-20210714_222920-2p7mu4rm/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8c5891b94312f8b4f64ffe6a736cd8159c0415e8 --- /dev/null +++ b/wandb/run-20210714_222920-2p7mu4rm/files/wandb-metadata.json @@ -0,0 +1,47 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-14T22:29:22.512026", + "startedAt": "2021-07-14T22:29:20.509023", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=5000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=500", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=2000", + "--overwrite_cache", + "False", + "--gradient_accumulation_steps=4" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "de71755314d4aa3ab182072c6f0be3de9798a66b" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json b/wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210714_222920-2p7mu4rm/logs/debug-internal.log b/wandb/run-20210714_222920-2p7mu4rm/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..ddcf7ddd56fdcef8e7f0fcdb59611dcc77e9e4ce --- /dev/null +++ b/wandb/run-20210714_222920-2p7mu4rm/logs/debug-internal.log @@ -0,0 +1,232 @@ +2021-07-14 22:29:21,187 INFO MainThread:596546 [internal.py:wandb_internal():88] W&B internal server running at pid: 596546, started at: 2021-07-14 22:29:21.187444 +2021-07-14 22:29:21,189 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: check_version +2021-07-14 22:29:21,189 INFO WriterThread:596546 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/run-2p7mu4rm.wandb +2021-07-14 22:29:21,191 DEBUG SenderThread:596546 [sender.py:send():179] send: header +2021-07-14 22:29:21,191 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: check_version +2021-07-14 22:29:21,226 DEBUG SenderThread:596546 [sender.py:send():179] send: run +2021-07-14 22:29:21,391 INFO SenderThread:596546 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files +2021-07-14 22:29:21,391 INFO SenderThread:596546 [sender.py:_start_run_threads():716] run started: 2p7mu4rm with start time 1626301760 +2021-07-14 22:29:21,391 DEBUG SenderThread:596546 [sender.py:send():179] send: summary +2021-07-14 22:29:21,391 INFO SenderThread:596546 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 22:29:21,392 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: run_start +2021-07-14 22:29:22,397 INFO Thread-8 :596546 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json +2021-07-14 22:29:22,511 DEBUG HandlerThread:596546 [meta.py:__init__():39] meta init +2021-07-14 22:29:22,511 DEBUG HandlerThread:596546 [meta.py:__init__():53] meta init done +2021-07-14 22:29:22,511 DEBUG HandlerThread:596546 [meta.py:probe():210] probe +2021-07-14 22:29:22,513 DEBUG HandlerThread:596546 [meta.py:_setup_git():200] setup git +2021-07-14 22:29:22,541 DEBUG HandlerThread:596546 [meta.py:_setup_git():207] setup git done +2021-07-14 22:29:22,541 DEBUG HandlerThread:596546 [meta.py:_save_pip():57] save pip +2021-07-14 22:29:22,542 DEBUG HandlerThread:596546 [meta.py:_save_pip():71] save pip done +2021-07-14 22:29:22,542 DEBUG HandlerThread:596546 [meta.py:probe():252] probe done +2021-07-14 22:29:22,545 DEBUG SenderThread:596546 [sender.py:send():179] send: files +2021-07-14 22:29:22,545 INFO SenderThread:596546 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-14 22:29:22,551 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:29:22,551 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:29:22,678 DEBUG SenderThread:596546 [sender.py:send():179] send: config +2021-07-14 22:29:22,678 DEBUG SenderThread:596546 [sender.py:send():179] send: config +2021-07-14 22:29:22,679 DEBUG SenderThread:596546 [sender.py:send():179] send: config +2021-07-14 22:29:22,981 INFO Thread-11 :596546 [upload_job.py:push():137] Uploaded file /tmp/tmpkw6g32phwandb/2nns5d67-wandb-metadata.json +2021-07-14 22:29:23,396 INFO Thread-8 :596546 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt +2021-07-14 22:29:23,396 INFO Thread-8 :596546 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-metadata.json +2021-07-14 22:29:23,397 INFO Thread-8 :596546 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log +2021-07-14 22:29:37,401 INFO Thread-8 :596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log +2021-07-14 22:29:37,681 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:29:37,681 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:29:50,595 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:29:52,407 INFO Thread-8 :596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/config.yaml +2021-07-14 22:29:52,815 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:29:52,815 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:30:07,946 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:30:07,947 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:30:20,679 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:30:23,081 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:30:23,081 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:30:38,211 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:30:38,212 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:30:50,744 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:30:53,343 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:30:53,344 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:31:08,475 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:31:08,476 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:31:20,817 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:31:23,611 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:31:23,611 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:31:38,742 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:31:38,742 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:31:50,892 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:31:53,876 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:31:53,876 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:32:09,009 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:32:09,010 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:32:20,968 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:32:24,154 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:32:24,154 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:32:39,289 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:32:39,289 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:32:51,042 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:32:54,420 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:32:54,420 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:33:09,552 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:33:09,552 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:33:21,119 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:33:24,688 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:33:24,689 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:33:39,824 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:33:39,825 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:33:51,197 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:33:54,955 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:33:54,955 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:34:10,085 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:34:10,086 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:34:21,275 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:34:25,221 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:34:25,221 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:34:40,360 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:34:40,360 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:34:51,349 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:34:55,491 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:34:55,491 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:35:10,620 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:35:10,621 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:35:21,421 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:35:25,755 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:35:25,755 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:35:40,915 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:35:40,916 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:35:51,496 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:35:56,049 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:35:56,049 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:36:11,183 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:36:11,184 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:36:21,575 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:36:26,315 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:36:26,316 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:36:41,448 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:36:41,448 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:36:51,652 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:36:56,580 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:36:56,581 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:37:11,712 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:37:11,712 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:37:21,566 INFO Thread-8 :596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log +2021-07-14 22:37:21,727 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:37:27,049 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:37:27,050 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:37:42,194 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:37:42,194 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:37:51,805 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:37:57,327 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:37:57,327 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:38:12,463 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:38:12,464 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:38:21,882 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:38:27,596 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:38:27,596 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:38:42,728 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:38:42,728 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:38:45,598 INFO Thread-8 :596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log +2021-07-14 22:38:45,823 DEBUG SenderThread:596546 [sender.py:send():179] send: telemetry +2021-07-14 22:38:45,823 DEBUG SenderThread:596546 [sender.py:send():179] send: exit +2021-07-14 22:38:45,823 INFO SenderThread:596546 [sender.py:send_exit():287] handling exit code: 1 +2021-07-14 22:38:45,824 INFO SenderThread:596546 [sender.py:send_exit():295] send defer +2021-07-14 22:38:45,824 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:38:45,825 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:38:45,825 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:38:45,825 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-14 22:38:45,825 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer +2021-07-14 22:38:45,825 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-14 22:38:45,825 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 1 +2021-07-14 22:38:45,826 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:38:45,826 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-14 22:38:45,857 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer +2021-07-14 22:38:45,857 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-14 22:38:45,857 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 2 +2021-07-14 22:38:45,857 DEBUG SenderThread:596546 [sender.py:send():179] send: stats +2021-07-14 22:38:45,857 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:38:45,858 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-14 22:38:45,858 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer +2021-07-14 22:38:45,858 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-14 22:38:45,858 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 3 +2021-07-14 22:38:45,859 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:38:45,859 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-14 22:38:45,859 DEBUG SenderThread:596546 [sender.py:send():179] send: summary +2021-07-14 22:38:45,859 INFO SenderThread:596546 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 22:38:45,859 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer +2021-07-14 22:38:45,859 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-14 22:38:45,859 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 4 +2021-07-14 22:38:45,860 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:38:45,860 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-14 22:38:45,860 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer +2021-07-14 22:38:45,860 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-14 22:38:45,927 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:38:46,024 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 5 +2021-07-14 22:38:46,024 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:38:46,024 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:38:46,024 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-14 22:38:46,025 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer +2021-07-14 22:38:46,025 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-14 22:38:46,025 INFO SenderThread:596546 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-14 22:38:46,126 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:38:46,598 INFO Thread-8 :596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json +2021-07-14 22:38:46,599 INFO SenderThread:596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/config.yaml +2021-07-14 22:38:46,599 INFO SenderThread:596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log +2021-07-14 22:38:46,599 INFO SenderThread:596546 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files +2021-07-14 22:38:46,599 INFO SenderThread:596546 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt requirements.txt +2021-07-14 22:38:46,600 INFO SenderThread:596546 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log output.log +2021-07-14 22:38:46,600 INFO SenderThread:596546 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-metadata.json wandb-metadata.json +2021-07-14 22:38:46,600 INFO SenderThread:596546 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/config.yaml config.yaml +2021-07-14 22:38:46,600 INFO SenderThread:596546 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json wandb-summary.json +2021-07-14 22:38:46,603 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 6 +2021-07-14 22:38:46,604 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:38:46,607 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:38:46,607 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-14 22:38:46,608 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer +2021-07-14 22:38:46,610 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-14 22:38:46,610 INFO SenderThread:596546 [file_pusher.py:finish():177] shutting down file pusher +2021-07-14 22:38:46,708 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:38:46,709 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:38:46,811 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:38:46,811 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:38:46,913 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:38:46,913 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:38:47,015 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:38:47,015 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:38:47,054 INFO Thread-14 :596546 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/config.yaml +2021-07-14 22:38:47,063 INFO Thread-12 :596546 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt +2021-07-14 22:38:47,074 INFO Thread-13 :596546 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log +2021-07-14 22:38:47,095 INFO Thread-15 :596546 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json +2021-07-14 22:38:47,117 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:38:47,117 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:38:47,219 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:38:47,219 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:38:47,295 INFO Thread-7 :596546 [sender.py:transition_state():308] send defer: 7 +2021-07-14 22:38:47,295 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:38:47,296 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-14 22:38:47,296 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer +2021-07-14 22:38:47,296 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-14 22:38:47,321 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:38:47,939 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 8 +2021-07-14 22:38:47,940 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:38:47,940 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:38:47,940 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-14 22:38:47,940 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer +2021-07-14 22:38:47,941 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-14 22:38:47,941 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 9 +2021-07-14 22:38:47,941 DEBUG SenderThread:596546 [sender.py:send():179] send: final +2021-07-14 22:38:47,941 DEBUG SenderThread:596546 [sender.py:send():179] send: footer +2021-07-14 22:38:47,942 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:38:47,942 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-14 22:38:47,942 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer +2021-07-14 22:38:47,942 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-14 22:38:48,042 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:38:48,042 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:38:48,042 INFO SenderThread:596546 [file_pusher.py:join():182] waiting for file pusher +2021-07-14 22:38:48,044 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: get_summary +2021-07-14 22:38:48,044 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-14 22:38:48,045 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: shutdown +2021-07-14 22:38:48,045 INFO HandlerThread:596546 [handler.py:finish():638] shutting down handler +2021-07-14 22:38:48,942 INFO WriterThread:596546 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/run-2p7mu4rm.wandb +2021-07-14 22:38:49,042 INFO SenderThread:596546 [sender.py:finish():945] shutting down sender +2021-07-14 22:38:49,043 INFO SenderThread:596546 [file_pusher.py:finish():177] shutting down file pusher +2021-07-14 22:38:49,043 INFO SenderThread:596546 [file_pusher.py:join():182] waiting for file pusher +2021-07-14 22:38:49,045 INFO MainThread:596546 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210714_222920-2p7mu4rm/logs/debug.log b/wandb/run-20210714_222920-2p7mu4rm/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..c70afa9831cb8a571fc518a4ff3ca41d197d463e --- /dev/null +++ b/wandb/run-20210714_222920-2p7mu4rm/logs/debug.log @@ -0,0 +1,119 @@ +2021-07-14 22:29:20,510 INFO MainThread:595290 [wandb_setup.py:_flush():69] setting env: {} +2021-07-14 22:29:20,510 INFO MainThread:595290 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-14 22:29:20,510 INFO MainThread:595290 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/logs/debug.log +2021-07-14 22:29:20,510 INFO MainThread:595290 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/logs/debug-internal.log +2021-07-14 22:29:20,511 INFO MainThread:595290 [wandb_init.py:init():370] calling init triggers +2021-07-14 22:29:20,511 INFO MainThread:595290 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-14 22:29:20,511 INFO MainThread:595290 [wandb_init.py:init():419] starting backend +2021-07-14 22:29:20,511 INFO MainThread:595290 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-14 22:29:20,555 INFO MainThread:595290 [backend.py:ensure_launched():135] starting backend process... +2021-07-14 22:29:20,597 INFO MainThread:595290 [backend.py:ensure_launched():139] started backend process with pid: 596546 +2021-07-14 22:29:20,599 INFO MainThread:595290 [wandb_init.py:init():424] backend started and connected +2021-07-14 22:29:20,602 INFO MainThread:595290 [wandb_init.py:init():472] updated telemetry +2021-07-14 22:29:20,603 INFO MainThread:595290 [wandb_init.py:init():491] communicating current version +2021-07-14 22:29:21,225 INFO MainThread:595290 [wandb_init.py:init():496] got version response +2021-07-14 22:29:21,226 INFO MainThread:595290 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-14 22:29:21,391 INFO MainThread:595290 [wandb_init.py:init():529] starting run threads in backend +2021-07-14 22:29:22,548 INFO MainThread:595290 [wandb_run.py:_console_start():1623] atexit reg +2021-07-14 22:29:22,548 INFO MainThread:595290 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-14 22:29:22,549 INFO MainThread:595290 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-14 22:29:22,551 INFO MainThread:595290 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-14 22:29:22,551 INFO MainThread:595290 [wandb_init.py:init():554] run started, returning control to user process +2021-07-14 22:29:22,559 INFO MainThread:595290 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_22-29-13_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-14 22:29:22,561 INFO MainThread:595290 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-14 22:29:22,562 INFO MainThread:595290 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000} +2021-07-14 22:38:43,366 INFO MainThread:595290 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-14 22:38:43,368 INFO MainThread:595290 [wandb_run.py:_restore():1565] restore +2021-07-14 22:38:45,826 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1447 + total_bytes: 1447 +} + +2021-07-14 22:38:46,025 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1447 + total_bytes: 1447 +} + +2021-07-14 22:38:46,607 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 1447 + total_bytes: 11500 +} + +2021-07-14 22:38:46,709 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1447 + total_bytes: 11502 +} + +2021-07-14 22:38:46,812 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11502 + total_bytes: 11502 +} + +2021-07-14 22:38:46,914 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11502 + total_bytes: 11502 +} + +2021-07-14 22:38:47,016 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11502 + total_bytes: 11502 +} + +2021-07-14 22:38:47,118 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11502 + total_bytes: 11502 +} + +2021-07-14 22:38:47,220 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11502 + total_bytes: 11502 +} + +2021-07-14 22:38:47,940 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11502 + total_bytes: 11502 +} + +2021-07-14 22:38:48,043 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11502 + total_bytes: 11502 +} + +2021-07-14 22:38:49,338 INFO MainThread:595290 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210714_222920-2p7mu4rm/run-2p7mu4rm.wandb b/wandb/run-20210714_222920-2p7mu4rm/run-2p7mu4rm.wandb new file mode 100644 index 0000000000000000000000000000000000000000..34762fdff1bedc788e2f9d96f536fec0826aad6c Binary files /dev/null and b/wandb/run-20210714_222920-2p7mu4rm/run-2p7mu4rm.wandb differ diff --git a/wandb/run-20210714_224000-1jvvynqa/files/config.yaml b/wandb/run-20210714_224000-1jvvynqa/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d823fa163ee4898f0e9ebad9f18abeb5b61be0b8 --- /dev/null +++ b/wandb/run-20210714_224000-1jvvynqa/files/config.yaml @@ -0,0 +1,307 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 2: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 8 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul14_22-39-51_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 500 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 2000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 10000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210714_224000-1jvvynqa/files/output.log b/wandb/run-20210714_224000-1jvvynqa/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..09411fe8d524ae3a1ac8837ae3e403c28077a1ab --- /dev/null +++ b/wandb/run-20210714_224000-1jvvynqa/files/output.log @@ -0,0 +1,39 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( +Epoch ... (1/5): 0%| | 0/5 [00:00 + num_train_samples = len(tokenized_datasets["train"]) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped + out = pxla.xla_pmap( + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind + return call_bind(self, fun, *args, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process + return trace.process_map(self, fun, tracers, params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call + return primitive.impl(f, *tracers, **params) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl + return compiled_fun(*args) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "./run_mlm_flax.py", line 804, in + num_train_samples = len(tokenized_datasets["train"]) + File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated + out_bufs = compiled.execute_sharded_on_local_devices(input_bufs) +RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20210714_224000-1jvvynqa/files/requirements.txt b/wandb/run-20210714_224000-1jvvynqa/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210714_224000-1jvvynqa/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210714_224000-1jvvynqa/files/wandb-metadata.json b/wandb/run-20210714_224000-1jvvynqa/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..a5e23ec099d4b5c32a62f61941c1f3d3d61684af --- /dev/null +++ b/wandb/run-20210714_224000-1jvvynqa/files/wandb-metadata.json @@ -0,0 +1,47 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-14T22:40:02.351628", + "startedAt": "2021-07-14T22:40:00.340218", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=10000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=500", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=2000", + "--overwrite_cache", + "False", + "--gradient_accumulation_steps=8" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "de71755314d4aa3ab182072c6f0be3de9798a66b" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json b/wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210714_224000-1jvvynqa/logs/debug-internal.log b/wandb/run-20210714_224000-1jvvynqa/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..7a7160558d56b3cced0a928f00fad219ab584bc4 --- /dev/null +++ b/wandb/run-20210714_224000-1jvvynqa/logs/debug-internal.log @@ -0,0 +1,236 @@ +2021-07-14 22:40:01,029 INFO MainThread:598803 [internal.py:wandb_internal():88] W&B internal server running at pid: 598803, started at: 2021-07-14 22:40:01.029595 +2021-07-14 22:40:01,031 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: check_version +2021-07-14 22:40:01,032 INFO WriterThread:598803 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/run-1jvvynqa.wandb +2021-07-14 22:40:01,033 DEBUG SenderThread:598803 [sender.py:send():179] send: header +2021-07-14 22:40:01,033 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: check_version +2021-07-14 22:40:01,069 DEBUG SenderThread:598803 [sender.py:send():179] send: run +2021-07-14 22:40:01,239 INFO SenderThread:598803 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files +2021-07-14 22:40:01,240 INFO SenderThread:598803 [sender.py:_start_run_threads():716] run started: 1jvvynqa with start time 1626302400 +2021-07-14 22:40:01,240 DEBUG SenderThread:598803 [sender.py:send():179] send: summary +2021-07-14 22:40:01,240 INFO SenderThread:598803 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 22:40:01,240 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: run_start +2021-07-14 22:40:02,242 INFO Thread-8 :598803 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json +2021-07-14 22:40:02,351 DEBUG HandlerThread:598803 [meta.py:__init__():39] meta init +2021-07-14 22:40:02,351 DEBUG HandlerThread:598803 [meta.py:__init__():53] meta init done +2021-07-14 22:40:02,351 DEBUG HandlerThread:598803 [meta.py:probe():210] probe +2021-07-14 22:40:02,352 DEBUG HandlerThread:598803 [meta.py:_setup_git():200] setup git +2021-07-14 22:40:02,381 DEBUG HandlerThread:598803 [meta.py:_setup_git():207] setup git done +2021-07-14 22:40:02,381 DEBUG HandlerThread:598803 [meta.py:_save_pip():57] save pip +2021-07-14 22:40:02,382 DEBUG HandlerThread:598803 [meta.py:_save_pip():71] save pip done +2021-07-14 22:40:02,382 DEBUG HandlerThread:598803 [meta.py:probe():252] probe done +2021-07-14 22:40:02,385 DEBUG SenderThread:598803 [sender.py:send():179] send: files +2021-07-14 22:40:02,385 INFO SenderThread:598803 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-14 22:40:02,390 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:40:02,391 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:40:02,517 DEBUG SenderThread:598803 [sender.py:send():179] send: config +2021-07-14 22:40:02,517 DEBUG SenderThread:598803 [sender.py:send():179] send: config +2021-07-14 22:40:02,517 DEBUG SenderThread:598803 [sender.py:send():179] send: config +2021-07-14 22:40:02,814 INFO Thread-11 :598803 [upload_job.py:push():137] Uploaded file /tmp/tmp43so6xcswandb/116losze-wandb-metadata.json +2021-07-14 22:40:03,240 INFO Thread-8 :598803 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/requirements.txt +2021-07-14 22:40:03,241 INFO Thread-8 :598803 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-metadata.json +2021-07-14 22:40:03,241 INFO Thread-8 :598803 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log +2021-07-14 22:40:17,246 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log +2021-07-14 22:40:17,518 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:40:17,519 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:40:19,247 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log +2021-07-14 22:40:30,436 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:40:32,253 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/config.yaml +2021-07-14 22:40:32,650 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:40:32,650 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:40:47,784 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:40:47,784 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:41:00,509 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:41:02,914 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:41:02,914 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:41:18,045 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:41:18,045 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:41:30,568 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:41:33,175 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:41:33,175 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:41:48,307 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:41:48,307 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:42:00,641 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:42:03,441 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:42:03,442 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:42:18,571 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:42:18,572 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:42:30,706 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:42:33,702 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:42:33,702 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:42:48,848 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:42:48,848 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:43:00,777 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:43:03,978 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:43:03,979 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:43:19,111 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:43:19,111 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:43:30,850 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:43:34,242 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:43:34,242 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:43:49,373 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:43:49,374 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:44:00,923 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:44:04,513 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:44:04,513 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:44:19,644 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:44:19,644 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:44:30,999 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:44:34,774 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:44:34,774 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:44:49,906 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:44:49,906 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:45:01,074 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:45:05,077 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:45:05,077 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:45:20,207 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:45:20,208 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:45:31,140 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:45:35,338 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:45:35,339 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:45:50,469 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:45:50,470 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:46:01,203 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:46:05,601 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:46:05,601 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:46:20,734 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:46:20,734 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:46:31,276 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:46:35,865 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:46:35,865 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:46:51,019 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:46:51,020 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:47:01,353 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:47:06,154 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:47:06,154 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:47:21,290 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:47:21,290 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:47:31,428 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:47:36,424 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:47:36,424 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:47:51,555 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:47:51,555 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:48:01,502 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:48:06,777 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:48:06,777 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:48:07,431 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log +2021-07-14 22:48:21,934 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:48:21,935 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:48:31,579 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:48:37,091 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:48:37,091 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:48:52,233 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:48:52,234 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:49:01,665 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:49:07,381 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:49:07,382 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:49:22,521 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:49:22,521 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:49:31,465 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log +2021-07-14 22:49:31,743 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:49:32,262 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:49:32,263 DEBUG SenderThread:598803 [sender.py:send():179] send: telemetry +2021-07-14 22:49:32,263 DEBUG SenderThread:598803 [sender.py:send():179] send: exit +2021-07-14 22:49:32,263 INFO SenderThread:598803 [sender.py:send_exit():287] handling exit code: 1 +2021-07-14 22:49:32,263 INFO SenderThread:598803 [sender.py:send_exit():295] send defer +2021-07-14 22:49:32,263 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:49:32,264 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:49:32,264 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 0 +2021-07-14 22:49:32,264 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer +2021-07-14 22:49:32,264 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 0 +2021-07-14 22:49:32,264 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 1 +2021-07-14 22:49:32,265 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:49:32,265 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 1 +2021-07-14 22:49:32,345 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer +2021-07-14 22:49:32,345 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 1 +2021-07-14 22:49:32,346 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 2 +2021-07-14 22:49:32,346 DEBUG SenderThread:598803 [sender.py:send():179] send: stats +2021-07-14 22:49:32,346 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:49:32,346 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 2 +2021-07-14 22:49:32,346 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer +2021-07-14 22:49:32,347 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 2 +2021-07-14 22:49:32,347 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 3 +2021-07-14 22:49:32,347 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:49:32,347 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 3 +2021-07-14 22:49:32,347 DEBUG SenderThread:598803 [sender.py:send():179] send: summary +2021-07-14 22:49:32,347 INFO SenderThread:598803 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 22:49:32,347 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer +2021-07-14 22:49:32,347 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 3 +2021-07-14 22:49:32,347 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 4 +2021-07-14 22:49:32,348 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:49:32,348 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 4 +2021-07-14 22:49:32,348 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer +2021-07-14 22:49:32,348 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 4 +2021-07-14 22:49:32,366 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:49:32,466 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json +2021-07-14 22:49:32,466 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log +2021-07-14 22:49:32,534 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 5 +2021-07-14 22:49:32,534 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:49:32,535 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:49:32,535 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 5 +2021-07-14 22:49:32,535 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer +2021-07-14 22:49:32,535 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 5 +2021-07-14 22:49:32,535 INFO SenderThread:598803 [dir_watcher.py:finish():282] shutting down directory watcher +2021-07-14 22:49:32,636 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:49:33,466 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/config.yaml +2021-07-14 22:49:33,467 INFO SenderThread:598803 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files +2021-07-14 22:49:33,467 INFO SenderThread:598803 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/requirements.txt requirements.txt +2021-07-14 22:49:33,467 INFO SenderThread:598803 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log output.log +2021-07-14 22:49:33,467 INFO SenderThread:598803 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-metadata.json wandb-metadata.json +2021-07-14 22:49:33,468 INFO SenderThread:598803 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/config.yaml config.yaml +2021-07-14 22:49:33,468 INFO SenderThread:598803 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json wandb-summary.json +2021-07-14 22:49:33,468 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 6 +2021-07-14 22:49:33,468 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:49:33,472 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:49:33,472 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 6 +2021-07-14 22:49:33,474 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer +2021-07-14 22:49:33,474 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 6 +2021-07-14 22:49:33,475 INFO SenderThread:598803 [file_pusher.py:finish():177] shutting down file pusher +2021-07-14 22:49:33,574 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:49:33,574 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:49:33,676 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:49:33,676 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:49:33,778 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:49:33,778 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:49:33,880 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:49:33,880 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:49:33,982 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:49:33,982 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:49:33,989 INFO Thread-15 :598803 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json +2021-07-14 22:49:33,994 INFO Thread-14 :598803 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/config.yaml +2021-07-14 22:49:33,995 INFO Thread-13 :598803 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log +2021-07-14 22:49:33,997 INFO Thread-12 :598803 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/requirements.txt +2021-07-14 22:49:34,084 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:49:34,085 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:49:34,186 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:49:34,187 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:49:34,198 INFO Thread-7 :598803 [sender.py:transition_state():308] send defer: 7 +2021-07-14 22:49:34,198 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:49:34,198 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 7 +2021-07-14 22:49:34,198 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer +2021-07-14 22:49:34,198 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 7 +2021-07-14 22:49:34,288 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:49:34,464 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 8 +2021-07-14 22:49:34,464 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:49:34,465 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:49:34,465 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 8 +2021-07-14 22:49:34,465 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer +2021-07-14 22:49:34,465 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 8 +2021-07-14 22:49:34,466 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 9 +2021-07-14 22:49:34,466 DEBUG SenderThread:598803 [sender.py:send():179] send: final +2021-07-14 22:49:34,466 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer +2021-07-14 22:49:34,466 DEBUG SenderThread:598803 [sender.py:send():179] send: footer +2021-07-14 22:49:34,466 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 9 +2021-07-14 22:49:34,467 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer +2021-07-14 22:49:34,467 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 9 +2021-07-14 22:49:34,567 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit +2021-07-14 22:49:34,567 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit +2021-07-14 22:49:34,567 INFO SenderThread:598803 [file_pusher.py:join():182] waiting for file pusher +2021-07-14 22:49:34,569 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: get_summary +2021-07-14 22:49:34,570 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: sampled_history +2021-07-14 22:49:34,570 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: shutdown +2021-07-14 22:49:34,570 INFO HandlerThread:598803 [handler.py:finish():638] shutting down handler +2021-07-14 22:49:35,467 INFO WriterThread:598803 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/run-1jvvynqa.wandb +2021-07-14 22:49:35,568 INFO SenderThread:598803 [sender.py:finish():945] shutting down sender +2021-07-14 22:49:35,568 INFO SenderThread:598803 [file_pusher.py:finish():177] shutting down file pusher +2021-07-14 22:49:35,568 INFO SenderThread:598803 [file_pusher.py:join():182] waiting for file pusher +2021-07-14 22:49:35,570 INFO MainThread:598803 [internal.py:handle_exit():78] Internal process exited diff --git a/wandb/run-20210714_224000-1jvvynqa/logs/debug.log b/wandb/run-20210714_224000-1jvvynqa/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..664ba2f52ca342d1fae118f04bc9547f4439259b --- /dev/null +++ b/wandb/run-20210714_224000-1jvvynqa/logs/debug.log @@ -0,0 +1,127 @@ +2021-07-14 22:40:00,341 INFO MainThread:597542 [wandb_setup.py:_flush():69] setting env: {} +2021-07-14 22:40:00,341 INFO MainThread:597542 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-14 22:40:00,341 INFO MainThread:597542 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/logs/debug.log +2021-07-14 22:40:00,341 INFO MainThread:597542 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/logs/debug-internal.log +2021-07-14 22:40:00,342 INFO MainThread:597542 [wandb_init.py:init():370] calling init triggers +2021-07-14 22:40:00,342 INFO MainThread:597542 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-14 22:40:00,342 INFO MainThread:597542 [wandb_init.py:init():419] starting backend +2021-07-14 22:40:00,342 INFO MainThread:597542 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-14 22:40:00,388 INFO MainThread:597542 [backend.py:ensure_launched():135] starting backend process... +2021-07-14 22:40:00,433 INFO MainThread:597542 [backend.py:ensure_launched():139] started backend process with pid: 598803 +2021-07-14 22:40:00,435 INFO MainThread:597542 [wandb_init.py:init():424] backend started and connected +2021-07-14 22:40:00,438 INFO MainThread:597542 [wandb_init.py:init():472] updated telemetry +2021-07-14 22:40:00,439 INFO MainThread:597542 [wandb_init.py:init():491] communicating current version +2021-07-14 22:40:01,068 INFO MainThread:597542 [wandb_init.py:init():496] got version response +2021-07-14 22:40:01,068 INFO MainThread:597542 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-14 22:40:01,239 INFO MainThread:597542 [wandb_init.py:init():529] starting run threads in backend +2021-07-14 22:40:02,388 INFO MainThread:597542 [wandb_run.py:_console_start():1623] atexit reg +2021-07-14 22:40:02,389 INFO MainThread:597542 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-14 22:40:02,389 INFO MainThread:597542 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-14 22:40:02,391 INFO MainThread:597542 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-14 22:40:02,391 INFO MainThread:597542 [wandb_init.py:init():554] run started, returning control to user process +2021-07-14 22:40:02,399 INFO MainThread:597542 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_22-39-51_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-14 22:40:02,400 INFO MainThread:597542 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-14 22:40:02,402 INFO MainThread:597542 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000} +2021-07-14 22:49:30,065 INFO MainThread:597542 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1 +2021-07-14 22:49:30,066 INFO MainThread:597542 [wandb_run.py:_restore():1565] restore +2021-07-14 22:49:32,264 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1448 + total_bytes: 1448 +} + +2021-07-14 22:49:32,535 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 1448 + total_bytes: 1448 +} + +2021-07-14 22:49:33,472 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 3 +} +pusher_stats { + uploaded_bytes: 1448 + total_bytes: 6873 +} + +2021-07-14 22:49:33,575 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 1448 + total_bytes: 11487 +} + +2021-07-14 22:49:33,677 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11487 + total_bytes: 11487 +} + +2021-07-14 22:49:33,779 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11487 + total_bytes: 11487 +} + +2021-07-14 22:49:33,881 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11487 + total_bytes: 11487 +} + +2021-07-14 22:49:33,983 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11487 + total_bytes: 11487 +} + +2021-07-14 22:49:34,085 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11487 + total_bytes: 11487 +} + +2021-07-14 22:49:34,187 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11487 + total_bytes: 11487 +} + +2021-07-14 22:49:34,466 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11487 + total_bytes: 11487 +} + +2021-07-14 22:49:34,568 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 11487 + total_bytes: 11487 +} + +2021-07-14 22:49:35,856 INFO MainThread:597542 [wandb_run.py:_show_files():1937] logging synced files diff --git a/wandb/run-20210714_224000-1jvvynqa/run-1jvvynqa.wandb b/wandb/run-20210714_224000-1jvvynqa/run-1jvvynqa.wandb new file mode 100644 index 0000000000000000000000000000000000000000..c7e0b797e5a36fb5addcfb3371acf568e7996660 Binary files /dev/null and b/wandb/run-20210714_224000-1jvvynqa/run-1jvvynqa.wandb differ diff --git a/wandb/run-20210714_225820-1dpoijkp/files/config.yaml b/wandb/run-20210714_225820-1dpoijkp/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..73a1d00888b8c4ac2e215babbc0fb34d2bd186d3 --- /dev/null +++ b/wandb/run-20210714_225820-1dpoijkp/files/config.yaml @@ -0,0 +1,304 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.10.33 + framework: huggingface + huggingface_version: 4.9.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + t: + 1: + - 3 + - 11 + 4: 3.8.10 + 5: 0.10.33 + 6: 4.9.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.98 +adam_epsilon: + desc: null + value: 1.0e-08 +cache_dir: + desc: null + value: null +config_name: + desc: null + value: ./ +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +dataset_config_name: + desc: null + value: null +dataset_name: + desc: null + value: null +ddp_find_unused_parameters: + desc: null + value: null +debug: + desc: null + value: [] +deepspeed: + desc: null + value: null +disable_tqdm: + desc: null + value: false +do_eval: + desc: null + value: false +do_predict: + desc: null + value: false +do_train: + desc: null + value: false +dtype: + desc: null + value: float32 +eval_accumulation_steps: + desc: null + value: null +eval_steps: + desc: null + value: 20000 +evaluation_strategy: + desc: null + value: IntervalStrategy.NO +fp16: + desc: null + value: false +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +gradient_accumulation_steps: + desc: null + value: 8 +greater_is_better: + desc: null + value: null +group_by_length: + desc: null + value: false +ignore_data_skip: + desc: null + value: false +label_names: + desc: null + value: null +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 5.0e-05 +length_column_name: + desc: null + value: length +line_by_line: + desc: null + value: false +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Jul14_22-58-13_t1v-n-f5c06ea1-w-0 +logging_first_step: + desc: null + value: false +logging_steps: + desc: null + value: 500 +logging_strategy: + desc: null + value: IntervalStrategy.STEPS +lr_scheduler_type: + desc: null + value: SchedulerType.LINEAR +max_eval_samples: + desc: null + value: 2000 +max_grad_norm: + desc: null + value: 1.0 +max_seq_length: + desc: null + value: 4096 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: null +mlm_probability: + desc: null + value: 0.15 +model_name_or_path: + desc: null + value: null +model_type: + desc: null + value: big_bird +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +num_train_epochs: + desc: null + value: 5.0 +output_dir: + desc: null + value: ./ +overwrite_cache: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_to_max_length: + desc: null + value: false +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 2 +per_device_train_batch_size: + desc: null + value: 2 +per_gpu_eval_batch_size: + desc: null + value: null +per_gpu_train_batch_size: + desc: null + value: null +prediction_loss_only: + desc: null + value: false +preprocessing_num_workers: + desc: null + value: 96 +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: '' +push_to_hub_organization: + desc: null + value: null +push_to_hub_token: + desc: null + value: null +remove_unused_columns: + desc: null + value: true +report_to: + desc: null + value: + - tensorboard + - wandb +resume_from_checkpoint: + desc: null + value: null +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_optimizer: + desc: null + value: true +save_steps: + desc: null + value: 20000 +save_strategy: + desc: null + value: IntervalStrategy.STEPS +save_total_limit: + desc: null + value: 5 +seed: + desc: null + value: 42 +sharded_ddp: + desc: null + value: [] +skip_memory_metrics: + desc: null + value: true +tokenizer_name: + desc: null + value: ./ +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: null +train_ref_file: + desc: null + value: null +use_fast_tokenizer: + desc: null + value: true +use_legacy_prediction_loop: + desc: null + value: false +validation_ref_file: + desc: null + value: null +validation_split_percentage: + desc: null + value: 5 +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 10000 +weight_decay: + desc: null + value: 0.0095 diff --git a/wandb/run-20210714_225820-1dpoijkp/files/output.log b/wandb/run-20210714_225820-1dpoijkp/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..a0f2fd4451704b4da68d71b21f5d9abb32e0fd5e --- /dev/null +++ b/wandb/run-20210714_225820-1dpoijkp/files/output.log @@ -0,0 +1,6 @@ +/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more. + lax._check_user_dtype_supported(dtype, "zeros") +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( diff --git a/wandb/run-20210714_225820-1dpoijkp/files/requirements.txt b/wandb/run-20210714_225820-1dpoijkp/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554e3a34bc91714a8462f65295a12e9a04537637 --- /dev/null +++ b/wandb/run-20210714_225820-1dpoijkp/files/requirements.txt @@ -0,0 +1,94 @@ +absl-py==0.13.0 +aiohttp==3.7.4.post0 +astunparse==1.6.3 +async-timeout==3.0.1 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +chardet==4.0.0 +charset-normalizer==2.0.1 +chex==0.0.8 +click==8.0.1 +configparser==5.0.2 +cycler==0.10.0 +datasets==1.9.1.dev0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +filelock==3.0.12 +flatbuffers==1.12 +flax==0.3.4 +fsspec==2021.7.0 +gast==0.4.0 +gitdb==4.0.7 +gitpython==3.1.18 +google-auth-oauthlib==0.4.4 +google-auth==1.32.1 +google-pasta==0.2.0 +grpcio==1.34.1 +h5py==3.1.0 +huggingface-hub==0.0.12 +idna==3.2 +install==1.3.4 +jax==0.2.17 +jaxlib==0.1.68 +joblib==1.0.1 +keras-nightly==2.5.0.dev2021032900 +keras-preprocessing==1.1.2 +kiwisolver==1.3.1 +libtpu-nightly==0.1.dev20210615 +markdown==3.3.4 +matplotlib==3.4.2 +msgpack==1.0.2 +multidict==5.1.0 +multiprocess==0.70.12.2 +numpy==1.19.5 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.0.9 +packaging==21.0 +pandas==1.3.0 +pathtools==0.1.2 +pillow==8.3.1 +pip==20.0.2 +pkg-resources==0.0.0 +promise==2.3 +protobuf==3.17.3 +psutil==5.8.0 +pyarrow==4.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2021.1 +pyyaml==5.4.1 +regex==2021.7.6 +requests-oauthlib==1.3.0 +requests==2.26.0 +rsa==4.7.2 +sacremoses==0.0.45 +scipy==1.7.0 +sentry-sdk==1.3.0 +setuptools==44.0.0 +shortuuid==1.0.1 +six==1.15.0 +smmap==4.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.5.0 +tensorflow-estimator==2.5.0 +tensorflow==2.5.0 +termcolor==1.1.0 +tokenizers==0.10.3 +toolz==0.11.1 +tqdm==4.61.2 +transformers==4.9.0.dev0 +typing-extensions==3.7.4.3 +urllib3==1.26.6 +wandb==0.10.33 +werkzeug==2.0.1 +wheel==0.36.2 +wrapt==1.12.1 +xxhash==2.0.2 +yarl==1.6.3 \ No newline at end of file diff --git a/wandb/run-20210714_225820-1dpoijkp/files/wandb-metadata.json b/wandb/run-20210714_225820-1dpoijkp/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..042d4e51e559f14b847d813932c8eb47cb92b58b --- /dev/null +++ b/wandb/run-20210714_225820-1dpoijkp/files/wandb-metadata.json @@ -0,0 +1,47 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2021-07-14T22:58:22.701262", + "startedAt": "2021-07-14T22:58:20.641335", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--push_to_hub", + "--output_dir=./", + "--model_type=big_bird", + "--config_name=./", + "--tokenizer_name=./", + "--max_seq_length=4096", + "--weight_decay=0.0095", + "--warmup_steps=10000", + "--overwrite_output_dir", + "--adam_beta1=0.9", + "--adam_beta2=0.98", + "--logging_steps=500", + "--eval_steps=20000", + "--num_train_epochs=5", + "--preprocessing_num_workers=96", + "--save_steps=20000", + "--learning_rate=5e-5", + "--per_device_train_batch_size=2", + "--per_device_eval_batch_size=2", + "--save_total_limit=5", + "--max_eval_samples=2000", + "--overwrite_cache", + "False", + "--gradient_accumulation_steps=8" + ], + "state": "running", + "program": "./run_mlm_flax.py", + "codePath": "run_mlm_flax.py", + "git": { + "remote": "https://huggingface.co/flax-community/pino-roberta-base", + "commit": "de71755314d4aa3ab182072c6f0be3de9798a66b" + }, + "email": null, + "root": "/home/dat/pino-roberta-base", + "host": "t1v-n-f5c06ea1-w-0", + "username": "dat", + "executable": "/home/dat/pino/bin/python" +} diff --git a/wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json b/wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log b/wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..fa339fd96f591cef4c30a5a8beb576c2c2fa7b01 --- /dev/null +++ b/wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log @@ -0,0 +1,40 @@ +2021-07-14 22:58:21,337 INFO MainThread:601574 [internal.py:wandb_internal():88] W&B internal server running at pid: 601574, started at: 2021-07-14 22:58:21.336704 +2021-07-14 22:58:21,339 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: check_version +2021-07-14 22:58:21,339 INFO WriterThread:601574 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb +2021-07-14 22:58:21,340 DEBUG SenderThread:601574 [sender.py:send():179] send: header +2021-07-14 22:58:21,340 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: check_version +2021-07-14 22:58:21,377 DEBUG SenderThread:601574 [sender.py:send():179] send: run +2021-07-14 22:58:21,602 INFO SenderThread:601574 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files +2021-07-14 22:58:21,602 INFO SenderThread:601574 [sender.py:_start_run_threads():716] run started: 1dpoijkp with start time 1626303500 +2021-07-14 22:58:21,602 DEBUG SenderThread:601574 [sender.py:send():179] send: summary +2021-07-14 22:58:21,603 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: run_start +2021-07-14 22:58:21,603 INFO SenderThread:601574 [sender.py:_save_file():841] saving file wandb-summary.json with policy end +2021-07-14 22:58:22,605 INFO Thread-8 :601574 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json +2021-07-14 22:58:22,701 DEBUG HandlerThread:601574 [meta.py:__init__():39] meta init +2021-07-14 22:58:22,701 DEBUG HandlerThread:601574 [meta.py:__init__():53] meta init done +2021-07-14 22:58:22,701 DEBUG HandlerThread:601574 [meta.py:probe():210] probe +2021-07-14 22:58:22,702 DEBUG HandlerThread:601574 [meta.py:_setup_git():200] setup git +2021-07-14 22:58:22,732 DEBUG HandlerThread:601574 [meta.py:_setup_git():207] setup git done +2021-07-14 22:58:22,732 DEBUG HandlerThread:601574 [meta.py:_save_pip():57] save pip +2021-07-14 22:58:22,732 DEBUG HandlerThread:601574 [meta.py:_save_pip():71] save pip done +2021-07-14 22:58:22,733 DEBUG HandlerThread:601574 [meta.py:probe():252] probe done +2021-07-14 22:58:22,736 DEBUG SenderThread:601574 [sender.py:send():179] send: files +2021-07-14 22:58:22,736 INFO SenderThread:601574 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now +2021-07-14 22:58:22,742 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:58:22,742 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:58:22,871 DEBUG SenderThread:601574 [sender.py:send():179] send: config +2021-07-14 22:58:22,872 DEBUG SenderThread:601574 [sender.py:send():179] send: config +2021-07-14 22:58:22,872 DEBUG SenderThread:601574 [sender.py:send():179] send: config +2021-07-14 22:58:23,214 INFO Thread-11 :601574 [upload_job.py:push():137] Uploaded file /tmp/tmpg5fs3m8gwandb/35jlequ6-wandb-metadata.json +2021-07-14 22:58:23,603 INFO Thread-8 :601574 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/output.log +2021-07-14 22:58:23,604 INFO Thread-8 :601574 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/requirements.txt +2021-07-14 22:58:23,604 INFO Thread-8 :601574 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/wandb-metadata.json +2021-07-14 22:58:37,609 INFO Thread-8 :601574 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/output.log +2021-07-14 22:58:37,873 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:58:37,874 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:58:50,784 DEBUG SenderThread:601574 [sender.py:send():179] send: stats +2021-07-14 22:58:52,614 INFO Thread-8 :601574 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/config.yaml +2021-07-14 22:58:53,006 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:58:53,006 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status +2021-07-14 22:59:08,141 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status +2021-07-14 22:59:08,141 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status diff --git a/wandb/run-20210714_225820-1dpoijkp/logs/debug.log b/wandb/run-20210714_225820-1dpoijkp/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..f7410944bc848b8f681ec31ce3fb5eba71d69059 --- /dev/null +++ b/wandb/run-20210714_225820-1dpoijkp/logs/debug.log @@ -0,0 +1,25 @@ +2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_setup.py:_flush():69] setting env: {} +2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_setup.py:_flush():69] setting login settings: {} +2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/logs/debug.log +2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log +2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_init.py:init():370] calling init triggers +2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_init.py:init():375] wandb.init called with sweep_config: {} +config: {} +2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_init.py:init():419] starting backend +2021-07-14 22:58:20,643 INFO MainThread:600323 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2021-07-14 22:58:20,687 INFO MainThread:600323 [backend.py:ensure_launched():135] starting backend process... +2021-07-14 22:58:20,729 INFO MainThread:600323 [backend.py:ensure_launched():139] started backend process with pid: 601574 +2021-07-14 22:58:20,731 INFO MainThread:600323 [wandb_init.py:init():424] backend started and connected +2021-07-14 22:58:20,734 INFO MainThread:600323 [wandb_init.py:init():472] updated telemetry +2021-07-14 22:58:20,735 INFO MainThread:600323 [wandb_init.py:init():491] communicating current version +2021-07-14 22:58:21,375 INFO MainThread:600323 [wandb_init.py:init():496] got version response +2021-07-14 22:58:21,375 INFO MainThread:600323 [wandb_init.py:init():504] communicating run to backend with 30 second timeout +2021-07-14 22:58:21,602 INFO MainThread:600323 [wandb_init.py:init():529] starting run threads in backend +2021-07-14 22:58:22,739 INFO MainThread:600323 [wandb_run.py:_console_start():1623] atexit reg +2021-07-14 22:58:22,740 INFO MainThread:600323 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT +2021-07-14 22:58:22,740 INFO MainThread:600323 [wandb_run.py:_redirect():1502] Redirecting console. +2021-07-14 22:58:22,742 INFO MainThread:600323 [wandb_run.py:_redirect():1558] Redirects installed. +2021-07-14 22:58:22,742 INFO MainThread:600323 [wandb_init.py:init():554] run started, returning control to user process +2021-07-14 22:58:22,750 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_22-58-13_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''} +2021-07-14 22:58:22,752 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True} +2021-07-14 22:58:22,753 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000} diff --git a/wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb b/wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb new file mode 100644 index 0000000000000000000000000000000000000000..a484d92e324e9902f1e849ceb17c2a173dc3b637 Binary files /dev/null and b/wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb differ