dat committed
Commit 9915204
1 parent: de71755

update all

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full changeset.
Files changed (50)
  1. config.json +1 -1
  2. events.out.tfevents.1626299647.t1v-n-f5c06ea1-w-0.587396.3.v2 +3 -0
  3. events.out.tfevents.1626301159.t1v-n-f5c06ea1-w-0.592040.3.v2 +3 -0
  4. events.out.tfevents.1626301759.t1v-n-f5c06ea1-w-0.595290.3.v2 +3 -0
  5. events.out.tfevents.1626302399.t1v-n-f5c06ea1-w-0.597542.3.v2 +3 -0
  6. events.out.tfevents.1626303499.t1v-n-f5c06ea1-w-0.600323.3.v2 +3 -0
  7. run.sh +5 -5
  8. run_mlm_flax.py +42 -28
  9. wandb/debug-internal.log +1 -1
  10. wandb/debug.log +1 -1
  11. wandb/latest-run +1 -1
  12. wandb/run-20210714_215408-3kpvz8se/files/config.yaml +307 -0
  13. wandb/run-20210714_215408-3kpvz8se/files/output.log +15 -0
  14. wandb/run-20210714_215408-3kpvz8se/files/requirements.txt +94 -0
  15. wandb/run-20210714_215408-3kpvz8se/files/wandb-metadata.json +48 -0
  16. wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json +1 -0
  17. wandb/run-20210714_215408-3kpvz8se/logs/debug-internal.log +142 -0
  18. wandb/run-20210714_215408-3kpvz8se/logs/debug.log +127 -0
  19. wandb/run-20210714_215408-3kpvz8se/run-3kpvz8se.wandb +0 -0
  20. wandb/run-20210714_221920-s091gfok/files/config.yaml +307 -0
  21. wandb/run-20210714_221920-s091gfok/files/output.log +39 -0
  22. wandb/run-20210714_221920-s091gfok/files/requirements.txt +94 -0
  23. wandb/run-20210714_221920-s091gfok/files/wandb-metadata.json +47 -0
  24. wandb/run-20210714_221920-s091gfok/files/wandb-summary.json +1 -0
  25. wandb/run-20210714_221920-s091gfok/logs/debug-internal.log +233 -0
  26. wandb/run-20210714_221920-s091gfok/logs/debug.log +119 -0
  27. wandb/run-20210714_221920-s091gfok/run-s091gfok.wandb +0 -0
  28. wandb/run-20210714_222920-2p7mu4rm/files/config.yaml +307 -0
  29. wandb/run-20210714_222920-2p7mu4rm/files/output.log +39 -0
  30. wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt +94 -0
  31. wandb/run-20210714_222920-2p7mu4rm/files/wandb-metadata.json +47 -0
  32. wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json +1 -0
  33. wandb/run-20210714_222920-2p7mu4rm/logs/debug-internal.log +232 -0
  34. wandb/run-20210714_222920-2p7mu4rm/logs/debug.log +119 -0
  35. wandb/run-20210714_222920-2p7mu4rm/run-2p7mu4rm.wandb +0 -0
  36. wandb/run-20210714_224000-1jvvynqa/files/config.yaml +307 -0
  37. wandb/run-20210714_224000-1jvvynqa/files/output.log +39 -0
  38. wandb/run-20210714_224000-1jvvynqa/files/requirements.txt +94 -0
  39. wandb/run-20210714_224000-1jvvynqa/files/wandb-metadata.json +47 -0
  40. wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json +1 -0
  41. wandb/run-20210714_224000-1jvvynqa/logs/debug-internal.log +236 -0
  42. wandb/run-20210714_224000-1jvvynqa/logs/debug.log +127 -0
  43. wandb/run-20210714_224000-1jvvynqa/run-1jvvynqa.wandb +0 -0
  44. wandb/run-20210714_225820-1dpoijkp/files/config.yaml +304 -0
  45. wandb/run-20210714_225820-1dpoijkp/files/output.log +6 -0
  46. wandb/run-20210714_225820-1dpoijkp/files/requirements.txt +94 -0
  47. wandb/run-20210714_225820-1dpoijkp/files/wandb-metadata.json +47 -0
  48. wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json +1 -0
  49. wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log +40 -0
  50. wandb/run-20210714_225820-1dpoijkp/logs/debug.log +25 -0
config.json CHANGED
@@ -4,7 +4,7 @@
   ],
   "attention_probs_dropout_prob": 0.1,
   "attention_type": "block_sparse",
- "block_size": 128,
+ "block_size": 64,
   "bos_token_id": 1,
   "eos_token_id": 2,
   "gradient_checkpointing": false,
events.out.tfevents.1626299647.t1v-n-f5c06ea1-w-0.587396.3.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:84890ea5bd3c73af594c93d00f787106810c8227126a97c436a63ef86502b93f
+ size 40
events.out.tfevents.1626301159.t1v-n-f5c06ea1-w-0.592040.3.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9b87389b3d84fa4c5e66f3f568af52dc3799c8e93ecd1c17d2757563eadf4b8a
+ size 40
events.out.tfevents.1626301759.t1v-n-f5c06ea1-w-0.595290.3.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7151f4506f8d7e2790068211ea6067e529caadb072896f77702e4e393e69bc8b
+ size 40
events.out.tfevents.1626302399.t1v-n-f5c06ea1-w-0.597542.3.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c317581746aa1f2878b10cfdd0724ead65d7384dc0003909dc09986654f0ca6d
+ size 40
events.out.tfevents.1626303499.t1v-n-f5c06ea1-w-0.600323.3.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:610b4725be7ba71faaadd24b5aa839e67235b0b211a2bf6c4d1da48931051a45
+ size 40
run.sh CHANGED
@@ -10,12 +10,12 @@ python ./run_mlm_flax.py \
   --tokenizer_name="./" \
   --max_seq_length="4096" \
   --weight_decay="0.0095" \
- --warmup_steps="5000" \
+ --warmup_steps="10000" \
   --overwrite_output_dir \
   --adam_beta1="0.9" \
   --adam_beta2="0.98" \
- --logging_steps="250" \
- --eval_steps="500" \
+ --logging_steps="500" \
+ --eval_steps="20000" \
   --num_train_epochs="5" \
   --preprocessing_num_workers="96" \
   --save_steps="20000" \
@@ -23,9 +23,9 @@ python ./run_mlm_flax.py \
   --per_device_train_batch_size="2" \
   --per_device_eval_batch_size="2" \
   --save_total_limit="5"\
- --max_eval_samples="500"\
+ --max_eval_samples="2000"\
   --overwrite_cache False \
- --gradient_accumulation_steps="4" \
+ --gradient_accumulation_steps="8" \
   #--resume_from_checkpoint="./"\
   #--adafactor \
   #--dtype="bfloat16" \
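A quick sanity check on what the new values imply (my own arithmetic, not part of the commit): the effective batch size per optimizer update is the per-device batch size times the device count times the accumulation steps. Assuming 8 TPU devices, which is a guess rather than something stated in the diff:

# Sketch only; num_devices is an assumption (e.g. jax.device_count() on a TPU v3-8).
per_device_train_batch_size = 2
gradient_accumulation_steps = 8   # raised from 4 in this commit
num_devices = 8                   # hypothetical, not stated anywhere in run.sh

effective_batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps
print(effective_batch_size)  # 128 sequences of up to max_seq_length=4096 tokens per update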
run_mlm_flax.py CHANGED
@@ -33,6 +33,8 @@ from typing import Dict, List, Optional, Tuple
  import numpy as np
  from datasets import load_dataset, DatasetDict
  from tqdm import tqdm
+ from optax import clip_by_global_norm
+
  
  import flax
  import jax
@@ -55,11 +57,13 @@ from transformers import (
      set_seed,
  )
  import json
- from flax.training import checkpoints
+ import shutil
+
  from flax.jax_utils import unreplicate
  from flax.training.checkpoints import save_checkpoint, restore_checkpoint
  from importlib.util import find_spec
  from flax.serialization import to_bytes, from_bytes
+ import jax.profiler
  
  MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_MASKED_LM_MAPPING.keys())
  MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
@@ -275,6 +279,35 @@ def write_eval_metric(summary_writer, eval_metrics, step):
          summary_writer.scalar(f"eval_{metric_name}", value, step)
  
  
+ # utils
+ def mb_item(x):
+     return x.item() if hasattr(x, "item") else x
+
+ #checkpoint functions
+ def save_model_checkpoint(model, save_dir, state, with_opt:bool=True, push_to_hub:bool=False):
+     """
+     If `push_to_hub` is True, will save to `save_dir`. Otherwise will save to `save_dir/ckpt-{step}`.
+     """
+     state = jax_utils.unreplicate(state)
+     logger.info(f"SAVING CHECKPOINT IN {save_dir}...")
+     if not push_to_hub:
+         save_dir = f"{save_dir}/ckpt-{mb_item(state.step)-1}"
+     model.save_pretrained(
+         save_dir,
+         params=state.params,
+         push_to_hub=push_to_hub,
+         commit_message=f"Saving weights and logs at step {mb_item(state.step)-1}",
+     )
+     if with_opt:
+         with open(os.path.join(save_dir, "opt_state.msgpack"), "wb") as f:
+             f.write(to_bytes(state.opt_state))
+         with open(os.path.join(save_dir, "training_state.json"), "w") as f:
+             json.dump({"step": state.step.item()}, f)
+     logger.info("checkpoint saved")
+
+ # this is added to make resuming from checkpoint to work with adafactor
+ # to be removed when issue is fixed
+ # notice that adafactor state is perturbed by fake_update
  def _zeros_tree_like(inp_tree):
      return jax.tree_map(jnp.zeros_like, inp_tree)
  
@@ -291,8 +324,11 @@ def fake_update(state):
  def reinstantiate_states(opt_state):
      new_state = []
      for state in opt_state:
-         cls = getattr(optax, type(state).__name__)
-         new_state.append(cls(**{k:getattr(state, k) for k in state._fields}))
+         if isinstance(state, list):
+             new_state.append(reinstantiate_states(state))
+         else:
+             cls = getattr(optax, type(state).__name__)
+             new_state.append(cls(**{k:getattr(state, k) for k in state._fields}))
      return new_state
  
  def restore_model_checkpoint(save_dir, state):
@@ -318,27 +354,6 @@ def restore_model_checkpoint(save_dir, state):
  
      return state.replace(step=step, params=params, opt_state=opt_state)
  
- def save_model_checkpoint(model, save_dir, state, with_opt:bool=True, push_to_hub:bool=False):
-     """
-     If `push_to_hub` is True, will save to `save_dir`. Otherwise will save to `save_dir/ckpt-{step}`.
-     """
-     state = jax_utils.unreplicate(state)
-     logger.info(f"SAVING CHECKPOINT IN {save_dir}...")
-     if not push_to_hub:
-         save_dir = f"{save_dir}/ckpt-{mb_item(state.step)-1}"
-     model.save_pretrained(
-         save_dir,
-         params=state.params,
-         push_to_hub=push_to_hub,
-         commit_message=f"Saving weights and logs at step {mb_item(state.step)-1}",
-     )
-     if with_opt:
-         with open(os.path.join(save_dir, "opt_state.msgpack"), "wb") as f:
-             f.write(to_bytes(state.opt_state))
-         with open(os.path.join(save_dir, "training_state.json"), "w") as f:
-             json.dump({"step": state.step.item()}, f)
-     logger.info("checkpoint saved")
-
  def rotate_checkpoints(ckpt_dir:str, save_total_limit:int):
      "Removes older checkpoints so that `save_total_limit` checkpoints are kept"
      # TODO: what to remove is decided using step number only, we might want to improve that
@@ -351,7 +366,6 @@ def rotate_checkpoints(ckpt_dir:str, save_total_limit:int):
          shutil.rmtree(ckpt)
  
  
-
  if __name__ == "__main__":
      # See all possible arguments in src/transformers/training_args.py
      # or by passing the --help flag to this script.
@@ -513,7 +527,7 @@ if __name__ == "__main__":
          tokenized_datasets = DatasetDict.load_from_disk("/data/tokenized_data")
          logger.info("Setting max validation examples to ")
          print(f"Number of validation examples {data_args.max_eval_samples}")
-         tokenized_datasets["train"]= tokenized_datasets["train"].select(range(20000))
+         #tokenized_datasets["train"]= tokenized_datasets["train"].select(range(20000))
          if data_args.max_eval_samples is not None:
              tokenized_datasets["validation"] = tokenized_datasets["validation"].select(range(data_args.max_eval_samples))
          else:
@@ -687,7 +701,6 @@ if __name__ == "__main__":
              learning_rate=linear_decay_lr_schedule_fn,
          )
      else:
-         from optax import clip_by_global_norm
          optimizer = optax.adamw(
              learning_rate=linear_decay_lr_schedule_fn,
              b1=training_args.adam_beta1,
@@ -777,7 +790,8 @@ if __name__ == "__main__":
      steps_per_epoch = len(tokenized_datasets["train"]) // train_batch_size
      resume_epoch = resume_step // (steps_per_epoch * grad_accum_steps)
      epochs = tqdm(range(num_epochs), desc=f"Epoch ... ({resume_epoch+1}/{num_epochs})", position=0)
-     logger.info(f"Skipping to epoch {resume_epoch} step {resume_step // grad_accum_steps}")
+     if resume_step != 0:
+         logger.info(f"Skipping to epoch {resume_epoch} step {resume_step // grad_accum_steps}")
      for epoch in epochs:
          # ======================== Training ================================
          train_start = time.time()
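For context on the checkpoint layout produced by the save_model_checkpoint helper moved above: with with_opt=True it writes opt_state.msgpack and training_state.json next to the weights saved by model.save_pretrained, and restore_model_checkpoint reads them back when --resume_from_checkpoint is set. A minimal sketch of inspecting such a directory; the step number is hypothetical:

import json
import os

ckpt_dir = "./ckpt-19999"  # hypothetical ckpt-{step} directory created with push_to_hub=False

# training_state.json stores the step counter written by save_model_checkpoint ...
with open(os.path.join(ckpt_dir, "training_state.json")) as f:
    step = json.load(f)["step"]

# ... and opt_state.msgpack stores the optimizer state serialized with flax.serialization.to_bytes.
with open(os.path.join(ckpt_dir, "opt_state.msgpack"), "rb") as f:
    opt_state_bytes = f.read()

print(f"checkpoint at step {step}, opt_state payload is {len(opt_state_bytes)} bytes")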
wandb/debug-internal.log CHANGED
@@ -1 +1 @@
- run-20210714_213944-3j6d3fy2/logs/debug-internal.log
+ run-20210714_225820-1dpoijkp/logs/debug-internal.log
wandb/debug.log CHANGED
@@ -1 +1 @@
- run-20210714_213944-3j6d3fy2/logs/debug.log
+ run-20210714_225820-1dpoijkp/logs/debug.log
wandb/latest-run CHANGED
@@ -1 +1 @@
- run-20210714_213944-3j6d3fy2
+ run-20210714_225820-1dpoijkp
wandb/run-20210714_215408-3kpvz8se/files/config.yaml ADDED
@@ -0,0 +1,307 @@
+ wandb_version: 1
+
+ _wandb:
+   desc: null
+   value:
+     cli_version: 0.10.33
+     framework: huggingface
+     huggingface_version: 4.9.0.dev0
+     is_jupyter_run: false
+     is_kaggle_kernel: false
+     python_version: 3.8.10
+     t:
+       1:
+       - 3
+       - 11
+       2:
+       - 3
+       - 11
+       4: 3.8.10
+       5: 0.10.33
+       6: 4.9.0.dev0
+       8:
+       - 5
+ adafactor:
+   desc: null
+   value: false
+ adam_beta1:
+   desc: null
+   value: 0.9
+ adam_beta2:
+   desc: null
+   value: 0.98
+ adam_epsilon:
+   desc: null
+   value: 1.0e-08
+ cache_dir:
+   desc: null
+   value: null
+ config_name:
+   desc: null
+   value: ./
+ dataloader_drop_last:
+   desc: null
+   value: false
+ dataloader_num_workers:
+   desc: null
+   value: 0
+ dataloader_pin_memory:
+   desc: null
+   value: true
+ dataset_config_name:
+   desc: null
+   value: null
+ dataset_name:
+   desc: null
+   value: null
+ ddp_find_unused_parameters:
+   desc: null
+   value: null
+ debug:
+   desc: null
+   value: []
+ deepspeed:
+   desc: null
+   value: null
+ disable_tqdm:
+   desc: null
+   value: false
+ do_eval:
+   desc: null
+   value: false
+ do_predict:
+   desc: null
+   value: false
+ do_train:
+   desc: null
+   value: false
+ dtype:
+   desc: null
+   value: float32
+ eval_accumulation_steps:
+   desc: null
+   value: null
+ eval_steps:
+   desc: null
+   value: 500
+ evaluation_strategy:
+   desc: null
+   value: IntervalStrategy.NO
+ fp16:
+   desc: null
+   value: false
+ fp16_backend:
+   desc: null
+   value: auto
+ fp16_full_eval:
+   desc: null
+   value: false
+ fp16_opt_level:
+   desc: null
+   value: O1
+ gradient_accumulation_steps:
+   desc: null
+   value: 2
+ greater_is_better:
+   desc: null
+   value: null
+ group_by_length:
+   desc: null
+   value: false
+ ignore_data_skip:
+   desc: null
+   value: false
+ label_names:
+   desc: null
+   value: null
+ label_smoothing_factor:
+   desc: null
+   value: 0.0
+ learning_rate:
+   desc: null
+   value: 5.0e-05
+ length_column_name:
+   desc: null
+   value: length
+ line_by_line:
+   desc: null
+   value: false
+ load_best_model_at_end:
+   desc: null
+   value: false
+ local_rank:
+   desc: null
+   value: -1
+ log_level:
+   desc: null
+   value: -1
+ log_level_replica:
+   desc: null
+   value: -1
+ log_on_each_node:
+   desc: null
+   value: true
+ logging_dir:
+   desc: null
+   value: ./runs/Jul14_21-54-01_t1v-n-f5c06ea1-w-0
+ logging_first_step:
+   desc: null
+   value: false
+ logging_steps:
+   desc: null
+   value: 250
+ logging_strategy:
+   desc: null
+   value: IntervalStrategy.STEPS
+ lr_scheduler_type:
+   desc: null
+   value: SchedulerType.LINEAR
+ max_eval_samples:
+   desc: null
+   value: 500
+ max_grad_norm:
+   desc: null
+   value: 1.0
+ max_seq_length:
+   desc: null
+   value: 4096
+ max_steps:
+   desc: null
+   value: -1
+ metric_for_best_model:
+   desc: null
+   value: null
+ mlm_probability:
+   desc: null
+   value: 0.15
+ model_name_or_path:
+   desc: null
+   value: null
+ model_type:
+   desc: null
+   value: big_bird
+ mp_parameters:
+   desc: null
+   value: ''
+ no_cuda:
+   desc: null
+   value: false
+ num_train_epochs:
+   desc: null
+   value: 5.0
+ output_dir:
+   desc: null
+   value: ./
+ overwrite_cache:
+   desc: null
+   value: false
+ overwrite_output_dir:
+   desc: null
+   value: true
+ pad_to_max_length:
+   desc: null
+   value: false
+ past_index:
+   desc: null
+   value: -1
+ per_device_eval_batch_size:
+   desc: null
+   value: 2
+ per_device_train_batch_size:
+   desc: null
+   value: 2
+ per_gpu_eval_batch_size:
+   desc: null
+   value: null
+ per_gpu_train_batch_size:
+   desc: null
+   value: null
+ prediction_loss_only:
+   desc: null
+   value: false
+ preprocessing_num_workers:
+   desc: null
+   value: 96
+ push_to_hub:
+   desc: null
+   value: true
+ push_to_hub_model_id:
+   desc: null
+   value: ''
+ push_to_hub_organization:
+   desc: null
+   value: null
+ push_to_hub_token:
+   desc: null
+   value: null
+ remove_unused_columns:
+   desc: null
+   value: true
+ report_to:
+   desc: null
+   value:
+   - tensorboard
+   - wandb
+ resume_from_checkpoint:
+   desc: null
+   value: ./
+ run_name:
+   desc: null
+   value: ./
+ save_on_each_node:
+   desc: null
+   value: false
+ save_optimizer:
+   desc: null
+   value: true
+ save_steps:
+   desc: null
+   value: 20000
+ save_strategy:
+   desc: null
+   value: IntervalStrategy.STEPS
+ save_total_limit:
+   desc: null
+   value: 5
+ seed:
+   desc: null
+   value: 42
+ sharded_ddp:
+   desc: null
+   value: []
+ skip_memory_metrics:
+   desc: null
+   value: true
+ tokenizer_name:
+   desc: null
+   value: ./
+ tpu_metrics_debug:
+   desc: null
+   value: false
+ tpu_num_cores:
+   desc: null
+   value: null
+ train_ref_file:
+   desc: null
+   value: null
+ use_fast_tokenizer:
+   desc: null
+   value: true
+ use_legacy_prediction_loop:
+   desc: null
+   value: false
+ validation_ref_file:
+   desc: null
+   value: null
+ validation_split_percentage:
+   desc: null
+   value: 5
+ warmup_ratio:
+   desc: null
+   value: 0.0
+ warmup_steps:
+   desc: null
+   value: 5000
+ weight_decay:
+   desc: null
+   value: 0.0095
wandb/run-20210714_215408-3kpvz8se/files/output.log ADDED
@@ -0,0 +1,15 @@
+ [21:54:22] - INFO - absl - A polynomial schedule was set with a non-positive `transition_steps` value; this results in a constant schedule with value `init_value`.
+ /home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype <class 'jax._src.numpy.lax_numpy.int64'> requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more.
+   lax._check_user_dtype_supported(dtype, "zeros")
+ [21:54:23] - INFO - __main__ - RESTORING CHECKPOINT FROM ./...
+ tcmalloc: large alloc 1530273792 bytes == 0xd9eda000 @ 0x7f6c0ba41680 0x7f6c0ba62824 0x5f7b11 0x648631 0x5c38e6 0x4f30e6 0x64ee88 0x505653 0x56acb6 0x568d9a 0x5f5b33 0x56aadf 0x568d9a 0x68cdc7 0x67e161 0x67e1df 0x67e281 0x67e627 0x6b6e62 0x6b71ed 0x7f6c0b8560b3 0x5f96de
+ restoring state of multisteps optimizer
+ [21:54:26] - INFO - __main__ - checkpoint restored
+ Traceback (most recent call last):
+   File "./run_mlm_flax.py", line 712, in <module>
+     state = restore_model_checkpoint(training_args.resume_from_checkpoint, state)
+   File "./run_mlm_flax.py", line 314, in restore_model_checkpoint
+     inner_opt_state = reinstantiate_states(opt_state.inner_opt_state)
+   File "./run_mlm_flax.py", line 294, in reinstantiate_states
+     cls = getattr(optax, type(state).__name__)
+ AttributeError: module 'optax' has no attribute 'list'
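This traceback is the failure that the reinstantiate_states change in run_mlm_flax.py addresses: when the optimizer is wrapped in optax.MultiSteps, the restored inner_opt_state can contain plain Python lists, and looking a list's type name up on the optax module raises exactly this AttributeError. A toy sketch of the old versus the patched behaviour (illustrative values only, not the real optimizer state):

import optax  # assumed available, matching optax==0.0.9 in the run's requirements

def reinstantiate_old(opt_state):
    # Old logic: assumes every element is an optax namedtuple, so a plain list
    # triggers getattr(optax, "list") -> AttributeError, as in the log above.
    return [getattr(optax, type(s).__name__)(**{k: getattr(s, k) for k in s._fields})
            for s in opt_state]

def reinstantiate_new(opt_state):
    # The commit's fix: recurse into lists, rebuild namedtuples otherwise.
    new_state = []
    for s in opt_state:
        if isinstance(s, list):
            new_state.append(reinstantiate_new(s))
        else:
            cls = getattr(optax, type(s).__name__)
            new_state.append(cls(**{k: getattr(s, k) for k in s._fields}))
    return new_state

# Toy nested state: a ScaleByAdamState wrapped in a list survives the new path,
# while reinstantiate_old(toy) would raise the AttributeError shown in output.log.
toy = [[optax.ScaleByAdamState(count=0, mu=None, nu=None)]]
print(reinstantiate_new(toy))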
wandb/run-20210714_215408-3kpvz8se/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
+ absl-py==0.13.0
+ aiohttp==3.7.4.post0
+ astunparse==1.6.3
+ async-timeout==3.0.1
+ attrs==21.2.0
+ cachetools==4.2.2
+ certifi==2021.5.30
+ chardet==4.0.0
+ charset-normalizer==2.0.1
+ chex==0.0.8
+ click==8.0.1
+ configparser==5.0.2
+ cycler==0.10.0
+ datasets==1.9.1.dev0
+ dill==0.3.4
+ dm-tree==0.1.6
+ docker-pycreds==0.4.0
+ filelock==3.0.12
+ flatbuffers==1.12
+ flax==0.3.4
+ fsspec==2021.7.0
+ gast==0.4.0
+ gitdb==4.0.7
+ gitpython==3.1.18
+ google-auth-oauthlib==0.4.4
+ google-auth==1.32.1
+ google-pasta==0.2.0
+ grpcio==1.34.1
+ h5py==3.1.0
+ huggingface-hub==0.0.12
+ idna==3.2
+ install==1.3.4
+ jax==0.2.17
+ jaxlib==0.1.68
+ joblib==1.0.1
+ keras-nightly==2.5.0.dev2021032900
+ keras-preprocessing==1.1.2
+ kiwisolver==1.3.1
+ libtpu-nightly==0.1.dev20210615
+ markdown==3.3.4
+ matplotlib==3.4.2
+ msgpack==1.0.2
+ multidict==5.1.0
+ multiprocess==0.70.12.2
+ numpy==1.19.5
+ oauthlib==3.1.1
+ opt-einsum==3.3.0
+ optax==0.0.9
+ packaging==21.0
+ pandas==1.3.0
+ pathtools==0.1.2
+ pillow==8.3.1
+ pip==20.0.2
+ pkg-resources==0.0.0
+ promise==2.3
+ protobuf==3.17.3
+ psutil==5.8.0
+ pyarrow==4.0.1
+ pyasn1-modules==0.2.8
+ pyasn1==0.4.8
+ pyparsing==2.4.7
+ python-dateutil==2.8.1
+ pytz==2021.1
+ pyyaml==5.4.1
+ regex==2021.7.6
+ requests-oauthlib==1.3.0
+ requests==2.26.0
+ rsa==4.7.2
+ sacremoses==0.0.45
+ scipy==1.7.0
+ sentry-sdk==1.3.0
+ setuptools==44.0.0
+ shortuuid==1.0.1
+ six==1.15.0
+ smmap==4.0.0
+ subprocess32==3.5.4
+ tensorboard-data-server==0.6.1
+ tensorboard-plugin-wit==1.8.0
+ tensorboard==2.5.0
+ tensorflow-estimator==2.5.0
+ tensorflow==2.5.0
+ termcolor==1.1.0
+ tokenizers==0.10.3
+ toolz==0.11.1
+ tqdm==4.61.2
+ transformers==4.9.0.dev0
+ typing-extensions==3.7.4.3
+ urllib3==1.26.6
+ wandb==0.10.33
+ werkzeug==2.0.1
+ wheel==0.36.2
+ wrapt==1.12.1
+ xxhash==2.0.2
+ yarl==1.6.3
wandb/run-20210714_215408-3kpvz8se/files/wandb-metadata.json ADDED
@@ -0,0 +1,48 @@
+ {
+   "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
+   "python": "3.8.10",
+   "heartbeatAt": "2021-07-14T21:54:10.349764",
+   "startedAt": "2021-07-14T21:54:08.359450",
+   "docker": null,
+   "cpu_count": 96,
+   "cuda": null,
+   "args": [
+     "--push_to_hub",
+     "--output_dir=./",
+     "--model_type=big_bird",
+     "--config_name=./",
+     "--tokenizer_name=./",
+     "--max_seq_length=4096",
+     "--weight_decay=0.0095",
+     "--warmup_steps=5000",
+     "--overwrite_output_dir",
+     "--adam_beta1=0.9",
+     "--adam_beta2=0.98",
+     "--logging_steps=250",
+     "--eval_steps=500",
+     "--num_train_epochs=5",
+     "--preprocessing_num_workers=96",
+     "--save_steps=20000",
+     "--learning_rate=5e-5",
+     "--per_device_train_batch_size=2",
+     "--per_device_eval_batch_size=2",
+     "--save_total_limit=5",
+     "--max_eval_samples=500",
+     "--overwrite_cache",
+     "False",
+     "--gradient_accumulation_steps=2",
+     "--resume_from_checkpoint=./"
+   ],
+   "state": "running",
+   "program": "./run_mlm_flax.py",
+   "codePath": "run_mlm_flax.py",
+   "git": {
+     "remote": "https://huggingface.co/flax-community/pino-roberta-base",
+     "commit": "de71755314d4aa3ab182072c6f0be3de9798a66b"
+   },
+   "email": null,
+   "root": "/home/dat/pino-roberta-base",
+   "host": "t1v-n-f5c06ea1-w-0",
+   "username": "dat",
+   "executable": "/home/dat/pino/bin/python"
+ }
wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {}
wandb/run-20210714_215408-3kpvz8se/logs/debug-internal.log ADDED
@@ -0,0 +1,142 @@
1
+ 2021-07-14 21:54:09,008 INFO MainThread:588654 [internal.py:wandb_internal():88] W&B internal server running at pid: 588654, started at: 2021-07-14 21:54:09.008494
2
+ 2021-07-14 21:54:09,011 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: check_version
3
+ 2021-07-14 21:54:09,011 INFO WriterThread:588654 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/run-3kpvz8se.wandb
4
+ 2021-07-14 21:54:09,012 DEBUG SenderThread:588654 [sender.py:send():179] send: header
5
+ 2021-07-14 21:54:09,012 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: check_version
6
+ 2021-07-14 21:54:09,050 DEBUG SenderThread:588654 [sender.py:send():179] send: run
7
+ 2021-07-14 21:54:09,234 INFO SenderThread:588654 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files
8
+ 2021-07-14 21:54:09,234 INFO SenderThread:588654 [sender.py:_start_run_threads():716] run started: 3kpvz8se with start time 1626299648
9
+ 2021-07-14 21:54:09,234 DEBUG SenderThread:588654 [sender.py:send():179] send: summary
10
+ 2021-07-14 21:54:09,234 INFO SenderThread:588654 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
11
+ 2021-07-14 21:54:09,235 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: run_start
12
+ 2021-07-14 21:54:10,238 INFO Thread-8 :588654 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json
13
+ 2021-07-14 21:54:10,349 DEBUG HandlerThread:588654 [meta.py:__init__():39] meta init
14
+ 2021-07-14 21:54:10,349 DEBUG HandlerThread:588654 [meta.py:__init__():53] meta init done
15
+ 2021-07-14 21:54:10,349 DEBUG HandlerThread:588654 [meta.py:probe():210] probe
16
+ 2021-07-14 21:54:10,351 DEBUG HandlerThread:588654 [meta.py:_setup_git():200] setup git
17
+ 2021-07-14 21:54:10,382 DEBUG HandlerThread:588654 [meta.py:_setup_git():207] setup git done
18
+ 2021-07-14 21:54:10,382 DEBUG HandlerThread:588654 [meta.py:_save_pip():57] save pip
19
+ 2021-07-14 21:54:10,382 DEBUG HandlerThread:588654 [meta.py:_save_pip():71] save pip done
20
+ 2021-07-14 21:54:10,382 DEBUG HandlerThread:588654 [meta.py:probe():252] probe done
21
+ 2021-07-14 21:54:10,386 DEBUG SenderThread:588654 [sender.py:send():179] send: files
22
+ 2021-07-14 21:54:10,387 INFO SenderThread:588654 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
23
+ 2021-07-14 21:54:10,394 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: stop_status
24
+ 2021-07-14 21:54:10,394 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: stop_status
25
+ 2021-07-14 21:54:10,527 DEBUG SenderThread:588654 [sender.py:send():179] send: config
26
+ 2021-07-14 21:54:10,527 DEBUG SenderThread:588654 [sender.py:send():179] send: config
27
+ 2021-07-14 21:54:10,527 DEBUG SenderThread:588654 [sender.py:send():179] send: config
28
+ 2021-07-14 21:54:11,237 INFO Thread-8 :588654 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/requirements.txt
29
+ 2021-07-14 21:54:11,237 INFO Thread-8 :588654 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-metadata.json
30
+ 2021-07-14 21:54:11,237 INFO Thread-8 :588654 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log
31
+ 2021-07-14 21:54:11,254 INFO Thread-11 :588654 [upload_job.py:push():137] Uploaded file /tmp/tmp43phob9nwandb/1nzximp3-wandb-metadata.json
32
+ 2021-07-14 21:54:25,243 INFO Thread-8 :588654 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log
33
+ 2021-07-14 21:54:25,572 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: stop_status
34
+ 2021-07-14 21:54:25,573 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: stop_status
35
+ 2021-07-14 21:54:28,245 INFO Thread-8 :588654 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log
36
+ 2021-07-14 21:54:29,079 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
37
+ 2021-07-14 21:54:29,079 DEBUG SenderThread:588654 [sender.py:send():179] send: telemetry
38
+ 2021-07-14 21:54:29,079 DEBUG SenderThread:588654 [sender.py:send():179] send: exit
39
+ 2021-07-14 21:54:29,079 INFO SenderThread:588654 [sender.py:send_exit():287] handling exit code: 1
40
+ 2021-07-14 21:54:29,080 INFO SenderThread:588654 [sender.py:send_exit():295] send defer
41
+ 2021-07-14 21:54:29,080 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
42
+ 2021-07-14 21:54:29,081 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
43
+ 2021-07-14 21:54:29,081 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 0
44
+ 2021-07-14 21:54:29,081 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
45
+ 2021-07-14 21:54:29,081 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 0
46
+ 2021-07-14 21:54:29,081 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 1
47
+ 2021-07-14 21:54:29,081 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
48
+ 2021-07-14 21:54:29,081 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 1
49
+ 2021-07-14 21:54:29,111 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
50
+ 2021-07-14 21:54:29,111 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 1
51
+ 2021-07-14 21:54:29,111 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 2
52
+ 2021-07-14 21:54:29,111 DEBUG SenderThread:588654 [sender.py:send():179] send: stats
53
+ 2021-07-14 21:54:29,112 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
54
+ 2021-07-14 21:54:29,112 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 2
55
+ 2021-07-14 21:54:29,112 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
56
+ 2021-07-14 21:54:29,112 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 2
57
+ 2021-07-14 21:54:29,112 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 3
58
+ 2021-07-14 21:54:29,112 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
59
+ 2021-07-14 21:54:29,112 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 3
60
+ 2021-07-14 21:54:29,113 DEBUG SenderThread:588654 [sender.py:send():179] send: summary
61
+ 2021-07-14 21:54:29,113 INFO SenderThread:588654 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
62
+ 2021-07-14 21:54:29,113 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
63
+ 2021-07-14 21:54:29,113 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 3
64
+ 2021-07-14 21:54:29,113 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 4
65
+ 2021-07-14 21:54:29,113 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
66
+ 2021-07-14 21:54:29,113 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 4
67
+ 2021-07-14 21:54:29,114 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
68
+ 2021-07-14 21:54:29,114 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 4
69
+ 2021-07-14 21:54:29,182 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
70
+ 2021-07-14 21:54:29,245 INFO Thread-8 :588654 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json
71
+ 2021-07-14 21:54:29,245 INFO Thread-8 :588654 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log
72
+ 2021-07-14 21:54:29,299 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 5
73
+ 2021-07-14 21:54:29,299 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
74
+ 2021-07-14 21:54:29,300 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
75
+ 2021-07-14 21:54:29,300 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 5
76
+ 2021-07-14 21:54:29,300 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
77
+ 2021-07-14 21:54:29,300 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 5
78
+ 2021-07-14 21:54:29,300 INFO SenderThread:588654 [dir_watcher.py:finish():282] shutting down directory watcher
79
+ 2021-07-14 21:54:29,401 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
80
+ 2021-07-14 21:54:30,246 INFO Thread-8 :588654 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/config.yaml
81
+ 2021-07-14 21:54:30,246 INFO SenderThread:588654 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files
82
+ 2021-07-14 21:54:30,247 INFO SenderThread:588654 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/requirements.txt requirements.txt
83
+ 2021-07-14 21:54:30,247 INFO SenderThread:588654 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log output.log
84
+ 2021-07-14 21:54:30,247 INFO SenderThread:588654 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-metadata.json wandb-metadata.json
85
+ 2021-07-14 21:54:30,247 INFO SenderThread:588654 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/config.yaml config.yaml
86
+ 2021-07-14 21:54:30,247 INFO SenderThread:588654 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json wandb-summary.json
87
+ 2021-07-14 21:54:30,248 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 6
88
+ 2021-07-14 21:54:30,248 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
89
+ 2021-07-14 21:54:30,252 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
90
+ 2021-07-14 21:54:30,252 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 6
91
+ 2021-07-14 21:54:30,252 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
92
+ 2021-07-14 21:54:30,252 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 6
93
+ 2021-07-14 21:54:30,252 INFO SenderThread:588654 [file_pusher.py:finish():177] shutting down file pusher
94
+ 2021-07-14 21:54:30,350 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
95
+ 2021-07-14 21:54:30,350 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
96
+ 2021-07-14 21:54:30,452 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
97
+ 2021-07-14 21:54:30,452 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
98
+ 2021-07-14 21:54:30,553 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
99
+ 2021-07-14 21:54:30,554 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
100
+ 2021-07-14 21:54:30,655 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
101
+ 2021-07-14 21:54:30,655 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
102
+ 2021-07-14 21:54:30,699 INFO Thread-13 :588654 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/output.log
103
+ 2021-07-14 21:54:30,707 INFO Thread-12 :588654 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/requirements.txt
104
+ 2021-07-14 21:54:30,708 INFO Thread-15 :588654 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/wandb-summary.json
105
+ 2021-07-14 21:54:30,757 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
106
+ 2021-07-14 21:54:30,757 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
107
+ 2021-07-14 21:54:30,760 INFO Thread-14 :588654 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/files/config.yaml
108
+ 2021-07-14 21:54:30,858 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
109
+ 2021-07-14 21:54:30,858 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
110
+ 2021-07-14 21:54:30,960 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
111
+ 2021-07-14 21:54:30,960 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
112
+ 2021-07-14 21:54:30,960 INFO Thread-7 :588654 [sender.py:transition_state():308] send defer: 7
113
+ 2021-07-14 21:54:30,961 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
114
+ 2021-07-14 21:54:30,961 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 7
115
+ 2021-07-14 21:54:30,961 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
116
+ 2021-07-14 21:54:30,961 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 7
117
+ 2021-07-14 21:54:31,062 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
118
+ 2021-07-14 21:54:31,093 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 8
119
+ 2021-07-14 21:54:31,093 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
120
+ 2021-07-14 21:54:31,093 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
121
+ 2021-07-14 21:54:31,093 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 8
122
+ 2021-07-14 21:54:31,093 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
123
+ 2021-07-14 21:54:31,094 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 8
124
+ 2021-07-14 21:54:31,094 INFO SenderThread:588654 [sender.py:transition_state():308] send defer: 9
125
+ 2021-07-14 21:54:31,094 DEBUG SenderThread:588654 [sender.py:send():179] send: final
126
+ 2021-07-14 21:54:31,094 DEBUG SenderThread:588654 [sender.py:send():179] send: footer
127
+ 2021-07-14 21:54:31,094 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: defer
128
+ 2021-07-14 21:54:31,095 INFO HandlerThread:588654 [handler.py:handle_request_defer():141] handle defer: 9
129
+ 2021-07-14 21:54:31,095 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: defer
130
+ 2021-07-14 21:54:31,095 INFO SenderThread:588654 [sender.py:send_request_defer():304] handle sender defer: 9
131
+ 2021-07-14 21:54:31,195 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: poll_exit
132
+ 2021-07-14 21:54:31,195 DEBUG SenderThread:588654 [sender.py:send_request():193] send_request: poll_exit
133
+ 2021-07-14 21:54:31,195 INFO SenderThread:588654 [file_pusher.py:join():182] waiting for file pusher
134
+ 2021-07-14 21:54:31,196 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: get_summary
135
+ 2021-07-14 21:54:31,197 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: sampled_history
136
+ 2021-07-14 21:54:31,197 DEBUG HandlerThread:588654 [handler.py:handle_request():124] handle_request: shutdown
137
+ 2021-07-14 21:54:31,197 INFO HandlerThread:588654 [handler.py:finish():638] shutting down handler
138
+ 2021-07-14 21:54:32,095 INFO WriterThread:588654 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/run-3kpvz8se.wandb
139
+ 2021-07-14 21:54:32,195 INFO SenderThread:588654 [sender.py:finish():945] shutting down sender
140
+ 2021-07-14 21:54:32,196 INFO SenderThread:588654 [file_pusher.py:finish():177] shutting down file pusher
141
+ 2021-07-14 21:54:32,196 INFO SenderThread:588654 [file_pusher.py:join():182] waiting for file pusher
142
+ 2021-07-14 21:54:32,198 INFO MainThread:588654 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210714_215408-3kpvz8se/logs/debug.log ADDED
@@ -0,0 +1,127 @@
1
+ 2021-07-14 21:54:08,360 INFO MainThread:587396 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/logs/debug.log
4
+ 2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_215408-3kpvz8se/logs/debug-internal.log
5
+ 2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-14 21:54:08,361 INFO MainThread:587396 [wandb_init.py:init():419] starting backend
9
+ 2021-07-14 21:54:08,361 INFO MainThread:587396 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-14 21:54:08,399 INFO MainThread:587396 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-14 21:54:08,437 INFO MainThread:587396 [backend.py:ensure_launched():139] started backend process with pid: 588654
12
+ 2021-07-14 21:54:08,438 INFO MainThread:587396 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-14 21:54:08,441 INFO MainThread:587396 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-14 21:54:08,442 INFO MainThread:587396 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-14 21:54:09,049 INFO MainThread:587396 [wandb_init.py:init():496] got version response
16
+ 2021-07-14 21:54:09,049 INFO MainThread:587396 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-14 21:54:09,234 INFO MainThread:587396 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-14 21:54:10,390 INFO MainThread:587396 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-14 21:54:10,390 INFO MainThread:587396 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-14 21:54:10,391 INFO MainThread:587396 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-14 21:54:10,393 INFO MainThread:587396 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-14 21:54:10,393 INFO MainThread:587396 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-14 21:54:10,399 INFO MainThread:587396 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_21-54-01_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 250, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './', 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-14 21:54:10,401 INFO MainThread:587396 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
25
+ 2021-07-14 21:54:10,402 INFO MainThread:587396 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
26
+ 2021-07-14 21:54:26,346 INFO MainThread:587396 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
27
+ 2021-07-14 21:54:26,347 INFO MainThread:587396 [wandb_run.py:_restore():1565] restore
28
+ 2021-07-14 21:54:29,081 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
29
+ wandb_count: 1
30
+ }
31
+ pusher_stats {
32
+ uploaded_bytes: 1483
33
+ total_bytes: 1483
34
+ }
35
+
36
+ 2021-07-14 21:54:29,300 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
37
+ wandb_count: 1
38
+ }
39
+ pusher_stats {
40
+ uploaded_bytes: 1483
41
+ total_bytes: 1483
42
+ }
43
+
44
+ 2021-07-14 21:54:30,248 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
45
+ wandb_count: 2
46
+ }
47
+ pusher_stats {
48
+ uploaded_bytes: 1483
49
+ total_bytes: 3133
50
+ }
51
+
52
+ 2021-07-14 21:54:30,351 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
53
+ wandb_count: 5
54
+ }
55
+ pusher_stats {
56
+ uploaded_bytes: 1483
57
+ total_bytes: 9257
58
+ }
59
+
60
+ 2021-07-14 21:54:30,452 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
61
+ wandb_count: 5
62
+ }
63
+ pusher_stats {
64
+ uploaded_bytes: 9257
65
+ total_bytes: 9257
66
+ }
67
+
68
+ 2021-07-14 21:54:30,554 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
69
+ wandb_count: 5
70
+ }
71
+ pusher_stats {
72
+ uploaded_bytes: 9257
73
+ total_bytes: 9257
74
+ }
75
+
76
+ 2021-07-14 21:54:30,656 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
77
+ wandb_count: 5
78
+ }
79
+ pusher_stats {
80
+ uploaded_bytes: 9257
81
+ total_bytes: 9257
82
+ }
83
+
84
+ 2021-07-14 21:54:30,757 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
85
+ wandb_count: 5
86
+ }
87
+ pusher_stats {
88
+ uploaded_bytes: 9257
89
+ total_bytes: 9257
90
+ }
91
+
92
+ 2021-07-14 21:54:30,859 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
93
+ wandb_count: 5
94
+ }
95
+ pusher_stats {
96
+ uploaded_bytes: 9257
97
+ total_bytes: 9257
98
+ }
99
+
100
+ 2021-07-14 21:54:30,961 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
101
+ wandb_count: 5
102
+ }
103
+ pusher_stats {
104
+ uploaded_bytes: 9257
105
+ total_bytes: 9257
106
+ }
107
+
108
+ 2021-07-14 21:54:31,093 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
109
+ wandb_count: 5
110
+ }
111
+ pusher_stats {
112
+ uploaded_bytes: 9257
113
+ total_bytes: 9257
114
+ }
115
+
116
+ 2021-07-14 21:54:31,196 INFO MainThread:587396 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
117
+ exit_result {
118
+ }
119
+ file_counts {
120
+ wandb_count: 5
121
+ }
122
+ pusher_stats {
123
+ uploaded_bytes: 9257
124
+ total_bytes: 9257
125
+ }
126
+
127
+ 2021-07-14 21:54:32,452 INFO MainThread:587396 [wandb_run.py:_show_files():1937] logging synced files
wandb/run-20210714_215408-3kpvz8se/run-3kpvz8se.wandb ADDED
Binary file (4.77 kB).
wandb/run-20210714_221920-s091gfok/files/config.yaml ADDED
@@ -0,0 +1,307 @@
+ wandb_version: 1
+
+ _wandb:
+   desc: null
+   value:
+     cli_version: 0.10.33
+     framework: huggingface
+     huggingface_version: 4.9.0.dev0
+     is_jupyter_run: false
+     is_kaggle_kernel: false
+     python_version: 3.8.10
+     t:
+       1:
+       - 3
+       - 11
+       2:
+       - 3
+       - 11
+       4: 3.8.10
+       5: 0.10.33
+       6: 4.9.0.dev0
+       8:
+       - 5
+ adafactor:
+   desc: null
+   value: false
+ adam_beta1:
+   desc: null
+   value: 0.9
+ adam_beta2:
+   desc: null
+   value: 0.98
+ adam_epsilon:
+   desc: null
+   value: 1.0e-08
+ cache_dir:
+   desc: null
+   value: null
+ config_name:
+   desc: null
+   value: ./
+ dataloader_drop_last:
+   desc: null
+   value: false
+ dataloader_num_workers:
+   desc: null
+   value: 0
+ dataloader_pin_memory:
+   desc: null
+   value: true
+ dataset_config_name:
+   desc: null
+   value: null
+ dataset_name:
+   desc: null
+   value: null
+ ddp_find_unused_parameters:
+   desc: null
+   value: null
+ debug:
+   desc: null
+   value: []
+ deepspeed:
+   desc: null
+   value: null
+ disable_tqdm:
+   desc: null
+   value: false
+ do_eval:
+   desc: null
+   value: false
+ do_predict:
+   desc: null
+   value: false
+ do_train:
+   desc: null
+   value: false
+ dtype:
+   desc: null
+   value: float32
+ eval_accumulation_steps:
+   desc: null
+   value: null
+ eval_steps:
+   desc: null
+   value: 20000
+ evaluation_strategy:
+   desc: null
+   value: IntervalStrategy.NO
+ fp16:
+   desc: null
+   value: false
+ fp16_backend:
+   desc: null
+   value: auto
+ fp16_full_eval:
+   desc: null
+   value: false
+ fp16_opt_level:
+   desc: null
+   value: O1
+ gradient_accumulation_steps:
+   desc: null
+   value: 2
+ greater_is_better:
+   desc: null
+   value: null
+ group_by_length:
+   desc: null
+   value: false
+ ignore_data_skip:
+   desc: null
+   value: false
+ label_names:
+   desc: null
+   value: null
+ label_smoothing_factor:
+   desc: null
+   value: 0.0
+ learning_rate:
+   desc: null
+   value: 5.0e-05
+ length_column_name:
+   desc: null
+   value: length
+ line_by_line:
+   desc: null
+   value: false
+ load_best_model_at_end:
+   desc: null
+   value: false
+ local_rank:
+   desc: null
+   value: -1
+ log_level:
+   desc: null
+   value: -1
+ log_level_replica:
+   desc: null
+   value: -1
+ log_on_each_node:
+   desc: null
+   value: true
+ logging_dir:
+   desc: null
+   value: ./runs/Jul14_22-19-13_t1v-n-f5c06ea1-w-0
+ logging_first_step:
+   desc: null
+   value: false
+ logging_steps:
+   desc: null
+   value: 500
+ logging_strategy:
+   desc: null
+   value: IntervalStrategy.STEPS
+ lr_scheduler_type:
+   desc: null
+   value: SchedulerType.LINEAR
+ max_eval_samples:
+   desc: null
+   value: 2000
+ max_grad_norm:
+   desc: null
+   value: 1.0
+ max_seq_length:
+   desc: null
+   value: 4096
+ max_steps:
+   desc: null
+   value: -1
+ metric_for_best_model:
+   desc: null
+   value: null
+ mlm_probability:
+   desc: null
+   value: 0.15
+ model_name_or_path:
+   desc: null
+   value: null
+ model_type:
+   desc: null
+   value: big_bird
+ mp_parameters:
+   desc: null
+   value: ''
+ no_cuda:
+   desc: null
+   value: false
+ num_train_epochs:
+   desc: null
+   value: 5.0
+ output_dir:
+   desc: null
+   value: ./
+ overwrite_cache:
+   desc: null
+   value: false
+ overwrite_output_dir:
+   desc: null
+   value: true
+ pad_to_max_length:
+   desc: null
+   value: false
+ past_index:
+   desc: null
+   value: -1
+ per_device_eval_batch_size:
+   desc: null
+   value: 2
+ per_device_train_batch_size:
+   desc: null
+   value: 2
+ per_gpu_eval_batch_size:
+   desc: null
+   value: null
+ per_gpu_train_batch_size:
+   desc: null
+   value: null
+ prediction_loss_only:
+   desc: null
+   value: false
+ preprocessing_num_workers:
+   desc: null
+   value: 96
+ push_to_hub:
+   desc: null
+   value: true
+ push_to_hub_model_id:
+   desc: null
+   value: ''
+ push_to_hub_organization:
+   desc: null
+   value: null
+ push_to_hub_token:
+   desc: null
+   value: null
+ remove_unused_columns:
+   desc: null
+   value: true
+ report_to:
+   desc: null
+   value:
+   - tensorboard
+   - wandb
+ resume_from_checkpoint:
+   desc: null
+   value: null
+ run_name:
+   desc: null
+   value: ./
+ save_on_each_node:
+   desc: null
+   value: false
+ save_optimizer:
+   desc: null
+   value: true
+ save_steps:
+   desc: null
+   value: 20000
+ save_strategy:
+   desc: null
+   value: IntervalStrategy.STEPS
+ save_total_limit:
+   desc: null
+   value: 5
+ seed:
+   desc: null
+   value: 42
+ sharded_ddp:
+   desc: null
+   value: []
+ skip_memory_metrics:
+   desc: null
+   value: true
+ tokenizer_name:
+   desc: null
+   value: ./
+ tpu_metrics_debug:
+   desc: null
+   value: false
+ tpu_num_cores:
+   desc: null
+   value: null
+ train_ref_file:
+   desc: null
+   value: null
+ use_fast_tokenizer:
+   desc: null
+   value: true
+ use_legacy_prediction_loop:
+   desc: null
+   value: false
+ validation_ref_file:
+   desc: null
+   value: null
+ validation_split_percentage:
+   desc: null
+   value: 5
+ warmup_ratio:
+   desc: null
+   value: 0.0
+ warmup_steps:
+   desc: null
304
+ value: 5000
305
+ weight_decay:
306
+ desc: null
307
+ value: 0.0095
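
The config above records learning_rate: 5.0e-05, warmup_steps: 5000 and lr_scheduler_type: SchedulerType.LINEAR. A minimal sketch, not necessarily the exact code in run_mlm_flax.py, of how such a warmup-plus-linear-decay schedule is commonly assembled with optax; the total step count is an assumption for illustration only:

    import optax

    learning_rate = 5e-5   # from the config above
    warmup_steps = 5000    # from the config above
    total_steps = 100_000  # assumption, purely for illustration

    # linear ramp from 0 to the peak rate, then linear decay back to 0
    warmup = optax.linear_schedule(init_value=0.0, end_value=learning_rate,
                                   transition_steps=warmup_steps)
    decay = optax.linear_schedule(init_value=learning_rate, end_value=0.0,
                                  transition_steps=total_steps - warmup_steps)
    schedule = optax.join_schedules(schedules=[warmup, decay], boundaries=[warmup_steps])
    print(schedule(0), schedule(warmup_steps), schedule(total_steps))
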
wandb/run-20210714_221920-s091gfok/files/output.log ADDED
@@ -0,0 +1,39 @@
+ /home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype <class 'jax._src.numpy.lax_numpy.int64'> requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more.
+ lax._check_user_dtype_supported(dtype, "zeros")
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
+ warnings.warn(
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
+ warnings.warn(
+ Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s][22:19:36] - INFO - __main__ - Skipping to epoch 0 step 0
+ Training...: 0%| | 0/503952 [01:21<?, ?it/s]
+ Epoch ... (1/5): 0%| | 0/5 [09:12<?, ?it/s]
+ Traceback (most recent call last):
+ File "./run_mlm_flax.py", line 804, in <module>
+ state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback
+ return fun(*args, **kwargs)
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped
+ out = pxla.xla_pmap(
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind
+ return call_bind(self, fun, *args, **params)
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind
+ outs = primitive.process(top_trace, fun, tracers, params)
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process
+ return trace.process_map(self, fun, tracers, params)
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call
+ return primitive.impl(f, *tracers, **params)
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl
+ return compiled_fun(*args)
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
+ out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
+ jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
+ The stack trace below excludes JAX-internal frames.
+ The preceding is the original exception that occurred, unmodified.
+ --------------------
+ The above exception was the direct cause of the following exception:
+ Traceback (most recent call last):
+ File "./run_mlm_flax.py", line 804, in <module>
+ state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
+ out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
+ RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
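
The traceback above ends in a device-memory reservation failure (12.60G requested, only 5.65G reservable) inside the pmapped train step. A rough, hedged way to see the per-step load implied by the recorded arguments; the device count comes from jax at runtime rather than from the log, and the 8-core TPU host is only presumed:

    import jax

    per_device_train_batch_size = 2   # recorded in the training args above
    gradient_accumulation_steps = 2   # 2 in run s091gfok; the later retry used 4
    max_seq_length = 4096             # recorded max_seq_length

    n_devices = jax.local_device_count()  # whatever this host exposes (presumably 8 TPU cores)
    tokens_per_optimizer_step = (per_device_train_batch_size
                                 * gradient_accumulation_steps
                                 * n_devices
                                 * max_seq_length)
    print(f"devices={n_devices}, tokens per optimizer step={tokens_per_optimizer_step:,}")
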
wandb/run-20210714_221920-s091gfok/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210714_221920-s091gfok/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
+ {
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
+ "python": "3.8.10",
+ "heartbeatAt": "2021-07-14T22:19:22.632871",
+ "startedAt": "2021-07-14T22:19:20.670815",
+ "docker": null,
+ "cpu_count": 96,
+ "cuda": null,
+ "args": [
+ "--push_to_hub",
+ "--output_dir=./",
+ "--model_type=big_bird",
+ "--config_name=./",
+ "--tokenizer_name=./",
+ "--max_seq_length=4096",
+ "--weight_decay=0.0095",
+ "--warmup_steps=5000",
+ "--overwrite_output_dir",
+ "--adam_beta1=0.9",
+ "--adam_beta2=0.98",
+ "--logging_steps=500",
+ "--eval_steps=20000",
+ "--num_train_epochs=5",
+ "--preprocessing_num_workers=96",
+ "--save_steps=20000",
+ "--learning_rate=5e-5",
+ "--per_device_train_batch_size=2",
+ "--per_device_eval_batch_size=2",
+ "--save_total_limit=5",
+ "--max_eval_samples=2000",
+ "--overwrite_cache",
+ "False",
+ "--gradient_accumulation_steps=2"
+ ],
+ "state": "running",
+ "program": "./run_mlm_flax.py",
+ "codePath": "run_mlm_flax.py",
+ "git": {
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
+ "commit": "de71755314d4aa3ab182072c6f0be3de9798a66b"
+ },
+ "email": null,
+ "root": "/home/dat/pino-roberta-base",
+ "host": "t1v-n-f5c06ea1-w-0",
+ "username": "dat",
+ "executable": "/home/dat/pino/bin/python"
+ }
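
wandb-metadata.json captures the interpreter, entry point, and argument list for the run. A small sketch, assuming the repository layout shown in this commit, that rebuilds the original launch command from the recorded metadata:

    import json
    import shlex
    from pathlib import Path

    # path as it appears in this commit; adjust for other runs
    meta_path = Path("wandb/run-20210714_221920-s091gfok/files/wandb-metadata.json")
    meta = json.loads(meta_path.read_text())

    # executable + program + args mirrors how the run was started
    cmd = [meta["executable"], meta["program"], *meta["args"]]
    print(" ".join(shlex.quote(part) for part in cmd))
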
wandb/run-20210714_221920-s091gfok/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {}
wandb/run-20210714_221920-s091gfok/logs/debug-internal.log ADDED
@@ -0,0 +1,233 @@
1
+ 2021-07-14 22:19:21,314 INFO MainThread:593294 [internal.py:wandb_internal():88] W&B internal server running at pid: 593294, started at: 2021-07-14 22:19:21.314432
2
+ 2021-07-14 22:19:21,317 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: check_version
3
+ 2021-07-14 22:19:21,317 INFO WriterThread:593294 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/run-s091gfok.wandb
4
+ 2021-07-14 22:19:21,318 DEBUG SenderThread:593294 [sender.py:send():179] send: header
5
+ 2021-07-14 22:19:21,318 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: check_version
6
+ 2021-07-14 22:19:21,357 DEBUG SenderThread:593294 [sender.py:send():179] send: run
7
+ 2021-07-14 22:19:21,536 INFO SenderThread:593294 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files
8
+ 2021-07-14 22:19:21,536 INFO SenderThread:593294 [sender.py:_start_run_threads():716] run started: s091gfok with start time 1626301160
9
+ 2021-07-14 22:19:21,536 DEBUG SenderThread:593294 [sender.py:send():179] send: summary
10
+ 2021-07-14 22:19:21,537 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: run_start
11
+ 2021-07-14 22:19:21,537 INFO SenderThread:593294 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
12
+ 2021-07-14 22:19:22,539 INFO Thread-8 :593294 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-summary.json
13
+ 2021-07-14 22:19:22,632 DEBUG HandlerThread:593294 [meta.py:__init__():39] meta init
14
+ 2021-07-14 22:19:22,632 DEBUG HandlerThread:593294 [meta.py:__init__():53] meta init done
15
+ 2021-07-14 22:19:22,632 DEBUG HandlerThread:593294 [meta.py:probe():210] probe
16
+ 2021-07-14 22:19:22,634 DEBUG HandlerThread:593294 [meta.py:_setup_git():200] setup git
17
+ 2021-07-14 22:19:22,663 DEBUG HandlerThread:593294 [meta.py:_setup_git():207] setup git done
18
+ 2021-07-14 22:19:22,663 DEBUG HandlerThread:593294 [meta.py:_save_pip():57] save pip
19
+ 2021-07-14 22:19:22,664 DEBUG HandlerThread:593294 [meta.py:_save_pip():71] save pip done
20
+ 2021-07-14 22:19:22,664 DEBUG HandlerThread:593294 [meta.py:probe():252] probe done
21
+ 2021-07-14 22:19:22,667 DEBUG SenderThread:593294 [sender.py:send():179] send: files
22
+ 2021-07-14 22:19:22,667 INFO SenderThread:593294 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
23
+ 2021-07-14 22:19:22,674 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
24
+ 2021-07-14 22:19:22,674 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
25
+ 2021-07-14 22:19:22,802 DEBUG SenderThread:593294 [sender.py:send():179] send: config
26
+ 2021-07-14 22:19:22,803 DEBUG SenderThread:593294 [sender.py:send():179] send: config
27
+ 2021-07-14 22:19:22,803 DEBUG SenderThread:593294 [sender.py:send():179] send: config
28
+ 2021-07-14 22:19:23,119 INFO Thread-11 :593294 [upload_job.py:push():137] Uploaded file /tmp/tmpn0n6xzzmwandb/2vhpic31-wandb-metadata.json
29
+ 2021-07-14 22:19:23,537 INFO Thread-8 :593294 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-metadata.json
30
+ 2021-07-14 22:19:23,537 INFO Thread-8 :593294 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/requirements.txt
31
+ 2021-07-14 22:19:23,538 INFO Thread-8 :593294 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log
32
+ 2021-07-14 22:19:37,543 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log
33
+ 2021-07-14 22:19:37,804 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
34
+ 2021-07-14 22:19:37,804 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
35
+ 2021-07-14 22:19:39,545 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log
36
+ 2021-07-14 22:19:50,715 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
37
+ 2021-07-14 22:19:52,550 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/config.yaml
38
+ 2021-07-14 22:19:52,936 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
39
+ 2021-07-14 22:19:52,936 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
40
+ 2021-07-14 22:20:08,079 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
41
+ 2021-07-14 22:20:08,080 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
42
+ 2021-07-14 22:20:20,789 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
43
+ 2021-07-14 22:20:23,215 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
44
+ 2021-07-14 22:20:23,215 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
45
+ 2021-07-14 22:20:38,362 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
46
+ 2021-07-14 22:20:38,363 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
47
+ 2021-07-14 22:20:50,861 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
48
+ 2021-07-14 22:20:53,496 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
49
+ 2021-07-14 22:20:53,496 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
50
+ 2021-07-14 22:21:08,625 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
51
+ 2021-07-14 22:21:08,625 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
52
+ 2021-07-14 22:21:20,932 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
53
+ 2021-07-14 22:21:23,756 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
54
+ 2021-07-14 22:21:23,757 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
55
+ 2021-07-14 22:21:38,885 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
56
+ 2021-07-14 22:21:38,886 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
57
+ 2021-07-14 22:21:50,997 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
58
+ 2021-07-14 22:21:54,016 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
59
+ 2021-07-14 22:21:54,016 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
60
+ 2021-07-14 22:22:09,146 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
61
+ 2021-07-14 22:22:09,147 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
62
+ 2021-07-14 22:22:21,114 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
63
+ 2021-07-14 22:22:24,279 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
64
+ 2021-07-14 22:22:24,279 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
65
+ 2021-07-14 22:22:39,412 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
66
+ 2021-07-14 22:22:39,413 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
67
+ 2021-07-14 22:22:51,192 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
68
+ 2021-07-14 22:22:54,548 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
69
+ 2021-07-14 22:22:54,548 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
70
+ 2021-07-14 22:23:09,678 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
71
+ 2021-07-14 22:23:09,678 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
72
+ 2021-07-14 22:23:21,267 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
73
+ 2021-07-14 22:23:24,814 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
74
+ 2021-07-14 22:23:24,814 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
75
+ 2021-07-14 22:23:39,949 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
76
+ 2021-07-14 22:23:39,949 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
77
+ 2021-07-14 22:23:51,337 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
78
+ 2021-07-14 22:23:55,081 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
79
+ 2021-07-14 22:23:55,082 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
80
+ 2021-07-14 22:24:10,212 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
81
+ 2021-07-14 22:24:10,212 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
82
+ 2021-07-14 22:24:21,405 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
83
+ 2021-07-14 22:24:25,345 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
84
+ 2021-07-14 22:24:25,346 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
85
+ 2021-07-14 22:24:40,483 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
86
+ 2021-07-14 22:24:40,483 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
87
+ 2021-07-14 22:24:51,475 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
88
+ 2021-07-14 22:24:55,615 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
89
+ 2021-07-14 22:24:55,615 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
90
+ 2021-07-14 22:25:10,746 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
91
+ 2021-07-14 22:25:10,746 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
92
+ 2021-07-14 22:25:21,548 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
93
+ 2021-07-14 22:25:25,876 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
94
+ 2021-07-14 22:25:25,876 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
95
+ 2021-07-14 22:25:41,015 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
96
+ 2021-07-14 22:25:41,016 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
97
+ 2021-07-14 22:25:51,619 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
98
+ 2021-07-14 22:25:56,148 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
99
+ 2021-07-14 22:25:56,148 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
100
+ 2021-07-14 22:26:11,280 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
101
+ 2021-07-14 22:26:11,280 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
102
+ 2021-07-14 22:26:21,695 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
103
+ 2021-07-14 22:26:26,412 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
104
+ 2021-07-14 22:26:26,413 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
105
+ 2021-07-14 22:26:41,546 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
106
+ 2021-07-14 22:26:41,547 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
107
+ 2021-07-14 22:26:51,772 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
108
+ 2021-07-14 22:26:56,683 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
109
+ 2021-07-14 22:26:56,683 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
110
+ 2021-07-14 22:27:11,816 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
111
+ 2021-07-14 22:27:11,816 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
112
+ 2021-07-14 22:27:21,849 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
113
+ 2021-07-14 22:27:26,950 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
114
+ 2021-07-14 22:27:26,950 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
115
+ 2021-07-14 22:27:29,710 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log
116
+ 2021-07-14 22:27:42,097 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
117
+ 2021-07-14 22:27:42,097 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
118
+ 2021-07-14 22:27:51,925 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
119
+ 2021-07-14 22:27:57,249 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
120
+ 2021-07-14 22:27:57,250 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
121
+ 2021-07-14 22:28:12,383 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
122
+ 2021-07-14 22:28:12,384 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
123
+ 2021-07-14 22:28:22,007 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
124
+ 2021-07-14 22:28:27,521 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
125
+ 2021-07-14 22:28:27,522 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
126
+ 2021-07-14 22:28:42,658 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: stop_status
127
+ 2021-07-14 22:28:42,658 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: stop_status
128
+ 2021-07-14 22:28:49,741 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log
129
+ 2021-07-14 22:28:51,116 DEBUG SenderThread:593294 [sender.py:send():179] send: telemetry
130
+ 2021-07-14 22:28:51,116 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
131
+ 2021-07-14 22:28:51,116 DEBUG SenderThread:593294 [sender.py:send():179] send: exit
132
+ 2021-07-14 22:28:51,116 INFO SenderThread:593294 [sender.py:send_exit():287] handling exit code: 1
133
+ 2021-07-14 22:28:51,117 INFO SenderThread:593294 [sender.py:send_exit():295] send defer
134
+ 2021-07-14 22:28:51,117 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
135
+ 2021-07-14 22:28:51,118 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
136
+ 2021-07-14 22:28:51,118 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 0
137
+ 2021-07-14 22:28:51,118 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
138
+ 2021-07-14 22:28:51,118 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 0
139
+ 2021-07-14 22:28:51,118 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 1
140
+ 2021-07-14 22:28:51,118 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
141
+ 2021-07-14 22:28:51,118 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 1
142
+ 2021-07-14 22:28:51,182 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
143
+ 2021-07-14 22:28:51,182 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 1
144
+ 2021-07-14 22:28:51,183 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 2
145
+ 2021-07-14 22:28:51,183 DEBUG SenderThread:593294 [sender.py:send():179] send: stats
146
+ 2021-07-14 22:28:51,183 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
147
+ 2021-07-14 22:28:51,183 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 2
148
+ 2021-07-14 22:28:51,184 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
149
+ 2021-07-14 22:28:51,184 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 2
150
+ 2021-07-14 22:28:51,184 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 3
151
+ 2021-07-14 22:28:51,184 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
152
+ 2021-07-14 22:28:51,184 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 3
153
+ 2021-07-14 22:28:51,184 DEBUG SenderThread:593294 [sender.py:send():179] send: summary
154
+ 2021-07-14 22:28:51,185 INFO SenderThread:593294 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
155
+ 2021-07-14 22:28:51,185 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
156
+ 2021-07-14 22:28:51,185 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 3
157
+ 2021-07-14 22:28:51,185 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 4
158
+ 2021-07-14 22:28:51,185 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
159
+ 2021-07-14 22:28:51,185 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 4
160
+ 2021-07-14 22:28:51,185 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
161
+ 2021-07-14 22:28:51,185 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 4
162
+ 2021-07-14 22:28:51,220 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
163
+ 2021-07-14 22:28:51,361 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 5
164
+ 2021-07-14 22:28:51,361 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
165
+ 2021-07-14 22:28:51,361 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
166
+ 2021-07-14 22:28:51,362 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 5
167
+ 2021-07-14 22:28:51,362 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
168
+ 2021-07-14 22:28:51,362 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 5
169
+ 2021-07-14 22:28:51,362 INFO SenderThread:593294 [dir_watcher.py:finish():282] shutting down directory watcher
170
+ 2021-07-14 22:28:51,463 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
171
+ 2021-07-14 22:28:51,742 INFO Thread-8 :593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-summary.json
172
+ 2021-07-14 22:28:51,743 INFO SenderThread:593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/config.yaml
173
+ 2021-07-14 22:28:51,743 INFO SenderThread:593294 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log
174
+ 2021-07-14 22:28:51,743 INFO SenderThread:593294 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files
175
+ 2021-07-14 22:28:51,743 INFO SenderThread:593294 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/requirements.txt requirements.txt
176
+ 2021-07-14 22:28:51,744 INFO SenderThread:593294 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log output.log
177
+ 2021-07-14 22:28:51,744 INFO SenderThread:593294 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-metadata.json wandb-metadata.json
178
+ 2021-07-14 22:28:51,744 INFO SenderThread:593294 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/config.yaml config.yaml
179
+ 2021-07-14 22:28:51,744 INFO SenderThread:593294 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-summary.json wandb-summary.json
180
+ 2021-07-14 22:28:51,750 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 6
181
+ 2021-07-14 22:28:51,750 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
182
+ 2021-07-14 22:28:51,751 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
183
+ 2021-07-14 22:28:51,751 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 6
184
+ 2021-07-14 22:28:51,754 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
185
+ 2021-07-14 22:28:51,754 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 6
186
+ 2021-07-14 22:28:51,754 INFO SenderThread:593294 [file_pusher.py:finish():177] shutting down file pusher
187
+ 2021-07-14 22:28:51,856 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
188
+ 2021-07-14 22:28:51,856 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
189
+ 2021-07-14 22:28:51,958 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
190
+ 2021-07-14 22:28:51,958 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
191
+ 2021-07-14 22:28:52,060 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
192
+ 2021-07-14 22:28:52,061 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
193
+ 2021-07-14 22:28:52,162 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
194
+ 2021-07-14 22:28:52,163 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
195
+ 2021-07-14 22:28:52,191 INFO Thread-13 :593294 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/output.log
196
+ 2021-07-14 22:28:52,198 INFO Thread-12 :593294 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/requirements.txt
197
+ 2021-07-14 22:28:52,200 INFO Thread-14 :593294 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/config.yaml
198
+ 2021-07-14 22:28:52,212 INFO Thread-15 :593294 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/files/wandb-summary.json
199
+ 2021-07-14 22:28:52,264 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
200
+ 2021-07-14 22:28:52,265 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
201
+ 2021-07-14 22:28:52,366 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
202
+ 2021-07-14 22:28:52,366 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
203
+ 2021-07-14 22:28:52,413 INFO Thread-7 :593294 [sender.py:transition_state():308] send defer: 7
204
+ 2021-07-14 22:28:52,413 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
205
+ 2021-07-14 22:28:52,413 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 7
206
+ 2021-07-14 22:28:52,413 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
207
+ 2021-07-14 22:28:52,414 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 7
208
+ 2021-07-14 22:28:52,468 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
209
+ 2021-07-14 22:28:52,536 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 8
210
+ 2021-07-14 22:28:52,536 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
211
+ 2021-07-14 22:28:52,537 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
212
+ 2021-07-14 22:28:52,537 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 8
213
+ 2021-07-14 22:28:52,537 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
214
+ 2021-07-14 22:28:52,537 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 8
215
+ 2021-07-14 22:28:52,537 INFO SenderThread:593294 [sender.py:transition_state():308] send defer: 9
216
+ 2021-07-14 22:28:52,538 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: defer
217
+ 2021-07-14 22:28:52,538 INFO HandlerThread:593294 [handler.py:handle_request_defer():141] handle defer: 9
218
+ 2021-07-14 22:28:52,538 DEBUG SenderThread:593294 [sender.py:send():179] send: final
219
+ 2021-07-14 22:28:52,538 DEBUG SenderThread:593294 [sender.py:send():179] send: footer
220
+ 2021-07-14 22:28:52,538 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: defer
221
+ 2021-07-14 22:28:52,538 INFO SenderThread:593294 [sender.py:send_request_defer():304] handle sender defer: 9
222
+ 2021-07-14 22:28:52,638 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: poll_exit
223
+ 2021-07-14 22:28:52,638 DEBUG SenderThread:593294 [sender.py:send_request():193] send_request: poll_exit
224
+ 2021-07-14 22:28:52,638 INFO SenderThread:593294 [file_pusher.py:join():182] waiting for file pusher
225
+ 2021-07-14 22:28:52,640 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: get_summary
226
+ 2021-07-14 22:28:52,640 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: sampled_history
227
+ 2021-07-14 22:28:52,641 DEBUG HandlerThread:593294 [handler.py:handle_request():124] handle_request: shutdown
228
+ 2021-07-14 22:28:52,641 INFO HandlerThread:593294 [handler.py:finish():638] shutting down handler
229
+ 2021-07-14 22:28:53,538 INFO WriterThread:593294 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/run-s091gfok.wandb
230
+ 2021-07-14 22:28:53,639 INFO SenderThread:593294 [sender.py:finish():945] shutting down sender
231
+ 2021-07-14 22:28:53,639 INFO SenderThread:593294 [file_pusher.py:finish():177] shutting down file pusher
232
+ 2021-07-14 22:28:53,639 INFO SenderThread:593294 [file_pusher.py:join():182] waiting for file pusher
233
+ 2021-07-14 22:28:53,641 INFO MainThread:593294 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210714_221920-s091gfok/logs/debug.log ADDED
@@ -0,0 +1,119 @@
1
+ 2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/logs/debug.log
4
+ 2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_221920-s091gfok/logs/debug-internal.log
5
+ 2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-14 22:19:20,672 INFO MainThread:592040 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-14 22:19:20,673 INFO MainThread:592040 [wandb_init.py:init():419] starting backend
9
+ 2021-07-14 22:19:20,673 INFO MainThread:592040 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-14 22:19:20,717 INFO MainThread:592040 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-14 22:19:20,761 INFO MainThread:592040 [backend.py:ensure_launched():139] started backend process with pid: 593294
12
+ 2021-07-14 22:19:20,763 INFO MainThread:592040 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-14 22:19:20,766 INFO MainThread:592040 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-14 22:19:20,766 INFO MainThread:592040 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-14 22:19:21,355 INFO MainThread:592040 [wandb_init.py:init():496] got version response
16
+ 2021-07-14 22:19:21,356 INFO MainThread:592040 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-14 22:19:21,536 INFO MainThread:592040 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-14 22:19:22,670 INFO MainThread:592040 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-14 22:19:22,671 INFO MainThread:592040 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-14 22:19:22,672 INFO MainThread:592040 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-14 22:19:22,674 INFO MainThread:592040 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-14 22:19:22,674 INFO MainThread:592040 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-14 22:19:22,681 INFO MainThread:592040 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_22-19-13_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-14 22:19:22,683 INFO MainThread:592040 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
25
+ 2021-07-14 22:19:22,685 INFO MainThread:592040 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000}
26
+ 2021-07-14 22:28:48,857 INFO MainThread:592040 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
27
+ 2021-07-14 22:28:48,859 INFO MainThread:592040 [wandb_run.py:_restore():1565] restore
28
+ 2021-07-14 22:28:51,118 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
29
+ wandb_count: 1
30
+ }
31
+ pusher_stats {
32
+ uploaded_bytes: 1447
33
+ total_bytes: 1447
34
+ }
35
+
36
+ 2021-07-14 22:28:51,362 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
37
+ wandb_count: 1
38
+ }
39
+ pusher_stats {
40
+ uploaded_bytes: 1447
41
+ total_bytes: 1447
42
+ }
43
+
44
+ 2021-07-14 22:28:51,754 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
45
+ wandb_count: 4
46
+ }
47
+ pusher_stats {
48
+ uploaded_bytes: 1447
49
+ total_bytes: 11398
50
+ }
51
+
52
+ 2021-07-14 22:28:51,857 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
53
+ wandb_count: 5
54
+ }
55
+ pusher_stats {
56
+ uploaded_bytes: 1447
57
+ total_bytes: 11400
58
+ }
59
+
60
+ 2021-07-14 22:28:51,959 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
61
+ wandb_count: 5
62
+ }
63
+ pusher_stats {
64
+ uploaded_bytes: 11400
65
+ total_bytes: 11400
66
+ }
67
+
68
+ 2021-07-14 22:28:52,061 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
69
+ wandb_count: 5
70
+ }
71
+ pusher_stats {
72
+ uploaded_bytes: 11400
73
+ total_bytes: 11400
74
+ }
75
+
76
+ 2021-07-14 22:28:52,163 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
77
+ wandb_count: 5
78
+ }
79
+ pusher_stats {
80
+ uploaded_bytes: 11400
81
+ total_bytes: 11400
82
+ }
83
+
84
+ 2021-07-14 22:28:52,265 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
85
+ wandb_count: 5
86
+ }
87
+ pusher_stats {
88
+ uploaded_bytes: 11400
89
+ total_bytes: 11400
90
+ }
91
+
92
+ 2021-07-14 22:28:52,367 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
93
+ wandb_count: 5
94
+ }
95
+ pusher_stats {
96
+ uploaded_bytes: 11400
97
+ total_bytes: 11400
98
+ }
99
+
100
+ 2021-07-14 22:28:52,537 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
101
+ wandb_count: 5
102
+ }
103
+ pusher_stats {
104
+ uploaded_bytes: 11400
105
+ total_bytes: 11400
106
+ }
107
+
108
+ 2021-07-14 22:28:52,639 INFO MainThread:592040 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
109
+ exit_result {
110
+ }
111
+ file_counts {
112
+ wandb_count: 5
113
+ }
114
+ pusher_stats {
115
+ uploaded_bytes: 11400
116
+ total_bytes: 11400
117
+ }
118
+
119
+ 2021-07-14 22:28:53,943 INFO MainThread:592040 [wandb_run.py:_show_files():1937] logging synced files
wandb/run-20210714_221920-s091gfok/run-s091gfok.wandb ADDED
Binary file (11.5 kB)
wandb/run-20210714_222920-2p7mu4rm/files/config.yaml ADDED
@@ -0,0 +1,307 @@
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 2:
17
+ - 3
18
+ - 11
19
+ 4: 3.8.10
20
+ 5: 0.10.33
21
+ 6: 4.9.0.dev0
22
+ 8:
23
+ - 5
24
+ adafactor:
25
+ desc: null
26
+ value: false
27
+ adam_beta1:
28
+ desc: null
29
+ value: 0.9
30
+ adam_beta2:
31
+ desc: null
32
+ value: 0.98
33
+ adam_epsilon:
34
+ desc: null
35
+ value: 1.0e-08
36
+ cache_dir:
37
+ desc: null
38
+ value: null
39
+ config_name:
40
+ desc: null
41
+ value: ./
42
+ dataloader_drop_last:
43
+ desc: null
44
+ value: false
45
+ dataloader_num_workers:
46
+ desc: null
47
+ value: 0
48
+ dataloader_pin_memory:
49
+ desc: null
50
+ value: true
51
+ dataset_config_name:
52
+ desc: null
53
+ value: null
54
+ dataset_name:
55
+ desc: null
56
+ value: null
57
+ ddp_find_unused_parameters:
58
+ desc: null
59
+ value: null
60
+ debug:
61
+ desc: null
62
+ value: []
63
+ deepspeed:
64
+ desc: null
65
+ value: null
66
+ disable_tqdm:
67
+ desc: null
68
+ value: false
69
+ do_eval:
70
+ desc: null
71
+ value: false
72
+ do_predict:
73
+ desc: null
74
+ value: false
75
+ do_train:
76
+ desc: null
77
+ value: false
78
+ dtype:
79
+ desc: null
80
+ value: float32
81
+ eval_accumulation_steps:
82
+ desc: null
83
+ value: null
84
+ eval_steps:
85
+ desc: null
86
+ value: 20000
87
+ evaluation_strategy:
88
+ desc: null
89
+ value: IntervalStrategy.NO
90
+ fp16:
91
+ desc: null
92
+ value: false
93
+ fp16_backend:
94
+ desc: null
95
+ value: auto
96
+ fp16_full_eval:
97
+ desc: null
98
+ value: false
99
+ fp16_opt_level:
100
+ desc: null
101
+ value: O1
102
+ gradient_accumulation_steps:
103
+ desc: null
104
+ value: 4
105
+ greater_is_better:
106
+ desc: null
107
+ value: null
108
+ group_by_length:
109
+ desc: null
110
+ value: false
111
+ ignore_data_skip:
112
+ desc: null
113
+ value: false
114
+ label_names:
115
+ desc: null
116
+ value: null
117
+ label_smoothing_factor:
118
+ desc: null
119
+ value: 0.0
120
+ learning_rate:
121
+ desc: null
122
+ value: 5.0e-05
123
+ length_column_name:
124
+ desc: null
125
+ value: length
126
+ line_by_line:
127
+ desc: null
128
+ value: false
129
+ load_best_model_at_end:
130
+ desc: null
131
+ value: false
132
+ local_rank:
133
+ desc: null
134
+ value: -1
135
+ log_level:
136
+ desc: null
137
+ value: -1
138
+ log_level_replica:
139
+ desc: null
140
+ value: -1
141
+ log_on_each_node:
142
+ desc: null
143
+ value: true
144
+ logging_dir:
145
+ desc: null
146
+ value: ./runs/Jul14_22-29-13_t1v-n-f5c06ea1-w-0
147
+ logging_first_step:
148
+ desc: null
149
+ value: false
150
+ logging_steps:
151
+ desc: null
152
+ value: 500
153
+ logging_strategy:
154
+ desc: null
155
+ value: IntervalStrategy.STEPS
156
+ lr_scheduler_type:
157
+ desc: null
158
+ value: SchedulerType.LINEAR
159
+ max_eval_samples:
160
+ desc: null
161
+ value: 2000
162
+ max_grad_norm:
163
+ desc: null
164
+ value: 1.0
165
+ max_seq_length:
166
+ desc: null
167
+ value: 4096
168
+ max_steps:
169
+ desc: null
170
+ value: -1
171
+ metric_for_best_model:
172
+ desc: null
173
+ value: null
174
+ mlm_probability:
175
+ desc: null
176
+ value: 0.15
177
+ model_name_or_path:
178
+ desc: null
179
+ value: null
180
+ model_type:
181
+ desc: null
182
+ value: big_bird
183
+ mp_parameters:
184
+ desc: null
185
+ value: ''
186
+ no_cuda:
187
+ desc: null
188
+ value: false
189
+ num_train_epochs:
190
+ desc: null
191
+ value: 5.0
192
+ output_dir:
193
+ desc: null
194
+ value: ./
195
+ overwrite_cache:
196
+ desc: null
197
+ value: false
198
+ overwrite_output_dir:
199
+ desc: null
200
+ value: true
201
+ pad_to_max_length:
202
+ desc: null
203
+ value: false
204
+ past_index:
205
+ desc: null
206
+ value: -1
207
+ per_device_eval_batch_size:
208
+ desc: null
209
+ value: 2
210
+ per_device_train_batch_size:
211
+ desc: null
212
+ value: 2
213
+ per_gpu_eval_batch_size:
214
+ desc: null
215
+ value: null
216
+ per_gpu_train_batch_size:
217
+ desc: null
218
+ value: null
219
+ prediction_loss_only:
220
+ desc: null
221
+ value: false
222
+ preprocessing_num_workers:
223
+ desc: null
224
+ value: 96
225
+ push_to_hub:
226
+ desc: null
227
+ value: true
228
+ push_to_hub_model_id:
229
+ desc: null
230
+ value: ''
231
+ push_to_hub_organization:
232
+ desc: null
233
+ value: null
234
+ push_to_hub_token:
235
+ desc: null
236
+ value: null
237
+ remove_unused_columns:
238
+ desc: null
239
+ value: true
240
+ report_to:
241
+ desc: null
242
+ value:
243
+ - tensorboard
244
+ - wandb
245
+ resume_from_checkpoint:
246
+ desc: null
247
+ value: null
248
+ run_name:
249
+ desc: null
250
+ value: ./
251
+ save_on_each_node:
252
+ desc: null
253
+ value: false
254
+ save_optimizer:
255
+ desc: null
256
+ value: true
257
+ save_steps:
258
+ desc: null
259
+ value: 20000
260
+ save_strategy:
261
+ desc: null
262
+ value: IntervalStrategy.STEPS
263
+ save_total_limit:
264
+ desc: null
265
+ value: 5
266
+ seed:
267
+ desc: null
268
+ value: 42
269
+ sharded_ddp:
270
+ desc: null
271
+ value: []
272
+ skip_memory_metrics:
273
+ desc: null
274
+ value: true
275
+ tokenizer_name:
276
+ desc: null
277
+ value: ./
278
+ tpu_metrics_debug:
279
+ desc: null
280
+ value: false
281
+ tpu_num_cores:
282
+ desc: null
283
+ value: null
284
+ train_ref_file:
285
+ desc: null
286
+ value: null
287
+ use_fast_tokenizer:
288
+ desc: null
289
+ value: true
290
+ use_legacy_prediction_loop:
291
+ desc: null
292
+ value: false
293
+ validation_ref_file:
294
+ desc: null
295
+ value: null
296
+ validation_split_percentage:
297
+ desc: null
298
+ value: 5
299
+ warmup_ratio:
300
+ desc: null
301
+ value: 0.0
302
+ warmup_steps:
303
+ desc: null
304
+ value: 5000
305
+ weight_decay:
306
+ desc: null
307
+ value: 0.0095
wandb/run-20210714_222920-2p7mu4rm/files/output.log ADDED
@@ -0,0 +1,39 @@
1
+ /home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype <class 'jax._src.numpy.lax_numpy.int64'> requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more.
2
+ lax._check_user_dtype_supported(dtype, "zeros")
3
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
4
+ warnings.warn(
5
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
6
+ warnings.warn(
7
+ Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s][22:29:35] - INFO - __main__ - Skipping to epoch 0 step 0
8
+ Training...: 0%| | 0/503952 [01:24<?, ?it/s]
9
+ Epoch ... (1/5): 0%| | 0/5 [09:07<?, ?it/s]
10
+ Traceback (most recent call last):
11
+ File "./run_mlm_flax.py", line 804, in <module>
12
+ state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
13
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback
14
+ return fun(*args, **kwargs)
15
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped
16
+ out = pxla.xla_pmap(
17
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind
18
+ return call_bind(self, fun, *args, **params)
19
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind
20
+ outs = primitive.process(top_trace, fun, tracers, params)
21
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process
22
+ return trace.process_map(self, fun, tracers, params)
23
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call
24
+ return primitive.impl(f, *tracers, **params)
25
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl
26
+ return compiled_fun(*args)
27
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
28
+ out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
29
+ jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
30
+ The stack trace below excludes JAX-internal frames.
31
+ The preceding is the original exception that occurred, unmodified.
32
+ --------------------
33
+ The above exception was the direct cause of the following exception:
34
+ Traceback (most recent call last):
35
+ File "./run_mlm_flax.py", line 804, in <module>
36
+ state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
37
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
38
+ out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
39
+ RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
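The two UserWarnings at the top of this log come from a simple rename in JAX's multi-host API (jax.host_count -> jax.process_count, jax.host_id -> jax.process_index). A minimal illustrative sketch of the updated calls, shown here for reference only and not taken from run_mlm_flax.py:

import jax

# Old spellings that trigger the deprecation warnings captured above:
#   jax.host_count()  ->  jax.process_count()
#   jax.host_id()     ->  jax.process_index()
num_processes = jax.process_count()  # number of host processes in the job
process_rank = jax.process_index()   # index of this host process
print(num_processes, process_rank)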
wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210714_222920-2p7mu4rm/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-14T22:29:22.512026",
5
+ "startedAt": "2021-07-14T22:29:20.509023",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=5000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=500",
22
+ "--eval_steps=20000",
23
+ "--num_train_epochs=5",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=20000",
26
+ "--learning_rate=5e-5",
27
+ "--per_device_train_batch_size=2",
28
+ "--per_device_eval_batch_size=2",
29
+ "--save_total_limit=5",
30
+ "--max_eval_samples=2000",
31
+ "--overwrite_cache",
32
+ "False",
33
+ "--gradient_accumulation_steps=4"
34
+ ],
35
+ "state": "running",
36
+ "program": "./run_mlm_flax.py",
37
+ "codePath": "run_mlm_flax.py",
38
+ "git": {
39
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
40
+ "commit": "de71755314d4aa3ab182072c6f0be3de9798a66b"
41
+ },
42
+ "email": null,
43
+ "root": "/home/dat/pino-roberta-base",
44
+ "host": "t1v-n-f5c06ea1-w-0",
45
+ "username": "dat",
46
+ "executable": "/home/dat/pino/bin/python"
47
+ }
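Since wandb-metadata.json records the interpreter, program, and full argv, the exact launch command for this run can be rebuilt from it. A small illustrative script (not part of this repository), assuming it is executed from the repo root:

import json
import shlex

with open("wandb/run-20210714_222920-2p7mu4rm/files/wandb-metadata.json") as f:
    meta = json.load(f)

# Reassemble: interpreter + program + the recorded argument list.
cmd = " ".join([meta["executable"], meta["program"], *map(shlex.quote, meta["args"])])
print(cmd)  # /home/dat/pino/bin/python ./run_mlm_flax.py --push_to_hub --output_dir=./ ...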
wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {}
wandb/run-20210714_222920-2p7mu4rm/logs/debug-internal.log ADDED
@@ -0,0 +1,232 @@
1
+ 2021-07-14 22:29:21,187 INFO MainThread:596546 [internal.py:wandb_internal():88] W&B internal server running at pid: 596546, started at: 2021-07-14 22:29:21.187444
2
+ 2021-07-14 22:29:21,189 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: check_version
3
+ 2021-07-14 22:29:21,189 INFO WriterThread:596546 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/run-2p7mu4rm.wandb
4
+ 2021-07-14 22:29:21,191 DEBUG SenderThread:596546 [sender.py:send():179] send: header
5
+ 2021-07-14 22:29:21,191 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: check_version
6
+ 2021-07-14 22:29:21,226 DEBUG SenderThread:596546 [sender.py:send():179] send: run
7
+ 2021-07-14 22:29:21,391 INFO SenderThread:596546 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files
8
+ 2021-07-14 22:29:21,391 INFO SenderThread:596546 [sender.py:_start_run_threads():716] run started: 2p7mu4rm with start time 1626301760
9
+ 2021-07-14 22:29:21,391 DEBUG SenderThread:596546 [sender.py:send():179] send: summary
10
+ 2021-07-14 22:29:21,391 INFO SenderThread:596546 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
11
+ 2021-07-14 22:29:21,392 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: run_start
12
+ 2021-07-14 22:29:22,397 INFO Thread-8 :596546 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json
13
+ 2021-07-14 22:29:22,511 DEBUG HandlerThread:596546 [meta.py:__init__():39] meta init
14
+ 2021-07-14 22:29:22,511 DEBUG HandlerThread:596546 [meta.py:__init__():53] meta init done
15
+ 2021-07-14 22:29:22,511 DEBUG HandlerThread:596546 [meta.py:probe():210] probe
16
+ 2021-07-14 22:29:22,513 DEBUG HandlerThread:596546 [meta.py:_setup_git():200] setup git
17
+ 2021-07-14 22:29:22,541 DEBUG HandlerThread:596546 [meta.py:_setup_git():207] setup git done
18
+ 2021-07-14 22:29:22,541 DEBUG HandlerThread:596546 [meta.py:_save_pip():57] save pip
19
+ 2021-07-14 22:29:22,542 DEBUG HandlerThread:596546 [meta.py:_save_pip():71] save pip done
20
+ 2021-07-14 22:29:22,542 DEBUG HandlerThread:596546 [meta.py:probe():252] probe done
21
+ 2021-07-14 22:29:22,545 DEBUG SenderThread:596546 [sender.py:send():179] send: files
22
+ 2021-07-14 22:29:22,545 INFO SenderThread:596546 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
23
+ 2021-07-14 22:29:22,551 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
24
+ 2021-07-14 22:29:22,551 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
25
+ 2021-07-14 22:29:22,678 DEBUG SenderThread:596546 [sender.py:send():179] send: config
26
+ 2021-07-14 22:29:22,678 DEBUG SenderThread:596546 [sender.py:send():179] send: config
27
+ 2021-07-14 22:29:22,679 DEBUG SenderThread:596546 [sender.py:send():179] send: config
28
+ 2021-07-14 22:29:22,981 INFO Thread-11 :596546 [upload_job.py:push():137] Uploaded file /tmp/tmpkw6g32phwandb/2nns5d67-wandb-metadata.json
29
+ 2021-07-14 22:29:23,396 INFO Thread-8 :596546 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt
30
+ 2021-07-14 22:29:23,396 INFO Thread-8 :596546 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-metadata.json
31
+ 2021-07-14 22:29:23,397 INFO Thread-8 :596546 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log
32
+ 2021-07-14 22:29:37,401 INFO Thread-8 :596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log
33
+ 2021-07-14 22:29:37,681 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
34
+ 2021-07-14 22:29:37,681 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
35
+ 2021-07-14 22:29:50,595 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
36
+ 2021-07-14 22:29:52,407 INFO Thread-8 :596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/config.yaml
37
+ 2021-07-14 22:29:52,815 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
38
+ 2021-07-14 22:29:52,815 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
39
+ 2021-07-14 22:30:07,946 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
40
+ 2021-07-14 22:30:07,947 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
41
+ 2021-07-14 22:30:20,679 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
42
+ 2021-07-14 22:30:23,081 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
43
+ 2021-07-14 22:30:23,081 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
44
+ 2021-07-14 22:30:38,211 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
45
+ 2021-07-14 22:30:38,212 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
46
+ 2021-07-14 22:30:50,744 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
47
+ 2021-07-14 22:30:53,343 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
48
+ 2021-07-14 22:30:53,344 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
49
+ 2021-07-14 22:31:08,475 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
50
+ 2021-07-14 22:31:08,476 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
51
+ 2021-07-14 22:31:20,817 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
52
+ 2021-07-14 22:31:23,611 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
53
+ 2021-07-14 22:31:23,611 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
54
+ 2021-07-14 22:31:38,742 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
55
+ 2021-07-14 22:31:38,742 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
56
+ 2021-07-14 22:31:50,892 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
57
+ 2021-07-14 22:31:53,876 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
58
+ 2021-07-14 22:31:53,876 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
59
+ 2021-07-14 22:32:09,009 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
60
+ 2021-07-14 22:32:09,010 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
61
+ 2021-07-14 22:32:20,968 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
62
+ 2021-07-14 22:32:24,154 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
63
+ 2021-07-14 22:32:24,154 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
64
+ 2021-07-14 22:32:39,289 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
65
+ 2021-07-14 22:32:39,289 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
66
+ 2021-07-14 22:32:51,042 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
67
+ 2021-07-14 22:32:54,420 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
68
+ 2021-07-14 22:32:54,420 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
69
+ 2021-07-14 22:33:09,552 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
70
+ 2021-07-14 22:33:09,552 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
71
+ 2021-07-14 22:33:21,119 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
72
+ 2021-07-14 22:33:24,688 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
73
+ 2021-07-14 22:33:24,689 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
74
+ 2021-07-14 22:33:39,824 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
75
+ 2021-07-14 22:33:39,825 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
76
+ 2021-07-14 22:33:51,197 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
77
+ 2021-07-14 22:33:54,955 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
78
+ 2021-07-14 22:33:54,955 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
79
+ 2021-07-14 22:34:10,085 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
80
+ 2021-07-14 22:34:10,086 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
81
+ 2021-07-14 22:34:21,275 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
82
+ 2021-07-14 22:34:25,221 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
83
+ 2021-07-14 22:34:25,221 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
84
+ 2021-07-14 22:34:40,360 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
85
+ 2021-07-14 22:34:40,360 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
86
+ 2021-07-14 22:34:51,349 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
87
+ 2021-07-14 22:34:55,491 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
88
+ 2021-07-14 22:34:55,491 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
89
+ 2021-07-14 22:35:10,620 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
90
+ 2021-07-14 22:35:10,621 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
91
+ 2021-07-14 22:35:21,421 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
92
+ 2021-07-14 22:35:25,755 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
93
+ 2021-07-14 22:35:25,755 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
94
+ 2021-07-14 22:35:40,915 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
95
+ 2021-07-14 22:35:40,916 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
96
+ 2021-07-14 22:35:51,496 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
97
+ 2021-07-14 22:35:56,049 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
98
+ 2021-07-14 22:35:56,049 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
99
+ 2021-07-14 22:36:11,183 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
100
+ 2021-07-14 22:36:11,184 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
101
+ 2021-07-14 22:36:21,575 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
102
+ 2021-07-14 22:36:26,315 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
103
+ 2021-07-14 22:36:26,316 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
104
+ 2021-07-14 22:36:41,448 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
105
+ 2021-07-14 22:36:41,448 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
106
+ 2021-07-14 22:36:51,652 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
107
+ 2021-07-14 22:36:56,580 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
108
+ 2021-07-14 22:36:56,581 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
109
+ 2021-07-14 22:37:11,712 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
110
+ 2021-07-14 22:37:11,712 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
111
+ 2021-07-14 22:37:21,566 INFO Thread-8 :596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log
112
+ 2021-07-14 22:37:21,727 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
113
+ 2021-07-14 22:37:27,049 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
114
+ 2021-07-14 22:37:27,050 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
115
+ 2021-07-14 22:37:42,194 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
116
+ 2021-07-14 22:37:42,194 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
117
+ 2021-07-14 22:37:51,805 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
118
+ 2021-07-14 22:37:57,327 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
119
+ 2021-07-14 22:37:57,327 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
120
+ 2021-07-14 22:38:12,463 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
121
+ 2021-07-14 22:38:12,464 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
122
+ 2021-07-14 22:38:21,882 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
123
+ 2021-07-14 22:38:27,596 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
124
+ 2021-07-14 22:38:27,596 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
125
+ 2021-07-14 22:38:42,728 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: stop_status
126
+ 2021-07-14 22:38:42,728 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: stop_status
127
+ 2021-07-14 22:38:45,598 INFO Thread-8 :596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log
128
+ 2021-07-14 22:38:45,823 DEBUG SenderThread:596546 [sender.py:send():179] send: telemetry
129
+ 2021-07-14 22:38:45,823 DEBUG SenderThread:596546 [sender.py:send():179] send: exit
130
+ 2021-07-14 22:38:45,823 INFO SenderThread:596546 [sender.py:send_exit():287] handling exit code: 1
131
+ 2021-07-14 22:38:45,824 INFO SenderThread:596546 [sender.py:send_exit():295] send defer
132
+ 2021-07-14 22:38:45,824 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
133
+ 2021-07-14 22:38:45,825 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
134
+ 2021-07-14 22:38:45,825 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
135
+ 2021-07-14 22:38:45,825 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 0
136
+ 2021-07-14 22:38:45,825 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
137
+ 2021-07-14 22:38:45,825 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 0
138
+ 2021-07-14 22:38:45,825 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 1
139
+ 2021-07-14 22:38:45,826 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
140
+ 2021-07-14 22:38:45,826 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 1
141
+ 2021-07-14 22:38:45,857 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
142
+ 2021-07-14 22:38:45,857 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 1
143
+ 2021-07-14 22:38:45,857 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 2
144
+ 2021-07-14 22:38:45,857 DEBUG SenderThread:596546 [sender.py:send():179] send: stats
145
+ 2021-07-14 22:38:45,857 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
146
+ 2021-07-14 22:38:45,858 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 2
147
+ 2021-07-14 22:38:45,858 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
148
+ 2021-07-14 22:38:45,858 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 2
149
+ 2021-07-14 22:38:45,858 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 3
150
+ 2021-07-14 22:38:45,859 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
151
+ 2021-07-14 22:38:45,859 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 3
152
+ 2021-07-14 22:38:45,859 DEBUG SenderThread:596546 [sender.py:send():179] send: summary
153
+ 2021-07-14 22:38:45,859 INFO SenderThread:596546 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
154
+ 2021-07-14 22:38:45,859 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
155
+ 2021-07-14 22:38:45,859 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 3
156
+ 2021-07-14 22:38:45,859 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 4
157
+ 2021-07-14 22:38:45,860 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
158
+ 2021-07-14 22:38:45,860 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 4
159
+ 2021-07-14 22:38:45,860 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
160
+ 2021-07-14 22:38:45,860 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 4
161
+ 2021-07-14 22:38:45,927 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
162
+ 2021-07-14 22:38:46,024 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 5
163
+ 2021-07-14 22:38:46,024 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
164
+ 2021-07-14 22:38:46,024 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
165
+ 2021-07-14 22:38:46,024 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 5
166
+ 2021-07-14 22:38:46,025 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
167
+ 2021-07-14 22:38:46,025 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 5
168
+ 2021-07-14 22:38:46,025 INFO SenderThread:596546 [dir_watcher.py:finish():282] shutting down directory watcher
169
+ 2021-07-14 22:38:46,126 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
170
+ 2021-07-14 22:38:46,598 INFO Thread-8 :596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json
171
+ 2021-07-14 22:38:46,599 INFO SenderThread:596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/config.yaml
172
+ 2021-07-14 22:38:46,599 INFO SenderThread:596546 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log
173
+ 2021-07-14 22:38:46,599 INFO SenderThread:596546 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files
174
+ 2021-07-14 22:38:46,599 INFO SenderThread:596546 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt requirements.txt
175
+ 2021-07-14 22:38:46,600 INFO SenderThread:596546 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log output.log
176
+ 2021-07-14 22:38:46,600 INFO SenderThread:596546 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-metadata.json wandb-metadata.json
177
+ 2021-07-14 22:38:46,600 INFO SenderThread:596546 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/config.yaml config.yaml
178
+ 2021-07-14 22:38:46,600 INFO SenderThread:596546 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json wandb-summary.json
179
+ 2021-07-14 22:38:46,603 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 6
180
+ 2021-07-14 22:38:46,604 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
181
+ 2021-07-14 22:38:46,607 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
182
+ 2021-07-14 22:38:46,607 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 6
183
+ 2021-07-14 22:38:46,608 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
184
+ 2021-07-14 22:38:46,610 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 6
185
+ 2021-07-14 22:38:46,610 INFO SenderThread:596546 [file_pusher.py:finish():177] shutting down file pusher
186
+ 2021-07-14 22:38:46,708 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
187
+ 2021-07-14 22:38:46,709 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
188
+ 2021-07-14 22:38:46,811 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
189
+ 2021-07-14 22:38:46,811 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
190
+ 2021-07-14 22:38:46,913 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
191
+ 2021-07-14 22:38:46,913 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
192
+ 2021-07-14 22:38:47,015 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
193
+ 2021-07-14 22:38:47,015 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
194
+ 2021-07-14 22:38:47,054 INFO Thread-14 :596546 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/config.yaml
195
+ 2021-07-14 22:38:47,063 INFO Thread-12 :596546 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/requirements.txt
196
+ 2021-07-14 22:38:47,074 INFO Thread-13 :596546 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/output.log
197
+ 2021-07-14 22:38:47,095 INFO Thread-15 :596546 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/files/wandb-summary.json
198
+ 2021-07-14 22:38:47,117 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
199
+ 2021-07-14 22:38:47,117 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
200
+ 2021-07-14 22:38:47,219 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
201
+ 2021-07-14 22:38:47,219 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
202
+ 2021-07-14 22:38:47,295 INFO Thread-7 :596546 [sender.py:transition_state():308] send defer: 7
203
+ 2021-07-14 22:38:47,295 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
204
+ 2021-07-14 22:38:47,296 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 7
205
+ 2021-07-14 22:38:47,296 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
206
+ 2021-07-14 22:38:47,296 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 7
207
+ 2021-07-14 22:38:47,321 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
208
+ 2021-07-14 22:38:47,939 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 8
209
+ 2021-07-14 22:38:47,940 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
210
+ 2021-07-14 22:38:47,940 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
211
+ 2021-07-14 22:38:47,940 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 8
212
+ 2021-07-14 22:38:47,940 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
213
+ 2021-07-14 22:38:47,941 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 8
214
+ 2021-07-14 22:38:47,941 INFO SenderThread:596546 [sender.py:transition_state():308] send defer: 9
215
+ 2021-07-14 22:38:47,941 DEBUG SenderThread:596546 [sender.py:send():179] send: final
216
+ 2021-07-14 22:38:47,941 DEBUG SenderThread:596546 [sender.py:send():179] send: footer
217
+ 2021-07-14 22:38:47,942 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: defer
218
+ 2021-07-14 22:38:47,942 INFO HandlerThread:596546 [handler.py:handle_request_defer():141] handle defer: 9
219
+ 2021-07-14 22:38:47,942 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: defer
220
+ 2021-07-14 22:38:47,942 INFO SenderThread:596546 [sender.py:send_request_defer():304] handle sender defer: 9
221
+ 2021-07-14 22:38:48,042 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: poll_exit
222
+ 2021-07-14 22:38:48,042 DEBUG SenderThread:596546 [sender.py:send_request():193] send_request: poll_exit
223
+ 2021-07-14 22:38:48,042 INFO SenderThread:596546 [file_pusher.py:join():182] waiting for file pusher
224
+ 2021-07-14 22:38:48,044 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: get_summary
225
+ 2021-07-14 22:38:48,044 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: sampled_history
226
+ 2021-07-14 22:38:48,045 DEBUG HandlerThread:596546 [handler.py:handle_request():124] handle_request: shutdown
227
+ 2021-07-14 22:38:48,045 INFO HandlerThread:596546 [handler.py:finish():638] shutting down handler
228
+ 2021-07-14 22:38:48,942 INFO WriterThread:596546 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/run-2p7mu4rm.wandb
229
+ 2021-07-14 22:38:49,042 INFO SenderThread:596546 [sender.py:finish():945] shutting down sender
230
+ 2021-07-14 22:38:49,043 INFO SenderThread:596546 [file_pusher.py:finish():177] shutting down file pusher
231
+ 2021-07-14 22:38:49,043 INFO SenderThread:596546 [file_pusher.py:join():182] waiting for file pusher
232
+ 2021-07-14 22:38:49,045 INFO MainThread:596546 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210714_222920-2p7mu4rm/logs/debug.log ADDED
@@ -0,0 +1,119 @@
1
+ 2021-07-14 22:29:20,510 INFO MainThread:595290 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-14 22:29:20,510 INFO MainThread:595290 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-14 22:29:20,510 INFO MainThread:595290 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/logs/debug.log
4
+ 2021-07-14 22:29:20,510 INFO MainThread:595290 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_222920-2p7mu4rm/logs/debug-internal.log
5
+ 2021-07-14 22:29:20,511 INFO MainThread:595290 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-14 22:29:20,511 INFO MainThread:595290 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-14 22:29:20,511 INFO MainThread:595290 [wandb_init.py:init():419] starting backend
9
+ 2021-07-14 22:29:20,511 INFO MainThread:595290 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-14 22:29:20,555 INFO MainThread:595290 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-14 22:29:20,597 INFO MainThread:595290 [backend.py:ensure_launched():139] started backend process with pid: 596546
12
+ 2021-07-14 22:29:20,599 INFO MainThread:595290 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-14 22:29:20,602 INFO MainThread:595290 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-14 22:29:20,603 INFO MainThread:595290 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-14 22:29:21,225 INFO MainThread:595290 [wandb_init.py:init():496] got version response
16
+ 2021-07-14 22:29:21,226 INFO MainThread:595290 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-14 22:29:21,391 INFO MainThread:595290 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-14 22:29:22,548 INFO MainThread:595290 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-14 22:29:22,548 INFO MainThread:595290 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-14 22:29:22,549 INFO MainThread:595290 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-14 22:29:22,551 INFO MainThread:595290 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-14 22:29:22,551 INFO MainThread:595290 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-14 22:29:22,559 INFO MainThread:595290 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 5000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_22-29-13_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-14 22:29:22,561 INFO MainThread:595290 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
25
+ 2021-07-14 22:29:22,562 INFO MainThread:595290 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000}
26
+ 2021-07-14 22:38:43,366 INFO MainThread:595290 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
27
+ 2021-07-14 22:38:43,368 INFO MainThread:595290 [wandb_run.py:_restore():1565] restore
28
+ 2021-07-14 22:38:45,826 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
29
+ wandb_count: 1
30
+ }
31
+ pusher_stats {
32
+ uploaded_bytes: 1447
33
+ total_bytes: 1447
34
+ }
35
+
36
+ 2021-07-14 22:38:46,025 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
37
+ wandb_count: 1
38
+ }
39
+ pusher_stats {
40
+ uploaded_bytes: 1447
41
+ total_bytes: 1447
42
+ }
43
+
44
+ 2021-07-14 22:38:46,607 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
45
+ wandb_count: 4
46
+ }
47
+ pusher_stats {
48
+ uploaded_bytes: 1447
49
+ total_bytes: 11500
50
+ }
51
+
52
+ 2021-07-14 22:38:46,709 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
53
+ wandb_count: 5
54
+ }
55
+ pusher_stats {
56
+ uploaded_bytes: 1447
57
+ total_bytes: 11502
58
+ }
59
+
60
+ 2021-07-14 22:38:46,812 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
61
+ wandb_count: 5
62
+ }
63
+ pusher_stats {
64
+ uploaded_bytes: 11502
65
+ total_bytes: 11502
66
+ }
67
+
68
+ 2021-07-14 22:38:46,914 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
69
+ wandb_count: 5
70
+ }
71
+ pusher_stats {
72
+ uploaded_bytes: 11502
73
+ total_bytes: 11502
74
+ }
75
+
76
+ 2021-07-14 22:38:47,016 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
77
+ wandb_count: 5
78
+ }
79
+ pusher_stats {
80
+ uploaded_bytes: 11502
81
+ total_bytes: 11502
82
+ }
83
+
84
+ 2021-07-14 22:38:47,118 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
85
+ wandb_count: 5
86
+ }
87
+ pusher_stats {
88
+ uploaded_bytes: 11502
89
+ total_bytes: 11502
90
+ }
91
+
92
+ 2021-07-14 22:38:47,220 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
93
+ wandb_count: 5
94
+ }
95
+ pusher_stats {
96
+ uploaded_bytes: 11502
97
+ total_bytes: 11502
98
+ }
99
+
100
+ 2021-07-14 22:38:47,940 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
101
+ wandb_count: 5
102
+ }
103
+ pusher_stats {
104
+ uploaded_bytes: 11502
105
+ total_bytes: 11502
106
+ }
107
+
108
+ 2021-07-14 22:38:48,043 INFO MainThread:595290 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
109
+ exit_result {
110
+ }
111
+ file_counts {
112
+ wandb_count: 5
113
+ }
114
+ pusher_stats {
115
+ uploaded_bytes: 11502
116
+ total_bytes: 11502
117
+ }
118
+
119
+ 2021-07-14 22:38:49,338 INFO MainThread:595290 [wandb_run.py:_show_files():1937] logging synced files
wandb/run-20210714_222920-2p7mu4rm/run-2p7mu4rm.wandb ADDED
Binary file (11.6 kB)
wandb/run-20210714_224000-1jvvynqa/files/config.yaml ADDED
@@ -0,0 +1,307 @@
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 2:
17
+ - 3
18
+ - 11
19
+ 4: 3.8.10
20
+ 5: 0.10.33
21
+ 6: 4.9.0.dev0
22
+ 8:
23
+ - 5
24
+ adafactor:
25
+ desc: null
26
+ value: false
27
+ adam_beta1:
28
+ desc: null
29
+ value: 0.9
30
+ adam_beta2:
31
+ desc: null
32
+ value: 0.98
33
+ adam_epsilon:
34
+ desc: null
35
+ value: 1.0e-08
36
+ cache_dir:
37
+ desc: null
38
+ value: null
39
+ config_name:
40
+ desc: null
41
+ value: ./
42
+ dataloader_drop_last:
43
+ desc: null
44
+ value: false
45
+ dataloader_num_workers:
46
+ desc: null
47
+ value: 0
48
+ dataloader_pin_memory:
49
+ desc: null
50
+ value: true
51
+ dataset_config_name:
52
+ desc: null
53
+ value: null
54
+ dataset_name:
55
+ desc: null
56
+ value: null
57
+ ddp_find_unused_parameters:
58
+ desc: null
59
+ value: null
60
+ debug:
61
+ desc: null
62
+ value: []
63
+ deepspeed:
64
+ desc: null
65
+ value: null
66
+ disable_tqdm:
67
+ desc: null
68
+ value: false
69
+ do_eval:
70
+ desc: null
71
+ value: false
72
+ do_predict:
73
+ desc: null
74
+ value: false
75
+ do_train:
76
+ desc: null
77
+ value: false
78
+ dtype:
79
+ desc: null
80
+ value: float32
81
+ eval_accumulation_steps:
82
+ desc: null
83
+ value: null
84
+ eval_steps:
85
+ desc: null
86
+ value: 20000
87
+ evaluation_strategy:
88
+ desc: null
89
+ value: IntervalStrategy.NO
90
+ fp16:
91
+ desc: null
92
+ value: false
93
+ fp16_backend:
94
+ desc: null
95
+ value: auto
96
+ fp16_full_eval:
97
+ desc: null
98
+ value: false
99
+ fp16_opt_level:
100
+ desc: null
101
+ value: O1
102
+ gradient_accumulation_steps:
103
+ desc: null
104
+ value: 8
105
+ greater_is_better:
106
+ desc: null
107
+ value: null
108
+ group_by_length:
109
+ desc: null
110
+ value: false
111
+ ignore_data_skip:
112
+ desc: null
113
+ value: false
114
+ label_names:
115
+ desc: null
116
+ value: null
117
+ label_smoothing_factor:
118
+ desc: null
119
+ value: 0.0
120
+ learning_rate:
121
+ desc: null
122
+ value: 5.0e-05
123
+ length_column_name:
124
+ desc: null
125
+ value: length
126
+ line_by_line:
127
+ desc: null
128
+ value: false
129
+ load_best_model_at_end:
130
+ desc: null
131
+ value: false
132
+ local_rank:
133
+ desc: null
134
+ value: -1
135
+ log_level:
136
+ desc: null
137
+ value: -1
138
+ log_level_replica:
139
+ desc: null
140
+ value: -1
141
+ log_on_each_node:
142
+ desc: null
143
+ value: true
144
+ logging_dir:
145
+ desc: null
146
+ value: ./runs/Jul14_22-39-51_t1v-n-f5c06ea1-w-0
147
+ logging_first_step:
148
+ desc: null
149
+ value: false
150
+ logging_steps:
151
+ desc: null
152
+ value: 500
153
+ logging_strategy:
154
+ desc: null
155
+ value: IntervalStrategy.STEPS
156
+ lr_scheduler_type:
157
+ desc: null
158
+ value: SchedulerType.LINEAR
159
+ max_eval_samples:
160
+ desc: null
161
+ value: 2000
162
+ max_grad_norm:
163
+ desc: null
164
+ value: 1.0
165
+ max_seq_length:
166
+ desc: null
167
+ value: 4096
168
+ max_steps:
169
+ desc: null
170
+ value: -1
171
+ metric_for_best_model:
172
+ desc: null
173
+ value: null
174
+ mlm_probability:
175
+ desc: null
176
+ value: 0.15
177
+ model_name_or_path:
178
+ desc: null
179
+ value: null
180
+ model_type:
181
+ desc: null
182
+ value: big_bird
183
+ mp_parameters:
184
+ desc: null
185
+ value: ''
186
+ no_cuda:
187
+ desc: null
188
+ value: false
189
+ num_train_epochs:
190
+ desc: null
191
+ value: 5.0
192
+ output_dir:
193
+ desc: null
194
+ value: ./
195
+ overwrite_cache:
196
+ desc: null
197
+ value: false
198
+ overwrite_output_dir:
199
+ desc: null
200
+ value: true
201
+ pad_to_max_length:
202
+ desc: null
203
+ value: false
204
+ past_index:
205
+ desc: null
206
+ value: -1
207
+ per_device_eval_batch_size:
208
+ desc: null
209
+ value: 2
210
+ per_device_train_batch_size:
211
+ desc: null
212
+ value: 2
213
+ per_gpu_eval_batch_size:
214
+ desc: null
215
+ value: null
216
+ per_gpu_train_batch_size:
217
+ desc: null
218
+ value: null
219
+ prediction_loss_only:
220
+ desc: null
221
+ value: false
222
+ preprocessing_num_workers:
223
+ desc: null
224
+ value: 96
225
+ push_to_hub:
226
+ desc: null
227
+ value: true
228
+ push_to_hub_model_id:
229
+ desc: null
230
+ value: ''
231
+ push_to_hub_organization:
232
+ desc: null
233
+ value: null
234
+ push_to_hub_token:
235
+ desc: null
236
+ value: null
237
+ remove_unused_columns:
238
+ desc: null
239
+ value: true
240
+ report_to:
241
+ desc: null
242
+ value:
243
+ - tensorboard
244
+ - wandb
245
+ resume_from_checkpoint:
246
+ desc: null
247
+ value: null
248
+ run_name:
249
+ desc: null
250
+ value: ./
251
+ save_on_each_node:
252
+ desc: null
253
+ value: false
254
+ save_optimizer:
255
+ desc: null
256
+ value: true
257
+ save_steps:
258
+ desc: null
259
+ value: 20000
260
+ save_strategy:
261
+ desc: null
262
+ value: IntervalStrategy.STEPS
263
+ save_total_limit:
264
+ desc: null
265
+ value: 5
266
+ seed:
267
+ desc: null
268
+ value: 42
269
+ sharded_ddp:
270
+ desc: null
271
+ value: []
272
+ skip_memory_metrics:
273
+ desc: null
274
+ value: true
275
+ tokenizer_name:
276
+ desc: null
277
+ value: ./
278
+ tpu_metrics_debug:
279
+ desc: null
280
+ value: false
281
+ tpu_num_cores:
282
+ desc: null
283
+ value: null
284
+ train_ref_file:
285
+ desc: null
286
+ value: null
287
+ use_fast_tokenizer:
288
+ desc: null
289
+ value: true
290
+ use_legacy_prediction_loop:
291
+ desc: null
292
+ value: false
293
+ validation_ref_file:
294
+ desc: null
295
+ value: null
296
+ validation_split_percentage:
297
+ desc: null
298
+ value: 5
299
+ warmup_ratio:
300
+ desc: null
301
+ value: 0.0
302
+ warmup_steps:
303
+ desc: null
304
+ value: 10000
305
+ weight_decay:
306
+ desc: null
307
+ value: 0.0095
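Every training argument in this file is wrapped in a desc/value pair, so the hyperparameters can be pulled back out with PyYAML (pyyaml==5.4.1 is pinned in this run's requirements.txt). A minimal sketch, assuming it runs from the repo root; it is an illustration, not code used by the run itself:

import yaml

with open("wandb/run-20210714_224000-1jvvynqa/files/config.yaml") as f:
    raw = yaml.safe_load(f)

# Unwrap the {"desc": ..., "value": ...} mapping around each setting,
# skipping the internal `_wandb` bookkeeping block.
config = {k: v["value"] for k, v in raw.items() if k != "_wandb"}
print(config["warmup_steps"], config["gradient_accumulation_steps"])  # 10000 8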
wandb/run-20210714_224000-1jvvynqa/files/output.log ADDED
@@ -0,0 +1,39 @@
1
+ /home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype <class 'jax._src.numpy.lax_numpy.int64'> requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more.
2
+ lax._check_user_dtype_supported(dtype, "zeros")
3
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
4
+ warnings.warn(
5
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
6
+ warnings.warn(
7
+ Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s][22:40:16] - INFO - __main__ - Skipping to epoch 0 step 0
8
+ Training...: 0%| | 0/503952 [01:24<?, ?it/s]
9
+ Epoch ... (1/5): 0%| | 0/5 [09:14<?, ?it/s]
10
+ Traceback (most recent call last):
11
+ File "./run_mlm_flax.py", line 804, in <module>
12
+ num_train_samples = len(tokenized_datasets["train"])
13
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback
14
+ return fun(*args, **kwargs)
15
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped
16
+ out = pxla.xla_pmap(
17
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind
18
+ return call_bind(self, fun, *args, **params)
19
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind
20
+ outs = primitive.process(top_trace, fun, tracers, params)
21
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process
22
+ return trace.process_map(self, fun, tracers, params)
23
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call
24
+ return primitive.impl(f, *tracers, **params)
25
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl
26
+ return compiled_fun(*args)
27
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
28
+ out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
29
+ jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
30
+ The stack trace below excludes JAX-internal frames.
31
+ The preceding is the original exception that occurred, unmodified.
32
+ --------------------
33
+ The above exception was the direct cause of the following exception:
34
+ Traceback (most recent call last):
35
+ File "./run_mlm_flax.py", line 804, in <module>
36
+ num_train_samples = len(tokenized_datasets["train"])
37
+ File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
38
+ out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
39
+ RuntimeError: Resource exhausted: Attempting to reserve 12.60G at the bottom of memory. That was not possible. There are 5.86G free, 0B reserved, and 5.65G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
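For context on the numbers in this failure, the run's recorded hyperparameters imply the following per-device and per-update sizes. A back-of-the-envelope sketch only; it assumes 8 local TPU cores and the conventional reading of gradient_accumulation_steps (neither is stated in the logs):

# Hyperparameters recorded in this run's config.yaml.
per_device_train_batch_size = 2
gradient_accumulation_steps = 8
max_seq_length = 4096
local_devices = 8  # assumption: a v3-8 host exposes 8 cores

# Sequences contributing to one optimizer update, assuming accumulation
# multiplies the per-update batch in the usual way.
sequences_per_update = per_device_train_batch_size * local_devices * gradient_accumulation_steps
# Tokens a single device holds for one forward/backward pass.
tokens_per_device_per_micro_batch = per_device_train_batch_size * max_seq_length

print(sequences_per_update)                # 128
print(tokens_per_device_per_micro_batch)   # 8192
# The log above reports a 12.60G reservation against ~5.86G of free device
# memory, so the per-device footprint at this setting does not fit.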
wandb/run-20210714_224000-1jvvynqa/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210714_224000-1jvvynqa/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-14T22:40:02.351628",
5
+ "startedAt": "2021-07-14T22:40:00.340218",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=10000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=500",
22
+ "--eval_steps=20000",
23
+ "--num_train_epochs=5",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=20000",
26
+ "--learning_rate=5e-5",
27
+ "--per_device_train_batch_size=2",
28
+ "--per_device_eval_batch_size=2",
29
+ "--save_total_limit=5",
30
+ "--max_eval_samples=2000",
31
+ "--overwrite_cache",
32
+ "False",
33
+ "--gradient_accumulation_steps=8"
34
+ ],
35
+ "state": "running",
36
+ "program": "./run_mlm_flax.py",
37
+ "codePath": "run_mlm_flax.py",
38
+ "git": {
39
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
40
+ "commit": "de71755314d4aa3ab182072c6f0be3de9798a66b"
41
+ },
42
+ "email": null,
43
+ "root": "/home/dat/pino-roberta-base",
44
+ "host": "t1v-n-f5c06ea1-w-0",
45
+ "username": "dat",
46
+ "executable": "/home/dat/pino/bin/python"
47
+ }
wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {}
wandb/run-20210714_224000-1jvvynqa/logs/debug-internal.log ADDED
@@ -0,0 +1,236 @@
1
+ 2021-07-14 22:40:01,029 INFO MainThread:598803 [internal.py:wandb_internal():88] W&B internal server running at pid: 598803, started at: 2021-07-14 22:40:01.029595
2
+ 2021-07-14 22:40:01,031 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: check_version
3
+ 2021-07-14 22:40:01,032 INFO WriterThread:598803 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/run-1jvvynqa.wandb
4
+ 2021-07-14 22:40:01,033 DEBUG SenderThread:598803 [sender.py:send():179] send: header
5
+ 2021-07-14 22:40:01,033 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: check_version
6
+ 2021-07-14 22:40:01,069 DEBUG SenderThread:598803 [sender.py:send():179] send: run
7
+ 2021-07-14 22:40:01,239 INFO SenderThread:598803 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files
8
+ 2021-07-14 22:40:01,240 INFO SenderThread:598803 [sender.py:_start_run_threads():716] run started: 1jvvynqa with start time 1626302400
9
+ 2021-07-14 22:40:01,240 DEBUG SenderThread:598803 [sender.py:send():179] send: summary
10
+ 2021-07-14 22:40:01,240 INFO SenderThread:598803 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
11
+ 2021-07-14 22:40:01,240 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: run_start
12
+ 2021-07-14 22:40:02,242 INFO Thread-8 :598803 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json
13
+ 2021-07-14 22:40:02,351 DEBUG HandlerThread:598803 [meta.py:__init__():39] meta init
14
+ 2021-07-14 22:40:02,351 DEBUG HandlerThread:598803 [meta.py:__init__():53] meta init done
15
+ 2021-07-14 22:40:02,351 DEBUG HandlerThread:598803 [meta.py:probe():210] probe
16
+ 2021-07-14 22:40:02,352 DEBUG HandlerThread:598803 [meta.py:_setup_git():200] setup git
17
+ 2021-07-14 22:40:02,381 DEBUG HandlerThread:598803 [meta.py:_setup_git():207] setup git done
18
+ 2021-07-14 22:40:02,381 DEBUG HandlerThread:598803 [meta.py:_save_pip():57] save pip
19
+ 2021-07-14 22:40:02,382 DEBUG HandlerThread:598803 [meta.py:_save_pip():71] save pip done
20
+ 2021-07-14 22:40:02,382 DEBUG HandlerThread:598803 [meta.py:probe():252] probe done
21
+ 2021-07-14 22:40:02,385 DEBUG SenderThread:598803 [sender.py:send():179] send: files
22
+ 2021-07-14 22:40:02,385 INFO SenderThread:598803 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
23
+ 2021-07-14 22:40:02,390 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
24
+ 2021-07-14 22:40:02,391 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
25
+ 2021-07-14 22:40:02,517 DEBUG SenderThread:598803 [sender.py:send():179] send: config
26
+ 2021-07-14 22:40:02,517 DEBUG SenderThread:598803 [sender.py:send():179] send: config
27
+ 2021-07-14 22:40:02,517 DEBUG SenderThread:598803 [sender.py:send():179] send: config
28
+ 2021-07-14 22:40:02,814 INFO Thread-11 :598803 [upload_job.py:push():137] Uploaded file /tmp/tmp43so6xcswandb/116losze-wandb-metadata.json
29
+ 2021-07-14 22:40:03,240 INFO Thread-8 :598803 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/requirements.txt
30
+ 2021-07-14 22:40:03,241 INFO Thread-8 :598803 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-metadata.json
31
+ 2021-07-14 22:40:03,241 INFO Thread-8 :598803 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log
32
+ 2021-07-14 22:40:17,246 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log
33
+ 2021-07-14 22:40:17,518 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
34
+ 2021-07-14 22:40:17,519 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
35
+ 2021-07-14 22:40:19,247 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log
36
+ 2021-07-14 22:40:30,436 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
37
+ 2021-07-14 22:40:32,253 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/config.yaml
38
+ 2021-07-14 22:40:32,650 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
39
+ 2021-07-14 22:40:32,650 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
40
+ 2021-07-14 22:40:47,784 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
41
+ 2021-07-14 22:40:47,784 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
42
+ 2021-07-14 22:41:00,509 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
43
+ 2021-07-14 22:41:02,914 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
44
+ 2021-07-14 22:41:02,914 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
45
+ 2021-07-14 22:41:18,045 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
46
+ 2021-07-14 22:41:18,045 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
47
+ 2021-07-14 22:41:30,568 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
48
+ 2021-07-14 22:41:33,175 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
49
+ 2021-07-14 22:41:33,175 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
50
+ 2021-07-14 22:41:48,307 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
51
+ 2021-07-14 22:41:48,307 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
52
+ 2021-07-14 22:42:00,641 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
53
+ 2021-07-14 22:42:03,441 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
54
+ 2021-07-14 22:42:03,442 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
55
+ 2021-07-14 22:42:18,571 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
56
+ 2021-07-14 22:42:18,572 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
57
+ 2021-07-14 22:42:30,706 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
58
+ 2021-07-14 22:42:33,702 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
59
+ 2021-07-14 22:42:33,702 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
60
+ 2021-07-14 22:42:48,848 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
61
+ 2021-07-14 22:42:48,848 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
62
+ 2021-07-14 22:43:00,777 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
63
+ 2021-07-14 22:43:03,978 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
64
+ 2021-07-14 22:43:03,979 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
65
+ 2021-07-14 22:43:19,111 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
66
+ 2021-07-14 22:43:19,111 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
67
+ 2021-07-14 22:43:30,850 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
68
+ 2021-07-14 22:43:34,242 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
69
+ 2021-07-14 22:43:34,242 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
70
+ 2021-07-14 22:43:49,373 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
71
+ 2021-07-14 22:43:49,374 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
72
+ 2021-07-14 22:44:00,923 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
73
+ 2021-07-14 22:44:04,513 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
74
+ 2021-07-14 22:44:04,513 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
75
+ 2021-07-14 22:44:19,644 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
76
+ 2021-07-14 22:44:19,644 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
77
+ 2021-07-14 22:44:30,999 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
78
+ 2021-07-14 22:44:34,774 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
79
+ 2021-07-14 22:44:34,774 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
80
+ 2021-07-14 22:44:49,906 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
81
+ 2021-07-14 22:44:49,906 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
82
+ 2021-07-14 22:45:01,074 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
83
+ 2021-07-14 22:45:05,077 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
84
+ 2021-07-14 22:45:05,077 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
85
+ 2021-07-14 22:45:20,207 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
86
+ 2021-07-14 22:45:20,208 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
87
+ 2021-07-14 22:45:31,140 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
88
+ 2021-07-14 22:45:35,338 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
89
+ 2021-07-14 22:45:35,339 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
90
+ 2021-07-14 22:45:50,469 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
91
+ 2021-07-14 22:45:50,470 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
92
+ 2021-07-14 22:46:01,203 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
93
+ 2021-07-14 22:46:05,601 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
94
+ 2021-07-14 22:46:05,601 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
95
+ 2021-07-14 22:46:20,734 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
96
+ 2021-07-14 22:46:20,734 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
97
+ 2021-07-14 22:46:31,276 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
98
+ 2021-07-14 22:46:35,865 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
99
+ 2021-07-14 22:46:35,865 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
100
+ 2021-07-14 22:46:51,019 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
101
+ 2021-07-14 22:46:51,020 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
102
+ 2021-07-14 22:47:01,353 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
103
+ 2021-07-14 22:47:06,154 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
104
+ 2021-07-14 22:47:06,154 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
105
+ 2021-07-14 22:47:21,290 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
106
+ 2021-07-14 22:47:21,290 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
107
+ 2021-07-14 22:47:31,428 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
108
+ 2021-07-14 22:47:36,424 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
109
+ 2021-07-14 22:47:36,424 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
110
+ 2021-07-14 22:47:51,555 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
111
+ 2021-07-14 22:47:51,555 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
112
+ 2021-07-14 22:48:01,502 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
113
+ 2021-07-14 22:48:06,777 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
114
+ 2021-07-14 22:48:06,777 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
115
+ 2021-07-14 22:48:07,431 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log
116
+ 2021-07-14 22:48:21,934 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
117
+ 2021-07-14 22:48:21,935 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
118
+ 2021-07-14 22:48:31,579 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
119
+ 2021-07-14 22:48:37,091 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
120
+ 2021-07-14 22:48:37,091 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
121
+ 2021-07-14 22:48:52,233 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
122
+ 2021-07-14 22:48:52,234 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
123
+ 2021-07-14 22:49:01,665 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
124
+ 2021-07-14 22:49:07,381 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
125
+ 2021-07-14 22:49:07,382 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
126
+ 2021-07-14 22:49:22,521 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: stop_status
127
+ 2021-07-14 22:49:22,521 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: stop_status
128
+ 2021-07-14 22:49:31,465 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log
129
+ 2021-07-14 22:49:31,743 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
130
+ 2021-07-14 22:49:32,262 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
131
+ 2021-07-14 22:49:32,263 DEBUG SenderThread:598803 [sender.py:send():179] send: telemetry
132
+ 2021-07-14 22:49:32,263 DEBUG SenderThread:598803 [sender.py:send():179] send: exit
133
+ 2021-07-14 22:49:32,263 INFO SenderThread:598803 [sender.py:send_exit():287] handling exit code: 1
134
+ 2021-07-14 22:49:32,263 INFO SenderThread:598803 [sender.py:send_exit():295] send defer
135
+ 2021-07-14 22:49:32,263 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
136
+ 2021-07-14 22:49:32,264 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
137
+ 2021-07-14 22:49:32,264 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 0
138
+ 2021-07-14 22:49:32,264 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
139
+ 2021-07-14 22:49:32,264 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 0
140
+ 2021-07-14 22:49:32,264 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 1
141
+ 2021-07-14 22:49:32,265 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
142
+ 2021-07-14 22:49:32,265 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 1
143
+ 2021-07-14 22:49:32,345 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
144
+ 2021-07-14 22:49:32,345 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 1
145
+ 2021-07-14 22:49:32,346 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 2
146
+ 2021-07-14 22:49:32,346 DEBUG SenderThread:598803 [sender.py:send():179] send: stats
147
+ 2021-07-14 22:49:32,346 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
148
+ 2021-07-14 22:49:32,346 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 2
149
+ 2021-07-14 22:49:32,346 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
150
+ 2021-07-14 22:49:32,347 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 2
151
+ 2021-07-14 22:49:32,347 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 3
152
+ 2021-07-14 22:49:32,347 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
153
+ 2021-07-14 22:49:32,347 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 3
154
+ 2021-07-14 22:49:32,347 DEBUG SenderThread:598803 [sender.py:send():179] send: summary
155
+ 2021-07-14 22:49:32,347 INFO SenderThread:598803 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
156
+ 2021-07-14 22:49:32,347 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
157
+ 2021-07-14 22:49:32,347 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 3
158
+ 2021-07-14 22:49:32,347 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 4
159
+ 2021-07-14 22:49:32,348 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
160
+ 2021-07-14 22:49:32,348 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 4
161
+ 2021-07-14 22:49:32,348 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
162
+ 2021-07-14 22:49:32,348 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 4
163
+ 2021-07-14 22:49:32,366 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
164
+ 2021-07-14 22:49:32,466 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json
165
+ 2021-07-14 22:49:32,466 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log
166
+ 2021-07-14 22:49:32,534 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 5
167
+ 2021-07-14 22:49:32,534 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
168
+ 2021-07-14 22:49:32,535 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
169
+ 2021-07-14 22:49:32,535 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 5
170
+ 2021-07-14 22:49:32,535 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
171
+ 2021-07-14 22:49:32,535 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 5
172
+ 2021-07-14 22:49:32,535 INFO SenderThread:598803 [dir_watcher.py:finish():282] shutting down directory watcher
173
+ 2021-07-14 22:49:32,636 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
174
+ 2021-07-14 22:49:33,466 INFO Thread-8 :598803 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/config.yaml
175
+ 2021-07-14 22:49:33,467 INFO SenderThread:598803 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files
176
+ 2021-07-14 22:49:33,467 INFO SenderThread:598803 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/requirements.txt requirements.txt
177
+ 2021-07-14 22:49:33,467 INFO SenderThread:598803 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log output.log
178
+ 2021-07-14 22:49:33,467 INFO SenderThread:598803 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-metadata.json wandb-metadata.json
179
+ 2021-07-14 22:49:33,468 INFO SenderThread:598803 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/config.yaml config.yaml
180
+ 2021-07-14 22:49:33,468 INFO SenderThread:598803 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json wandb-summary.json
181
+ 2021-07-14 22:49:33,468 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 6
182
+ 2021-07-14 22:49:33,468 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
183
+ 2021-07-14 22:49:33,472 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
184
+ 2021-07-14 22:49:33,472 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 6
185
+ 2021-07-14 22:49:33,474 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
186
+ 2021-07-14 22:49:33,474 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 6
187
+ 2021-07-14 22:49:33,475 INFO SenderThread:598803 [file_pusher.py:finish():177] shutting down file pusher
188
+ 2021-07-14 22:49:33,574 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
189
+ 2021-07-14 22:49:33,574 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
190
+ 2021-07-14 22:49:33,676 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
191
+ 2021-07-14 22:49:33,676 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
192
+ 2021-07-14 22:49:33,778 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
193
+ 2021-07-14 22:49:33,778 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
194
+ 2021-07-14 22:49:33,880 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
195
+ 2021-07-14 22:49:33,880 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
196
+ 2021-07-14 22:49:33,982 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
197
+ 2021-07-14 22:49:33,982 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
198
+ 2021-07-14 22:49:33,989 INFO Thread-15 :598803 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/wandb-summary.json
199
+ 2021-07-14 22:49:33,994 INFO Thread-14 :598803 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/config.yaml
200
+ 2021-07-14 22:49:33,995 INFO Thread-13 :598803 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/output.log
201
+ 2021-07-14 22:49:33,997 INFO Thread-12 :598803 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/files/requirements.txt
202
+ 2021-07-14 22:49:34,084 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
203
+ 2021-07-14 22:49:34,085 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
204
+ 2021-07-14 22:49:34,186 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
205
+ 2021-07-14 22:49:34,187 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
206
+ 2021-07-14 22:49:34,198 INFO Thread-7 :598803 [sender.py:transition_state():308] send defer: 7
207
+ 2021-07-14 22:49:34,198 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
208
+ 2021-07-14 22:49:34,198 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 7
209
+ 2021-07-14 22:49:34,198 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
210
+ 2021-07-14 22:49:34,198 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 7
211
+ 2021-07-14 22:49:34,288 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
212
+ 2021-07-14 22:49:34,464 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 8
213
+ 2021-07-14 22:49:34,464 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
214
+ 2021-07-14 22:49:34,465 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
215
+ 2021-07-14 22:49:34,465 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 8
216
+ 2021-07-14 22:49:34,465 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
217
+ 2021-07-14 22:49:34,465 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 8
218
+ 2021-07-14 22:49:34,466 INFO SenderThread:598803 [sender.py:transition_state():308] send defer: 9
219
+ 2021-07-14 22:49:34,466 DEBUG SenderThread:598803 [sender.py:send():179] send: final
220
+ 2021-07-14 22:49:34,466 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: defer
221
+ 2021-07-14 22:49:34,466 DEBUG SenderThread:598803 [sender.py:send():179] send: footer
222
+ 2021-07-14 22:49:34,466 INFO HandlerThread:598803 [handler.py:handle_request_defer():141] handle defer: 9
223
+ 2021-07-14 22:49:34,467 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: defer
224
+ 2021-07-14 22:49:34,467 INFO SenderThread:598803 [sender.py:send_request_defer():304] handle sender defer: 9
225
+ 2021-07-14 22:49:34,567 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: poll_exit
226
+ 2021-07-14 22:49:34,567 DEBUG SenderThread:598803 [sender.py:send_request():193] send_request: poll_exit
227
+ 2021-07-14 22:49:34,567 INFO SenderThread:598803 [file_pusher.py:join():182] waiting for file pusher
228
+ 2021-07-14 22:49:34,569 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: get_summary
229
+ 2021-07-14 22:49:34,570 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: sampled_history
230
+ 2021-07-14 22:49:34,570 DEBUG HandlerThread:598803 [handler.py:handle_request():124] handle_request: shutdown
231
+ 2021-07-14 22:49:34,570 INFO HandlerThread:598803 [handler.py:finish():638] shutting down handler
232
+ 2021-07-14 22:49:35,467 INFO WriterThread:598803 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/run-1jvvynqa.wandb
233
+ 2021-07-14 22:49:35,568 INFO SenderThread:598803 [sender.py:finish():945] shutting down sender
234
+ 2021-07-14 22:49:35,568 INFO SenderThread:598803 [file_pusher.py:finish():177] shutting down file pusher
235
+ 2021-07-14 22:49:35,568 INFO SenderThread:598803 [file_pusher.py:join():182] waiting for file pusher
236
+ 2021-07-14 22:49:35,570 INFO MainThread:598803 [internal.py:handle_exit():78] Internal process exited
wandb/run-20210714_224000-1jvvynqa/logs/debug.log ADDED
@@ -0,0 +1,127 @@
1
+ 2021-07-14 22:40:00,341 INFO MainThread:597542 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-14 22:40:00,341 INFO MainThread:597542 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-14 22:40:00,341 INFO MainThread:597542 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/logs/debug.log
4
+ 2021-07-14 22:40:00,341 INFO MainThread:597542 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_224000-1jvvynqa/logs/debug-internal.log
5
+ 2021-07-14 22:40:00,342 INFO MainThread:597542 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-14 22:40:00,342 INFO MainThread:597542 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-14 22:40:00,342 INFO MainThread:597542 [wandb_init.py:init():419] starting backend
9
+ 2021-07-14 22:40:00,342 INFO MainThread:597542 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-14 22:40:00,388 INFO MainThread:597542 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-14 22:40:00,433 INFO MainThread:597542 [backend.py:ensure_launched():139] started backend process with pid: 598803
12
+ 2021-07-14 22:40:00,435 INFO MainThread:597542 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-14 22:40:00,438 INFO MainThread:597542 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-14 22:40:00,439 INFO MainThread:597542 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-14 22:40:01,068 INFO MainThread:597542 [wandb_init.py:init():496] got version response
16
+ 2021-07-14 22:40:01,068 INFO MainThread:597542 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-14 22:40:01,239 INFO MainThread:597542 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-14 22:40:02,388 INFO MainThread:597542 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-14 22:40:02,389 INFO MainThread:597542 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-14 22:40:02,389 INFO MainThread:597542 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-14 22:40:02,391 INFO MainThread:597542 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-14 22:40:02,391 INFO MainThread:597542 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-14 22:40:02,399 INFO MainThread:597542 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_22-39-51_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-14 22:40:02,400 INFO MainThread:597542 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
25
+ 2021-07-14 22:40:02,402 INFO MainThread:597542 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000}
26
+ 2021-07-14 22:49:30,065 INFO MainThread:597542 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
27
+ 2021-07-14 22:49:30,066 INFO MainThread:597542 [wandb_run.py:_restore():1565] restore
28
+ 2021-07-14 22:49:32,264 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
29
+ wandb_count: 1
30
+ }
31
+ pusher_stats {
32
+ uploaded_bytes: 1448
33
+ total_bytes: 1448
34
+ }
35
+
36
+ 2021-07-14 22:49:32,535 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
37
+ wandb_count: 1
38
+ }
39
+ pusher_stats {
40
+ uploaded_bytes: 1448
41
+ total_bytes: 1448
42
+ }
43
+
44
+ 2021-07-14 22:49:33,472 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
45
+ wandb_count: 3
46
+ }
47
+ pusher_stats {
48
+ uploaded_bytes: 1448
49
+ total_bytes: 6873
50
+ }
51
+
52
+ 2021-07-14 22:49:33,575 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
53
+ wandb_count: 5
54
+ }
55
+ pusher_stats {
56
+ uploaded_bytes: 1448
57
+ total_bytes: 11487
58
+ }
59
+
60
+ 2021-07-14 22:49:33,677 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
61
+ wandb_count: 5
62
+ }
63
+ pusher_stats {
64
+ uploaded_bytes: 11487
65
+ total_bytes: 11487
66
+ }
67
+
68
+ 2021-07-14 22:49:33,779 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
69
+ wandb_count: 5
70
+ }
71
+ pusher_stats {
72
+ uploaded_bytes: 11487
73
+ total_bytes: 11487
74
+ }
75
+
76
+ 2021-07-14 22:49:33,881 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
77
+ wandb_count: 5
78
+ }
79
+ pusher_stats {
80
+ uploaded_bytes: 11487
81
+ total_bytes: 11487
82
+ }
83
+
84
+ 2021-07-14 22:49:33,983 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
85
+ wandb_count: 5
86
+ }
87
+ pusher_stats {
88
+ uploaded_bytes: 11487
89
+ total_bytes: 11487
90
+ }
91
+
92
+ 2021-07-14 22:49:34,085 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
93
+ wandb_count: 5
94
+ }
95
+ pusher_stats {
96
+ uploaded_bytes: 11487
97
+ total_bytes: 11487
98
+ }
99
+
100
+ 2021-07-14 22:49:34,187 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
101
+ wandb_count: 5
102
+ }
103
+ pusher_stats {
104
+ uploaded_bytes: 11487
105
+ total_bytes: 11487
106
+ }
107
+
108
+ 2021-07-14 22:49:34,466 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
109
+ wandb_count: 5
110
+ }
111
+ pusher_stats {
112
+ uploaded_bytes: 11487
113
+ total_bytes: 11487
114
+ }
115
+
116
+ 2021-07-14 22:49:34,568 INFO MainThread:597542 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
117
+ exit_result {
118
+ }
119
+ file_counts {
120
+ wandb_count: 5
121
+ }
122
+ pusher_stats {
123
+ uploaded_bytes: 11487
124
+ total_bytes: 11487
125
+ }
126
+
127
+ 2021-07-14 22:49:35,856 INFO MainThread:597542 [wandb_run.py:_show_files():1937] logging synced files
wandb/run-20210714_224000-1jvvynqa/run-1jvvynqa.wandb ADDED
Binary file (11.8 kB).
wandb/run-20210714_225820-1dpoijkp/files/config.yaml ADDED
@@ -0,0 +1,304 @@
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.10.33
7
+ framework: huggingface
8
+ huggingface_version: 4.9.0.dev0
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ python_version: 3.8.10
12
+ t:
13
+ 1:
14
+ - 3
15
+ - 11
16
+ 4: 3.8.10
17
+ 5: 0.10.33
18
+ 6: 4.9.0.dev0
19
+ 8:
20
+ - 5
21
+ adafactor:
22
+ desc: null
23
+ value: false
24
+ adam_beta1:
25
+ desc: null
26
+ value: 0.9
27
+ adam_beta2:
28
+ desc: null
29
+ value: 0.98
30
+ adam_epsilon:
31
+ desc: null
32
+ value: 1.0e-08
33
+ cache_dir:
34
+ desc: null
35
+ value: null
36
+ config_name:
37
+ desc: null
38
+ value: ./
39
+ dataloader_drop_last:
40
+ desc: null
41
+ value: false
42
+ dataloader_num_workers:
43
+ desc: null
44
+ value: 0
45
+ dataloader_pin_memory:
46
+ desc: null
47
+ value: true
48
+ dataset_config_name:
49
+ desc: null
50
+ value: null
51
+ dataset_name:
52
+ desc: null
53
+ value: null
54
+ ddp_find_unused_parameters:
55
+ desc: null
56
+ value: null
57
+ debug:
58
+ desc: null
59
+ value: []
60
+ deepspeed:
61
+ desc: null
62
+ value: null
63
+ disable_tqdm:
64
+ desc: null
65
+ value: false
66
+ do_eval:
67
+ desc: null
68
+ value: false
69
+ do_predict:
70
+ desc: null
71
+ value: false
72
+ do_train:
73
+ desc: null
74
+ value: false
75
+ dtype:
76
+ desc: null
77
+ value: float32
78
+ eval_accumulation_steps:
79
+ desc: null
80
+ value: null
81
+ eval_steps:
82
+ desc: null
83
+ value: 20000
84
+ evaluation_strategy:
85
+ desc: null
86
+ value: IntervalStrategy.NO
87
+ fp16:
88
+ desc: null
89
+ value: false
90
+ fp16_backend:
91
+ desc: null
92
+ value: auto
93
+ fp16_full_eval:
94
+ desc: null
95
+ value: false
96
+ fp16_opt_level:
97
+ desc: null
98
+ value: O1
99
+ gradient_accumulation_steps:
100
+ desc: null
101
+ value: 8
102
+ greater_is_better:
103
+ desc: null
104
+ value: null
105
+ group_by_length:
106
+ desc: null
107
+ value: false
108
+ ignore_data_skip:
109
+ desc: null
110
+ value: false
111
+ label_names:
112
+ desc: null
113
+ value: null
114
+ label_smoothing_factor:
115
+ desc: null
116
+ value: 0.0
117
+ learning_rate:
118
+ desc: null
119
+ value: 5.0e-05
120
+ length_column_name:
121
+ desc: null
122
+ value: length
123
+ line_by_line:
124
+ desc: null
125
+ value: false
126
+ load_best_model_at_end:
127
+ desc: null
128
+ value: false
129
+ local_rank:
130
+ desc: null
131
+ value: -1
132
+ log_level:
133
+ desc: null
134
+ value: -1
135
+ log_level_replica:
136
+ desc: null
137
+ value: -1
138
+ log_on_each_node:
139
+ desc: null
140
+ value: true
141
+ logging_dir:
142
+ desc: null
143
+ value: ./runs/Jul14_22-58-13_t1v-n-f5c06ea1-w-0
144
+ logging_first_step:
145
+ desc: null
146
+ value: false
147
+ logging_steps:
148
+ desc: null
149
+ value: 500
150
+ logging_strategy:
151
+ desc: null
152
+ value: IntervalStrategy.STEPS
153
+ lr_scheduler_type:
154
+ desc: null
155
+ value: SchedulerType.LINEAR
156
+ max_eval_samples:
157
+ desc: null
158
+ value: 2000
159
+ max_grad_norm:
160
+ desc: null
161
+ value: 1.0
162
+ max_seq_length:
163
+ desc: null
164
+ value: 4096
165
+ max_steps:
166
+ desc: null
167
+ value: -1
168
+ metric_for_best_model:
169
+ desc: null
170
+ value: null
171
+ mlm_probability:
172
+ desc: null
173
+ value: 0.15
174
+ model_name_or_path:
175
+ desc: null
176
+ value: null
177
+ model_type:
178
+ desc: null
179
+ value: big_bird
180
+ mp_parameters:
181
+ desc: null
182
+ value: ''
183
+ no_cuda:
184
+ desc: null
185
+ value: false
186
+ num_train_epochs:
187
+ desc: null
188
+ value: 5.0
189
+ output_dir:
190
+ desc: null
191
+ value: ./
192
+ overwrite_cache:
193
+ desc: null
194
+ value: false
195
+ overwrite_output_dir:
196
+ desc: null
197
+ value: true
198
+ pad_to_max_length:
199
+ desc: null
200
+ value: false
201
+ past_index:
202
+ desc: null
203
+ value: -1
204
+ per_device_eval_batch_size:
205
+ desc: null
206
+ value: 2
207
+ per_device_train_batch_size:
208
+ desc: null
209
+ value: 2
210
+ per_gpu_eval_batch_size:
211
+ desc: null
212
+ value: null
213
+ per_gpu_train_batch_size:
214
+ desc: null
215
+ value: null
216
+ prediction_loss_only:
217
+ desc: null
218
+ value: false
219
+ preprocessing_num_workers:
220
+ desc: null
221
+ value: 96
222
+ push_to_hub:
223
+ desc: null
224
+ value: true
225
+ push_to_hub_model_id:
226
+ desc: null
227
+ value: ''
228
+ push_to_hub_organization:
229
+ desc: null
230
+ value: null
231
+ push_to_hub_token:
232
+ desc: null
233
+ value: null
234
+ remove_unused_columns:
235
+ desc: null
236
+ value: true
237
+ report_to:
238
+ desc: null
239
+ value:
240
+ - tensorboard
241
+ - wandb
242
+ resume_from_checkpoint:
243
+ desc: null
244
+ value: null
245
+ run_name:
246
+ desc: null
247
+ value: ./
248
+ save_on_each_node:
249
+ desc: null
250
+ value: false
251
+ save_optimizer:
252
+ desc: null
253
+ value: true
254
+ save_steps:
255
+ desc: null
256
+ value: 20000
257
+ save_strategy:
258
+ desc: null
259
+ value: IntervalStrategy.STEPS
260
+ save_total_limit:
261
+ desc: null
262
+ value: 5
263
+ seed:
264
+ desc: null
265
+ value: 42
266
+ sharded_ddp:
267
+ desc: null
268
+ value: []
269
+ skip_memory_metrics:
270
+ desc: null
271
+ value: true
272
+ tokenizer_name:
273
+ desc: null
274
+ value: ./
275
+ tpu_metrics_debug:
276
+ desc: null
277
+ value: false
278
+ tpu_num_cores:
279
+ desc: null
280
+ value: null
281
+ train_ref_file:
282
+ desc: null
283
+ value: null
284
+ use_fast_tokenizer:
285
+ desc: null
286
+ value: true
287
+ use_legacy_prediction_loop:
288
+ desc: null
289
+ value: false
290
+ validation_ref_file:
291
+ desc: null
292
+ value: null
293
+ validation_split_percentage:
294
+ desc: null
295
+ value: 5
296
+ warmup_ratio:
297
+ desc: null
298
+ value: 0.0
299
+ warmup_steps:
300
+ desc: null
301
+ value: 10000
302
+ weight_decay:
303
+ desc: null
304
+ value: 0.0095
wandb/run-20210714_225820-1dpoijkp/files/output.log ADDED
@@ -0,0 +1,6 @@
1
+ /home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py:3132: UserWarning: Explicitly requested dtype <class 'jax._src.numpy.lax_numpy.int64'> requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more.
2
+ lax._check_user_dtype_supported(dtype, "zeros")
3
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
4
+ warnings.warn(
5
+ /home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
6
+ warnings.warn(
wandb/run-20210714_225820-1dpoijkp/files/requirements.txt ADDED
@@ -0,0 +1,94 @@
1
+ absl-py==0.13.0
2
+ aiohttp==3.7.4.post0
3
+ astunparse==1.6.3
4
+ async-timeout==3.0.1
5
+ attrs==21.2.0
6
+ cachetools==4.2.2
7
+ certifi==2021.5.30
8
+ chardet==4.0.0
9
+ charset-normalizer==2.0.1
10
+ chex==0.0.8
11
+ click==8.0.1
12
+ configparser==5.0.2
13
+ cycler==0.10.0
14
+ datasets==1.9.1.dev0
15
+ dill==0.3.4
16
+ dm-tree==0.1.6
17
+ docker-pycreds==0.4.0
18
+ filelock==3.0.12
19
+ flatbuffers==1.12
20
+ flax==0.3.4
21
+ fsspec==2021.7.0
22
+ gast==0.4.0
23
+ gitdb==4.0.7
24
+ gitpython==3.1.18
25
+ google-auth-oauthlib==0.4.4
26
+ google-auth==1.32.1
27
+ google-pasta==0.2.0
28
+ grpcio==1.34.1
29
+ h5py==3.1.0
30
+ huggingface-hub==0.0.12
31
+ idna==3.2
32
+ install==1.3.4
33
+ jax==0.2.17
34
+ jaxlib==0.1.68
35
+ joblib==1.0.1
36
+ keras-nightly==2.5.0.dev2021032900
37
+ keras-preprocessing==1.1.2
38
+ kiwisolver==1.3.1
39
+ libtpu-nightly==0.1.dev20210615
40
+ markdown==3.3.4
41
+ matplotlib==3.4.2
42
+ msgpack==1.0.2
43
+ multidict==5.1.0
44
+ multiprocess==0.70.12.2
45
+ numpy==1.19.5
46
+ oauthlib==3.1.1
47
+ opt-einsum==3.3.0
48
+ optax==0.0.9
49
+ packaging==21.0
50
+ pandas==1.3.0
51
+ pathtools==0.1.2
52
+ pillow==8.3.1
53
+ pip==20.0.2
54
+ pkg-resources==0.0.0
55
+ promise==2.3
56
+ protobuf==3.17.3
57
+ psutil==5.8.0
58
+ pyarrow==4.0.1
59
+ pyasn1-modules==0.2.8
60
+ pyasn1==0.4.8
61
+ pyparsing==2.4.7
62
+ python-dateutil==2.8.1
63
+ pytz==2021.1
64
+ pyyaml==5.4.1
65
+ regex==2021.7.6
66
+ requests-oauthlib==1.3.0
67
+ requests==2.26.0
68
+ rsa==4.7.2
69
+ sacremoses==0.0.45
70
+ scipy==1.7.0
71
+ sentry-sdk==1.3.0
72
+ setuptools==44.0.0
73
+ shortuuid==1.0.1
74
+ six==1.15.0
75
+ smmap==4.0.0
76
+ subprocess32==3.5.4
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.0
79
+ tensorboard==2.5.0
80
+ tensorflow-estimator==2.5.0
81
+ tensorflow==2.5.0
82
+ termcolor==1.1.0
83
+ tokenizers==0.10.3
84
+ toolz==0.11.1
85
+ tqdm==4.61.2
86
+ transformers==4.9.0.dev0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.6
89
+ wandb==0.10.33
90
+ werkzeug==2.0.1
91
+ wheel==0.36.2
92
+ wrapt==1.12.1
93
+ xxhash==2.0.2
94
+ yarl==1.6.3
wandb/run-20210714_225820-1dpoijkp/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2021-07-14T22:58:22.701262",
5
+ "startedAt": "2021-07-14T22:58:20.641335",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--push_to_hub",
11
+ "--output_dir=./",
12
+ "--model_type=big_bird",
13
+ "--config_name=./",
14
+ "--tokenizer_name=./",
15
+ "--max_seq_length=4096",
16
+ "--weight_decay=0.0095",
17
+ "--warmup_steps=10000",
18
+ "--overwrite_output_dir",
19
+ "--adam_beta1=0.9",
20
+ "--adam_beta2=0.98",
21
+ "--logging_steps=500",
22
+ "--eval_steps=20000",
23
+ "--num_train_epochs=5",
24
+ "--preprocessing_num_workers=96",
25
+ "--save_steps=20000",
26
+ "--learning_rate=5e-5",
27
+ "--per_device_train_batch_size=2",
28
+ "--per_device_eval_batch_size=2",
29
+ "--save_total_limit=5",
30
+ "--max_eval_samples=2000",
31
+ "--overwrite_cache",
32
+ "False",
33
+ "--gradient_accumulation_steps=8"
34
+ ],
35
+ "state": "running",
36
+ "program": "./run_mlm_flax.py",
37
+ "codePath": "run_mlm_flax.py",
38
+ "git": {
39
+ "remote": "https://huggingface.co/flax-community/pino-roberta-base",
40
+ "commit": "de71755314d4aa3ab182072c6f0be3de9798a66b"
41
+ },
42
+ "email": null,
43
+ "root": "/home/dat/pino-roberta-base",
44
+ "host": "t1v-n-f5c06ea1-w-0",
45
+ "username": "dat",
46
+ "executable": "/home/dat/pino/bin/python"
47
+ }
wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {}
wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log ADDED
@@ -0,0 +1,40 @@
1
+ 2021-07-14 22:58:21,337 INFO MainThread:601574 [internal.py:wandb_internal():88] W&B internal server running at pid: 601574, started at: 2021-07-14 22:58:21.336704
2
+ 2021-07-14 22:58:21,339 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: check_version
3
+ 2021-07-14 22:58:21,339 INFO WriterThread:601574 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/run-1dpoijkp.wandb
4
+ 2021-07-14 22:58:21,340 DEBUG SenderThread:601574 [sender.py:send():179] send: header
5
+ 2021-07-14 22:58:21,340 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: check_version
6
+ 2021-07-14 22:58:21,377 DEBUG SenderThread:601574 [sender.py:send():179] send: run
7
+ 2021-07-14 22:58:21,602 INFO SenderThread:601574 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files
8
+ 2021-07-14 22:58:21,602 INFO SenderThread:601574 [sender.py:_start_run_threads():716] run started: 1dpoijkp with start time 1626303500
9
+ 2021-07-14 22:58:21,602 DEBUG SenderThread:601574 [sender.py:send():179] send: summary
10
+ 2021-07-14 22:58:21,603 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: run_start
11
+ 2021-07-14 22:58:21,603 INFO SenderThread:601574 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
12
+ 2021-07-14 22:58:22,605 INFO Thread-8 :601574 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/wandb-summary.json
13
+ 2021-07-14 22:58:22,701 DEBUG HandlerThread:601574 [meta.py:__init__():39] meta init
14
+ 2021-07-14 22:58:22,701 DEBUG HandlerThread:601574 [meta.py:__init__():53] meta init done
15
+ 2021-07-14 22:58:22,701 DEBUG HandlerThread:601574 [meta.py:probe():210] probe
16
+ 2021-07-14 22:58:22,702 DEBUG HandlerThread:601574 [meta.py:_setup_git():200] setup git
17
+ 2021-07-14 22:58:22,732 DEBUG HandlerThread:601574 [meta.py:_setup_git():207] setup git done
18
+ 2021-07-14 22:58:22,732 DEBUG HandlerThread:601574 [meta.py:_save_pip():57] save pip
19
+ 2021-07-14 22:58:22,732 DEBUG HandlerThread:601574 [meta.py:_save_pip():71] save pip done
20
+ 2021-07-14 22:58:22,733 DEBUG HandlerThread:601574 [meta.py:probe():252] probe done
21
+ 2021-07-14 22:58:22,736 DEBUG SenderThread:601574 [sender.py:send():179] send: files
22
+ 2021-07-14 22:58:22,736 INFO SenderThread:601574 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
23
+ 2021-07-14 22:58:22,742 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
24
+ 2021-07-14 22:58:22,742 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
25
+ 2021-07-14 22:58:22,871 DEBUG SenderThread:601574 [sender.py:send():179] send: config
26
+ 2021-07-14 22:58:22,872 DEBUG SenderThread:601574 [sender.py:send():179] send: config
27
+ 2021-07-14 22:58:22,872 DEBUG SenderThread:601574 [sender.py:send():179] send: config
28
+ 2021-07-14 22:58:23,214 INFO Thread-11 :601574 [upload_job.py:push():137] Uploaded file /tmp/tmpg5fs3m8gwandb/35jlequ6-wandb-metadata.json
29
+ 2021-07-14 22:58:23,603 INFO Thread-8 :601574 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/output.log
30
+ 2021-07-14 22:58:23,604 INFO Thread-8 :601574 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/requirements.txt
31
+ 2021-07-14 22:58:23,604 INFO Thread-8 :601574 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/wandb-metadata.json
32
+ 2021-07-14 22:58:37,609 INFO Thread-8 :601574 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/output.log
33
+ 2021-07-14 22:58:37,873 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
34
+ 2021-07-14 22:58:37,874 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
35
+ 2021-07-14 22:58:50,784 DEBUG SenderThread:601574 [sender.py:send():179] send: stats
36
+ 2021-07-14 22:58:52,614 INFO Thread-8 :601574 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/files/config.yaml
37
+ 2021-07-14 22:58:53,006 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
38
+ 2021-07-14 22:58:53,006 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
39
+ 2021-07-14 22:59:08,141 DEBUG HandlerThread:601574 [handler.py:handle_request():124] handle_request: stop_status
40
+ 2021-07-14 22:59:08,141 DEBUG SenderThread:601574 [sender.py:send_request():193] send_request: stop_status
wandb/run-20210714_225820-1dpoijkp/logs/debug.log ADDED
@@ -0,0 +1,25 @@
1
+ 2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_setup.py:_flush():69] setting env: {}
2
+ 2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_setup.py:_flush():69] setting login settings: {}
3
+ 2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/logs/debug.log
4
+ 2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210714_225820-1dpoijkp/logs/debug-internal.log
5
+ 2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_init.py:init():370] calling init triggers
6
+ 2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2021-07-14 22:58:20,643 INFO MainThread:600323 [wandb_init.py:init():419] starting backend
9
+ 2021-07-14 22:58:20,643 INFO MainThread:600323 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2021-07-14 22:58:20,687 INFO MainThread:600323 [backend.py:ensure_launched():135] starting backend process...
11
+ 2021-07-14 22:58:20,729 INFO MainThread:600323 [backend.py:ensure_launched():139] started backend process with pid: 601574
12
+ 2021-07-14 22:58:20,731 INFO MainThread:600323 [wandb_init.py:init():424] backend started and connected
13
+ 2021-07-14 22:58:20,734 INFO MainThread:600323 [wandb_init.py:init():472] updated telemetry
14
+ 2021-07-14 22:58:20,735 INFO MainThread:600323 [wandb_init.py:init():491] communicating current version
15
+ 2021-07-14 22:58:21,375 INFO MainThread:600323 [wandb_init.py:init():496] got version response
16
+ 2021-07-14 22:58:21,375 INFO MainThread:600323 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
17
+ 2021-07-14 22:58:21,602 INFO MainThread:600323 [wandb_init.py:init():529] starting run threads in backend
18
+ 2021-07-14 22:58:22,739 INFO MainThread:600323 [wandb_run.py:_console_start():1623] atexit reg
19
+ 2021-07-14 22:58:22,740 INFO MainThread:600323 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
20
+ 2021-07-14 22:58:22,740 INFO MainThread:600323 [wandb_run.py:_redirect():1502] Redirecting console.
21
+ 2021-07-14 22:58:22,742 INFO MainThread:600323 [wandb_run.py:_redirect():1558] Redirects installed.
22
+ 2021-07-14 22:58:22,742 INFO MainThread:600323 [wandb_init.py:init():554] run started, returning control to user process
23
+ 2021-07-14 22:58:22,750 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul14_22-58-13_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': '', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
24
+ 2021-07-14 22:58:22,752 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32', 'save_optimizer': True}
25
+ 2021-07-14 22:58:22,753 INFO MainThread:600323 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 2000}