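# Weights & Biases run configuration saved alongside the step-1000 checkpoint.
# It records the hyperparameters of a Flax masked-language-model pretraining run
# (code/run_mlm_flax.py) for a RoBERTa-base model on the NbAiLab/NCC dataset,
# using the NbAiLab/nb-roberta-base tokenizer.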
wandb_version: 1
_wandb:
  desc: null
  value:
    cli_version: 0.12.9
    code_path: code/run_mlm_flax.py
    framework: huggingface
    huggingface_version: 4.16.0.dev0
    is_jupyter_run: false
    is_kaggle_kernel: false
    python_version: 3.8.10
    start_time: 1642198533
    t:
      1:
      - 2
      - 3
      - 11
      - 12
      2:
      - 2
      - 3
      - 11
      - 12
      4: 3.8.10
      5: 0.12.9
      6: 4.16.0.dev0
      8:
      - 5
adafactor:
  desc: null
  value: false
adam_beta1:
  desc: null
  value: 0.9
adam_beta2:
  desc: null
  value: 0.98
adam_epsilon:
  desc: null
  value: 1.0e-06
cache_dir:
  desc: null
  value: null
config_name:
  desc: null
  value: roberta-base
dataset_config_name:
  desc: null
  value: null
dataset_name:
  desc: null
  value: NbAiLab/NCC
do_eval:
  desc: null
  value: true
do_train:
  desc: null
  value: true
dtype:
  desc: null
  value: bfloat16
eval_steps:
  desc: null
  value: 1000
hub_model_id:
  desc: null
  value: null
hub_token:
  desc: null
  value: null
learning_rate:
  desc: null
  value: 0.0006
line_by_line:
  desc: null
  value: false
logging_steps:
  desc: null
  value: 1000
max_seq_length:
  desc: null
  value: 128
mlm_probability:
  desc: null
  value: 0.15
model_name_or_path:
  desc: null
  value: null
model_type:
  desc: null
  value: roberta
num_train_epochs:
  desc: null
  value: 3.0
output_dir:
  desc: null
  value: ./
overwrite_cache:
  desc: null
  value: false
overwrite_output_dir:
  desc: null
  value: true
pad_to_max_length:
  desc: null
  value: true
per_device_eval_batch_size:
  desc: null
  value: 250
per_device_train_batch_size:
  desc: null
  value: 250
preprocessing_num_workers:
  desc: null
  value: null
push_to_hub:
  desc: null
  value: true
save_steps:
  desc: null
  value: 1000
seed:
  desc: null
  value: 42
tokenizer_name:
  desc: null
  value: NbAiLab/nb-roberta-base
train_file:
  desc: null
  value: null
train_ref_file:
  desc: null
  value: null
use_fast_tokenizer:
  desc: null
  value: true
validation_file:
  desc: null
  value: null
validation_ref_file:
  desc: null
  value: null
validation_split_percentage:
  desc: null
  value: 5
warmup_steps:
  desc: null
  value: 10000
weight_decay:
  desc: null
  value: 0.01