# versae's picture
# Saving weights and logs of step 1000
# 5f161cc
# raw
# history blame
# 2.19 kB
# Weights & Biases run configuration for a run launched from
# code/run_mlm_flax.py (see `_wandb.value.code_path`).
#
# Layout: apart from `wandb_version`, every top-level key is a mapping with
# two fields: `desc` (null for every entry here) and `value` (the recorded
# setting). Keys and values are unchanged; only the nesting that the
# flattened copy had lost is restored, so `desc`/`value` are no longer
# duplicate top-level keys.
wandb_version: 1

# Run metadata; presumably written by the wandb client rather than by the
# training script -- TODO confirm.
_wandb:
  desc: null
  value:
    cli_version: 0.12.9
    code_path: code/run_mlm_flax.py
    framework: huggingface
    huggingface_version: 4.16.0.dev0
    is_jupyter_run: false
    is_kaggle_kernel: false
    python_version: 3.8.10
    start_time: 1642687009
    # Numeric-keyed mapping; entries 4, 5 and 6 repeat the python_version,
    # cli_version and huggingface_version values above. The meaning of the
    # list-valued entries (1, 2, 8) is not visible in this file.
    t:
      1:
      - 11
      - 12
      2:
      - 11
      - 12
      4: 3.8.10
      5: 0.12.9
      6: 4.16.0.dev0
      8:
      - 5

# Recorded run parameters, in alphabetical order.
adafactor:
  desc: null
  value: false
adam_beta1:
  desc: null
  value: 0.9
adam_beta2:
  desc: null
  value: 0.98
adam_epsilon:
  desc: null
  value: 1.0e-06
cache_dir:
  desc: null
  value: null
config_name:
  desc: null
  value: ./
dataset_config_name:
  desc: null
  value: null
dataset_name:
  desc: null
  value: NbAiLab/NCC
do_eval:
  desc: null
  value: true
do_train:
  desc: null
  value: true
dtype:
  desc: null
  value: bfloat16
eval_steps:
  desc: null
  value: 1000
hub_model_id:
  desc: null
  value: null
hub_token:
  desc: null
  value: null
learning_rate:
  desc: null
  value: 0.00015
line_by_line:
  desc: null
  value: false
logging_steps:
  desc: null
  value: 1000
max_seq_length:
  desc: null
  value: 512
mlm_probability:
  desc: null
  value: 0.15
model_name_or_path:
  desc: null
  value: versae/roberta-base-ncc
model_type:
  desc: null
  value: roberta
num_train_epochs:
  desc: null
  value: 3.0
output_dir:
  desc: null
  value: ./
overwrite_cache:
  desc: null
  value: false
overwrite_output_dir:
  desc: null
  value: true
pad_to_max_length:
  desc: null
  value: true
per_device_eval_batch_size:
  desc: null
  value: 46
per_device_train_batch_size:
  desc: null
  value: 46
preprocessing_num_workers:
  desc: null
  value: null
push_to_hub:
  desc: null
  value: true
save_steps:
  desc: null
  value: 1000
seed:
  desc: null
  value: 42
tokenizer_name:
  desc: null
  value: ./
train_file:
  desc: null
  value: null
train_ref_file:
  desc: null
  value: null
use_fast_tokenizer:
  desc: null
  value: true
validation_file:
  desc: null
  value: null
validation_ref_file:
  desc: null
  value: null
validation_split_percentage:
  desc: null
  value: 5
warmup_steps:
  desc: null
  value: 1000
weight_decay:
  desc: null
  value: 0.01