Saving weights and logs of step 10000
Browse files
events.out.tfevents.1637523639.t1v-n-8eba1090-w-0.295438.0.v2 → events.out.tfevents.1637622530.t1v-n-8eba1090-w-0.18483.0.v2
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46b1ed32532cb8c66a31d50c192133429b153d1fb9e6ce38e84ddc41202ef2d1
|
3 |
+
size 1470757
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1421662309
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:416eebb5d6b9be6fb90af53e09925009426f2e9a53571890b22b61245d85d1a4
|
3 |
size 1421662309
|
run_mlm_flax.py
CHANGED
@@ -508,14 +508,6 @@ if __name__ == "__main__":
|
|
508 |
|
509 |
# save the tokenized dataset for future runs
|
510 |
if data_args.save_tokenized_dataset_filepath is not None:
|
511 |
-
if data_args.dataset_filepath is not None:
|
512 |
-
try:
|
513 |
-
os.system(f"sudo rm {data_args.dataset_filepath}/train/cache*")
|
514 |
-
os.system(f"sudo rm {data_args.dataset_filepath}/validation/cache*")
|
515 |
-
os.system(f"sudo rm {data_args.dataset_filepath}/train/tmp*")
|
516 |
-
os.system(f"sudo rm {data_args.dataset_filepath}/validation/tmp*")
|
517 |
-
except:
|
518 |
-
pass
|
519 |
tokenized_datasets.save_to_disk(data_args.save_tokenized_dataset_filepath)
|
520 |
|
521 |
|
@@ -592,6 +584,7 @@ if __name__ == "__main__":
|
|
592 |
# For more details about the parameters please check https://github.com/deepmind/optax/blob/ed02befef9bf81cbbf236be3d2b0e032e9ed4a40/optax/_src/alias.py#L74
|
593 |
optimizer = optax.adafactor(
|
594 |
learning_rate=linear_decay_lr_schedule_fn,
|
|
|
595 |
)
|
596 |
else:
|
597 |
optimizer = optax.adamw(
|
|
|
508 |
|
509 |
# save the tokenized dataset for future runs
|
510 |
if data_args.save_tokenized_dataset_filepath is not None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
511 |
tokenized_datasets.save_to_disk(data_args.save_tokenized_dataset_filepath)
|
512 |
|
513 |
|
|
|
584 |
# For more details about the parameters please check https://github.com/deepmind/optax/blob/ed02befef9bf81cbbf236be3d2b0e032e9ed4a40/optax/_src/alias.py#L74
|
585 |
optimizer = optax.adafactor(
|
586 |
learning_rate=linear_decay_lr_schedule_fn,
|
587 |
+
weight_decay_rate=training_args.weight_decay,
|
588 |
)
|
589 |
else:
|
590 |
optimizer = optax.adamw(
|
start_train.sh
CHANGED
@@ -1,23 +1,24 @@
|
|
1 |
# set train hyperparams
|
2 |
unset LD_PRELOAD
|
3 |
export HF_DATASETS_CACHE="/researchdisk/datasets_cache"
|
|
|
4 |
python3 run_mlm_flax.py \
|
5 |
--output_dir="./" \
|
6 |
--model_type="roberta" \
|
7 |
--config_name="./" \
|
8 |
--tokenizer_name="./" \
|
9 |
--dataset_filepath="/researchdisk/training_dataset_full" \
|
10 |
-
--save_tokenized_dataset_filepath="/researchdisk/training_dataset_full_tokenized_128" \
|
11 |
--max_seq_length="128" \
|
12 |
--pad_to_max_length \
|
13 |
--preprocessing_num_workers="96" \
|
14 |
--per_device_train_batch_size="64" \
|
15 |
--per_device_eval_batch_size="64" \
|
|
|
16 |
--adam_beta1="0.9" \
|
17 |
--adam_beta2="0.98" \
|
18 |
--adam_epsilon="1e-6" \
|
19 |
--learning_rate="2e-4" \
|
20 |
-
--warmup_steps="
|
21 |
--overwrite_output_dir \
|
22 |
--num_train_epochs="2" \
|
23 |
--save_strategy="steps" \
|
|
|
1 |
# set train hyperparams
|
2 |
unset LD_PRELOAD
|
3 |
export HF_DATASETS_CACHE="/researchdisk/datasets_cache"
|
4 |
+
export USE_TORCH=False
|
5 |
python3 run_mlm_flax.py \
|
6 |
--output_dir="./" \
|
7 |
--model_type="roberta" \
|
8 |
--config_name="./" \
|
9 |
--tokenizer_name="./" \
|
10 |
--dataset_filepath="/researchdisk/training_dataset_full" \
|
|
|
11 |
--max_seq_length="128" \
|
12 |
--pad_to_max_length \
|
13 |
--preprocessing_num_workers="96" \
|
14 |
--per_device_train_batch_size="64" \
|
15 |
--per_device_eval_batch_size="64" \
|
16 |
+
--weight_decay="0.01" \
|
17 |
--adam_beta1="0.9" \
|
18 |
--adam_beta2="0.98" \
|
19 |
--adam_epsilon="1e-6" \
|
20 |
--learning_rate="2e-4" \
|
21 |
+
--warmup_steps="25000" \
|
22 |
--overwrite_output_dir \
|
23 |
--num_train_epochs="2" \
|
24 |
--save_strategy="steps" \
|