aapot committed
Commit
18fdd91
1 Parent(s): aff65f8

Saving weights and logs of step 10000

events.out.tfevents.1630324517.t1v-n-1ae8dadb-w-0.551349.0.v2 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1203aed8447582f392a2f352627ab346fc40d4eab7f73dce0c7c7662f7336091
+size 40
events.out.tfevents.1630325064.t1v-n-1ae8dadb-w-0.554071.0.v2 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea235530adbca24b3e9dc2315a3fa56a6022c7e39fc733baf5a4aafee9fad5dd
+size 1470757
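
Both event files are Git LFS pointers: the first (40 bytes) is an essentially empty log, while the second (~1.4 MB) carries the training curves up to step 10000. A minimal sketch for listing what the larger one contains locally, assuming the LFS blob has been pulled and tensorboard is installed (the printed tag names are illustrative):

# Minimal sketch: list the scalar tags recorded in the larger event file above.
# Assumes `pip install tensorboard` and that the LFS blob has been fetched.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator("events.out.tfevents.1630325064.t1v-n-1ae8dadb-w-0.554071.0.v2")
ea.Reload()                  # parse the protobuf records
print(ea.Tags()["scalars"])  # e.g. loss / learning-rate tags (names illustrative)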
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f87e2ed3eebcebe5f871845c9acd8a737b4575431f9ac5e651d96a6bd77826e2
+oid sha256:c54f3c2c1b748cf9a071b21240bd58e11b66c7f53878208fcf8521d6d3346e0a
 size 1421662309
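
Only the pointer's oid changes; the blob size stays at 1421662309 bytes (~1.4 GB), as expected when the same architecture is re-saved with new weights. Before `git lfs pull`, the checked-out file is just the three-line pointer; a hedged sketch for reading it (parse_lfs_pointer is a hypothetical helper, not part of this repo):

# Hedged sketch: parse the three key/value lines of a Git LFS pointer file.
from pathlib import Path

def parse_lfs_pointer(path: str) -> dict:
    """Return the version/oid/size fields of a Git LFS pointer file."""
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

ptr = parse_lfs_pointer("flax_model.msgpack")
print(ptr["oid"], int(ptr["size"]))  # sha256:c54f3c2c... 1421662309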
run_mlm_flax.py CHANGED
@@ -494,6 +494,14 @@ if __name__ == "__main__":
 
     # save the tokenized dataset for future runs
     if data_args.save_tokenized_dataset_filepath is not None:
+        if data_args.dataset_filepath is not None:
+            try:
+                os.system(f"sudo rm {data_args.dataset_filepath}/train/cache*")
+                os.system(f"sudo rm {data_args.dataset_filepath}/validation/cache*")
+                os.system(f"sudo rm {data_args.dataset_filepath}/train/tmp*")
+                os.system(f"sudo rm {data_args.dataset_filepath}/validation/tmp*")
+            except:
+                pass
         tokenized_datasets.save_to_disk(data_args.save_tokenized_dataset_filepath)
 
     # Enable tensorboard only on the master node
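
Note that os.system returns an exit status instead of raising, so the bare except above can never trigger, and a failing `sudo rm` silently no-ops. A hedged pure-Python equivalent of the same cleanup, assuming the process itself is allowed to delete the scratch files:

# Sketch: remove datasets' cache*/tmp* scratch files without shelling out.
import glob
import os

def remove_dataset_scratch(dataset_filepath: str) -> None:
    # Mirror the patch's glob patterns: cache*/tmp* under each split directory.
    for split in ("train", "validation"):
        for pattern in ("cache*", "tmp*"):
            for path in glob.glob(os.path.join(dataset_filepath, split, pattern)):
                try:
                    os.remove(path)
                except OSError:
                    pass  # already gone or not removable; skip it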
start_train.sh CHANGED
@@ -6,12 +6,12 @@ python3 run_mlm_flax.py \
     --config_name="./" \
     --tokenizer_name="./" \
     --dataset_filepath="/researchdisk1/data/training_data_full" \
-    --tokenized_dataset_filepath="/researchdisk1/data/training_data_full_tokenized_128" \
-    --max_seq_length="128" \
+    --tokenized_dataset_filepath="/researchdisk1/data/training_data_full_tokenized_512" \
+    --max_seq_length="512" \
     --pad_to_max_length \
     --preprocessing_num_workers="96" \
-    --per_device_train_batch_size="32" \
-    --per_device_eval_batch_size="32" \
+    --per_device_train_batch_size="16" \
+    --per_device_eval_batch_size="16" \
     --adam_beta1="0.9" \
     --adam_beta2="0.98" \
     --adam_epsilon="1e-6" \
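
The jump from 128- to 512-token sequences quadruples the tokens held per example, which is why the per-device batch size is halved rather than left at 32. Back-of-envelope throughput per optimizer step, assuming 8 local devices (a single TPU v3-8 host; the device count is an assumption, not stated in the commit):

# Rough arithmetic: tokens processed per optimizer step, before vs. after.
devices = 8                   # assumption: one TPU v3-8 host
before = 32 * 128 * devices   # per_device_train_batch_size * max_seq_length * devices
after = 16 * 512 * devices
print(before, after)          # 32768 vs. 65536 tokens per step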