pere committed on
Commit
d9de13a
1 Parent(s): 4dd8549

Saving weights and logs of step 2500

Browse files
events.out.tfevents.1631271978.t1v-n-d4d6e0cd-w-0.40355.0.v2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebdf2e01eb26c6600a5107d32f763353b0a79d7a3a08c3ab08e591af42ec8531
3
- size 10752297
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d07c587c9b4cadb323b4f6475cecfeb801baee195091d11f12d6098e3006aff3
3
+ size 10826867
events.out.tfevents.1631342182.t1v-n-d4d6e0cd-w-0.113370.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff8f9e2af2235985b80b7089527b58de5163da05c658525e8d456081e301976e
3
+ size 40
events.out.tfevents.1631344615.t1v-n-d4d6e0cd-w-0.3764.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1b79a991f6d34452ce746fa2d94ed14f45212826894b0378c4c5988f335928b
3
+ size 40
events.out.tfevents.1631345739.t1v-n-d4d6e0cd-w-0.5509.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0987808e42cb735cf737b686f88f56b8c1bd9d42e75a574b00103ab4b15011b
3
+ size 367912
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f22233ccf4a61962d4a8e18c37e86bb8dacf6e687ba8cf085635bbdf310e69d8
3
  size 1100762015
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1ecbffd7b0c9c000e97760acd00710cdc817bbbb4328e36499aab3087bb4366
3
  size 1100762015
run.sh CHANGED
@@ -7,8 +7,8 @@
7
  --validation_file /mnt/disks/flaxdisk/NCC_nb_nn_balanced/nb_nn_balanced_shuffled_100k_validation.json \
8
  --max_seq_length="512" \
9
  --weight_decay="0.01" \
10
- --per_device_train_batch_size="32" \
11
- --per_device_eval_batch_size="32" \
12
  --learning_rate="8e-3" \
13
  --warmup_steps="2000" \
14
  --overwrite_output_dir \
 
7
  --validation_file /mnt/disks/flaxdisk/NCC_nb_nn_balanced/nb_nn_balanced_shuffled_100k_validation.json \
8
  --max_seq_length="512" \
9
  --weight_decay="0.01" \
10
+ --per_device_train_batch_size="16" \
11
+ --per_device_eval_batch_size="16" \
12
  --learning_rate="8e-3" \
13
  --warmup_steps="2000" \
14
  --overwrite_output_dir \
run_recover_1e.sh ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ./run_t5_mlm_flax.py \
2
+ --output_dir="./" \
3
+ --model_type="t5" \
4
+ --model_name_or_path="." \
5
+ --config_name="./" \
6
+ --tokenizer_name="./" \
7
+ --train_file /mnt/disks/flaxdisk/NCC_nb_nn_balanced/nb_nn_balanced_shuffled_100k_train.json \
8
+ --validation_file /mnt/disks/flaxdisk/NCC_nb_nn_balanced/nb_nn_balanced_shuffled_100k_validation.json \
9
+ --max_seq_length="512" \
10
+ --weight_decay="0.01" \
11
+ --per_device_train_batch_size="32" \
12
+ --per_device_eval_batch_size="32" \
13
+ --learning_rate="0.006964816711843014" \
14
+ --warmup_steps="0" \
15
+ --overwrite_output_dir \
16
+ --cache_dir /mnt/disks/flaxdisk/cache/ \
17
+ --num_train_epochs="14" \
18
+ --adam_beta1="0.9" \
19
+ --adam_beta2="0.98" \
20
+ --logging_steps="500" \
21
+ --save_steps="2500" \
22
+ --eval_steps="2500" \
23
+ --preprocessing_num_workers 96 \
24
+ --adafactor \
25
+ --push_to_hub
26
+