w11wo committed on
Commit
c5d8680
1 Parent(s): 85074c3

Saving weights and logs of epoch 1

Browse files
events.out.tfevents.1625848981.t1v-n-b95d739e-w-0.195447.3.v2 → events.out.tfevents.1625926631.t1v-n-b95d739e-w-0.271163.3.v2 RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a0b06bb4c0c8bcf1babf235c79c7b2fcfbd0a8b3874f799b016c5882bc8126e
3
- size 6822986
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:058091f2aa628751ffce3a9b123432d04c86420a4355fec8763decfd8a17d9be
3
+ size 3581817
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ad0a0dab0d811ae79a350baae4a566468756c89b37c7847013a7d71b4d9515a
3
  size 711588089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8678c0f46f5db25de579fa7beaba06e1c3b2cf73dfa0635f07e5822b760dce16
3
  size 711588089
nohup.out CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c6e9b48135ecd060b322a351bcf8d088d66545864cff1ce1225776af641215c
3
- size 6077478
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:355b4eaf707e807efbb0fc5c51bfc2a4a766c6e846b5f6c6e8484f7016d03a01
3
+ size 2037187
run.sh CHANGED
@@ -8,17 +8,13 @@ python3 run_mlm_flax.py \
8
  --dataset_name="oscar" \
9
  --dataset_config_name="unshuffled_deduplicated_id" \
10
  --max_seq_length="128" \
11
- --weight_decay="0.01" \
12
  --preprocessing_num_workers="64" \
13
  --per_device_train_batch_size="128" \
14
  --per_device_eval_batch_size="128" \
15
- --learning_rate="2e-5" \
16
  --warmup_steps="1000" \
17
  --overwrite_output_dir \
18
  --num_train_epochs="10" \
19
- --logging_steps="500" \
20
- --save_steps="10000" \
21
- --eval_steps="10000" \
22
  --dtype="bfloat16" \
23
  --push_to_hub
24
 
 
8
  --dataset_name="oscar" \
9
  --dataset_config_name="unshuffled_deduplicated_id" \
10
  --max_seq_length="128" \
 
11
  --preprocessing_num_workers="64" \
12
  --per_device_train_batch_size="128" \
13
  --per_device_eval_batch_size="128" \
14
+ --learning_rate="7e-5" \
15
  --warmup_steps="1000" \
16
  --overwrite_output_dir \
17
  --num_train_epochs="10" \
 
 
 
18
  --dtype="bfloat16" \
19
  --push_to_hub
20