pere committed on
Commit
ff54415
1 Parent(s): a8ea73b

Saving weights and logs of step 2500

Browse files
events.out.tfevents.1638828930.t1v-n-d4d6e0cd-w-0.721231.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f6dd60bd2a9f5218802933a8ad1e193b0fcfd889464d3d06d9084ad2dbc8ec9
3
+ size 367912
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fff7bafec4ef805f528b34450ef3a3408cc179716b0089b610c4d29447b474ea
3
  size 498796983
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ceac157b5b4dd77a8a109aa0a7acd9f4b3858da6fe44496eb76bba963c6248a9
3
  size 498796983
run_oscar_adafactor.sh ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ./run_mlm_flax.py \
2
+ --output_dir="./" \
3
+ --model_type="roberta" \
4
+ --config_name="./" \
5
+ --tokenizer_name="./" \
6
+ --dataset_name="oscar" \
7
+ --dataset_config_name="unshuffled_deduplicated_no" \
8
+ --max_seq_length="128" \
9
+ --weight_decay="0.01" \
10
+ --per_device_train_batch_size="128" \
11
+ --per_device_eval_batch_size="128" \
12
+ --learning_rate="3e-4" \
13
+ --warmup_steps="1000" \
14
+ --overwrite_output_dir \
15
+ --num_train_epochs="18" \
16
+ --adam_beta1="0.9" \
17
+ --adam_beta2="0.98" \
18
+ --logging_steps="500" \
19
+ --save_steps="2500" \
20
+ --eval_steps="2500" \
21
+ --adafactor \
22
+ --push_to_hub