pere committed
Commit a63ae20
1 Parent(s): ad93372

Saving weights and logs of step 1000

config.json CHANGED
@@ -19,7 +19,7 @@
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
- "transformers_version": "4.15.0.dev0",
+ "transformers_version": "4.16.0.dev0",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
events.out.tfevents.1644205555.t1v-n-79f0077b-w-0.401125.0.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e2af1fe635d7f82065c5bf878b782423969184c0166560393084266ea5901947
+ size 40
events.out.tfevents.1644212923.t1v-n-79f0077b-w-0.446891.0.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8111d41e059aec555485bed7138f0db88eecc16f5892eb6e19b5165841ec58db
+ size 40
events.out.tfevents.1644215479.t1v-n-79f0077b-w-0.450647.0.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:181bc17df02b41dbf683a685073aeb91c09fed6dec3b8ce74e09577c46de1971
+ size 147136
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4abb41156cf5e1bcf659c487aa968be2612b5af34c29a3e312dedf77fe42746c
+ oid sha256:1c3043126169385088e6f98e40ad48982f05b396f4317b4be26034416ef2fcdc
  size 498796983
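The TensorBoard event files and flax_model.msgpack are tracked through Git LFS, so the diffs above show three-line pointer files (version, oid, size) rather than the binaries themselves. A minimal sketch, in plain Python, of parsing a pointer of the format shown above:

# Minimal sketch: parse a Git LFS pointer file (one "key value" pair per line).
pointer_text = """version https://git-lfs.github.com/spec/v1
oid sha256:1c3043126169385088e6f98e40ad48982f05b396f4317b4be26034416ef2fcdc
size 498796983"""

fields = dict(line.split(" ", 1) for line in pointer_text.splitlines())
algo, digest = fields["oid"].split(":", 1)       # "sha256", hex digest
print(algo, digest[:12], int(fields["size"]))    # sha256 1c3043126169 498796983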
run_128_scandinavian.sh CHANGED
@@ -1,3 +1,7 @@
+ # --per_device_train_batch_size="232" \
+ # --per_device_eval_batch_size="232" \
+
+
  python run_mlm_flax.py \
      --output_dir="./" \
      --model_type="roberta" \
@@ -8,8 +12,8 @@ python run_mlm_flax.py \
      --cache_dir="/mnt/disks/flaxdisk/cache/" \
      --max_seq_length="128" \
      --weight_decay="0.01" \
-     --per_device_train_batch_size="232" \
-     --per_device_eval_batch_size="232" \
+     --per_device_train_batch_size="116" \
+     --per_device_eval_batch_size="116" \
      --pad_to_max_length \
      --learning_rate="3e-4" \
      --warmup_steps="10000" \
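This change halves the per-device batch size from 232 to 116, keeping the old values as comments at the top of the script. In a data-parallel Flax run the global batch is the per-device batch times the device count, so the effective batch size is halved as well. A hedged sketch of that arithmetic (the 8-device count is an assumption for a single TPU v3-8 host; the commit does not state the topology):

# Sketch of the effective-batch arithmetic; the device count is an assumption.
import jax

per_device_batch_size = 116                  # value set in this commit (was 232)
num_devices = jax.device_count()             # e.g. 8 on a TPU v3-8 host
print(per_device_batch_size * num_devices)   # 928 with 8 devices (was 1856)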