birgermoell committed
Commit 741851c
1 parent: ec35d64

Saving weights and logs of step 1001

events.out.tfevents.1625519728.t1v-n-98937c84-w-0.273849.3.v2 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9f9ea01402a6c19d6301d33c11a94bc174d8d5c42ca78838ae9f3a0d7112c4f
+size 36746
events.out.tfevents.1625538240.t1v-n-98937c84-w-0.291128.3.v2 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6119abea838ae418cf6aea5b9d383bbff047bc03a0ea3720bee5330a6dc1a1bc
+size 147343
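
The two TensorBoard event files above are committed as Git LFS pointers rather than raw bytes: each pointer records only the spec version, a sha256 object id, and the object size. A minimal sketch of verifying a fetched object against its pointer, assuming the LFS object has already been pulled to the path below:

import hashlib

# Fields copied from the first pointer above; the local path assumes
# the LFS object has already been fetched into the working tree.
EXPECTED_OID = "c9f9ea01402a6c19d6301d33c11a94bc174d8d5c42ca78838ae9f3a0d7112c4f"
EXPECTED_SIZE = 36746
LOCAL_PATH = "events.out.tfevents.1625519728.t1v-n-98937c84-w-0.273849.3.v2"

def verify_lfs_object(path, oid, size):
    """Recompute sha256 and byte count, compare against the pointer fields."""
    digest = hashlib.sha256()
    total = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            total += len(chunk)
    return digest.hexdigest() == oid and total == size

print(verify_lfs_object(LOCAL_PATH, EXPECTED_OID, EXPECTED_SIZE))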
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:424306ea98f9a679c5842bc2691d3534e391dfb03354174121d688ad94dc9a65
+oid sha256:a80a06921f90d26504ed4e0758408585ec09f23b64374dfc4c257e5d766af797
 size 498796983
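
flax_model.msgpack stores the serialized Flax parameters; the size is unchanged (498796983 bytes), so this commit overwrites the RoBERTa-base weights in place with the step-1001 checkpoint. A minimal sketch of loading such a checkpoint with transformers, assuming the repo has been cloned locally so that config.json and tokenizer.json sit next to the weights:

from transformers import AutoTokenizer, FlaxRobertaForMaskedLM

# MODEL_DIR is an assumption: a local clone of this repository.
MODEL_DIR = "./"

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = FlaxRobertaForMaskedLM.from_pretrained(MODEL_DIR)

# Swedish masked-LM probe; NumPy tensors feed the Flax model directly.
inputs = tokenizer("Stockholm är huvudstaden i <mask>.", return_tensors="np")
logits = model(**inputs).logits  # shape: (batch, seq_len, vocab_size)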
mc4script.sh ADDED
@@ -0,0 +1,20 @@
+./run_mlm_flax_stream.py \
+    --output_dir="${MODEL_DIR}" \
+    --model_type="roberta" \
+    --config_name="${MODEL_DIR}" \
+    --tokenizer_name="${MODEL_DIR}" \
+    --dataset_name="mc4" \
+    --dataset_config_name="sv" \
+    --max_seq_length="128" \
+    --per_device_train_batch_size="128" \
+    --per_device_eval_batch_size="128" \
+    --learning_rate="3e-4" \
+    --warmup_steps="1000" \
+    --overwrite_output_dir \
+    --adam_beta1="0.9" \
+    --adam_beta2="0.98" \
+    --num_train_steps="10000" \
+    --num_eval_samples="5000" \
+    --logging_steps="250" \
+    --eval_steps="1000" \
+    --push_to_hub
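
The script drives run_mlm_flax_stream.py, which streams mC4 instead of downloading it, so the Swedish split is read on the fly during training. A sketch of the equivalent streaming load with the datasets library (the shuffle-buffer size is an illustrative assumption, not a value from the script):

from datasets import load_dataset

# Matches --dataset_name="mc4" --dataset_config_name="sv" above.
stream = load_dataset("mc4", "sv", split="train", streaming=True)

# Streaming shuffle uses a bounded buffer; 10_000 is an arbitrary choice.
stream = stream.shuffle(seed=42, buffer_size=10_000)

for example in stream.take(2):
    print(example["text"][:80])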
oscar_script.sh ADDED
@@ -0,0 +1,20 @@
+./run_mlm_flax_stream.py \
+    --output_dir="${MODEL_DIR}" \
+    --model_type="roberta" \
+    --config_name="${MODEL_DIR}" \
+    --tokenizer_name="${MODEL_DIR}" \
+    --dataset_name="oscar" \
+    --dataset_config_name="unshuffled_deduplicated_sv" \
+    --max_seq_length="128" \
+    --per_device_train_batch_size="128" \
+    --per_device_eval_batch_size="128" \
+    --learning_rate="3e-4" \
+    --warmup_steps="1000" \
+    --overwrite_output_dir \
+    --adam_beta1="0.9" \
+    --adam_beta2="0.98" \
+    --num_train_steps="10000" \
+    --num_eval_samples="5000" \
+    --logging_steps="250" \
+    --eval_steps="1000" \
+    --push_to_hub
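
Apart from the dataset flags (oscar / unshuffled_deduplicated_sv instead of mc4 / sv), this script is identical to mc4script.sh: Adam with beta1=0.9 and beta2=0.98, a 3e-4 peak learning rate, 1000 warmup steps, and 10000 steps in total. A sketch of that schedule in optax, assuming the linear warmup followed by linear decay used in the stock Flax MLM examples:

import optax

# Values taken from the flags above; the warmup/decay shape is an
# assumption based on the standard Flax MLM training scripts.
warmup = optax.linear_schedule(init_value=0.0, end_value=3e-4, transition_steps=1_000)
decay = optax.linear_schedule(init_value=3e-4, end_value=0.0, transition_steps=9_000)
schedule = optax.join_schedules([warmup, decay], boundaries=[1_000])

optimizer = optax.adamw(learning_rate=schedule, b1=0.9, b2=0.98)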
tokenizer.json CHANGED
The diff for this file is too large to render.