boris committed on
Commit
63249ac
1 Parent(s): a30dbd3

feat: update scripts

Browse files
Files changed (2) hide show
  1. seq2seq/do_big_run.sh +5 -5
  2. seq2seq/do_small_run.sh +3 -3
seq2seq/do_big_run.sh CHANGED
@@ -1,16 +1,16 @@
1
  python run_seq2seq_flax.py \
2
  --max_source_length 128 \
3
- --train_file /data/CC12M/encoded-small-train.tsv \ # ignored for now in our script
4
- --validation_file /data/CC12M/encoded-small-valid.tsv \ # ignored for now in our script
5
  --output_dir output \
6
  --per_device_train_batch_size 56 \
7
  --per_device_eval_batch_size 56 \
8
  --preprocessing_num_workers 80 \
9
- --warmup_steps 125 \
10
  --gradient_accumulation_steps 8 \
11
  --do_train \
12
  --do_eval \
13
  --adafactor \
14
- --num_train_epochs 10 \
15
  --log_model \
16
- --learning_rate 0.001
 
1
  python run_seq2seq_flax.py \
2
  --max_source_length 128 \
3
+ --train_file /data/CC12M/encoded-small-train.tsv \
4
+ --validation_file /data/CC12M/encoded-small-valid.tsv \
5
  --output_dir output \
6
  --per_device_train_batch_size 56 \
7
  --per_device_eval_batch_size 56 \
8
  --preprocessing_num_workers 80 \
9
+ --warmup_steps 250 \
10
  --gradient_accumulation_steps 8 \
11
  --do_train \
12
  --do_eval \
13
  --adafactor \
14
+ --num_train_epochs 6 \
15
  --log_model \
16
+ --learning_rate 0.005
seq2seq/do_small_run.sh CHANGED
@@ -1,7 +1,7 @@
1
  python run_seq2seq_flax.py \
2
  --max_source_length 128 \
3
- --train_file /data/CC12M/encoded-small-train.tsv \ # ignored for now in our script
4
- --validation_file /data/CC12M/encoded-small-valid.tsv \ # ignored for now in our script
5
  --output_dir output \
6
  --per_device_train_batch_size 56 \
7
  --per_device_eval_batch_size 56 \
@@ -13,4 +13,4 @@ python run_seq2seq_flax.py \
13
  --adafactor \
14
  --num_train_epochs 1 \
15
  --max_train_samples 20000 \
16
- --learning_rate 0.003
 
1
  python run_seq2seq_flax.py \
2
  --max_source_length 128 \
3
+ --train_file /data/CC12M/encoded-small-train.tsv \
4
+ --validation_file /data/CC12M/encoded-small-valid.tsv \
5
  --output_dir output \
6
  --per_device_train_batch_size 56 \
7
  --per_device_eval_batch_size 56 \
 
13
  --adafactor \
14
  --num_train_epochs 1 \
15
  --max_train_samples 20000 \
16
+ --learning_rate 0.005