boris committed on
Commit
5e244d0
1 Parent(s): 3cccb01

feat: split script for small and big runs

Browse files
seq2seq/do_big_run.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ python run_seq2seq_flax.py \
2
+ --max_source_length 128 \
3
+ --train_file /data/CC12M/encoded-small-train.tsv \
4
+ --validation_file /data/CC12M/encoded-small-valid.tsv \
5
+ --output_dir output \
6
+ --per_device_train_batch_size 56 \
7
+ --per_device_eval_batch_size 56 \
8
+ --preprocessing_num_workers 80 \
9
+ --warmup_steps 125 \
10
+ --gradient_accumulation_steps 8 \
11
+ --do_train \
12
+ --do_eval \
13
+ --adafactor \
14
+ --num_train_epochs 10 \
15
+ --log_model \
16
+ --learning_rate 0.001 # NOTE: --train_file and --validation_file are ignored for now in our script (note kept off the "\" lines: anything after a trailing backslash breaks the line continuation and ends the command early)
seq2seq/{do_run.sh → do_small_run.sh} RENAMED
@@ -1,7 +1,7 @@
1
  python run_seq2seq_flax.py \
2
  --max_source_length 128 \
3
- --train_file /data/CC12M/encoded-small-train.tsv \
4
- --validation_file /data/CC12M/encoded-small-valid.tsv \
5
  --output_dir output \
6
  --per_device_train_batch_size 56 \
7
  --per_device_eval_batch_size 56 \
 
1
  python run_seq2seq_flax.py \
2
  --max_source_length 128 \
3
+ --train_file /data/CC12M/encoded-small-train.tsv \ # ignored for now in our script
4
+ --validation_file /data/CC12M/encoded-small-valid.tsv \ # ignored for now in our script
5
  --output_dir output \
6
  --per_device_train_batch_size 56 \
7
  --per_device_eval_batch_size 56 \