Spaces:
Running
Running
feat: update scripts
Browse files
- dev/seq2seq/do_big_run.sh +4 -3
- dev/seq2seq/do_small_run.sh +5 -5
dev/seq2seq/do_big_run.sh
CHANGED
@@ -2,15 +2,16 @@ python run_seq2seq_flax.py \
|
|
2 |
--dataset_repo_or_path dalle-mini/encoded \
|
3 |
--train_file **/train/*/*.jsonl \
|
4 |
--validation_file **/valid/*/*.jsonl \
|
5 |
-
--len_train
|
6 |
-
--len_eval
|
|
|
7 |
--streaming \
|
8 |
--normalize_text \
|
9 |
--output_dir output \
|
10 |
--per_device_train_batch_size 56 \
|
11 |
--per_device_eval_batch_size 56 \
|
12 |
--preprocessing_num_workers 80 \
|
13 |
-
--warmup_steps
|
14 |
--gradient_accumulation_steps 8 \
|
15 |
--do_train \
|
16 |
--do_eval \
|
|
|
2 |
--dataset_repo_or_path dalle-mini/encoded \
|
3 |
--train_file **/train/*/*.jsonl \
|
4 |
--validation_file **/valid/*/*.jsonl \
|
5 |
+
--len_train 129847128 \
|
6 |
+
--len_eval 157312 \
|
7 |
+
--eval_steps 1000 \
|
8 |
--streaming \
|
9 |
--normalize_text \
|
10 |
--output_dir output \
|
11 |
--per_device_train_batch_size 56 \
|
12 |
--per_device_eval_batch_size 56 \
|
13 |
--preprocessing_num_workers 80 \
|
14 |
+
--warmup_steps 5000 \
|
15 |
--gradient_accumulation_steps 8 \
|
16 |
--do_train \
|
17 |
--do_eval \
|
dev/seq2seq/do_small_run.sh
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
python run_seq2seq_flax.py \
|
2 |
--dataset_repo_or_path dalle-mini/encoded \
|
3 |
-
--train_file **/train
|
4 |
--validation_file **/valid/*/*.jsonl \
|
5 |
-
--len_train
|
6 |
-
--len_eval
|
7 |
--streaming \
|
8 |
--output_dir output \
|
9 |
-
--per_device_train_batch_size
|
10 |
-
--per_device_eval_batch_size
|
11 |
--preprocessing_num_workers 80 \
|
12 |
--warmup_steps 125 \
|
13 |
--gradient_accumulation_steps 8 \
|
|
|
1 |
python run_seq2seq_flax.py \
|
2 |
--dataset_repo_or_path dalle-mini/encoded \
|
3 |
+
--train_file **/train/CC3M/*.jsonl \
|
4 |
--validation_file **/valid/*/*.jsonl \
|
5 |
+
--len_train 129847128 \
|
6 |
+
--len_eval 157312 \
|
7 |
--streaming \
|
8 |
--output_dir output \
|
9 |
+
--per_device_train_batch_size 16 \
|
10 |
+
--per_device_eval_batch_size 16 \
|
11 |
--preprocessing_num_workers 80 \
|
12 |
--warmup_steps 125 \
|
13 |
--gradient_accumulation_steps 8 \
|