boris committed on
Commit
06f1345
2 Parent(s): ba73e00 dad6d93

Merge pull request #12 from borisdayma/feat-sweeps

Browse files
Files changed (2) hide show
  1. seq2seq/run_seq2seq_flax.py +1 -1
  2. seq2seq/sweep.yaml +37 -0
seq2seq/run_seq2seq_flax.py CHANGED
@@ -152,7 +152,7 @@ class DataTrainingArguments:
152
  metadata={"help": "An optional input predict data file to do prediction on (a text file)."},
153
  )
154
  max_source_length: Optional[int] = field(
155
- default=1024,
156
  metadata={
157
  "help": "The maximum total input sequence length after tokenization. Sequences longer "
158
  "than this will be truncated, sequences shorter will be padded."
 
152
  metadata={"help": "An optional input predict data file to do prediction on (a text file)."},
153
  )
154
  max_source_length: Optional[int] = field(
155
+ default=128,
156
  metadata={
157
  "help": "The maximum total input sequence length after tokenization. Sequences longer "
158
  "than this will be truncated, sequences shorter will be padded."
seq2seq/sweep.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program: run_seq2seq_flax.py
2
+ entity: wandb
3
+ project: hf-flax-dalle-mini
4
+ method: random
5
+ metric:
6
+ name: eval/loss
7
+ goal: minimize
8
+ parameters:
9
+ learning_rate:
10
+ distribution: log_uniform
11
+ # from exp(min) to exp(max), ie 1e-5 to 1e-3 on log scale
12
+ min: -11.5
13
+ max: -6.9
14
+ gradient_accumulation_steps:
15
+ value: 8
16
+ warmup_steps:
17
+ value: 1000
18
+ command:
19
+ - python3
20
+ - ${program}
21
+ - "--output_dir"
22
+ - "./output_sweep"
23
+ - "--overwrite_output_dir"
24
+ - "--adafactor"
25
+ - "--num_train_epochs"
26
+ - 1
27
+ - "--max_train_samples"
28
+ - 1000
29
+ - "--per_device_train_batch_size"
30
+ - 32
31
+ - "--per_device_eval_batch_size"
32
+ - 32
33
+ - "--preprocessing_num_workers"
34
+ - 80
35
+ - "--do_train"
36
+ - "--do_eval"
37
+ - ${args}