boris committed
Commit dbe8c41
1 Parent(s): 8bb2236

feat: update default parameters

seq2seq/run_seq2seq_flax.py CHANGED
@@ -219,7 +219,7 @@ class DataTrainingArguments:
         default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
     )
     log_interval: Optional[int] = field(
-        default=5,
+        default=40,
         metadata={
             "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
            "value if set."
seq2seq/sweep.yaml CHANGED
@@ -9,12 +9,13 @@ parameters:
   learning_rate:
     distribution: log_uniform
     # from exp(min) to exp(max), ie 1e-4 to 5e-3 on log scale
-    min: -9.2
+    min: -9.9
     max: -5.3
   gradient_accumulation_steps:
     value: 8
   warmup_steps:
-    value: 1000
+    # in term of optimization steps so multiplied by gradient accumulation
+    value: 125
 command:
   - python3
   - ${program}
@@ -29,7 +30,7 @@ command:
   - "--num_train_epochs"
   - 1
   - "--max_train_samples"
-  - 2000000
+  - 1500000
   - "--per_device_train_batch_size"
   - 56
   - "--per_device_eval_batch_size"