boris committed
Commit de21250
Parent: caf7f44

feat(sweep): update config

Files changed (1):
  tools/train/sweep.yaml (+34 -23)
tools/train/sweep.yaml CHANGED
@@ -1,6 +1,6 @@
-program: run_seq2seq_flax.py
-entity: wandb
-project: hf-flax-dalle-mini
+program: train.py
+entity: dalle-mini
+project: dalle-mini
 method: random
 metric:
   name: eval/loss
@@ -8,36 +8,47 @@ metric:
 parameters:
   learning_rate:
     distribution: log_uniform
-    # from exp(min) to exp(max), ie 5e-5 to 5e-3 on log scale
-    min: -9.9
-    max: -5.3
+    # from exp(min) to exp(max)
+    min: -6.9
+    max: -3.5
   gradient_accumulation_steps:
     value: 8
   warmup_steps:
-    # in term of optimization steps so multiplied by gradient accumulation
-    value: 125
+    value: 4000
+#TODO: outdated command
 command:
   - python3
   - ${program}
-  - "--train_file"
-  - "/data/CC12M/encoded-small-train.tsv"
-  - "--validation_file"
-  - "/data/CC12M/encoded-small-valid.tsv"
-  - "--output_dir"
-  - "./output_sweep"
-  - "--overwrite_output_dir"
-  - "--adafactor"
-  - "--num_train_epochs"
-  - 1
-  - "--max_train_samples"
-  - 1500000
+  - "--tokenizer_name"
+  - "boris/dalle-mini-tokenizer"
+  - "--config_name"
+  - "facebook/bart-large-cnn"
+  - "--dataset_repo_or_path"
+  - "boris/gis_vqgan_f16_16384"
+  - "--streaming"
+  - "--use_auth_token"
+  - "--image_vocab_size"
+  - 16384
+  - "--image_length"
+  - 256
+  - "--normalize_text"
+  - True
   - "--per_device_train_batch_size"
   - 56
   - "--per_device_eval_batch_size"
   - 56
-  - "--preprocessing_num_workers"
-  - 80
-  - "--no_decay"
+  - "--adafactor"
   - "--do_train"
   - "--do_eval"
+  - "--num_train_epochs"
+  - 1
+  - "--logging_steps"
+  - 40
+  - "--eval_steps"
+  - 800
+  - "--output_dir"
+  - "./output"
+  - "--overwrite_output_dir"
+  - "--max_train_samples"
+  - 10000000
   - ${args}
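
For context: wandb's log_uniform distribution samples a value uniformly between min and max and exponentiates it (per the "from exp(min) to exp(max)" comment in the file), so the new bounds correspond roughly to learning rates between 1e-3 and 3e-2, versus about 5e-5 to 5e-3 before this commit. A minimal sketch of the mapping, using only the Python standard library and not part of the repo:

import math

# wandb's log_uniform draws x ~ U(min, max) and uses exp(x) as the value,
# matching the "from exp(min) to exp(max)" comment in sweep.yaml.
bounds = {"old": (-9.9, -5.3), "new": (-6.9, -3.5)}
for label, (lo, hi) in bounds.items():
    print(f"{label}: learning_rate in [{math.exp(lo):.1e}, {math.exp(hi):.1e}]")
# old: learning_rate in [5.0e-05, 5.0e-03]
# new: learning_rate in [1.0e-03, 3.0e-02]

Assuming the usual W&B workflow, a config like this is registered with "wandb sweep tools/train/sweep.yaml" and run with "wandb agent <sweep-id>"; the #TODO in the diff flags the command section itself as outdated relative to train.py.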