# dalle-mini/seq2seq/sweep.yaml
# Weights & Biases sweep configuration.
# Added in commit dad6d93 by boris: "feat: add sweep for parameter search".
# Training script launched for every sweep run; it is substituted for
# ${program} in the `command` list.
program: run_seq2seq_flax.py
# W&B entity and project under which the sweep is recorded.
entity: wandb
project: hf-flax-dalle-mini
# Search strategy: hyperparameter values are sampled at random.
method: random
# Objective used to compare runs: lower eval/loss is better.
# `name` and `goal` must be nested under `metric`; at the top level they
# would leave `metric` empty.
metric:
  name: eval/loss
  goal: minimize
# Search space. Only learning_rate is sampled; the other entries are
# pinned to a single value for every run. Each parameter's settings must be
# nested under its own key, otherwise the two `value` entries collide as
# duplicate keys.
parameters:
  learning_rate:
    distribution: log_uniform
    # min/max are natural-log exponents: samples range from exp(min) to
    # exp(max), i.e. roughly 1e-5 to 1e-3.
    min: -11.5
    max: -6.9
  gradient_accumulation_steps:
    value: 8
  warmup_steps:
    value: 1000
# Command line executed for each sweep run, one argv element per list item.
# ${program} and ${args} are W&B macros: ${program} is replaced by the
# `program` entry of this file, and ${args} is expected to expand to the
# sampled hyperparameters as command-line flags (see the W&B sweep docs).
command:
  - python3
  - ${program}
  # Output location and overwrite flag.
  - '--output_dir'
  - './output_sweep'
  - '--overwrite_output_dir'
  # Optimizer selection.
  - '--adafactor'
  # Training budget for a single run.
  # NOTE(review): 1000 samples at a per-device batch size of 32 is only about
  # 31 batches per epoch, far fewer than the warmup_steps value of 1000 set
  # under `parameters` - confirm this short budget is intended.
  - '--num_train_epochs'
  - 1
  - '--max_train_samples'
  - 1000
  # Per-device batch sizes.
  - '--per_device_train_batch_size'
  - 32
  - '--per_device_eval_batch_size'
  - 32
  # Number of worker processes used for dataset preprocessing.
  - '--preprocessing_num_workers'
  - 80
  # Run both the training and the evaluation phases.
  - '--do_train'
  - '--do_eval'
  - ${args}