---
# wandb sweep configuration: random search over learning_rate for the
# dalle-mini seq2seq Flax training script, minimizing eval/loss.
program: run_seq2seq_flax.py
entity: wandb
project: hf-flax-dalle-mini
method: random
metric:
  name: eval/loss
  goal: minimize
parameters:
  learning_rate:
    # from exp(min) to exp(max), ie 1e-5 to 1e-3 on log scale
    distribution: log_uniform
    min: -11.5
    max: -6.9
  gradient_accumulation_steps:
    value: 8
  warmup_steps:
    value: 1000
# command template expanded by the wandb agent; ${program} and ${args}
# are sweep macros (script path and the swept hyperparameter flags).
command:
  - python3
  - ${program}
  - "--output_dir"
  - "./output_sweep"
  - "--overwrite_output_dir"
  - "--adafactor"
  - "--num_train_epochs"
  - "1"
  - "--max_train_samples"
  - "1000"
  - "--per_device_train_batch_size"
  - "32"
  - "--per_device_eval_batch_size"
  - "32"
  - "--preprocessing_num_workers"
  - "80"
  - "--do_train"
  - "--do_eval"
  - ${args}