---
# Hyperparameter sweep definition: random search over the learning rate for
# train.py, with every other training argument pinned to a single value.
# NOTE(review): the key layout matches the Weights & Biases sweep schema —
# confirm this file is consumed by `wandb sweep`.

# Script the sweep launches, and where runs are recorded.
program: train.py
entity: dalle-mini
project: dalle-mini

# Search strategy and the objective it optimizes.
method: random
metric:
  name: eval/loss
  goal: minimize

parameters:
  # The only swept parameter.
  learning_rate:
    distribution: log_uniform
    # from exp(min) to exp(max), i.e. roughly 1.0e-3 to 3.0e-2
    min: -6.9
    max: -3.5

  # Fixed values: each is passed unchanged to every run.
  tokenizer_name:
    value: boris/dalle-mini-tokenizer
  config_name:
    value: ./config/mini
  dtype:
    value: bfloat16
  dataset_repo_or_path:
    value: ./data
  per_device_train_batch_size:
    value: 64
  per_device_eval_batch_size:
    value: 64
  gradient_accumulation_steps:
    value: 1
  warmup_steps:
    value: 4000
  num_train_epochs:
    value: 1
  logging_steps:
    value: 32
  eval_steps:
    value: 800
  max_train_samples:
    value: 1000000

# Command line used for each run. ${program} and ${args} are placeholders:
# presumably expanded by the sweep agent to the `program` value above and to
# the `--name=value` form of each entry in `parameters` — verify against the
# sweep tool's documentation. The flags in between are constant for all runs.
command:
  - python3
  - ${program}
  - "--streaming"
  - "--output_dir"
  - "./output"
  - "--overwrite_output_dir"
  - "--adafactor"
  - "--do_train"
  - "--do_eval"
  - ${args}