---
# Hyperparameter sweep definition (layout matches a Weights & Biases sweep
# config -- confirm against the tool that consumes this file).
# Only `learning_rate` is searched; every other parameter is pinned to a
# single value.

# Script substituted for ${program} in `command` below.
program: train.py
project: dalle-mini

# Search strategy and the metric it optimizes.
method: random
metric:
  name: eval/loss
  goal: minimize

parameters:
  # --- optimizer ---
  optim:
    value: distributed_shampoo
  learning_rate:
    distribution: log_uniform
    # Bounds are natural-log exponents: samples range from exp(min) to
    # exp(max), i.e. roughly 1.0e-4 (exp(-9.2)) to 1.0e-3 (exp(-6.9)).
    min: -9.2
    max: -6.9

  # --- model / data ---
  tokenizer_name:
    value: boris/dalle-mini-tokenizer
  config_name:
    value: ./config/mini
  dtype:
    value: bfloat16
  dataset_repo_or_path:
    value: ./data

  # --- batching ---
  per_device_train_batch_size:
    value: 64
  per_device_eval_batch_size:
    value: 64
  gradient_accumulation_steps:
    value: 1

  # --- schedule / run length ---
  warmup_steps:
    value: 1000
  num_train_epochs:
    value: 1
  max_train_samples:
    value: 1000000

  # --- logging / evaluation cadence ---
  logging_steps:
    value: 40
  eval_steps:
    value: 200

# Command line used for each run. The fixed flags are listed here;
# ${args} is presumably expanded by the sweep runner into the parameters
# above -- verify against the runner's documentation.
command:
  - python3
  - ${program}
  - "--streaming"
  - "--output_dir"
  - "./output"
  - "--overwrite_output_dir"
  - "--do_train"
  - "--do_eval"
  - ${args}