---
# Training configuration — fine-tuning llama-2-13b on the Lumos unified
# ground (iterative) training annotations, with TensorBoard tracking.
# NOTE(review): key order preserved from the original file; values unchanged.

# Checkpointing / resumption
checkpointing_steps: null
clip_grad_norm: -1  # -1 presumably disables gradient clipping — confirm against trainer
config_name: null

# Dataset
dataset_config_name: null
dataset_name: null

# Optimization
gradient_accumulation_steps: 16
gradient_checkpointing: false
learning_rate: 2.0e-05
logging_steps: 1

# LoRA hyperparameters (present but inactive: use_lora / use_qlora are false below)
lora_alpha: 16
lora_dropout: 0.1
lora_rank: 64

low_cpu_mem_usage: false
lr_scheduler_type: linear
max_seq_length: 1024
max_train_steps: 868

# Model / paths
model_name_or_path: /net/nfs/mosaic/day/llama_hf/llama-2-13b
num_train_epochs: 2
output_dir: /net/nfs/mosaic/day/uniagent/train/output/lumos_unified_ground_iterative-13B_new/
overwrite_cache: false
per_device_train_batch_size: 2
preprocessing_num_workers: 16
report_to: tensorboard
resume_from_checkpoint: null
seed: null
tokenizer_name: /net/nfs/mosaic/day/llama_hf/llama-2-13b
train_file: data/train/unified/train_annots/lumos_unified_ground_iterative.jsonl

# Feature flags
use_8bit_optimizer: false
use_flash_attn: true
use_lora: false
use_qlora: false
use_slow_tokenizer: true

# Regularization / schedule
warmup_ratio: 0.03
weight_decay: 0.0
with_tracking: true