---
# Training configuration (finetuning run, 7B model).
# NOTE(review): the original file was collapsed onto a single line, which is
# not parseable YAML (plain scalars cannot contain ": "). Restored to block
# style, one key per line; all keys and values preserved verbatim.

# Data / model inputs
model_name_or_path: ./hf_llama_models/7B
config_name: null
train_file: data/gpt4_cp_2.json
tokenizer_settings_note: null  # (no such key existed; see tokenizer flags below)
use_slow_tokenizer: true
add_bos: false
trust_remote_code: false
max_seq_length: 2048
preprocessing_num_workers: 16
overwrite_cache: false

# Optimization
learning_rate: 2.0e-05
lr_scheduler_type: linear
warmup_ratio: 0.03
weight_decay: 0.0
clip_grad_norm: -1  # presumably -1 disables gradient clipping — TODO confirm against trainer
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
num_train_epochs: 2
max_train_steps: 738
reduce_loss: mean
use_8bit_optimizer: false
seed: null  # no fixed seed — run is not reproducible bit-for-bit

# Memory / speed options
gradient_checkpointing: false
use_flash_attn: true
low_cpu_mem_usage: false

# LoRA / QLoRA (disabled; rank/alpha/dropout below are inert while use_lora is false)
use_lora: false
use_qlora: false
lora_rank: 64
lora_alpha: 16
lora_dropout: 0.1

# Checkpointing / logging / output
output_dir: output/gpt4_cp_2_7B/
checkpointing_steps: null
resume_from_checkpoint: null
logging_steps: 1
with_tracking: true
report_to: tensorboard
timeout: 1800  # presumably seconds — TODO confirm unit with consuming script
