---
# Training run configuration with five top-level sections:
# data_cfgs, logger_cfgs, model_cfgs, special_tokens and train_cfgs.

# Dataset selection. Only the train_* entries are populated; every eval_*
# entry is null.
data_cfgs:
  eval_data_files: null
  eval_datasets: null
  eval_optional_args: []
  eval_size: null
  eval_split: null
  eval_subset: null
  eval_template: null
  train_data_files: 0930_25k_ti2ti_preference.pt
  # NOTE(review): absolute, machine-specific path - adjust per environment.
  train_datasets: /data/projects/aligner/hantao/hantao_local/hantao/local_anything/align-anything/projects/text_image_to_text_image/output/
  train_optional_args: []
  train_size: null
  train_split: train
  train_subset: null
  train_template: ti2ti_preference

# Logging and output settings.
logger_cfgs:
  cache_dir: null
  log_project: align-anything
  log_run_name: dpo
  log_type: wandb
  # Relative path; presumably resolved against the launch directory - confirm.
  output_dir: ../outputs/t2i/dpo_0930_25k_ti2ti_preference_baseline
  # Written as an integer rather than a float; presumably a step count - confirm.
  save_interval: 400

# Model to load.
model_cfgs:
  model_max_length: 4096
  # NOTE(review): absolute, machine-specific path - adjust per environment.
  model_name_or_path: /data/projects/aligner/hantao/hantao_local/hantao/models/0917_cham_ti2t_sft
  trust_remote_code: true

special_tokens: null

# Optimisation hyperparameters.
# Count-like values (epochs, accumulation steps, batch sizes) are written as
# integers so they load as int instead of float; this assumes the consumer
# treats them as counts - TODO confirm against the trainer.
train_cfgs:
  adam_betas:
    - 0.9
    - 0.95
  bf16: true
  ds_cfgs: ds_z3_config.json
  epochs: 3
  # NOTE(review): all eval_* dataset entries in data_cfgs are null, so these
  # two evaluation settings presumably have no effect - confirm.
  eval_interval: 10
  eval_strategy: epoch
  fp16: false
  freeze_language_model: true
  freeze_mm_proj: true
  freeze_vision_tower: false
  gradient_accumulation_steps: 2
  gradient_checkpointing: true
  learning_rate: 5.0e-07
  lr_scheduler_type: cosine
  lr_warmup_ratio: 0.03
  per_device_eval_batch_size: 2
  per_device_train_batch_size: 2
  regularization: 0.001
  scale_coeff: 0.1
  seed: 42
  weight_decay: 0.01