|
bnb_cfgs: |
|
bnb_4bit_compute_dtype: float16 |
|
bnb_4bit_quant_type: nf4 |
|
bnb_4bit_use_double_quant: true |
|
load_in_4bit: true |
|
load_in_8bit: false |
|
use_bnb: false |
|
data_cfgs: |
|
eval_data_files: null |
|
eval_datasets: null |
|
eval_optional_args: [] |
|
eval_size: null |
|
eval_split: null |
|
eval_subset: null |
|
eval_template: null |
|
ptx_data_files: flux_ptx_8k_t2i.pt |
|
ptx_datasets: /data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs |
|
ptx_optional_args: [] |
|
ptx_size: null |
|
ptx_split: null |
|
ptx_subset: null |
|
ptx_template: Chameleon_preference |
|
train_data_files: t2i_llf_prompt_only_tokenize.pt |
|
train_datasets: /data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs |
|
train_optional_args: [] |
|
train_size: 5000 |
|
train_split: null |
|
train_subset: null |
|
train_template: spavl_ti2ti |
|
logger_cfgs: |
|
cache_dir: null |
|
log_project: align-anything |
|
log_run_name: ppo |
|
log_type: wandb |
|
output_dir: ../outputs/ppo_t2i_llf_1018 |
|
save_interval: 30.0 |
|
lora_cfgs: |
|
inference_mode: false |
|
lora_alpha: 16 |
|
lora_dropout: 0.1 |
|
r: 16 |
|
save_full_model: true |
|
target_modules: |
|
- q_proj |
|
- v_proj |
|
task_type: TaskType.CAUSAL_LM |
|
use_lora: false |
|
model_cfgs: |
|
actor_model_name_or_path: /data/align-anything/hantao/models/0830_4k_sft_flux |
|
model_max_length: 2048 |
|
repetition_penalty: 1.0 |
|
reward_critic_model_name_or_path: /data/align-anything/hantao/align-anything/outputs/rm_t2i_llf_1017 |
|
reward_model_name_or_path: /data/align-anything/hantao/align-anything/outputs/rm_t2i_llf_1017 |
|
temperature: 1.0 |
|
top_p: 1.0 |
|
trust_remote_code: true |
|
special_tokens: null |
|
train_cfgs: |
|
actor_gradient_checkpointing: true |
|
actor_lr: 1.0e-05 |
|
actor_lr_scheduler_type: cosine |
|
actor_lr_warmup_ratio: 0.03 |
|
actor_weight_decay: 0.01 |
|
adam_betas: |
|
- 0.9 |
|
- 0.95 |
|
bf16: true |
|
clip_range_ratio: 0.2 |
|
clip_range_score: 50.0 |
|
clip_range_value: 5.0 |
|
critic_gradient_checkpointing: true |
|
critic_lr: 5.0e-06 |
|
critic_lr_scheduler_type: constant |
|
critic_lr_warmup_ratio: 0.03 |
|
critic_weight_decay: 0.0 |
|
ds_cfgs: ds_z3_config.json |
|
epochs: 3 |
|
eval_interval: 10 |
|
eval_strategy: epoch |
|
fp16: false |
|
freeze_language_model: true |
|
freeze_mm_proj: true |
|
freeze_vision_tower: false |
|
gae_lambda: 0.95 |
|
gamma: 1.0 |
|
gradient_accumulation_steps: 2 |
|
kl_coeff: 0.02 |
|
normalize_reward: false |
|
per_device_eval_batch_size: 8 |
|
per_device_prompt_batch_size: 8 |
|
per_device_train_batch_size: 8 |
|
ptx_coeff: 16.0 |
|
seed: 42 |
|
update_iters: 1 |
|
|