ETE-RL-Checkpoints / config.json
{
  "hidden_size": 4096,
  "num_head_layers": 1,
  "num_heads": 8,
  "use_adaln": true,
  "use_mask_embeddings": true,
  "use_block_embeddings": false,
  "learning_rate": 1e-05,
  "weight_decay": 0.01,
  "num_epochs": 3,
  "max_grad_norm": 1.0,
  "per_gpu_policy_update_prompt_batch_size": 16,
  "group_size": 16,
  "per_gpu_sampling_mini_batch_size": 8,
  "per_gpu_ref_mini_batch_size": 8,
  "per_gpu_training_mini_batch_size": 2,
  "update_steps_per_rollouts": 2,
  "gradient_accumulation_steps": 32,
  "use_cached_hidden_states": true,
  "use_gradient_checkpointing": false,
  "use_step_probs": true,
  "optimize_denoiser": false,
  "denoiser_learning_rate": null,
  "denoiser_weight_decay": null,
  "kl_coef": 0.1,
  "clip_range": 0.2,
  "advantage_clip_min": null,
  "advantage_clip_max": null,
  "ratio_cap": null,
  "use_ref_kl": true,
  "reward_weights": {
    "judge": 1.0,
    "correctness": 1.0,
    "lookahead": 1.0,
    "format": 0.5
  },
  "alpha_values": [
    1.0
  ],
  "steps": 256,
  "gen_length": 512,
  "block_length": 64,
  "temperature": 1.0,
  "temp_explore": 1.0,
  "use_sampling_new": false,
  "mixed_precision": true,
  "reserve_teacher_gpu": true,
  "teacher_device_index": 7,
  "log_interval": 1,
  "eval_interval": 100,
  "save_interval": 1000,
  "use_wandb": true,
  "wandb_project": "planner-rl-8gpu",
  "wandb_run_name": "my-run-8gpu",
  "log_text_samples": 4,
  "debug_logging": false,
  "debug_log_interval": 1,
  "output_dir": "/accounts/projects/songmei/hengyuf/hengyu_scratch/hengyuf/ete-checkpoints/RL",
  "resume_from_checkpoint": null,
  "compute_logprob_tensor": false
}
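
For reference, a minimal sketch of how a training script might consume this file. The key names come straight from the JSON above; the `RLConfig` dataclass, the `load_config` helper, and the batch-size arithmetic are hypothetical illustrations (assuming a GRPO-style setup where `group_size` counts sampled completions per prompt), not code shipped with this checkpoint.

```python
import json
from dataclasses import dataclass, fields

# Hypothetical container for a subset of the fields above. The key names
# match config.json; the class itself is an illustration, not code that
# ships with this checkpoint.
@dataclass
class RLConfig:
    hidden_size: int = 4096
    group_size: int = 16
    per_gpu_policy_update_prompt_batch_size: int = 16
    per_gpu_training_mini_batch_size: int = 2
    gradient_accumulation_steps: int = 32
    kl_coef: float = 0.1
    clip_range: float = 0.2
    gen_length: int = 512
    block_length: int = 64

def load_config(path: str) -> RLConfig:
    """Load config.json, keeping only the keys RLConfig declares."""
    with open(path) as f:
        raw = json.load(f)
    known = {f.name for f in fields(RLConfig)}
    return RLConfig(**{k: v for k, v in raw.items() if k in known})

cfg = load_config("config.json")

# If group_size counts samples per prompt (GRPO-style), each policy update
# on one GPU starts from 16 prompts x 16 samples = 256 rollouts, consumed
# in training mini-batches of 2 with 32 gradient-accumulation steps.
rollouts = cfg.per_gpu_policy_update_prompt_batch_size * cfg.group_size
print(f"{rollouts} rollouts per GPU per policy update")

# Block-wise generation usually requires gen_length to be a multiple of
# block_length; here 512 / 64 = 8 blocks.
assert cfg.gen_length % cfg.block_length == 0
```

Filtering unknown keys keeps the loader tolerant of fields the training code does not model, which is convenient when checkpoints from different runs carry extra knobs.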