{
    "dtype": "torch.float16",
    "train_dataset": "EYLSFTStaticDataset",
    "train_dataset_len": 9223372036854775807,
    "test_dataset": "EYLSFTStaticDataset",
    "test_dataset_len": 9223372036854775807,
    "n_layers": "24",
    "n_heads": "16",
    "embedding_dim": "1024",
    "dropout_rate": "0.2",
    "use_bias": "True",
    "block_size": "1024",
    "vocab_size": "50257",
    "model_name": "gpt2-medium/dropout",
    "hf_model": "gpt2-medium",
    "grad_clip": "1.0",
    "exp_name": "experiment_name",
    "batch_size": "4",
    "lr": "0.0001",
    "lora_rank": "0",
    "pretrain": "huggingface",
    "activation_checkpointing": "False",
    "finetune_method": "",
    "total_epochs": "1",
    "max_steps": "50000",
    "actor_weights": "",
    "critic_weights": "",
    "reward_model_weights": "",
    "sft_model_weights": "",
    "actor_lr": "5e-06",
    "critic_lr": "9e-06",
    "kl_beta": "0.02",
    "adam_beta1": "0.9",
    "adam_beta2": "0.95"
}
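
Most values in this dump are serialized as strings ("24", "0.0001", "True"), so they need to be coerced back to native types before use. Below is a minimal sketch, assuming the dump is saved as `config.json`; the file name and the `load_config` / `_coerce` helpers are illustrative, not part of the repository's API.

```python
import json


def _coerce(value):
    # Best-effort conversion of stringified values back to bool, int, or
    # float; anything that does not parse is returned unchanged.
    if not isinstance(value, str):
        return value
    if value in ("True", "False"):
        return value == "True"
    for cast in (int, float):
        try:
            return cast(value)
        except ValueError:
            pass
    return value


def load_config(path="config.json"):
    # Hypothetical loader: reads the JSON dump and coerces each field.
    with open(path) as f:
        raw = json.load(f)
    return {key: _coerce(val) for key, val in raw.items()}


if __name__ == "__main__":
    cfg = load_config()
    print(cfg["n_layers"], cfg["lr"], cfg["use_bias"])  # e.g. 24 0.0001 True
```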