{
  "adaptive_mixing_args": null,
  "async_checkpointing": false,
  "async_eval_ngpus": -1,
  "batch_size": 2,
  "checkpoint_manifold_bucket": "genai_llm_fb",
  "data": "",
  "delete_manifold_checkpoints": true,
  "disable_logging": false,
  "disable_workers_print": false,
  "dist": {
    "global_rank": 0,
    "world_size": 8
  },
  "do_sync_eval": true,
  "dtype": "bf16",
  "dump_dir": "/tmp/metaformers_dmp",
  "dump_freq": 100,
  "dump_profile_traces": false,
  "enable_gil_watcher": false,
  "enable_loss_tracker": false,
  "eval": null,
  "eval_freq": 100,
  "exp_id": "",
  "exp_name": "",
  "finetuning_checkpoint_load_strict": false,
  "finetuning_dir": "/tmp/metaformers_dmp/checkpoints/stable/llama_cinnamon_7b",
  "fp32_reduce_scatter": true,
  "gpu_check_level": 3,
  "hive_data": null,
  "instruct": {
    "is_instruct_tuning": true,
    "no_loss_prompt": true,
    "no_loss_truncated": false,
    "only_sft_last_response": false,
    "smart_coalesce": false,
    "space_around_response": false,
    "wrap_seq_tokens_once": false
  },
  "instruct_data": "/tmp/metaformers_dmp/data/sft/anthropic_prompts_open_13K_desc_1122_redist_6cat:0.25,/tmp/metaformers_dmp/data/sft/anthropic_prompts_open_13K_no_desc_1122_redist_6cat:0.25,/tmp/metaformers_dmp/data/sft/anthropic_responses_open_13K_desc_1122_redist_6cat:0.25,/tmp/metaformers_dmp/data/sft/anthropic_responses_open_13K_no_desc_1122_redist_6cat:0.25",
  "iter_batch_multi_hive_koski": null,
  "iter_jsonl": {
    "buffer_size": 64,
    "same_data": false
  },
  "iter_multi": {
    "buffer_size": 512,
    "ignore_extra_chunks": true,
    "max_precompute": 20,
    "multiprocess": true
  },
  "iter_type": "multi",
  "kd_args": {
    "kd_logits": false,
    "kd_model": null,
    "kd_model_dir": "",
    "reverse_kld_loss": false
  },
  "keep_eval_checkpoints": true,
  "keep_n_last_checkpoints": 2,
  "log_all_steps": false,
  "log_freq": 1,
  "log_updates": true,
  "loss_rescaling": false,
  "manifold_output_dir": "tree/checkpoints/mast/inan/2023-11-27/080608_VAx9Hcb0THuGhWcZP4I6OA",
  "mixing_ratio": null,
  "model": {
    "alpha_depth": "disabled",
    "custom_bwd": true,
    "dim": 4096,
    "dim_by_layer": "",
    "dropout": 0,
    "efficient_attn": "cutlass",
    "ffn_dim": 512,
    "ffn_dim_multiplier": 1.0,
    "full_logging_n_layers": 4,
    "head_prune": false,
    "init": {
      "coeff_std": null,
      "depth_last": false,
      "fixed_std": null,
      "no_init": false,
      "use_depth": "current",
      "use_gaussian": true
    },
    "init_on_meta_device": false,
    "layer_ckpt": "none",
    "loss_parallel": false,
    "max_length": 4096,
    "multiple_of": 256,
    "n_heads": 32,
    "n_heads_by_layer": "",
    "n_kv_heads": null,
    "n_kv_heads_by_layer": "",
    "n_layers": 32,
    "non_linearity": "swiglu",
    "norm_affine": true,
    "norm_eps": 1e-05,
    "norm_type": "rmsnorm",
    "output_size": -1,
    "parallel_impl": "FAIRSCALE",
    "position_interpolation": 1.0,
    "pre_norm": true,
    "recompute_attn": true,
    "recompute_fc1_out": true,
    "recompute_fc3_out": true,
    "rope_theta": 10000.0,
    "sequence_parallel": false,
    "use_rope": true,
    "use_xpos": false,
    "vocab_size": 32000,
    "xpos_gamma": 0.8,
    "xpos_scale_base": 4096,
    "xpos_theta": 500000.0
  },
  "model_parallel_size": 1,
  "no_final_ckpt": false,
  "optim": {
    "beta1": 0.9,
    "beta2": 0.95,
    "clip": 1.0,
    "cosine_theta": 1.0,
    "cycle_length": 1.0,
    "epsilon": 1e-08,
    "exp_factor": 0.5,
    "fused": null,
    "lr": 2e-06,
    "lr_min_ratio": 0.1,
    "scheduler": "cosine",
    "use_deprecated_optim": false,
    "use_sgd": false,
    "warmup": 100,
    "weight_decay": 0.1
  },
  "peft_args": null,
  "periodic_gpu_check": true,
  "profile_freq": -1,
  "reshard_after_forward": true,
  "restore_dataloader_position": false,
  "rlhf": null,
  "root_dump_dir": "/tmp/nobody/xldumps",
  "secondary_hive_data": null,
  "seq_len": 4096,
  "snapshot_restore_dir": null,
  "steps": 3000,
  "stuck_threshold_sec": 1500,
  "tb_upload_freq": 50,
  "tokenizer": "tokenizer_final_32k.minus_inf_ws.model",
  "tokenizer_dir": "/tmp/metaformers_dmp/tokenizer",
  "torch_seed": -1,
  "unlimited_steps": false,
  "valid": {
    "batch_size": 8,
    "content_key": null,
    "custom_preference_task_table1": "",
    "custom_preference_task_table2": "",
    "debug": false,
    "hive_data": null,
    "hive_tasks": [],
    "hive_tasks_output_hive_data": null,
    "instruct": {
      "is_instruct_tuning": true,
      "no_loss_prompt": true,
      "no_loss_truncated": false,
      "only_sft_last_response": false,
      "smart_coalesce": false,
      "space_around_response": false,
      "wrap_seq_tokens_once": false
    },
    "iso_regression_model_path": "",
    "majority_voting": 0,
    "n_batches": 100,
    "n_generations": 1,
    "ppl_files_str": "",
    "ppl_root_dir": "",
    "prompt_path": "",
    "random_fewshots": false,
    "repetition_penalty": 1.0,
    "rlhf_eval": false,
    "seq_len": 2048,
    "task_batch_size": 8,
    "tasks_root_dir": "/tmp/metaformers_dmp/data/eval",
    "tasks_str": "safetyllama_prompt,safetyllama_response",
    "temperature": 1.0,
    "top_k": 0,
    "top_p": 0.0,
    "use_llm_inference": true,
    "use_relative_loss": true,
    "use_sampling": false,
    "write_eval": true,
    "write_every_n_batches": 1
  }
}