{ "adaptive_mixing_args": null, "async_checkpointing": false, "async_eval_ngpus": -1, "batch_size": 2, "checkpoint_manifold_bucket": "genai_llm_fb", "data": "", "delete_manifold_checkpoints": true, "disable_logging": false, "disable_workers_print": false, "dist": { "global_rank": 0, "world_size": 8 }, "do_sync_eval": true, "dtype": "bf16", "dump_dir": "/tmp/metaformers_dmp", "dump_freq": 100, "dump_profile_traces": false, "enable_gil_watcher": false, "enable_loss_tracker": false, "eval": null, "eval_freq": 100, "exp_id": "", "exp_name": "", "finetuning_checkpoint_load_strict": false, "finetuning_dir": "/tmp/metaformers_dmp/checkpoints/stable/llama_cinnamon_7b", "fp32_reduce_scatter": true, "gpu_check_level": 3, "hive_data": null, "instruct": { "is_instruct_tuning": true, "no_loss_prompt": true, "no_loss_truncated": false, "only_sft_last_response": false, "smart_coalesce": false, "space_around_response": false, "wrap_seq_tokens_once": false }, "instruct_data": "/tmp/metaformers_dmp/data/sft/anthropic_prompts_open_13K_desc_1122_redist_6cat:0.25,/tmp/metaformers_dmp/data/sft/anthropic_prompts_open_13K_no_desc_1122_redist_6cat:0.25,/tmp/metaformers_dmp/data/sft/anthropic_responses_open_13K_desc_1122_redist_6cat:0.25,/tmp/metaformers_dmp/data/sft/anthropic_responses_open_13K_no_desc_1122_redist_6cat:0.25", "iter_batch_multi_hive_koski": null, "iter_jsonl": { "buffer_size": 64, "same_data": false }, "iter_multi": { "buffer_size": 512, "ignore_extra_chunks": true, "max_precompute": 20, "multiprocess": true }, "iter_type": "multi", "kd_args": { "kd_logits": false, "kd_model": null, "kd_model_dir": "", "reverse_kld_loss": false }, "keep_eval_checkpoints": true, "keep_n_last_checkpoints": 2, "log_all_steps": false, "log_freq": 1, "log_updates": true, "loss_rescaling": false, "manifold_output_dir": "tree/checkpoints/mast/inan/2023-11-27/080608_VAx9Hcb0THuGhWcZP4I6OA", "mixing_ratio": null, "model": { "alpha_depth": "disabled", "custom_bwd": true, "dim": 4096, "dim_by_layer": "", "dropout": 0, "efficient_attn": "cutlass", "ffn_dim": 512, "ffn_dim_multiplier": 1.0, "full_logging_n_layers": 4, "head_prune": false, "init": { "coeff_std": null, "depth_last": false, "fixed_std": null, "no_init": false, "use_depth": "current", "use_gaussian": true }, "init_on_meta_device": false, "layer_ckpt": "none", "loss_parallel": false, "max_length": 4096, "multiple_of": 256, "n_heads": 32, "n_heads_by_layer": "", "n_kv_heads": null, "n_kv_heads_by_layer": "", "n_layers": 32, "non_linearity": "swiglu", "norm_affine": true, "norm_eps": 1e-05, "norm_type": "rmsnorm", "output_size": -1, "parallel_impl": "FAIRSCALE", "position_interpolation": 1.0, "pre_norm": true, "recompute_attn": true, "recompute_fc1_out": true, "recompute_fc3_out": true, "rope_theta": 10000.0, "sequence_parallel": false, "use_rope": true, "use_xpos": false, "vocab_size": 32000, "xpos_gamma": 0.8, "xpos_scale_base": 4096, "xpos_theta": 500000.0 }, "model_parallel_size": 1, "no_final_ckpt": false, "optim": { "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "cosine_theta": 1.0, "cycle_length": 1.0, "epsilon": 1e-08, "exp_factor": 0.5, "fused": null, "lr": 2e-06, "lr_min_ratio": 0.1, "scheduler": "cosine", "use_deprecated_optim": false, "use_sgd": false, "warmup": 100, "weight_decay": 0.1 }, "peft_args": null, "periodic_gpu_check": true, "profile_freq": -1, "reshard_after_forward": true, "restore_dataloader_position": false, "rlhf": null, "root_dump_dir": "/tmp/nobody/xldumps", "secondary_hive_data": null, "seq_len": 4096, "snapshot_restore_dir": null, "steps": 3000, "stuck_threshold_sec": 1500, "tb_upload_freq": 50, "tokenizer": "tokenizer_final_32k.minus_inf_ws.model", "tokenizer_dir": "/tmp/metaformers_dmp/tokenizer", "torch_seed": -1, "unlimited_steps": false, "valid": { "batch_size": 8, "content_key": null, "custom_preference_task_table1": "", "custom_preference_task_table2": "", "debug": false, "hive_data": null, "hive_tasks": [], "hive_tasks_output_hive_data": null, "instruct": { "is_instruct_tuning": true, "no_loss_prompt": true, "no_loss_truncated": false, "only_sft_last_response": false, "smart_coalesce": false, "space_around_response": false, "wrap_seq_tokens_once": false }, "iso_regression_model_path": "", "majority_voting": 0, "n_batches": 100, "n_generations": 1, "ppl_files_str": "", "ppl_root_dir": "", "prompt_path": "", "random_fewshots": false, "repetition_penalty": 1.0, "rlhf_eval": false, "seq_len": 2048, "task_batch_size": 8, "tasks_root_dir": "/tmp/metaformers_dmp/data/eval", "tasks_str": "safetyllama_prompt,safetyllama_response", "temperature": 1.0, "top_k": 0, "top_p": 0.0, "use_llm_inference": true, "use_relative_loss": true, "use_sampling": false, "write_eval": true, "write_every_n_batches": 1 } }