{ "async_checkpointing": false, "async_eval_ngpus": -1, "batch_size": 4, "data": "", "disable_logging": false, "disable_workers_print": false, "dtype": "bf16", "dump_after_steps": 0, "dump_dir": "/fsx-onellm/rpasunuru/SFT/v2.1_textpp_30b_730k_sftv1.4_exp1/v2.1_textpp_30b_730k_sftv1.4_exp1_run000", "dump_freq": 400, "dump_profile_traces": false, "enable_loss_tracker": false, "epochs": -1, "eval_freq": 100000, "exp_id": "", "exp_name": "", "finetuning_dir": "/fsx-onellm/shared/from_rsc//v2.1_30b_qk_zloss_snorm_Nov_26_3_run000_checkpoint_0730000", "fp32_reduce_scatter": "all", "gpu_check_level": 3, "image_loss_weight": 1.0, "image_text_rotation_prob": 0.0, "instruct": { "no_loss_prompt": true, "no_loss_truncated": false, "use_eot": true }, "instruct_data": "/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/long_caption:2.92,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/vqa:4.59,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/text2image:10.44,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/llama2_rjv6_helpful:43.27,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/code_llama:0.51,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/interleaved_batch1-17:27.45,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/image_dialogue:7.46,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/llama2_rjv6_harmless:0.97,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/cybersec_safety:0.33,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/onellm_multimodal_safety:0.86,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/autosafety:0.51,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/rainbow_safety:0.10,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/genai_safety:0.58", "iter_gopher": { "buffer_size": 16, "max_precompute": 10, "n_chars_by_tok": 15, "n_seqs_to_concat": 10, "num_processes": 1 }, "iter_jsonl": { "buffer_size": 64, "same_data": false }, "iter_multi": { "buffer_size": 512, "ignore_extra_chunks": true, "max_precompute": 20, "multiprocess": true }, "iter_type": "multi", "keep_checkpoints_every_steps": 400, "keep_eval_checkpoints": true, "keep_n_last_checkpoints": 2, "log_all_steps": false, "log_freq": 10, "log_updates": true, "log_wandb": false, "loss_rescaling": false, "model": { "add_extra_toks": "0", "alpha_depth": "disabled", "attn_dropout": 0, "attn_to_keep": "all", "custom_bwd": false, "dim": 8192, "dropout": 0.05, "efficient_attn": "flash", "emb_dropout": 0, "ffn_dim_multiplier": 1.0, "ffn_dropout": 0, "full_logging_n_layers": 4, "fuse_sequence_parallel": false, "init": { "coeff_std": null, "depth_last": false, "fixed_std": null, "no_init": false, "pos_init_scalar": null, "use_depth": "current", "use_gaussian": true }, "layer_ckpt": "0::2", "linear_residual_dropout": false, "loss_parallel": true, "max_length": 2048, "multiple_of": 256, "n_heads": 64, "n_kv_heads": 8, "n_layers": 48, "non_linearity": "swiglu", "norm_affine": true, "norm_eps": 1e-05, "norm_type": "rmsnorm", "output_dropout": 0, "output_size": -1, "pre_norm": true, "qk_normalization": true, "recompute_attn": true, "recompute_fc1_out": true, "recompute_fc3_out": true, "residual_dropout": 0.0, "rope_theta": 10000.0, "sequence_parallel": false, "swin_norm": true, "turn_eos_token": "", "use_rope": true, "vocab_size": 65536 }, "model_parallel_size": 4, "no_final_ckpt": false, "num_retrieved_docs": 0, "old_mp": -1, "old_world_size": -1, "optim": { "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "cosine_theta": 1.0, "cycle_length": 1.0, "epsilon": 1e-08, "exp_factor": 0.5, "lr": 1e-05, "lr_min_ratio": 0.1, "scheduler": "cosine", "use_deprecated_optim": false, "warmup": 100, "weight_decay": 0.1 }, "periodic_gpu_check": true, "profile_freq": -1, "reshard_after_forward": true, "restore_dataloader_position": false, "retrieval_prob": 0.0, "rlhf": null, "root_dump_dir": "", "save_optimizer_states": true, "seq_len": 4096, "slurm": { "global_rank": 0, "is_slurm_job": true, "world_size": 128 }, "steps": 1200, "tokenizer": "/fsx-onellm/rpasunuru/models/cm3z/cm3v2_7b_placeholder/gpt2-unified-image-sentinel.json", "tokenizer_dir": "/fsx/guismay/data/large_experiments/fair_llm/datasets/tokenizers", "torch_seed": -1, "unlimited_steps": false, "use_hf_tokenizer": true, "valid": { "batch_size": 1, "debug": false, "majority_voting": 0, "n_batches": 100, "onellm_eval": false, "onellm_eval_media_storage": "", "ppl_files_str": "", "prompt_path": "", "prompt_templates": "{}", "random_fewshots": false, "seq_len": 4096, "tasks_root_dir": "", "tasks_str": "", "temperature": 1.0, "top_k": 0, "top_p": 0.0, "use_sampling": false, "write_eval": false }, "wandb_entity": "violet-zct", "wandb_project": "instruct_sft", "water_marking_codes_str": null, "z_loss_weight": 0.0001 }