File size: 5,859 Bytes
ca395b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
{
    "async_checkpointing": false,
    "async_eval_ngpus": -1,
    "batch_size": 4,
    "data": "",
    "disable_logging": false,
    "disable_workers_print": false,
    "dtype": "bf16",
    "dump_after_steps": 0,
    "dump_dir": "/fsx-onellm/rpasunuru/SFT/v2.1_textpp_30b_730k_sftv1.4_exp1/v2.1_textpp_30b_730k_sftv1.4_exp1_run000",
    "dump_freq": 400,
    "dump_profile_traces": false,
    "enable_loss_tracker": false,
    "epochs": -1,
    "eval_freq": 100000,
    "exp_id": "",
    "exp_name": "",
    "finetuning_dir": "/fsx-onellm/shared/from_rsc//v2.1_30b_qk_zloss_snorm_Nov_26_3_run000_checkpoint_0730000",
    "fp32_reduce_scatter": "all",
    "gpu_check_level": 3,
    "image_loss_weight": 1.0,
    "image_text_rotation_prob": 0.0,
    "instruct": {
        "no_loss_prompt": true,
        "no_loss_truncated": false,
        "use_eot": true
    },
    "instruct_data": "/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/long_caption:2.92,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/vqa:4.59,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/text2image:10.44,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/llama2_rjv6_helpful:43.27,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/code_llama:0.51,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/interleaved_batch1-17:27.45,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/image_dialogue:7.46,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/llama2_rjv6_harmless:0.97,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/cybersec_safety:0.33,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/onellm_multimodal_safety:0.86,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/autosafety:0.51,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/rainbow_safety:0.10,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/genai_safety:0.58",
    "iter_gopher": {
        "buffer_size": 16,
        "max_precompute": 10,
        "n_chars_by_tok": 15,
        "n_seqs_to_concat": 10,
        "num_processes": 1
    },
    "iter_jsonl": {
        "buffer_size": 64,
        "same_data": false
    },
    "iter_multi": {
        "buffer_size": 512,
        "ignore_extra_chunks": true,
        "max_precompute": 20,
        "multiprocess": true
    },
    "iter_type": "multi",
    "keep_checkpoints_every_steps": 400,
    "keep_eval_checkpoints": true,
    "keep_n_last_checkpoints": 2,
    "log_all_steps": false,
    "log_freq": 10,
    "log_updates": true,
    "log_wandb": false,
    "loss_rescaling": false,
    "model": {
        "add_extra_toks": "0",
        "alpha_depth": "disabled",
        "attn_dropout": 0,
        "attn_to_keep": "all",
        "custom_bwd": false,
        "dim": 8192,
        "dropout": 0.05,
        "efficient_attn": "flash",
        "emb_dropout": 0,
        "ffn_dim_multiplier": 1.0,
        "ffn_dropout": 0,
        "full_logging_n_layers": 4,
        "fuse_sequence_parallel": false,
        "init": {
            "coeff_std": null,
            "depth_last": false,
            "fixed_std": null,
            "no_init": false,
            "pos_init_scalar": null,
            "use_depth": "current",
            "use_gaussian": true
        },
        "layer_ckpt": "0::2",
        "linear_residual_dropout": false,
        "loss_parallel": true,
        "max_length": 2048,
        "multiple_of": 256,
        "n_heads": 64,
        "n_kv_heads": 8,
        "n_layers": 48,
        "non_linearity": "swiglu",
        "norm_affine": true,
        "norm_eps": 1e-05,
        "norm_type": "rmsnorm",
        "output_dropout": 0,
        "output_size": -1,
        "pre_norm": true,
        "qk_normalization": true,
        "recompute_attn": true,
        "recompute_fc1_out": true,
        "recompute_fc3_out": true,
        "residual_dropout": 0.0,
        "rope_theta": 10000.0,
        "sequence_parallel": false,
        "swin_norm": true,
        "turn_eos_token": "<eos>",
        "use_rope": true,
        "vocab_size": 65536
    },
    "model_parallel_size": 4,
    "no_final_ckpt": false,
    "num_retrieved_docs": 0,
    "old_mp": -1,
    "old_world_size": -1,
    "optim": {
        "beta1": 0.9,
        "beta2": 0.95,
        "clip": 1.0,
        "cosine_theta": 1.0,
        "cycle_length": 1.0,
        "epsilon": 1e-08,
        "exp_factor": 0.5,
        "lr": 1e-05,
        "lr_min_ratio": 0.1,
        "scheduler": "cosine",
        "use_deprecated_optim": false,
        "warmup": 100,
        "weight_decay": 0.1
    },
    "periodic_gpu_check": true,
    "profile_freq": -1,
    "reshard_after_forward": true,
    "restore_dataloader_position": false,
    "retrieval_prob": 0.0,
    "rlhf": null,
    "root_dump_dir": "",
    "save_optimizer_states": true,
    "seq_len": 4096,
    "slurm": {
        "global_rank": 0,
        "is_slurm_job": true,
        "world_size": 128
    },
    "steps": 1200,
    "tokenizer": "/fsx-onellm/rpasunuru/models/cm3z/cm3v2_7b_placeholder/gpt2-unified-image-sentinel.json",
    "tokenizer_dir": "/fsx/guismay/data/large_experiments/fair_llm/datasets/tokenizers",
    "torch_seed": -1,
    "unlimited_steps": false,
    "use_hf_tokenizer": true,
    "valid": {
        "batch_size": 1,
        "debug": false,
        "majority_voting": 0,
        "n_batches": 100,
        "onellm_eval": false,
        "onellm_eval_media_storage": "",
        "ppl_files_str": "",
        "prompt_path": "",
        "prompt_templates": "{}",
        "random_fewshots": false,
        "seq_len": 4096,
        "tasks_root_dir": "",
        "tasks_str": "",
        "temperature": 1.0,
        "top_k": 0,
        "top_p": 0.0,
        "use_sampling": false,
        "write_eval": false
    },
    "wandb_entity": "violet-zct",
    "wandb_project": "instruct_sft",
    "water_marking_codes_str": null,
    "z_loss_weight": 0.0001
}