File size: 6,032 Bytes
ecfa6a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
{
    "adaptive_mixing_args": null,
    "async_checkpointing": false,
    "async_eval_ngpus": -1,
    "batch_size": 2,
    "checkpoint_manifold_bucket": "genai_llm_fb",
    "data": "",
    "delete_manifold_checkpoints": true,
    "disable_logging": false,
    "disable_workers_print": false,
    "dist": {
        "global_rank": 0,
        "world_size": 8
    },
    "do_sync_eval": true,
    "dtype": "bf16",
    "dump_dir": "/tmp/metaformers_dmp",
    "dump_freq": 100,
    "dump_profile_traces": false,
    "enable_gil_watcher": false,
    "enable_loss_tracker": false,
    "eval": null,
    "eval_freq": 100,
    "exp_id": "",
    "exp_name": "",
    "finetuning_checkpoint_load_strict": false,
    "finetuning_dir": "/tmp/metaformers_dmp/checkpoints/stable/llama_cinnamon_7b",
    "fp32_reduce_scatter": true,
    "gpu_check_level": 3,
    "hive_data": null,
    "instruct": {
        "is_instruct_tuning": true,
        "no_loss_prompt": true,
        "no_loss_truncated": false,
        "only_sft_last_response": false,
        "smart_coalesce": false,
        "space_around_response": false,
        "wrap_seq_tokens_once": false
    },
    "instruct_data": "/tmp/metaformers_dmp/data/sft/anthropic_prompts_open_13K_desc_1122_redist_6cat:0.25,/tmp/metaformers_dmp/data/sft/anthropic_prompts_open_13K_no_desc_1122_redist_6cat:0.25,/tmp/metaformers_dmp/data/sft/anthropic_responses_open_13K_desc_1122_redist_6cat:0.25,/tmp/metaformers_dmp/data/sft/anthropic_responses_open_13K_no_desc_1122_redist_6cat:0.25",
    "iter_batch_multi_hive_koski": null,
    "iter_jsonl": {
        "buffer_size": 64,
        "same_data": false
    },
    "iter_multi": {
        "buffer_size": 512,
        "ignore_extra_chunks": true,
        "max_precompute": 20,
        "multiprocess": true
    },
    "iter_type": "multi",
    "kd_args": {
        "kd_logits": false,
        "kd_model": null,
        "kd_model_dir": "",
        "reverse_kld_loss": false
    },
    "keep_eval_checkpoints": true,
    "keep_n_last_checkpoints": 2,
    "log_all_steps": false,
    "log_freq": 1,
    "log_updates": true,
    "loss_rescaling": false,
    "manifold_output_dir": "tree/checkpoints/mast/inan/2023-11-27/080608_VAx9Hcb0THuGhWcZP4I6OA",
    "mixing_ratio": null,
    "model": {
        "alpha_depth": "disabled",
        "custom_bwd": true,
        "dim": 4096,
        "dim_by_layer": "",
        "dropout": 0,
        "efficient_attn": "cutlass",
        "ffn_dim": 512,
        "ffn_dim_multiplier": 1.0,
        "full_logging_n_layers": 4,
        "head_prune": false,
        "init": {
            "coeff_std": null,
            "depth_last": false,
            "fixed_std": null,
            "no_init": false,
            "use_depth": "current",
            "use_gaussian": true
        },
        "init_on_meta_device": false,
        "layer_ckpt": "none",
        "loss_parallel": false,
        "max_length": 4096,
        "multiple_of": 256,
        "n_heads": 32,
        "n_heads_by_layer": "",
        "n_kv_heads": null,
        "n_kv_heads_by_layer": "",
        "n_layers": 32,
        "non_linearity": "swiglu",
        "norm_affine": true,
        "norm_eps": 1e-05,
        "norm_type": "rmsnorm",
        "output_size": -1,
        "parallel_impl": "FAIRSCALE",
        "position_interpolation": 1.0,
        "pre_norm": true,
        "recompute_attn": true,
        "recompute_fc1_out": true,
        "recompute_fc3_out": true,
        "rope_theta": 10000.0,
        "sequence_parallel": false,
        "use_rope": true,
        "use_xpos": false,
        "vocab_size": 32000,
        "xpos_gamma": 0.8,
        "xpos_scale_base": 4096,
        "xpos_theta": 500000.0
    },
    "model_parallel_size": 1,
    "no_final_ckpt": false,
    "optim": {
        "beta1": 0.9,
        "beta2": 0.95,
        "clip": 1.0,
        "cosine_theta": 1.0,
        "cycle_length": 1.0,
        "epsilon": 1e-08,
        "exp_factor": 0.5,
        "fused": null,
        "lr": 2e-06,
        "lr_min_ratio": 0.1,
        "scheduler": "cosine",
        "use_deprecated_optim": false,
        "use_sgd": false,
        "warmup": 100,
        "weight_decay": 0.1
    },
    "peft_args": null,
    "periodic_gpu_check": true,
    "profile_freq": -1,
    "reshard_after_forward": true,
    "restore_dataloader_position": false,
    "rlhf": null,
    "root_dump_dir": "/tmp/nobody/xldumps",
    "secondary_hive_data": null,
    "seq_len": 4096,
    "snapshot_restore_dir": null,
    "steps": 3000,
    "stuck_threshold_sec": 1500,
    "tb_upload_freq": 50,
    "tokenizer": "tokenizer_final_32k.minus_inf_ws.model",
    "tokenizer_dir": "/tmp/metaformers_dmp/tokenizer",
    "torch_seed": -1,
    "unlimited_steps": false,
    "valid": {
        "batch_size": 8,
        "content_key": null,
        "custom_preference_task_table1": "",
        "custom_preference_task_table2": "",
        "debug": false,
        "hive_data": null,
        "hive_tasks": [],
        "hive_tasks_output_hive_data": null,
        "instruct": {
            "is_instruct_tuning": true,
            "no_loss_prompt": true,
            "no_loss_truncated": false,
            "only_sft_last_response": false,
            "smart_coalesce": false,
            "space_around_response": false,
            "wrap_seq_tokens_once": false
        },
        "iso_regression_model_path": "",
        "majority_voting": 0,
        "n_batches": 100,
        "n_generations": 1,
        "ppl_files_str": "",
        "ppl_root_dir": "",
        "prompt_path": "",
        "random_fewshots": false,
        "repetition_penalty": 1.0,
        "rlhf_eval": false,
        "seq_len": 2048,
        "task_batch_size": 8,
        "tasks_root_dir": "/tmp/metaformers_dmp/data/eval",
        "tasks_str": "safetyllama_prompt,safetyllama_response",
        "temperature": 1.0,
        "top_k": 0,
        "top_p": 0.0,
        "use_llm_inference": true,
        "use_relative_loss": true,
        "use_sampling": false,
        "write_eval": true,
        "write_every_n_batches": 1
    }
}