andyjzhao committed
Commit aff9928 · verified · 1 Parent(s): f0036a4

Upload hydra_cfg.yaml with huggingface_hub

Files changed (1):
  1. hydra_cfg.yaml +189 -0
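
The commit message notes the file was pushed with huggingface_hub. As a point of reference, a minimal sketch of what such a single-file upload looks like; the target repo id is an assumption taken from the `hf_repo` value inside the config below:

```python
# Hypothetical reconstruction of the upload call, not the repo's actual script.
# HfApi.upload_file creates exactly this kind of one-file commit.
from huggingface_hub import HfApi

api = HfApi()  # authenticates via a cached login or the HF_TOKEN env var
api.upload_file(
    path_or_fileobj="hydra_cfg.yaml",  # local file to push
    path_in_repo="hydra_cfg.yaml",     # destination path in the repo
    repo_id="jzshared/ckpt_debug",     # assumed from hf_repo in the config
    commit_message="Upload hydra_cfg.yaml with huggingface_hub",
)
```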
hydra_cfg.yaml ADDED
@@ -0,0 +1,189 @@
+ RC_augmentation: false
+ _dataset_cfg_lookup:
+   gencode128k_basic:
+     hf_path: jzshared/gencode128k_basic
+     path: data/gencode128k_basic
+     type: refseq
+   gencode128k_debug:
+     hf_path: jzshared/gencode128k_debug
+     path: data/gencode128k_debug
+     type: refseq
+   gencode_human_12.8k:
+     hf_path: jzshared/gencode_human_12.8k
+     path: data/gencode_human_12.8k
+     type: refseq
+   gencode_human_128k:
+     hf_path: jzshared/gencode_human_128k
+     path: data/gencode_human_128k
+     type: refseq
+   hg38_128k:
+     hf_path: jzshared/hg38_cds_anchored_128000
+     path: data/hg38_cds_anchored_128000
+     type: refseq
+   hg38_12k:
+     hf_path: jzshared/hg38_12800
+     path: data/hg38_cds_anchored_len12800_mincds150_1000000samples
+     type: refseq
+   hg38_cds_4m:
+     hf_path: null
+     path: data/hg38_cds_dataset_4m_filtered
+     type: refseq
+ alias: CKPT_DEBUG
+ alpha_exp: 1.0
+ alpha_max: 0.03
+ arch: hnet
+ batch_size: 32
+ bp_per_token: 3
+ cluster: mila
+ cmd: python src/scripts/train_genezip_v1.py exp=glm/stage1 data=gencode_human_12.8k
+   model=hnet/mamba_64m_2dc max_len=12800 batch_size=32 grad_acc_steps=1 max_train_steps=20
+   eval_steps=10 save_steps=10 alpha_max=0.03 use_routing_floor=false strictness_max=0
+   region_info=promoter1_cds1_utr1_exon1_intron1_nig1_dig1 alias=CKPT_DEBUG bp_per_token=3
+   use_wandb=true upload_to_hf=true hf_repo=jzshared/ckpt_debug
+ config_path: null
+ data: gencode_human_12.8k
+ data_alias: ${.data}_$11,848
+ dataset: ${_dataset_cfg_lookup[${data}]}
+ device: cuda
+ device_type: GPU
+ dirs:
+   data_cache: ${project_root}/data_cache/
+   data_storage: ${project_root}/data/
+   hydra: ${project_root}/temp/hydra/
+   output: ${project_root}/output/${data_alias}/${alias}/
+   temp: ${project_root}/temp/working_dir/${uid}/
+   wandb_cache: ${oc.env:WANDB_CACHE_DIR,${project_root}/temp/wandb_cache/}
+ epochs: 200
+ eval_batch_size: ${batch_size}
+ eval_steps: 10
+ grad_acc_steps: 1
+ hf_repo: jzshared/ckpt_debug
+ hf_repo_owner: jzshared
+ is_distributed: false
+ local_rank: 0
+ logging:
+   level: info
+   log_wandb_metric_to_stdout: true
+ lr: 0.001
+ master_port: '41105'
+ max_data_samples: null
+ max_eval_samples: 1000
+ max_len: 12800
+ max_length: $11,848
+ max_train_steps: 20
+ min_routing_tokens: 8
+ mode: Stage1
+ model:
+   arch: hnet
+   name: hnet_mamba_64m_2dc
+ model_alias: ${oc.select:model.name,UnknownModel}
+ model_cfg:
+   arch_layout:
+   - m2
+   - - m2
+     - - m15
+     - m2
+   - m2
+   attn_cfg:
+     num_heads:
+     - 8
+     - 8
+     - 12
+     rotary_emb_dim:
+     - 16
+     - 16
+     - 24
+     window_size:
+     - 511
+     - 511
+     - -1
+   d_intermediate:
+   - 0
+   - 0
+   - 2048
+   d_model:
+   - 512
+   - 512
+   - 768
+   min_routing_tokens: ${min_routing_tokens}
+   n_gpt: 1.0
+   r_hi: ${r_hi}
+   r_low: ${r_low}
+   r_warm_up_end: ${r_warm_up_end}
+   r_warm_up_start: ${r_warm_up_start}
+   ssm_cfg:
+     chunk_size: 256
+     d_conv: 4
+     d_state: 64
+     expand: 2
+     head_dim: 64
+   tie_embeddings: true
+   vocab_size: 12
+ name: hnet_base
+ private: false
+ project_root: ${hydra:runtime.cwd}
+ r_hi: 0.3
+ r_low: 0.0
+ r_warm_up_end: 750
+ r_warm_up_start: 200
+ rank: 0
+ reference_loss: null
+ region_info: promoter1_cds1_utr1_exon1_intron1_nig1_dig1
+ save_steps: 10
+ seed: 0
+ source: ${dataset.type}
+ strictness_exp: 1.0
+ strictness_max: 0
+ tokenizer: fast
+ training:
+   adam_beta1: 0.9
+   adam_beta2: 0.95
+   bf16: true
+   dataloader_drop_last: true
+   dataloader_num_workers: 1
+   disable_tqdm: false
+   do_train: true
+   eval_steps: ${eval_steps}
+   eval_strategy: steps
+   gradient_accumulation_steps: ${grad_acc_steps}
+   gradient_checkpointing: false
+   group_by_length: false
+   label_names:
+   - input_ids
+   learning_rate: ${lr}
+   logging_steps: 10
+   lr_scheduler_type: linear
+   max_grad_norm: 2.0
+   max_train_steps: ${max_train_steps}
+   num_train_epochs: ${epochs}
+   output_dir: ${dirs.output}
+   overrides: {}
+   per_device_eval_batch_size: ${eval_batch_size}
+   per_device_train_batch_size: ${batch_size}
+   remove_unused_columns: false
+   report_to: null
+   save_steps: ${save_steps}
+   save_strategy: steps
+   use_lr_multiplier: true
+   warmup_steps: 500
+   weight_decay: 0.1
+ training_alias: ${mode}_glm_s1_${region_info}_bp${bp_per_token}_aw${warmup_steps}_amax${alpha_max}_smax${strictness_max}_lr${lr}_e${epochs}_ms${max_train_steps}_maxlen$11,848
+ uid: gnrjc7ou
+ upload_to_hf: true
+ use_routing_floor: false
+ use_wandb: true
+ valid_test_downsample: null
+ version: NA
+ wandb:
+   dir: ${dirs.wandb_cache}
+   entity: ${oc.select:env.vars.wandb_entity,${oc.env:WANDB_ENTITY,null}}
+   id: gnrjc7ou
+   mode: online
+   name: CKPT_DEBUG
+   project: ${oc.select:env.vars.wandb_proj,DNAFM}
+   step_metric: null
+   tags:
+   - ${mode}
+   url: https://wandb.ai/jzshared/DNAFM/runs/gnrjc7ou
+ warmup_steps: 0
+ world_size: 1
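
For anyone who wants to inspect this config outside of a Hydra run, here is a minimal sketch using OmegaConf (assumptions: `huggingface_hub` and `omegaconf` are installed, and the file lives in the `jzshared/ckpt_debug` repo named by `hf_repo` above):

```python
# Minimal sketch: download the uploaded config and read a few values, letting
# OmegaConf resolve the ${...} interpolations lazily on access.
from huggingface_hub import hf_hub_download
from omegaconf import OmegaConf

path = hf_hub_download(repo_id="jzshared/ckpt_debug", filename="hydra_cfg.yaml")
cfg = OmegaConf.load(path)

print(cfg.max_len)                 # 12800
print(cfg.eval_batch_size)         # ${batch_size} -> 32
print(cfg.training.learning_rate)  # ${lr} -> 0.001
print(cfg.model_cfg.r_hi)          # ${r_hi} -> 0.3
```

Note that Hydra-only resolvers such as `${hydra:runtime.cwd}` (used by `project_root`) are not registered outside a Hydra run, so resolving the whole tree with `OmegaConf.resolve(cfg)` would fail; access keys selectively instead.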