# Model section: backbone sizes, dropout rates and the per-layer mixer settings.
# NOTE(review): nesting was rebuilt from the interpolation paths used in this
# file (`${.d_model}` and `${model.d_model}`); confirm that `n_classes` belongs
# directly under `model` and not under `model.layer`.
model:
  d_model: 768
  n_layer: 6
  # Relative interpolation: 4 * this section's d_model, via the `eval` resolver.
  d_inner: ${eval:4 * ${.d_model}}
  # Mirrors the value declared in the `tokenizer` section.
  vocab_size: ${tokenizer.vocab_size}
  resid_dropout: 0.0
  embed_dropout: 0.1
  residual_in_fp32: true
  pad_vocab_size_multiple: 8
  mamba_ver: mamba2
  layer:
    # Kept in sync with the enclosing model width.
    d_model: ${model.d_model}
    d_state: 64
    d_conv: 4
    expand: 2
    headdim: 64
  n_classes: null
# Dataset section: input location, sequence length and loader settings.
dataset:
  # `div_up` resolver applied to 1_000_000_000 and this section's max_len.
  __train_len: ${div_up:1_000_000_000, ${.max_len}}
  # Alias of max_len (relative interpolation).
  __l_max: ${.max_len}
  randomize_offset: true
  input_path: ./data/
  max_len: 660
  use_padding: true
  add_eos: false
  rc_aug: true
  phase: pretrain
  classify_level: null
  num_workers: 0
  batch_size: 16
  pretrain_method: ntp
  mask_ratio: 0.5
# Tokenizer section: character-level vocabulary and padding behaviour.
tokenizer:
  use_unk_token: true
  name: char
  # 'N' stays quoted so YAML 1.1 loaders do not read it as a boolean.
  characters: [A, C, G, T, 'N']
  # The arithmetic must go through the `eval` resolver (already used elsewhere
  # in this config). A bare `${dataset.max_len} + 2` is plain string
  # interpolation and resolves to the text "660 + 2", not the integer 662.
  model_max_length: ${eval:${dataset.max_len} + 2}
  add_special_tokens: false
  padding_side: left
  # Referenced by model.vocab_size.
  vocab_size: 8
# Trainer section: device selection, epoch budget and distributed strategy.
trainer:
  accelerator: gpu
  devices: -1
  num_nodes: 1
  # Also read by the scheduler section to size t_initial and warmup_t.
  max_epochs: 50
  gradient_clip_val: 1.0
  fast_dev_run: false
  strategy: ddp
# Train section: logging, seeding, batch size and checkpoint-loading options.
train:
  logger: wandb
  run_name: null
  # When resolved, this runs `nvidia-smi` for GPU 0 through a shell, reads
  # memory.total, divides by 1000 and rounds.
  # NOTE(review): resolution presumably fails on hosts without `nvidia-smi`
  # (for example CPU-only machines) - confirm this key is only resolved on GPU
  # nodes.
  gpu_mem: ${eval:"round(float(__import__('subprocess').check_output('nvidia-smi -i 0 --query-gpu=memory.total --format=csv,noheader,nounits', shell=True).strip().decode()) / 1000)"}
  seed: 2222
  # Used by the scheduler section to convert dataset length into steps.
  global_batch_size: 256
  ckpt: null
  ema: 0.0
  test: true
  interval: step
  # monitor/mode are forwarded to the model_checkpoint section.
  monitor: val/loss_epoch
  mode: min
  validate_at_start: false
  pretrained_model_strict_load: false
  pretrained_model_path: null
# Scheduler section: schedule length, warm-up and floor learning rate, all
# derived from other sections through interpolation.
scheduler:
  t_in_epochs: false
  # div_up(train_len, global_batch_size) * max_epochs
  t_initial: ${eval:${div_up:${dataset.__train_len}, ${train.global_batch_size}} * ${trainer.max_epochs}}
  warmup_lr_init: 1.0e-06
  # Same product as t_initial, scaled by 0.01.
  warmup_t: ${eval:${div_up:${dataset.__train_len}, ${train.global_batch_size}} * ${trainer.max_epochs} * 0.01}
  # One tenth of the optimizer learning rate.
  lr_min: ${eval:0.1 * ${optimizer.lr}}
# Optimizer section.
optimizer:
  # Also read by scheduler.lr_min.
  lr: 0.0008
  weight_decay: 0.1
  betas: [0.9, 0.999]
# Checkpointing section; the monitored metric and its direction come from the
# train section.
# NOTE(review): `verbose` is assumed to belong to this section - confirm.
model_checkpoint:
  monitor: ${train.monitor}
  mode: ${train.mode}
  save_top_k: 1
  save_last: true
  dirpath: checkpoints/
  # `${dataset.phase}` is filled in by config interpolation; `{epoch:02d}` is
  # left as a literal placeholder in the resolved string.
  filename: barcode-mamba-${dataset.phase}-{epoch:02d}
  save_on_train_epoch_end: true
  auto_insert_metric_name: true
  verbose: true
debug: false | |