# ul3-base/.hydra/config.yaml
mode: pt
device: gpu
precision: bf16
eval_only: false
predict_only: false
seed: 93789
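# Run-level settings: mode "pt" selects nanoT5's pretraining task (as opposed
# to fine-tuning), running in bf16 mixed precision on GPU with a fixed seed.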
tokenizer:
  name: BEE-spoke-data/hf_slimpajama-6B-28672-BPE-forT5
working_dir: null
model:
  liger: true
  klass: local_t5
  name: pszemraj/tFINE-850m-24x24-1024ctx
  overwrite:
    dropout_rate: 0.0
    num_decoder_layers: 16
    num_key_value_heads: 4
    num_layers: 16
    use_gqa: true
  add_config:
    is_bf16: false
  checkpoint_path: ''
  random_init: true
  compile: true
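# Assuming nanoT5's usual semantics, the `overwrite` keys replace fields on the
# base model's config before the model is built: the 24x24-layer tFINE-850m
# base is reshaped to 16 encoder / 16 decoder layers with grouped-query
# attention (4 key/value heads), and random_init: true means weights are
# freshly initialized rather than loaded from that checkpoint.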
data:
  multi_task: true
  NTP: 0.3
  input_length: 512
  max_seq_len: 512
  mlm_probability: 0.15
  mean_noise_span_length: 3.0
  num_workers: 0
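# mlm_probability 0.15 and mean_noise_span_length 3.0 are the standard T5
# span-corruption settings. With multi_task enabled, NTP: 0.3 presumably sets
# the share of next-token-prediction (causal) examples in the objective mix
# (an assumption based on the field name, not documented here).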
optim:
  name: adamwscale
  base_lr: 0.001
  batch_size: 128
  total_steps: 65536
  epochs: -1
  warmup_steps: 5000
  lr_scheduler: cosine
  weight_decay: 0.01
  grad_clip: 1.0
  grad_acc: 16
  final_cosine: 2.0e-05
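# Assuming the nanoT5 convention that grad_acc divides batch_size, each forward
# pass sees 128 / 16 = 8 sequences and each optimizer step sees 128. Over 65536
# steps that is roughly 8.4M sequences, or about 4.3B input tokens at 512
# tokens each. epochs: -1 leaves total_steps as the stopping criterion, and
# adamwscale refers to nanoT5's AdamWScale optimizer (an Adam variant that
# scales the step size by parameter RMS).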
eval:
  every_steps: 500
  steps: 0
checkpoint:
  every_steps: 1500
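# Evaluation runs every 500 optimizer steps and checkpoints are written every
# 1500; over a 65536-step run that is roughly 131 evals and 43 checkpoints.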
logging:
  every_steps: 25
  grad_l2: true
  weights_l2: true
  use_wandb: true
  wandb_config:
    project: nanoT5
    entity: amazingvince
    tags:
      - gqa
      - large
      - e32-d16
      - 512 ctx
    mode: online