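# NOTE(review): the six items below appear to be file-viewer residue from the
# hosting page (uploader, commit message, commit id, UI links, file size) and
# not part of the config; kept as comments so the document still parses.
#   Stewart Whiting
#   Upload 7 files
#   04314e4 verified
#   raw
#   history blame
#   726 Bytes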
# Top-level batching and checkpoint options.
batch_size: 1
# NOTE(review): unclear from this file whether `checkpoint` toggles activation
# checkpointing or checkpoint saving — confirm against the consumer.
checkpoint: true
# Presumably the interval, in steps, between saved checkpoints — TODO confirm.
# It equals max_steps (100) in this file.
ckpt_freq: 100
# Dataset settings.
# Nesting restored: with every key at column 0 this section declared `data`
# and `shuffle` twice (duplicate keys; most parsers silently keep the last
# value) and left `instruct` as null. Keys are alphabetically sorted at each
# level, which fixes the structure as written below.
data:
  # Plain-text corpus path; the /content/ prefix suggests a notebook runtime
  # (assumption — verify before running elsewhere).
  data: /content/data/HansardSequences_250k.big.txt
  # Empty string: no evaluation instruct data is configured.
  eval_instruct_data: ''
  # Options for instruct-style data. instruct_data is empty below, so these
  # presumably have no effect in this run — TODO confirm.
  instruct:
    dynamic_chunk_fn_call: true
    shuffle: true
  # Empty string: no instruct training data is configured.
  instruct_data: ''
  # Shuffle flag for the data section itself (distinct from instruct.shuffle).
  shuffle: false
# Evaluation and logging cadence.
# NOTE(review): no_eval is true elsewhere in this file, so eval_freq is
# presumably ignored for this run — confirm.
eval_freq: 100
# Presumably "log every N steps"; 1 would mean every step — TODO confirm.
log_freq: 1
# LoRA adapter settings.
# Nesting restored: flat, `lora` parsed as null and its four settings became
# unrelated top-level keys. `dropout` directly after `lora` breaks the
# alphabetical top-level order, so these keys are children of `lora`.
lora:
  dropout: 0.0
  enable: true
  rank: 64
  scaling: 2.0
# Presumably the gradient-norm clipping threshold — TODO confirm.
max_norm: 1.0
# Step budget for the run; ckpt_freq and eval_freq use the same value (100).
max_steps: 100
# MLflow tracking settings; both values are null in this file.
# Nesting restored: flat, `mlflow` parsed as null and its two settings became
# unrelated top-level keys.
mlflow:
  experiment_name: null
  tracking_uri: null
# Base model location: a local directory here (the key name also allows an id).
model_id_or_path: /content/mistral_models/7B-v0.3
# Checkpoint and evaluation switches.
no_ckpt: false
# Evaluation is switched off for this run.
no_eval: true
# Presumably the number of most recent checkpoints retained — TODO confirm.
num_ckpt_keep: 3
# Presumably gradient-accumulation micro-batches per optimizer step —
# TODO confirm.
num_microbatches: 8
# Optimizer settings.
# Nesting restored: flat, `optim` parsed as null and its three settings became
# unrelated top-level keys. `lr` directly after `optim` breaks the
# alphabetical top-level order, so these keys are children of `optim`.
optim:
  lr: 0.0001
  # Presumably the warm-up fraction of the LR schedule — TODO confirm.
  pct_start: 0.05
  weight_decay: 0.1
# Output directory for this run.
run_dir: /content/debategpt
# NOTE(review): presumably saves only the LoRA adapter weights rather than a
# merged model — confirm against the consumer.
save_adapters: true
# Random seed.
seed: 0
# Sequence length; presumably measured in tokens — TODO confirm.
seq_len: 8192
# Weights & Biases settings; key, project and run name are null in this file.
# Nesting restored: flat, `wandb` parsed as null and its four settings became
# unrelated top-level keys. `key` directly after `wandb` breaks the
# alphabetical top-level order, so these keys are children of `wandb`.
wandb:
  # Leave null in version control; supply any API key out of band.
  key: null
  offline: false
  project: null
  run_name: null
# Presumably the number of participating processes/devices — TODO confirm.
world_size: 1