name: llama
model:
  pretrained_model_name_or_path: 'meta-llama/Meta-Llama-3-8B'
  cache_dir: '/scr-ssd/mzhang/models/llama3'  # Set this to where you want Hugging Face to cache the downloaded model weights
  return_dict: true
  load_in_8bit: false
  load_in_4bit: false
  device_map: auto
  low_cpu_mem_usage: true
  torch_dtype: bfloat16
  attn_implementation: flash_attention_2  # Requires the flash-attn package; use 'sdpa' if it is unavailable
  rope_theta: 500000.0  # RoPE base frequency used by Llama 3

attention:
  attention_type: softmax
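
# Minimal loading sketch (assumption: the `model` block above is passed as
# keyword arguments to Hugging Face transformers; the config path below is
# hypothetical). Kept as comments so this file remains valid YAML:
#
#   import yaml, torch
#   from transformers import AutoModelForCausalLM
#
#   with open('configs/model/llama3_8b.yaml') as f:
#       cfg = yaml.safe_load(f)['model']
#   name = cfg.pop('pretrained_model_name_or_path')
#   cfg['torch_dtype'] = getattr(torch, cfg['torch_dtype'])  # 'bfloat16' -> torch.bfloat16
#   model = AutoModelForCausalLM.from_pretrained(name, **cfg)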