name: llama
model:
  pretrained_model_name_or_path: 'meta-llama/Meta-Llama-3-8B'
  cache_dir: '/scr-ssd/mzhang/models/llama3'  # Set this to where you want to save checkpoint weights
  return_dict: true
  load_in_8bit: false
  load_in_4bit: false
  device_map: auto
  low_cpu_mem_usage: true
  torch_dtype: bfloat16
  attn_implementation: flash_attention_2
  rope_theta: 500000.0
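# The keys under `model:` mirror keyword arguments of
# transformers.AutoModelForCausalLM.from_pretrained(). A minimal loading
# sketch, assuming this file is parsed with pyyaml and a transformers
# version that still accepts these keys directly (the file name 'llama.yaml'
# and variable names are illustrative, not this repo's API):
#
#   import yaml
#   import torch
#   from transformers import AutoModelForCausalLM
#
#   with open('llama.yaml') as f:
#       cfg = yaml.safe_load(f)
#   kwargs = dict(cfg['model'])
#   # Convert the dtype string to the torch dtype object, e.g. 'bfloat16' -> torch.bfloat16
#   kwargs['torch_dtype'] = getattr(torch, kwargs['torch_dtype'])
#   model = AutoModelForCausalLM.from_pretrained(
#       kwargs.pop('pretrained_model_name_or_path'), **kwargs)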
attention:
  attention_type: softmax
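# One way attention_type might be consumed downstream: a hypothetical
# dispatch where 'softmax' keeps the model's stock attention (the function
# below is an assumption for illustration, not this repo's API):
#
#   from transformers import PreTrainedModel
#
#   def apply_attention_config(model: PreTrainedModel, attention_type: str) -> PreTrainedModel:
#       """Return the model unchanged for 'softmax'; alternative attention
#       types would swap in replacement attention modules here."""
#       if attention_type == 'softmax':
#           return model  # standard softmax attention, nothing to replace
#       raise NotImplementedError(f'unknown attention_type: {attention_type}')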