jpfearnworks committed
Commit 370d057
1 Parent(s): 809cceb

Add qlora-openllama-3b example

examples/qlora-openllama-3b.yml/config.yml ADDED
@@ -0,0 +1,61 @@
+ base_model: openlm-research/open_llama_3b_600bt_preview
+ base_model_config: openlm-research/open_llama_3b_600bt_preview
+ model_type: LlamaForCausalLM
+ tokenizer_type: LlamaTokenizer
+ load_in_8bit: false
+ load_in_4bit: true
+ strict: false
+ push_dataset_to_hub:
+ datasets:
+   - path: teknium/GPT4-LLM-Cleaned
+     type: alpaca
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.01
+ adapter: qlora
+ lora_model_dir:
+ sequence_len: 2048
+ max_packed_sequence_len: 2048
+ lora_r: 8
+ lora_alpha: 32
+ lora_dropout: 0.05
+ lora_target_modules:
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+ wandb_project:
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+ output_dir: ./qlora-out
+ batch_size: 1
+ micro_batch_size: 4 # Changed from 1 to 4
+ num_epochs: 2 # Changed from 4 to 2
+ optimizer: paged_adamw_32bit
+ torchdistx_path:
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+ train_on_inputs: false
+ group_by_length: true # Changed from false to true
+ bf16: true
+ fp16: false
+ tf32: true
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention: true
+ flash_attention:
+ gptq_groupsize:
+ gptq_model_v1:
+ warmup_steps: 10
+ eval_steps: 20
+ save_steps:
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
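
For readers who want to see what the QLoRA-related keys in this config correspond to outside of axolotl, below is a minimal sketch of the equivalent model setup using transformers and peft directly. The explicit `target_modules` list is an assumption about what `lora_target_linear: true` resolves to for a LLaMA-style model (axolotl discovers the linear layers automatically), and the rest mirrors `load_in_4bit: true`, `bf16: true`, and the `lora_r`/`lora_alpha`/`lora_dropout` values above.

```python
# Minimal sketch of the model setup this config describes, using
# transformers + peft directly rather than axolotl. The target_modules
# list is an assumed expansion of lora_target_linear: true.
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

base = "openlm-research/open_llama_3b_600bt_preview"

# load_in_4bit: true, with bf16 compute to match bf16: true
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = LlamaTokenizer.from_pretrained(base)
model = LlamaForCausalLM.from_pretrained(base, quantization_config=bnb_config)

# prepares the quantized model for training (enables gradient
# checkpointing by default, matching gradient_checkpointing: true)
model = prepare_model_for_kbit_training(model)

# lora_r: 8, lora_alpha: 32, lora_dropout: 0.05
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=[  # assumed set of linear layers for LLaMA
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
```

Within the axolotl repo itself, this example would typically be launched with something like `accelerate launch scripts/finetune.py examples/qlora-openllama-3b.yml/config.yml`; the script path is an assumption about the repo layout at this commit.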