winglian committed on
Commit
4fd360c
1 Parent(s): 70c47eb

add experiment3.yml

Files changed (1)
  1. experiment3.yml +68 -0
experiment3.yml ADDED
@@ -0,0 +1,68 @@
+ base_model: huggyllama/llama-7b
+ base_model_config: huggyllama/llama-7b
+ model_type: LlamaForCausalLM
+ tokenizer_type: LlamaTokenizer
+ load_in_8bit: false
+ load_4bit: false
+ datasets:
+   - path: teknium/GPT4-LLM-Cleaned
+     type: alpaca
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.02
+ adapter:
+ lora_model_dir:
+ sequence_len: 2048
+ max_packed_sequence_len:
+ lora_r: 16
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_modules:
+   - gate_proj
+   - down_proj
+   - up_proj
+   - q_proj
+   - v_proj
+   - k_proj
+   - o_proj
+ lora_fan_in_fan_out:
+ wandb_project: lora-experiment
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+ output_dir: ./lora-experiment
+ batch_size: 8
+ micro_batch_size: 4
+ num_epochs: 4
+ optimizer: adamw_torch
+ torchdistx_path:
+ lr_scheduler: cosine
+ learning_rate: 0.00003
+ train_on_inputs: false
+ group_by_length: true
+ bf16: true
+ tf32: true
+ gradient_checkpointing:
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention: true
+ flash_attention:
+ gptq_groupsize:
+ gptq_model_v1:
+ warmup_steps: 50
+ eval_steps: 134
+ save_steps:
+ debug:
+ deepspeed:
+ weight_decay: 0
+ fsdp:
+   - full_shard
+   - auto_wrap
+ fsdp_config:
+   fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
+
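For context on the keys above: the file reads like an axolotl-style LoRA fine-tuning config, with base_model pointing at huggyllama/llama-7b and the lora_* keys describing the adapter. As a rough illustration only, and not the trainer that actually consumes experiment3.yml, the sketch below shows how those hyperparameters would map onto a Hugging Face peft LoraConfig; the bias="none" and task_type values are assumptions made for the sketch, and note that the adapter: field in the YAML itself is left blank.

# Illustrative sketch: recreates the LoRA settings from experiment3.yml
# with the Hugging Face transformers + peft libraries. The YAML is normally
# consumed by a training harness; this only shows what the lora_* keys mean.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model

base_model = "huggyllama/llama-7b"          # base_model / base_model_config
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.bfloat16,             # bf16: true
)

lora_config = LoraConfig(
    r=16,                                   # lora_r
    lora_alpha=16,                          # lora_alpha
    lora_dropout=0.05,                      # lora_dropout
    target_modules=[                        # lora_target_modules
        "gate_proj", "down_proj", "up_proj",
        "q_proj", "v_proj", "k_proj", "o_proj",
    ],
    bias="none",                            # assumption, not in the YAML
    task_type="CAUSAL_LM",                  # assumption, not in the YAML
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()          # shows how few weights the adapter trains

The remaining keys (micro_batch_size, cosine lr_scheduler, learning_rate 0.00003, warmup_steps, fsdp, etc.) describe the training run itself and would be passed to whatever trainer drives the job rather than to the LoRA config.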