winglian committed
Commit 7358bc7
Parent: 809dc03

merged full weights after 1 ep training

configs/experiment.yml ADDED
@@ -0,0 +1,65 @@
+ base_model: wcde/llama-7b-4bit-gr128
+ base_model_config: wcde/llama-7b-4bit-gr128
+ model_type: LlamaForCausalLM
+ tokenizer_type: LlamaTokenizer
+ load_in_8bit: true
+ load_4bit: true
+ datasets:
+   - path: teknium/GPT4-LLM-Cleaned
+     type: alpaca
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.02
+ adapter: lora
+ lora_model_dir:
+ sequence_len: 512
+ max_packed_sequence_len:
+ lora_r: 16
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_modules:
+   - gate_proj
+   - down_proj
+   - up_proj
+   - q_proj
+   - v_proj
+   - k_proj
+   - o_proj
+ lora_fan_in_fan_out:
+ wandb_project: lora-experiment
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+ output_dir: ./lora-experiment
+ batch_size: 128
+ micro_batch_size: 8
+ num_epochs: 4
+ optimizer: adamw_torch
+ torchdistx_path:
+ lr_scheduler: cosine
+ learning_rate: 0.00001
+ train_on_inputs: false
+ group_by_length: true
+ bf16: true
+ tf32: true
+ gradient_checkpointing:
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention: true
+ flash_attention:
+ gptq_groupsize: 128
+ gptq_model_v1: false
+ warmup_steps: 10
+ eval_steps: 10
+ save_steps:
+ debug:
+ deepspeed:
+ weight_decay: 0
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
+
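The commit message says the full weights were merged after one epoch of this LoRA run, and experiment2.yml below starts from `./lora-experiment/merged`. A minimal sketch of how that merged directory could be produced with `peft`, assuming the adapter saved to `./lora-experiment` is folded into the full-precision `huggyllama/llama-7b` base (experiment2's `base_model_config`) rather than the GPTQ-quantized one; paths and dtype here are assumptions, not taken from this repo:

```python
# Hedged sketch: merge the LoRA adapter from ./lora-experiment into a dense base
# so the follow-up run can fine-tune full weights directly.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE = "huggyllama/llama-7b"             # assumed full-precision base
ADAPTER_DIR = "./lora-experiment"        # output_dir of the LoRA run above
MERGED_DIR = "./lora-experiment/merged"  # base_model of experiment2.yml

base = AutoModelForCausalLM.from_pretrained(BASE, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base, ADAPTER_DIR)

# Fold the low-rank updates into the dense weight matrices and drop the adapter wrappers.
merged = model.merge_and_unload()
merged.save_pretrained(MERGED_DIR)

# Save the tokenizer alongside so the merged directory is self-contained.
AutoTokenizer.from_pretrained(BASE).save_pretrained(MERGED_DIR)
```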
configs/experiment2.yml ADDED
@@ -0,0 +1,68 @@
+ base_model: ./lora-experiment/merged
+ base_model_config: huggyllama/llama-7b
+ model_type: LlamaForCausalLM
+ tokenizer_type: LlamaTokenizer
+ load_in_8bit: false
+ load_4bit: false
+ datasets:
+   - path: teknium/GPT4-LLM-Cleaned
+     type: alpaca
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.02
+ adapter:
+ lora_model_dir:
+ sequence_len: 2048
+ max_packed_sequence_len:
+ lora_r: 16
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_modules:
+   - gate_proj
+   - down_proj
+   - up_proj
+   - q_proj
+   - v_proj
+   - k_proj
+   - o_proj
+ lora_fan_in_fan_out:
+ wandb_project: lora-experiment
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+ output_dir: ./lora-experiment
+ batch_size: 8
+ micro_batch_size: 4
+ num_epochs: 4
+ optimizer: adamw_torch
+ torchdistx_path:
+ lr_scheduler: cosine
+ learning_rate: 0.00003
+ train_on_inputs: false
+ group_by_length: true
+ bf16: true
+ tf32: true
+ gradient_checkpointing:
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention: true
+ flash_attention:
+ gptq_groupsize:
+ gptq_model_v1:
+ warmup_steps: 50
+ eval_steps: 134
+ save_steps:
+ debug:
+ deepspeed:
+ weight_decay: 0
+ fsdp:
+   - full_shard
+   - auto_wrap
+ fsdp_config:
+   fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
+
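One practical difference between the two configs is how the global batch is assembled. Assuming axolotl derives gradient accumulation per device as `batch_size // micro_batch_size` (an assumption about its internals, not confirmed by this diff), the two runs work out as follows:

```python
# Hedged sketch of the effective-batch arithmetic implied by the two configs,
# assuming gradient_accumulation_steps = batch_size // micro_batch_size per device.
def accumulation_steps(batch_size: int, micro_batch_size: int) -> int:
    return batch_size // micro_batch_size

# experiment.yml: LoRA run on the 4-bit GPTQ base
print(accumulation_steps(128, 8))  # 16 micro-batches of 8 -> 128 samples per optimizer step
# experiment2.yml: full fine-tune from the merged weights under FSDP
print(accumulation_steps(8, 4))    # 2 micro-batches of 4 -> 8 samples per optimizer step (per device)
```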
pytorch_model-00001-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:81b5054f1e8cd5d83fdb298b665041e19323261b00588855a2f0059db10d6517
- size 9976637950
+ oid sha256:da8de3751aa79df207365b6546fbbc8ab8aeb8a5bbe411624953a0bec10a2810
+ size 9976642558
pytorch_model-00002-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bc45dda38a2cdfa6f63b897a7250326951cccf192c7fc77af616667155b4d190
- size 3500316627
+ oid sha256:08bfb0bb717720500f2947009dcfe8f152a5150c9834bd1b858d83e7ff8cfbec
+ size 3500318291
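The two updated LFS pointers above are the merged full-precision shards themselves (roughly 13.5 GB across the two files). A minimal loading sketch for the uploaded checkpoint, using a placeholder repo id since the actual Hub id is not stated in this diff, and an Alpaca-style prompt since the configs train on an alpaca-format dataset:

```python
# Hedged sketch: load the merged full weights shipped in this commit for inference.
# REPO_ID is a hypothetical placeholder for this repository's Hub id.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

REPO_ID = "user/repo"  # placeholder, replace with the actual repository id

tokenizer = AutoTokenizer.from_pretrained(REPO_ID)
model = AutoModelForCausalLM.from_pretrained(
    REPO_ID, torch_dtype=torch.bfloat16, device_map="auto"
)

prompt = (
    "Below is an instruction that describes a task.\n\n"
    "### Instruction:\nName three primary colors.\n\n### Response:\n"
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```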