winglian committed
Commit bd55e5c
Parent: e5dbd4b

model after one epoch of alpaca-gpt4, *bfloat16

README.md ADDED
@@ -0,0 +1,20 @@
+ ---
+ license: apache-2.0
+ datasets:
+ - vicgalle/alpaca-gpt4
+ language:
+ - en
+ library_name: transformers
+ pipeline_tag: conversational
+ ---
+ 
+ # Freedom-AI-Collective/llama-13b-alpaca-wizard-vicuna
+ 
+ ## Trained
+ 
+ - `vicgalle/alpaca-gpt4`: 1 epoch, learning rate 3e-5, wandb run: https://wandb.ai/wing-lian/wizard-vicuna-gpt4/overview
+ - `deepspeed scripts/finetune.py configs/axolotl/wizard-vicuna-13b-step1.yml --deepspeed configs/ds_config.json --num_epochs 2 --warmup_steps 46 --logging_steps 1 --save_steps 23`
+ - `wizardlm`: TBD
+ - `vicuna`: TBD
+ 
+ <pre>Brought to you by the Freedom AI Collective</pre>
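
The README documents the training run but not how to load the result. Below is a minimal inference sketch: the repo id is taken from the heading above, and the Alpaca-style prompt template is an assumption based on the training data, not something this commit confirms.

```python
# Hypothetical usage sketch -- repo id and prompt template are assumptions,
# not confirmed anywhere in this commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "Freedom-AI-Collective/llama-13b-alpaca-wizard-vicuna"  # from the README heading

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.bfloat16)

# Alpaca-style prompt, assumed from the alpaca-gpt4 training data.
prompt = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\nExplain what a llama is.\n\n### Response:\n"
)
inputs = tokenizer(prompt, return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=200)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```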
config.json CHANGED
@@ -1,5 +1,5 @@
  {
- "_name_or_path": "/home/sgugger/tmp/llama/llama-13b/",
+ "_name_or_path": "huggyllama/llama-13b",
  "architectures": [
    "LlamaForCausalLM"
  ],
@@ -17,8 +17,8 @@
  "pad_token_id": 0,
  "rms_norm_eps": 1e-06,
  "tie_word_embeddings": false,
- "torch_dtype": "float16",
- "transformers_version": "4.28.0.dev0",
- "use_cache": true,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.29.0.dev0",
+ "use_cache": false,
  "vocab_size": 32000
  }
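
The substantive change here is the dtype flip from float16 to bfloat16 (matching the commit message), plus `use_cache: false`. A short sketch of how the updated `torch_dtype` field is picked up by downstream loaders; the repo id is an assumption:

```python
# Sketch: torch_dtype="auto" defers to the "torch_dtype" field in config.json,
# so after this change the weights should load as bfloat16 under that flag.
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "Freedom-AI-Collective/llama-13b-alpaca-wizard-vicuna",  # assumed repo id
    torch_dtype="auto",
)
print(model.dtype)  # expected: torch.bfloat16
```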
configs/axolotl/wizard-vicuna-13b-step1.yml ADDED
@@ -0,0 +1,75 @@
+ base_model: huggyllama/llama-13b
+ base_model_config: huggyllama/llama-13b
+ model_type: LlamaForCausalLM
+ tokenizer_type: LlamaTokenizer
+ load_in_8bit: false
+ datasets:
+   - path: vicgalle/alpaca-gpt4
+     type: alpaca
+   # - path: anon8231489123/ShareGPT_Vicuna_unfiltered
+   #   data_files: ShareGPT_V3_unfiltered_cleaned_split_no_imsorry.json
+   #   type: sharegpt
+   # - path: ehartford/WizardLM_alpaca_evol_instruct_70k_unfiltered
+   #   type: alpaca
+ dataset_prepared_path: data/last_run_prepared
+ val_set_size: 0.04
+ adapter:
+ lora_model_dir:
+ sequence_len: 2048
+ max_packed_sequence_len: 2048
+ lora_r: 8
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_modules:
+   - q_proj
+   - v_proj
+   # - k_proj
+   # - o_proj
+ lora_fan_in_fan_out: false
+ wandb_project:
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model: checkpoint
+ output_dir: ./wizard-lm-out
+ batch_size: 128
+ micro_batch_size: 1
+ num_epochs: 2
+ warmup_steps: 46
+ logging_steps:
+ learning_rate: 0.00003
+ optimizer: adamw_torch
+ torchdistx_path:
+ lr_scheduler: one_cycle
+ log_sweep_min_lr: 2e-6
+ log_sweep_max_lr: 1e-4
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ tf32: true
+ gradient_checkpointing:
+ early_stopping_patience:
+ resume_from_checkpoint:
+ auto_resume_from_checkpoints:
+ local_rank:
+ load_4bit:
+ xformers_attention:
+ flash_attention: true
+ gptq_groupsize:
+ gptq_model_v1:
+ save_steps:
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ fsdp_transformer_layer_cls_to_wrap:
+ fsdp_min_num_params: 2000
+ fsdp_backward_prefetch:
+   - backward_pre
+ limit_all_gathers: false
+ special_tokens:
+   pad_token: "[PAD]"
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
+ 
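
A note on the batch settings in this config: with `batch_size: 128` and `micro_batch_size: 1`, the global batch is reached through gradient accumulation. A rough sanity check of that arithmetic, under the assumptions that `batch_size` is the global batch per optimizer step and accumulation is split across an 8-GPU run (the GPU count is not stated in this commit):

```python
# Back-of-the-envelope check of the batch settings in the YAML above.
# Assumptions: batch_size is the global batch per optimizer step, and the
# accumulation steps are divided across an 8-GPU world (not confirmed here).
batch_size = 128
micro_batch_size = 1
world_size = 8  # assumed

grad_accum_steps = batch_size // (micro_batch_size * world_size)
print(grad_accum_steps)  # -> 16 accumulation steps per optimizer step
```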
configs/ds_config.json ADDED
@@ -0,0 +1,58 @@
+ {
+   "zero_optimization": {
+     "stage": 3,
+     "offload_optimizer": {
+       "device": "cpu",
+       "pin_memory": true
+     },
+     "offload_param": {
+       "device": "cpu",
+       "pin_memory": true
+     },
+     "overlap_comm": true,
+     "contiguous_gradients": true,
+     "sub_group_size": 0,
+     "reduce_bucket_size": "auto",
+     "stage3_prefetch_bucket_size": "auto",
+     "stage3_param_persistence_threshold": "auto",
+     "stage3_max_live_parameters": 0,
+     "stage3_max_reuse_distance": 0,
+     "stage3_gather_16bit_weights_on_model_save": true
+   },
+   "bf16": {
+     "enabled": "auto"
+   },
+   "fp16": {
+     "enabled": "auto",
+     "auto_cast": false,
+     "loss_scale": 0,
+     "initial_scale_power": 32,
+     "loss_scale_window": 1000,
+     "hysteresis": 2,
+     "min_loss_scale": 1
+   },
+   "optimizer": {
+     "type": "AdamW",
+     "params": {
+       "lr": "auto",
+       "betas": [
+         0.9,
+         0.999
+       ],
+       "eps": 1e-8,
+       "weight_decay": 0
+     }
+   },
+   "scheduler": {
+     "type": "OneCycle",
+     "params": {
+       "cycle_min_lr": 0.000003,
+       "cycle_max_lr": 0.00003,
+       "cycle_first_step_size": 46
+     }
+   },
+   "train_batch_size": "auto",
+   "train_micro_batch_size_per_gpu": "auto",
+   "wall_clock_breakdown": false
+ }
+ 
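
Many values in this DeepSpeed config are `"auto"`, which only resolves when the file is consumed through the Hugging Face Trainer's DeepSpeed integration (as the `deepspeed scripts/finetune.py ... --deepspeed configs/ds_config.json` command in the README does): the Trainer fills the `"auto"` fields from its own arguments at launch. A simplified, illustrative stand-in for that wiring, not the actual axolotl code path:

```python
# Illustrative sketch only: shows how "auto" entries in ds_config.json are
# resolved by the HF Trainer's DeepSpeed integration from TrainingArguments.
# The accumulation count is an assumption carried over from the batch-size note above.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./wizard-lm-out",
    per_device_train_batch_size=1,       # -> train_micro_batch_size_per_gpu: "auto"
    gradient_accumulation_steps=16,      # assumed; together these determine train_batch_size: "auto"
    learning_rate=3e-5,                  # -> optimizer params.lr: "auto"
    bf16=True,                           # -> bf16.enabled: "auto"
    deepspeed="configs/ds_config.json",  # the DeepSpeed engine is built from this file
)
```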
generation_config.json CHANGED
@@ -3,5 +3,5 @@
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 0,
- "transformers_version": "4.28.0.dev0"
+ "transformers_version": "4.29.0.dev0"
  }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:889db4f7bfe042df8a8a31be88256992bfb30eece88dca60fadaa83810bf7b13
+ size 26031868013