winglian committed on
Commit
165da58
Parent: 4cc7ed8

fix config for parity with previous change

https://github.com/OpenAccess-AI-Collective/axolotl/commit/5159d00a86ef7c358aa819d0bafadd1d5d8304e8#diff-65b4693504c4e8ffac76c7f2c90913faee381f802cf64e7f49c995a2134ed3b3R164
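
In short: the linked commit appears to have renamed the top-level special_tokens: mapping to tokens:, and this commit updates the remaining configs to match. The nested token definitions themselves are unchanged; a minimal sketch of the new shape (values taken from the configs below):

tokens:
  pad_token: "[PAD]"
  bos_token: "<s>"
  eos_token: "</s>"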

configs/galactica_1_3B.yml CHANGED
@@ -34,7 +34,7 @@ tf32: false
 early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
-special_tokens:
+tokens:
   pad_token: "[PAD]"
   bos_token: "<s>"
   eos_token: "</s>"
configs/llama_7B_jeopardy.yml CHANGED
@@ -51,7 +51,7 @@ deepspeed:
 weight_decay: 0.0001
 fsdp:
 fsdp_config:
-special_tokens:
+tokens:
   pad_token: "[PAD]"
   bos_token: "<s>"
   eos_token: "</s>"
configs/stability_3b.yml CHANGED
@@ -49,7 +49,7 @@ deepspeed:
 weight_decay: 0.01
 fsdp:
 fsdp_config:
-#special_tokens:
+#tokens:
 # pad_token: "[PAD]"
 # bos_token: "<s>"
 # eos_token: "</s>"
examples/4bit-lora-7b/config.yml CHANGED
@@ -55,7 +55,7 @@ deepspeed:
 weight_decay: 0.0001
 fsdp:
 fsdp_config:
-special_tokens:
+tokens:
   pad_token: "[PAD]"
   bos_token: "<s>"
   eos_token: "</s>"
examples/mpt-7b/config.yml CHANGED
@@ -1,7 +1,6 @@
 base_model: mosaicml/mpt-7b
 base_model_config: mosaicml/mpt-7b
-model_type: AutoModelForCausalLM
-tokenizer_type: GPTNeoXTokenizer
+tokenizer_type: AutoTokenizer
 trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
 load_in_8bit: false
 datasets:
@@ -25,7 +24,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model: checkpoint
 output_dir: ./mpt-alpaca-7b
-batch_size: 4
+batch_size: 1
 micro_batch_size: 1
 num_epochs: 3
 optimizer: adamw_bnb_8bit
@@ -52,7 +51,7 @@ deepspeed:
 weight_decay: 0.0001
 fsdp:
 fsdp_config:
-special_tokens:
+tokens:
   pad_token: "<|padding|>"
   bos_token: "<|endoftext|>"
   eos_token: "<|endoftext|>"
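
Beyond the key rename, this file also drops model_type: AutoModelForCausalLM, swaps tokenizer_type from GPTNeoXTokenizer to AutoTokenizer, and lowers batch_size from 4 to 1. Assuming axolotl derives gradient accumulation as batch_size / micro_batch_size (an assumption; not shown in this diff), the new values amount to running without accumulation:

# hypothetical reading, assuming gradient_accumulation_steps = batch_size / micro_batch_size
batch_size: 1        # was 4, i.e. 4 accumulation steps at micro_batch_size: 1
micro_batch_size: 1  # per-device batch size, unchanged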
examples/redpajama/config-3b.yml CHANGED
@@ -52,7 +52,7 @@ deepspeed:
 weight_decay: 0.0001
 fsdp:
 fsdp_config:
-special_tokens:
+tokens:
   pad_token: "<|padding|>"
   bos_token: "<|endoftext|>"
   eos_token: "<|endoftext|>"