fix config for parity with previous change
Browse files
https://github.com/OpenAccess-AI-Collective/axolotl/commit/5159d00a86ef7c358aa819d0bafadd1d5d8304e8#diff-65b4693504c4e8ffac76c7f2c90913faee381f802cf64e7f49c995a2134ed3b3R164
configs/galactica_1_3B.yml
CHANGED
@@ -34,7 +34,7 @@ tf32: false
|
|
34 |
early_stopping_patience:
|
35 |
resume_from_checkpoint:
|
36 |
local_rank:
|
37 |
-
|
38 |
pad_token: "[PAD]"
|
39 |
bos_token: "<s>"
|
40 |
eos_token: "</s>"
|
|
|
34 |
early_stopping_patience:
|
35 |
resume_from_checkpoint:
|
36 |
local_rank:
|
37 |
+
tokens:
|
38 |
pad_token: "[PAD]"
|
39 |
bos_token: "<s>"
|
40 |
eos_token: "</s>"
|
configs/llama_7B_jeopardy.yml
CHANGED
@@ -51,7 +51,7 @@ deepspeed:
|
|
51 |
weight_decay: 0.0001
|
52 |
fsdp:
|
53 |
fsdp_config:
|
54 |
-
|
55 |
pad_token: "[PAD]"
|
56 |
bos_token: "<s>"
|
57 |
eos_token: "</s>"
|
|
|
51 |
weight_decay: 0.0001
|
52 |
fsdp:
|
53 |
fsdp_config:
|
54 |
+
tokens:
|
55 |
pad_token: "[PAD]"
|
56 |
bos_token: "<s>"
|
57 |
eos_token: "</s>"
|
configs/stability_3b.yml
CHANGED
@@ -49,7 +49,7 @@ deepspeed:
|
|
49 |
weight_decay: 0.01
|
50 |
fsdp:
|
51 |
fsdp_config:
|
52 |
-
#
|
53 |
# pad_token: "[PAD]"
|
54 |
# bos_token: "<s>"
|
55 |
# eos_token: "</s>"
|
|
|
49 |
weight_decay: 0.01
|
50 |
fsdp:
|
51 |
fsdp_config:
|
52 |
+
#tokens:
|
53 |
# pad_token: "[PAD]"
|
54 |
# bos_token: "<s>"
|
55 |
# eos_token: "</s>"
|
examples/4bit-lora-7b/config.yml
CHANGED
@@ -55,7 +55,7 @@ deepspeed:
|
|
55 |
weight_decay: 0.0001
|
56 |
fsdp:
|
57 |
fsdp_config:
|
58 |
-
|
59 |
pad_token: "[PAD]"
|
60 |
bos_token: "<s>"
|
61 |
eos_token: "</s>"
|
|
|
55 |
weight_decay: 0.0001
|
56 |
fsdp:
|
57 |
fsdp_config:
|
58 |
+
tokens:
|
59 |
pad_token: "[PAD]"
|
60 |
bos_token: "<s>"
|
61 |
eos_token: "</s>"
|
examples/mpt-7b/config.yml
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
base_model: mosaicml/mpt-7b
|
2 |
base_model_config: mosaicml/mpt-7b
|
3 |
-
|
4 |
-
tokenizer_type: GPTNeoXTokenizer
|
5 |
trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
|
6 |
load_in_8bit: false
|
7 |
datasets:
|
@@ -25,7 +24,7 @@ wandb_watch:
|
|
25 |
wandb_run_id:
|
26 |
wandb_log_model: checkpoint
|
27 |
output_dir: ./mpt-alpaca-7b
|
28 |
-
batch_size:
|
29 |
micro_batch_size: 1
|
30 |
num_epochs: 3
|
31 |
optimizer: adamw_bnb_8bit
|
@@ -52,7 +51,7 @@ deepspeed:
|
|
52 |
weight_decay: 0.0001
|
53 |
fsdp:
|
54 |
fsdp_config:
|
55 |
-
|
56 |
pad_token: "<|padding|>"
|
57 |
bos_token: "<|endoftext|>"
|
58 |
eos_token: "<|endoftext|>"
|
|
|
1 |
base_model: mosaicml/mpt-7b
|
2 |
base_model_config: mosaicml/mpt-7b
|
3 |
+
tokenizer_type: AutoTokenizer
|
|
|
4 |
trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
|
5 |
load_in_8bit: false
|
6 |
datasets:
|
|
|
24 |
wandb_run_id:
|
25 |
wandb_log_model: checkpoint
|
26 |
output_dir: ./mpt-alpaca-7b
|
27 |
+
batch_size: 1
|
28 |
micro_batch_size: 1
|
29 |
num_epochs: 3
|
30 |
optimizer: adamw_bnb_8bit
|
|
|
51 |
weight_decay: 0.0001
|
52 |
fsdp:
|
53 |
fsdp_config:
|
54 |
+
tokens:
|
55 |
pad_token: "<|padding|>"
|
56 |
bos_token: "<|endoftext|>"
|
57 |
eos_token: "<|endoftext|>"
|
examples/redpajama/config-3b.yml
CHANGED
@@ -52,7 +52,7 @@ deepspeed:
|
|
52 |
weight_decay: 0.0001
|
53 |
fsdp:
|
54 |
fsdp_config:
|
55 |
-
|
56 |
pad_token: "<|padding|>"
|
57 |
bos_token: "<|endoftext|>"
|
58 |
eos_token: "<|endoftext|>"
|
|
|
52 |
weight_decay: 0.0001
|
53 |
fsdp:
|
54 |
fsdp_config:
|
55 |
+
tokens:
|
56 |
pad_token: "<|padding|>"
|
57 |
bos_token: "<|endoftext|>"
|
58 |
eos_token: "<|endoftext|>"
|