simplify by removing duplicate base_model_config (#772)
- examples/cerebras/btlm-ft.yml +0 -1
- examples/cerebras/qlora.yml +0 -1
- examples/code-llama/13b/lora.yml +0 -1
- examples/code-llama/13b/qlora.yml +0 -1
- examples/code-llama/34b/lora.yml +0 -1
- examples/code-llama/34b/qlora.yml +0 -1
- examples/code-llama/7b/lora.yml +0 -1
- examples/code-llama/7b/qlora.yml +0 -1
- examples/falcon/config-7b-lora.yml +0 -1
- examples/falcon/config-7b-qlora.yml +0 -1
- examples/falcon/config-7b.yml +0 -1
- examples/gptj/qlora.yml +0 -1
- examples/jeopardy-bot/config.yml +0 -1
- examples/llama-2/fft_optimized.yml +0 -1
- examples/llama-2/gptq-lora.yml +0 -1
- examples/llama-2/lora.yml +0 -1
- examples/llama-2/qlora.yml +0 -1
- examples/llama-2/relora.yml +0 -1
- examples/llama-2/tiny-llama.yml +0 -1
- examples/mistral/config.yml +0 -1
- examples/mistral/qlora.yml +0 -1
- examples/mpt-7b/config.yml +0 -1
- examples/openllama-3b/config.yml +0 -1
- examples/openllama-3b/lora.yml +0 -1
- examples/openllama-3b/qlora.yml +0 -1
- examples/phi/phi-ft.yml +0 -1
- examples/phi/phi-qlora.yml +0 -1
- examples/pythia-12b/config.yml +0 -1
- examples/pythia/lora.yml +0 -1
- examples/redpajama/config-3b.yml +0 -1
- examples/replit-3b/config-lora.yml +0 -1
- examples/xgen-7b/xgen-7b-8k-qlora.yml +0 -1
- src/axolotl/utils/config.py +3 -0
- tests/e2e/test_fused_llama.py +0 -1
- tests/e2e/test_lora_llama.py +0 -3
- tests/e2e/test_mistral.py +0 -2
- tests/e2e/test_mistral_samplepack.py +0 -2
- tests/e2e/test_phi.py +0 -2
- tests/test_normalize_config.py +7 -0
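In short: when base_model_config is identical to base_model, the example configs no longer need to set it; normalize_config (see src/axolotl/utils/config.py below) now falls back to base_model when base_model_config is empty. A minimal before/after sketch, using keys taken from examples/llama-2/lora.yml:

# before
base_model: NousResearch/Llama-2-7b-hf
base_model_config: NousResearch/Llama-2-7b-hf

# after: base_model_config may be omitted; it defaults to base_model
base_model: NousResearch/Llama-2-7b-hf

Setting base_model_config explicitly still works and takes precedence over the fallback.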
examples/cerebras/btlm-ft.yml
@@ -1,5 +1,4 @@
 base_model: cerebras/btlm-3b-8k-base
-base_model_config: cerebras/btlm-3b-8k-base
 model_type: AutoModelForCausalLM
 tokenizer_type: GPT2Tokenizer
 trust_remote_code: true
examples/cerebras/qlora.yml
@@ -1,5 +1,4 @@
 base_model: cerebras/Cerebras-GPT-1.3B
-base_model_config: cerebras/Cerebras-GPT-1.3B
 load_in_8bit: false
 load_in_4bit: true
 strict: false
examples/code-llama/13b/lora.yml
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-13b-hf
-base_model_config: codellama/CodeLlama-13b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/code-llama/13b/qlora.yml
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-13b-hf
-base_model_config: codellama/CodeLlama-13b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/code-llama/34b/lora.yml
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-34b-hf
-base_model_config: codellama/CodeLlama-34b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/code-llama/34b/qlora.yml
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-34b-hf
-base_model_config: codellama/CodeLlama-34b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/code-llama/7b/lora.yml
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-7b-hf
-base_model_config: codellama/CodeLlama-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/code-llama/7b/qlora.yml
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-7b-hf
-base_model_config: codellama/CodeLlama-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/falcon/config-7b-lora.yml
@@ -1,5 +1,4 @@
 base_model: tiiuae/falcon-7b
-base_model_config: tiiuae/falcon-7b
 trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
examples/falcon/config-7b-qlora.yml
@@ -1,7 +1,6 @@
 # 1b: tiiuae/falcon-rw-1b
 # 40b: tiiuae/falcon-40b
 base_model: tiiuae/falcon-7b
-base_model_config: tiiuae/falcon-7b
 # required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
 trust_remote_code: true
 model_type: AutoModelForCausalLM
examples/falcon/config-7b.yml
@@ -1,5 +1,4 @@
 base_model: tiiuae/falcon-7b
-base_model_config: tiiuae/falcon-7b
 trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
examples/gptj/qlora.yml
@@ -1,5 +1,4 @@
 base_model: EleutherAI/gpt-j-6b
-base_model_config: EleutherAI/gpt-j-6b
 load_in_8bit: false
 load_in_4bit: true
 strict: false
examples/jeopardy-bot/config.yml
@@ -1,5 +1,4 @@
 base_model: huggyllama/llama-7b
-base_model_config: huggyllama/llama-7b
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 load_in_8bit: false
examples/llama-2/fft_optimized.yml
@@ -1,5 +1,4 @@
 base_model: NousResearch/Llama-2-7b-hf
-base_model_config: NousResearch/Llama-2-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 is_llama_derived_model: true
examples/llama-2/gptq-lora.yml
@@ -1,5 +1,4 @@
 base_model: TheBloke/Llama-2-7B-GPTQ
-base_model_config: TheBloke/Llama-2-7B-GPTQ
 is_llama_derived_model: false
 gptq: true
 gptq_disable_exllama: true
examples/llama-2/lora.yml
@@ -1,5 +1,4 @@
 base_model: NousResearch/Llama-2-7b-hf
-base_model_config: NousResearch/Llama-2-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 is_llama_derived_model: true
examples/llama-2/qlora.yml
@@ -1,5 +1,4 @@
 base_model: NousResearch/Llama-2-7b-hf
-base_model_config: NousResearch/Llama-2-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 is_llama_derived_model: true
examples/llama-2/relora.yml
@@ -1,5 +1,4 @@
 base_model: NousResearch/Llama-2-7b-hf
-base_model_config: NousResearch/Llama-2-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 is_llama_derived_model: true
examples/llama-2/tiny-llama.yml
@@ -1,5 +1,4 @@
 base_model: PY007/TinyLlama-1.1B-step-50K-105b
-base_model_config: PY007/TinyLlama-1.1B-step-50K-105b
 
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
examples/mistral/config.yml
@@ -1,5 +1,4 @@
 base_model: mistralai/Mistral-7B-v0.1
-base_model_config: mistralai/Mistral-7B-v0.1
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
 is_mistral_derived_model: true
examples/mistral/qlora.yml
@@ -1,5 +1,4 @@
 base_model: mistralai/Mistral-7B-v0.1
-base_model_config: mistralai/Mistral-7B-v0.1
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
 is_mistral_derived_model: true
examples/mpt-7b/config.yml
@@ -1,5 +1,4 @@
 base_model: mosaicml/mpt-7b
-base_model_config: mosaicml/mpt-7b
 tokenizer_type: AutoTokenizer
 trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
 load_in_8bit: false
examples/openllama-3b/config.yml
@@ -1,5 +1,4 @@
 base_model: openlm-research/open_llama_3b_v2
-base_model_config: openlm-research/open_llama_3b_v2
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 load_in_8bit: false
examples/openllama-3b/lora.yml
@@ -1,5 +1,4 @@
 base_model: openlm-research/open_llama_3b_v2
-base_model_config: openlm-research/open_llama_3b_v2
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 load_in_8bit: true
examples/openllama-3b/qlora.yml
@@ -1,5 +1,4 @@
 base_model: openlm-research/open_llama_3b_v2
-base_model_config: openlm-research/open_llama_3b_v2
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 load_in_8bit: false
examples/phi/phi-ft.yml
@@ -1,5 +1,4 @@
 base_model: microsoft/phi-1_5
-base_model_config: microsoft/phi-1_5
 model_type: MixFormerSequentialForCausalLM
 tokenizer_type: AutoTokenizer
 is_llama_derived_model: false
examples/phi/phi-qlora.yml
@@ -1,5 +1,4 @@
 base_model: microsoft/phi-1_5
-base_model_config: microsoft/phi-1_5
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 is_llama_derived_model: false
examples/pythia-12b/config.yml
@@ -1,5 +1,4 @@
 base_model: EleutherAI/pythia-12b-deduped
-base_model_config: EleutherAI/pythia-12b-deduped
 base_model_ignore_patterns: pytorch* # prefer safetensors
 model_type: GPTNeoXForCausalLM
 tokenizer_type: AutoTokenizer
examples/pythia/lora.yml
@@ -1,5 +1,4 @@
 base_model: EleutherAI/pythia-1.4b-deduped
-base_model_config: EleutherAI/pythia-1.4b-deduped
 load_in_8bit: true
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
examples/redpajama/config-3b.yml
@@ -1,5 +1,4 @@
 base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1
-base_model_config: togethercomputer/RedPajama-INCITE-Chat-3B-v1
 model_type: GPTNeoXForCausalLM
 tokenizer_type: AutoTokenizer
 trust_remote_code:
examples/replit-3b/config-lora.yml
@@ -1,5 +1,4 @@
 base_model: replit/replit-code-v1-3b
-base_model_config: replit/replit-code-v1-3b
 trust_remote_code: true
 load_in_8bit: false
 datasets:
examples/xgen-7b/xgen-7b-8k-qlora.yml
@@ -1,7 +1,6 @@
 # An example finetuning Saleforce's XGen-7b model with 8k context using qlora
 # on Tim Dettmer's Guanaco dataset.
 base_model: Salesforce/xgen-7b-8k-base
-base_model_config: Salesforce/xgen-7b-8k-base
 trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
src/axolotl/utils/config.py
@@ -79,6 +79,9 @@ def normalize_config(cfg):
 
     cfg.dataset_processes = cfg.dataset_processes or os.cpu_count()
 
+    if not cfg.base_model_config:
+        cfg.base_model_config = cfg.base_model
+
     model_config = load_model_config(cfg)
     cfg.model_config_type = model_config.model_type
 
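The fallback in isolation, as a minimal runnable sketch (SimpleNamespace stands in for axolotl's DictDefault-backed cfg object, which is an assumption here; the real normalize_config also loads the model config from the hub):

from types import SimpleNamespace

# Hypothetical stand-in for the cfg object passed to normalize_config.
cfg = SimpleNamespace(base_model="JackFram/llama-68m", base_model_config=None)

# The fallback added by this commit: only fill base_model_config when it is
# empty, so an explicitly set value still takes precedence.
if not cfg.base_model_config:
    cfg.base_model_config = cfg.base_model

assert cfg.base_model_config == "JackFram/llama-68m"

The e2e tests below drop the duplicated key from their DictDefault configs and rely on this fallback; tests/test_normalize_config.py adds a unit test for it.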
tests/e2e/test_fused_llama.py
@@ -31,7 +31,6 @@ class TestFusedLlama(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "JackFram/llama-68m",
-                "base_model_config": "JackFram/llama-68m",
                 "flash_attention": True,
                 "flash_attn_fuse_qkv": True,
                 "flash_attn_fuse_mlp": True,
tests/e2e/test_lora_llama.py
@@ -29,7 +29,6 @@ class TestLoraLlama(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "JackFram/llama-68m",
-                "base_model_config": "JackFram/llama-68m",
                 "tokenizer_type": "LlamaTokenizer",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
@@ -72,7 +71,6 @@ class TestLoraLlama(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "JackFram/llama-68m",
-                "base_model_config": "JackFram/llama-68m",
                 "tokenizer_type": "LlamaTokenizer",
                 "sequence_len": 1024,
                 "sample_packing": True,
@@ -117,7 +115,6 @@ class TestLoraLlama(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "TheBlokeAI/jackfram_llama-68m-GPTQ",
-                "base_model_config": "TheBlokeAI/jackfram_llama-68m-GPTQ",
                 "model_type": "AutoModelForCausalLM",
                 "tokenizer_type": "LlamaTokenizer",
                 "sequence_len": 1024,
tests/e2e/test_mistral.py
@@ -31,7 +31,6 @@ class TestMistral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "openaccess-ai-collective/tiny-mistral",
-                "base_model_config": "openaccess-ai-collective/tiny-mistral",
                 "flash_attention": True,
                 "sequence_len": 1024,
                 "load_in_8bit": True,
@@ -77,7 +76,6 @@ class TestMistral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "openaccess-ai-collective/tiny-mistral",
-                "base_model_config": "openaccess-ai-collective/tiny-mistral",
                 "flash_attention": True,
                 "sequence_len": 1024,
                 "val_set_size": 0.1,
tests/e2e/test_mistral_samplepack.py
@@ -31,7 +31,6 @@ class TestMistral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "openaccess-ai-collective/tiny-mistral",
-                "base_model_config": "openaccess-ai-collective/tiny-mistral",
                 "flash_attention": True,
                 "sample_packing": True,
                 "sequence_len": 1024,
@@ -78,7 +77,6 @@ class TestMistral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "openaccess-ai-collective/tiny-mistral",
-                "base_model_config": "openaccess-ai-collective/tiny-mistral",
                 "flash_attention": True,
                 "sample_packing": True,
                 "sequence_len": 1024,
tests/e2e/test_phi.py
@@ -27,7 +27,6 @@ class TestPhi(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "microsoft/phi-1_5",
-                "base_model_config": "microsoft/phi-1_5",
                 "trust_remote_code": True,
                 "model_type": "MixFormerSequentialForCausalLM",
                 "tokenizer_type": "AutoTokenizer",
@@ -71,7 +70,6 @@ class TestPhi(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "microsoft/phi-1_5",
-                "base_model_config": "microsoft/phi-1_5",
                 "trust_remote_code": True,
                 "model_type": "MixFormerSequentialForCausalLM",
                 "tokenizer_type": "AutoTokenizer",
tests/test_normalize_config.py
@@ -37,3 +37,10 @@ class NormalizeConfigTestCase(unittest.TestCase):
         normalize_config(cfg)
 
         assert cfg.learning_rate == 0.00005
+
+    def test_base_model_config_set_when_empty(self):
+        cfg = self._get_base_cfg()
+        del cfg.base_model_config
+        normalize_config(cfg)
+
+        assert cfg.base_model_config == cfg.base_model