simplify by removing duplicate base_model_config (#772)
- examples/cerebras/btlm-ft.yml +0 -1
- examples/cerebras/qlora.yml +0 -1
- examples/code-llama/13b/lora.yml +0 -1
- examples/code-llama/13b/qlora.yml +0 -1
- examples/code-llama/34b/lora.yml +0 -1
- examples/code-llama/34b/qlora.yml +0 -1
- examples/code-llama/7b/lora.yml +0 -1
- examples/code-llama/7b/qlora.yml +0 -1
- examples/falcon/config-7b-lora.yml +0 -1
- examples/falcon/config-7b-qlora.yml +0 -1
- examples/falcon/config-7b.yml +0 -1
- examples/gptj/qlora.yml +0 -1
- examples/jeopardy-bot/config.yml +0 -1
- examples/llama-2/fft_optimized.yml +0 -1
- examples/llama-2/gptq-lora.yml +0 -1
- examples/llama-2/lora.yml +0 -1
- examples/llama-2/qlora.yml +0 -1
- examples/llama-2/relora.yml +0 -1
- examples/llama-2/tiny-llama.yml +0 -1
- examples/mistral/config.yml +0 -1
- examples/mistral/qlora.yml +0 -1
- examples/mpt-7b/config.yml +0 -1
- examples/openllama-3b/config.yml +0 -1
- examples/openllama-3b/lora.yml +0 -1
- examples/openllama-3b/qlora.yml +0 -1
- examples/phi/phi-ft.yml +0 -1
- examples/phi/phi-qlora.yml +0 -1
- examples/pythia-12b/config.yml +0 -1
- examples/pythia/lora.yml +0 -1
- examples/redpajama/config-3b.yml +0 -1
- examples/replit-3b/config-lora.yml +0 -1
- examples/xgen-7b/xgen-7b-8k-qlora.yml +0 -1
- src/axolotl/utils/config.py +3 -0
- tests/e2e/test_fused_llama.py +0 -1
- tests/e2e/test_lora_llama.py +0 -3
- tests/e2e/test_mistral.py +0 -2
- tests/e2e/test_mistral_samplepack.py +0 -2
- tests/e2e/test_phi.py +0 -2
- tests/test_normalize_config.py +7 -0
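In short: when base_model_config is identical to base_model, the example configs no longer need to set it; normalize_config (see src/axolotl/utils/config.py below) now falls back to base_model when base_model_config is empty. A minimal before/after sketch, using keys taken from examples/llama-2/lora.yml:

# before
base_model: NousResearch/Llama-2-7b-hf
base_model_config: NousResearch/Llama-2-7b-hf

# after: base_model_config may be omitted; it defaults to base_model
base_model: NousResearch/Llama-2-7b-hf

Setting base_model_config explicitly still works and takes precedence over the fallback.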
examples/cerebras/btlm-ft.yml
@@ -1,5 +1,4 @@
 base_model: cerebras/btlm-3b-8k-base
-base_model_config: cerebras/btlm-3b-8k-base
 model_type: AutoModelForCausalLM
 tokenizer_type: GPT2Tokenizer
 trust_remote_code: true
examples/cerebras/qlora.yml
@@ -1,5 +1,4 @@
 base_model: cerebras/Cerebras-GPT-1.3B
-base_model_config: cerebras/Cerebras-GPT-1.3B
 load_in_8bit: false
 load_in_4bit: true
 strict: false
examples/code-llama/13b/lora.yml
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-13b-hf
-base_model_config: codellama/CodeLlama-13b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/code-llama/13b/qlora.yml
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-13b-hf
-base_model_config: codellama/CodeLlama-13b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/code-llama/34b/lora.yml
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-34b-hf
-base_model_config: codellama/CodeLlama-34b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/code-llama/34b/qlora.yml
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-34b-hf
-base_model_config: codellama/CodeLlama-34b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/code-llama/7b/lora.yml
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-7b-hf
-base_model_config: codellama/CodeLlama-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/code-llama/7b/qlora.yml
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-7b-hf
-base_model_config: codellama/CodeLlama-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/falcon/config-7b-lora.yml
@@ -1,5 +1,4 @@
 base_model: tiiuae/falcon-7b
-base_model_config: tiiuae/falcon-7b
 trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
examples/falcon/config-7b-qlora.yml
@@ -1,7 +1,6 @@
 # 1b: tiiuae/falcon-rw-1b
 # 40b: tiiuae/falcon-40b
 base_model: tiiuae/falcon-7b
-base_model_config: tiiuae/falcon-7b
 # required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
 trust_remote_code: true
 model_type: AutoModelForCausalLM
examples/falcon/config-7b.yml
@@ -1,5 +1,4 @@
 base_model: tiiuae/falcon-7b
-base_model_config: tiiuae/falcon-7b
 trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
examples/gptj/qlora.yml
@@ -1,5 +1,4 @@
 base_model: EleutherAI/gpt-j-6b
-base_model_config: EleutherAI/gpt-j-6b
 load_in_8bit: false
 load_in_4bit: true
 strict: false
examples/jeopardy-bot/config.yml
@@ -1,5 +1,4 @@
 base_model: huggyllama/llama-7b
-base_model_config: huggyllama/llama-7b
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 load_in_8bit: false
examples/llama-2/fft_optimized.yml
@@ -1,5 +1,4 @@
 base_model: NousResearch/Llama-2-7b-hf
-base_model_config: NousResearch/Llama-2-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 is_llama_derived_model: true
examples/llama-2/gptq-lora.yml
@@ -1,5 +1,4 @@
 base_model: TheBloke/Llama-2-7B-GPTQ
-base_model_config: TheBloke/Llama-2-7B-GPTQ
 is_llama_derived_model: false
 gptq: true
 gptq_disable_exllama: true
examples/llama-2/lora.yml
@@ -1,5 +1,4 @@
 base_model: NousResearch/Llama-2-7b-hf
-base_model_config: NousResearch/Llama-2-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 is_llama_derived_model: true
examples/llama-2/qlora.yml
@@ -1,5 +1,4 @@
 base_model: NousResearch/Llama-2-7b-hf
-base_model_config: NousResearch/Llama-2-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 is_llama_derived_model: true
examples/llama-2/relora.yml
@@ -1,5 +1,4 @@
 base_model: NousResearch/Llama-2-7b-hf
-base_model_config: NousResearch/Llama-2-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 is_llama_derived_model: true
examples/llama-2/tiny-llama.yml
@@ -1,5 +1,4 @@
 base_model: PY007/TinyLlama-1.1B-step-50K-105b
-base_model_config: PY007/TinyLlama-1.1B-step-50K-105b
 
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
examples/mistral/config.yml
@@ -1,5 +1,4 @@
 base_model: mistralai/Mistral-7B-v0.1
-base_model_config: mistralai/Mistral-7B-v0.1
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
 is_mistral_derived_model: true
examples/mistral/qlora.yml
@@ -1,5 +1,4 @@
 base_model: mistralai/Mistral-7B-v0.1
-base_model_config: mistralai/Mistral-7B-v0.1
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
 is_mistral_derived_model: true
examples/mpt-7b/config.yml
@@ -1,5 +1,4 @@
 base_model: mosaicml/mpt-7b
-base_model_config: mosaicml/mpt-7b
 tokenizer_type: AutoTokenizer
 trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
 load_in_8bit: false
examples/openllama-3b/config.yml
@@ -1,5 +1,4 @@
 base_model: openlm-research/open_llama_3b_v2
-base_model_config: openlm-research/open_llama_3b_v2
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 load_in_8bit: false
examples/openllama-3b/lora.yml
@@ -1,5 +1,4 @@
 base_model: openlm-research/open_llama_3b_v2
-base_model_config: openlm-research/open_llama_3b_v2
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 load_in_8bit: true
examples/openllama-3b/qlora.yml
@@ -1,5 +1,4 @@
 base_model: openlm-research/open_llama_3b_v2
-base_model_config: openlm-research/open_llama_3b_v2
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 load_in_8bit: false
examples/phi/phi-ft.yml
@@ -1,5 +1,4 @@
 base_model: microsoft/phi-1_5
-base_model_config: microsoft/phi-1_5
 model_type: MixFormerSequentialForCausalLM
 tokenizer_type: AutoTokenizer
 is_llama_derived_model: false
examples/phi/phi-qlora.yml
@@ -1,5 +1,4 @@
 base_model: microsoft/phi-1_5
-base_model_config: microsoft/phi-1_5
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 is_llama_derived_model: false
examples/pythia-12b/config.yml
@@ -1,5 +1,4 @@
 base_model: EleutherAI/pythia-12b-deduped
-base_model_config: EleutherAI/pythia-12b-deduped
 base_model_ignore_patterns: pytorch* # prefer safetensors
 model_type: GPTNeoXForCausalLM
 tokenizer_type: AutoTokenizer
examples/pythia/lora.yml
@@ -1,5 +1,4 @@
 base_model: EleutherAI/pythia-1.4b-deduped
-base_model_config: EleutherAI/pythia-1.4b-deduped
 load_in_8bit: true
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
examples/redpajama/config-3b.yml
@@ -1,5 +1,4 @@
 base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1
-base_model_config: togethercomputer/RedPajama-INCITE-Chat-3B-v1
 model_type: GPTNeoXForCausalLM
 tokenizer_type: AutoTokenizer
 trust_remote_code:
examples/replit-3b/config-lora.yml
@@ -1,5 +1,4 @@
 base_model: replit/replit-code-v1-3b
-base_model_config: replit/replit-code-v1-3b
 trust_remote_code: true
 load_in_8bit: false
 datasets:
examples/xgen-7b/xgen-7b-8k-qlora.yml
@@ -1,7 +1,6 @@
 # An example finetuning Saleforce's XGen-7b model with 8k context using qlora
 # on Tim Dettmer's Guanaco dataset.
 base_model: Salesforce/xgen-7b-8k-base
-base_model_config: Salesforce/xgen-7b-8k-base
 trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
src/axolotl/utils/config.py
@@ -79,6 +79,9 @@ def normalize_config(cfg):
 
     cfg.dataset_processes = cfg.dataset_processes or os.cpu_count()
 
+    if not cfg.base_model_config:
+        cfg.base_model_config = cfg.base_model
+
     model_config = load_model_config(cfg)
     cfg.model_config_type = model_config.model_type
 
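The fallback in isolation, as a minimal runnable sketch (SimpleNamespace stands in for axolotl's DictDefault-backed cfg object, which is an assumption here; the real normalize_config also loads the model config from the hub):

from types import SimpleNamespace

# Hypothetical stand-in for the cfg object passed to normalize_config.
cfg = SimpleNamespace(base_model="JackFram/llama-68m", base_model_config=None)

# The fallback added by this commit: only fill base_model_config when it is
# empty, so an explicitly set value still takes precedence.
if not cfg.base_model_config:
    cfg.base_model_config = cfg.base_model

assert cfg.base_model_config == "JackFram/llama-68m"

The e2e tests below drop the duplicated key from their DictDefault configs and rely on this fallback; tests/test_normalize_config.py adds a unit test for it.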
tests/e2e/test_fused_llama.py
@@ -31,7 +31,6 @@ class TestFusedLlama(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "JackFram/llama-68m",
-                "base_model_config": "JackFram/llama-68m",
                 "flash_attention": True,
                 "flash_attn_fuse_qkv": True,
                 "flash_attn_fuse_mlp": True,
tests/e2e/test_lora_llama.py
@@ -29,7 +29,6 @@ class TestLoraLlama(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "JackFram/llama-68m",
-                "base_model_config": "JackFram/llama-68m",
                 "tokenizer_type": "LlamaTokenizer",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
@@ -72,7 +71,6 @@ class TestLoraLlama(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "JackFram/llama-68m",
-                "base_model_config": "JackFram/llama-68m",
                 "tokenizer_type": "LlamaTokenizer",
                 "sequence_len": 1024,
                 "sample_packing": True,
@@ -117,7 +115,6 @@ class TestLoraLlama(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "TheBlokeAI/jackfram_llama-68m-GPTQ",
-                "base_model_config": "TheBlokeAI/jackfram_llama-68m-GPTQ",
                 "model_type": "AutoModelForCausalLM",
                 "tokenizer_type": "LlamaTokenizer",
                 "sequence_len": 1024,
tests/e2e/test_mistral.py
@@ -31,7 +31,6 @@ class TestMistral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "openaccess-ai-collective/tiny-mistral",
-                "base_model_config": "openaccess-ai-collective/tiny-mistral",
                 "flash_attention": True,
                 "sequence_len": 1024,
                 "load_in_8bit": True,
@@ -77,7 +76,6 @@ class TestMistral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "openaccess-ai-collective/tiny-mistral",
-                "base_model_config": "openaccess-ai-collective/tiny-mistral",
                 "flash_attention": True,
                 "sequence_len": 1024,
                 "val_set_size": 0.1,
tests/e2e/test_mistral_samplepack.py
@@ -31,7 +31,6 @@ class TestMistral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "openaccess-ai-collective/tiny-mistral",
-                "base_model_config": "openaccess-ai-collective/tiny-mistral",
                 "flash_attention": True,
                 "sample_packing": True,
                 "sequence_len": 1024,
@@ -78,7 +77,6 @@ class TestMistral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "openaccess-ai-collective/tiny-mistral",
-                "base_model_config": "openaccess-ai-collective/tiny-mistral",
                 "flash_attention": True,
                 "sample_packing": True,
                 "sequence_len": 1024,
tests/e2e/test_phi.py
@@ -27,7 +27,6 @@ class TestPhi(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "microsoft/phi-1_5",
-                "base_model_config": "microsoft/phi-1_5",
                 "trust_remote_code": True,
                 "model_type": "MixFormerSequentialForCausalLM",
                 "tokenizer_type": "AutoTokenizer",
@@ -71,7 +70,6 @@ class TestPhi(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "microsoft/phi-1_5",
-                "base_model_config": "microsoft/phi-1_5",
                 "trust_remote_code": True,
                 "model_type": "MixFormerSequentialForCausalLM",
                 "tokenizer_type": "AutoTokenizer",
tests/test_normalize_config.py
@@ -37,3 +37,10 @@ class NormalizeConfigTestCase(unittest.TestCase):
         normalize_config(cfg)
 
         assert cfg.learning_rate == 0.00005
+
+    def test_base_model_config_set_when_empty(self):
+        cfg = self._get_base_cfg()
+        del cfg.base_model_config
+        normalize_config(cfg)
+
+        assert cfg.base_model_config == cfg.base_model