Commit 8bd7a49 by Nanobit
Parent: 18d41ce

Refactor to use DictDefault instead

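For context: the refactor swaps addict.Dict for a project-local DictDefault in axolotl.utils.dict. The implementation is not shown in this diff; a minimal sketch of what such a wrapper plausibly looks like, assuming it subclasses addict.Dict and resolves missing keys to None instead of an empty Dict, is:

from addict import Dict

class DictDefault(Dict):
    # Hypothetical sketch: where addict.Dict hands back an empty Dict for a
    # missing key, this resolves the key to None, so checks like
    # `cfg.some_flag is None` or `if not cfg.some_flag:` read naturally.
    def __missing__(self, key):
        return None

Under that assumption, DictDefault({"base_model": "llama"}).load_in_8bit evaluates to None, whereas addict.Dict would return an empty Dict for the unset option.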
scripts/finetune.py CHANGED
@@ -10,11 +10,11 @@ from typing import Optional, List, Dict, Any, Union
  import fire
  import torch
  import yaml
- from addict import Dict

  # add src to the pythonpath so we don't need to pip install this
  from axolotl.utils.tokenization import check_dataset_labels
  from axolotl.utils.validation import validate_config
+ from axolotl.utils.dict import DictDefault

  project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
  src_dir = os.path.join(project_root, "src")
@@ -83,7 +83,7 @@ def do_inference(cfg, model, tokenizer, prompter="AlpacaPrompter"):
  temperature=0.9,
  top_p=0.95,
  top_k=40,
- return_dict_in_generate=True,
+ return_DictDefault_in_generate=True,
  output_attentions=False,
  output_hidden_states=False,
  output_scores=False,
@@ -131,7 +131,7 @@ def train(

  # load the config from the yaml file
  with open(config, "r") as f:
- cfg: Dict = Dict(yaml.load(f, Loader=yaml.Loader))
+ cfg: DictDefault = DictDefault(yaml.load(f, Loader=yaml.Loader))
  # if there are any options passed in the cli, if it is something that seems valid from the yaml,
  # then overwrite the value
  cfg_keys = cfg.keys()
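A hedged illustration of how the refactored config load in train() then behaves, assuming the DictDefault sketched above and a hypothetical config path examples/lora.yml:

import yaml
from axolotl.utils.dict import DictDefault

# hypothetical config file used only for illustration
with open("examples/lora.yml", "r") as f:
    cfg = DictDefault(yaml.load(f, Loader=yaml.Loader))

print(cfg.load_in_8bit)       # attribute-style access, as with addict.Dict
print(cfg.some_unset_option)  # key absent from the YAML -> None, not an empty Dict

CLI overrides can still be checked against cfg.keys() as before, since DictDefault keeps the plain dict interface.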
src/axolotl/utils/models.py CHANGED
@@ -29,7 +29,7 @@ from axolotl.prompt_tokenizers import LLAMA_DEFAULT_PAD_TOKEN

  if TYPE_CHECKING:
  from peft import PeftModel, PeftConfig
- from addict import Dict
+ from axolotl.utils.dict import DictDefault
  from transformers import PreTrainedTokenizer


@@ -79,7 +79,7 @@ def load_model(
  adapter="lora",
  inference=False,
  ):
- # type: (str, str, str, str, Dict, Optional[str], bool) -> Tuple[PreTrainedModel, PreTrainedTokenizer, Optional[PeftConfig]]
+ # type: (str, str, str, str, DictDefault, Optional[str], bool) -> Tuple[PreTrainedModel, PreTrainedTokenizer, Optional[PeftConfig]]

  # TODO refactor as a kwarg
  load_in_8bit = cfg.load_in_8bit
@@ -184,9 +184,9 @@ def load_model(
  # # https://github.com/HazyResearch/flash-attention/blob/40a25c8ee7465cf547b929cfa2937034e37bfce9/tests/models/test_gpt_neox.py#L12
  # # https://github.com/HazyResearch/flash-attention/tree/main/training#model-components
  # # add `**kwargs` to https://github.com/HazyResearch/flash-attention/blob/40a25c8ee7465cf547b929cfa2937034e37bfce9/flash_attn/models/gpt.py#L442
- # from flash_attn.utils.pretrained import state_dict_from_pretrained
+ # from flash_attn.utils.pretrained import state_DictDefault_from_pretrained
  # from flash_attn.models.gpt import GPTLMHeadModel
- # from flash_attn.models.gpt_neox import remap_state_dict_hf_gpt_neox, gpt_neox_config_to_gpt2_config
+ # from flash_attn.models.gpt_neox import remap_state_DictDefault_hf_gpt_neox, gpt_neox_config_to_gpt2_config
  # from transformers import GPTNeoXConfig
  # config = gpt_neox_config_to_gpt2_config(GPTNeoXConfig.from_pretrained(base_model))
  # config.use_flash_attn = True
@@ -294,7 +294,7 @@ def load_model(


  def load_adapter(model, cfg, adapter):
- # type: (PreTrainedModel, Dict, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+ # type: (PreTrainedModel, DictDefault, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]

  if adapter is None:
  return model, None
@@ -307,7 +307,7 @@ def load_adapter(model, cfg, adapter):


  def load_llama_adapter(model, cfg):
- # type: (PreTrainedModel, Dict) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+ # type: (PreTrainedModel, DictDefault) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
  from peft import (
  AdaptionPromptConfig,
  get_peft_model,
@@ -355,7 +355,7 @@ def find_all_linear_names(bits, model):


  def load_lora(model, cfg):
- # type: (PreTrainedModel, Dict) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+ # type: (PreTrainedModel, DictDefault) -> Tuple[PreTrainedModel, Optional[PeftConfig]]

  from peft import (
  LoraConfig,
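The models.py signatures keep their comment-style annotations, so DictDefault only needs to exist for the type checker; the if TYPE_CHECKING: guard above adds no runtime import. A simplified, self-contained sketch of that pattern, with object standing in for the PreTrainedModel and PeftConfig types to avoid heavyweight imports:

from typing import TYPE_CHECKING, Optional, Tuple

if TYPE_CHECKING:
    # imported only while type checking; absent at runtime
    from axolotl.utils.dict import DictDefault


def load_adapter(model, cfg, adapter):
    # type: (object, DictDefault, Optional[str]) -> Tuple[object, Optional[object]]
    if adapter is None:
        return model, None
    # adapter loading elided in this sketch
    raise NotImplementedError

Because type comments are never evaluated at runtime, referencing DictDefault in them works even though the import only happens under TYPE_CHECKING.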