wwwaj committed
Commit 122d3be
1 Parent(s): bc74875

Update sample_finetune.py

Files changed (1):
  sample_finetune.py +57 -21
sample_finetune.py CHANGED
@@ -4,7 +4,9 @@ from trl import SFTTrainer
 from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
 
 """
-Please note that A100 or later generation GPUs are required to finetune Phi-3 models
+A simple example of using SFTTrainer and Accelerate to finetune Phi-3 models. For
+a more advanced example, please follow HF alignment-handbook/scripts/run_sft.py
+
 1. Install accelerate:
     conda install -c conda-forge accelerate
 2. Setup accelerate config:
@@ -14,19 +16,16 @@ to simply use all the GPUs available:
 check accelerate config:
     accelerate env
 3. Run the code:
-    accelerate launch phi3-mini-sample-ft.py
+    accelerate launch sample_finetune.py
 """
 
 ###################
 # Hyper-parameters
 ###################
-
-
 args = {
     "bf16": True,
     "do_eval": False,
-    "evaluation_strategy": "no",
-    "eval_steps": 100,
+    "eval_strategy": "no",
     "learning_rate": 5.0e-06,
     "log_level": "info",
     "logging_steps": 20,
@@ -34,7 +33,7 @@ args = {
     "lr_scheduler_type": "cosine",
     "num_train_epochs": 1,
     "max_steps": -1,
-    "output_dir": ".",
+    "output_dir": "./checkpoint_dir",
     "overwrite_output_dir": True,
     "per_device_eval_batch_size": 4,
     "per_device_train_batch_size": 8,
@@ -43,51 +42,88 @@ args = {
     "save_total_limit": 1,
     "seed": 0,
     "gradient_checkpointing": True,
+    "gradient_checkpointing_kwargs": {"use_reentrant": False},
     "gradient_accumulation_steps": 1,
-    "warmup_ratio": 0.1,
+    "warmup_ratio": 0.2,
 }
 
 training_args = TrainingArguments(**args)
 
-
 ################
 # Model Loading
 ################
-
 checkpoint_path = "microsoft/Phi-3-mini-4k-instruct"
 # checkpoint_path = "microsoft/Phi-3-mini-128k-instruct"
 model_kwargs = dict(
+    use_cache=False,
     trust_remote_code=True,
-    attn_implementation="flash_attention_2",  # load the model with flash-attention support
+    attn_implementation="flash_attention_2",  # loading the model with flash-attention support
     torch_dtype=torch.bfloat16,
     device_map="cuda",
 )
 model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_kwargs)
-tokenizer = AutoTokenizer.from_pretrained(checkpoint_path, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
+tokenizer.pad_token = tokenizer.unk_token  # use unk rather than eos token to prevent endless generation
+tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
+tokenizer.padding_side = 'right'
 
-################
-# Data Loading
-################
+##################
+# Data Processing
+##################
+def apply_chat_template(
+    example,
+    tokenizer,
+):
+    messages = example["messages"]
+    # Add an empty system message if there is none
+    if messages[0]["role"] != "system":
+        messages.insert(0, {"role": "system", "content": ""})
+    example["text"] = tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=False)
+    return example
 
-dataset = load_dataset("imdb")
-train_dataset = dataset["train"]
-eval_dataset = dataset["test"]
+raw_dataset = load_dataset("HuggingFaceH4/ultrachat_200k")
+column_names = list(raw_dataset["train_sft"].features)
 
+processed_dataset = raw_dataset.map(
+    apply_chat_template,
+    fn_kwargs={"tokenizer": tokenizer},
+    num_proc=12,
+    remove_columns=column_names,
+    desc="Applying chat template",
+)
+train_dataset = processed_dataset["train_sft"]
+eval_dataset = processed_dataset["test_sft"]
 
-################
+###########
 # Training
-################
-
+###########
 trainer = SFTTrainer(
     model=model,
     args=training_args,
     train_dataset=train_dataset,
+    eval_dataset=eval_dataset,
     max_seq_length=2048,
     dataset_text_field="text",
     tokenizer=tokenizer,
+    packing=True
 )
 train_result = trainer.train()
 metrics = train_result.metrics
 trainer.log_metrics("train", metrics)
 trainer.save_metrics("train", metrics)
 trainer.save_state()
+
+#############
+# Evaluation
+#############
+tokenizer.padding_side = 'left'
+metrics = trainer.evaluate()
+metrics["eval_samples"] = len(eval_dataset)
+trainer.log_metrics("eval", metrics)
+trainer.save_metrics("eval", metrics)
+
+############
+# Save model
+############
+trainer.save_model(training_args.output_dir)
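
For reference, a minimal sketch of what the new Data Processing step produces for a single record, assuming the chat template bundled with the Phi-3 tokenizer; the example messages below are made up and are not part of the commit:

# Sketch only: mirrors the committed apply_chat_template() on one toy record.
# Real rows come from HuggingFaceH4/ultrachat_200k; this content is illustrative.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

example = {
    "messages": [
        {"role": "user", "content": "What does SFTTrainer expect as input?"},
        {"role": "assistant", "content": "A dataset column holding one flat text string per example."},
    ]
}

# Same logic as the commit: prepend an empty system message, then render the
# conversation into the "text" field that SFTTrainer(dataset_text_field="text") reads.
messages = example["messages"]
if messages[0]["role"] != "system":
    messages.insert(0, {"role": "system", "content": ""})
example["text"] = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=False
)
print(example["text"])  # one string containing the template's special chat tokens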
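
Similarly, a minimal sketch of reloading whatever trainer.save_model(training_args.output_dir) writes to ./checkpoint_dir for a quick generation check; the prompt and generation settings are illustrative assumptions, and it presumes the tokenizer was saved alongside the model:

# Sketch only: reload the finetuned checkpoint and generate one reply.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint_dir = "./checkpoint_dir"  # the commit's output_dir / save_model() target
model = AutoModelForCausalLM.from_pretrained(
    checkpoint_dir, torch_dtype=torch.bfloat16, device_map="cuda"
)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)

# Build the prompt with the same chat template used for training, this time
# asking the template to append the assistant turn marker.
messages = [{"role": "user", "content": "Summarize supervised fine-tuning in one sentence."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

with torch.no_grad():
    output_ids = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))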