wwwaj committed on
Commit f74eb85
1 Parent(s): edb43c8

Update sample_finetune.py

Files changed (1):
    sample_finetune.py  +60 -21
sample_finetune.py CHANGED
@@ -4,7 +4,9 @@ from trl import SFTTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments

"""
- Please note that A100 or later generation GPUs are required to finetune Phi-3 models
+ A simple example of using SFTTrainer and Accelerate to finetune Phi-3 models. For
+ a more advanced example, please follow HF alignment-handbook/scripts/run_sft.py
+
1. Install accelerate:
    conda install -c conda-forge accelerate
2. Setup accelerate config:
@@ -14,19 +16,16 @@ to simply use all the GPUs available:
check accelerate config:
    accelerate env
3. Run the code:
-     accelerate launch phi3-mini-sample-ft.py
+     accelerate launch sample_finetune.py
"""

###################
# Hyper-parameters
###################
-
-
args = {
    "bf16": True,
    "do_eval": False,
-     "evaluation_strategy": "no",
-     "eval_steps": 100,
+     "eval_strategy": "no",
    "learning_rate": 5.0e-06,
    "log_level": "info",
    "logging_steps": 20,
@@ -34,7 +33,7 @@ args = {
    "lr_scheduler_type": "cosine",
    "num_train_epochs": 1,
    "max_steps": -1,
-     "output_dir": ".",
+     "output_dir": "./checkpoint_dir",
    "overwrite_output_dir": True,
    "per_device_eval_batch_size": 4,
    "per_device_train_batch_size": 8,
@@ -43,8 +42,9 @@
    "save_total_limit": 1,
    "seed": 0,
    "gradient_checkpointing": True,
+     "gradient_checkpointing_kwargs": {"use_reentrant": False},
    "gradient_accumulation_steps": 1,
-     "warmup_ratio": 0.1,
+     "warmup_ratio": 0.2,
}

training_args = TrainingArguments(**args)
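
A quick note on how these hyper-parameters interact at runtime (the process count and step count below are illustrative assumptions, not part of the commit): each optimizer update sees per_device_train_batch_size * gradient_accumulation_steps * num_processes sequences, and warmup_ratio is turned into a number of linear warmup steps before the cosine decay.

# Back-of-the-envelope arithmetic for the args above; num_processes and
# total_update_steps are hypothetical and depend on the actual accelerate setup.
per_device_train_batch_size = 8
gradient_accumulation_steps = 1
num_processes = 2                                        # e.g. an accelerate run on 2 GPUs
effective_batch_size = (per_device_train_batch_size
                        * gradient_accumulation_steps
                        * num_processes)                 # 16 sequences per optimizer step

total_update_steps = 1000                                # depends on the packed dataset size
warmup_ratio = 0.2
warmup_steps = int(warmup_ratio * total_update_steps)    # 200 warmup steps, then cosine decay
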
@@ -53,40 +53,79 @@ training_args = TrainingArguments(**args)
################
# Model Loading
################
-
- checkpoint_path = "microsoft/Phi-3-mini-128k-instruct"
+ checkpoint_path = "microsoft/Phi-3-mini-4k-instruct"
+ # checkpoint_path = "microsoft/Phi-3-mini-128k-instruct"
model_kwargs = dict(
+     use_cache=False,
    trust_remote_code=True,
-     attn_implementation="flash_attention_2",  # load the model with flash-attention support
+     attn_implementation="flash_attention_2",  # loading the model with flash-attention support
    torch_dtype=torch.bfloat16,
    device_map="cuda",
)
model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_kwargs)
- tokenizer = AutoTokenizer.from_pretrained(checkpoint_path, trust_remote_code=True)
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
+ tokenizer.pad_token = tokenizer.unk_token  # use unk rather than eos token to prevent endless generation
+ tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
+ tokenizer.padding_side = 'right'

- ################
- # Data Loading
- ################
+ ##################
+ # Data Processing
+ ##################
+ def apply_chat_template(
+     example,
+     tokenizer,
+ ):
+     messages = example["messages"]
+     # Add an empty system message if there is none
+     if messages[0]["role"] != "system":
+         messages.insert(0, {"role": "system", "content": ""})
+     example["text"] = tokenizer.apply_chat_template(
+         messages, tokenize=False, add_generation_prompt=False)
+     return example

- dataset = load_dataset("imdb")
- train_dataset = dataset["train"]
- eval_dataset = dataset["test"]
+ raw_dataset = load_dataset("HuggingFaceH4/ultrachat_200k")
+ column_names = list(raw_dataset["train_sft"].features)

+ processed_dataset = raw_dataset.map(
+     apply_chat_template,
+     fn_kwargs={"tokenizer": tokenizer},
+     num_proc=12,
+     remove_columns=column_names,
+     desc="Applying chat template",
+ )
+ train_dataset = processed_dataset["train_sft"]
+ eval_dataset = processed_dataset["test_sft"]

- ################
- # Training
- ################
+ ###########
+ # Training
+ ###########
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
+     eval_dataset=eval_dataset,
    max_seq_length=2048,
    dataset_text_field="text",
    tokenizer=tokenizer,
+     packing=True
)
train_result = trainer.train()
metrics = train_result.metrics
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
trainer.save_state()
+
+ #############
+ # Evaluation
+ #############
+ tokenizer.padding_side = 'left'
+ metrics = trainer.evaluate()
+ metrics["eval_samples"] = len(eval_dataset)
+ trainer.log_metrics("eval", metrics)
+ trainer.save_metrics("eval", metrics)
+
+ ############
+ # Save model
+ ############
+ trainer.save_model(training_args.output_dir)
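
The data-processing step added above renders each ultrachat_200k conversation into a single "text" field via the tokenizer's chat template, which SFTTrainer then packs into 2048-token sequences. A minimal sketch of that step on one toy record (the messages below are made up for illustration, not taken from the dataset):

# Sketch of apply_chat_template on a single hypothetical record.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
example = {"messages": [
    {"role": "user", "content": "What does warmup_ratio control?"},
    {"role": "assistant", "content": "The fraction of training steps spent linearly ramping up the learning rate."},
]}

messages = example["messages"]
if messages[0]["role"] != "system":                      # same guard as in the script
    messages.insert(0, {"role": "system", "content": ""})
example["text"] = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=False)
print(example["text"])                                   # one flat string per conversation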
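
Once the run finishes, trainer.save_model writes the finetuned weights to output_dir ("./checkpoint_dir" above). A minimal sketch of loading that checkpoint back for a quick generation check, assuming the tokenizer was saved alongside the model (the prompt and generation settings are illustrative, not part of the script):

# Sketch: reload the finetuned checkpoint and generate a short reply.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint_dir = "./checkpoint_dir"                      # matches the new output_dir
model = AutoModelForCausalLM.from_pretrained(
    checkpoint_dir, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="cuda")
tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)

messages = [{"role": "user", "content": "Summarize supervised finetuning in one sentence."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
output_ids = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))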