nbeerbower committed
Commit 5389d02
1 Parent(s): 5d85114

Update README.md

Files changed (1)
  1. README.md +61 -2
README.md CHANGED
@@ -5,6 +5,7 @@ base_model:
 - nbeerbower/Mahou-1.2a-mistral-7B
 datasets:
 - flammenai/MahouMix-v1
+- flammenai/FlameMix-DPO-v1
 ---
 ![image/png](https://huggingface.co/flammenai/Mahou-1.0-mistral-7B/resolve/main/mahou1.png)
 
@@ -43,6 +44,64 @@ This model has been trained to use ChatML format.
 
 ### Method
 
-DPO finetuned for 6 epochs using an A100 on Google Colab.
+DPO finetuned using an A100 on Google Colab.
 
-[Fine-tune a Mistral-7b model with Direct Preference Optimization](https://towardsdatascience.com/fine-tune-a-mistral-7b-model-with-direct-preference-optimization-708042745aac) - [Maxime Labonne](https://huggingface.co/mlabonne)
+[Fine-tune a Mistral-7b model with Direct Preference Optimization](https://towardsdatascience.com/fine-tune-a-mistral-7b-model-with-direct-preference-optimization-708042745aac) - [Maxime Labonne](https://huggingface.co/mlabonne)
+
+### Configuration
+
+LoRA, model, and training settings:
+
+```python
+# LoRA configuration
+peft_config = LoraConfig(
+    r=16,
+    lora_alpha=16,
+    lora_dropout=0.05,
+    bias="none",
+    task_type="CAUSAL_LM",
+    target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
+)
+# Model to fine-tune
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.bfloat16,
+    load_in_4bit=True
+)
+model.config.use_cache = False
+# Reference model
+ref_model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.bfloat16,
+    load_in_4bit=True
+)
+# Training arguments
+training_args = TrainingArguments(
+    per_device_train_batch_size=4,
+    gradient_accumulation_steps=4,
+    gradient_checkpointing=True,
+    learning_rate=5e-5,
+    lr_scheduler_type="cosine",
+    max_steps=2000,
+    save_strategy="no",
+    logging_steps=1,
+    output_dir=new_model,
+    optim="paged_adamw_32bit",
+    warmup_steps=100,
+    bf16=True,
+    report_to="wandb",
+)
+# Create DPO trainer
+dpo_trainer = DPOTrainer(
+    model,
+    ref_model,
+    args=training_args,
+    train_dataset=dataset,
+    tokenizer=tokenizer,
+    peft_config=peft_config,
+    beta=0.1,
+    force_use_ref_model=True
+)
+# Fine-tune model with DPO
+dpo_trainer.train()
+```
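
The configuration block leaves `model_name`, `new_model`, `tokenizer`, and `dataset` undefined, as in the linked tutorial. A minimal setup sketch with illustrative values; the base-model id comes from the YAML header, while the output name and the dataset split/column layout are assumptions:

```python
# Setup assumed by the configuration above (illustrative values; repo names,
# output directory, and dataset split are assumptions, not part of the README).
import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import DPOTrainer

model_name = "nbeerbower/Mahou-1.2a-mistral-7B"  # base model listed in the YAML header
new_model = "mahou-dpo-finetune"                 # placeholder output directory name

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# DPOTrainer expects prompt/chosen/rejected columns; the exact schema of
# flammenai/MahouMix-v1 is assumed here rather than documented in the card.
dataset = load_dataset("flammenai/MahouMix-v1", split="train")
```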
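The card states the model has been trained to use ChatML format. A minimal generation sketch, assuming the tokenizer ships a ChatML chat template; the repo id (the base model) and the messages are placeholders:

```python
# ChatML-style generation sketch (assumes the checkpoint's tokenizer provides
# a chat template; repo id and messages below are placeholders).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "nbeerbower/Mahou-1.2a-mistral-7B"
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(
    repo, torch_dtype=torch.bfloat16, device_map="auto"
)

messages = [
    {"role": "system", "content": "You are a friendly roleplay partner."},
    {"role": "user", "content": "Hi! How was your day?"},
]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

outputs = model.generate(inputs, max_new_tokens=256, do_sample=True, temperature=0.8)
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
```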