CultriX committed on
Commit
26741dd
1 Parent(s): 5fb2eb9

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +94 -0
README.md CHANGED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ datasets:
4
+ - mlabonne/orpo-dpo-mix-40k-flat
5
+ language:
6
+ - en
7
+ base_model: cognitivecomputations/dolphin-2.9.4-llama3.1-8b
8
+ pipeline_tag: text-generation
9
+ tags:
10
+ - dpo
11
+ - axolotl
12
+ ---
13
+
14
+ ## Axolotl configuration:
15
+ ```yaml
16
+
17
+ base_model: cognitivecomputations/dolphin-2.9.4-llama3.1-8b
18
+ model_type: LlamaForCausalLM
19
+ tokenizer_type: AutoTokenizer
20
+ tokenizer:
21
+ name_or_path: "https://huggingface.co/cognitivecomputations/dolphin-2.9.4-llama3.1-8b/resolve/main/tokenizer.json"
22
+
23
+
24
+ load_in_8bit: false
25
+ load_in_4bit: true
26
+ strict: false
27
+ save_safetensors: true
28
+ bnb_4bit_quant_type: "nf4"
29
+ bnb_4bit_compute_dtype: "bf16"
30
+ bnb_4bit_use_double_quant: true
31
+
32
+ rl: dpo
33
+ chat_template: chatml
34
+ datasets:
35
+ - path: mlabonne/orpo-dpo-mix-40k-flat
36
+ split: train
37
+ type: chatml.intel
38
+
39
+ dataset_prepared_path: /workspace/axolotl/dataset-prepared
40
+ val_set_size: 0.0
41
+ output_dir: ./out
42
+
43
+ adapter: qlora
44
+ lora_model_dir:
45
+
46
+ sequence_len: 2048
47
+ sample_packing: false
48
+ pad_to_sequence_len: false
49
+
50
+ lora_r: 64
51
+ lora_alpha: 32
52
+ lora_dropout: 0.05
53
+ lora_target_linear: true
54
+ lora_fan_in_fan_out:
55
+ lora_target_modules:
56
+
57
+ wandb_project: axolotl
58
+ wandb_entity:
59
+ wandb_watch:
60
+ wandb_name:
61
+ wandb_log_model:
62
+
63
+
64
+ gradient_accumulation_steps: 4 # Reduced from 8 to 4 due to large VRAM
65
+ micro_batch_size: 2 # Increased micro-batch size to 2
66
+ num_epochs: 1
67
+ optimizer: paged_adamw_8bit
68
+ lr_scheduler: cosine
69
+ learning_rate: 5e-6
70
+ train_on_inputs: false
71
+ group_by_length: false
72
+
73
+ bf16: true # Use bf16 as it is optimal for A40 GPUs
74
+ fp16: false
75
+ tf32: true # TF32 is supported by A40 and improves performance
76
+
77
+ gradient_checkpointing: true
78
+ early_stopping_patience:
79
+ resume_from_checkpoint:
80
+ local_rank:
81
+ logging_steps: 1
82
+ xformers_attention:
83
+ flash_attention: true
84
+ warmup_steps: 100
85
+ evals_per_epoch: 0
86
+ eval_table_size:
87
+ eval_table_max_new_tokens: 128
88
+ saves_per_epoch: 1
89
+ debug:
90
+ deepspeed: deepspeed_configs/zero2.json # Enable DeepSpeed with ZeRO Stage 2
91
+ weight_decay: 0.0
92
+ special_tokens:
93
+ pad_token: <|end_of_text|>
94
+ ```