Text Generation
Transformers
PyTorch
Safetensors
English
mistral
conversational
Inference Endpoints
text-generation-inference
Crystalcareai committed on
Commit
dd5430b
1 Parent(s): 516002c

Upload dolphin-2.9-phi3-qlora.yml

Browse files
Files changed (1) hide show
  1. configs/dolphin-2.9-phi3-qlora.yml +133 -0
configs/dolphin-2.9-phi3-qlora.yml ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
base_model: unsloth/Phi-3-mini-4k-instruct
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer

trust_remote_code: true

peft_layer_replication:
  - [0, 8]
  - [4, 12]
  - [8, 16]
  - [12, 20]
  - [16, 24]
  - [20, 28]
  - [24, 32]

load_in_8bit: false
load_in_4bit: true
strict: false

datasets:
  - path: /workspace/datasets/dolphin-2.9/dolphin201-sharegpt2.jsonl
    type: sharegpt
    conversation: chatml
  # - path: /workspace/datasets/dolphin-2.9/Ultrachat200kunfiltered.jsonl
  #   type: sharegpt
  #   conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/dolphin-coder-translate-sharegpt2.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/dolphin-coder-codegen-sharegpt2.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/m-a-p_Code-Feedback-sharegpt-unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/m-a-p_CodeFeedback-Filtered-Instruction-sharegpt-unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/not_samantha_norefusals.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/Orca-Math-resort-unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/agent_instruct_react_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/toolbench_instruct_j1s1_3k_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/toolbench_negative_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/toolbench_react_10p_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/toolbench_tflan_cot_30p_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/openhermes200k_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  # - path: /workspace/datasets/dolphin-2.9/SystemConversations.jsonl
  #   type: sharegpt
  #   conversation: chatml

chat_template: chatml

dataset_prepared_path: dolphin-phi3-prepared
val_set_size: 0
output_dir: ./dolphin-phi3-5b

sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true

adapter: qlora
lora_model_dir:
lora_r: 64
lora_alpha: 32
lora_dropout: 0.05
lora_target_linear: true
lora_fan_in_fan_out:

lora_modules_to_save: ['embed_tokens', 'lm_head']

wandb_project: dolphin-2.9-phi3-5b
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 8
num_epochs: 4
optimizer: adamw_8bit
# adam_beta2: 0.95
# adam_epsilon: 0.00001
max_grad_norm: 1.0
lr_scheduler: cosine
learning_rate: 5e-6

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: true

gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: True
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 100
evals_per_epoch: 4
saves_per_epoch: 1
debug:
# deepspeed: deepspeed_configs/zero2.json
weight_decay: 0.1
fsdp:
fsdp_config:
# resize_token_embeddings_to_32x: true
special_tokens:
  eos_token: "<|im_end|>"
  pad_token: "<|endoftext|>"
tokens:
  - "<|im_start|>"
  - "<|im_end|>"