ehartford committed
Commit 2f30344
1 Parent(s): 2684aca

Upload folder using huggingface_hub

Files changed (1)
  1. configs/dolphin-mistral-7b.yml +71 -0
configs/dolphin-mistral-7b.yml ADDED
@@ -0,0 +1,71 @@
+ base_model: mistralai/Mistral-7B-v0.1
+ model_type: MistralForCausalLM
+ tokenizer_type: LlamaTokenizer
+ is_mistral_derived_model: true
+
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+
+ datasets:
+   - path: /workspace/datasets/dolphin/dolphin201.jsonl
+     type: alpaca_w_system.load_open_orca_chatml
+   - path: /workspace/datasets/WizardLM_evol_instruct_cleaned.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /workspace/datasets/not_samantha_norefusals.jsonl
+     type: sharegpt
+     conversation: chatml
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0
+ output_dir: /workspace/dolphin-2.2-mistral-7b
+
+ sequence_len: 8192
+ sample_packing: true
+ pad_to_sequence_len: true
+
+ wandb_project: dolphin
+ wandb_entity:
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 5
+ num_epochs: 4
+ adam_beta2: 0.95
+ adam_epsilon: 0.00001
+ max_grad_norm: 1.0
+ lr_scheduler: cosine
+ learning_rate: 0.000006
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: false
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_steps: 100
+ eval_steps:
+ eval_table_size:
+ eval_table_max_new_tokens:
+ eval_sample_packing: false
+ save_steps: 0.25
+ debug:
+ deepspeed: deepspeed/zero2.json
+ weight_decay: 0.1
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   eos_token: "<|im_end|>"
+ tokens:
+   - "<|im_start|>"
+   - "<|im_end|>"
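The datasets above are all rendered with the ChatML conversation template, which is why <|im_start|> and <|im_end|> are registered as extra tokens at the bottom of the config and <|im_end|> is made the EOS token. A minimal sketch of that template in Python (the to_chatml helper is illustrative, not part of axolotl):

    IM_START, IM_END = "<|im_start|>", "<|im_end|>"

    def to_chatml(messages):
        # Each turn is rendered as: <|im_start|>role\ncontent<|im_end|>\n
        return "".join(
            f"{IM_START}{m['role']}\n{m['content']}{IM_END}\n" for m in messages
        )

    print(to_chatml([
        {"role": "system", "content": "You are Dolphin, a helpful AI assistant."},
        {"role": "user", "content": "Hello!"},
    ]))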
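The tokens: section adds the two ChatML markers to the tokenizer's vocabulary, and axolotl handles the matching resize of the model's embedding matrix when it loads this config. A rough equivalent in plain transformers (a sketch, not how axolotl is implemented internally):

    from transformers import AutoModelForCausalLM, AutoTokenizer

    tok = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
    model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")

    # tokens: -> new vocabulary entries for the ChatML markers;
    # special_tokens: -> <|im_end|> becomes EOS so generation stops per turn.
    tok.add_tokens(["<|im_start|>", "<|im_end|>"])
    tok.add_special_tokens({"eos_token": "<|im_end|>"})
    model.resize_token_embeddings(len(tok))

For scale: with gradient_accumulation_steps: 4 and micro_batch_size: 5, each optimizer step covers 20 packed 8192-token sequences per GPU, with the referenced deepspeed/zero2.json sharding optimizer state across GPUs. A config like this is typically launched through axolotl's CLI, e.g. accelerate launch -m axolotl.cli.train configs/dolphin-mistral-7b.yml.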