Text Generation
Transformers
PyTorch
Safetensors
English
llama
conversational
Eval Results
Inference Endpoints
text-generation-inference
ehartford committed on
Commit
1f5ea35
1 Parent(s): c3ee753

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. configs/dolphin-70b-2.yml +83 -0
configs/dolphin-70b-2.yml ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: /workspace/models/StellarBright
2
+ model_type: LlamaForCausalLM
3
+ tokenizer_type: LlamaTokenizer
4
+ is_llama_derived_model: true
5
+
6
+ load_in_8bit: false
7
+ load_in_4bit: true
8
+ strict: false
9
+
10
+ datasets:
11
+   - path: /workspace/datasets/dolphin/dolphin201.jsonl
12
+     type: alpaca_w_system.load_open_orca_chatml
13
+   - path: /workspace/datasets/WizardLM_evol_instruct_cleaned.jsonl
14
+     type: sharegpt
15
+     conversation: chatml
16
+   - path: /workspace/datasets/not_samantha_norefusals.jsonl
17
+     type: sharegpt
18
+     conversation: chatml
19
+ dataset_prepared_path: last_run_prepared
20
+ val_set_size: 0
21
+ output_dir: /workspace/dolphin-2.2-70b
22
+
23
+ adapter: qlora
24
+ lora_model_dir:
25
+
26
+ sequence_len: 4096
27
+ sample_packing: true
28
+ pad_to_sequence_len: true
29
+
30
+ lora_r: 32
31
+ lora_alpha: 16
32
+ lora_dropout: 0.05
33
+ lora_target_modules:
34
+ lora_target_linear: true
35
+
36
+ lora_modules_to_save:
37
+   - embed_tokens
37
+   - lm_head
39
+
40
+ lora_fan_in_fan_out:
41
+
42
+ wandb_project: dolphin
43
+ wandb_entity:
44
+ wandb_watch:
45
+ wandb_run_id:
46
+ wandb_log_model:
47
+
48
+ gradient_accumulation_steps: 4
49
+ micro_batch_size: 3
50
+ eval_batch_size: 3
51
+ num_epochs: 4
52
+ optimizer: paged_adamw_32bit
53
+ lr_scheduler: cosine
54
+ learning_rate: 0.0003
55
+
56
+ train_on_inputs: false
57
+ group_by_length: false
58
+ bf16: true
59
+ fp16: false
60
+ tf32: false
61
+
62
+ gradient_checkpointing: true
63
+ early_stopping_patience:
64
+ resume_from_checkpoint:
65
+ local_rank:
66
+ logging_steps: 1
67
+ xformers_attention:
68
+ flash_attention: true
69
+
70
+ warmup_steps: 100
71
+ eval_steps:
72
+ eval_table_size:
73
+ save_steps: 0.05
74
+ debug:
75
+ deepspeed: deepspeed/zero2.json
76
+ weight_decay: 0.01
77
+ fsdp:
78
+ fsdp_config:
79
+ special_tokens:
80
+   eos_token: "<|im_end|>"
81
+ tokens:
82
+   - "<|im_start|>"
83
+   - "<|im_end|>"