Text Generation
Transformers
Safetensors
English
mixtral
conversational
Inference Endpoints
text-generation-inference
chargoddard committed on
Commit
fde04db
1 Parent(s): 91877d6

Create axolotl_config.yml

Browse files
Files changed (1) hide show
  1. axolotl_config.yml +91 -0
axolotl_config.yml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: mistralai/Mixtral-8x7B-v0.1
2
+ model_type: AutoModelForCausalLM
3
+ tokenizer_type: AutoTokenizer
4
+
5
+ model_config:
6
+   output_router_logits: true
7
+   router_aux_loss_coef: 0.02
8
+   router_z_loss_coef: 0.001
9
+
10
+ load_in_8bit: false
11
+ load_in_4bit: true
12
+ strict: false
13
+
14
+ datasets:
15
+   - path: /workspace/data/rpguild-grammar-filtered.jsonl
16
+     type: rp_forum
17
+     shards: 4
18
+   - path: /workspace/limaerp-8192.jsonl
19
+     type: rp_forum
20
+     repeats: 2
21
+   - path: /workspace/data/no-robots-rp.jsonl
22
+     type: pippa
23
+   - path: Open-Orca/SlimOrca
24
+     type: sharegpt
25
+     shards: 15
26
+   - path: chargoddard/summarize_from_feedback_alpaca
27
+     type: alpaca
28
+     shards: 14
29
+   - path: chargoddard/coedit-reworded
30
+     type: alpaca
31
+     shards: 14
32
+ prompt_format: economic_rp
33
+ dataset_prepared_path: last_run_prepared
34
+ val_set_size: 0.04
35
+ output_dir: ./mixtral-lora
36
+ save_safetensors: true
37
+
38
+ adapter: qlora
39
+ lora_model_dir:
40
+
41
+ sequence_len: 8192
42
+ sample_packing: true
43
+ pad_to_sequence_len: true
44
+
45
+ lora_r: 128
46
+ lora_alpha: 256
47
+ lora_dropout: 0.05
48
+ lora_target_linear: true
49
+ lora_fan_in_fan_out:
50
+ # lora_target_modules:
51
+
52
+ wandb_project: mixtral-lora
53
+ wandb_entity:
54
+ wandb_watch:
55
+ wandb_run_id:
56
+ wandb_log_model:
57
+
58
+ gradient_accumulation_steps: 16
59
+ micro_batch_size: 2
60
+ eval_batch_size: 2
61
+ num_epochs: 1
62
+ optimizer: adamw_bnb_8bit
63
+ lr_scheduler: cosine
64
+ learning_rate: 0.00025
65
+
66
+ train_on_inputs: false
67
+ group_by_length: false
68
+ bf16: true
69
+ fp16: false
70
+ tf32: false
71
+
72
+ gradient_checkpointing: true
73
+ early_stopping_patience:
74
+ resume_from_checkpoint:
75
+ auto_resume_from_checkpoints: true
76
+ local_rank:
77
+ logging_steps: 1
78
+ xformers_attention:
79
+ flash_attention: true
80
+ save_total_limit: 2
81
+
82
+ warmup_steps: 10
83
+ eval_steps: 0.05
84
+ save_steps: 0.05
85
+ eval_table_size:
86
+ eval_table_max_new_tokens: 128
87
+ weight_decay: 0.0
88
+ special_tokens:
89
+   bos_token: "<s>"
90
+   eos_token: "</s>"
91
+   unk_token: "<unk>"