I'll explain more about this model once I've found the optimal checkpoint for its use case.

It's been full fine-tuned on [Sandevistan](https://huggingface.co/datasets/Replete-AI/Sandevistan).
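
For reference, the dataset can be inspected locally with the `datasets` library. A minimal sketch, assuming the dataset exposes a default `train` split on the Hub:

```
# Minimal sketch: peek at the Sandevistan dataset used for the fine-tune.
# Assumes a default "train" split (not confirmed here).
from datasets import load_dataset

ds = load_dataset("Replete-AI/Sandevistan", split="train")
print(ds)      # column names and row count
print(ds[0])   # first record
```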

Here is my Axolotl config (thanks to fizz and empti):

```
base_model: meta-llama/Meta-Llama-3-8B

load_in_8bit: false
load_in_4bit: false
strict: false

datasets:
  - path: Kquant03/Sandevistan_Reformat
    type: customllama3_stan
dataset_prepared_path: last_run_prepared
val_set_size: 0.05
output_dir: ./outputs/out
max_steps: 80000

fix_untrained_tokens: true

sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true

wandb_project: Pneuma
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 16
micro_batch_size: 8
num_epochs: 1
optimizer: paged_adamw_8bit
lr_scheduler: cosine
learning_rate: 0.00001
max_grad_norm: 1

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false

gradient_checkpointing: unsloth
early_stopping_patience:
resume_from_checkpoint:
logging_steps: 1
xformers_attention:
flash_attention: true
eval_sample_packing: false

plugins:
  - axolotl.integrations.liger.LigerPlugin
liger_rope: true
liger_rms_norm: true
liger_swiglu: true
liger_fused_linear_cross_entropy: true

hub_model_id: Replete-AI/L3-Pneuma-8B
hub_strategy: every_save

warmup_steps: 10
evals_per_epoch: 3
eval_table_size:
saves_per_epoch: 3
debug:
deepspeed:
weight_decay: 0.1
fsdp:
fsdp_config:
special_tokens:
  bos_token: "<|begin_of_text|>"
  eos_token: "<|end_of_text|>"
  pad_token: "<|end_of_text|>"
tokens:
```
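
For training, Axolotl's standard entry point is `accelerate launch -m axolotl.cli.train <config>.yml`. Once a checkpoint lands at `Replete-AI/L3-Pneuma-8B` (the `hub_model_id` above), it should load like any other Llama 3 fine-tune. Here's a minimal `transformers` sketch; the prompt and sampling settings are placeholder assumptions, not tuned recommendations:

```
# Minimal inference sketch; sampling values below are illustrative only.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Replete-AI/L3-Pneuma-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # matches the bf16 training setting above
    device_map="auto",
)

inputs = tokenizer("Tell me about yourself.", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.8)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```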