aashish1904 committed
Commit eeee536
1 Parent(s): 3de200e

Upload README.md with huggingface_hub

Files changed (1):
  README.md +117 -0
README.md ADDED
@@ -0,0 +1,117 @@
---
license: apache-2.0
base_model:
- Qwen/Qwen2.5-7B
library_name: transformers
---

[![QuantFactory Banner](https://lh7-rt.googleusercontent.com/docsz/AD_4nXeiuCm7c8lEwEJuRey9kiVZsRn2W-b4pWlu3-X534V3YmVuVc2ZL-NXg2RkzSOOS2JXGHutDuyyNAUtdJI65jGTo8jT9Y99tMi4H4MqL44Uc5QKG77B0d6-JfIkZHFaUA71-RtjyYZWVIhqsNZcx8-OMaA?key=xt3VSDoCbmTY7o-cwwOFwQ)](https://hf.co/QuantFactory)

# QuantFactory/Vapor_7B-GGUF
This is a quantized version of [FourOhFour/Vapor_7B](https://huggingface.co/FourOhFour/Vapor_7B), created using llama.cpp.
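
A minimal sketch of running one of these quants locally with the `llama-cpp-python` bindings; the quant filename below is a placeholder, so substitute an actual `.gguf` from this repo's file list:

```python
# Sketch: load a GGUF quant with llama-cpp-python (pip install llama-cpp-python).
from llama_cpp import Llama

llm = Llama(
    model_path="Vapor_7B.Q4_K_M.gguf",  # hypothetical filename; check the repo's file list
    n_ctx=8192,                          # matches the sequence_len used in training (see config below)
)

# The model was trained on ChatML-formatted conversations, so prefer the
# chat-completion API over raw text prompts.
out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Summarize what a GGUF file is."}],
    max_tokens=128,
)
print(out["choices"][0]["message"]["content"])
```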

# Original Model Card

```yaml
base_model: Qwen/Qwen2.5-7B
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer

load_in_8bit: false
load_in_4bit: false
strict: false

datasets:
  - path: PocketDoc/Dans-MemoryCore-CoreCurriculum-Small
    type: sharegpt
    conversation: chatml
  - path: NewEden/Kalo-Opus-Instruct-22k-Refusal-Murdered
    type: sharegpt
    conversation: chatml
  - path: Epiculous/Synthstruct-Gens-v1.1-Filtered-n-Cleaned
    type: sharegpt
    conversation: chatml
  - path: NewEden/Gryphe-Sonnet-3.5-35k-Subset
    type: sharegpt
    conversation: chatml
  - path: Nitral-AI/Reasoning-1shot_ShareGPT
    type: sharegpt
    conversation: chatml
  - path: Nitral-AI/GU_Instruct-ShareGPT
    type: sharegpt
    conversation: chatml
  - path: Nitral-AI/Medical_Instruct-ShareGPT
    type: sharegpt
    conversation: chatml

chat_template: chatml

val_set_size: 0.01
output_dir: ./outputs/out

adapter:
lora_r:
lora_alpha:
lora_dropout:
lora_target_linear:

sequence_len: 8192
# sequence_len: 32768
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true

plugins:
  - axolotl.integrations.liger.LigerPlugin
liger_rope: true
liger_rms_norm: true
liger_swiglu: true
liger_fused_linear_cross_entropy: true

wandb_project: qwen7B
wandb_entity:
wandb_watch:
wandb_name: qwen7B
wandb_log_model:

gradient_accumulation_steps: 32
micro_batch_size: 1
num_epochs: 2
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.00001
weight_decay: 0.05

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: true

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_ratio: 0.1
evals_per_epoch: 4
eval_table_size:
eval_max_new_tokens: 128
saves_per_epoch: 2

debug:
deepspeed:
fsdp:
fsdp_config:

special_tokens:
  pad_token: <pad>
```
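
For comparison with the quantized files above, a minimal sketch of running the unquantized model through `transformers` (the repo's declared `library_name`), assuming the original repo's tokenizer ships the ChatML chat template that `chat_template: chatml` implies:

```python
# Sketch: inference with the unquantized model via transformers.
# Assumes FourOhFour/Vapor_7B provides a ChatML chat template, per the config above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "FourOhFour/Vapor_7B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # the model was trained in bf16
    device_map="auto",
)

messages = [{"role": "user", "content": "Explain sample packing in one sentence."}]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output = model.generate(inputs, max_new_tokens=128)
# Decode only the newly generated tokens, skipping the prompt.
print(tokenizer.decode(output[0][inputs.shape[-1]:], skip_special_tokens=True))
```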