MaziyarPanahi committed on
Commit
e25a6c9
1 Parent(s): 94d3e63

End of training

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ library_name: peft
4
+ tags:
5
+ - axolotl
6
+ - generated_from_trainer
7
+ base_model: google/gemma-7b
8
+ model-index:
9
+ - name: gemma-7b-Open-Hermes-v0.1
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ [<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
17
+ <details><summary>See axolotl config</summary>
18
+
19
+ axolotl version: `0.4.0`
20
+ ```yaml
21
+ # use google/gemma-7b if you have access
22
+ #base_model: mhenrichsen/gemma-7b
23
+ base_model: google/gemma-7b
24
+ model_type: AutoModelForCausalLM
25
+ tokenizer_type: AutoTokenizer
26
+
27
+ hub_model_id: MaziyarPanahi/gemma-7b-Open-Hermes-v0.1
28
+ hf_use_auth_token: true
29
+
30
+ load_in_8bit: false
31
+ load_in_4bit: true
32
+ strict: false
33
+
34
+ # huggingface repo
35
+ datasets:
36
+ - path: teknium/openhermes
37
+ type: alpaca
38
+ val_set_size: 0.1
39
+ output_dir: ./qlora-gemma-7b-openhermes
40
+
41
+ adapter: qlora
42
+ lora_r: 32
43
+ lora_alpha: 16
44
+ lora_dropout: 0.05
45
+ lora_target_linear: true
46
+
47
+
48
+ sequence_len: 4096
49
+ sample_packing: false
50
+ pad_to_sequence_len: false
51
+
52
+ wandb_project:
53
+ wandb_entity:
54
+ wandb_watch:
55
+ wandb_name:
56
+ wandb_log_model:
57
+
58
+
59
+ gradient_accumulation_steps: 3
60
+ micro_batch_size: 2
61
+ num_epochs: 1
62
+ optimizer: adamw_bnb_8bit
63
+ lr_scheduler: cosine
64
+ learning_rate: 0.0002
65
+
66
+ train_on_inputs: false
67
+ group_by_length: false
68
+ bf16: auto
69
+ fp16:
70
+ tf32: false
71
+
72
+ gradient_checkpointing: true
73
+ early_stopping_patience:
74
+ resume_from_checkpoint:
75
+ local_rank:
76
+ logging_steps: 1
77
+ xformers_attention:
78
+ flash_attention: true
79
+
80
+ warmup_ratio: 0.1
81
+ evals_per_epoch: 4
82
+ eval_table_size:
83
+ eval_max_new_tokens: 128
84
+ saves_per_epoch: 1
85
+ debug:
86
+ deepspeed:
87
+ weight_decay: 0.0
88
+ fsdp:
89
+ fsdp_config:
90
+ special_tokens:
91
+ ```
92
+
93
+ </details><br>
94
+
95
+ # gemma-7b-Open-Hermes-v0.1
96
+
97
+ This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on the [teknium/openhermes](https://huggingface.co/datasets/teknium/openhermes) dataset.
98
+ It achieves the following results on the evaluation set:
99
+ - Loss: 1.4456
100
+
101
+ ## Model description
102
+
103
+ More information needed
104
+
105
+ ## Intended uses & limitations
106
+
107
+ More information needed
108
+
109
+ ## Training and evaluation data
110
+
111
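+ The model was trained on the `teknium/openhermes` dataset using the `alpaca` prompt format; 10% of the data was held out as the evaluation set (`val_set_size: 0.1`).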
112
+
113
+ ## Training procedure
114
+
115
+ ### Training hyperparameters
116
+
117
+ The following hyperparameters were used during training:
118
+ - learning_rate: 0.0002
119
+ - train_batch_size: 2
120
+ - eval_batch_size: 2
121
+ - seed: 42
122
+ - distributed_type: multi-GPU
123
+ - num_devices: 4
124
+ - gradient_accumulation_steps: 3
125
+ - total_train_batch_size: 24
126
+ - total_eval_batch_size: 8
127
+ - optimizer: 8-bit AdamW (`adamw_bnb_8bit`) with betas=(0.9,0.999) and epsilon=1e-08
128
+ - lr_scheduler_type: cosine
129
+ - lr_scheduler_warmup_steps: 227
130
+ - num_epochs: 1
131
+
132
+ ### Training results
133
+
134
+ | Training Loss | Epoch | Step | Validation Loss |
135
+ |:-------------:|:-----:|:----:|:---------------:|
136
+ | 1.3258 | 0.0 | 1 | 1.9697 |
137
+ | 0.63 | 0.25 | 2277 | 1.5227 |
138
+ | 0.642 | 0.5 | 4554 | 1.4835 |
139
+ | 0.7721 | 0.75 | 6831 | 1.4456 |
140
+
141
+
142
+ ### Framework versions
143
+
144
+ - PEFT 0.8.2
145
+ - Transformers 4.39.0.dev0
146
+ - Pytorch 2.2.0+cu121
147
+ - Datasets 2.17.0
148
+ - Tokenizers 0.15.0
adapter_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "google/gemma-7b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": null,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "loftq_config": {},
12
+ "lora_alpha": 16,
13
+ "lora_dropout": 0.05,
14
+ "megatron_config": null,
15
+ "megatron_core": "megatron.core",
16
+ "modules_to_save": null,
17
+ "peft_type": "LORA",
18
+ "r": 32,
19
+ "rank_pattern": {},
20
+ "revision": null,
21
+ "target_modules": [
22
+ "k_proj",
23
+ "up_proj",
24
+ "gate_proj",
25
+ "q_proj",
26
+ "o_proj",
27
+ "v_proj",
28
+ "down_proj"
29
+ ],
30
+ "task_type": "CAUSAL_LM",
31
+ "use_rslora": false
32
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:671229bfc35e87fa9a923676b4a08a255e8cd8e5bd2d4d559e607095012c7834
3
+ size 400173482
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e4276021b5987792bb3d8f356997abc9fbf369ac5f1bdc9d52bc01ed5a88db1
3
+ size 400084608
config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/gemma-7b",
3
+ "architectures": [
4
+ "GemmaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 2,
9
+ "eos_token_id": 1,
10
+ "head_dim": 256,
11
+ "hidden_act": "gelu",
12
+ "hidden_size": 3072,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 24576,
15
+ "max_position_embeddings": 8192,
16
+ "model_type": "gemma",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 28,
19
+ "num_key_value_heads": 16,
20
+ "pad_token_id": 0,
21
+ "quantization_config": {
22
+ "_load_in_4bit": true,
23
+ "_load_in_8bit": false,
24
+ "bnb_4bit_compute_dtype": "bfloat16",
25
+ "bnb_4bit_quant_type": "nf4",
26
+ "bnb_4bit_use_double_quant": true,
27
+ "llm_int8_enable_fp32_cpu_offload": false,
28
+ "llm_int8_has_fp16_weight": false,
29
+ "llm_int8_skip_modules": null,
30
+ "llm_int8_threshold": 6.0,
31
+ "load_in_4bit": true,
32
+ "load_in_8bit": false,
33
+ "quant_method": "bitsandbytes"
34
+ },
35
+ "rms_norm_eps": 1e-06,
36
+ "rope_scaling": null,
37
+ "rope_theta": 10000.0,
38
+ "torch_dtype": "bfloat16",
39
+ "transformers_version": "4.39.0.dev0",
40
+ "use_cache": false,
41
+ "vocab_size": 256000
42
+ }
runs/Feb22_21-33-07_zen/events.out.tfevents.1708637589.zen.1452606.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:745df8070d326f1d832c295df1f76f165fab444f9f26fda95f6b83a9cc3b41fb
3
+ size 1927866
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<bos>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<eos>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0d908b4f9326e0998815690e325b6abbd378978553e10627924dd825db7e243
3
+ size 17477553
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2
3
+ size 4241003
tokenizer_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<pad>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<eos>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "<bos>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "3": {
30
+ "content": "<unk>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ }
37
+ },
38
+ "bos_token": "<bos>",
39
+ "clean_up_tokenization_spaces": false,
40
+ "eos_token": "<eos>",
41
+ "legacy": null,
42
+ "model_max_length": 1000000000000000019884624838656,
43
+ "pad_token": "<pad>",
44
+ "sp_model_kwargs": {},
45
+ "spaces_between_special_tokens": false,
46
+ "tokenizer_class": "GemmaTokenizer",
47
+ "unk_token": "<unk>",
48
+ "use_default_system_prompt": false
49
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52a30bdcb81a21e8c21767e001e3d6ead09b444076c2945ea756096bfea315e6
3
+ size 5624