TearGosling committed on
Commit
0b09547
1 Parent(s): b461fcd

Uploading model

README.md ADDED
@@ -0,0 +1,142 @@
+ ---
+ license: apache-2.0
+ base_model: NousResearch/Hermes-2-Pro-Mistral-7B
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: workspace/disk2/alexandria/models/g2t_hermes/
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ [<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
+ <details><summary>See axolotl config</summary>
+
+ axolotl version: `0.4.0`
+ ```yaml
+ base_model: NousResearch/Hermes-2-Pro-Mistral-7B
+ model_type: MistralForCausalLM
+ tokenizer_type: LlamaTokenizer
+
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+
+ datasets:
+ - path: /workspace/disk2/alexandria/data/graphs_2_text_hermes.jsonl
+ type: sharegpt
+ conversation: chatml
+ dataset_prepared_path:
+ val_set_size: 0.0
+ output_dir: /workspace/disk2/alexandria/models/g2t_hermes/
+
+ sequence_len: 8192
+ sample_packing: true
+ pad_to_sequence_len: true
+ eval_sample_packing: false
+
+ wandb_project: alexandria
+ wandb_entity:
+ wandb_watch:
+ wandb_name:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 1
+ micro_batch_size: 2
+ num_epochs: 1
+ optimizer: adamw_bnb_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.000005
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: auto
+ fp16:
+ tf32: false
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_steps: 10
+ evals_per_epoch: 0
+ eval_table_size:
+ eval_max_new_tokens: 128
+ saves_per_epoch: 2
+ debug:
+ deepspeed: deepspeed_configs/zero2.json
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
+ bos_token: "<s>"
+ eos_token: "</s>"
+ unk_token: "<unk>"
+
+ ```
+
+ </details><br>
+
+ # workspace/disk2/alexandria/models/g2t_hermes/
+
+ This model is a fine-tuned version of [NousResearch/Hermes-2-Pro-Mistral-7B](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B) on a version of the [Project Alexandria dataset](https://huggingface.co/datasets/ChristophSchuhmann/alexandria-test), designed to turn input knowledge graphs structured as Python dictionaries into reconstructed plaintext.
+
+ ## Model description
+
+ This is a *prototype* model, trained quickly as a proof of concept. No hyperparameter tuning or extensive data cleaning has been done beyond filtering out entries that met any of the following criteria (a sketch of such a filter is shown after the list):
+ - Contains a refusal of some sort
+ - Has an empty input and/or output
+ - Queries that resulted in an error output
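+
+ As a loose illustration only, here is a minimal sketch of this kind of filtering. It assumes ShareGPT-style JSONL entries (matching the `type: sharegpt` setting in the axolotl config); the refusal markers, error check, and field names are hypothetical, not the actual filtering script.
+ ```python
+ import json
+
+ # Hypothetical refusal phrases; the real marker list is not published.
+ REFUSAL_MARKERS = ("I'm sorry", "I cannot", "as an AI")
+
+ def keep(entry: dict) -> bool:
+     """Return True if a ShareGPT-style entry survives the filters listed above."""
+     turns = entry.get("conversations", [])
+     if not turns or any(not t.get("value", "").strip() for t in turns):
+         return False  # empty input and/or output
+     text = " ".join(t["value"] for t in turns)
+     if any(marker in text for marker in REFUSAL_MARKERS):
+         return False  # contains a refusal of some sort
+     if "Traceback" in text or text.startswith("Error"):
+         return False  # query resulted in an error output (hypothetical check)
+     return True
+
+ with open("graphs_2_text_hermes.jsonl") as f:
+     cleaned = [e for e in map(json.loads, f) if keep(e)]
+ ```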
+
+ ## Intended uses & limitations
+
+ The model follows a form of ChatML with no system prompt. It should be prompted as follows:
+ ```
+ <|im_start|>user
+ [Input your knowledge graph structured as a Python dictionary here.]<|im_end|>
+ <|im_start|>assistant
+ (Make sure to put a newline after "assistant". Do not include this parenthesized text in your prompt.)
+ ```
+
+ Greedy sampling is recommended for generating outputs.
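+
+ A minimal generation sketch using Hugging Face `transformers`, with greedy decoding as recommended above. The model path and the example graph dictionary are placeholders, not the actual dataset schema:
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ model_path = "/workspace/disk2/alexandria/models/g2t_hermes/"  # hypothetical local path
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+ model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype="auto", device_map="auto")
+
+ # Hypothetical knowledge-graph dictionary; the real schema may differ.
+ graph = {"entities": ["Marie Curie", "polonium"],
+          "relations": [["Marie Curie", "discovered", "polonium"]]}
+
+ # The bundled chat template reproduces the ChatML format shown above,
+ # including the newline after "assistant".
+ inputs = tokenizer.apply_chat_template(
+     [{"role": "user", "content": str(graph)}],
+     add_generation_prompt=True,
+     return_tensors="pt",
+ ).to(model.device)
+
+ # do_sample=False gives greedy decoding.
+ outputs = model.generate(inputs, max_new_tokens=512, do_sample=False)
+ print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
+ ```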
+
+ No extensive data cleaning has been done, so the model may at times fail to produce a satisfactorily detailed or faithful reconstruction. Since this model is only 7B parameters, certain relationships in the input graph may not be properly picked up by the model. As stated before, this model is a prototype.
+
+ ## Training and evaluation data
+
+ The data was generated via several large language models.
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-06
+ - train_batch_size: 2
+ - eval_batch_size: 2
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 8
+ - total_train_batch_size: 16
+ - total_eval_batch_size: 16
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_steps: 10
+ - num_epochs: 1
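+
+ The reported total train batch size follows from the per-device settings: train_batch_size (2) × num_devices (8) × gradient_accumulation_steps (1) = 16.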
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - Transformers 4.39.0.dev0
+ - Pytorch 2.1.2+cu118
+ - Datasets 2.18.0
+ - Tokenizers 0.15.0
added_tokens.json ADDED
@@ -0,0 +1,4 @@
+ {
+ "<|im_end|>": 32000,
+ "<|im_start|>": 32001
+ }
config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "_name_or_path": "NousResearch/Hermes-2-Pro-Mistral-7B",
+ "architectures": [
+ "MistralForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 32768,
+ "model_type": "mistral",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 10000.0,
+ "sliding_window": 4096,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.39.0.dev0",
+ "use_cache": false,
+ "vocab_size": 32032
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "do_sample": true,
+ "eos_token_id": 32000,
+ "transformers_version": "4.39.0.dev0"
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:efb8b3ca2230a2820259ca868e3c275249afc78771463d5095b80655d0013e87
+ size 14484029102
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,61 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": true,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32000": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32001": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false,
+ "use_fast": true
+ }