Update model, now trained with OpenAssistant dataset in ChatML format

#6
by Felladrin - opened
README.md CHANGED
@@ -5,45 +5,66 @@ tags:
5
  - text-generation
6
  base_model: Locutusque/TinyMistral-248M
7
  datasets:
8
- - WizardLM/WizardLM_evol_instruct_V2_196k
9
- - KnutJaegersberg/WizardLM_evol_instruct_V2_196k_instruct_format
10
  widget:
11
  - text: |-
12
- ### Instruction:
13
- Write the specs of a game about trolls and warriors in a fantasy world.
14
-
15
- ### Response:
16
- The game is an adventure game that takes place on a planet, where players must explore their own unique abilities to survive. Players can use different strategies such as collecting items or trading them for gold or silver coins, but they also need to learn how to deal with obstacles and find new ways to escape.
17
-
18
- ### Instruction:
19
- Tell me something curious about the Earth.
20
-
21
- ### Response:
22
- The planet is a large, rocky world with an atmosphere of 10 billion years old and a surface area around 25 million miles (36 million kilometers) wide.
23
-
24
- ### Instruction:
25
- What are some potential applications for quantum computing?
26
-
27
- ### Response:
28
  inference:
29
  parameters:
30
  max_new_tokens: 64
31
  repetition_penalty: 1.18
32
  ---
33
 
34
- # Locutusque's TinyMistral-248M trained on the Evol Instruct dataset
35
 
36
- - Base model: [Locutusque/TinyMistral-248M](https://huggingface.co/Locutusque/TinyMistral-248M)
37
- - Dataset: [WizardLM/WizardLM_evol_instruct_V2_196k](https://huggingface.co/datasets/WizardLM/WizardLM_evol_instruct_V2_196k)
38
- - Trained with [AutoTrain Advanced](https://github.com/huggingface/autotrain-advanced) using [these parameters](https://huggingface.co/Felladrin/TinyMistral-248M-Evol-Instruct/blob/321787e81e2eb0392d7ce2715154fb9c254e39b1/training_params.json) and [this CSV file](https://huggingface.co/datasets/KnutJaegersberg/WizardLM_evol_instruct_V2_196k_instruct_format/blob/93aa373501f829449f23efc91b3ac6e7a60a4d70/all_instructions.csv)
39
- - Availability in other ML formats:
40
- - GGUF: [Felladrin/gguf-TinyMistral-248M-Evol-Instruct](https://huggingface.co/Felladrin/gguf-TinyMistral-248M-Evol-Instruct)
41
 
42
  ## Recommended Prompt Format
43
 
44
  ```
45
- ### Instruction:
46
- <instruction>
 
 
 
 
 
 
 
 
 
47
 
48
- ### Response:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  ```
 
5
  - text-generation
6
  base_model: Locutusque/TinyMistral-248M
7
  datasets:
8
+ - OpenAssistant/oasst_top1_2023-08-25
 
9
  widget:
10
  - text: |-
11
+ <|im_start|>user
12
+ Write the specs of a game about trolls and warriors in a fantasy world.<|im_end|>
13
+ <|im_start|>assistant
14
+ The game is an adventure game that takes place on a planet, where players must explore their unique abilities to survive. Players can use different strategies such as collecting items or trading them for gold or silver coins, but they also need to learn how to deal with obstacles and find new ways to escape.<|im_end|>
15
+ <|im_start|>user
16
+ Could you tell me something curious about the Earth?<|im_end|>
17
+ <|im_start|>assistant
18
+ The planet is a large, rocky world with an atmosphere of 10 billion years old and a surface area around 25 million miles (36 million kilometers) wide.<|im_end|>
19
+ <|im_start|>user
20
+ What are some potential applications for quantum computing?<|im_end|>
21
+ <|im_start|>assistant
 
 
 
 
 
22
  inference:
23
  parameters:
24
  max_new_tokens: 64
25
  repetition_penalty: 1.18
26
  ---
27
 
28
+ # Locutusque's TinyMistral-248M trained on OpenAssistant TOP-1 Conversation Threads
29
 
30
+ - Base model: [Locutusque/TinyMistral-248M](https://huggingface.co/Locutusque/TinyMistral-248M/blob/90b89d18fdf27937dc04ab8a9b543c5af2991c7f/README.md)
31
+ - Dataset: [OpenAssistant/oasst_top1_2023-08-25](https://huggingface.co/datasets/OpenAssistant/oasst_top1_2023-08-25)
 
 
 
32
 
33
  ## Recommended Prompt Format
34
 
35
  ```
36
+ <|im_start|>user
37
+ {message}<|im_end|>
38
+ <|im_start|>assistant
39
+ ```
40
+
41
+ ## How it was trained
42
+
43
+ ```ipython
44
+ %pip install autotrain-advanced
45
+
46
+ !autotrain setup
47
 
48
+ !autotrain llm \
49
+ --train \
50
+ --trainer "sft" \
51
+ --model './TinyMistral-248M/' \
52
+ --model_max_length 4096 \
53
+ --block-size 1024 \
54
+ --project-name 'trained-model' \
55
+ --data-path "OpenAssistant/oasst_top1_2023-08-25" \
56
+ --train_split "train" \
57
+ --valid_split "test" \
58
+ --text-column "text" \
59
+ --lr 1e-5 \
60
+ --train_batch_size 2 \
61
+ --epochs 5 \
62
+ --evaluation_strategy "steps" \
63
+ --save-strategy "steps" \
64
+ --save-total-limit 2 \
65
+ --warmup-ratio 0.05 \
66
+ --weight-decay 0.0 \
67
+ --gradient-accumulation 8 \
68
+ --logging-steps 10 \
69
+ --scheduler "constant"
70
  ```
config.json CHANGED
@@ -1,11 +1,10 @@
1
  {
2
- "_name_or_path": "Locutusque/TinyMistral-248M",
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
6
- "bos_token_id": 32000,
7
- "eos_token_id": 32001,
8
- "pad_token_id": 32002,
9
  "hidden_act": "silu",
10
  "hidden_size": 1024,
11
  "initializer_range": 0.02,
@@ -19,8 +18,8 @@
19
  "rope_theta": 10000.0,
20
  "sliding_window": 32,
21
  "tie_word_embeddings": false,
22
- "torch_dtype": "float16",
23
  "transformers_version": "4.34.1",
24
- "use_cache": true,
25
  "vocab_size": 32003
26
  }
 
1
  {
2
+ "_name_or_path": "./TinyMistral-248M/",
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
6
+ "bos_token_id": 1,
7
+ "eos_token_id": 2,
 
8
  "hidden_act": "silu",
9
  "hidden_size": 1024,
10
  "initializer_range": 0.02,
 
18
  "rope_theta": 10000.0,
19
  "sliding_window": 32,
20
  "tie_word_embeddings": false,
21
+ "torch_dtype": "float32",
22
  "transformers_version": "4.34.1",
23
+ "use_cache": false,
24
  "vocab_size": 32003
25
  }
generation_config.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
  "_from_model_config": true,
3
- "bos_token_id": 32000,
4
- "eos_token_id": 32001,
5
- "pad_token_id": 32002,
6
  "transformers_version": "4.34.1"
7
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
 
5
  "transformers_version": "4.34.1"
6
  }
model.safetensors → pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b06f829f8156b3724f0f7d8f10c75a09c08c9dc3655efa72272991dfe521597
3
- size 496052392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e01fe28eb61f7d317f3236e36c88be43e136ef4ae66a424e8c912a98141aba2
3
+ size 992115782
special_tokens_map.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "bos_token": {
3
- "content": "<|bos|>",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<|endoftext|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
 
1
  {
2
  "bos_token": {
3
+ "content": "<s>",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "</s>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
tokenizer.json CHANGED
@@ -80,7 +80,7 @@
80
  "single": [
81
  {
82
  "SpecialToken": {
83
- "id": "<|bos|>",
84
  "type_id": 0
85
  }
86
  },
@@ -94,7 +94,7 @@
94
  "pair": [
95
  {
96
  "SpecialToken": {
97
- "id": "<|bos|>",
98
  "type_id": 0
99
  }
100
  },
@@ -106,7 +106,7 @@
106
  },
107
  {
108
  "SpecialToken": {
109
- "id": "<|bos|>",
110
  "type_id": 1
111
  }
112
  },
@@ -118,13 +118,13 @@
118
  }
119
  ],
120
  "special_tokens": {
121
- "<|bos|>": {
122
- "id": "<|bos|>",
123
  "ids": [
124
- 32000
125
  ],
126
  "tokens": [
127
- "<|bos|>"
128
  ]
129
  }
130
  }
 
80
  "single": [
81
  {
82
  "SpecialToken": {
83
+ "id": "<s>",
84
  "type_id": 0
85
  }
86
  },
 
94
  "pair": [
95
  {
96
  "SpecialToken": {
97
+ "id": "<s>",
98
  "type_id": 0
99
  }
100
  },
 
106
  },
107
  {
108
  "SpecialToken": {
109
+ "id": "<s>",
110
  "type_id": 1
111
  }
112
  },
 
118
  }
119
  ],
120
  "special_tokens": {
121
+ "<s>": {
122
+ "id": "<s>",
123
  "ids": [
124
+ 1
125
  ],
126
  "tokens": [
127
+ "<s>"
128
  ]
129
  }
130
  }
tokenizer_config.json CHANGED
@@ -1,6 +1,4 @@
1
  {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
@@ -52,15 +50,19 @@
52
  }
53
  },
54
  "additional_special_tokens": [],
55
- "bos_token": "<|bos|>",
56
  "clean_up_tokenization_spaces": false,
57
- "eos_token": "<|endoftext|>",
58
  "legacy": true,
59
- "model_max_length": 1000000000000000019884624838656,
 
60
  "pad_token": "[PAD]",
61
  "sp_model_kwargs": {},
62
  "spaces_between_special_tokens": false,
 
63
  "tokenizer_class": "LlamaTokenizer",
 
 
64
  "unk_token": "<unk>",
65
  "use_default_system_prompt": true
66
  }
 
1
  {
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",
 
50
  }
51
  },
52
  "additional_special_tokens": [],
53
+ "bos_token": "<s>",
54
  "clean_up_tokenization_spaces": false,
55
+ "eos_token": "</s>",
56
  "legacy": true,
57
+ "max_length": 1536,
58
+ "model_max_length": 4096,
59
  "pad_token": "[PAD]",
60
  "sp_model_kwargs": {},
61
  "spaces_between_special_tokens": false,
62
+ "stride": 0,
63
  "tokenizer_class": "LlamaTokenizer",
64
+ "truncation_side": "right",
65
+ "truncation_strategy": "longest_first",
66
  "unk_token": "<unk>",
67
  "use_default_system_prompt": true
68
  }
training_params.json DELETED
@@ -1,47 +0,0 @@
1
- {
2
- "model": "Felladrin/TinyMistral-248M-Evol-Instruct",
3
- "data_path": "data/",
4
- "project_name": "TinyMistral-248M-Evol-Instruct",
5
- "train_split": "train",
6
- "valid_split": null,
7
- "text_column": "text",
8
- "rejected_text_column": "rejected",
9
- "token": null,
10
- "lr": 0.0002,
11
- "epochs": 1,
12
- "batch_size": 12,
13
- "warmup_ratio": 0.1,
14
- "gradient_accumulation": 4,
15
- "optimizer": "adamw_torch",
16
- "scheduler": "linear",
17
- "weight_decay": 0.01,
18
- "max_grad_norm": 1.0,
19
- "seed": 42,
20
- "add_eos_token": false,
21
- "block_size": 1024,
22
- "use_peft": false,
23
- "lora_r": 16,
24
- "lora_alpha": 32,
25
- "lora_dropout": 0.05,
26
- "logging_steps": -1,
27
- "evaluation_strategy": "epoch",
28
- "save_total_limit": 1,
29
- "save_strategy": "epoch",
30
- "auto_find_batch_size": false,
31
- "fp16": false,
32
- "push_to_hub": false,
33
- "use_int8": true,
34
- "model_max_length": 1024,
35
- "repo_id": null,
36
- "use_int4": false,
37
- "trainer": "sft",
38
- "target_modules": null,
39
- "merge_adapter": true,
40
- "username": null,
41
- "use_flash_attention_2": false,
42
- "log": "none",
43
- "disable_gradient_checkpointing": false,
44
- "model_ref": null,
45
- "dpo_beta": 0.1,
46
- "prompt_text_column": "prompt"
47
- }