Felladrin committed on
Commit
b1ab71e
1 Parent(s): 3cfecd1

Update model, now trained with OpenAssistant dataset in ChatML format

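For context, ChatML wraps each conversation turn in <|im_start|>/<|im_end|> markers. A minimal sketch of what one formatted training sample might look like (the turns here are illustrative; the real ones come from the OpenAssistant dataset):

# Sketch of a ChatML-formatted sample; content is illustrative only.
sample = (
    "<|im_start|>user\n"
    "What is the capital of France?<|im_end|>\n"
    "<|im_start|>assistant\n"
    "The capital of France is Paris.<|im_end|>\n"
)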
config.json CHANGED
@@ -1,11 +1,10 @@
 {
-  "_name_or_path": "Locutusque/TinyMistral-248M",
+  "_name_or_path": "./TinyMistral-248M/",
   "architectures": [
     "MistralForCausalLM"
   ],
-  "bos_token_id": 32000,
-  "eos_token_id": 32001,
-  "pad_token_id": 32002,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
   "hidden_act": "silu",
   "hidden_size": 1024,
   "initializer_range": 0.02,
@@ -19,8 +18,8 @@
   "rope_theta": 10000.0,
   "sliding_window": 32,
   "tie_word_embeddings": false,
-  "torch_dtype": "float16",
+  "torch_dtype": "float32",
   "transformers_version": "4.34.1",
-  "use_cache": true,
+  "use_cache": false,
   "vocab_size": 32003
 }
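The token-id changes above move the model from custom ids (32000/32001/32002) to the standard Llama/Mistral convention, where id 1 is <s> and id 2 is </s>. A quick sanity check, assuming a local checkout of this repository (the path is a placeholder):

from transformers import AutoConfig

config = AutoConfig.from_pretrained("path/to/this/repo")  # placeholder path
assert config.bos_token_id == 1  # <s>
assert config.eos_token_id == 2  # </s>
print(config.torch_dtype)        # float32 after this commit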
generation_config.json CHANGED
@@ -1,7 +1,6 @@
 {
   "_from_model_config": true,
-  "bos_token_id": 32000,
-  "eos_token_id": 32001,
-  "pad_token_id": 32002,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
   "transformers_version": "4.34.1"
 }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e01fe28eb61f7d317f3236e36c88be43e136ef4ae66a424e8c912a98141aba2
+size 992115782
special_tokens_map.json CHANGED
@@ -1,13 +1,13 @@
 {
   "bos_token": {
-    "content": "<|bos|>",
+    "content": "<s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
-    "content": "<|endoftext|>",
+    "content": "</s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
@@ -80,7 +80,7 @@
     "single": [
       {
         "SpecialToken": {
-          "id": "<|bos|>",
+          "id": "<s>",
           "type_id": 0
         }
       },
@@ -94,7 +94,7 @@
     "pair": [
       {
         "SpecialToken": {
-          "id": "<|bos|>",
+          "id": "<s>",
           "type_id": 0
         }
       },
@@ -106,7 +106,7 @@
       },
       {
         "SpecialToken": {
-          "id": "<|bos|>",
+          "id": "<s>",
           "type_id": 1
         }
       },
@@ -118,13 +118,13 @@
       }
     ],
     "special_tokens": {
-      "<|bos|>": {
-        "id": "<|bos|>",
+      "<s>": {
+        "id": "<s>",
         "ids": [
-          32000
+          1
         ],
         "tokens": [
-          "<|bos|>"
+          "<s>"
         ]
       }
     }
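With the post-processor now emitting <s> (id 1) instead of <|bos|> (id 32000), encoding should prepend id 1 to every sequence. A small check, again with a placeholder path:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("path/to/this/repo")  # placeholder
ids = tokenizer("Hello")["input_ids"]
print(ids[0])                                   # 1 -> <s> prepended as BOS
print(tokenizer.convert_ids_to_tokens([1, 2]))  # ['<s>', '</s>']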
tokenizer_config.json CHANGED
@@ -1,6 +1,4 @@
 {
-  "add_bos_token": true,
-  "add_eos_token": false,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
@@ -52,15 +50,19 @@
     }
   },
   "additional_special_tokens": [],
-  "bos_token": "<|bos|>",
+  "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|endoftext|>",
+  "eos_token": "</s>",
   "legacy": true,
-  "model_max_length": 1000000000000000019884624838656,
+  "max_length": 1536,
+  "model_max_length": 4096,
   "pad_token": "[PAD]",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
+  "stride": 0,
   "tokenizer_class": "LlamaTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "<unk>",
   "use_default_system_prompt": true
 }
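The new truncation keys record how the tokenizer was configured for training: inputs are truncated on the right to 1536 tokens, and model_max_length drops from the effectively-unbounded sentinel to 4096. A sketch of the equivalent call (placeholder path):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("path/to/this/repo")  # placeholder
enc = tokenizer(
    "a very long document " * 1000,
    truncation=True,   # "truncation_strategy": "longest_first"
    max_length=1536,   # "max_length": 1536
)
# With "truncation_side": "right", tokens beyond the limit are dropped from the end.
assert len(enc["input_ids"]) <= 1536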
training_params.json CHANGED
@@ -1,20 +1,20 @@
 {
-  "model": "Felladrin/TinyMistral-248M-Evol-Instruct",
-  "data_path": "data/",
-  "project_name": "TinyMistral-248M-Evol-Instruct",
+  "model": "./TinyMistral-248M/",
+  "data_path": "OpenAssistant/oasst_top1_2023-08-25",
+  "project_name": "trained-model",
   "train_split": "train",
-  "valid_split": null,
+  "valid_split": "test",
   "text_column": "text",
   "rejected_text_column": "rejected",
   "token": null,
-  "lr": 0.0002,
-  "epochs": 1,
-  "batch_size": 12,
-  "warmup_ratio": 0.1,
-  "gradient_accumulation": 4,
+  "lr": 1e-05,
+  "epochs": 5,
+  "batch_size": 2,
+  "warmup_ratio": 0.05,
+  "gradient_accumulation": 8,
   "optimizer": "adamw_torch",
-  "scheduler": "linear",
-  "weight_decay": 0.01,
+  "scheduler": "constant",
+  "weight_decay": 0.0,
   "max_grad_norm": 1.0,
   "seed": 42,
   "add_eos_token": false,
@@ -23,20 +23,20 @@
   "lora_r": 16,
   "lora_alpha": 32,
   "lora_dropout": 0.05,
-  "logging_steps": -1,
-  "evaluation_strategy": "epoch",
-  "save_total_limit": 1,
-  "save_strategy": "epoch",
+  "logging_steps": 50,
+  "evaluation_strategy": "steps",
+  "save_total_limit": 2,
+  "save_strategy": "steps",
   "auto_find_batch_size": false,
   "fp16": false,
   "push_to_hub": false,
-  "use_int8": true,
-  "model_max_length": 1024,
+  "use_int8": false,
+  "model_max_length": 4096,
   "repo_id": null,
   "use_int4": false,
   "trainer": "sft",
   "target_modules": null,
-  "merge_adapter": true,
+  "merge_adapter": false,
   "username": null,
   "use_flash_attention_2": false,
   "log": "none",