sequelbox committed on
Commit
9d12ec0
1 Parent(s): 03328e7

a91634b4e42eeb457f4f2bcbe45807e668ed0469bdabfa82770d16c9750aeba2

.ipynb_checkpoints/README-checkpoint.md ADDED
@@ -0,0 +1,71 @@
+ ---
+ language:
+ - en
+ pipeline_tag: text-generation
+ tags:
+ - shining-valiant
+ - valiant
+ - valiant-labs
+ - llama
+ - llama-2
+ - llama-2-chat
+ - 13b
+ model_type: llama
+ license: llama2
+ ---
+
+
+ ![image/jpeg](https://cdn-uploads.huggingface.co/production/uploads/63444f2687964b331809eb55/EXX7TKbB-R6arxww2mk0R.jpeg)
+
+
+
+ Shining Valiant XS is a chat model built on the Llama 2 architecture, finetuned on our data for insight, creativity, passion, and friendliness.
+ - Uses the llama-2-13b-chat model, with safetensors
+ - Trained through multiple finetuning runs on public and private data
+ - the personality of our 70b [Shining Valiant](https://huggingface.co/ValiantLabs/ShiningValiant) model, now at 13b! **Our new release features greatly expanded personality capability**, bringing a more immersive chat experience!
+
+ ## Version
+
+ This is Version **1.2** of Shining Valiant XS. We've greatly expanded our personality dataset and fixed some bugs to deliver our strongest real-chat experience so far.
+
+ (We're also exploring **new models and architectures**, to deliver helpful open source capabilities for users and creators!)
+
+ Previous versions remain available in the repository. New models will be released for everyone once our team's training and validation process is complete.
+
+ ## Evaluation
+
+ Version 1.2 is awaiting evaluation from the Open LLM leaderboard.
+
+ ## Prompting Guide
+ Shining Valiant XS uses the same prompt format as Llama 2 Chat - feel free to use your existing prompts and scripts!
+ A few examples of different formats:
+
+ 1. [INST] Good morning! Can you let me know how to parse a text file and turn the semicolons into commas? [/INST]
+
+ 2. [INST] (You are an intelligent, helpful AI assistant.) Hello, can you write me a thank you letter? [/INST]
+
+ 3. [INST] << SYS >> You are an intelligent, helpful AI assistant. << /SYS >> Deep dive about a country with interesting history: [/INST]
+
+ ## The Model
+ Shining Valiant XS is built on top of Diamond Force, which uses Llama 2's 13b parameter architecture and features upgraded general and chat capability.
+
+ From there, we've created Shining Valiant XS through multiple finetuning runs on different compositions of our private dataset, the same one we use for our [Shining Valiant](https://huggingface.co/ValiantLabs/ShiningValiant) model.
+
+ Our private data focuses primarily on applying Shining Valiant's personality: she's friendly, enthusiastic, insightful, knowledgeable, and loves to learn!
+
+ With this release, the personality component of our Shining Valiant dataset has been greatly improved. We're excited to use it in future releases of this model and others.
+
+
+
+
+ ![image/jpeg](https://cdn-uploads.huggingface.co/production/uploads/63444f2687964b331809eb55/VCJ8Fmefd8cdVhXSSxJiD.jpeg)
+
+
+ Shining Valiant XS is created by [Valiant Labs.](http://valiantlabs.ca/)
+
+ [Follow us on X for updates on our models!](https://twitter.com/valiant_labs)
+
+ We care about open source.
+ For everyone to use.
+
+ We encourage others to finetune further from our models.
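For readers who want to try the prompt formats listed in this README, here is a minimal, hedged sketch of loading the checkpoint with the standard Hugging Face transformers API and wrapping a message in the Llama 2 Chat `[INST]` format. The repo id comes from `_name_or_path` in config.json; the dtype, device placement, and sampling settings are illustrative assumptions, not part of this commit.

```python
# Minimal sketch: load the model and send it one [INST] ... [/INST] prompt.
# Assumes the standard transformers text-generation API and enough memory
# for the 13b checkpoint (stored in float32; loaded here in float16).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "ValiantLabs/ShiningValiantXS"  # from "_name_or_path" in config.json

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.float16,  # halves memory vs. the stored float32 weights
    device_map="auto",
)

# Prompt format 1 from the guide above.
prompt = "[INST] Good morning! Can you let me know how to parse a text file and turn the semicolons into commas? [/INST]"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# generation_config.json in this commit only pins bos/eos ids, so sampling
# settings are chosen by the caller; these values are examples only.
output = model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)
print(tokenizer.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```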
.ipynb_checkpoints/config-checkpoint.json ADDED
@@ -0,0 +1,28 @@
+ {
+ "_name_or_path": "ValiantLabs/ShiningValiantXS",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 5120,
+ "initializer_range": 0.02,
+ "intermediate_size": 13824,
+ "max_position_embeddings": 4096,
+ "model_type": "llama",
+ "num_attention_heads": 40,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 40,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.36.2",
+ "use_cache": false,
+ "vocab_size": 32000
+ }
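The config above fixes the standard Llama 2 13b geometry (40 layers, hidden size 5120, intermediate size 13824, vocabulary 32000, untied embeddings). As a rough sanity check, here is a back-of-the-envelope parameter count derived only from those fields; it lands on about 13.0B parameters and, at 4 bytes per float32 value, matches the 52,063,457,280-byte total_size recorded in the safetensors index below.

```python
# Rough Llama parameter count from the config fields above
# (no attention/MLP biases; num_key_value_heads == num_attention_heads).
hidden, layers, inter, vocab = 5120, 40, 13824, 32000

per_layer = (
    4 * hidden * hidden   # q/k/v/o attention projections
    + 3 * hidden * inter  # gate/up/down MLP projections
    + 2 * hidden          # input + post-attention RMSNorm weights
)
total = (
    vocab * hidden        # embed_tokens
    + layers * per_layer  # transformer blocks
    + hidden              # final RMSNorm
    + vocab * hidden      # lm_head (tie_word_embeddings is false)
)

print(f"{total:,} parameters")            # 13,015,864,320
print(f"{total * 4:,} bytes in float32")  # 52,063,457,280 == index total_size
```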
.ipynb_checkpoints/generation_config-checkpoint.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "transformers_version": "4.36.2"
+ }
.ipynb_checkpoints/model.safetensors.index-checkpoint.json ADDED
@@ -0,0 +1,370 @@
1
+ {
2
+ "metadata": {
3
+ "total_size": 52063457280
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00011-of-00011.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00011.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00011.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00011.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00011.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00011.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00011.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00011.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00011.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00011.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00011.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00011.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00011.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00011.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00011.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00011.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00011.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00011.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00011.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00011.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00003-of-00011.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00003-of-00011.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00011.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00003-of-00011.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00011.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00011.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00011.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00011.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00011.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00004-of-00011.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00004-of-00011.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00004-of-00011.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00004-of-00011.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00004-of-00011.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00011.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00004-of-00011.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00011.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00004-of-00011.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00004-of-00011.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00004-of-00011.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00004-of-00011.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00004-of-00011.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00004-of-00011.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00004-of-00011.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00004-of-00011.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00004-of-00011.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00004-of-00011.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00004-of-00011.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00004-of-00011.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00004-of-00011.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00004-of-00011.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00004-of-00011.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00004-of-00011.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00004-of-00011.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00004-of-00011.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00004-of-00011.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00004-of-00011.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00004-of-00011.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00004-of-00011.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00004-of-00011.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00011.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00004-of-00011.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00004-of-00011.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00004-of-00011.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00004-of-00011.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00005-of-00011.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00005-of-00011.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00005-of-00011.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00005-of-00011.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00005-of-00011.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00005-of-00011.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00005-of-00011.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00011.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00005-of-00011.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00005-of-00011.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00005-of-00011.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00005-of-00011.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00005-of-00011.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00005-of-00011.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00005-of-00011.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00005-of-00011.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00005-of-00011.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00005-of-00011.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00005-of-00011.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00005-of-00011.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00005-of-00011.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00005-of-00011.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00005-of-00011.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00005-of-00011.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00005-of-00011.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00005-of-00011.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00005-of-00011.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00005-of-00011.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00005-of-00011.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00005-of-00011.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00005-of-00011.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00005-of-00011.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00005-of-00011.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00005-of-00011.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00005-of-00011.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00005-of-00011.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00006-of-00011.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00006-of-00011.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00006-of-00011.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00006-of-00011.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00006-of-00011.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00006-of-00011.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00006-of-00011.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00006-of-00011.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00006-of-00011.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00011.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00011.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00011.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00011.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00011.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00011.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00011.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00011.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00011.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00006-of-00011.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00006-of-00011.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00006-of-00011.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00006-of-00011.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00006-of-00011.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00006-of-00011.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00006-of-00011.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00006-of-00011.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00006-of-00011.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00006-of-00011.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00006-of-00011.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00006-of-00011.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00006-of-00011.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00006-of-00011.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00006-of-00011.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00006-of-00011.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00006-of-00011.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00006-of-00011.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00007-of-00011.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00007-of-00011.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00006-of-00011.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00006-of-00011.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00007-of-00011.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00006-of-00011.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00006-of-00011.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00006-of-00011.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00006-of-00011.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00007-of-00011.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00007-of-00011.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00007-of-00011.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00007-of-00011.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00007-of-00011.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00007-of-00011.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00007-of-00011.safetensors",
159
+ "model.layers.23.self_attn.q_proj.weight": "model-00007-of-00011.safetensors",
160
+ "model.layers.23.self_attn.v_proj.weight": "model-00007-of-00011.safetensors",
161
+ "model.layers.24.input_layernorm.weight": "model-00007-of-00011.safetensors",
162
+ "model.layers.24.mlp.down_proj.weight": "model-00007-of-00011.safetensors",
163
+ "model.layers.24.mlp.gate_proj.weight": "model-00007-of-00011.safetensors",
164
+ "model.layers.24.mlp.up_proj.weight": "model-00007-of-00011.safetensors",
165
+ "model.layers.24.post_attention_layernorm.weight": "model-00007-of-00011.safetensors",
166
+ "model.layers.24.self_attn.k_proj.weight": "model-00007-of-00011.safetensors",
167
+ "model.layers.24.self_attn.o_proj.weight": "model-00007-of-00011.safetensors",
168
+ "model.layers.24.self_attn.q_proj.weight": "model-00007-of-00011.safetensors",
169
+ "model.layers.24.self_attn.v_proj.weight": "model-00007-of-00011.safetensors",
170
+ "model.layers.25.input_layernorm.weight": "model-00007-of-00011.safetensors",
171
+ "model.layers.25.mlp.down_proj.weight": "model-00007-of-00011.safetensors",
172
+ "model.layers.25.mlp.gate_proj.weight": "model-00007-of-00011.safetensors",
173
+ "model.layers.25.mlp.up_proj.weight": "model-00007-of-00011.safetensors",
174
+ "model.layers.25.post_attention_layernorm.weight": "model-00007-of-00011.safetensors",
175
+ "model.layers.25.self_attn.k_proj.weight": "model-00007-of-00011.safetensors",
176
+ "model.layers.25.self_attn.o_proj.weight": "model-00007-of-00011.safetensors",
177
+ "model.layers.25.self_attn.q_proj.weight": "model-00007-of-00011.safetensors",
178
+ "model.layers.25.self_attn.v_proj.weight": "model-00007-of-00011.safetensors",
179
+ "model.layers.26.input_layernorm.weight": "model-00008-of-00011.safetensors",
180
+ "model.layers.26.mlp.down_proj.weight": "model-00008-of-00011.safetensors",
181
+ "model.layers.26.mlp.gate_proj.weight": "model-00007-of-00011.safetensors",
182
+ "model.layers.26.mlp.up_proj.weight": "model-00008-of-00011.safetensors",
183
+ "model.layers.26.post_attention_layernorm.weight": "model-00008-of-00011.safetensors",
184
+ "model.layers.26.self_attn.k_proj.weight": "model-00007-of-00011.safetensors",
185
+ "model.layers.26.self_attn.o_proj.weight": "model-00007-of-00011.safetensors",
186
+ "model.layers.26.self_attn.q_proj.weight": "model-00007-of-00011.safetensors",
187
+ "model.layers.26.self_attn.v_proj.weight": "model-00007-of-00011.safetensors",
188
+ "model.layers.27.input_layernorm.weight": "model-00008-of-00011.safetensors",
189
+ "model.layers.27.mlp.down_proj.weight": "model-00008-of-00011.safetensors",
190
+ "model.layers.27.mlp.gate_proj.weight": "model-00008-of-00011.safetensors",
191
+ "model.layers.27.mlp.up_proj.weight": "model-00008-of-00011.safetensors",
192
+ "model.layers.27.post_attention_layernorm.weight": "model-00008-of-00011.safetensors",
193
+ "model.layers.27.self_attn.k_proj.weight": "model-00008-of-00011.safetensors",
194
+ "model.layers.27.self_attn.o_proj.weight": "model-00008-of-00011.safetensors",
195
+ "model.layers.27.self_attn.q_proj.weight": "model-00008-of-00011.safetensors",
196
+ "model.layers.27.self_attn.v_proj.weight": "model-00008-of-00011.safetensors",
197
+ "model.layers.28.input_layernorm.weight": "model-00008-of-00011.safetensors",
198
+ "model.layers.28.mlp.down_proj.weight": "model-00008-of-00011.safetensors",
199
+ "model.layers.28.mlp.gate_proj.weight": "model-00008-of-00011.safetensors",
200
+ "model.layers.28.mlp.up_proj.weight": "model-00008-of-00011.safetensors",
201
+ "model.layers.28.post_attention_layernorm.weight": "model-00008-of-00011.safetensors",
202
+ "model.layers.28.self_attn.k_proj.weight": "model-00008-of-00011.safetensors",
203
+ "model.layers.28.self_attn.o_proj.weight": "model-00008-of-00011.safetensors",
204
+ "model.layers.28.self_attn.q_proj.weight": "model-00008-of-00011.safetensors",
205
+ "model.layers.28.self_attn.v_proj.weight": "model-00008-of-00011.safetensors",
206
+ "model.layers.29.input_layernorm.weight": "model-00008-of-00011.safetensors",
207
+ "model.layers.29.mlp.down_proj.weight": "model-00008-of-00011.safetensors",
208
+ "model.layers.29.mlp.gate_proj.weight": "model-00008-of-00011.safetensors",
209
+ "model.layers.29.mlp.up_proj.weight": "model-00008-of-00011.safetensors",
210
+ "model.layers.29.post_attention_layernorm.weight": "model-00008-of-00011.safetensors",
211
+ "model.layers.29.self_attn.k_proj.weight": "model-00008-of-00011.safetensors",
212
+ "model.layers.29.self_attn.o_proj.weight": "model-00008-of-00011.safetensors",
213
+ "model.layers.29.self_attn.q_proj.weight": "model-00008-of-00011.safetensors",
214
+ "model.layers.29.self_attn.v_proj.weight": "model-00008-of-00011.safetensors",
215
+ "model.layers.3.input_layernorm.weight": "model-00002-of-00011.safetensors",
216
+ "model.layers.3.mlp.down_proj.weight": "model-00002-of-00011.safetensors",
217
+ "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00011.safetensors",
218
+ "model.layers.3.mlp.up_proj.weight": "model-00002-of-00011.safetensors",
219
+ "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00011.safetensors",
220
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00011.safetensors",
221
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00011.safetensors",
222
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00011.safetensors",
223
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00011.safetensors",
224
+ "model.layers.30.input_layernorm.weight": "model-00009-of-00011.safetensors",
225
+ "model.layers.30.mlp.down_proj.weight": "model-00009-of-00011.safetensors",
226
+ "model.layers.30.mlp.gate_proj.weight": "model-00009-of-00011.safetensors",
227
+ "model.layers.30.mlp.up_proj.weight": "model-00009-of-00011.safetensors",
228
+ "model.layers.30.post_attention_layernorm.weight": "model-00009-of-00011.safetensors",
229
+ "model.layers.30.self_attn.k_proj.weight": "model-00008-of-00011.safetensors",
230
+ "model.layers.30.self_attn.o_proj.weight": "model-00008-of-00011.safetensors",
231
+ "model.layers.30.self_attn.q_proj.weight": "model-00008-of-00011.safetensors",
232
+ "model.layers.30.self_attn.v_proj.weight": "model-00008-of-00011.safetensors",
233
+ "model.layers.31.input_layernorm.weight": "model-00009-of-00011.safetensors",
234
+ "model.layers.31.mlp.down_proj.weight": "model-00009-of-00011.safetensors",
235
+ "model.layers.31.mlp.gate_proj.weight": "model-00009-of-00011.safetensors",
236
+ "model.layers.31.mlp.up_proj.weight": "model-00009-of-00011.safetensors",
237
+ "model.layers.31.post_attention_layernorm.weight": "model-00009-of-00011.safetensors",
238
+ "model.layers.31.self_attn.k_proj.weight": "model-00009-of-00011.safetensors",
239
+ "model.layers.31.self_attn.o_proj.weight": "model-00009-of-00011.safetensors",
240
+ "model.layers.31.self_attn.q_proj.weight": "model-00009-of-00011.safetensors",
241
+ "model.layers.31.self_attn.v_proj.weight": "model-00009-of-00011.safetensors",
242
+ "model.layers.32.input_layernorm.weight": "model-00009-of-00011.safetensors",
243
+ "model.layers.32.mlp.down_proj.weight": "model-00009-of-00011.safetensors",
244
+ "model.layers.32.mlp.gate_proj.weight": "model-00009-of-00011.safetensors",
245
+ "model.layers.32.mlp.up_proj.weight": "model-00009-of-00011.safetensors",
246
+ "model.layers.32.post_attention_layernorm.weight": "model-00009-of-00011.safetensors",
247
+ "model.layers.32.self_attn.k_proj.weight": "model-00009-of-00011.safetensors",
248
+ "model.layers.32.self_attn.o_proj.weight": "model-00009-of-00011.safetensors",
249
+ "model.layers.32.self_attn.q_proj.weight": "model-00009-of-00011.safetensors",
250
+ "model.layers.32.self_attn.v_proj.weight": "model-00009-of-00011.safetensors",
251
+ "model.layers.33.input_layernorm.weight": "model-00009-of-00011.safetensors",
252
+ "model.layers.33.mlp.down_proj.weight": "model-00009-of-00011.safetensors",
253
+ "model.layers.33.mlp.gate_proj.weight": "model-00009-of-00011.safetensors",
254
+ "model.layers.33.mlp.up_proj.weight": "model-00009-of-00011.safetensors",
255
+ "model.layers.33.post_attention_layernorm.weight": "model-00009-of-00011.safetensors",
256
+ "model.layers.33.self_attn.k_proj.weight": "model-00009-of-00011.safetensors",
257
+ "model.layers.33.self_attn.o_proj.weight": "model-00009-of-00011.safetensors",
258
+ "model.layers.33.self_attn.q_proj.weight": "model-00009-of-00011.safetensors",
259
+ "model.layers.33.self_attn.v_proj.weight": "model-00009-of-00011.safetensors",
260
+ "model.layers.34.input_layernorm.weight": "model-00010-of-00011.safetensors",
261
+ "model.layers.34.mlp.down_proj.weight": "model-00010-of-00011.safetensors",
262
+ "model.layers.34.mlp.gate_proj.weight": "model-00010-of-00011.safetensors",
263
+ "model.layers.34.mlp.up_proj.weight": "model-00010-of-00011.safetensors",
264
+ "model.layers.34.post_attention_layernorm.weight": "model-00010-of-00011.safetensors",
265
+ "model.layers.34.self_attn.k_proj.weight": "model-00009-of-00011.safetensors",
266
+ "model.layers.34.self_attn.o_proj.weight": "model-00010-of-00011.safetensors",
267
+ "model.layers.34.self_attn.q_proj.weight": "model-00009-of-00011.safetensors",
268
+ "model.layers.34.self_attn.v_proj.weight": "model-00009-of-00011.safetensors",
269
+ "model.layers.35.input_layernorm.weight": "model-00010-of-00011.safetensors",
270
+ "model.layers.35.mlp.down_proj.weight": "model-00010-of-00011.safetensors",
271
+ "model.layers.35.mlp.gate_proj.weight": "model-00010-of-00011.safetensors",
272
+ "model.layers.35.mlp.up_proj.weight": "model-00010-of-00011.safetensors",
273
+ "model.layers.35.post_attention_layernorm.weight": "model-00010-of-00011.safetensors",
274
+ "model.layers.35.self_attn.k_proj.weight": "model-00010-of-00011.safetensors",
275
+ "model.layers.35.self_attn.o_proj.weight": "model-00010-of-00011.safetensors",
276
+ "model.layers.35.self_attn.q_proj.weight": "model-00010-of-00011.safetensors",
277
+ "model.layers.35.self_attn.v_proj.weight": "model-00010-of-00011.safetensors",
278
+ "model.layers.36.input_layernorm.weight": "model-00010-of-00011.safetensors",
279
+ "model.layers.36.mlp.down_proj.weight": "model-00010-of-00011.safetensors",
280
+ "model.layers.36.mlp.gate_proj.weight": "model-00010-of-00011.safetensors",
281
+ "model.layers.36.mlp.up_proj.weight": "model-00010-of-00011.safetensors",
282
+ "model.layers.36.post_attention_layernorm.weight": "model-00010-of-00011.safetensors",
283
+ "model.layers.36.self_attn.k_proj.weight": "model-00010-of-00011.safetensors",
284
+ "model.layers.36.self_attn.o_proj.weight": "model-00010-of-00011.safetensors",
285
+ "model.layers.36.self_attn.q_proj.weight": "model-00010-of-00011.safetensors",
286
+ "model.layers.36.self_attn.v_proj.weight": "model-00010-of-00011.safetensors",
287
+ "model.layers.37.input_layernorm.weight": "model-00010-of-00011.safetensors",
288
+ "model.layers.37.mlp.down_proj.weight": "model-00010-of-00011.safetensors",
289
+ "model.layers.37.mlp.gate_proj.weight": "model-00010-of-00011.safetensors",
290
+ "model.layers.37.mlp.up_proj.weight": "model-00010-of-00011.safetensors",
291
+ "model.layers.37.post_attention_layernorm.weight": "model-00010-of-00011.safetensors",
292
+ "model.layers.37.self_attn.k_proj.weight": "model-00010-of-00011.safetensors",
293
+ "model.layers.37.self_attn.o_proj.weight": "model-00010-of-00011.safetensors",
294
+ "model.layers.37.self_attn.q_proj.weight": "model-00010-of-00011.safetensors",
295
+ "model.layers.37.self_attn.v_proj.weight": "model-00010-of-00011.safetensors",
296
+ "model.layers.38.input_layernorm.weight": "model-00011-of-00011.safetensors",
297
+ "model.layers.38.mlp.down_proj.weight": "model-00011-of-00011.safetensors",
298
+ "model.layers.38.mlp.gate_proj.weight": "model-00011-of-00011.safetensors",
299
+ "model.layers.38.mlp.up_proj.weight": "model-00011-of-00011.safetensors",
300
+ "model.layers.38.post_attention_layernorm.weight": "model-00011-of-00011.safetensors",
301
+ "model.layers.38.self_attn.k_proj.weight": "model-00010-of-00011.safetensors",
302
+ "model.layers.38.self_attn.o_proj.weight": "model-00011-of-00011.safetensors",
303
+ "model.layers.38.self_attn.q_proj.weight": "model-00010-of-00011.safetensors",
304
+ "model.layers.38.self_attn.v_proj.weight": "model-00011-of-00011.safetensors",
305
+ "model.layers.39.input_layernorm.weight": "model-00011-of-00011.safetensors",
306
+ "model.layers.39.mlp.down_proj.weight": "model-00011-of-00011.safetensors",
307
+ "model.layers.39.mlp.gate_proj.weight": "model-00011-of-00011.safetensors",
308
+ "model.layers.39.mlp.up_proj.weight": "model-00011-of-00011.safetensors",
309
+ "model.layers.39.post_attention_layernorm.weight": "model-00011-of-00011.safetensors",
310
+ "model.layers.39.self_attn.k_proj.weight": "model-00011-of-00011.safetensors",
311
+ "model.layers.39.self_attn.o_proj.weight": "model-00011-of-00011.safetensors",
312
+ "model.layers.39.self_attn.q_proj.weight": "model-00011-of-00011.safetensors",
313
+ "model.layers.39.self_attn.v_proj.weight": "model-00011-of-00011.safetensors",
314
+ "model.layers.4.input_layernorm.weight": "model-00002-of-00011.safetensors",
315
+ "model.layers.4.mlp.down_proj.weight": "model-00002-of-00011.safetensors",
316
+ "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00011.safetensors",
317
+ "model.layers.4.mlp.up_proj.weight": "model-00002-of-00011.safetensors",
318
+ "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00011.safetensors",
319
+ "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00011.safetensors",
320
+ "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00011.safetensors",
321
+ "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00011.safetensors",
322
+ "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00011.safetensors",
323
+ "model.layers.5.input_layernorm.weight": "model-00002-of-00011.safetensors",
324
+ "model.layers.5.mlp.down_proj.weight": "model-00002-of-00011.safetensors",
325
+ "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00011.safetensors",
326
+ "model.layers.5.mlp.up_proj.weight": "model-00002-of-00011.safetensors",
327
+ "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00011.safetensors",
328
+ "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00011.safetensors",
329
+ "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00011.safetensors",
330
+ "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00011.safetensors",
331
+ "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00011.safetensors",
332
+ "model.layers.6.input_layernorm.weight": "model-00002-of-00011.safetensors",
333
+ "model.layers.6.mlp.down_proj.weight": "model-00002-of-00011.safetensors",
334
+ "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00011.safetensors",
335
+ "model.layers.6.mlp.up_proj.weight": "model-00002-of-00011.safetensors",
336
+ "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00011.safetensors",
337
+ "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00011.safetensors",
338
+ "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00011.safetensors",
339
+ "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00011.safetensors",
340
+ "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00011.safetensors",
341
+ "model.layers.7.input_layernorm.weight": "model-00003-of-00011.safetensors",
342
+ "model.layers.7.mlp.down_proj.weight": "model-00003-of-00011.safetensors",
343
+ "model.layers.7.mlp.gate_proj.weight": "model-00003-of-00011.safetensors",
344
+ "model.layers.7.mlp.up_proj.weight": "model-00003-of-00011.safetensors",
345
+ "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00011.safetensors",
346
+ "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00011.safetensors",
347
+ "model.layers.7.self_attn.o_proj.weight": "model-00003-of-00011.safetensors",
348
+ "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00011.safetensors",
349
+ "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00011.safetensors",
350
+ "model.layers.8.input_layernorm.weight": "model-00003-of-00011.safetensors",
351
+ "model.layers.8.mlp.down_proj.weight": "model-00003-of-00011.safetensors",
352
+ "model.layers.8.mlp.gate_proj.weight": "model-00003-of-00011.safetensors",
353
+ "model.layers.8.mlp.up_proj.weight": "model-00003-of-00011.safetensors",
354
+ "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00011.safetensors",
355
+ "model.layers.8.self_attn.k_proj.weight": "model-00003-of-00011.safetensors",
356
+ "model.layers.8.self_attn.o_proj.weight": "model-00003-of-00011.safetensors",
357
+ "model.layers.8.self_attn.q_proj.weight": "model-00003-of-00011.safetensors",
358
+ "model.layers.8.self_attn.v_proj.weight": "model-00003-of-00011.safetensors",
359
+ "model.layers.9.input_layernorm.weight": "model-00003-of-00011.safetensors",
360
+ "model.layers.9.mlp.down_proj.weight": "model-00003-of-00011.safetensors",
361
+ "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00011.safetensors",
362
+ "model.layers.9.mlp.up_proj.weight": "model-00003-of-00011.safetensors",
363
+ "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00011.safetensors",
364
+ "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00011.safetensors",
365
+ "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00011.safetensors",
366
+ "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00011.safetensors",
367
+ "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00011.safetensors",
368
+ "model.norm.weight": "model-00011-of-00011.safetensors"
369
+ }
370
+ }
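The index above exists so that loaders can fetch individual tensors without reading all eleven shards. A small sketch of using the weight_map directly with the safetensors `safe_open` API, assuming the shards and the index have been downloaded to a local directory (the directory name here is hypothetical):

```python
# Resolve which shard holds a given weight, then read only that tensor.
import json
from pathlib import Path

from safetensors import safe_open

ckpt_dir = Path("ShiningValiantXS")  # hypothetical local download location
index = json.loads((ckpt_dir / "model.safetensors.index.json").read_text())

name = "model.layers.0.self_attn.q_proj.weight"
shard_file = index["weight_map"][name]  # "model-00001-of-00011.safetensors"

with safe_open(str(ckpt_dir / shard_file), framework="pt") as f:
    tensor = f.get_tensor(name)

print(name, tuple(tensor.shape))  # expected (5120, 5120) for this 13b config
```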
.ipynb_checkpoints/special_tokens_map-checkpoint.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
README.md CHANGED
@@ -22,21 +22,19 @@ license: llama2
  Shining Valiant XS is a chat model built on the Llama 2 architecture, finetuned on our data for insight, creativity, passion, and friendliness.
  - Uses the llama-2-13b-chat model, with safetensors
  - Trained through multiple finetuning runs on public and private data
- - the personality of our 70b [Shining Valiant](https://huggingface.co/ValiantLabs/ShiningValiant) model, now at 13b!
+ - the personality of our 70b [Shining Valiant](https://huggingface.co/ValiantLabs/ShiningValiant) model, now at 13b! **Our new release features greatly expanded personality capability**, bringing a more immersive chat experience!

  ## Version

- This is Version **1.1** of Shining Valiant XS.
+ This is Version **1.2** of Shining Valiant XS. We've greatly expanded our personality dataset and fixed some bugs to deliver our strongest real-chat experience so far.

- Version 1.2 is now in training. (We're also exploring new models and architectures!)
+ (We're also exploring **new models and architectures**, to deliver helpful open source capabilities for users and creators!)

- New models are released for everyone once our team's training and validation process is complete.
+ Previous versions remain available in the repository. New models will be released for everyone once our team's training and validation process is complete.

  ## Evaluation

- | Model | Avg | ARC | HS | MMLU | TQA | WG | GSM |
- |-----------------------|--------|-------|-------|--------|-------|-------|-------|
- | **Shining Valiant XS 1.1** | 62.48 | 64.42 | 83.58 | 60.37 | 55.00 | 76.80 | 34.72 |
+ Version 1.2 is awaiting evaluation from the Open LLM leaderboard.

  ## Prompting Guide
  Shining Valiant XS uses the same prompt format as Llama 2 Chat - feel free to use your existing prompts and scripts!
@@ -49,13 +47,14 @@ A few examples of different formats:
  3. [INST] << SYS >> You are an intelligent, helpful AI assistant. << /SYS >> Deep dive about a country with interesting history: [/INST]

  ## The Model
- Shining Valiant XS is built on top of Dynamic Factor, which uses Llama 2's 13b parameter architecture and features upgraded general capability.
+ Shining Valiant XS is built on top of Diamond Force, which uses Llama 2's 13b parameter architecture and features upgraded general and chat capability.

  From there, we've created Shining Valiant XS through multiple finetuning runs on different compositions of our private dataset, the same one we use for our [Shining Valiant](https://huggingface.co/ValiantLabs/ShiningValiant) model.

  Our private data focuses primarily on applying Shining Valiant's personality: she's friendly, enthusiastic, insightful, knowledgeable, and loves to learn!

- We are actively working on expanding and improving the Shining Valiant dataset for use in future releases of the Shining Valiant series of models.
+ With this release, the personality component of our Shining Valiant dataset has been greatly improved. We're excited to use it in future releases of this model and others.
+

config.json CHANGED
@@ -4,6 +4,7 @@
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
+ "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
@@ -21,7 +22,7 @@
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
- "transformers_version": "4.35.2",
+ "transformers_version": "4.36.2",
  "use_cache": false,
  "vocab_size": 32000
  }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
- "transformers_version": "4.35.2"
+ "transformers_version": "4.36.2"
  }
special_tokens_map.json CHANGED
@@ -13,13 +13,6 @@
  "rstrip": false,
  "single_word": false
  },
- "pad_token": {
- "content": "</s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
  "unk_token": {
  "content": "<unk>",
  "lstrip": false,
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
  {
  "version": "1.0",
- "truncation": {
- "direction": "Right",
- "max_length": 2048,
- "strategy": "LongestFirst",
- "stride": 0
- },
+ "truncation": null,
  "padding": null,
  "added_tokens": [
  {
tokenizer_config.json CHANGED
@@ -1,43 +1,35 @@
  {
- "added_tokens_decoder": {
- "0": {
- "content": "<unk>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "1": {
- "content": "<s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "2": {
- "content": "</s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- }
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "bos_token": {
+ "__type": "AddedToken",
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
  },
- "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
- "eos_token": "</s>",
+ "eos_token": {
+ "__type": "AddedToken",
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
  "legacy": false,
- "max_length": 2048,
  "model_max_length": 1000000000000000019884624838656,
- "pad_token": "</s>",
+ "pad_token": null,
  "padding_side": "right",
  "sp_model_kwargs": {},
- "stride": 0,
  "tokenizer_class": "LlamaTokenizer",
- "truncation_side": "right",
- "truncation_strategy": "longest_first",
- "unk_token": "<unk>",
- "use_default_system_prompt": false
+ "unk_token": {
+ "__type": "AddedToken",
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
  }
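One practical note on the tokenizer changes above: the updated tokenizer_config.json sets pad_token to null (the previous config padded with </s>), so batched tokenization now needs an explicit pad token from the caller. Below is a hedged sketch of the common workaround of reusing the eos token; this is a caller-side convention, not something the commit itself prescribes.

```python
# With pad_token now null, set one explicitly before padding batches.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ValiantLabs/ShiningValiantXS")
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # </s>, id 2, per special_tokens_map.json

batch = tokenizer(
    ["[INST] Hello! [/INST]", "[INST] Write me a short thank you letter. [/INST]"],
    padding=True,
    return_tensors="pt",
)
print(batch["input_ids"].shape, batch["attention_mask"].shape)
```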