[WIP] Upload folder using huggingface_hub (multi-commit eb71ae4c1e718d1dbc1682c4264be4bb3dc7dc948df9919b218bddc3bd69313f)

#4
by sequelbox - opened
.gitattributes CHANGED
@@ -33,4 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
README.md CHANGED
@@ -29,7 +29,6 @@ tags:
29
  base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
30
  datasets:
31
  - sequelbox/Celestia
32
- - sequelbox/Spurline
33
  - sequelbox/Supernova
34
  model_type: llama
35
  model-index:
@@ -45,7 +44,7 @@ model-index:
45
  num_few_shot: 5
46
  metrics:
47
  - type: acc
48
- value: 75.85
49
  name: acc
50
  - task:
51
  type: text-generation
@@ -57,7 +56,7 @@ model-index:
57
  num_few_shot: 5
58
  metrics:
59
  - type: acc
60
- value: 68.75
61
  name: acc
62
  - task:
63
  type: text-generation
@@ -69,7 +68,7 @@ model-index:
69
  num_few_shot: 5
70
  metrics:
71
  - type: acc
72
- value: 73.23
73
  name: acc
74
  - task:
75
  type: text-generation
@@ -81,7 +80,7 @@ model-index:
81
  num_few_shot: 5
82
  metrics:
83
  - type: acc
84
- value: 46.00
85
  name: acc
86
  - task:
87
  type: text-generation
@@ -93,19 +92,7 @@ model-index:
93
  num_few_shot: 5
94
  metrics:
95
  - type: acc
96
- value: 44.33
97
- name: acc
98
- - task:
99
- type: text-generation
100
- name: Text Generation
101
- dataset:
102
- name: MMLU Conceptual Physics (5-Shot)
103
- type: MMLU
104
- args:
105
- num_few_shot: 5
106
- metrics:
107
- - type: acc
108
- value: 53.19
109
  name: acc
110
  - task:
111
  type: text-generation
@@ -117,7 +104,7 @@ model-index:
117
  num_few_shot: 5
118
  metrics:
119
  - type: acc
120
- value: 37.25
121
  name: acc
122
  - task:
123
  type: text-generation
@@ -141,7 +128,7 @@ model-index:
141
  num_few_shot: 5
142
  metrics:
143
  - type: acc
144
- value: 56.00
145
  name: acc
146
  - task:
147
  type: text-generation
@@ -153,19 +140,19 @@ model-index:
153
  num_few_shot: 5
154
  metrics:
155
  - type: acc
156
- value: 63.00
157
  name: acc
158
  - task:
159
  type: text-generation
160
  name: Text Generation
161
  dataset:
162
- name: MMLU Astronomy (5-shot)
163
  type: MMLU
164
  args:
165
  num_few_shot: 5
166
  metrics:
167
  - type: acc
168
- value: 63.16
169
  name: acc
170
  - task:
171
  type: text-generation
@@ -177,7 +164,7 @@ model-index:
177
  num_few_shot: 0
178
  metrics:
179
  - type: inst_level_strict_acc and prompt_level_strict_acc
180
- value: 64.96
181
  name: strict accuracy
182
  source:
183
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
@@ -207,7 +194,7 @@ model-index:
207
  num_few_shot: 4
208
  metrics:
209
  - type: exact_match
210
- value: 12.92
211
  name: exact match
212
  source:
213
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
@@ -222,7 +209,7 @@ model-index:
222
  num_few_shot: 0
223
  metrics:
224
  - type: acc_norm
225
- value: 8.05
226
  name: acc_norm
227
  source:
228
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
@@ -237,7 +224,7 @@ model-index:
237
  num_few_shot: 0
238
  metrics:
239
  - type: acc_norm
240
- value: 7.46
241
  name: acc_norm
242
  source:
243
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
@@ -254,7 +241,7 @@ model-index:
254
  num_few_shot: 5
255
  metrics:
256
  - type: acc
257
- value: 26.46
258
  name: accuracy
259
  source:
260
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
@@ -268,15 +255,14 @@ license: llama3.1
268
 
269
  Shining Valiant 2 is a chat model built on Llama 3.1 8b, finetuned on our data for friendship, insight, knowledge and enthusiasm.
270
  - Finetuned on [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) for best available general performance
271
- - Trained on a variety of our high-quality open-source data, focused on science, engineering, technical knowledge, and structured reasoning
272
- - Also available for [Llama 3.1 70b](https://huggingface.co/ValiantLabs/Llama3.1-70B-ShiningValiant2) and [Llama 3.2 3b](https://huggingface.co/ValiantLabs/Llama3.2-3B-ShiningValiant2)!
273
 
274
 
275
  ## Version
276
 
277
- This is the **2024-11-04** release of Shining Valiant 2 for Llama 3.1 8b.
278
 
279
- This release uses our newest datasets, open-sourced for everyone's use, including our expanded [science-instruct dataset](https://huggingface.co/datasets/sequelbox/Celestia). This release features improvements in logical thinking and structured reasoning as well as physics, chemistry, biology, astronomy, Earth science, computer science, and information theory.
280
 
281
  Future upgrades will continue to expand Shining Valiant's technical knowledge base.
282
 
@@ -316,9 +302,9 @@ print(outputs[0]["generated_text"][-1])
316
  ## The Model
317
  Shining Valiant 2 is built on top of Llama 3.1 8b Instruct.
318
 
319
- The current version of Shining Valiant 2 is trained on technical knowledge using [sequelbox/Celestia](https://huggingface.co/datasets/sequelbox/Celestia), complex reasoning using [sequelbox/Spurline](https://huggingface.co/datasets/sequelbox/Spurline), and general chat capability using [sequelbox/Supernova](https://huggingface.co/datasets/sequelbox/Supernova).
320
 
321
- We're super excited that Shining Valiant's dataset has been fully open-sourced! She's friendly, enthusiastic, insightful, knowledgeable, and loves to learn! Magical.
322
 
323
 
324
  ![image/jpeg](https://cdn-uploads.huggingface.co/production/uploads/63444f2687964b331809eb55/VCJ8Fmefd8cdVhXSSxJiD.jpeg)
 
29
  base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
30
  datasets:
31
  - sequelbox/Celestia
 
32
  - sequelbox/Supernova
33
  model_type: llama
34
  model-index:
 
44
  num_few_shot: 5
45
  metrics:
46
  - type: acc
47
+ value: 77.35
48
  name: acc
49
  - task:
50
  type: text-generation
 
56
  num_few_shot: 5
57
  metrics:
58
  - type: acc
59
+ value: 76.39
60
  name: acc
61
  - task:
62
  type: text-generation
 
68
  num_few_shot: 5
69
  metrics:
70
  - type: acc
71
+ value: 79.03
72
  name: acc
73
  - task:
74
  type: text-generation
 
80
  num_few_shot: 5
81
  metrics:
82
  - type: acc
83
+ value: 50.0
84
  name: acc
85
  - task:
86
  type: text-generation
 
92
  num_few_shot: 5
93
  metrics:
94
  - type: acc
95
+ value: 53.2
 
 
 
 
 
 
 
 
 
 
 
 
96
  name: acc
97
  - task:
98
  type: text-generation
 
104
  num_few_shot: 5
105
  metrics:
106
  - type: acc
107
+ value: 43.14
108
  name: acc
109
  - task:
110
  type: text-generation
 
128
  num_few_shot: 5
129
  metrics:
130
  - type: acc
131
+ value: 55.0
132
  name: acc
133
  - task:
134
  type: text-generation
 
140
  num_few_shot: 5
141
  metrics:
142
  - type: acc
143
+ value: 66.0
144
  name: acc
145
  - task:
146
  type: text-generation
147
  name: Text Generation
148
  dataset:
149
+ name: MMLU STEM (5-Shot)
150
  type: MMLU
151
  args:
152
  num_few_shot: 5
153
  metrics:
154
  - type: acc
155
+ value: 55.57
156
  name: acc
157
  - task:
158
  type: text-generation
 
164
  num_few_shot: 0
165
  metrics:
166
  - type: inst_level_strict_acc and prompt_level_strict_acc
167
+ value: 65.24
168
  name: strict accuracy
169
  source:
170
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
 
194
  num_few_shot: 4
195
  metrics:
196
  - type: exact_match
197
+ value: 11.63
198
  name: exact match
199
  source:
200
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
 
209
  num_few_shot: 0
210
  metrics:
211
  - type: acc_norm
212
+ value: 8.95
213
  name: acc_norm
214
  source:
215
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
 
224
  num_few_shot: 0
225
  metrics:
226
  - type: acc_norm
227
+ value: 7.19
228
  name: acc_norm
229
  source:
230
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
 
241
  num_few_shot: 5
242
  metrics:
243
  - type: acc
244
+ value: 26.38
245
  name: accuracy
246
  source:
247
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
 
255
 
256
  Shining Valiant 2 is a chat model built on Llama 3.1 8b, finetuned on our data for friendship, insight, knowledge and enthusiasm.
257
  - Finetuned on [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) for best available general performance
258
+ - Trained on a variety of high-quality data, focused on science, engineering, technical knowledge, and structured reasoning
 
259
 
260
 
261
  ## Version
262
 
263
+ This is the **2024-09-16** release of Shining Valiant 2 for Llama 3.1 8b.
264
 
265
+ We've improved and open-sourced our new baseline [science-instruct dataset](https://huggingface.co/datasets/sequelbox/Celestia). This release features improvements in physics, chemistry, biology, and computer science.
266
 
267
  Future upgrades will continue to expand Shining Valiant's technical knowledge base.
268
 
 
302
  ## The Model
303
  Shining Valiant 2 is built on top of Llama 3.1 8b Instruct.
304
 
305
+ The current version of Shining Valiant 2 is trained on technical knowledge using [sequelbox/Celestia](https://huggingface.co/datasets/sequelbox/Celestia) and general chat capability using [sequelbox/Supernova](https://huggingface.co/datasets/sequelbox/Supernova).
306
 
307
+ Our private data adds specialist knowledge and Shining Valiant's personality: she's friendly, enthusiastic, insightful, knowledgeable, and loves to learn! Magical. (As a general note: we're hoping to replace this part of Shining Valiant's dataset with synthetic data and open-source it soon!)
308
 
309
 
310
  ![image/jpeg](https://cdn-uploads.huggingface.co/production/uploads/63444f2687964b331809eb55/VCJ8Fmefd8cdVhXSSxJiD.jpeg)
config.json CHANGED
@@ -11,7 +11,6 @@
11
  128008,
12
  128009
13
  ],
14
- "head_dim": 128,
15
  "hidden_act": "silu",
16
  "hidden_size": 4096,
17
  "initializer_range": 0.02,
@@ -34,7 +33,7 @@
34
  "rope_theta": 500000.0,
35
  "tie_word_embeddings": false,
36
  "torch_dtype": "float32",
37
- "transformers_version": "4.46.1",
38
  "use_cache": true,
39
  "vocab_size": 128256
40
  }
 
11
  128008,
12
  128009
13
  ],
 
14
  "hidden_act": "silu",
15
  "hidden_size": 4096,
16
  "initializer_range": 0.02,
 
33
  "rope_theta": 500000.0,
34
  "tie_word_embeddings": false,
35
  "torch_dtype": "float32",
36
+ "transformers_version": "4.44.2",
37
  "use_cache": true,
38
  "vocab_size": 128256
39
  }
generation_config.json CHANGED
@@ -8,5 +8,5 @@
8
  ],
9
  "temperature": 0.6,
10
  "top_p": 0.9,
11
- "transformers_version": "4.46.1"
12
  }
 
8
  ],
9
  "temperature": 0.6,
10
  "top_p": 0.9,
11
+ "transformers_version": "4.44.2"
12
  }
model-00001-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6efbffa72857ec90e0ea4310a6025190a4e75eef43e10ec9d46025412e1616a8
3
  size 4886466168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcebe7b4eacb57cbc4e03e60f0d4e1eec8a1471455a3fdbc953edfaca5c8763e
3
  size 4886466168
model-00002-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c569b9d9276836eb9f31fda31ea667ee3ad1c132b852ec94b4b9a7a2598db0ca
3
  size 4832007448
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:756b38e9412a00dc12d14823d48c9a71732a1c0318fd9bb48661e9589ddb9ac1
3
  size 4832007448
model-00003-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10413c97beeea538cb108448193c790d5224192982c2837b1dc3a54a1d5ff50b
3
  size 4999813112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d3ff8801d13032241f11b23af8bf458181a87b41b3e6497cf7cc503a0469ce6
3
  size 4999813112
model-00004-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ef021115a20513e5b0db4178345a1f4959c59eb73fbb3679aca24055ead5d0e
3
  size 4999813128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35ee4a044f0e1c92ba26c63b584ac344740d70fff1f3d86d073810bc8e610d66
3
  size 4999813128
model-00005-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26822b4a9c2cc0f9d92e0c1522f517aac4a20a6b936c706e1ca68ed1beaf8b44
3
  size 4832007496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b6123fecf735935528930e989780254f5bd5eb78b872cda5677f04479d09c25
3
  size 4832007496
model-00006-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f64f7cdbfd3903f7fea88117c49a8533a1ffa928d1ce4354d0d8431faddffe4
3
  size 4999813120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:895b3445cc9cb423b5c8b67c289eecd411f860ea3d7255857beb8fcb8e990621
3
  size 4999813120
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff