Crystalcareai committed
Commit 632e4e6
1 Parent(s): f40ca02

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. README.md +604 -0
  2. added_tokens.json +4 -0
  3. config.json +31 -0
  4. generation_config.json +7 -0
  5. model-00001-of-00059.safetensors +3 -0
  6. model-00002-of-00059.safetensors +3 -0
  7. model-00003-of-00059.safetensors +3 -0
  8. model-00004-of-00059.safetensors +3 -0
  9. model-00005-of-00059.safetensors +3 -0
  10. model-00006-of-00059.safetensors +3 -0
  11. model-00007-of-00059.safetensors +3 -0
  12. model-00008-of-00059.safetensors +3 -0
  13. model-00009-of-00059.safetensors +3 -0
  14. model-00010-of-00059.safetensors +3 -0
  15. model-00011-of-00059.safetensors +3 -0
  16. model-00012-of-00059.safetensors +3 -0
  17. model-00013-of-00059.safetensors +3 -0
  18. model-00014-of-00059.safetensors +3 -0
  19. model-00015-of-00059.safetensors +3 -0
  20. model-00016-of-00059.safetensors +3 -0
  21. model-00017-of-00059.safetensors +3 -0
  22. model-00018-of-00059.safetensors +3 -0
  23. model-00019-of-00059.safetensors +3 -0
  24. model-00020-of-00059.safetensors +3 -0
  25. model-00021-of-00059.safetensors +3 -0
  26. model-00022-of-00059.safetensors +3 -0
  27. model-00023-of-00059.safetensors +3 -0
  28. model-00024-of-00059.safetensors +3 -0
  29. model-00025-of-00059.safetensors +3 -0
  30. model-00026-of-00059.safetensors +3 -0
  31. model-00027-of-00059.safetensors +3 -0
  32. model-00028-of-00059.safetensors +3 -0
  33. model-00029-of-00059.safetensors +3 -0
  34. model-00030-of-00059.safetensors +3 -0
  35. model-00031-of-00059.safetensors +3 -0
  36. model-00032-of-00059.safetensors +3 -0
  37. model-00033-of-00059.safetensors +3 -0
  38. model-00034-of-00059.safetensors +3 -0
  39. model-00035-of-00059.safetensors +3 -0
  40. model-00036-of-00059.safetensors +3 -0
  41. model-00037-of-00059.safetensors +3 -0
  42. model-00038-of-00059.safetensors +3 -0
  43. model-00039-of-00059.safetensors +3 -0
  44. model-00040-of-00059.safetensors +3 -0
  45. model-00041-of-00059.safetensors +3 -0
  46. model-00042-of-00059.safetensors +3 -0
  47. model-00043-of-00059.safetensors +3 -0
  48. model-00044-of-00059.safetensors +3 -0
  49. model-00045-of-00059.safetensors +3 -0
  50. model-00046-of-00059.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,604 @@
+ ---
+ license: apache-2.0
+ base_model: mistral-community/Mixtral-8x22B-v0.1
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: out
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ [<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
+ <details><summary>See axolotl config</summary>
+
+ axolotl version: `0.4.0`
+ ```yaml
+ base_model: mistral-community/Mixtral-8x22B-v0.1
+ model_type: AutoModelForCausalLM
+ tokenizer_type: LlamaTokenizer
+ trust_remote_code: true
+
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+
+ unfrozen_parameters:
+ - ^lm_head.weight$
+ - ^model.embed_tokens.weight$
+ - model.layers.0.self_attn.q_proj
+ - model.layers.1.self_attn.q_proj
+ - model.layers.2.self_attn.q_proj
+ - model.layers.22.self_attn.q_proj
+ - model.layers.27.self_attn.q_proj
+ - model.layers.28.self_attn.q_proj
+ - model.layers.13.self_attn.q_proj
+ - model.layers.21.self_attn.q_proj
+ - model.layers.24.self_attn.q_proj
+ - model.layers.14.self_attn.q_proj
+ - model.layers.15.self_attn.q_proj
+ - model.layers.11.self_attn.q_proj
+ - model.layers.20.self_attn.q_proj
+ - model.layers.23.self_attn.q_proj
+ - model.layers.30.self_attn.k_proj
+ - model.layers.31.self_attn.k_proj
+ - model.layers.25.self_attn.k_proj
+ - model.layers.23.self_attn.k_proj
+ - model.layers.27.self_attn.k_proj
+ - model.layers.26.self_attn.k_proj
+ - model.layers.29.self_attn.k_proj
+ - model.layers.28.self_attn.k_proj
+ - model.layers.24.self_attn.k_proj
+ - model.layers.16.self_attn.k_proj
+ - model.layers.19.self_attn.k_proj
+ - model.layers.22.self_attn.k_proj
+ - model.layers.20.self_attn.k_proj
+ - model.layers.6.self_attn.k_proj
+ - model.layers.22.self_attn.v_proj
+ - model.layers.29.self_attn.v_proj
+ - model.layers.31.self_attn.v_proj
+ - model.layers.5.self_attn.v_proj
+ - model.layers.8.self_attn.v_proj
+ - model.layers.4.self_attn.v_proj
+ - model.layers.25.self_attn.v_proj
+ - model.layers.30.self_attn.v_proj
+ - model.layers.17.self_attn.v_proj
+ - model.layers.23.self_attn.v_proj
+ - model.layers.28.self_attn.v_proj
+ - model.layers.9.self_attn.v_proj
+ - model.layers.26.self_attn.v_proj
+ - model.layers.27.self_attn.v_proj
+ - model.layers.20.self_attn.o_proj
+ - model.layers.19.self_attn.o_proj
+ - model.layers.16.self_attn.o_proj
+ - model.layers.13.self_attn.o_proj
+ - model.layers.18.self_attn.o_proj
+ - model.layers.17.self_attn.o_proj
+ - model.layers.12.self_attn.o_proj
+ - model.layers.15.self_attn.o_proj
+ - model.layers.14.self_attn.o_proj
+ - model.layers.22.self_attn.o_proj
+ - model.layers.23.self_attn.o_proj
+ - model.layers.21.self_attn.o_proj
+ - model.layers.10.self_attn.o_proj
+ - model.layers.0.self_attn.o_proj
+ - model.layers.0.block_sparse_moe.experts.0.w1
+ - model.layers.1.block_sparse_moe.experts.0.w1
+ - model.layers.2.block_sparse_moe.experts.0.w1
+ - model.layers.3.block_sparse_moe.experts.0.w1
+ - model.layers.4.block_sparse_moe.experts.0.w1
+ - model.layers.5.block_sparse_moe.experts.0.w1
+ - model.layers.6.block_sparse_moe.experts.0.w1
+ - model.layers.7.block_sparse_moe.experts.0.w1
+ - model.layers.8.block_sparse_moe.experts.0.w1
+ - model.layers.9.block_sparse_moe.experts.0.w1
+ - model.layers.10.block_sparse_moe.experts.0.w1
+ - model.layers.11.block_sparse_moe.experts.0.w1
+ - model.layers.12.block_sparse_moe.experts.0.w1
+ - model.layers.13.block_sparse_moe.experts.0.w1
+ - model.layers.0.block_sparse_moe.experts.0.w2
+ - model.layers.1.block_sparse_moe.experts.0.w2
+ - model.layers.2.block_sparse_moe.experts.0.w2
+ - model.layers.3.block_sparse_moe.experts.0.w2
+ - model.layers.4.block_sparse_moe.experts.0.w2
+ - model.layers.5.block_sparse_moe.experts.0.w2
+ - model.layers.6.block_sparse_moe.experts.0.w2
+ - model.layers.7.block_sparse_moe.experts.0.w2
+ - model.layers.8.block_sparse_moe.experts.0.w2
+ - model.layers.9.block_sparse_moe.experts.0.w2
+ - model.layers.10.block_sparse_moe.experts.0.w2
+ - model.layers.11.block_sparse_moe.experts.0.w2
+ - model.layers.12.block_sparse_moe.experts.0.w2
+ - model.layers.13.block_sparse_moe.experts.0.w2
+ - model.layers.0.block_sparse_moe.experts.0.w3
+ - model.layers.1.block_sparse_moe.experts.0.w3
+ - model.layers.2.block_sparse_moe.experts.0.w3
+ - model.layers.3.block_sparse_moe.experts.0.w3
+ - model.layers.4.block_sparse_moe.experts.0.w3
+ - model.layers.5.block_sparse_moe.experts.0.w3
+ - model.layers.6.block_sparse_moe.experts.0.w3
+ - model.layers.7.block_sparse_moe.experts.0.w3
+ - model.layers.8.block_sparse_moe.experts.0.w3
+ - model.layers.9.block_sparse_moe.experts.0.w3
+ - model.layers.10.block_sparse_moe.experts.0.w3
+ - model.layers.11.block_sparse_moe.experts.0.w3
+ - model.layers.12.block_sparse_moe.experts.0.w3
+ - model.layers.13.block_sparse_moe.experts.0.w3
+ - model.layers.0.block_sparse_moe.experts.1.w1
+ - model.layers.1.block_sparse_moe.experts.1.w1
+ - model.layers.2.block_sparse_moe.experts.1.w1
+ - model.layers.3.block_sparse_moe.experts.1.w1
+ - model.layers.4.block_sparse_moe.experts.1.w1
+ - model.layers.5.block_sparse_moe.experts.1.w1
+ - model.layers.6.block_sparse_moe.experts.1.w1
+ - model.layers.7.block_sparse_moe.experts.1.w1
+ - model.layers.8.block_sparse_moe.experts.1.w1
+ - model.layers.9.block_sparse_moe.experts.1.w1
+ - model.layers.10.block_sparse_moe.experts.1.w1
+ - model.layers.11.block_sparse_moe.experts.1.w1
+ - model.layers.12.block_sparse_moe.experts.1.w1
+ - model.layers.13.block_sparse_moe.experts.1.w1
+ - model.layers.40.block_sparse_moe.experts.1.w2
+ - model.layers.0.block_sparse_moe.experts.1.w2
+ - model.layers.1.block_sparse_moe.experts.1.w2
+ - model.layers.2.block_sparse_moe.experts.1.w2
+ - model.layers.3.block_sparse_moe.experts.1.w2
+ - model.layers.4.block_sparse_moe.experts.1.w2
+ - model.layers.5.block_sparse_moe.experts.1.w2
+ - model.layers.6.block_sparse_moe.experts.1.w2
+ - model.layers.7.block_sparse_moe.experts.1.w2
+ - model.layers.8.block_sparse_moe.experts.1.w2
+ - model.layers.9.block_sparse_moe.experts.1.w2
+ - model.layers.10.block_sparse_moe.experts.1.w2
+ - model.layers.11.block_sparse_moe.experts.1.w2
+ - model.layers.12.block_sparse_moe.experts.1.w2
+ - model.layers.5.block_sparse_moe.experts.1.w3
+ - model.layers.0.block_sparse_moe.experts.1.w3
+ - model.layers.1.block_sparse_moe.experts.1.w3
+ - model.layers.2.block_sparse_moe.experts.1.w3
+ - model.layers.3.block_sparse_moe.experts.1.w3
+ - model.layers.4.block_sparse_moe.experts.1.w3
+ - model.layers.6.block_sparse_moe.experts.1.w3
+ - model.layers.7.block_sparse_moe.experts.1.w3
+ - model.layers.8.block_sparse_moe.experts.1.w3
+ - model.layers.9.block_sparse_moe.experts.1.w3
+ - model.layers.10.block_sparse_moe.experts.1.w3
+ - model.layers.11.block_sparse_moe.experts.1.w3
+ - model.layers.12.block_sparse_moe.experts.1.w3
+ - model.layers.13.block_sparse_moe.experts.1.w3
+ - model.layers.1.block_sparse_moe.experts.2.w1
+ - model.layers.0.block_sparse_moe.experts.2.w1
+ - model.layers.2.block_sparse_moe.experts.2.w1
+ - model.layers.3.block_sparse_moe.experts.2.w1
+ - model.layers.4.block_sparse_moe.experts.2.w1
+ - model.layers.5.block_sparse_moe.experts.2.w1
+ - model.layers.6.block_sparse_moe.experts.2.w1
+ - model.layers.7.block_sparse_moe.experts.2.w1
+ - model.layers.8.block_sparse_moe.experts.2.w1
+ - model.layers.9.block_sparse_moe.experts.2.w1
+ - model.layers.10.block_sparse_moe.experts.2.w1
+ - model.layers.11.block_sparse_moe.experts.2.w1
+ - model.layers.12.block_sparse_moe.experts.2.w1
+ - model.layers.13.block_sparse_moe.experts.2.w1
+ - model.layers.1.block_sparse_moe.experts.2.w2
+ - model.layers.0.block_sparse_moe.experts.2.w2
+ - model.layers.2.block_sparse_moe.experts.2.w2
+ - model.layers.3.block_sparse_moe.experts.2.w2
+ - model.layers.4.block_sparse_moe.experts.2.w2
+ - model.layers.5.block_sparse_moe.experts.2.w2
+ - model.layers.6.block_sparse_moe.experts.2.w2
+ - model.layers.7.block_sparse_moe.experts.2.w2
+ - model.layers.8.block_sparse_moe.experts.2.w2
+ - model.layers.9.block_sparse_moe.experts.2.w2
+ - model.layers.10.block_sparse_moe.experts.2.w2
+ - model.layers.11.block_sparse_moe.experts.2.w2
+ - model.layers.12.block_sparse_moe.experts.2.w2
+ - model.layers.13.block_sparse_moe.experts.2.w2
+ - model.layers.1.block_sparse_moe.experts.2.w3
+ - model.layers.0.block_sparse_moe.experts.2.w3
+ - model.layers.2.block_sparse_moe.experts.2.w3
+ - model.layers.3.block_sparse_moe.experts.2.w3
+ - model.layers.4.block_sparse_moe.experts.2.w3
+ - model.layers.5.block_sparse_moe.experts.2.w3
+ - model.layers.6.block_sparse_moe.experts.2.w3
+ - model.layers.7.block_sparse_moe.experts.2.w3
+ - model.layers.8.block_sparse_moe.experts.2.w3
+ - model.layers.9.block_sparse_moe.experts.2.w3
+ - model.layers.10.block_sparse_moe.experts.2.w3
+ - model.layers.11.block_sparse_moe.experts.2.w3
+ - model.layers.12.block_sparse_moe.experts.2.w3
+ - model.layers.13.block_sparse_moe.experts.2.w3
+ - model.layers.2.block_sparse_moe.experts.3.w1
+ - model.layers.1.block_sparse_moe.experts.3.w1
+ - model.layers.0.block_sparse_moe.experts.3.w1
+ - model.layers.3.block_sparse_moe.experts.3.w1
+ - model.layers.4.block_sparse_moe.experts.3.w1
+ - model.layers.5.block_sparse_moe.experts.3.w1
+ - model.layers.6.block_sparse_moe.experts.3.w1
+ - model.layers.7.block_sparse_moe.experts.3.w1
+ - model.layers.8.block_sparse_moe.experts.3.w1
+ - model.layers.9.block_sparse_moe.experts.3.w1
+ - model.layers.10.block_sparse_moe.experts.3.w1
+ - model.layers.11.block_sparse_moe.experts.3.w1
+ - model.layers.12.block_sparse_moe.experts.3.w1
+ - model.layers.13.block_sparse_moe.experts.3.w1
+ - model.layers.2.block_sparse_moe.experts.3.w2
+ - model.layers.1.block_sparse_moe.experts.3.w2
+ - model.layers.0.block_sparse_moe.experts.3.w2
+ - model.layers.3.block_sparse_moe.experts.3.w2
+ - model.layers.4.block_sparse_moe.experts.3.w2
+ - model.layers.5.block_sparse_moe.experts.3.w2
+ - model.layers.6.block_sparse_moe.experts.3.w2
+ - model.layers.7.block_sparse_moe.experts.3.w2
+ - model.layers.8.block_sparse_moe.experts.3.w2
+ - model.layers.9.block_sparse_moe.experts.3.w2
+ - model.layers.10.block_sparse_moe.experts.3.w2
+ - model.layers.11.block_sparse_moe.experts.3.w2
+ - model.layers.12.block_sparse_moe.experts.3.w2
+ - model.layers.13.block_sparse_moe.experts.3.w2
+ - model.layers.2.block_sparse_moe.experts.3.w3
+ - model.layers.1.block_sparse_moe.experts.3.w3
+ - model.layers.0.block_sparse_moe.experts.3.w3
+ - model.layers.3.block_sparse_moe.experts.3.w3
+ - model.layers.4.block_sparse_moe.experts.3.w3
+ - model.layers.5.block_sparse_moe.experts.3.w3
+ - model.layers.6.block_sparse_moe.experts.3.w3
+ - model.layers.7.block_sparse_moe.experts.3.w3
+ - model.layers.8.block_sparse_moe.experts.3.w3
+ - model.layers.9.block_sparse_moe.experts.3.w3
+ - model.layers.10.block_sparse_moe.experts.3.w3
+ - model.layers.11.block_sparse_moe.experts.3.w3
+ - model.layers.12.block_sparse_moe.experts.3.w3
+ - model.layers.13.block_sparse_moe.experts.3.w3
+ - model.layers.3.block_sparse_moe.experts.4.w1
+ - model.layers.2.block_sparse_moe.experts.4.w1
+ - model.layers.1.block_sparse_moe.experts.4.w1
+ - model.layers.0.block_sparse_moe.experts.4.w1
+ - model.layers.4.block_sparse_moe.experts.4.w1
+ - model.layers.5.block_sparse_moe.experts.4.w1
+ - model.layers.6.block_sparse_moe.experts.4.w1
+ - model.layers.7.block_sparse_moe.experts.4.w1
+ - model.layers.8.block_sparse_moe.experts.4.w1
+ - model.layers.9.block_sparse_moe.experts.4.w1
+ - model.layers.10.block_sparse_moe.experts.4.w1
+ - model.layers.11.block_sparse_moe.experts.4.w1
+ - model.layers.12.block_sparse_moe.experts.4.w1
+ - model.layers.13.block_sparse_moe.experts.4.w1
+ - model.layers.2.block_sparse_moe.experts.4.w2
+ - model.layers.3.block_sparse_moe.experts.4.w2
+ - model.layers.1.block_sparse_moe.experts.4.w2
+ - model.layers.20.block_sparse_moe.experts.4.w2
+ - model.layers.0.block_sparse_moe.experts.4.w2
+ - model.layers.4.block_sparse_moe.experts.4.w2
+ - model.layers.5.block_sparse_moe.experts.4.w2
+ - model.layers.6.block_sparse_moe.experts.4.w2
+ - model.layers.7.block_sparse_moe.experts.4.w2
+ - model.layers.8.block_sparse_moe.experts.4.w2
+ - model.layers.9.block_sparse_moe.experts.4.w2
+ - model.layers.10.block_sparse_moe.experts.4.w2
+ - model.layers.11.block_sparse_moe.experts.4.w2
+ - model.layers.12.block_sparse_moe.experts.4.w2
+ - model.layers.3.block_sparse_moe.experts.4.w3
+ - model.layers.2.block_sparse_moe.experts.4.w3
+ - model.layers.1.block_sparse_moe.experts.4.w3
+ - model.layers.0.block_sparse_moe.experts.4.w3
+ - model.layers.4.block_sparse_moe.experts.4.w3
+ - model.layers.5.block_sparse_moe.experts.4.w3
+ - model.layers.6.block_sparse_moe.experts.4.w3
+ - model.layers.7.block_sparse_moe.experts.4.w3
+ - model.layers.8.block_sparse_moe.experts.4.w3
+ - model.layers.9.block_sparse_moe.experts.4.w3
+ - model.layers.10.block_sparse_moe.experts.4.w3
+ - model.layers.11.block_sparse_moe.experts.4.w3
+ - model.layers.12.block_sparse_moe.experts.4.w3
+ - model.layers.13.block_sparse_moe.experts.4.w3
+ - model.layers.4.block_sparse_moe.experts.5.w1
+ - model.layers.3.block_sparse_moe.experts.5.w1
+ - model.layers.2.block_sparse_moe.experts.5.w1
+ - model.layers.1.block_sparse_moe.experts.5.w1
+ - model.layers.0.block_sparse_moe.experts.5.w1
+ - model.layers.5.block_sparse_moe.experts.5.w1
+ - model.layers.6.block_sparse_moe.experts.5.w1
+ - model.layers.7.block_sparse_moe.experts.5.w1
+ - model.layers.8.block_sparse_moe.experts.5.w1
+ - model.layers.9.block_sparse_moe.experts.5.w1
+ - model.layers.10.block_sparse_moe.experts.5.w1
+ - model.layers.11.block_sparse_moe.experts.5.w1
+ - model.layers.12.block_sparse_moe.experts.5.w1
+ - model.layers.13.block_sparse_moe.experts.5.w1
+ - model.layers.4.block_sparse_moe.experts.5.w2
+ - model.layers.2.block_sparse_moe.experts.5.w2
+ - model.layers.3.block_sparse_moe.experts.5.w2
+ - model.layers.1.block_sparse_moe.experts.5.w2
+ - model.layers.0.block_sparse_moe.experts.5.w2
+ - model.layers.5.block_sparse_moe.experts.5.w2
+ - model.layers.6.block_sparse_moe.experts.5.w2
+ - model.layers.7.block_sparse_moe.experts.5.w2
+ - model.layers.8.block_sparse_moe.experts.5.w2
+ - model.layers.9.block_sparse_moe.experts.5.w2
+ - model.layers.10.block_sparse_moe.experts.5.w2
+ - model.layers.11.block_sparse_moe.experts.5.w2
+ - model.layers.12.block_sparse_moe.experts.5.w2
+ - model.layers.13.block_sparse_moe.experts.5.w2
+ - model.layers.4.block_sparse_moe.experts.5.w3
+ - model.layers.3.block_sparse_moe.experts.5.w3
+ - model.layers.2.block_sparse_moe.experts.5.w3
+ - model.layers.1.block_sparse_moe.experts.5.w3
+ - model.layers.0.block_sparse_moe.experts.5.w3
+ - model.layers.5.block_sparse_moe.experts.5.w3
+ - model.layers.6.block_sparse_moe.experts.5.w3
+ - model.layers.7.block_sparse_moe.experts.5.w3
+ - model.layers.8.block_sparse_moe.experts.5.w3
+ - model.layers.9.block_sparse_moe.experts.5.w3
+ - model.layers.10.block_sparse_moe.experts.5.w3
+ - model.layers.11.block_sparse_moe.experts.5.w3
+ - model.layers.12.block_sparse_moe.experts.5.w3
+ - model.layers.13.block_sparse_moe.experts.5.w3
+ - model.layers.5.block_sparse_moe.experts.6.w1
+ - model.layers.4.block_sparse_moe.experts.6.w1
+ - model.layers.3.block_sparse_moe.experts.6.w1
+ - model.layers.2.block_sparse_moe.experts.6.w1
+ - model.layers.1.block_sparse_moe.experts.6.w1
+ - model.layers.0.block_sparse_moe.experts.6.w1
+ - model.layers.6.block_sparse_moe.experts.6.w1
+ - model.layers.7.block_sparse_moe.experts.6.w1
+ - model.layers.8.block_sparse_moe.experts.6.w1
+ - model.layers.9.block_sparse_moe.experts.6.w1
+ - model.layers.10.block_sparse_moe.experts.6.w1
+ - model.layers.11.block_sparse_moe.experts.6.w1
+ - model.layers.12.block_sparse_moe.experts.6.w1
+ - model.layers.13.block_sparse_moe.experts.6.w1
+ - model.layers.5.block_sparse_moe.experts.6.w2
+ - model.layers.4.block_sparse_moe.experts.6.w2
+ - model.layers.2.block_sparse_moe.experts.6.w2
+ - model.layers.3.block_sparse_moe.experts.6.w2
+ - model.layers.1.block_sparse_moe.experts.6.w2
+ - model.layers.0.block_sparse_moe.experts.6.w2
+ - model.layers.6.block_sparse_moe.experts.6.w2
+ - model.layers.7.block_sparse_moe.experts.6.w2
+ - model.layers.8.block_sparse_moe.experts.6.w2
+ - model.layers.9.block_sparse_moe.experts.6.w2
+ - model.layers.10.block_sparse_moe.experts.6.w2
+ - model.layers.11.block_sparse_moe.experts.6.w2
+ - model.layers.12.block_sparse_moe.experts.6.w2
+ - model.layers.13.block_sparse_moe.experts.6.w2
+ - model.layers.5.block_sparse_moe.experts.6.w3
+ - model.layers.4.block_sparse_moe.experts.6.w3
+ - model.layers.3.block_sparse_moe.experts.6.w3
+ - model.layers.2.block_sparse_moe.experts.6.w3
+ - model.layers.1.block_sparse_moe.experts.6.w3
+ - model.layers.0.block_sparse_moe.experts.6.w3
+ - model.layers.6.block_sparse_moe.experts.6.w3
+ - model.layers.7.block_sparse_moe.experts.6.w3
+ - model.layers.8.block_sparse_moe.experts.6.w3
+ - model.layers.9.block_sparse_moe.experts.6.w3
+ - model.layers.10.block_sparse_moe.experts.6.w3
+ - model.layers.11.block_sparse_moe.experts.6.w3
+ - model.layers.12.block_sparse_moe.experts.6.w3
+ - model.layers.13.block_sparse_moe.experts.6.w3
+ - model.layers.5.block_sparse_moe.experts.7.w1
+ - model.layers.6.block_sparse_moe.experts.7.w1
+ - model.layers.3.block_sparse_moe.experts.7.w1
+ - model.layers.4.block_sparse_moe.experts.7.w1
+ - model.layers.2.block_sparse_moe.experts.7.w1
+ - model.layers.0.block_sparse_moe.experts.7.w1
+ - model.layers.7.block_sparse_moe.experts.7.w1
+ - model.layers.8.block_sparse_moe.experts.7.w1
+ - model.layers.9.block_sparse_moe.experts.7.w1
+ - model.layers.10.block_sparse_moe.experts.7.w1
+ - model.layers.11.block_sparse_moe.experts.7.w1
+ - model.layers.12.block_sparse_moe.experts.7.w1
+ - model.layers.13.block_sparse_moe.experts.7.w1
+ - model.layers.14.block_sparse_moe.experts.7.w1
+ - model.layers.6.block_sparse_moe.experts.7.w2
+ - model.layers.5.block_sparse_moe.experts.7.w2
+ - model.layers.4.block_sparse_moe.experts.7.w2
+ - model.layers.2.block_sparse_moe.experts.7.w2
+ - model.layers.3.block_sparse_moe.experts.7.w2
+ - model.layers.1.block_sparse_moe.experts.7.w2
+ - model.layers.0.block_sparse_moe.experts.7.w2
+ - model.layers.7.block_sparse_moe.experts.7.w2
+ - model.layers.8.block_sparse_moe.experts.7.w2
+ - model.layers.9.block_sparse_moe.experts.7.w2
+ - model.layers.10.block_sparse_moe.experts.7.w2
+ - model.layers.11.block_sparse_moe.experts.7.w2
+ - model.layers.12.block_sparse_moe.experts.7.w2
+ - model.layers.13.block_sparse_moe.experts.7.w2
+ - model.layers.6.block_sparse_moe.experts.7.w3
+ - model.layers.5.block_sparse_moe.experts.7.w3
+ - model.layers.4.block_sparse_moe.experts.7.w3
+ - model.layers.3.block_sparse_moe.experts.7.w3
+ - model.layers.2.block_sparse_moe.experts.7.w3
+ - model.layers.0.block_sparse_moe.experts.7.w3
+ - model.layers.7.block_sparse_moe.experts.7.w3
+ - model.layers.8.block_sparse_moe.experts.7.w3
+ - model.layers.9.block_sparse_moe.experts.7.w3
+ - model.layers.10.block_sparse_moe.experts.7.w3
+ - model.layers.11.block_sparse_moe.experts.7.w3
+ - model.layers.12.block_sparse_moe.experts.7.w3
+ - model.layers.13.block_sparse_moe.experts.7.w3
+ - model.layers.14.block_sparse_moe.experts.7.w3
+ # - model.layers.0.block_sparse_moe.gate
+ # - model.layers.1.block_sparse_moe.gate
+ # - model.layers.2.block_sparse_moe.gate
+ # - model.layers.3.block_sparse_moe.gate
+ # - model.layers.4.block_sparse_moe.gate
+ # - model.layers.5.block_sparse_moe.gate
+ # - model.layers.6.block_sparse_moe.gate
+ # - model.layers.7.block_sparse_moe.gate
+ # - model.layers.8.block_sparse_moe.gate
+ # - model.layers.9.block_sparse_moe.gate
+ # - model.layers.10.block_sparse_moe.gate
+ # - model.layers.11.block_sparse_moe.gate
+ # - model.layers.12.block_sparse_moe.gate
+ # - model.layers.13.block_sparse_moe.gate
+
+ model_config:
+   output_router_logits: true
+
+ datasets:
+   - path: /workspace/datasets/dolphin-2.9/dolphin201-sharegpt2.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /workspace/datasets/dolphin-2.9/Ultrachat200kunfiltered.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /workspace/datasets/dolphin-2.9/dolphin-coder-translate-sharegpt2.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /workspace/datasets/dolphin-2.9/dolphin-coder-codegen-sharegpt2.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /workspace/datasets/dolphin-2.9/m-a-p_Code-Feedback-sharegpt-unfiltered.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /workspace/datasets/dolphin-2.9/m-a-p_CodeFeedback-Filtered-Instruction-sharegpt-unfiltered.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /workspace/datasets/dolphin-2.9/not_samantha_norefusals.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /workspace/datasets/dolphin-2.9/Orca-Math-resort-unfiltered.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /workspace/datasets/dolphin-2.9/agent_instruct_react_unfiltered.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /workspace/datasets/dolphin-2.9/toolbench_instruct_j1s1_3k_unfiltered.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /workspace/datasets/dolphin-2.9/toolbench_negative_unfiltered.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /workspace/datasets/dolphin-2.9/toolbench_react_10p_unfiltered.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /workspace/datasets/dolphin-2.9/toolbench_tflan_cot_30p_unfiltered.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /workspace/datasets/dolphin-2.9/openhermes200k_unfiltered.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /workspace/datasets/dolphin-2.9/SystemConversations.jsonl
+     type: sharegpt
+     conversation: chatml
+
+ chat_template: chatml
+
+ dataset_prepared_path: thingy
+ val_set_size: 0.0002
+ output_dir: ./out
+
+ sequence_len: 4096
+ sample_packing: true
+ pad_to_sequence_len: true
+
+ gradient_accumulation_steps: 8
+ micro_batch_size: 4
+ num_epochs: 3
+ logging_steps: 1
+ optimizer: paged_adamw_8bit
+ lr_scheduler: cosine
+ learning_rate: 2.7e-5
+
+ wandb_project: dolphin-2.9-mixtral-8x22b
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: auto
+ fp16:
+ tf32: true
+
+ gradient_checkpointing: true
+ gradient_checkpointing_kwargs:
+   use_reentrant: false
+ early_stopping_patience:
+ # resume_from_checkpoint: /home/ehartford/axolotl/out/checkpoint-316
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+ saves_per_epoch: 8
+ save_total_limit: 2
+ save_steps:
+ evals_per_epoch: 4
+ eval_sample_packing: false
+ debug:
+ deepspeed: deepspeed_configs/zero3_bf16_cpuoffload_params.json
+ weight_decay: 0.05
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   eos_token: "<|im_end|>"
+ tokens:
+ - "<|im_start|>"
+ ```
+
+ </details><br>
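+
+ The `unfrozen_parameters` list in the config above makes this a targeted fine-tune: only the listed attention projections, expert weights, embeddings, and LM head receive gradient updates, while the router gates (commented out) and all remaining weights stay frozen. A minimal sketch of how such a list is typically applied, assuming a loaded PyTorch `model` and treating each entry as a regular expression over parameter names:
+
+ ```python
+ import re
+
+ # Hypothetical illustration; in practice `patterns` would be the full
+ # unfrozen_parameters list from the YAML above.
+ patterns = [r"^lm_head.weight$", r"^model.embed_tokens.weight$",
+             r"model.layers.0.self_attn.q_proj"]
+
+ for name, param in model.named_parameters():
+     # Freeze everything, then re-enable gradients only for matching names.
+     param.requires_grad = any(re.search(p, name) for p in patterns)
+ ```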
+
+ # out
+
+ This model is a fine-tuned version of [mistral-community/Mixtral-8x22B-v0.1](https://huggingface.co/mistral-community/Mixtral-8x22B-v0.1) on the datasets listed in the axolotl config above.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.5217
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
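+
+ Pending a fuller description, here is a minimal ChatML inference sketch. It assumes a local copy of this checkpoint at `./out` (the training `output_dir`) and that the ChatML chat template was saved with the tokenizer, as the config requests; the prompt content is a placeholder:
+
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ tokenizer = AutoTokenizer.from_pretrained("./out")
+ model = AutoModelForCausalLM.from_pretrained("./out", device_map="auto", torch_dtype="auto")
+
+ # chat_template: chatml, so each turn is wrapped in <|im_start|>...<|im_end|>.
+ messages = [
+     {"role": "system", "content": "You are a helpful assistant."},
+     {"role": "user", "content": "Explain mixture-of-experts routing in two sentences."},
+ ]
+ inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
+
+ # <|im_end|> (id 32000) is the fine-tune's stop token per config.json.
+ outputs = model.generate(inputs, max_new_tokens=256, eos_token_id=32000)
+ print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
+ ```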
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 2.7e-05
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 8
+ - gradient_accumulation_steps: 8
+ - total_train_batch_size: 256
+ - total_eval_batch_size: 32
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_steps: 2
+ - num_epochs: 3
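+
+ The effective batch sizes follow from the values above: 4 (micro-batch) × 8 (gradient-accumulation steps) × 8 (GPUs) = 256 sequences per optimizer step, and 4 × 8 = 32 for evaluation.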
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-----:|:----:|:---------------:|
+ | 0.7022        | 0.0   | 1    | 0.6989          |
+ | 0.5344        | 0.25  | 238  | 0.5138          |
+ | 0.5204        | 0.5   | 476  | 0.5018          |
+ | 0.5059        | 0.75  | 714  | 0.4951          |
+ | 0.5112        | 1.0   | 952  | 0.4911          |
+ | 0.4561        | 1.24  | 1190 | 0.4978          |
+ | 0.478         | 1.49  | 1428 | 0.4935          |
+ | 0.4714        | 1.74  | 1666 | 0.4899          |
+ | 0.4626        | 1.99  | 1904 | 0.4861          |
+ | 0.3675        | 2.22  | 2142 | 0.5240          |
+ | 0.3595        | 2.47  | 2380 | 0.5229          |
+ | 0.3438        | 2.72  | 2618 | 0.5217          |
+
+
+ ### Framework versions
+
+ - Transformers 4.40.0.dev0
+ - Pytorch 2.2.2+cu121
+ - Datasets 2.15.0
+ - Tokenizers 0.15.0
added_tokens.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "<|im_end|>": 32000,
+   "<|im_start|>": 32001
+ }
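These two ChatML delimiters extend the base Mixtral vocabulary of 32000 tokens, which is why `config.json` below reports `vocab_size: 32002`. A quick way to confirm the mapping, assuming a local copy of this repo at `./out` (hypothetical path):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./out")  # hypothetical local path
print(tokenizer.convert_tokens_to_ids("<|im_end|>"))    # 32000
print(tokenizer.convert_tokens_to_ids("<|im_start|>"))  # 32001
```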
config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "mistral-community/Mixtral-8x22B-v0.1",
+   "architectures": [
+     "MixtralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 32000,
+   "hidden_act": "silu",
+   "hidden_size": 6144,
+   "initializer_range": 0.02,
+   "intermediate_size": 16384,
+   "max_position_embeddings": 65536,
+   "model_type": "mixtral",
+   "num_attention_heads": 48,
+   "num_experts_per_tok": 2,
+   "num_hidden_layers": 56,
+   "num_key_value_heads": 8,
+   "num_local_experts": 8,
+   "output_router_logits": true,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 1000000,
+   "router_aux_loss_coef": 0.001,
+   "router_jitter_noise": 0.0,
+   "sliding_window": null,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.40.0.dev0",
+   "use_cache": false,
+   "vocab_size": 32002
+ }
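A few derived quantities follow directly from these fields; a small sanity check using only the numbers above:

```python
hidden_size, num_attention_heads, num_key_value_heads = 6144, 48, 8

head_dim = hidden_size // num_attention_heads                 # 128
queries_per_kv = num_attention_heads // num_key_value_heads   # 6 (grouped-query attention)

# Sparse MoE: 2 of the 8 local experts are active per token, per layer.
num_local_experts, num_experts_per_tok = 8, 2
print(head_dim, queries_per_kv, f"{num_experts_per_tok}/{num_local_experts} experts active")
```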
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "do_sample": true,
+   "eos_token_id": 2,
+   "transformers_version": "4.40.0.dev0"
+ }
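Note that this file keeps the base model's `eos_token_id` of 2 (`</s>`), whereas `config.json` sets 32000 (`<|im_end|>`); callers following the ChatML format may want to pass the stop id explicitly, as in the inference sketch in the README above (`eos_token_id=32000` in `generate`).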
model-00001-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:95b5af481edd04132b52ca7a00aa1743770027f063bf3affa6a7dce98eb52eda
+ size 4998688272
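Each shard is stored with Git LFS, so the diff records only the three-line pointer file: the LFS spec version, the SHA-256 digest (`oid`) of the real payload, and its size in bytes (here roughly 5.0 GB). A standard-library sketch for verifying a downloaded shard against its pointer (the file is assumed to be present locally):

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so multi-gigabyte shards never sit fully in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()

# Should print the oid recorded in the pointer above.
print(sha256_of("model-00001-of-00059.safetensors"))
```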
model-00002-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fdb334567b19bdece1e1c49155503c03b0af86f4094eaa68406572bb25ac430b
+ size 4806799120
model-00003-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bd77d61f27d67c8bf54d6c08c7eaacea4022ad4768b5e94b0c6b85356c113cdb
+ size 4806799120
model-00004-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f704dcab188d2614cc772858e2a109e88b0971b240b5eb376cb8c2af205b7294
+ size 4806799120
model-00005-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be6336cffcd5f109a2676b0b289b61a4fbbfb2819957758c38b59b783d2c8801
+ size 4806799120
model-00006-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b541b98786c887f920a1a9f67c2a4f8f832e56130d9e49ecfce10961504c60c
+ size 4806799120
model-00007-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4c67a731a28b213cfcf000fe467cbe17d97d3abd84570d134120013bd8da289b
+ size 4806799120
model-00008-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f389c12722447d9c665afb9686752bb9648f85b907b8c77e30772dad7953d2be
+ size 4806799120
model-00009-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9acb0495e3bbaf80398a4dadf20733682fb8ea0a71e7386d19142d369cdbf4d9
+ size 4806799120
model-00010-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:68ad974bd32a5a4b0281418282cb71bf68d9fe665007fb6b288d4ba5821119d7
+ size 4806799120
model-00011-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6bce041a52f79af5b40fc5f9f0297a5dd0f70b730caaee3abf4b939fc1f6497b
+ size 4806799136
model-00012-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1eb694f86f4fbaee616a1b49b8bc8d6d218130d2a30d24ee7940f7155147256c
+ size 4806799152
model-00013-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:458ada607b6e4b025c921592146b811386c6fffd4931f7e9131b764c19d04d9d
+ size 4806799152
model-00014-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d79627a1ff52da13845810d798947e7ffe7f35c066969c35a04f754991b1f8e8
+ size 4806799152
model-00015-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:13ec009c6cc62d47763d7a594d99d277e7f2c5942ccfdaeb8b518d2c5d95f28b
+ size 4806799152
model-00016-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be73f197597c4fe8ffbb6d3be3531ad7889711b9a641e86a3af2c53345557f3e
+ size 4806799152
model-00017-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:927fcc5ecaf8ce838ec26807e98b8d143e6dea4580a539e5066d3d4052256e11
+ size 4806799152
model-00018-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:43c2dcd5b668cc2d5cceb241df2a62ddff87b375d0c7faeaa77585e0adc9a3aa
+ size 4806799152
model-00019-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dcbf022bdc3c4313cd4d4199982a0e5e50f3728de6668453c8516a64e8038f08
+ size 4806799152
model-00020-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dab7878b4effd9bd0c6cbecc01cf8c06627143d9fab012365019cb066437d6b2
+ size 4806799152
model-00021-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1194045593988066255d9250064aedfa94eda846407d1403f2d356d9b86b51df
+ size 4806799152
model-00022-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7c273e9e02faba78dca024273688322a045014710c15bd68d1651c42a543dae1
+ size 4806799152
model-00023-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60fc76f369394da94b3e3b50a61e14571f6d7c31a7200123644021db8b66b652
+ size 4806799152
model-00024-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb73ea1ea07f0ab6bda083eb47c8a0e8d52e084fd432746b81e1ad870dd1e96c
+ size 4932529864
model-00025-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a59e8d4959eac96c25eb374981d9c0798f345457f358814b63a9fde012cc8ecb
+ size 4995542848
model-00026-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4147e8cd43de8b2541c9655c6a846cacfea355f8f84fc74dfd4f1e5b27bb708a
+ size 4995542848
model-00027-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:49dd9850f5850aa14a544faee25c5adba9da8bda7740b627e69539d7ca4d6ca9
+ size 4932628288
model-00028-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:131e737f09815b2cb5b9d09c0d1d0d0f14c820e8fac17d5bf623b6c7903d9415
+ size 4806774344
model-00029-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f2c6290c581dd966cd096f8d2093aeabcbb0617de677e06400290c39898045c
+ size 4806799144
model-00030-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b32d0ee70b1eb918cab4c89025a75db221040e4aaa3aca4c3a46e34d78bcd703
+ size 4806799144
model-00031-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c09b0c7efa6d8ec4c86f27a7d0c65063f0f85d9edbc08bd09faa65c0a3e9cdee
+ size 4806799144
model-00032-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fc22fe85ff68b830deaaddd4734941baa62eb6080479007b09dd982ad6819706
+ size 4806799144
model-00033-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f6d86ee969b1c4437cfbaae6158368fa30817a738c0ff2547e47ba8ae4b19f8
+ size 4806799152
model-00034-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aa829c75a954b291c0de5fc2244727ada42f692d038d19ef8c890424635d1c6e
+ size 4806799152
model-00035-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:faa68a0f1f0272c470853f643b934fadebeb7f71ed9b80746bfc824f7fe75e49
+ size 4806799152
model-00036-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a3e0a006f69da252b0d760bcb79de478182d73b2ea93f2afcd57d98d52ba8b0d
+ size 4806799152
model-00037-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7ef1a0d8420239ab78812b3d14fc99c1a99d0f364a0ea419dac7e187ae001b9a
+ size 4806799152
model-00038-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:82f18563cccd71b7d5c23b4774f1beb335de28e2260442f9032f9ad29e5674e4
+ size 4806799152
model-00039-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:427ca0a9f81e2a9af1465d6c7b8d6ebaf82d3b2637dd91cd8c1f61eaf204e7dd
+ size 4806799152
model-00040-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c341cd9aad7b90118afa75c3b3336754d957de5a4a55190799049f1dd70be512
+ size 4806799152
model-00041-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:877dd428fba3d3b10d5f688626b21a356fdf23518f4be5e95c1aa596d70b2d46
+ size 4806799152
model-00042-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bf58e49c141fe7c5c312dedecdb1e0c165f8c092682cbb774c04d8ad0c84b2bf
+ size 4806799152
model-00043-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6e8bdcf31f9f1925fb5673cfa9c40f07b59c3073b02f6c06c8ece6d27dc7f24c
+ size 4806799152
model-00044-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:68f0e70c784a929c513fdd8827bf9379a1f0919ce44d60ffcd573ccf81c2ef83
+ size 4806799152
model-00045-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60f5d7d07c67c3ee7a7ee3991e7904434a4f890ae8ae1880373c9b5b7fa937cc
+ size 4806799152
model-00046-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c4e97a90022efc665f629a61c20688e017a60521421ddf806ccb4960b45472ed
+ size 4806799152