SinclairSchneider commited on
Commit
731fe61
1 Parent(s): c3a592b

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +61 -0
  2. LICENSE.txt +176 -0
  3. NOTICE.txt +1 -0
  4. README.md +1 -1
  5. config.json +38 -0
  6. configuration_dbrx.py +264 -0
  7. generation_config.json +8 -0
  8. huggingface-metadata.txt +65 -0
  9. model-00001-of-00061.safetensors +3 -0
  10. model-00002-of-00061.safetensors +3 -0
  11. model-00003-of-00061.safetensors +3 -0
  12. model-00004-of-00061.safetensors +3 -0
  13. model-00005-of-00061.safetensors +3 -0
  14. model-00006-of-00061.safetensors +3 -0
  15. model-00007-of-00061.safetensors +3 -0
  16. model-00008-of-00061.safetensors +3 -0
  17. model-00009-of-00061.safetensors +3 -0
  18. model-00010-of-00061.safetensors +3 -0
  19. model-00011-of-00061.safetensors +3 -0
  20. model-00012-of-00061.safetensors +3 -0
  21. model-00013-of-00061.safetensors +3 -0
  22. model-00014-of-00061.safetensors +3 -0
  23. model-00015-of-00061.safetensors +3 -0
  24. model-00016-of-00061.safetensors +3 -0
  25. model-00017-of-00061.safetensors +3 -0
  26. model-00018-of-00061.safetensors +3 -0
  27. model-00019-of-00061.safetensors +3 -0
  28. model-00020-of-00061.safetensors +3 -0
  29. model-00021-of-00061.safetensors +3 -0
  30. model-00022-of-00061.safetensors +3 -0
  31. model-00023-of-00061.safetensors +3 -0
  32. model-00024-of-00061.safetensors +3 -0
  33. model-00025-of-00061.safetensors +3 -0
  34. model-00026-of-00061.safetensors +3 -0
  35. model-00027-of-00061.safetensors +3 -0
  36. model-00028-of-00061.safetensors +3 -0
  37. model-00029-of-00061.safetensors +3 -0
  38. model-00030-of-00061.safetensors +3 -0
  39. model-00031-of-00061.safetensors +3 -0
  40. model-00032-of-00061.safetensors +3 -0
  41. model-00033-of-00061.safetensors +3 -0
  42. model-00034-of-00061.safetensors +3 -0
  43. model-00035-of-00061.safetensors +3 -0
  44. model-00036-of-00061.safetensors +3 -0
  45. model-00037-of-00061.safetensors +3 -0
  46. model-00038-of-00061.safetensors +3 -0
  47. model-00039-of-00061.safetensors +3 -0
  48. model-00040-of-00061.safetensors +3 -0
  49. model-00041-of-00061.safetensors +3 -0
  50. model-00042-of-00061.safetensors +3 -0
.gitattributes ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model-00001-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
2
+ model-00002-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
3
+ model-00003-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
4
+ model-00004-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
5
+ model-00005-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
6
+ model-00006-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
7
+ model-00007-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
8
+ model-00008-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
9
+ model-00009-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
10
+ model-00010-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
11
+ model-00011-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
12
+ model-00012-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
13
+ model-00013-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
14
+ model-00014-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
15
+ model-00015-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
16
+ model-00016-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
17
+ model-00017-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
18
+ model-00018-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
19
+ model-00019-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
20
+ model-00020-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
21
+ model-00021-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
22
+ model-00022-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
23
+ model-00023-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
24
+ model-00024-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
25
+ model-00025-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ model-00026-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
27
+ model-00027-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
28
+ model-00028-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
29
+ model-00029-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
30
+ model-00030-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
31
+ model-00031-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
32
+ model-00032-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
33
+ model-00033-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
34
+ model-00034-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
35
+ model-00035-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
36
+ model-00036-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
37
+ model-00037-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
38
+ model-00038-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
39
+ model-00039-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
40
+ model-00040-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
41
+ model-00041-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
42
+ model-00042-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
43
+ model-00043-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
44
+ model-00044-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
45
+ model-00045-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
46
+ model-00046-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
47
+ model-00047-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
48
+ model-00048-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
49
+ model-00049-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
50
+ model-00050-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
51
+ model-00051-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
52
+ model-00052-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
53
+ model-00053-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
54
+ model-00054-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
55
+ model-00055-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
56
+ model-00056-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
57
+ model-00057-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
58
+ model-00058-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
59
+ model-00059-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
60
+ model-00060-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
61
+ model-00061-of-00061.safetensors filter=lfs diff=lfs merge=lfs -text
LICENSE.txt ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Databricks Open Model License
2
+
3
+ By using, reproducing, modifying, distributing, performing or displaying
4
+ any portion or element of DBRX or DBRX Derivatives, or otherwise accepting
5
+ the terms of this Agreement, you agree to be bound by this Agreement.
6
+
7
+ Version Release Date: March 27, 2024
8
+
9
+
10
+ Section 1: Definitions
11
+
12
+ “Agreement” means these terms and conditions that govern the use, reproduction,
13
+ modification, distribution, performance or display of DBRX and/or DBRX
14
+ Derivatives and any terms and conditions incorporated by reference.
15
+
16
+ “Databricks” or “we” means Databricks, Inc.
17
+
18
+ “Licensee” or “you” means you, or your employer or any other person or entity
19
+ (if you are entering into this Agreement on such person or entity’s behalf),
20
+ of the age required under applicable laws, rules or regulations to provide
21
+ legal consent and that has legal authority to bind your employer or such other
22
+ person or entity if you are entering in this Agreement on their behalf.
23
+
24
+ “DBRX Derivatives” means all (i) modifications to DBRX, (ii) works based on
25
+ DBRX and (iii) any other derivative works thereof. Outputs are not deemed DBRX
26
+ Derivatives.
27
+
28
+ “DBRX” means the foundational large language models and software and
29
+ algorithms, including machine-learning model code, trained model weights,
30
+ inference-enabling code, training-enabling code, fine-tuning enabling code,
31
+ documentation and other elements of the foregoing identified by Databricks at
32
+ https://github.com/databricks/dbrx, regardless of the source that you obtained
33
+ it from.
34
+
35
+ “Output” means the results of operating DBRX or DBRX Derivatives.
36
+
37
+ As used in this Agreement, “including” means “including without limitation.”
38
+
39
+
40
+ Section 2: License Rights and Conditions on Use and Distribution
41
+
42
+ 2.1 Grant of Rights
43
+
44
+ You are granted a non-exclusive, worldwide, non-transferable and royalty-free
45
+ limited license under Databricks’ intellectual property or other rights owned
46
+ by Databricks embodied in DBRX to use, reproduce, distribute, copy, modify,
47
+ and create derivative works of DBRX in accordance with the terms of this
48
+ Agreement.
49
+
50
+ 2.2 Reproduction and Distribution
51
+
52
+ 1. All distributions of DBRX or DBRX Derivatives must be accompanied by a
53
+ "Notice" text file that contains the following notice: "DBRX is provided
54
+ under and subject to the Databricks Open Model License, Copyright ©
55
+ Databricks, Inc. All rights reserved."
56
+
57
+ 2. If you distribute or make DBRX or DBRX Derivatives available to a third
58
+ party, you must provide a copy of this Agreement to such third party.
59
+
60
+ 3. You must cause any modified files that you distribute to carry prominent
61
+ notices stating that you modified the files.
62
+
63
+ You may add your own intellectual property statement to your modifications of
64
+ DBRX and, except as set forth in this Section, may provide additional or
65
+ different terms and conditions for use, reproduction, or distribution of DBRX
66
+ or DBRX Derivatives as a whole, provided your use, reproduction, modification,
67
+ distribution, performance, and display of DBRX or DBRX Derivatives otherwise
68
+ complies with the terms and conditions of this Agreement. Any additional or
69
+ different terms and conditions you impose must not conflict with the terms of
70
+ this Agreement and in the event of a conflict, the terms and conditions of this
71
+ Agreement shall govern over any such additional or different terms and conditions.
72
+
73
+ 2.3 Use Restrictions
74
+
75
+ You will not use DBRX or DBRX Derivatives or any Output to improve any other
76
+ large language model (excluding DBRX or DBRX Derivatives).
77
+
78
+ You will not use DBRX or DBRX Derivatives:
79
+
80
+ 1. for any restricted use set forth in the Databricks Open Model Acceptable
81
+ Use Policy identified at
82
+ https://www.databricks.com/legal/acceptable-use-policy-open-model
83
+ ("Acceptable Use Policy"), which is hereby incorporated by reference into
84
+ this Agreement; or
85
+
86
+ 2. in violation of applicable laws and regulations.
87
+
88
+ To the maximum extent permitted by law, Databricks reserves the right to
89
+ restrict (remotely or otherwise) usage of DBRX or DBRX Derivatives that
90
+ Databricks reasonably believes are in violation of this Agreement.
91
+
92
+
93
+ Section 3: Additional Commercial Terms
94
+
95
+ If, on the DBRX version release date, the monthly active users of the products
96
+ or services made available by or for Licensee, or Licensee’s affiliates, is
97
+ greater than 700 million monthly active users in the preceding calendar month,
98
+ you must request a license from Databricks, which we may grant to you in our
99
+ sole discretion, and you are not authorized to exercise any of the rights under
100
+ this Agreement unless or until Databricks otherwise expressly grants you such
101
+ rights.
102
+
103
+ If you receive DBRX or DBRX Derivatives from a direct or indirect licensee as
104
+ part of an integrated end user product, then this section (Section 3) of the
105
+ Agreement will not apply to you.
106
+
107
+
108
+ Section 4: Additional Provisions
109
+
110
+ 4.1 Updates
111
+
112
+ Databricks may update DBRX from time to time, and you must make reasonable
113
+ efforts to use the latest version of DBRX.
114
+
115
+ 4.2 Intellectual Property
116
+
117
+ a. No trademark licenses are granted under this Agreement, and in connection
118
+ with DBRX or DBRX Derivatives, neither Databricks nor Licensee may use any name
119
+ or mark owned by or associated with the other or any of its affiliates, except
120
+ as required for reasonable and customary use in describing and redistributing
121
+ DBRX or DBRX Derivatives.
122
+
123
+ b. Subject to Databricks’ ownership of DBRX and DRBX Derivatives made by or for
124
+ Databricks, with respect to any DBRX Derivatives that are made by you, as
125
+ between you and Databricks, you are and will be the owner of such DBRX
126
+ Derivatives.
127
+
128
+ c. Databricks claims no ownership rights in Outputs. You are responsible for
129
+ Outputs and their subsequent uses.
130
+
131
+ d. If you institute litigation or other proceedings against Databricks or any
132
+ entity (including a cross-claim or counterclaim in a lawsuit) alleging that
133
+ DBRX or Outputs or results therefrom, or any portion of any of the foregoing,
134
+ constitutes infringement of intellectual property or other rights owned or
135
+ licensable by you, then any licenses granted to you under this Agreement shall
136
+ terminate as of the date such litigation or claim is filed or instituted. You
137
+ will indemnify and hold harmless Databricks from and against any claim by any
138
+ third party arising out of or related to your use or distribution of DBRX or
139
+ DBRX Derivatives.
140
+
141
+ 4.3 DISCLAIMER OF WARRANTY
142
+
143
+ UNLESS REQUIRED BY APPLICABLE LAW, DBRX AND ANY OUTPUT AND RESULTS THEREFROM
144
+ ARE PROVIDED ON AN “AS IS” BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER
145
+ EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OF TITLE,
146
+ NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. YOU
147
+ ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING OR
148
+ REDISTRIBUTING DBRX OR DBRX DERIVATIVES AND ANY OUTPUT AND ASSUME ANY RISKS
149
+ ASSOCIATED WITH YOUR USE OF DBRX OR DBRX DERIVATIVES AND ANY OUTPUT AND RESULTS.
150
+
151
+ 4.4 LIMITATION OF LIABILITY
152
+
153
+ IN NO EVENT WILL DATABRICKS OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF
154
+ LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR
155
+ OTHERWISE, ARISING OUT OF THIS AGREEMENT, FOR ANY LOST PROFITS OR ANY INDIRECT,
156
+ SPECIAL, CONSEQUENTIAL, INCIDENTAL, EXEMPLARY OR PUNITIVE DAMAGES, EVEN IF
157
+ DATABRICKS OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE
158
+ FOREGOING.
159
+
160
+ 4.5 Term and Termination
161
+
162
+ The term of this Agreement will commence upon your acceptance of this Agreement
163
+ or access to DBRX or DBRX Derivatives and will continue in full force and
164
+ effect until terminated in accordance with the terms and conditions herein.
165
+ Databricks may terminate this Agreement if you are in breach of any term or
166
+ condition of this Agreement. Upon termination of this Agreement, you shall
167
+ delete and cease use of DBRX or any DBRX Derivatives. Sections 1, 4.2(d), 4.3,
168
+ 4.4, and 4.6 shall survive the termination of this Agreement.
169
+
170
+ 4.6 Governing Law and Jurisdiction
171
+
172
+ This Agreement will be governed and construed under the laws of the State of
173
+ California without regard to choice of law principles, and the UN Convention
174
+ on Contracts for the International Sale of Goods does not apply to this
175
+ Agreement. The courts of California shall have exclusive jurisdiction of any
176
+ dispute arising out of this Agreement.
NOTICE.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ DBRX is provided under and subject to the Databricks Open Model License, Copyright © Databricks, Inc. All rights reserved.
README.md CHANGED
@@ -170,4 +170,4 @@ Full evaluation details can be found in our [technical blog post](https://www.da
170
  ## Acknowledgements
171
  The DBRX models were made possible thanks in large part to the open-source community, especially:
172
  * The [MegaBlocks](https://arxiv.org/abs/2211.15841) library, which established a foundation for our MoE implementation.
173
- * [PyTorch FSDP](https://arxiv.org/abs/2304.11277), which we built on for distributed training.
 
170
  ## Acknowledgements
171
  The DBRX models were made possible thanks in large part to the open-source community, especially:
172
  * The [MegaBlocks](https://arxiv.org/abs/2211.15841) library, which established a foundation for our MoE implementation.
173
+ * [PyTorch FSDP](https://arxiv.org/abs/2304.11277), which we built on for distributed training.
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DbrxForCausalLM"
4
+ ],
5
+ "attn_config": {
6
+ "clip_qkv": 8,
7
+ "kv_n_heads": 8,
8
+ "model_type": "",
9
+ "rope_theta": 500000
10
+ },
11
+ "auto_map": {
12
+ "AutoConfig": "configuration_dbrx.DbrxConfig",
13
+ "AutoModelForCausalLM": "modeling_dbrx.DbrxForCausalLM"
14
+ },
15
+ "d_model": 6144,
16
+ "emb_pdrop": 0.0,
17
+ "ffn_config": {
18
+ "ffn_hidden_size": 10752,
19
+ "model_type": "",
20
+ "moe_jitter_eps": 0,
21
+ "moe_loss_weight": 0.05,
22
+ "moe_num_experts": 16,
23
+ "moe_top_k": 4
24
+ },
25
+ "initializer_range": 0.02,
26
+ "max_seq_len": 32768,
27
+ "model_type": "dbrx",
28
+ "n_heads": 48,
29
+ "n_layers": 40,
30
+ "output_router_logits": false,
31
+ "resid_pdrop": 0.0,
32
+ "router_aux_loss_coef": 0.05,
33
+ "tie_word_embeddings": false,
34
+ "torch_dtype": "bfloat16",
35
+ "transformers_version": "4.38.2",
36
+ "use_cache": true,
37
+ "vocab_size": 100352
38
+ }
configuration_dbrx.py ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Dbrx configuration."""
2
+ from typing import Any, Optional
3
+
4
+ from transformers.configuration_utils import PretrainedConfig
5
+ from transformers.utils import logging
6
+
7
+ logger = logging.get_logger(__name__)
8
+
9
+ DBRX_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
10
+
11
+
12
+ class DbrxAttentionConfig(PretrainedConfig):
13
+ """Configuration class for Dbrx Attention.
14
+
15
+ This is the configuration class for a [`DbrxAttention`] class. It is used to instantiate attention layers
16
+ according to the specified arguments, defining the layers architecture.
17
+
18
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
19
+ documentation from [`PretrainedConfig`] for more information.
20
+
21
+ Args:
22
+ attn_pdrop (`float`, *optional*, defaults to 0.0):
23
+ The dropout probability for the attention layers.
24
+ clip_qkv (`float`, *optional*, defaults to None):
25
+ If not `None`, clip the queries, keys, and values in the attention layer to this value.
26
+ kv_n_heads (Optional[int]): For grouped_query_attention only, allow user to specify number of kv heads.
27
+ rope_theta (float): The base frequency for rope.
28
+ """
29
+
30
+ def __init__(
31
+ self,
32
+ attn_pdrop: float = 0,
33
+ clip_qkv: Optional[float] = None,
34
+ kv_n_heads: int = 1,
35
+ rope_theta: float = 10000.0,
36
+ **kwargs: Any,
37
+ ):
38
+ super().__init__(**kwargs)
39
+ self.attn_pdrop = attn_pdrop
40
+ self.clip_qkv = clip_qkv
41
+ self.kv_n_heads = kv_n_heads
42
+ self.rope_theta = rope_theta
43
+
44
+ for k in ['model_type']:
45
+ if k in kwargs:
46
+ kwargs.pop(k)
47
+ if len(kwargs) != 0:
48
+ raise ValueError(f'Found unknown {kwargs=}')
49
+
50
+ @classmethod
51
+ def from_pretrained(cls, pretrained_model_name_or_path: str,
52
+ **kwargs: Any) -> 'PretrainedConfig':
53
+ cls._set_token_in_kwargs(kwargs)
54
+
55
+ config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path,
56
+ **kwargs)
57
+
58
+ if config_dict.get('model_type') == 'dbrx':
59
+ config_dict = config_dict['attn_config']
60
+
61
+ if 'model_type' in config_dict and hasattr(
62
+ cls,
63
+ 'model_type') and config_dict['model_type'] != cls.model_type:
64
+ logger.warning(
65
+ f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
66
+ +
67
+ f'{cls.model_type}. This is not supported for all configurations of models and can yield errors.'
68
+ )
69
+
70
+ return cls.from_dict(config_dict, **kwargs)
71
+
72
+
73
+ class DbrxFFNConfig(PretrainedConfig):
74
+ """Configuration class for Dbrx FFN.
75
+
76
+ This is the configuration class for a [`DbrxFFN`] class. It is used to instantiate feedforward layers according to
77
+ the specified arguments, defining the layers architecture.
78
+
79
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
80
+ documentation from [`PretrainedConfig`] for more information.
81
+
82
+ Args:
83
+ ffn_act_fn (dict, optional): A dict specifying activation function for the FFN.
84
+ The dict should have a key 'name' with the value being the name of
85
+ the activation function along with any additional keyword arguments.
86
+ ffn_hidden_size (int, optional): The hidden size of the feedforward network.
87
+ moe_num_experts (int, optional): The number of experts in the mixture of experts layer.
88
+ moe_top_k (int, optional): The number of experts to use in the mixture of experts layer.
89
+ moe_jitter_eps (float, optional): The jitter epsilon for the mixture of experts layer.
90
+ moe_loss_weight (float, optional): The loss weight for the mixture of experts layer.
91
+ moe_normalize_expert_weights (float, optional): The normalization factor for the expert weights.
92
+ uniform_expert_assignment (bool, optional): Whether to use uniform expert assignment.
93
+ This should only be used for benchmarking purposes.
94
+ """
95
+
96
+ def __init__(
97
+ self,
98
+ ffn_act_fn: Optional[dict] = None,
99
+ ffn_hidden_size: int = 3584,
100
+ moe_num_experts: int = 4,
101
+ moe_top_k: int = 1,
102
+ moe_jitter_eps: Optional[float] = None,
103
+ moe_loss_weight: float = 0.01,
104
+ moe_normalize_expert_weights: Optional[float] = 1,
105
+ uniform_expert_assignment: bool = False,
106
+ **kwargs: Any,
107
+ ):
108
+ super().__init__()
109
+ if ffn_act_fn is None:
110
+ ffn_act_fn = {'name': 'silu'}
111
+ self.ffn_act_fn = ffn_act_fn
112
+ self.ffn_hidden_size = ffn_hidden_size
113
+ self.moe_num_experts = moe_num_experts
114
+ self.moe_top_k = moe_top_k
115
+ self.moe_jitter_eps = moe_jitter_eps
116
+ self.moe_loss_weight = moe_loss_weight
117
+ self.moe_normalize_expert_weights = moe_normalize_expert_weights
118
+ self.uniform_expert_assignment = uniform_expert_assignment
119
+
120
+ for k in ['model_type']:
121
+ if k in kwargs:
122
+ kwargs.pop(k)
123
+ if len(kwargs) != 0:
124
+ raise ValueError(f'Found unknown {kwargs=}')
125
+
126
+ @classmethod
127
+ def from_pretrained(cls, pretrained_model_name_or_path: str,
128
+ **kwargs: Any) -> 'PretrainedConfig':
129
+ cls._set_token_in_kwargs(kwargs)
130
+
131
+ config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path,
132
+ **kwargs)
133
+
134
+ if config_dict.get('model_type') == 'dbrx':
135
+ config_dict = config_dict['ffn_config']
136
+
137
+ if 'model_type' in config_dict and hasattr(
138
+ cls,
139
+ 'model_type') and config_dict['model_type'] != cls.model_type:
140
+ logger.warning(
141
+ f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
142
+ +
143
+ f'{cls.model_type}. This is not supported for all configurations of models and can yield errors.'
144
+ )
145
+
146
+ return cls.from_dict(config_dict, **kwargs)
147
+
148
+
149
+ class DbrxConfig(PretrainedConfig):
150
+ """Configuration class for Dbrx.
151
+
152
+ This is the configuration class for a [`DbrxModel`]. It is used to instantiate a Dbrx model according to the
153
+ specified arguments, defining the model architecture.
154
+
155
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
156
+ documentation from [`PretrainedConfig`] for more information.
157
+
158
+
159
+ Args:
160
+ d_model (`int`, *optional*, defaults to 2048):
161
+ Dimensionality of the embeddings and hidden states.
162
+ n_heads (`int`, *optional*, defaults to 16):
163
+ Number of attention heads for each attention layer in the Transformer encoder.
164
+ n_layers (`int`, *optional*, defaults to 24):
165
+ Number of hidden layers in the Transformer encoder.
166
+ max_seq_len (`int`, *optional*, defaults to 2048):
167
+ The maximum sequence length of the model.
168
+ vocab_size (`int`, *optional*, defaults to 32000):
169
+ Vocabulary size of the Dbrx model. Defines the maximum number of different tokens that can be represented by
170
+ the `inputs_ids` passed when calling [`DbrxModel`].
171
+ resid_pdrop (`float`, *optional*, defaults to 0.0):
172
+ The dropout probability applied to the attention output before combining with residual.
173
+ emb_pdrop (`float`, *optional*, defaults to 0.0):
174
+ The dropout probability for the embedding layer.
175
+ attn_config (`dict`, *optional*):
176
+ A dictionary used to configure the model's attention module.
177
+ ffn_config (`dict`, *optional*):
178
+ A dictionary used to configure the model's FFN module.
179
+ use_cache (`bool`, *optional*, defaults to `True`):
180
+ Whether or not the model should return the last key/values attentions (not used by all models).
181
+ initializer_range (`float`, *optional*, defaults to 0.02):
182
+ The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
183
+ output_router_logits (`bool`, *optional*, defaults to `False`):
184
+ Whether or not the router logits should be returned by the model. Enabling this will also
185
+ allow the model to output the auxiliary loss.
186
+ router_aux_loss_coef (`float`, *optional*, defaults to 0.05):
187
+ The aux loss factor for the total loss.
188
+
189
+
190
+ Example:
191
+ ```python
192
+ >>> from transformers import DbrxConfig, DbrxModel
193
+
194
+ >>> # Initializing a Dbrx configuration
195
+ >>> configuration = DbrxConfig()
196
+
197
+ >>> # Initializing a model (with random weights) from the configuration
198
+ >>> model = DbrxModel(configuration)
199
+
200
+ >>> # Accessing the model configuration
201
+ >>> configuration = model.config
202
+ ```
203
+ """
204
+
205
+ model_type = 'dbrx'
206
+ attribute_map = {
207
+ 'num_attention_heads': 'n_heads',
208
+ 'hidden_size': 'd_model',
209
+ 'num_hidden_layers': 'n_layers',
210
+ 'max_position_embeddings': 'max_seq_len'
211
+ }
212
+
213
+ def __init__(
214
+ self,
215
+ d_model: int = 2048,
216
+ n_heads: int = 16,
217
+ n_layers: int = 24,
218
+ max_seq_len: int = 2048,
219
+ vocab_size: int = 32000,
220
+ resid_pdrop: float = 0.0,
221
+ emb_pdrop: float = 0.0,
222
+ attn_config: Optional[DbrxAttentionConfig] = None,
223
+ ffn_config: Optional[DbrxFFNConfig] = None,
224
+ use_cache: bool = True,
225
+ initializer_range: float = 0.02,
226
+ output_router_logits: bool = False,
227
+ router_aux_loss_coef: float = 0.05,
228
+ **kwargs: Any,
229
+ ):
230
+ if attn_config is None:
231
+ self.attn_config = DbrxAttentionConfig()
232
+ elif isinstance(attn_config, dict):
233
+ self.attn_config = DbrxAttentionConfig(**attn_config)
234
+ else:
235
+ self.attn_config = attn_config
236
+
237
+ if ffn_config is None:
238
+ self.ffn_config = DbrxFFNConfig()
239
+ elif isinstance(ffn_config, dict):
240
+ self.ffn_config = DbrxFFNConfig(**ffn_config)
241
+ else:
242
+ self.ffn_config = ffn_config
243
+
244
+ self.d_model = d_model
245
+ self.n_heads = n_heads
246
+ self.n_layers = n_layers
247
+ self.max_seq_len = max_seq_len
248
+ self.vocab_size = vocab_size
249
+ self.resid_pdrop = resid_pdrop
250
+ self.emb_pdrop = emb_pdrop
251
+ self.use_cache = use_cache
252
+ self.initializer_range = initializer_range
253
+ self.output_router_logits = output_router_logits
254
+ self.router_aux_loss_coef = router_aux_loss_coef
255
+
256
+ tie_word_embeddings = kwargs.pop('tie_word_embeddings', False)
257
+ if tie_word_embeddings:
258
+ raise ValueError(
259
+ 'tie_word_embeddings is not supported for Dbrx models.')
260
+
261
+ super().__init__(
262
+ tie_word_embeddings=tie_word_embeddings,
263
+ **kwargs,
264
+ )
generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "eos_token_id": [
4
+ 100257,
5
+ 100279
6
+ ],
7
+ "transformers_version": "4.38.2"
8
+ }
huggingface-metadata.txt ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ url: https://huggingface.co/databricks/dbrx-instruct
2
+ branch: main
3
+ download date: 2024-03-29 03:50:30
4
+ sha256sum:
5
+ 3cfdb9ef2003544c6d3f8c6244883e76ca151bc60b13ac6f67693bd8baf864da model-00001-of-00061.safetensors
6
+ 23b1468ea4f961710bc3241b52dbebf981aa8e1273a8e4b985915cf49dfa04af model-00002-of-00061.safetensors
7
+ eaf9bdd6896edda1c662ac7530a4053220d4a49da0c0361e90d8b7855141f251 model-00003-of-00061.safetensors
8
+ be29f3fa53a5cf878bd77a56440fa2537a0f00cccb8651ad5741ed654689e460 model-00004-of-00061.safetensors
9
+ 0e6b9356183c8f24780675e281cb8ce0d1abac44008a79ed626abe72077a4e9d model-00005-of-00061.safetensors
10
+ 8d06f6f69cc365df30371efea4514bca2bfc8a25ff31064d21e90995733f956b model-00006-of-00061.safetensors
11
+ 9961227308e4e7212cab4eef4031e060512d55370aee3b911d80098f61cb096d model-00007-of-00061.safetensors
12
+ 33941bb4a441d44870fed325e16ebfd88b31320e542e9e776d497c9d2d7f169e model-00008-of-00061.safetensors
13
+ 47f5395b27077df7f4bcc94ec9dbf870e8fd0bdef35764e10e43017dcc13424f model-00009-of-00061.safetensors
14
+ 5572004a402e03eb7b68630970417ffb1d8558984b448c24521bf36eaf1b9715 model-00010-of-00061.safetensors
15
+ 8a2405a3e78cb765efbffc5f502c5db84921733ba5ab6a15a6b8810fe69e8031 model-00011-of-00061.safetensors
16
+ d1e0086fafd04d58ea6a61b7e7660f6bbe54a0380a6fa1f39464ac8be440e7d8 model-00012-of-00061.safetensors
17
+ a8f77cffc1ba5af4b6ed8a05a38bf1aa18ebbc3ed06861eb75925a84dba3672f model-00013-of-00061.safetensors
18
+ 107ce690fec4aa202cb07507af6e8f3971c974dd31f697c6c5dc39afcc899b67 model-00014-of-00061.safetensors
19
+ a7d1cb9080dbf19ec925aff6beba736866ae505e334d5e852183468dc57188d0 model-00015-of-00061.safetensors
20
+ c160424dedae5f805f10b2981e9e2bf261d4aed49c0d0d85cfb803ef445559f4 model-00016-of-00061.safetensors
21
+ e498a7c62d501f649ca2dfa0f409f6fe188a359c31d91f2d209b23effe15b22f model-00017-of-00061.safetensors
22
+ 24f9e0a148c17ae3fef677ae94a8290a562bd6a31a805e76e23f91c2dbbd9f18 model-00018-of-00061.safetensors
23
+ 73b3af8d33ad09039d052d000d122fc904c5542a247e2e6d05bb6aae6f5bb916 model-00019-of-00061.safetensors
24
+ fe93ce5f12f216d9d9ca2b252eeabcfc47fb6d3785eb3b1ecf9a6c5c384ae3a5 model-00020-of-00061.safetensors
25
+ af7b81ca660ca1afde8b1a0f697724a788e7566214e7178492d0839e441819a4 model-00021-of-00061.safetensors
26
+ 599a1d7323777aafe5edbaf22b8a56b8c84068f75ec20d382fc40e5c405bce38 model-00022-of-00061.safetensors
27
+ 9223edff1a6c3a73c6dacab2b104d2bfa280be2a5f8a3965fa24bf586fb683ad model-00023-of-00061.safetensors
28
+ 02350eeaa1d595bf65fc5c16c9ee3241f195fa0118d9dc9f983374b145bf35ea model-00024-of-00061.safetensors
29
+ a5779cb1a15c4d3e2d0302c3481f5e3d2cf74e4434e44824464c282ec724eee6 model-00025-of-00061.safetensors
30
+ 65ce12b50281d2449f5eb5e43b7bef56e9ebd137b675f9649d31c0bffdd07ceb model-00026-of-00061.safetensors
31
+ 1db31da422deb6f7098c79dd18a3fab823fe8d6ae95dfcd751e7bc7fa1650def model-00027-of-00061.safetensors
32
+ 312da4cc278ab598b38ca994f70d453fc086fc560de6da851b55d602ef368ecc model-00028-of-00061.safetensors
33
+ d28d9280c500a9cbe5ce59df1043cf8bc07b531c0ddb8cc70365786b1a36900c model-00029-of-00061.safetensors
34
+ 573a4ef61f3d9f1974f05612cc0299475c41f6a8d4e1ae530a0f21e8e6b04725 model-00030-of-00061.safetensors
35
+ d2bb381b92c8bdb37a780fc4369904406fc8fdabda0007325a0debf4ba103a26 model-00031-of-00061.safetensors
36
+ 17a60f2ae29a04acead99434eedc40413954982cac787ca485378390c537ccb2 model-00032-of-00061.safetensors
37
+ d410b47d48eb572478b80a6ea1766da7ef7b5fdbb8e9e4e611208b2677cb81bd model-00033-of-00061.safetensors
38
+ 8e5e894489b1da3cf744345022a9fd6d3fcf14f66a205b9931146769892999e7 model-00034-of-00061.safetensors
39
+ 35b4937e5f449256f2e8277461af26d956c762a98f252f04445ca1852da30b7e model-00035-of-00061.safetensors
40
+ bc2d28e62edf58cf121924c4a57c172021a9dbe972d75cfdeb54180a1f21957b model-00036-of-00061.safetensors
41
+ 3813a7e00dd6ed9129158314a92ac7229cea41f3562c186a6daee461f6bbbaa6 model-00037-of-00061.safetensors
42
+ b9f4014248e2087083843eaff448d0232230dbd8b5fa33cc54d7ebf5003c26b3 model-00038-of-00061.safetensors
43
+ 1d6b612cb8b81dc0714c0f7db73e0e9b4e14eb463e7069b16f5387bd2673890a model-00039-of-00061.safetensors
44
+ 8dde9e2e2bc8a1138d3801929b1fab01d301bf6e16c54be0094a9e1f407243ea model-00040-of-00061.safetensors
45
+ 9730e3d757fd41b8cad918c66f041011a96e67c5e82d6e313d8b57cccb44bec0 model-00041-of-00061.safetensors
46
+ 128e0433397f8e992cd174ea9b2931af49e25feeb78aef9c6cd5335da6e86e82 model-00042-of-00061.safetensors
47
+ e982d566cf1d966052f6365b655d26b38a34bfb3d0bdd75bbcc9d794fc485c32 model-00043-of-00061.safetensors
48
+ 713866fbc89733b067d799c4368e2495d4316193e91ad2586440b9d97e549952 model-00044-of-00061.safetensors
49
+ 8f8796101e5df9c327e6bcc376ffaba33191270a966be67cfa9d75ef1dbac695 model-00045-of-00061.safetensors
50
+ d793734c75efe71ffa500c3048fd8c6a21eed8c8b692371be76d068fb8c044e3 model-00046-of-00061.safetensors
51
+ 53871e712da21c4ae6862e8fc546367a60572f9b336049b1f3b7cf0506437fbb model-00047-of-00061.safetensors
52
+ bcd0c10ef6ef58fbe5d59c81c0950022ce64b12c7534d162c5299a76776d5bc0 model-00048-of-00061.safetensors
53
+ b2fcb794b7df43e7cfede55e5722d0f53ab709751cb7efbde77b14522d094011 model-00049-of-00061.safetensors
54
+ 48fd71f1e1b8ed9fd4ca47a621c9924d6485340d4e39749ed965f1f6ef045170 model-00050-of-00061.safetensors
55
+ c4e5c439153215ccddf1d0d00cb05ccf6557cbddf9a2c963c47b9556958ce506 model-00051-of-00061.safetensors
56
+ 06b911989ad3040713c576ad78ddda8b34fa7509440aec61cace1a57263890c0 model-00052-of-00061.safetensors
57
+ 709079f70f812d17a2fdf8e639fd99f4c0d572cf23f1a5e3e323cc00b50db6a2 model-00053-of-00061.safetensors
58
+ 8bf788ff04aa4e73eec8abac378ebb4c765df0c6dc85e24522d24c0cca4aa344 model-00054-of-00061.safetensors
59
+ 5163f3f77874a4ea7a8e76f74da330b0a87fd19c6ed9ef42d36a0b1269ca92de model-00055-of-00061.safetensors
60
+ 6c880fae2c3d3a98b1b1bfea849512df7be84004f3569adec642c350e3c814bc model-00056-of-00061.safetensors
61
+ c0d24334da4eaea54d3094a171b698e796a4102f954a1c9e988808b9a3afc905 model-00057-of-00061.safetensors
62
+ 70cab56497ef8b4bd74352678f4ac83ab5b5a0facf7aac394df2ae4669f5a57b model-00058-of-00061.safetensors
63
+ 1ca70897eb949ac8871a9e0ac83d597c694d3f51c6e48a021231d3b749c5b667 model-00059-of-00061.safetensors
64
+ 492fe0baed3f76704206d454994c377e8355ca1bec43ef4cd4968b5df8fb669e model-00060-of-00061.safetensors
65
+ 00af0b1e9823022479242479f6f84ae3dfc543628acfe4e0e83e0b2034e4b220 model-00061-of-00061.safetensors
model-00001-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c51a84ddbe9c6db4a3933f79dab41d5d296faea64e611f06987d3edf9a188de1
3
+ size 3523439352
model-00002-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bdd74964838d9c5ab140d3f00a31fc12081cff8ac3310aa66f6f1f75fddc443
3
+ size 4404245144
model-00003-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e72c1494f4bfa8409e1d0a74fb11b6d63107eabef89309392370ce7900929eb
3
+ size 4227862560
model-00004-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43bdcc149fc83b65e164e6fbeb02735309750ef043fc0fb7bc0de7f33ce05ac8
3
+ size 4404245144
model-00005-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c6d8e3d56f731ba3a05858b4d614e2fc4a51703100915280bf89c25a5d24b81
3
+ size 4404245144
model-00006-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c1b51cc32f2c535f528d69c43ddc4d48855ec3a8847b9e584e28c68f9efc786
3
+ size 4227862560
model-00007-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca4ad42049f29c5cfb3f375f0246562cf8d2e1606e851d438fea210c579a41d8
3
+ size 4404245144
model-00008-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d4ba5cef0b5a8d1b73756f1656f5247dc63c23161e5f6dca08479ccc10492f5
3
+ size 4404245144
model-00009-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7335c39b0e58a2e9e9a88e648effc97bdab49f5be42c7914bde7f039b6924f47
3
+ size 4227862560
model-00010-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:849318fb372f0aad14be50f74ecccf059e4236703f453c3380f72ad47680549f
3
+ size 4404245144
model-00011-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09193bc2a79bcf6017e1afce25dd3ece2f16509940ce50922cb1d1e54c7224b7
3
+ size 4404245144
model-00012-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f50c19c0c494a5bfa78403e4faa431a579f9d9ef353e88c3f8bac123e10e5b2
3
+ size 4227862560
model-00013-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8a958d323eedc75858514d4bf1e03b4f1be3dde553d40704f8a8b5e0d509a4c
3
+ size 4404245144
model-00014-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5ff6b686e2d67764473c62b3212530e2d5276716446da843e77ffa8092f5ada
3
+ size 4404245144
model-00015-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a659155cfb439471ef08e8aceb5dd9010ec1efb8f39e19342e81a9e40a8c9a7
3
+ size 4227862560
model-00016-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b630f37522a73766346162b70512a5d21db1ed331eae0a3c3c42cf5bb57dfcca
3
+ size 4404245168
model-00017-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8738f9ab6bcc6ff4dce364b48bd6e42d2fb8368b7c479cdc0477a8c892769f9d
3
+ size 4404245184
model-00018-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78dd6843d4539850acc3e2548bbe52726a50b09d05cc58984a0c11b78af6ed42
3
+ size 4227862592
model-00019-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74fcf7768f057fde24b7b2b992f709a771c7c1b283deda188af8a45a7171cf6f
3
+ size 4404245184
model-00020-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d4bc51b7850ba7cb99379d71b894a325785ef5d538d09a7c8780f4c4d5be50f
3
+ size 4404245184
model-00021-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4691ae48cb49b454351a8a4fed3ca0d0f1fef3a044d9ceb3ae538632ee22e0fb
3
+ size 4227862592
model-00022-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1ede5d2312fa979f629ecd9fe2ff0bfa4620d339afd07d350d773b93c2ee574
3
+ size 4404245184
model-00023-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a208b0c2743d0849cb0e8aed71e209a5e7972b63dff4615b0678ef40d77bd992
3
+ size 4404245184
model-00024-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f48f04d4622d45afb74f97837453d8f1eee3d897d83d7f936b2a0ea78acb8bbf
3
+ size 4227862592
model-00025-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:056e53c0e8f98ed7675a74f37766c60e7402f01d4ec3c89691cef5b345613d74
3
+ size 4404245184
model-00026-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd62fc675e8a92415ed0e60f8e9a25a065679f33b944b3bc8b529c9c4d2b2ea8
3
+ size 4404245184
model-00027-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dfa7c1a7243967bdeed47b58e0a6798bfb6fc6debdce2eed1fe7b6a243d41b0
3
+ size 4227862592
model-00028-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d70c1f5c43efc4c9f4041ab48d2390dd275532376eaa5b33774956d08a363ad2
3
+ size 4404245184
model-00029-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:778c60430626f6b96e0dacec38b193d49afd4f74e6b38e2a70292d45ec3c7ef0
3
+ size 4404245184
model-00030-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34d8d6d24a74ebcd861bed5709bcb8ade5595734bb8f79d1e72e725bdebbc83a
3
+ size 4227862592
model-00031-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe8d09e32d3b5fd39e1f20c0e3c2aad1a5124331a160f75c9be018a3b03817b5
3
+ size 4404245184
model-00032-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ff026c683b608f73289ca170232eed5b66c04fc2e11c15878e6514a828d2d34
3
+ size 4404245184
model-00033-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e52a05f9a898caf58749aa641205ce316eefe3d8654c46b2c06271e27093b51
3
+ size 4227862592
model-00034-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2db17c4882119b0c3f28d4f2c5bd7fddee43b74d73928efcfca0bb7ede81a314
3
+ size 4404245184
model-00035-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0761a842b153ad1e6e78883cc2fc5a3e24dea543ddfa2c9fc03713d7c68f2e32
3
+ size 4404245184
model-00036-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79a15bc8adf90a283a34904d44df5c8631876651e3cc0323db3dafaef65ac6fd
3
+ size 4227862592
model-00037-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:002005c0596b8d0476fa6ab158ff0c9c22b33b76584d1f1857dff21e2bd9f737
3
+ size 4404245184
model-00038-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4971108c48eca76aa1020aa225c215026605d7967871b52d28803d8dc435a482
3
+ size 4404245184
model-00039-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bf53ce714b30cf246e5fbfee7c29a8e4a83539439d900984d03254321b9290a
3
+ size 4227862592
model-00040-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ea6e5518b2e08f81cbd52d8b0b4da7f77bba02dca96138bf5cb2d2f5bc4db84
3
+ size 4404245184
model-00041-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:249d9c556f32ff02ed6cfc7b96c62c0d86ad0cb1b99be9785840b40e9a5c7022
3
+ size 4404245184
model-00042-of-00061.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d244c2975aa94e2827d6bb04854943b27c3c70e115fe8c743a840632c323b0d
3
+ size 4227862592