Text2Text Generation
Transformers
PyTorch
English
switch_transformers
ArthurZ HF staff committed on
Commit
c5ab651
1 Parent(s): 1d42380

add config.json

Browse files
Files changed (2) hide show
  1. .gitattributes +365 -0
  2. config.json +38 -0
.gitattributes CHANGED
@@ -32,3 +32,368 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ pytorch_model-00299-of-00364.bin filter=lfs diff=lfs merge=lfs -text
36
+ pytorch_model-00335-of-00364.bin filter=lfs diff=lfs merge=lfs -text
37
+ pytorch_model-00339-of-00364.bin filter=lfs diff=lfs merge=lfs -text
38
+ pytorch_model-00003-of-00364.bin filter=lfs diff=lfs merge=lfs -text
39
+ pytorch_model-00067-of-00364.bin filter=lfs diff=lfs merge=lfs -text
40
+ pytorch_model-00084-of-00364.bin filter=lfs diff=lfs merge=lfs -text
41
+ pytorch_model-00163-of-00364.bin filter=lfs diff=lfs merge=lfs -text
42
+ pytorch_model-00230-of-00364.bin filter=lfs diff=lfs merge=lfs -text
43
+ pytorch_model-00293-of-00364.bin filter=lfs diff=lfs merge=lfs -text
44
+ pytorch_model-00006-of-00364.bin filter=lfs diff=lfs merge=lfs -text
45
+ pytorch_model-00033-of-00364.bin filter=lfs diff=lfs merge=lfs -text
46
+ pytorch_model-00120-of-00364.bin filter=lfs diff=lfs merge=lfs -text
47
+ pytorch_model-00173-of-00364.bin filter=lfs diff=lfs merge=lfs -text
48
+ pytorch_model-00122-of-00364.bin filter=lfs diff=lfs merge=lfs -text
49
+ pytorch_model-00223-of-00364.bin filter=lfs diff=lfs merge=lfs -text
50
+ pytorch_model-00323-of-00364.bin filter=lfs diff=lfs merge=lfs -text
51
+ pytorch_model-00331-of-00364.bin filter=lfs diff=lfs merge=lfs -text
52
+ pytorch_model-00321-of-00364.bin filter=lfs diff=lfs merge=lfs -text
53
+ pytorch_model-00329-of-00364.bin filter=lfs diff=lfs merge=lfs -text
54
+ pytorch_model-00070-of-00364.bin filter=lfs diff=lfs merge=lfs -text
55
+ pytorch_model-00156-of-00364.bin filter=lfs diff=lfs merge=lfs -text
56
+ pytorch_model-00257-of-00364.bin filter=lfs diff=lfs merge=lfs -text
57
+ pytorch_model-00305-of-00364.bin filter=lfs diff=lfs merge=lfs -text
58
+ pytorch_model-00081-of-00364.bin filter=lfs diff=lfs merge=lfs -text
59
+ pytorch_model-00222-of-00364.bin filter=lfs diff=lfs merge=lfs -text
60
+ pytorch_model-00359-of-00364.bin filter=lfs diff=lfs merge=lfs -text
61
+ pytorch_model-00057-of-00364.bin filter=lfs diff=lfs merge=lfs -text
62
+ pytorch_model-00105-of-00364.bin filter=lfs diff=lfs merge=lfs -text
63
+ pytorch_model-00113-of-00364.bin filter=lfs diff=lfs merge=lfs -text
64
+ pytorch_model-00151-of-00364.bin filter=lfs diff=lfs merge=lfs -text
65
+ pytorch_model-00008-of-00364.bin filter=lfs diff=lfs merge=lfs -text
66
+ pytorch_model-00011-of-00364.bin filter=lfs diff=lfs merge=lfs -text
67
+ pytorch_model-00051-of-00364.bin filter=lfs diff=lfs merge=lfs -text
68
+ pytorch_model-00056-of-00364.bin filter=lfs diff=lfs merge=lfs -text
69
+ pytorch_model-00158-of-00364.bin filter=lfs diff=lfs merge=lfs -text
70
+ pytorch_model-00185-of-00364.bin filter=lfs diff=lfs merge=lfs -text
71
+ pytorch_model-00315-of-00364.bin filter=lfs diff=lfs merge=lfs -text
72
+ pytorch_model-00336-of-00364.bin filter=lfs diff=lfs merge=lfs -text
73
+ pytorch_model-00183-of-00364.bin filter=lfs diff=lfs merge=lfs -text
74
+ pytorch_model-00244-of-00364.bin filter=lfs diff=lfs merge=lfs -text
75
+ pytorch_model-00255-of-00364.bin filter=lfs diff=lfs merge=lfs -text
76
+ pytorch_model-00260-of-00364.bin filter=lfs diff=lfs merge=lfs -text
77
+ pytorch_model-00270-of-00364.bin filter=lfs diff=lfs merge=lfs -text
78
+ pytorch_model-00313-of-00364.bin filter=lfs diff=lfs merge=lfs -text
79
+ pytorch_model-00010-of-00364.bin filter=lfs diff=lfs merge=lfs -text
80
+ pytorch_model-00017-of-00364.bin filter=lfs diff=lfs merge=lfs -text
81
+ pytorch_model-00128-of-00364.bin filter=lfs diff=lfs merge=lfs -text
82
+ pytorch_model-00133-of-00364.bin filter=lfs diff=lfs merge=lfs -text
83
+ pytorch_model-00210-of-00364.bin filter=lfs diff=lfs merge=lfs -text
84
+ pytorch_model-00247-of-00364.bin filter=lfs diff=lfs merge=lfs -text
85
+ pytorch_model-00324-of-00364.bin filter=lfs diff=lfs merge=lfs -text
86
+ pytorch_model-00037-of-00364.bin filter=lfs diff=lfs merge=lfs -text
87
+ pytorch_model-00058-of-00364.bin filter=lfs diff=lfs merge=lfs -text
88
+ pytorch_model-00125-of-00364.bin filter=lfs diff=lfs merge=lfs -text
89
+ pytorch_model-00203-of-00364.bin filter=lfs diff=lfs merge=lfs -text
90
+ pytorch_model-00019-of-00364.bin filter=lfs diff=lfs merge=lfs -text
91
+ pytorch_model-00199-of-00364.bin filter=lfs diff=lfs merge=lfs -text
92
+ pytorch_model-00224-of-00364.bin filter=lfs diff=lfs merge=lfs -text
93
+ pytorch_model-00283-of-00364.bin filter=lfs diff=lfs merge=lfs -text
94
+ pytorch_model-00317-of-00364.bin filter=lfs diff=lfs merge=lfs -text
95
+ pytorch_model-00358-of-00364.bin filter=lfs diff=lfs merge=lfs -text
96
+ pytorch_model-00080-of-00364.bin filter=lfs diff=lfs merge=lfs -text
97
+ pytorch_model-00136-of-00364.bin filter=lfs diff=lfs merge=lfs -text
98
+ pytorch_model-00169-of-00364.bin filter=lfs diff=lfs merge=lfs -text
99
+ pytorch_model-00267-of-00364.bin filter=lfs diff=lfs merge=lfs -text
100
+ pytorch_model-00314-of-00364.bin filter=lfs diff=lfs merge=lfs -text
101
+ pytorch_model-00050-of-00364.bin filter=lfs diff=lfs merge=lfs -text
102
+ pytorch_model-00160-of-00364.bin filter=lfs diff=lfs merge=lfs -text
103
+ pytorch_model-00197-of-00364.bin filter=lfs diff=lfs merge=lfs -text
104
+ pytorch_model-00307-of-00364.bin filter=lfs diff=lfs merge=lfs -text
105
+ pytorch_model-00234-of-00364.bin filter=lfs diff=lfs merge=lfs -text
106
+ pytorch_model-00242-of-00364.bin filter=lfs diff=lfs merge=lfs -text
107
+ pytorch_model-00243-of-00364.bin filter=lfs diff=lfs merge=lfs -text
108
+ pytorch_model-00275-of-00364.bin filter=lfs diff=lfs merge=lfs -text
109
+ pytorch_model-00063-of-00364.bin filter=lfs diff=lfs merge=lfs -text
110
+ pytorch_model-00170-of-00364.bin filter=lfs diff=lfs merge=lfs -text
111
+ pytorch_model-00200-of-00364.bin filter=lfs diff=lfs merge=lfs -text
112
+ pytorch_model-00219-of-00364.bin filter=lfs diff=lfs merge=lfs -text
113
+ pytorch_model-00297-of-00364.bin filter=lfs diff=lfs merge=lfs -text
114
+ pytorch_model-00330-of-00364.bin filter=lfs diff=lfs merge=lfs -text
115
+ pytorch_model-00360-of-00364.bin filter=lfs diff=lfs merge=lfs -text
116
+ pytorch_model-00139-of-00364.bin filter=lfs diff=lfs merge=lfs -text
117
+ pytorch_model-00174-of-00364.bin filter=lfs diff=lfs merge=lfs -text
118
+ pytorch_model-00194-of-00364.bin filter=lfs diff=lfs merge=lfs -text
119
+ pytorch_model-00268-of-00364.bin filter=lfs diff=lfs merge=lfs -text
120
+ pytorch_model-00146-of-00364.bin filter=lfs diff=lfs merge=lfs -text
121
+ pytorch_model-00214-of-00364.bin filter=lfs diff=lfs merge=lfs -text
122
+ pytorch_model-00089-of-00364.bin filter=lfs diff=lfs merge=lfs -text
123
+ pytorch_model-00132-of-00364.bin filter=lfs diff=lfs merge=lfs -text
124
+ pytorch_model-00300-of-00364.bin filter=lfs diff=lfs merge=lfs -text
125
+ pytorch_model-00310-of-00364.bin filter=lfs diff=lfs merge=lfs -text
126
+ pytorch_model-00016-of-00364.bin filter=lfs diff=lfs merge=lfs -text
127
+ pytorch_model-00086-of-00364.bin filter=lfs diff=lfs merge=lfs -text
128
+ pytorch_model-00111-of-00364.bin filter=lfs diff=lfs merge=lfs -text
129
+ pytorch_model-00295-of-00364.bin filter=lfs diff=lfs merge=lfs -text
130
+ pytorch_model-00276-of-00364.bin filter=lfs diff=lfs merge=lfs -text
131
+ pytorch_model-00353-of-00364.bin filter=lfs diff=lfs merge=lfs -text
132
+ pytorch_model-00181-of-00364.bin filter=lfs diff=lfs merge=lfs -text
133
+ pytorch_model-00208-of-00364.bin filter=lfs diff=lfs merge=lfs -text
134
+ pytorch_model-00220-of-00364.bin filter=lfs diff=lfs merge=lfs -text
135
+ pytorch_model-00233-of-00364.bin filter=lfs diff=lfs merge=lfs -text
136
+ pytorch_model-00106-of-00364.bin filter=lfs diff=lfs merge=lfs -text
137
+ pytorch_model-00364-of-00364.bin filter=lfs diff=lfs merge=lfs -text
138
+ pytorch_model-00083-of-00364.bin filter=lfs diff=lfs merge=lfs -text
139
+ pytorch_model-00127-of-00364.bin filter=lfs diff=lfs merge=lfs -text
140
+ pytorch_model-00066-of-00364.bin filter=lfs diff=lfs merge=lfs -text
141
+ pytorch_model-00091-of-00364.bin filter=lfs diff=lfs merge=lfs -text
142
+ pytorch_model-00123-of-00364.bin filter=lfs diff=lfs merge=lfs -text
143
+ pytorch_model-00124-of-00364.bin filter=lfs diff=lfs merge=lfs -text
144
+ pytorch_model-00182-of-00364.bin filter=lfs diff=lfs merge=lfs -text
145
+ pytorch_model-00217-of-00364.bin filter=lfs diff=lfs merge=lfs -text
146
+ pytorch_model-00249-of-00364.bin filter=lfs diff=lfs merge=lfs -text
147
+ pytorch_model-00286-of-00364.bin filter=lfs diff=lfs merge=lfs -text
148
+ pytorch_model-00009-of-00364.bin filter=lfs diff=lfs merge=lfs -text
149
+ pytorch_model-00036-of-00364.bin filter=lfs diff=lfs merge=lfs -text
150
+ pytorch_model-00055-of-00364.bin filter=lfs diff=lfs merge=lfs -text
151
+ pytorch_model-00119-of-00364.bin filter=lfs diff=lfs merge=lfs -text
152
+ pytorch_model-00345-of-00364.bin filter=lfs diff=lfs merge=lfs -text
153
+ pytorch_model-00287-of-00364.bin filter=lfs diff=lfs merge=lfs -text
154
+ pytorch_model-00005-of-00364.bin filter=lfs diff=lfs merge=lfs -text
155
+ pytorch_model-00059-of-00364.bin filter=lfs diff=lfs merge=lfs -text
156
+ pytorch_model-00069-of-00364.bin filter=lfs diff=lfs merge=lfs -text
157
+ pytorch_model-00110-of-00364.bin filter=lfs diff=lfs merge=lfs -text
158
+ pytorch_model-00172-of-00364.bin filter=lfs diff=lfs merge=lfs -text
159
+ pytorch_model-00216-of-00364.bin filter=lfs diff=lfs merge=lfs -text
160
+ pytorch_model-00228-of-00364.bin filter=lfs diff=lfs merge=lfs -text
161
+ pytorch_model-00236-of-00364.bin filter=lfs diff=lfs merge=lfs -text
162
+ pytorch_model-00026-of-00364.bin filter=lfs diff=lfs merge=lfs -text
163
+ pytorch_model-00071-of-00364.bin filter=lfs diff=lfs merge=lfs -text
164
+ pytorch_model-00104-of-00364.bin filter=lfs diff=lfs merge=lfs -text
165
+ pytorch_model-00150-of-00364.bin filter=lfs diff=lfs merge=lfs -text
166
+ pytorch_model-00271-of-00364.bin filter=lfs diff=lfs merge=lfs -text
167
+ pytorch_model-00348-of-00364.bin filter=lfs diff=lfs merge=lfs -text
168
+ pytorch_model-00107-of-00364.bin filter=lfs diff=lfs merge=lfs -text
169
+ pytorch_model-00108-of-00364.bin filter=lfs diff=lfs merge=lfs -text
170
+ pytorch_model-00227-of-00364.bin filter=lfs diff=lfs merge=lfs -text
171
+ pytorch_model-00237-of-00364.bin filter=lfs diff=lfs merge=lfs -text
172
+ pytorch_model-00012-of-00364.bin filter=lfs diff=lfs merge=lfs -text
173
+ pytorch_model-00015-of-00364.bin filter=lfs diff=lfs merge=lfs -text
174
+ pytorch_model-00043-of-00364.bin filter=lfs diff=lfs merge=lfs -text
175
+ pytorch_model-00088-of-00364.bin filter=lfs diff=lfs merge=lfs -text
176
+ pytorch_model-00292-of-00364.bin filter=lfs diff=lfs merge=lfs -text
177
+ pytorch_model-00347-of-00364.bin filter=lfs diff=lfs merge=lfs -text
178
+ pytorch_model-00274-of-00364.bin filter=lfs diff=lfs merge=lfs -text
179
+ pytorch_model-00018-of-00364.bin filter=lfs diff=lfs merge=lfs -text
180
+ pytorch_model-00072-of-00364.bin filter=lfs diff=lfs merge=lfs -text
181
+ pytorch_model-00101-of-00364.bin filter=lfs diff=lfs merge=lfs -text
182
+ pytorch_model-00238-of-00364.bin filter=lfs diff=lfs merge=lfs -text
183
+ pytorch_model-00129-of-00364.bin filter=lfs diff=lfs merge=lfs -text
184
+ pytorch_model-00140-of-00364.bin filter=lfs diff=lfs merge=lfs -text
185
+ pytorch_model-00161-of-00364.bin filter=lfs diff=lfs merge=lfs -text
186
+ pytorch_model-00316-of-00364.bin filter=lfs diff=lfs merge=lfs -text
187
+ pytorch_model-00014-of-00364.bin filter=lfs diff=lfs merge=lfs -text
188
+ pytorch_model-00025-of-00364.bin filter=lfs diff=lfs merge=lfs -text
189
+ pytorch_model-00053-of-00364.bin filter=lfs diff=lfs merge=lfs -text
190
+ pytorch_model-00075-of-00364.bin filter=lfs diff=lfs merge=lfs -text
191
+ pytorch_model-00320-of-00364.bin filter=lfs diff=lfs merge=lfs -text
192
+ pytorch_model-00354-of-00364.bin filter=lfs diff=lfs merge=lfs -text
193
+ pytorch_model-00116-of-00364.bin filter=lfs diff=lfs merge=lfs -text
194
+ pytorch_model-00117-of-00364.bin filter=lfs diff=lfs merge=lfs -text
195
+ pytorch_model-00328-of-00364.bin filter=lfs diff=lfs merge=lfs -text
196
+ pytorch_model-00342-of-00364.bin filter=lfs diff=lfs merge=lfs -text
197
+ pytorch_model-00261-of-00364.bin filter=lfs diff=lfs merge=lfs -text
198
+ pytorch_model-00046-of-00364.bin filter=lfs diff=lfs merge=lfs -text
199
+ pytorch_model-00145-of-00364.bin filter=lfs diff=lfs merge=lfs -text
200
+ pytorch_model-00188-of-00364.bin filter=lfs diff=lfs merge=lfs -text
201
+ pytorch_model-00212-of-00364.bin filter=lfs diff=lfs merge=lfs -text
202
+ pytorch_model-00147-of-00364.bin filter=lfs diff=lfs merge=lfs -text
203
+ pytorch_model-00285-of-00364.bin filter=lfs diff=lfs merge=lfs -text
204
+ pytorch_model-00319-of-00364.bin filter=lfs diff=lfs merge=lfs -text
205
+ pytorch_model-00337-of-00364.bin filter=lfs diff=lfs merge=lfs -text
206
+ pytorch_model-00027-of-00364.bin filter=lfs diff=lfs merge=lfs -text
207
+ pytorch_model-00034-of-00364.bin filter=lfs diff=lfs merge=lfs -text
208
+ pytorch_model-00103-of-00364.bin filter=lfs diff=lfs merge=lfs -text
209
+ pytorch_model-00115-of-00364.bin filter=lfs diff=lfs merge=lfs -text
210
+ pytorch_model-00207-of-00364.bin filter=lfs diff=lfs merge=lfs -text
211
+ pytorch_model-00298-of-00364.bin filter=lfs diff=lfs merge=lfs -text
212
+ pytorch_model-00341-of-00364.bin filter=lfs diff=lfs merge=lfs -text
213
+ pytorch_model-00350-of-00364.bin filter=lfs diff=lfs merge=lfs -text
214
+ pytorch_model-00077-of-00364.bin filter=lfs diff=lfs merge=lfs -text
215
+ pytorch_model-00109-of-00364.bin filter=lfs diff=lfs merge=lfs -text
216
+ pytorch_model-00175-of-00364.bin filter=lfs diff=lfs merge=lfs -text
217
+ pytorch_model-00178-of-00364.bin filter=lfs diff=lfs merge=lfs -text
218
+ pytorch_model-00239-of-00364.bin filter=lfs diff=lfs merge=lfs -text
219
+ pytorch_model-00245-of-00364.bin filter=lfs diff=lfs merge=lfs -text
220
+ pytorch_model-00250-of-00364.bin filter=lfs diff=lfs merge=lfs -text
221
+ pytorch_model-00334-of-00364.bin filter=lfs diff=lfs merge=lfs -text
222
+ pytorch_model-00023-of-00364.bin filter=lfs diff=lfs merge=lfs -text
223
+ pytorch_model-00065-of-00364.bin filter=lfs diff=lfs merge=lfs -text
224
+ pytorch_model-00099-of-00364.bin filter=lfs diff=lfs merge=lfs -text
225
+ pytorch_model-00198-of-00364.bin filter=lfs diff=lfs merge=lfs -text
226
+ pytorch_model-00356-of-00364.bin filter=lfs diff=lfs merge=lfs -text
227
+ pytorch_model-00187-of-00364.bin filter=lfs diff=lfs merge=lfs -text
228
+ pytorch_model-00211-of-00364.bin filter=lfs diff=lfs merge=lfs -text
229
+ pytorch_model-00253-of-00364.bin filter=lfs diff=lfs merge=lfs -text
230
+ pytorch_model-00013-of-00364.bin filter=lfs diff=lfs merge=lfs -text
231
+ pytorch_model-00138-of-00364.bin filter=lfs diff=lfs merge=lfs -text
232
+ pytorch_model-00143-of-00364.bin filter=lfs diff=lfs merge=lfs -text
233
+ pytorch_model-00152-of-00364.bin filter=lfs diff=lfs merge=lfs -text
234
+ pytorch_model-00322-of-00364.bin filter=lfs diff=lfs merge=lfs -text
235
+ pytorch_model-00351-of-00364.bin filter=lfs diff=lfs merge=lfs -text
236
+ pytorch_model-00052-of-00364.bin filter=lfs diff=lfs merge=lfs -text
237
+ pytorch_model-00154-of-00364.bin filter=lfs diff=lfs merge=lfs -text
238
+ pytorch_model-00155-of-00364.bin filter=lfs diff=lfs merge=lfs -text
239
+ pytorch_model-00246-of-00364.bin filter=lfs diff=lfs merge=lfs -text
240
+ pytorch_model-00290-of-00364.bin filter=lfs diff=lfs merge=lfs -text
241
+ pytorch_model-00064-of-00364.bin filter=lfs diff=lfs merge=lfs -text
242
+ pytorch_model-00098-of-00364.bin filter=lfs diff=lfs merge=lfs -text
243
+ pytorch_model-00162-of-00364.bin filter=lfs diff=lfs merge=lfs -text
244
+ pytorch_model-00265-of-00364.bin filter=lfs diff=lfs merge=lfs -text
245
+ pytorch_model-00177-of-00364.bin filter=lfs diff=lfs merge=lfs -text
246
+ pytorch_model-00232-of-00364.bin filter=lfs diff=lfs merge=lfs -text
247
+ pytorch_model-00038-of-00364.bin filter=lfs diff=lfs merge=lfs -text
248
+ pytorch_model-00048-of-00364.bin filter=lfs diff=lfs merge=lfs -text
249
+ pytorch_model-00141-of-00364.bin filter=lfs diff=lfs merge=lfs -text
250
+ pytorch_model-00166-of-00364.bin filter=lfs diff=lfs merge=lfs -text
251
+ pytorch_model-00193-of-00364.bin filter=lfs diff=lfs merge=lfs -text
252
+ pytorch_model-00263-of-00364.bin filter=lfs diff=lfs merge=lfs -text
253
+ pytorch_model-00273-of-00364.bin filter=lfs diff=lfs merge=lfs -text
254
+ pytorch_model-00045-of-00364.bin filter=lfs diff=lfs merge=lfs -text
255
+ pytorch_model-00094-of-00364.bin filter=lfs diff=lfs merge=lfs -text
256
+ pytorch_model-00135-of-00364.bin filter=lfs diff=lfs merge=lfs -text
257
+ pytorch_model-00167-of-00364.bin filter=lfs diff=lfs merge=lfs -text
258
+ pytorch_model-00191-of-00364.bin filter=lfs diff=lfs merge=lfs -text
259
+ pytorch_model-00231-of-00364.bin filter=lfs diff=lfs merge=lfs -text
260
+ pytorch_model-00264-of-00364.bin filter=lfs diff=lfs merge=lfs -text
261
+ pytorch_model-00277-of-00364.bin filter=lfs diff=lfs merge=lfs -text
262
+ pytorch_model-00022-of-00364.bin filter=lfs diff=lfs merge=lfs -text
263
+ pytorch_model-00074-of-00364.bin filter=lfs diff=lfs merge=lfs -text
264
+ pytorch_model-00078-of-00364.bin filter=lfs diff=lfs merge=lfs -text
265
+ pytorch_model-00114-of-00364.bin filter=lfs diff=lfs merge=lfs -text
266
+ pytorch_model-00280-of-00364.bin filter=lfs diff=lfs merge=lfs -text
267
+ pytorch_model-00343-of-00364.bin filter=lfs diff=lfs merge=lfs -text
268
+ pytorch_model-00304-of-00364.bin filter=lfs diff=lfs merge=lfs -text
269
+ pytorch_model-00126-of-00364.bin filter=lfs diff=lfs merge=lfs -text
270
+ pytorch_model-00149-of-00364.bin filter=lfs diff=lfs merge=lfs -text
271
+ pytorch_model-00184-of-00364.bin filter=lfs diff=lfs merge=lfs -text
272
+ pytorch_model-00240-of-00364.bin filter=lfs diff=lfs merge=lfs -text
273
+ pytorch_model-00049-of-00364.bin filter=lfs diff=lfs merge=lfs -text
274
+ pytorch_model-00168-of-00364.bin filter=lfs diff=lfs merge=lfs -text
275
+ pytorch_model-00266-of-00364.bin filter=lfs diff=lfs merge=lfs -text
276
+ pytorch_model-00294-of-00364.bin filter=lfs diff=lfs merge=lfs -text
277
+ pytorch_model-00308-of-00364.bin filter=lfs diff=lfs merge=lfs -text
278
+ pytorch_model-00352-of-00364.bin filter=lfs diff=lfs merge=lfs -text
279
+ pytorch_model-00079-of-00364.bin filter=lfs diff=lfs merge=lfs -text
280
+ pytorch_model-00090-of-00364.bin filter=lfs diff=lfs merge=lfs -text
281
+ pytorch_model-00153-of-00364.bin filter=lfs diff=lfs merge=lfs -text
282
+ pytorch_model-00225-of-00364.bin filter=lfs diff=lfs merge=lfs -text
283
+ pytorch_model-00355-of-00364.bin filter=lfs diff=lfs merge=lfs -text
284
+ pytorch_model-00340-of-00364.bin filter=lfs diff=lfs merge=lfs -text
285
+ pytorch_model-00007-of-00364.bin filter=lfs diff=lfs merge=lfs -text
286
+ pytorch_model-00040-of-00364.bin filter=lfs diff=lfs merge=lfs -text
287
+ pytorch_model-00042-of-00364.bin filter=lfs diff=lfs merge=lfs -text
288
+ pytorch_model-00112-of-00364.bin filter=lfs diff=lfs merge=lfs -text
289
+ pytorch_model-00148-of-00364.bin filter=lfs diff=lfs merge=lfs -text
290
+ pytorch_model-00262-of-00364.bin filter=lfs diff=lfs merge=lfs -text
291
+ pytorch_model-00309-of-00364.bin filter=lfs diff=lfs merge=lfs -text
292
+ pytorch_model-00303-of-00364.bin filter=lfs diff=lfs merge=lfs -text
293
+ pytorch_model-00028-of-00364.bin filter=lfs diff=lfs merge=lfs -text
294
+ pytorch_model-00092-of-00364.bin filter=lfs diff=lfs merge=lfs -text
295
+ pytorch_model-00102-of-00364.bin filter=lfs diff=lfs merge=lfs -text
296
+ pytorch_model-00165-of-00364.bin filter=lfs diff=lfs merge=lfs -text
297
+ pytorch_model-00054-of-00364.bin filter=lfs diff=lfs merge=lfs -text
298
+ pytorch_model-00190-of-00364.bin filter=lfs diff=lfs merge=lfs -text
299
+ pytorch_model-00325-of-00364.bin filter=lfs diff=lfs merge=lfs -text
300
+ pytorch_model-00118-of-00364.bin filter=lfs diff=lfs merge=lfs -text
301
+ pytorch_model-00144-of-00364.bin filter=lfs diff=lfs merge=lfs -text
302
+ pytorch_model-00279-of-00364.bin filter=lfs diff=lfs merge=lfs -text
303
+ pytorch_model-00301-of-00364.bin filter=lfs diff=lfs merge=lfs -text
304
+ pytorch_model-00134-of-00364.bin filter=lfs diff=lfs merge=lfs -text
305
+ pytorch_model-00218-of-00364.bin filter=lfs diff=lfs merge=lfs -text
306
+ pytorch_model-00327-of-00364.bin filter=lfs diff=lfs merge=lfs -text
307
+ pytorch_model-00332-of-00364.bin filter=lfs diff=lfs merge=lfs -text
308
+ pytorch_model-00002-of-00364.bin filter=lfs diff=lfs merge=lfs -text
309
+ pytorch_model-00004-of-00364.bin filter=lfs diff=lfs merge=lfs -text
310
+ pytorch_model-00085-of-00364.bin filter=lfs diff=lfs merge=lfs -text
311
+ pytorch_model-00095-of-00364.bin filter=lfs diff=lfs merge=lfs -text
312
+ pytorch_model-00362-of-00364.bin filter=lfs diff=lfs merge=lfs -text
313
+ pytorch_model-00209-of-00364.bin filter=lfs diff=lfs merge=lfs -text
314
+ pytorch_model-00256-of-00364.bin filter=lfs diff=lfs merge=lfs -text
315
+ pytorch_model-00281-of-00364.bin filter=lfs diff=lfs merge=lfs -text
316
+ pytorch_model-00349-of-00364.bin filter=lfs diff=lfs merge=lfs -text
317
+ pytorch_model-00030-of-00364.bin filter=lfs diff=lfs merge=lfs -text
318
+ pytorch_model-00035-of-00364.bin filter=lfs diff=lfs merge=lfs -text
319
+ pytorch_model-00039-of-00364.bin filter=lfs diff=lfs merge=lfs -text
320
+ pytorch_model-00192-of-00364.bin filter=lfs diff=lfs merge=lfs -text
321
+ pytorch_model-00312-of-00364.bin filter=lfs diff=lfs merge=lfs -text
322
+ pytorch_model-00338-of-00364.bin filter=lfs diff=lfs merge=lfs -text
323
+ pytorch_model-00189-of-00364.bin filter=lfs diff=lfs merge=lfs -text
324
+ pytorch_model-00204-of-00364.bin filter=lfs diff=lfs merge=lfs -text
325
+ pytorch_model-00213-of-00364.bin filter=lfs diff=lfs merge=lfs -text
326
+ pytorch_model-00306-of-00364.bin filter=lfs diff=lfs merge=lfs -text
327
+ pytorch_model-00248-of-00364.bin filter=lfs diff=lfs merge=lfs -text
328
+ pytorch_model-00272-of-00364.bin filter=lfs diff=lfs merge=lfs -text
329
+ pytorch_model-00044-of-00364.bin filter=lfs diff=lfs merge=lfs -text
330
+ pytorch_model-00137-of-00364.bin filter=lfs diff=lfs merge=lfs -text
331
+ pytorch_model-00201-of-00364.bin filter=lfs diff=lfs merge=lfs -text
332
+ pytorch_model-00205-of-00364.bin filter=lfs diff=lfs merge=lfs -text
333
+ pytorch_model-00229-of-00364.bin filter=lfs diff=lfs merge=lfs -text
334
+ pytorch_model-00259-of-00364.bin filter=lfs diff=lfs merge=lfs -text
335
+ pytorch_model-00269-of-00364.bin filter=lfs diff=lfs merge=lfs -text
336
+ pytorch_model-00093-of-00364.bin filter=lfs diff=lfs merge=lfs -text
337
+ pytorch_model-00100-of-00364.bin filter=lfs diff=lfs merge=lfs -text
338
+ pytorch_model-00159-of-00364.bin filter=lfs diff=lfs merge=lfs -text
339
+ pytorch_model-00196-of-00364.bin filter=lfs diff=lfs merge=lfs -text
340
+ pytorch_model-00357-of-00364.bin filter=lfs diff=lfs merge=lfs -text
341
+ pytorch_model-00020-of-00364.bin filter=lfs diff=lfs merge=lfs -text
342
+ pytorch_model-00061-of-00364.bin filter=lfs diff=lfs merge=lfs -text
343
+ pytorch_model-00087-of-00364.bin filter=lfs diff=lfs merge=lfs -text
344
+ pytorch_model-00288-of-00364.bin filter=lfs diff=lfs merge=lfs -text
345
+ pytorch_model-00289-of-00364.bin filter=lfs diff=lfs merge=lfs -text
346
+ pytorch_model-00346-of-00364.bin filter=lfs diff=lfs merge=lfs -text
347
+ pytorch_model-00060-of-00364.bin filter=lfs diff=lfs merge=lfs -text
348
+ pytorch_model-00131-of-00364.bin filter=lfs diff=lfs merge=lfs -text
349
+ pytorch_model-00171-of-00364.bin filter=lfs diff=lfs merge=lfs -text
350
+ pytorch_model-00226-of-00364.bin filter=lfs diff=lfs merge=lfs -text
351
+ pytorch_model-00326-of-00364.bin filter=lfs diff=lfs merge=lfs -text
352
+ pytorch_model-00062-of-00364.bin filter=lfs diff=lfs merge=lfs -text
353
+ pytorch_model-00235-of-00364.bin filter=lfs diff=lfs merge=lfs -text
354
+ pytorch_model-00282-of-00364.bin filter=lfs diff=lfs merge=lfs -text
355
+ pytorch_model-00296-of-00364.bin filter=lfs diff=lfs merge=lfs -text
356
+ pytorch_model-00032-of-00364.bin filter=lfs diff=lfs merge=lfs -text
357
+ pytorch_model-00215-of-00364.bin filter=lfs diff=lfs merge=lfs -text
358
+ pytorch_model-00363-of-00364.bin filter=lfs diff=lfs merge=lfs -text
359
+ pytorch_model-00202-of-00364.bin filter=lfs diff=lfs merge=lfs -text
360
+ pytorch_model-00221-of-00364.bin filter=lfs diff=lfs merge=lfs -text
361
+ pytorch_model-00284-of-00364.bin filter=lfs diff=lfs merge=lfs -text
362
+ pytorch_model-00311-of-00364.bin filter=lfs diff=lfs merge=lfs -text
363
+ pytorch_model-00047-of-00364.bin filter=lfs diff=lfs merge=lfs -text
364
+ pytorch_model-00157-of-00364.bin filter=lfs diff=lfs merge=lfs -text
365
+ pytorch_model-00164-of-00364.bin filter=lfs diff=lfs merge=lfs -text
366
+ pytorch_model-00176-of-00364.bin filter=lfs diff=lfs merge=lfs -text
367
+ pytorch_model-00361-of-00364.bin filter=lfs diff=lfs merge=lfs -text
368
+ pytorch_model-00302-of-00364.bin filter=lfs diff=lfs merge=lfs -text
369
+ pytorch_model-00180-of-00364.bin filter=lfs diff=lfs merge=lfs -text
370
+ pytorch_model-00206-of-00364.bin filter=lfs diff=lfs merge=lfs -text
371
+ pytorch_model-00241-of-00364.bin filter=lfs diff=lfs merge=lfs -text
372
+ pytorch_model-00251-of-00364.bin filter=lfs diff=lfs merge=lfs -text
373
+ pytorch_model-00031-of-00364.bin filter=lfs diff=lfs merge=lfs -text
374
+ pytorch_model-00068-of-00364.bin filter=lfs diff=lfs merge=lfs -text
375
+ pytorch_model-00179-of-00364.bin filter=lfs diff=lfs merge=lfs -text
376
+ pytorch_model-00344-of-00364.bin filter=lfs diff=lfs merge=lfs -text
377
+ pytorch_model-00278-of-00364.bin filter=lfs diff=lfs merge=lfs -text
378
+ pytorch_model-00096-of-00364.bin filter=lfs diff=lfs merge=lfs -text
379
+ pytorch_model-00121-of-00364.bin filter=lfs diff=lfs merge=lfs -text
380
+ pytorch_model-00142-of-00364.bin filter=lfs diff=lfs merge=lfs -text
381
+ pytorch_model-00258-of-00364.bin filter=lfs diff=lfs merge=lfs -text
382
+ pytorch_model-00041-of-00364.bin filter=lfs diff=lfs merge=lfs -text
383
+ pytorch_model-00186-of-00364.bin filter=lfs diff=lfs merge=lfs -text
384
+ pytorch_model-00333-of-00364.bin filter=lfs diff=lfs merge=lfs -text
385
+ pytorch_model-00001-of-00364.bin filter=lfs diff=lfs merge=lfs -text
386
+ pytorch_model-00021-of-00364.bin filter=lfs diff=lfs merge=lfs -text
387
+ pytorch_model-00024-of-00364.bin filter=lfs diff=lfs merge=lfs -text
388
+ pytorch_model-00029-of-00364.bin filter=lfs diff=lfs merge=lfs -text
389
+ pytorch_model-00073-of-00364.bin filter=lfs diff=lfs merge=lfs -text
390
+ pytorch_model-00097-of-00364.bin filter=lfs diff=lfs merge=lfs -text
391
+ pytorch_model-00291-of-00364.bin filter=lfs diff=lfs merge=lfs -text
392
+ pytorch_model-00195-of-00364.bin filter=lfs diff=lfs merge=lfs -text
393
+ pytorch_model-00252-of-00364.bin filter=lfs diff=lfs merge=lfs -text
394
+ pytorch_model-00318-of-00364.bin filter=lfs diff=lfs merge=lfs -text
395
+ pytorch_model-00076-of-00364.bin filter=lfs diff=lfs merge=lfs -text
396
+ pytorch_model-00082-of-00364.bin filter=lfs diff=lfs merge=lfs -text
397
+ pytorch_model-00130-of-00364.bin filter=lfs diff=lfs merge=lfs -text
398
+ pytorch_model-00254-of-00364.bin filter=lfs diff=lfs merge=lfs -text
399
+ pytorch_model.bin.index.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_router_probs": false,
3
+ "d_ff": 6144,
4
+ "d_kv": 64,
5
+ "d_model": 2080,
6
+ "decoder_sparse_step": 0,
7
+ "dense_act_fn": "relu",
8
+ "dropout_rate": 0.1,
9
+ "encoder_sparse_step": 1,
10
+ "eos_token_id": 1,
11
+ "expert_capacity": 64,
12
+ "feed_forward_proj": "relu",
13
+ "initializer_factor": 1.0,
14
+ "is_encoder_decoder": true,
15
+ "is_full_sparse": true,
16
+ "is_gated_act": false,
17
+ "layer_norm_epsilon": 1e-06,
18
+ "model_type": "switch_transformers",
19
+ "num_decoder_layers": 12,
20
+ "num_experts": 2048,
21
+ "num_heads": 30,
22
+ "num_layers": 15,
23
+ "num_sparse_decoder_layers": 15,
24
+ "num_sparse_encoder_layers": 15,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "router_aux_loss_coef": 0.001,
29
+ "router_bias": false,
30
+ "router_dtype": "float32",
31
+ "router_ignore_padding_tokens": false,
32
+ "router_jitter_noise": 0.01,
33
+ "router_type": "tokens_masked",
34
+ "router_z_loss_coef": 0.001,
35
+ "transformers_version": "4.25.0.dev0",
36
+ "use_cache": true,
37
+ "vocab_size": 32128
38
+ }