Add new SentenceTransformer model.
Browse files
- 1_Pooling/config.json +10 -0
- README.md +633 -0
- config.json +36 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +57 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 512,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,633 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- en
|
4 |
+
tags:
|
5 |
+
- sentence-transformers
|
6 |
+
- sentence-similarity
|
7 |
+
- feature-extraction
|
8 |
+
- generated_from_trainer
|
9 |
+
- dataset_size:463
|
10 |
+
- loss:MultipleNegativesRankingLoss
|
11 |
+
base_model: jinaai/jina-embeddings-v2-small-en
|
12 |
+
widget:
|
13 |
+
- source_sentence: '
|
14 |
+
|
15 |
+
In cases where an individual rescues another person from getting hit by a car
|
16 |
+
by pushing that person out of the way, causing the person''s luxury kimono to
|
17 |
+
get dirty, the rescuer does not have to compensate damages for the kimono.
|
18 |
+
|
19 |
+
'
|
20 |
+
sentences:
|
21 |
+
- '
|
22 |
+
|
23 |
+
Article 465-2
|
24 |
+
|
25 |
+
(1) A guarantor to a guarantee contract under which the principal obligation is
|
26 |
+
one or more unidentified obligations within a certain specified scope (hereinafter
|
27 |
+
referred to as a "contract for revolving guarantee") and the guarantor is not
|
28 |
+
a corporation (hereinafter referred to as a "contract for revolving guarantee
|
29 |
+
by an individual") is liable to perform the obligation in terms of the amounts
|
30 |
+
of the principal of the principal obligation, any interest, penalty and compensation
|
31 |
+
for loss or damage in connection with the principal obligation, and all the other
|
32 |
+
charges secondary to the obligation, as well as the amount of any penalty or compensation
|
33 |
+
for loss or damage which is agreed- upon on with regard to the guarantee obligation,
|
34 |
+
up to a certain maximum amount which pertains to all of these amounts.
|
35 |
+
|
36 |
+
(2) A contract for revolving guarantee by an individual does not become effective
|
37 |
+
unless it provides for the maximum amount prescribed in the preceding paragraph.
|
38 |
+
|
39 |
+
(3) The provisions of Article 446, paragraphs (2) and (3) apply mutatis mutandis
|
40 |
+
to the provisions concerning a maximum amount prescribed in paragraph (1) in a
|
41 |
+
contract for revolving guarantee by an individual..
|
42 |
+
|
43 |
+
'
|
44 |
+
- '
|
45 |
+
|
46 |
+
Article 449
|
47 |
+
|
48 |
+
If a guarantor that guarantees an obligation which may be voidable due to the
|
49 |
+
principal obligor''s limited capacity to act, is aware, at the time of entering
|
50 |
+
into a guarantee contract, of the cause for its voidability, that guarantor is
|
51 |
+
presumed to have assumed an independent obligation of the same subject matter
|
52 |
+
in the event of non-performance by the principal obligor or rescission of the
|
53 |
+
obligation..
|
54 |
+
|
55 |
+
'
|
56 |
+
- '
|
57 |
+
|
58 |
+
Article 698
|
59 |
+
|
60 |
+
If a manager engages in benevolent intervention in another''s business in order
|
61 |
+
to allow a principal to escape imminent danger to the principal''s person, reputation,
|
62 |
+
or property, the manager is not liable to compensate for damage resulting from
|
63 |
+
this unless the manager has acted in bad faith or with gross negligence..
|
64 |
+
|
65 |
+
'
|
66 |
+
- source_sentence: '
|
67 |
+
|
68 |
+
No contract of loan for use shall be effective unless it is made in writing.
|
69 |
+
|
70 |
+
'
|
71 |
+
sentences:
|
72 |
+
- '
|
73 |
+
|
74 |
+
Article 266
|
75 |
+
|
76 |
+
(1) The provisions of Articles 274 through 276 apply mutatis mutandis if the superficiary
|
77 |
+
must pay periodical rent to the owners of the land.
|
78 |
+
|
79 |
+
(2) Beyond what is provided for in the preceding paragraph, provisions on leasehold
|
80 |
+
apply mutatis mutandis to rent to the extent that application is not inconsistent
|
81 |
+
with the nature of the same.
|
82 |
+
|
83 |
+
Article 276
|
84 |
+
|
85 |
+
If a farming right holder fails to pay the rent for two or more consecutive years,
|
86 |
+
the landowner may demand the extinction of the farming right..
|
87 |
+
|
88 |
+
'
|
89 |
+
- '
|
90 |
+
|
91 |
+
Article 169
|
92 |
+
|
93 |
+
(1) The period of prescription of a right determined by a final and binding judgment
|
94 |
+
or anything that has the same effect as a final and binding judgment is 10 years
|
95 |
+
even if a period of prescription shorter than 10 years is provided for.
|
96 |
+
|
97 |
+
(2) The provisions of the preceding paragraph do not apply to a claim which is
|
98 |
+
not yet due and payable at the time when it is determined..
|
99 |
+
|
100 |
+
'
|
101 |
+
- '
|
102 |
+
|
103 |
+
Article 593
|
104 |
+
|
105 |
+
A loan for use becomes effective if one of the parties promises to deliver a certain
|
106 |
+
thing, and the other party promises to return the thing when the contract is terminated
|
107 |
+
after the other party gratuitously uses and makes profit of the borrowed thing..
|
108 |
+
|
109 |
+
'
|
110 |
+
- source_sentence: '
|
111 |
+
|
112 |
+
In the case where, after the adjudication of disappearance had been made with
|
113 |
+
respect to A and A had been deemed to have died because it had not been clear
|
114 |
+
whether A had been dead or alive for 7 years, A was found to be alive, but the
|
115 |
+
adjudication of disappearance was not rescinded and A died, the adjudication of
|
116 |
+
disappearance may no longer be rescinded.
|
117 |
+
|
118 |
+
'
|
119 |
+
sentences:
|
120 |
+
- '
|
121 |
+
|
122 |
+
Article 568
|
123 |
+
|
124 |
+
(1) The successful bidder at an auction based on the provisions of the Civil Execution
|
125 |
+
Act and other laws (hereinafter referred to as an "auction" in this Article) may
|
126 |
+
cancel the contract or demand a reduction of the price against the obligor pursuant
|
127 |
+
to the provisions of Articles 541 and 542 and the provisions of Article 563
|
128 |
+
|
129 |
+
(including as applied mutatis mutandis pursuant to Article 565).
|
130 |
+
|
131 |
+
(2) In the cases referred to in the preceding paragraph, if the obligor is insolvent,
|
132 |
+
the successful bidder may demand total or partial reimbursement of the proceeds
|
133 |
+
against the obligees that received the distribution of the proceeds.
|
134 |
+
|
135 |
+
(3) In the cases set forth in the preceding two paragraphs, if obligors knew of
|
136 |
+
the absence of the object or right and did not disclose the same, or if obligees
|
137 |
+
knew of the absence but demanded an auction, the successful bidder may claim compensation
|
138 |
+
for loss or damage against those persons.
|
139 |
+
|
140 |
+
(4) The provisions of the preceding three paragraphs do not apply to the non-conformity
|
141 |
+
with respect to the kind or quality of the subject matter of an auction..
|
142 |
+
|
143 |
+
'
|
144 |
+
- '
|
145 |
+
|
146 |
+
Article 166
|
147 |
+
|
148 |
+
(1) A claim is extinguished by prescription in the following cases:
|
149 |
+
|
150 |
+
(i) if the obligee does not exercise the right within five years from the time
|
151 |
+
when the obligee came to know that it was exercisable; or
|
152 |
+
|
153 |
+
(ii) if the obligee does not exercise the right within 10 years from the time
|
154 |
+
when it became exercisable.
|
155 |
+
|
156 |
+
(2) A claim or property right other than ownership is extinguished by prescription
|
157 |
+
if not exercised within 20 years from the time when the right became exercisable.
|
158 |
+
|
159 |
+
(3) The provisions of the preceding two paragraphs do not preclude the commencement
|
160 |
+
of acquisitive prescription for the benefit of a third party that possesses the
|
161 |
+
subject matter of a right with a time of commencement or a right subject to a
|
162 |
+
condition precedent, at the time of commencing the possession;provided, however,
|
163 |
+
that the holder of the right may demand acknowledgment from the possessor at any
|
164 |
+
time to renew the prescription period.
|
165 |
+
|
166 |
+
Article 412
|
167 |
+
|
168 |
+
(1) If a fixed due date is assigned to the performance of an obligation, the obligor
|
169 |
+
is liable for delay from the time that due date arrives.
|
170 |
+
|
171 |
+
(2) If an uncertain due date is assigned to the performance of an obligation,
|
172 |
+
the obligor is liable for delay from the time when the obligor receives the request
|
173 |
+
for performance after the due date arrives or the time when the obligor becomes
|
174 |
+
aware of the arrival of that due date, whichever comes earlier.
|
175 |
+
|
176 |
+
(3) If no time limit is assigned to the performance of an obligation, the obligor
|
177 |
+
is liable for delay from the time the obligor receives the request for performance..
|
178 |
+
|
179 |
+
'
|
180 |
+
- '
|
181 |
+
|
182 |
+
Article 32
|
183 |
+
|
184 |
+
(1) Having received proof that a missing person is alive or that a missing person
|
185 |
+
died at a time different from the time set forth in the preceding Article, the
|
186 |
+
family court, at the request of the missing person or an interested person, must
|
187 |
+
rescind the declaration of that person''s disappearance.In this case, the rescission
|
188 |
+
does not affect the validity of any act performed in good faith after the declaration
|
189 |
+
of disappearance but before the rescission thereof.
|
190 |
+
|
191 |
+
(2) A person who has acquired property due to a declaration of disappearance loses
|
192 |
+
the rights in question due to its rescission;provided, however, that the person
|
193 |
+
has the obligation to return that property only to the extent currently enriched..
|
194 |
+
|
195 |
+
'
|
196 |
+
- source_sentence: '
|
197 |
+
|
198 |
+
A holder of statutory lien for the sales of the immovable property who has registration
|
199 |
+
of such statutory lien may exercise statutory lien prior to mortgages, regardless
|
200 |
+
of the chronological order of the registraiton.
|
201 |
+
|
202 |
+
'
|
203 |
+
sentences:
|
204 |
+
- '
|
205 |
+
|
206 |
+
Article 339
|
207 |
+
|
208 |
+
Statutory liens registered in accordance with the provisions of the preceding
|
209 |
+
two Articles may be exercised prior to mortgages.
|
210 |
+
|
211 |
+
Article 341
|
212 |
+
|
213 |
+
Beyond what is provided for in this Section, the provisions regarding mortgages
|
214 |
+
apply mutatis mutandis to the effects of statutory liens, provided that it is
|
215 |
+
not inconsistent with the nature of the same..
|
216 |
+
|
217 |
+
'
|
218 |
+
- '
|
219 |
+
|
220 |
+
Article 177
|
221 |
+
|
222 |
+
Acquisitions of, losses of and changes in real rights on immovables may not be
|
223 |
+
duly asserted against any third parties, unless the same are registered pursuant
|
224 |
+
to the applicable provisions of the Real Property Registration Act (Act No. 123
|
225 |
+
of 2004) and other laws regarding registration.
|
226 |
+
|
227 |
+
Article 361
|
228 |
+
|
229 |
+
Beyond what is provided for in this Section, the provisions of the next Chapter
|
230 |
+
(Mortgages) apply mutatis mutandis to pledges of immovables, provided that it
|
231 |
+
is not inconsistent with the nature of the same.
|
232 |
+
|
233 |
+
Article 373
|
234 |
+
|
235 |
+
If more than one mortgage is created with respect to the same immovables, the
|
236 |
+
order of priority of those mortgages follows the chronological order of their
|
237 |
+
registration.
|
238 |
+
|
239 |
+
Article 339
|
240 |
+
|
241 |
+
Statutory liens registered in accordance with the provisions of the preceding
|
242 |
+
two Articles may be exercised prior to mortgages.
|
243 |
+
|
244 |
+
Article 337
|
245 |
+
|
246 |
+
In order to preserve the effectiveness of statutory liens for preservation of
|
247 |
+
immovables, registration must be carried out immediately after the completion
|
248 |
+
of the act of preservation.
|
249 |
+
|
250 |
+
Article 338
|
251 |
+
|
252 |
+
(1) In order to preserve the effectiveness of statutory liens for construction
|
253 |
+
work for immovables, the budgeted expenses of the construction work must be registered
|
254 |
+
prior to the commencement of the same.In this case, if the expenses of the construction
|
255 |
+
work exceed the budgeted amount, a statutory lien does not exist with respect
|
256 |
+
to the amount in excess of the same.
|
257 |
+
|
258 |
+
(2) The amount of increase in value of immovables that resulted from construction
|
259 |
+
work must be evaluated by an appraiser appointed by the court at the time of the
|
260 |
+
participation in the distribution..
|
261 |
+
|
262 |
+
'
|
263 |
+
- '
|
264 |
+
|
265 |
+
Article 216
|
266 |
+
|
267 |
+
If a land suffers or is likely to suffer damage due to destruction or blockage
|
268 |
+
of a structure installed on other land to store, discharge or draw water, the
|
269 |
+
owner of that land may have the owner of that other land repair the structure
|
270 |
+
or remove the impediments, or, if necessary, have the same carry out preventive
|
271 |
+
construction work..
|
272 |
+
|
273 |
+
'
|
274 |
+
- source_sentence: '
|
275 |
+
|
276 |
+
In cases where a mortgage is created with respect to land by the owner of the
|
277 |
+
land, if trees are planted and logged on the land, the mortgage shall entend to
|
278 |
+
the trees.
|
279 |
+
|
280 |
+
'
|
281 |
+
sentences:
|
282 |
+
- '
|
283 |
+
|
284 |
+
Article 659
|
285 |
+
|
286 |
+
A gratuitous bailee bears a duty to keep the bailed thing while exercising the
|
287 |
+
same level of care that the bailee would exercise for their own property..
|
288 |
+
|
289 |
+
'
|
290 |
+
- '
|
291 |
+
|
292 |
+
Article 304
|
293 |
+
|
294 |
+
(1) A statutory lien may also be exercised against things including monies that
|
295 |
+
the obligor is to receive as a result of the sale, lease or loss of, or damage
|
296 |
+
to, the subject matter of the statutory lien;provided, however, that the holder
|
297 |
+
of the statutory lien must attach the same before the payment or delivery of the
|
298 |
+
monies or other thing.
|
299 |
+
|
300 |
+
(2) The provisions of the preceding paragraph also apply to the consideration
|
301 |
+
for real rights created by the obligor on the subject matter of the statutory
|
302 |
+
lien..
|
303 |
+
|
304 |
+
'
|
305 |
+
- '
|
306 |
+
|
307 |
+
Article 370
|
308 |
+
|
309 |
+
A mortgage extends to the things that form an integral part of the immovables
|
310 |
+
that are the subject matter of the mortgage (hereinafter referred to as "mortgaged
|
311 |
+
immovables") except for buildings on the mortgaged land; provided, however, that
|
312 |
+
this does not apply if the act establishing the mortgage provides otherwise or
|
313 |
+
the rescission of fraudulent act may be demanded as prescribed in Article 424,
|
314 |
+
paragraph (3) with regard to the act of the obligor..
|
315 |
+
|
316 |
+
'
|
317 |
+
datasets:
|
318 |
+
- sentence-transformers/coliee
|
319 |
+
pipeline_tag: sentence-similarity
|
320 |
+
library_name: sentence-transformers
|
321 |
+
---
|
322 |
+
|
323 |
+
# SentenceTransformer based on jinaai/jina-embeddings-v2-small-en
|
324 |
+
|
325 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [jinaai/jina-embeddings-v2-small-en](https://huggingface.co/jinaai/jina-embeddings-v2-small-en) on the [coliee](https://huggingface.co/datasets/sentence-transformers/coliee) dataset. It maps sentences & paragraphs to a 512-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
326 |
+
|
327 |
+
## Model Details
|
328 |
+
|
329 |
+
### Model Description
|
330 |
+
- **Model Type:** Sentence Transformer
|
331 |
+
- **Base model:** [jinaai/jina-embeddings-v2-small-en](https://huggingface.co/jinaai/jina-embeddings-v2-small-en) <!-- at revision 796cff318cdd4e5fbe8b7303a1ef8cbec36996ef -->
|
332 |
+
- **Maximum Sequence Length:** 8192 tokens
|
333 |
+
- **Output Dimensionality:** 512 dimensions
|
334 |
+
- **Similarity Function:** Cosine Similarity
|
335 |
+
- **Training Dataset:**
|
336 |
+
- [coliee](https://huggingface.co/datasets/sentence-transformers/coliee)
|
337 |
+
- **Language:** en
|
338 |
+
<!-- - **License:** Unknown -->
|
339 |
+
|
340 |
+
### Model Sources
|
341 |
+
|
342 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
343 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
344 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
345 |
+
|
346 |
+
### Full Model Architecture
|
347 |
+
|
348 |
+
```
|
349 |
+
SentenceTransformer(
|
350 |
+
(0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: JinaBertModel
|
351 |
+
(1): Pooling({'word_embedding_dimension': 512, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
352 |
+
)
|
353 |
+
```
|
354 |
+
|
355 |
+
## Usage
|
356 |
+
|
357 |
+
### Direct Usage (Sentence Transformers)
|
358 |
+
|
359 |
+
First install the Sentence Transformers library:
|
360 |
+
|
361 |
+
```bash
|
362 |
+
pip install -U sentence-transformers
|
363 |
+
```
|
364 |
+
|
365 |
+
Then you can load this model and run inference.
|
366 |
+
```python
|
367 |
+
from sentence_transformers import SentenceTransformer
|
368 |
+
|
369 |
+
# Download from the 🤗 Hub
|
370 |
+
model = SentenceTransformer("bwang0911/jev2-legal")
|
371 |
+
# Run inference
|
372 |
+
sentences = [
|
373 |
+
'\nIn cases where a mortgage is created with respect to land by the owner of the land, if trees are planted and logged on the land, the mortgage shall entend to the trees.\n',
|
374 |
+
'\nArticle 370\nA mortgage extends to the things that form an integral part of the immovables that are the subject matter of the mortgage (hereinafter referred to as "mortgaged immovables") except for buildings on the mortgaged land; provided, however, that this does not apply if the act establishing the mortgage provides otherwise or the rescission of fraudulent act may be demanded as prescribed in Article 424, paragraph (3) with regard to the act of the obligor..\n',
|
375 |
+
'\nArticle 304\n(1) A statutory lien may also be exercised against things including monies that the obligor is to receive as a result of the sale, lease or loss of, or damage to, the subject matter of the statutory lien;provided, however, that the holder of the statutory lien must attach the same before the payment or delivery of the monies or other thing.\n(2) The provisions of the preceding paragraph also apply to the consideration for real rights created by the obligor on the subject matter of the statutory lien..\n',
|
376 |
+
]
|
377 |
+
embeddings = model.encode(sentences)
|
378 |
+
print(embeddings.shape)
|
379 |
+
# [3, 512]
|
380 |
+
|
381 |
+
# Get the similarity scores for the embeddings
|
382 |
+
similarities = model.similarity(embeddings, embeddings)
|
383 |
+
print(similarities.shape)
|
384 |
+
# [3, 3]
|
385 |
+
```
|
386 |
+
|
387 |
+
<!--
|
388 |
+
### Direct Usage (Transformers)
|
389 |
+
|
390 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
391 |
+
|
392 |
+
</details>
|
393 |
+
-->
|
394 |
+
|
395 |
+
<!--
|
396 |
+
### Downstream Usage (Sentence Transformers)
|
397 |
+
|
398 |
+
You can finetune this model on your own dataset.
|
399 |
+
|
400 |
+
<details><summary>Click to expand</summary>
|
401 |
+
|
402 |
+
</details>
|
403 |
+
-->
|
404 |
+
|
405 |
+
<!--
|
406 |
+
### Out-of-Scope Use
|
407 |
+
|
408 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
409 |
+
-->
|
410 |
+
|
411 |
+
<!--
|
412 |
+
## Bias, Risks and Limitations
|
413 |
+
|
414 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
415 |
+
-->
|
416 |
+
|
417 |
+
<!--
|
418 |
+
### Recommendations
|
419 |
+
|
420 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
421 |
+
-->
|
422 |
+
|
423 |
+
## Training Details
|
424 |
+
|
425 |
+
### Training Dataset
|
426 |
+
|
427 |
+
#### coliee
|
428 |
+
|
429 |
+
* Dataset: [coliee](https://huggingface.co/datasets/sentence-transformers/coliee) at [d90012e](https://huggingface.co/datasets/sentence-transformers/coliee/tree/d90012e1f3a0d7103713bb2ce7faed1636a10090)
|
430 |
+
* Size: 463 training samples
|
431 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
432 |
+
* Approximate statistics based on the first 463 samples:
|
433 |
+
| | anchor | positive | negative |
|
434 |
+
|:--------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
435 |
+
| type | string | string | string |
|
436 |
+
| details | <ul><li>min: 10 tokens</li><li>mean: 46.53 tokens</li><li>max: 137 tokens</li></ul> | <ul><li>min: 22 tokens</li><li>mean: 120.43 tokens</li><li>max: 441 tokens</li></ul> | <ul><li>min: 26 tokens</li><li>mean: 126.66 tokens</li><li>max: 405 tokens</li></ul> |
|
437 |
+
* Samples:
|
438 |
+
| anchor | positive | negative |
|
439 |
+
|:---|:---|:---|
|
440 |
+
| <code><br>A compulsory auction is also a sale, so warranty is imposed the same as for an ordinary sale.<br></code> | <code><br>Article 568<br>(1) The successful bidder at an auction based on the provisions of the Civil Execution Act and other laws (hereinafter referred to as an "auction" in this Article) may cancel the contract or demand a reduction of the price against the obligor pursuant to the provisions of Articles 541 and 542 and the provisions of Article 563<br>(including as applied mutatis mutandis pursuant to Article 565).<br>(2) In the cases referred to in the preceding paragraph, if the obligor is insolvent, the successful bidder may demand total or partial reimbursement of the proceeds against the obligees that received the distribution of the proceeds.<br>(3) In the cases set forth in the preceding two paragraphs, if obligors knew of the absence of the object or right and did not disclose the same, or if obligees knew of the absence but demanded an auction, the successful bidder may claim compensation for loss or damage against those persons.<br>(4) The provisions of the preceding three paragraphs do not apply to the non-conformity with respect to the kind or quality of the subject matter of an auction..<br></code> | <code><br>Article 575<br>(1) If the subject matter of a sale which has not yet been delivered bears fruits, the fruits vest in the seller.<br>(2) The buyer bears the obligation to pay interest on the price beginning from the day of delivery;provided, however, that if a due date is provided for the payment of the price, it is not necessary to pay the interest until that due date arrives..<br></code> |
|
441 |
+
| <code><br>In cases where a person plans to prevent crime in their own house by fixing the fence of a neighboring house, that person is found as having intent towards the other person.<br></code> | <code><br>Article 697<br>(1) A person that has begun to manage a business for another person without being obligated to do so (hereinafter in this Chapter referred to as a "manager") must manage that business, in accordance with the nature of the business, in the way that best suits the interests of the principal (hereinafter referred to as "benevolent intervention in another's business").<br>(2) A manager must engage in benevolent intervention in another's business in accordance with the intentions of the principal if the manager knows, or is able to conjecture that intention..<br></code> | <code><br>Article 94<br>(1) A false manifestation of intention that a person makes in collusion with another person is void.<br>(2) The nullity of a manifestation of intention under the provisions of the preceding paragraph may not be duly asserted against a third party in good faith..<br></code> |
|
442 |
+
| <code><br>In cases where an individual rescues another person from getting hit by a car by pushing that person out of the way, causing the person's luxury kimono to get dirty, the rescuer does not have to compensate damages for the kimono.<br></code> | <code><br>Article 698<br>If a manager engages in benevolent intervention in another's business in order to allow a principal to escape imminent danger to the principal's person, reputation, or property, the manager is not liable to compensate for damage resulting from this unless the manager has acted in bad faith or with gross negligence..<br></code> | <code><br>Article 465-2<br>(1) A guarantor to a guarantee contract under which the principal obligation is one or more unidentified obligations within a certain specified scope (hereinafter referred to as a "contract for revolving guarantee") and the guarantor is not a corporation (hereinafter referred to as a "contract for revolving guarantee by an individual") is liable to perform the obligation in terms of the amounts of the principal of the principal obligation, any interest, penalty and compensation for loss or damage in connection with the principal obligation, and all the other charges secondary to the obligation, as well as the amount of any penalty or compensation for loss or damage which is agreed- upon on with regard to the guarantee obligation, up to a certain maximum amount which pertains to all of these amounts.<br>(2) A contract for revolving guarantee by an individual does not become effective unless it provides for the maximum amount prescribed in the preceding paragraph.<br>(3) The provisions of Article 446, paragraphs (2) and (3) apply mutatis mutandis to the provisions concerning a maximum amount prescribed in paragraph (1) in a contract for revolving guarantee by an individual..<br></code> |
|
443 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
444 |
+
```json
|
445 |
+
{
|
446 |
+
"scale": 20.0,
|
447 |
+
"similarity_fct": "cos_sim"
|
448 |
+
}
|
449 |
+
```
|
450 |
+
|
451 |
+
### Training Hyperparameters
|
452 |
+
#### Non-Default Hyperparameters
|
453 |
+
|
454 |
+
- `per_device_train_batch_size`: 128
|
455 |
+
- `per_device_eval_batch_size`: 16
|
456 |
+
- `learning_rate`: 2e-05
|
457 |
+
- `warmup_ratio`: 0.1
|
458 |
+
- `fp16`: True
|
459 |
+
- `batch_sampler`: no_duplicates
|
460 |
+
|
461 |
+
#### All Hyperparameters
|
462 |
+
<details><summary>Click to expand</summary>
|
463 |
+
|
464 |
+
- `overwrite_output_dir`: False
|
465 |
+
- `do_predict`: False
|
466 |
+
- `eval_strategy`: no
|
467 |
+
- `prediction_loss_only`: True
|
468 |
+
- `per_device_train_batch_size`: 128
|
469 |
+
- `per_device_eval_batch_size`: 16
|
470 |
+
- `per_gpu_train_batch_size`: None
|
471 |
+
- `per_gpu_eval_batch_size`: None
|
472 |
+
- `gradient_accumulation_steps`: 1
|
473 |
+
- `eval_accumulation_steps`: None
|
474 |
+
- `torch_empty_cache_steps`: None
|
475 |
+
- `learning_rate`: 2e-05
|
476 |
+
- `weight_decay`: 0.0
|
477 |
+
- `adam_beta1`: 0.9
|
478 |
+
- `adam_beta2`: 0.999
|
479 |
+
- `adam_epsilon`: 1e-08
|
480 |
+
- `max_grad_norm`: 1.0
|
481 |
+
- `num_train_epochs`: 3
|
482 |
+
- `max_steps`: -1
|
483 |
+
- `lr_scheduler_type`: linear
|
484 |
+
- `lr_scheduler_kwargs`: {}
|
485 |
+
- `warmup_ratio`: 0.1
|
486 |
+
- `warmup_steps`: 0
|
487 |
+
- `log_level`: passive
|
488 |
+
- `log_level_replica`: warning
|
489 |
+
- `log_on_each_node`: True
|
490 |
+
- `logging_nan_inf_filter`: True
|
491 |
+
- `save_safetensors`: True
|
492 |
+
- `save_on_each_node`: False
|
493 |
+
- `save_only_model`: False
|
494 |
+
- `restore_callback_states_from_checkpoint`: False
|
495 |
+
- `no_cuda`: False
|
496 |
+
- `use_cpu`: False
|
497 |
+
- `use_mps_device`: False
|
498 |
+
- `seed`: 42
|
499 |
+
- `data_seed`: None
|
500 |
+
- `jit_mode_eval`: False
|
501 |
+
- `use_ipex`: False
|
502 |
+
- `bf16`: False
|
503 |
+
- `fp16`: True
|
504 |
+
- `fp16_opt_level`: O1
|
505 |
+
- `half_precision_backend`: auto
|
506 |
+
- `bf16_full_eval`: False
|
507 |
+
- `fp16_full_eval`: False
|
508 |
+
- `tf32`: None
|
509 |
+
- `local_rank`: 0
|
510 |
+
- `ddp_backend`: None
|
511 |
+
- `tpu_num_cores`: None
|
512 |
+
- `tpu_metrics_debug`: False
|
513 |
+
- `debug`: []
|
514 |
+
- `dataloader_drop_last`: False
|
515 |
+
- `dataloader_num_workers`: 0
|
516 |
+
- `dataloader_prefetch_factor`: None
|
517 |
+
- `past_index`: -1
|
518 |
+
- `disable_tqdm`: False
|
519 |
+
- `remove_unused_columns`: True
|
520 |
+
- `label_names`: None
|
521 |
+
- `load_best_model_at_end`: False
|
522 |
+
- `ignore_data_skip`: False
|
523 |
+
- `fsdp`: []
|
524 |
+
- `fsdp_min_num_params`: 0
|
525 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
526 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
527 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
528 |
+
- `deepspeed`: None
|
529 |
+
- `label_smoothing_factor`: 0.0
|
530 |
+
- `optim`: adamw_torch
|
531 |
+
- `optim_args`: None
|
532 |
+
- `adafactor`: False
|
533 |
+
- `group_by_length`: False
|
534 |
+
- `length_column_name`: length
|
535 |
+
- `ddp_find_unused_parameters`: None
|
536 |
+
- `ddp_bucket_cap_mb`: None
|
537 |
+
- `ddp_broadcast_buffers`: False
|
538 |
+
- `dataloader_pin_memory`: True
|
539 |
+
- `dataloader_persistent_workers`: False
|
540 |
+
- `skip_memory_metrics`: True
|
541 |
+
- `use_legacy_prediction_loop`: False
|
542 |
+
- `push_to_hub`: False
|
543 |
+
- `resume_from_checkpoint`: None
|
544 |
+
- `hub_model_id`: None
|
545 |
+
- `hub_strategy`: every_save
|
546 |
+
- `hub_private_repo`: False
|
547 |
+
- `hub_always_push`: False
|
548 |
+
- `gradient_checkpointing`: False
|
549 |
+
- `gradient_checkpointing_kwargs`: None
|
550 |
+
- `include_inputs_for_metrics`: False
|
551 |
+
- `eval_do_concat_batches`: True
|
552 |
+
- `fp16_backend`: auto
|
553 |
+
- `push_to_hub_model_id`: None
|
554 |
+
- `push_to_hub_organization`: None
|
555 |
+
- `mp_parameters`:
|
556 |
+
- `auto_find_batch_size`: False
|
557 |
+
- `full_determinism`: False
|
558 |
+
- `torchdynamo`: None
|
559 |
+
- `ray_scope`: last
|
560 |
+
- `ddp_timeout`: 1800
|
561 |
+
- `torch_compile`: False
|
562 |
+
- `torch_compile_backend`: None
|
563 |
+
- `torch_compile_mode`: None
|
564 |
+
- `dispatch_batches`: None
|
565 |
+
- `split_batches`: None
|
566 |
+
- `include_tokens_per_second`: False
|
567 |
+
- `include_num_input_tokens_seen`: False
|
568 |
+
- `neftune_noise_alpha`: None
|
569 |
+
- `optim_target_modules`: None
|
570 |
+
- `batch_eval_metrics`: False
|
571 |
+
- `eval_on_start`: False
|
572 |
+
- `use_liger_kernel`: False
|
573 |
+
- `eval_use_gather_object`: False
|
574 |
+
- `batch_sampler`: no_duplicates
|
575 |
+
- `multi_dataset_batch_sampler`: proportional
|
576 |
+
|
577 |
+
</details>
|
578 |
+
|
579 |
+
### Framework Versions
|
580 |
+
- Python: 3.10.12
|
581 |
+
- Sentence Transformers: 3.1.1
|
582 |
+
- Transformers: 4.45.2
|
583 |
+
- PyTorch: 2.5.1+cu124
|
584 |
+
- Accelerate: 1.1.0
|
585 |
+
- Datasets: 3.1.0
|
586 |
+
- Tokenizers: 0.20.2
|
587 |
+
|
588 |
+
## Citation
|
589 |
+
|
590 |
+
### BibTeX
|
591 |
+
|
592 |
+
#### Sentence Transformers
|
593 |
+
```bibtex
|
594 |
+
@inproceedings{reimers-2019-sentence-bert,
|
595 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
596 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
597 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
598 |
+
month = "11",
|
599 |
+
year = "2019",
|
600 |
+
publisher = "Association for Computational Linguistics",
|
601 |
+
url = "https://arxiv.org/abs/1908.10084",
|
602 |
+
}
|
603 |
+
```
|
604 |
+
|
605 |
+
#### MultipleNegativesRankingLoss
|
606 |
+
```bibtex
|
607 |
+
@misc{henderson2017efficient,
|
608 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
609 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
610 |
+
year={2017},
|
611 |
+
eprint={1705.00652},
|
612 |
+
archivePrefix={arXiv},
|
613 |
+
primaryClass={cs.CL}
|
614 |
+
}
|
615 |
+
```
|
616 |
+
|
617 |
+
<!--
|
618 |
+
## Glossary
|
619 |
+
|
620 |
+
*Clearly define terms in order to be accessible across audiences.*
|
621 |
+
-->
|
622 |
+
|
623 |
+
<!--
|
624 |
+
## Model Card Authors
|
625 |
+
|
626 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
627 |
+
-->
|
628 |
+
|
629 |
+
<!--
|
630 |
+
## Model Card Contact
|
631 |
+
|
632 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
633 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "jinaai/jina-embeddings-v2-small-en",
|
3 |
+
"architectures": [
|
4 |
+
"JinaBertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.0,
|
7 |
+
"attn_implementation": null,
|
8 |
+
"auto_map": {
|
9 |
+
"AutoConfig": "jinaai/jina-bert-implementation--configuration_bert.JinaBertConfig",
|
10 |
+
"AutoModel": "jinaai/jina-bert-implementation--modeling_bert.JinaBertModel",
|
11 |
+
"AutoModelForMaskedLM": "jinaai/jina-bert-implementation--modeling_bert.JinaBertForMaskedLM",
|
12 |
+
"AutoModelForSequenceClassification": "jinaai/jina-bert-implementation--modeling_bert.JinaBertForSequenceClassification"
|
13 |
+
},
|
14 |
+
"classifier_dropout": null,
|
15 |
+
"emb_pooler": "mean",
|
16 |
+
"feed_forward_type": "geglu",
|
17 |
+
"gradient_checkpointing": false,
|
18 |
+
"hidden_act": "gelu",
|
19 |
+
"hidden_dropout_prob": 0.1,
|
20 |
+
"hidden_size": 512,
|
21 |
+
"initializer_range": 0.02,
|
22 |
+
"intermediate_size": 2048,
|
23 |
+
"layer_norm_eps": 1e-12,
|
24 |
+
"max_position_embeddings": 8192,
|
25 |
+
"model_max_length": 8192,
|
26 |
+
"model_type": "bert",
|
27 |
+
"num_attention_heads": 8,
|
28 |
+
"num_hidden_layers": 4,
|
29 |
+
"pad_token_id": 0,
|
30 |
+
"position_embedding_type": "alibi",
|
31 |
+
"torch_dtype": "float32",
|
32 |
+
"transformers_version": "4.45.2",
|
33 |
+
"type_vocab_size": 2,
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 30528
|
36 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.1.1",
|
4 |
+
"transformers": "4.45.2",
|
5 |
+
"pytorch": "2.5.1+cu124"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44eb5c241ea5b10113bda4b966816098b7236e0c22d52588d38e1810ae2f0d81
|
3 |
+
size 130769960
|
modules.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
}
|
14 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 8192,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"model_max_length": 8192,
|
50 |
+
"never_split": null,
|
51 |
+
"pad_token": "[PAD]",
|
52 |
+
"sep_token": "[SEP]",
|
53 |
+
"strip_accents": null,
|
54 |
+
"tokenize_chinese_chars": true,
|
55 |
+
"tokenizer_class": "BertTokenizer",
|
56 |
+
"unk_token": "[UNK]"
|
57 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|