Add new SentenceTransformer model
Browse files
- .gitattributes +1 -0
- 1_Pooling/config.json +10 -0
- README.md +601 -0
- config.json +28 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +51 -0
- tokenizer.json +3 -0
- tokenizer_config.json +55 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 1024,
|
3 |
+
"pooling_mode_cls_token": true,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,601 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- sentence-transformers
|
4 |
+
- sentence-similarity
|
5 |
+
- feature-extraction
|
6 |
+
- generated_from_trainer
|
7 |
+
- dataset_size:6433
|
8 |
+
- loss:Infonce
|
9 |
+
base_model: BAAI/bge-m3
|
10 |
+
widget:
|
11 |
+
- source_sentence: What are the factors that influence the treatment and healing of
|
12 |
+
equine lameness
|
13 |
+
sentences:
|
14 |
+
- 'Masterkraft may refer to:
|
15 |
+
|
16 |
+
|
17 |
+
Masterkraft (producer), a record producer
|
18 |
+
|
19 |
+
MSTRKRFT, a Canadian electronic music group'
|
20 |
+
- 'The treatment of equine lameness is a complex subject. Lameness in horses has
|
21 |
+
a variety of causes, and treatment must be tailored to the type and degree of
|
22 |
+
injury, as well as the financial capabilities of the owner. Treatment may be applied
|
23 |
+
locally, systemically, or intralesionally, and the strategy for treatment may
|
24 |
+
change as healing progresses. The end goal is to reduce the pain and inflammation
|
25 |
+
associated with injury, to encourage the injured tissue to heal with normal structure
|
26 |
+
and function, and to ultimately return the horse to the highest level of performance
|
27 |
+
possible following recovery. The process of healing
|
28 |
+
|
29 |
+
|
30 |
+
Bone
|
31 |
+
|
32 |
+
|
33 |
+
Bone heals by formation of a callus over the defective area. Speed and quality
|
34 |
+
of healing is directly related to the blood supply and fracture stability. Rest
|
35 |
+
is required immediately following injury to reduce movement of the fracture site.
|
36 |
+
Stability may be improved through use of surgical implants or casting, depending
|
37 |
+
on the location of extent of the fracture. Shock wave therapy is sometimes employed
|
38 |
+
in the case of splint bone fracture or stress fractures to the cannon bones, to
|
39 |
+
improve blood flow to the area. Fractures within a joint, such as chip fractures
|
40 |
+
in the knee, hock, or fetlock, require arthroscopic surgery to prevent secondary
|
41 |
+
arthritis of that joint. In some cases, the callus may place pressure on surrounding
|
42 |
+
soft tissue structures. The callus of a splint bone fracture can push on the adjacent
|
43 |
+
suspensory ligament, leading to lameness from secondary suspensory desmitis. Treatment
|
44 |
+
usually involves the removal of the offending callus. On average, bone heals better
|
45 |
+
than soft tissue. It requires less time to heal and, unlike soft tissue which
|
46 |
+
is always weaker after healing, bone heals to 100% strength. However, fracture
|
47 |
+
healing in horses is complicated by their size, flightiness, and desire to stand.
|
48 |
+
Horses are at risk of re-injury of the fracture site, especially when trying to
|
49 |
+
rise after lying down, or when recovering from anesthesia following fracture repair.
|
50 |
+
Forced recumbency is not an option for horses, making healing more difficult.
|
51 |
+
Weight bearing on a single front or hind limb increases the likelihood of support
|
52 |
+
limb laminitis. Additionally, the cost of casting or surgical fixation makes treatment
|
53 |
+
financially unattainable for some owners. While limb fractures are no longer a
|
54 |
+
death-sentence for horses, it is still considered a very serious injury. In general,
|
55 |
+
a horse is more likely to survive if it is small in stature and has a good temperament
|
56 |
+
that will tolerate the months of inactivity required for healing. Fractures that
|
57 |
+
are open, comminuted (very fragmented), or located higher on the limb tend to
|
58 |
+
have a poorer prognosis. Synovial joints
|
59 |
+
|
60 |
+
Lameness is most commonly associated with injury to synovial joints, or those
|
61 |
+
joints containing articular cartilage, a joint capsule, and a synovial membrane.
|
62 |
+
Joint disease may affect the joint capsule and synovial membrane, articular cartilage,
|
63 |
+
subchondral bone (the bone underneath the cartilage), menisci, or any ligaments
|
64 |
+
associated with the joint. Damage to any of these tissues leads to inflammation,
|
65 |
+
which is especially problematic in the joint. While degeneration of articular
|
66 |
+
cartilage is a common disease process in working animals, resulting in osteoarthritis,
|
67 |
+
cartilage is aneural (does not contain nerves) and does not produce pain. Pain
|
68 |
+
associated with osteoarthritis is secondary to joint capsule pain, due to joint
|
69 |
+
distention and reduced range of motion, or to pain from the underlying bone, which
|
70 |
+
may become damaged following erosion of the articular cartilage. Inflammatory
|
71 |
+
products, such as inflammatory mediators and cytokines, damage articular cartilage
|
72 |
+
and have been shown to weaken intra-articular ligaments. Therefore, treatment
|
73 |
+
of joint disease should not only address the primary injury producing inflammation,
|
74 |
+
but also the inflammatory cycle that leads to further tissue damage. Cryotherapy,
|
75 |
+
joint lavage, systemic anti-inflammatories, or intra-articular medications are
|
76 |
+
used to reduce joint inflammation. In the case of severe joint pathology such
|
77 |
+
as an osteochondral chip, intra-articular fracture, osteochondritis dissecans
|
78 |
+
lesion, or ligamentous or meniscal injury, arthroscopy may be required to ensure
|
79 |
+
normal function of that joint. Debris within the joint, such as from a chip fracture,
|
80 |
+
can cause long term damage to the synovium and articular cartilage leading to
|
81 |
+
osteoarthritis, and is therefore best removed. Following acute injury, joints
|
82 |
+
often benefit from specialized physical therapy, such as swimming, to prevent
|
83 |
+
the loss of range of motion associated with joint capsule fibrosis. Treatment
|
84 |
+
of joint cartilage injury is difficult and often unrewarding. Partial-thickness
|
85 |
+
defects do not heal. The body will try to repair full-thickness cartilage defects
|
86 |
+
using scar tissue or fibrocartilage, both of which are poor substitutes for normal,
|
87 |
+
healthy articular cartilage. Current treatment includes arthroscopy-produced microfractures
|
88 |
+
within the subchondral plate. These microfractures encourage an inflammatory response
|
89 |
+
within the defect, which recruits stem cells to the area. Unfortunately, these
|
90 |
+
cells differentiate into fibrocartilage, rather than normal joint (hyaline) cartilage,
|
91 |
+
leading to inferior tissue repair at the site of injury. Bone marrow aspirate
|
92 |
+
concentrate (BMAC) has shown some benefits when grafted into the area following
|
93 |
+
microtrauma. However, the primary treatment for degenerative joint disease involves
|
94 |
+
reducing the inflammatory process that is known to accelerate articular cartilage
|
95 |
+
degeneration. Tendon and ligament
|
96 |
+
|
97 |
+
|
98 |
+
Tendon is primarily composed of elastic type I collagen. However, mature tendon
|
99 |
+
contains cells that have a limited ability to regenerate. Following injury, tendon
|
100 |
+
lays down type III collagen, or scar tissue, which is stronger than type I collagen
|
101 |
+
but stiffer and less-elastic. This makes it less distensible and more likely to
|
102 |
+
re-injure when the horse begins to stretch the tendon during strenuous work. Certain
|
103 |
+
treatments may improve the final tendon fiber quality, and subsequently increase
|
104 |
+
the likelihood that the horse will return to full performance post-injury. Healing
|
105 |
+
of soft tissue injury is often monitored using ultrasound to assess the lesion
|
106 |
+
size and fiber pattern. Monitoring soft tissue injury with ultrasound allows for
|
107 |
+
a more scientific determination of when to introduce exercise back into the horse''s
|
108 |
+
rehabilitation program, and for quick intervention should the injury worsen. Recently,
|
109 |
+
a new ultrasound technique called color Doppler ultrasonography has been used
|
110 |
+
to assess equine tendon injuries. Color Doppler measures the degree of blood flow
|
111 |
+
to a lesion, allowing for more accurate assessment of healing. Rest and hand-walking
|
112 |
+
|
113 |
+
Rest is almost always recommended for treatment of lameness because it reduces
|
114 |
+
forces placed on damaged tissue, allowing the normal process of healing to occur.
|
115 |
+
Type and severity of injury determines the duration and degree of rest required.
|
116 |
+
Aggressive limitations of activity may be required in cases of fracture. Horses
|
117 |
+
are kept tied for the several-month duration of healing, to prevent them from
|
118 |
+
lying down and potentially re-injuring the bone while trying to stand. In other
|
119 |
+
cases, rest may be contraindicated. Animals with a history of upward fixation
|
120 |
+
of the patella, polysaccharide storage myopathy, and equine recurrent rhabdomyolysis
|
121 |
+
are often best kept on a schedule of regular exercise. Rest may be counterproductive
|
122 |
+
if the lameness is secondary to osteoarthritis. In this case, mild exercise improves
|
123 |
+
joint mobility and lameness can worsen with confinement. Rest may vary from strict
|
124 |
+
confinement (“stall rest”), to small paddock or pasture turnout, to reduction
|
125 |
+
of exercise intensity. Horses are often unpredictable when on prolonged stall
|
126 |
+
rest, which greatly increases the risk of re-injury when hand-walking is begun.
|
127 |
+
Sedation or additional forms of restraint may be needed to help control the horse
|
128 |
+
during this initial period of'
|
129 |
+
- As we stand right now, our broadcasts resemble a mixture between a graduate-level
|
130 |
+
college seminar and a JV football game with the volume turned down 11 notches.
|
131 |
+
Everything the commentators narrate is an explanation simply of what is going
|
132 |
+
on, not speculation as to why they are doing it. They simply state what an ejection
|
133 |
+
means, what a particular whistle is for, or what the purpose was of taking a time
|
134 |
+
out. There is no substance. If Water Polo hopes and expects to be a household
|
135 |
+
name, or at least something that households want on their television instead of
|
136 |
+
the Wednesday Bowling Championships, then we need to institute a change. Most
|
137 |
+
broadcasts are viewed by Water Polo fans…fans that already understand the game
|
138 |
+
and the rules. We don’t need to be spoken to like a freshman college student who’s
|
139 |
+
learning algebra for the first time. Most of us have already done that, and I
|
140 |
+
feel confident in claiming that no one cares to do it again, especially not for
|
141 |
+
entertainment. Production and entertainment value are key to the growth and expansion
|
142 |
+
of any sport, especially Water Polo. Invite a color commentator that knows the
|
143 |
+
game well enough to look for the nuances in the action.
|
144 |
+
- source_sentence: What architectural features make Rosslyn Chapel unique
|
145 |
+
sentences:
|
146 |
+
- 'Star Wars: Galaxy’s Edge may not officially open until the end of June, but for
|
147 |
+
some fans, it could happen even sooner.'
|
148 |
+
- Visual artist Megan Jonas painted urban Phoenix landscapes, and musician Jordan
|
149 |
+
Ignacio composed music to accompany the images. Director Joshua J. Provost is
|
150 |
+
an Arizona native and explores how the artists are inspired, allowing them to
|
151 |
+
express their creative process through animation and sound.
|
152 |
+
- 'In 1736, James St. Clair worked to restore the building to its former state.
|
153 |
+
Since then, the Earls of St. Clair have worked to keep the building in good condition.
|
154 |
+
There are a few interesting things to mention about Rosslyn Chapel. First, the
|
155 |
+
town Roslin was founded as a home for the stone masons who worked on the original
|
156 |
+
church. This is the same Roslin where scientists produced the first cloned animal,
|
157 |
+
Dolly the sheep, in 1996. Secondly, the chapel''s design is quite eccentric. There
|
158 |
+
are 32 different arch designs, which is strange enough, and some are even hollow.
|
159 |
+
There are numerous carvings, including the "Dance of Death" motif, Abraham about
|
160 |
+
to sacrifice Isaac, Samson destroying Philistines, David killing a lion, the Prodigal
|
161 |
+
son, the Crucifixion, and scenes of the St. Clairs. The building is oriented east-west:
|
162 |
+
|
163 |
+
marked out by the solar ray, according to the ancient tradition. A geometrical
|
164 |
+
figure is usually used to regulate the proportions, and a double equilateral triangle
|
165 |
+
is used in Rosslyn. Then there is the case of the Apprentice Pillar and its possible
|
166 |
+
significance. The story associated with it is somewhat reminiscent of the Widow''s
|
167 |
+
Son of the Freemasons, and there are numerous claims--unsubstantiated--which say
|
168 |
+
that the Knights Templar worshipped here after escaping from the Continent, and
|
169 |
+
that they brought the Holy Grail and hid it in the pillar. --The Rosslyn Chapel
|
170 |
+
website
|
171 |
+
|
172 |
+
All in all, an unusual place. Back to "R" | Back to JCE
|
173 |
+
|
174 |
+
Mary Jones © 2003'
|
175 |
+
- source_sentence: What is the liability of Norwest regarding the information provided
|
176 |
+
on their website
|
177 |
+
sentences:
|
178 |
+
- Mulpha Norwest Pty Ltd ACN 000 004 633, Norwest City Pty Ltd ACN 603 333 615 (in
|
179 |
+
its personal capacity and as trustee of the Norwest City Trust ABN 26 695 958
|
180 |
+
023), Norwest Real Estate Pty Ltd ACN 000 692 213, Norwest Flexi Pty Ltd ACN 607
|
181 |
+
435 165 (in its personal capacity and as trustee of Flexi Trust ABN 49 958 864
|
182 |
+
033) and Circa 1 Pty Ltd (“Norwest, we, us, our ”) updates this website as often
|
183 |
+
as practicable and tries to ensure that all information provided as part of this
|
184 |
+
website is correct at the time of its inclusion; however, we do not guarantee
|
185 |
+
its accuracy at any time. All information provided as part of this website is
|
186 |
+
intended to be general in nature and you should not rely on it in connection with
|
187 |
+
the making of any decision. We are not liable for any action you may take as a
|
188 |
+
result of, or loss incurred from relying on this website, including from data
|
189 |
+
corruption on download. We do not take responsibility for websites of other organisations,
|
190 |
+
whether or not linked, or any device used to access or view our website.
|
191 |
+
- Coaltown (1945–1965) was an American Hall of Fame Champion Thoroughbred racehorse
|
192 |
+
of whom The New York Times said "was probably the most underrated Thoroughbred
|
193 |
+
of the 20th Century."
|
194 |
+
- '“As a proud Cal Alum, I’m excited to help lay the foundation for Berkeley to
|
195 |
+
become a globally recognized hub for Artificial Intelligence.”
|
196 |
+
|
197 |
+
We’re excited to share that we are expanding our team to execute on this vision.
|
198 |
+
Today, we welcome six UC Berkeley Faculty as Co-Founders of AI@The House and Faculty
|
199 |
+
Partners at The House Fund. Their professional and academic expertise spans across
|
200 |
+
the full stack of AI — theory and statistics, computer vision, systems, robotics,
|
201 |
+
security, algorithms, hardware, and more.'
|
202 |
+
- source_sentence: In what year were the book reviews mentioned in various publications
|
203 |
+
published
|
204 |
+
sentences:
|
205 |
+
- Vol 13, No 1. Pages 84 to 86. JSTOR. Julius Goebel Jr. (September 1926) Political
|
206 |
+
Science Quarterly. Vol 41, No 3. Pages 476 to 479. JSTOR. Plucknett, Theodore
|
207 |
+
F T. "Book Reviews" (1926) 39 Harvard Law Review 405 JSTOR. Zane, John M. "Current
|
208 |
+
Legal Literature". (1926) 12 American Bar Association Journal 172. JSTOR. Putnam,
|
209 |
+
Bertha Haven. "Reviews of Books" (July 1926) The American Historical Review. Vol
|
210 |
+
31, No 4. Page 763. JSTOR. H D H. "Book Reviews" (1926) 2 Cambridge Law Journal
|
211 |
+
408. JSTOR.
|
212 |
+
- 'Wednesday, September 24, 2014
|
213 |
+
|
214 |
+
Kiwi Lane & Bella Blvd "That''s My Boy"
|
215 |
+
|
216 |
+
Alicia with you today for Kiwi Lane. I have for you a single page layout using
|
217 |
+
Bella Blvd''s Max Collections paper That''s My Boy, Borders, Treasures & Text
|
218 |
+
stickers with some die cut pieces from that collection.'
|
219 |
+
- Memoir '44 is a historical boardgame where players face-off in stylized battles
|
220 |
+
of some of the most famous historic battles of World War II including Omaha Beach,
|
221 |
+
Pegasus Bridge, Operation Cobra and the Ardennes. Memoir '44 includes over 15
|
222 |
+
different battle scenarios and features a double-sided hex game board for both
|
223 |
+
beach landings and countryside combat.
|
224 |
+
- source_sentence: What rights do individuals have regarding their personal data according
|
225 |
+
to European Regulation no. 679/2016
|
226 |
+
sentences:
|
227 |
+
- 'The regional economy has benefited from these investments in infrastructure.
|
228 |
+
The project owner has also funded the construction of a local school, which is
|
229 |
+
providing benefits to local children. Project impacts and benefits:
|
230 |
+
|
231 |
+
- The project has generated hydropower plant operation/ maintenance jobs for local
|
232 |
+
people. - 24 operational staff have benefited from six months of capacity building
|
233 |
+
in the form of technical training. - The construction of a new transmission line
|
234 |
+
is reducing electricity loss and increasing the electricity supply in the region.
|
235 |
+
- Former low-quality infrastructure systems in the region have improved, e.g.
|
236 |
+
by upgrading roads, and by building bridges and irrigation canals. - A local school
|
237 |
+
has been built. - The project has provided local farmers with support to broaden
|
238 |
+
their agricultural activities to make them more sustainable (e.g. by implementing
|
239 |
+
aquaculture, which reduces the need for logging for farmland). - The project has
|
240 |
+
reduced the need for wood for heating, cooking, and lighting, thus allowing the
|
241 |
+
forest to regenerate and improving soil conditions, hydrology and biodiversity.
|
242 |
+
- The project has improved regional air quality by reducing the need for diesel
|
243 |
+
generators and wood fires.'
|
244 |
+
- 'How did you get there??? 05-20-2003, 05:57 AM
|
245 |
+
|
246 |
+
FAQ (on this forum) -> How do I get a picture under my username'
|
247 |
+
- By consulting this site, data relating to identified or identifiable persons may
|
248 |
+
be processed. The consent mechanisms will be evident, brief and easily understandable;
|
249 |
+
if the original conditions for which consent was requested were to be changed,
|
250 |
+
for example if the purpose of data processing changed, further consent will be
|
251 |
+
required pursuant to European Regulation no. 679/2016. All the documents related
|
252 |
+
to consents collected will be kept separate from any other corporate document.
|
253 |
+
Your personal data will not be disclosed and you are granted the exercise of the
|
254 |
+
rights referred to in Articles. 11-20 of the European Regulation n. 679/2016 by
|
255 |
+
writing to Promoviaggi S.p.A., Viale Gian Galeazzo, nr. 3, 20136 Milano (Italy)
|
256 |
+
or by sending an e-mail to firstname.lastname@example.org.
|
257 |
+
pipeline_tag: sentence-similarity
|
258 |
+
library_name: sentence-transformers
|
259 |
+
---
|
260 |
+
|
261 |
+
# SentenceTransformer based on BAAI/bge-m3
|
262 |
+
|
263 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [BAAI/bge-m3](https://huggingface.co/BAAI/bge-m3). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
264 |
+
|
265 |
+
## Model Details
|
266 |
+
|
267 |
+
### Model Description
|
268 |
+
- **Model Type:** Sentence Transformer
|
269 |
+
- **Base model:** [BAAI/bge-m3](https://huggingface.co/BAAI/bge-m3) <!-- at revision 5617a9f61b028005a4858fdac845db406aefb181 -->
|
270 |
+
- **Maximum Sequence Length:** 1024 tokens
|
271 |
+
- **Output Dimensionality:** 1024 dimensions
|
272 |
+
- **Similarity Function:** Cosine Similarity
|
273 |
+
<!-- - **Training Dataset:** Unknown -->
|
274 |
+
<!-- - **Language:** Unknown -->
|
275 |
+
<!-- - **License:** Unknown -->
|
276 |
+
|
277 |
+
### Model Sources
|
278 |
+
|
279 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
280 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
281 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
282 |
+
|
283 |
+
### Full Model Architecture
|
284 |
+
|
285 |
+
```
|
286 |
+
SentenceTransformer(
|
287 |
+
(0): Transformer({'max_seq_length': 1024, 'do_lower_case': False}) with Transformer model: XLMRobertaModel
|
288 |
+
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
289 |
+
(2): Normalize()
|
290 |
+
)
|
291 |
+
```
|
292 |
+
|
293 |
+
## Usage
|
294 |
+
|
295 |
+
### Direct Usage (Sentence Transformers)
|
296 |
+
|
297 |
+
First install the Sentence Transformers library:
|
298 |
+
|
299 |
+
```bash
|
300 |
+
pip install -U sentence-transformers
|
301 |
+
```
|
302 |
+
|
303 |
+
Then you can load this model and run inference.
|
304 |
+
```python
|
305 |
+
from sentence_transformers import SentenceTransformer
|
306 |
+
|
307 |
+
# Download from the 🤗 Hub
|
308 |
+
model = SentenceTransformer("Jrinky/mpnet-base-all-nli-triplet")
|
309 |
+
# Run inference
|
310 |
+
sentences = [
|
311 |
+
'What rights do individuals have regarding their personal data according to European Regulation no. 679/2016',
|
312 |
+
'By consulting this site, data relating to identified or identifiable persons may be processed. The consent mechanisms will be evident, brief and easily understandable; if the original conditions for which consent was requested were to be changed, for example if the purpose of data processing changed, further consent will be required pursuant to European Regulation no. 679/2016. All the documents related to consents collected will be kept separate from any other corporate document. Your personal data will not be disclosed and you are granted the exercise of the rights referred to in Articles. 11-20 of the European Regulation n. 679/2016 by writing to Promoviaggi S.p.A., Viale Gian Galeazzo, nr. 3, 20136 Milano (Italy) or by sending an e-mail to firstname.lastname@example.org.',
|
313 |
+
'The regional economy has benefited from these investments in infrastructure. The project owner has also funded the construction of a local school, which is providing benefits to local children. Project impacts and benefits:\n- The project has generated hydropower plant operation/ maintenance jobs for local people. - 24 operational staff have benefited from six months of capacity building in the form of technical training. - The construction of a new transmission line is reducing electricity loss and increasing the electricity supply in the region. - Former low-quality infrastructure systems in the region have improved, e.g. by upgrading roads, and by building bridges and irrigation canals. - A local school has been built. - The project has provided local farmers with support to broaden their agricultural activities to make them more sustainable (e.g. by implementing aquaculture, which reduces the need for logging for farmland). - The project has reduced the need for wood for heating, cooking, and lighting, thus allowing the forest to regenerate and improving soil conditions, hydrology and biodiversity. - The project has improved regional air quality by reducing the need for diesel generators and wood fires.',
|
314 |
+
]
|
315 |
+
embeddings = model.encode(sentences)
|
316 |
+
print(embeddings.shape)
|
317 |
+
# (3, 1024)
|
318 |
+
|
319 |
+
# Get the similarity scores for the embeddings
|
320 |
+
similarities = model.similarity(embeddings, embeddings)
|
321 |
+
print(similarities.shape)
|
322 |
+
# torch.Size([3, 3])
|
323 |
+
```
|
324 |
+
|
325 |
+
<!--
|
326 |
+
### Direct Usage (Transformers)
|
327 |
+
|
328 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
329 |
+
|
330 |
+
</details>
|
331 |
+
-->
|
332 |
+
|
333 |
+
<!--
|
334 |
+
### Downstream Usage (Sentence Transformers)
|
335 |
+
|
336 |
+
You can finetune this model on your own dataset.
|
337 |
+
|
338 |
+
<details><summary>Click to expand</summary>
|
339 |
+
|
340 |
+
</details>
|
341 |
+
-->
|
342 |
+
|
343 |
+
<!--
|
344 |
+
### Out-of-Scope Use
|
345 |
+
|
346 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
347 |
+
-->
|
348 |
+
|
349 |
+
<!--
|
350 |
+
## Bias, Risks and Limitations
|
351 |
+
|
352 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
353 |
+
-->
|
354 |
+
|
355 |
+
<!--
|
356 |
+
### Recommendations
|
357 |
+
|
358 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
359 |
+
-->
|
360 |
+
|
361 |
+
## Training Details
|
362 |
+
|
363 |
+
### Training Dataset
|
364 |
+
|
365 |
+
#### Unnamed Dataset
|
366 |
+
|
367 |
+
* Size: 6,433 training samples
|
368 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
369 |
+
* Approximate statistics based on the first 1000 samples:
|
370 |
+
| | anchor | positive |
|
371 |
+
|:--------|:---------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
372 |
+
| type | string | string |
|
373 |
+
| details | <ul><li>min: 6 tokens</li><li>mean: 18.1 tokens</li><li>max: 38 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 182.31 tokens</li><li>max: 1024 tokens</li></ul> |
|
374 |
+
* Samples:
|
375 |
+
| anchor | positive |
|
376 |
+
|:---------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
377 |
+
| <code>What type of insect is Crambus sudanicola</code> | <code>Crambus sudanicola is a moth in the family Crambidae.</code> |
|
378 |
+
| <code>How can you improve storage capacity in standard-height kitchens with unused wall space</code> | <code>If you have standard-height cabinets with unused wall space above, increasing the cabinets by six inches can improve storage capacity. Utilize cutlery dividers in drawers to organize cooking utensils and tools and keep them off counters.</code> |
|
379 |
+
| <code>What new guidelines has the Library Association issued regarding the sale of rare books and manuscripts</code> | <code>The Library Association has issued new guidelines for the sale of rare books and manuscripts by institutions, writes Kam Patel. The move follows an acrimonious dispute at Keele University over the sale of rare mathematics books for Pounds 1 million. The association said that for an institution to have the authority to sell books, its library should first establish that it has a full legal title to the works.</code> |
|
380 |
+
* Loss: <code>selfloss.Infonce</code> with these parameters:
|
381 |
+
```json
|
382 |
+
{
|
383 |
+
"scale": 20.0,
|
384 |
+
"similarity_fct": "cos_sim"
|
385 |
+
}
|
386 |
+
```
|
387 |
+
|
388 |
+
### Evaluation Dataset
|
389 |
+
|
390 |
+
#### Unnamed Dataset
|
391 |
+
|
392 |
+
* Size: 804 evaluation samples
|
393 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
394 |
+
* Approximate statistics based on the first 804 samples:
|
395 |
+
| | anchor | positive |
|
396 |
+
|:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
397 |
+
| type | string | string |
|
398 |
+
| details | <ul><li>min: 6 tokens</li><li>mean: 17.78 tokens</li><li>max: 37 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 190.09 tokens</li><li>max: 1024 tokens</li></ul> |
|
399 |
+
* Samples:
|
400 |
+
| anchor | positive |
|
401 |
+
|:--------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
402 |
+
| <code>What does the speaker suggest about the relationship between unarmed civilians and the metaphor of eggs and a wall</code> | <code>One way to read the metaphor, he says, is that unarmed civilians are the eggs, while tanks, guns and white phosphorus shells are the wall. But he also offers a more nuanced interpretation:<br>Each of us is, more or less, an egg.</code> |
|
403 |
+
| <code>When did Reed-Rowe retire from the Foreign Service</code> | <code>Reed-Rowe completed that assignment July 26, 2013, and was succeeded by Amy J. Hyatt. Reed-Rowe then joined the United States Army War College as a member of the Command team which focuses on the development of the next generation of military, interagency and international leaders. She officially retired from the Foreign Service in July 2014. During her career, Reed-Rowe earned several Department of State Meritorious Honor Awards, recognition from the Republic of the Marshall Islands, and the Republic of Palau, and the U.S. Army Superior Civilian Service Award. Personal<br>In addition to English, Reed-Rowe speaks Spanish and French. She has two adult children, Nikkia Rowe and Kevin Anthony Rowe. See also<br><br>List of ambassadors of the United States<br><br>References<br><br>External links<br>US Department of State: Ambassadorial Nomination Statement: Helen Reed-Rowe, Ambassador-Designate to Palau, July 21, 2010<br><br>Ambassadors of the United States to Palau<br>African-American diplomats<br>American women ambassadors<br>...</code> |
|
404 |
+
| <code>When will Star Wars: Galaxy's Edge officially open</code> | <code>Star Wars: Galaxy’s Edge may not officially open until the end of June, but for some fans, it could happen even sooner.</code> |
|
405 |
+
* Loss: <code>selfloss.Infonce</code> with these parameters:
|
406 |
+
```json
|
407 |
+
{
|
408 |
+
"scale": 20.0,
|
409 |
+
"similarity_fct": "cos_sim"
|
410 |
+
}
|
411 |
+
```
|
412 |
+
|
413 |
+
### Training Hyperparameters
|
414 |
+
#### Non-Default Hyperparameters
|
415 |
+
|
416 |
+
- `eval_strategy`: steps
|
417 |
+
- `per_device_train_batch_size`: 4
|
418 |
+
- `per_device_eval_batch_size`: 4
|
419 |
+
- `learning_rate`: 2e-05
|
420 |
+
- `num_train_epochs`: 1
|
421 |
+
- `warmup_ratio`: 0.1
|
422 |
+
- `fp16`: True
|
423 |
+
- `batch_sampler`: no_duplicates
|
424 |
+
|
425 |
+
#### All Hyperparameters
|
426 |
+
<details><summary>Click to expand</summary>
|
427 |
+
|
428 |
+
- `overwrite_output_dir`: False
|
429 |
+
- `do_predict`: False
|
430 |
+
- `eval_strategy`: steps
|
431 |
+
- `prediction_loss_only`: True
|
432 |
+
- `per_device_train_batch_size`: 4
|
433 |
+
- `per_device_eval_batch_size`: 4
|
434 |
+
- `per_gpu_train_batch_size`: None
|
435 |
+
- `per_gpu_eval_batch_size`: None
|
436 |
+
- `gradient_accumulation_steps`: 1
|
437 |
+
- `eval_accumulation_steps`: None
|
438 |
+
- `learning_rate`: 2e-05
|
439 |
+
- `weight_decay`: 0.0
|
440 |
+
- `adam_beta1`: 0.9
|
441 |
+
- `adam_beta2`: 0.999
|
442 |
+
- `adam_epsilon`: 1e-08
|
443 |
+
- `max_grad_norm`: 1.0
|
444 |
+
- `num_train_epochs`: 1
|
445 |
+
- `max_steps`: -1
|
446 |
+
- `lr_scheduler_type`: linear
|
447 |
+
- `lr_scheduler_kwargs`: {}
|
448 |
+
- `warmup_ratio`: 0.1
|
449 |
+
- `warmup_steps`: 0
|
450 |
+
- `log_level`: passive
|
451 |
+
- `log_level_replica`: warning
|
452 |
+
- `log_on_each_node`: True
|
453 |
+
- `logging_nan_inf_filter`: True
|
454 |
+
- `save_safetensors`: True
|
455 |
+
- `save_on_each_node`: False
|
456 |
+
- `save_only_model`: False
|
457 |
+
- `restore_callback_states_from_checkpoint`: False
|
458 |
+
- `no_cuda`: False
|
459 |
+
- `use_cpu`: False
|
460 |
+
- `use_mps_device`: False
|
461 |
+
- `seed`: 42
|
462 |
+
- `data_seed`: None
|
463 |
+
- `jit_mode_eval`: False
|
464 |
+
- `use_ipex`: False
|
465 |
+
- `bf16`: False
|
466 |
+
- `fp16`: True
|
467 |
+
- `fp16_opt_level`: O1
|
468 |
+
- `half_precision_backend`: auto
|
469 |
+
- `bf16_full_eval`: False
|
470 |
+
- `fp16_full_eval`: False
|
471 |
+
- `tf32`: None
|
472 |
+
- `local_rank`: 0
|
473 |
+
- `ddp_backend`: None
|
474 |
+
- `tpu_num_cores`: None
|
475 |
+
- `tpu_metrics_debug`: False
|
476 |
+
- `debug`: []
|
477 |
+
- `dataloader_drop_last`: False
|
478 |
+
- `dataloader_num_workers`: 0
|
479 |
+
- `dataloader_prefetch_factor`: None
|
480 |
+
- `past_index`: -1
|
481 |
+
- `disable_tqdm`: False
|
482 |
+
- `remove_unused_columns`: True
|
483 |
+
- `label_names`: None
|
484 |
+
- `load_best_model_at_end`: False
|
485 |
+
- `ignore_data_skip`: False
|
486 |
+
- `fsdp`: []
|
487 |
+
- `fsdp_min_num_params`: 0
|
488 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
489 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
490 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
491 |
+
- `deepspeed`: None
|
492 |
+
- `label_smoothing_factor`: 0.0
|
493 |
+
- `optim`: adamw_torch
|
494 |
+
- `optim_args`: None
|
495 |
+
- `adafactor`: False
|
496 |
+
- `group_by_length`: False
|
497 |
+
- `length_column_name`: length
|
498 |
+
- `ddp_find_unused_parameters`: None
|
499 |
+
- `ddp_bucket_cap_mb`: None
|
500 |
+
- `ddp_broadcast_buffers`: False
|
501 |
+
- `dataloader_pin_memory`: True
|
502 |
+
- `dataloader_persistent_workers`: False
|
503 |
+
- `skip_memory_metrics`: True
|
504 |
+
- `use_legacy_prediction_loop`: False
|
505 |
+
- `push_to_hub`: False
|
506 |
+
- `resume_from_checkpoint`: None
|
507 |
+
- `hub_model_id`: None
|
508 |
+
- `hub_strategy`: every_save
|
509 |
+
- `hub_private_repo`: False
|
510 |
+
- `hub_always_push`: False
|
511 |
+
- `gradient_checkpointing`: False
|
512 |
+
- `gradient_checkpointing_kwargs`: None
|
513 |
+
- `include_inputs_for_metrics`: False
|
514 |
+
- `eval_do_concat_batches`: True
|
515 |
+
- `fp16_backend`: auto
|
516 |
+
- `push_to_hub_model_id`: None
|
517 |
+
- `push_to_hub_organization`: None
|
518 |
+
- `mp_parameters`:
|
519 |
+
- `auto_find_batch_size`: False
|
520 |
+
- `full_determinism`: False
|
521 |
+
- `torchdynamo`: None
|
522 |
+
- `ray_scope`: last
|
523 |
+
- `ddp_timeout`: 1800
|
524 |
+
- `torch_compile`: False
|
525 |
+
- `torch_compile_backend`: None
|
526 |
+
- `torch_compile_mode`: None
|
527 |
+
- `dispatch_batches`: None
|
528 |
+
- `split_batches`: None
|
529 |
+
- `include_tokens_per_second`: False
|
530 |
+
- `include_num_input_tokens_seen`: False
|
531 |
+
- `neftune_noise_alpha`: None
|
532 |
+
- `optim_target_modules`: None
|
533 |
+
- `batch_eval_metrics`: False
|
534 |
+
- `eval_on_start`: False
|
535 |
+
- `prompts`: None
|
536 |
+
- `batch_sampler`: no_duplicates
|
537 |
+
- `multi_dataset_batch_sampler`: proportional
|
538 |
+
|
539 |
+
</details>
|
540 |
+
|
541 |
+
### Training Logs
|
542 |
+
| Epoch | Step | Training Loss | Validation Loss |
|
543 |
+
|:------:|:----:|:-------------:|:---------------:|
|
544 |
+
| 0.7407 | 100 | 0.2167 | 0.1060 |
|
545 |
+
|
546 |
+
|
547 |
+
### Framework Versions
|
548 |
+
- Python: 3.12.3
|
549 |
+
- Sentence Transformers: 3.4.0
|
550 |
+
- Transformers: 4.42.4
|
551 |
+
- PyTorch: 2.2.0+cu121
|
552 |
+
- Accelerate: 1.3.0
|
553 |
+
- Datasets: 3.2.0
|
554 |
+
- Tokenizers: 0.19.1
|
555 |
+
|
556 |
+
## Citation
|
557 |
+
|
558 |
+
### BibTeX
|
559 |
+
|
560 |
+
#### Sentence Transformers
|
561 |
+
```bibtex
|
562 |
+
@inproceedings{reimers-2019-sentence-bert,
|
563 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
564 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
565 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
566 |
+
month = "11",
|
567 |
+
year = "2019",
|
568 |
+
publisher = "Association for Computational Linguistics",
|
569 |
+
url = "https://arxiv.org/abs/1908.10084",
|
570 |
+
}
|
571 |
+
```
|
572 |
+
|
573 |
+
#### InfoNCE
|
574 |
+
```bibtex
|
575 |
+
@misc{henderson2017efficient,
|
576 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
577 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
578 |
+
year={2017},
|
579 |
+
eprint={1705.00652},
|
580 |
+
archivePrefix={arXiv},
|
581 |
+
primaryClass={cs.CL}
|
582 |
+
}
|
583 |
+
```
|
584 |
+
|
585 |
+
<!--
|
586 |
+
## Glossary
|
587 |
+
|
588 |
+
*Clearly define terms in order to be accessible across audiences.*
|
589 |
+
-->
|
590 |
+
|
591 |
+
<!--
|
592 |
+
## Model Card Authors
|
593 |
+
|
594 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
595 |
+
-->
|
596 |
+
|
597 |
+
<!--
|
598 |
+
## Model Card Contact
|
599 |
+
|
600 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
601 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "BAAI/bge-m3",
|
3 |
+
"architectures": [
|
4 |
+
"XLMRobertaModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"hidden_act": "gelu",
|
11 |
+
"hidden_dropout_prob": 0.1,
|
12 |
+
"hidden_size": 1024,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 4096,
|
15 |
+
"layer_norm_eps": 1e-05,
|
16 |
+
"max_position_embeddings": 8194,
|
17 |
+
"model_type": "xlm-roberta",
|
18 |
+
"num_attention_heads": 16,
|
19 |
+
"num_hidden_layers": 24,
|
20 |
+
"output_past": true,
|
21 |
+
"pad_token_id": 1,
|
22 |
+
"position_embedding_type": "absolute",
|
23 |
+
"torch_dtype": "float32",
|
24 |
+
"transformers_version": "4.42.4",
|
25 |
+
"type_vocab_size": 1,
|
26 |
+
"use_cache": true,
|
27 |
+
"vocab_size": 250002
|
28 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.4.0",
|
4 |
+
"transformers": "4.42.4",
|
5 |
+
"pytorch": "2.2.0+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": "cosine"
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3786f2b74f3da50e07e35fe361f1b83922678ad08d11c1270add6cd034923eb5
|
3 |
+
size 2271064456
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 1024,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"cls_token": {
|
10 |
+
"content": "<s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"eos_token": {
|
17 |
+
"content": "</s>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"mask_token": {
|
24 |
+
"content": "<mask>",
|
25 |
+
"lstrip": true,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"pad_token": {
|
31 |
+
"content": "<pad>",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
},
|
37 |
+
"sep_token": {
|
38 |
+
"content": "</s>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": false,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false
|
43 |
+
},
|
44 |
+
"unk_token": {
|
45 |
+
"content": "<unk>",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": false,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false
|
50 |
+
}
|
51 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e3b8957de04e3a4ed42b1a11381556f9adad8d0d502b9dd071c75f626b28f40
|
3 |
+
size 17083053
|
tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "<s>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "<pad>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "</s>",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"3": {
|
28 |
+
"content": "<unk>",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"250001": {
|
36 |
+
"content": "<mask>",
|
37 |
+
"lstrip": true,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"bos_token": "<s>",
|
45 |
+
"clean_up_tokenization_spaces": true,
|
46 |
+
"cls_token": "<s>",
|
47 |
+
"eos_token": "</s>",
|
48 |
+
"mask_token": "<mask>",
|
49 |
+
"model_max_length": 8192,
|
50 |
+
"pad_token": "<pad>",
|
51 |
+
"sep_token": "</s>",
|
52 |
+
"sp_model_kwargs": {},
|
53 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
54 |
+
"unk_token": "<unk>"
|
55 |
+
}
|