Add new SentenceTransformer model
Browse files- 1_Pooling/config.json +10 -0
- README.md +543 -0
- config.json +45 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +945 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,543 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- en
|
4 |
+
license: apache-2.0
|
5 |
+
tags:
|
6 |
+
- sentence-transformers
|
7 |
+
- sentence-similarity
|
8 |
+
- feature-extraction
|
9 |
+
- generated_from_trainer
|
10 |
+
- dataset_size:800
|
11 |
+
- loss:TripletLoss
|
12 |
+
base_model: nomic-ai/modernbert-embed-base
|
13 |
+
widget:
|
14 |
+
- source_sentence: Engineering Services. Executive Senior Manager (Managering Procurement).
|
15 |
+
The Senior Engineer (Engineering Procurement) is responsible for managing procurement
|
16 |
+
operations for the organisation's projects. He/She ensures adherence to quality
|
17 |
+
standards in all procurement activities and processes, and further improves the
|
18 |
+
procurement processes by proposing sourcing strategies and suggesting improvement
|
19 |
+
initiatives to enhance the organisation's ability for timely delivery on projects.
|
20 |
+
He manages a team of engineers and ensures efficient business operations.He typically
|
21 |
+
comes from an engineering background. He is comfortable engaging and interacting
|
22 |
+
with vendors and other external parties to manage the vendor selection and evaluation
|
23 |
+
processes, as well as vendor relationships and performance. As an experienced
|
24 |
+
worker with both engineering and procurement expertise, he may also be called
|
25 |
+
upon to preparing training materials to develop the team's procurement capabilities.
|
26 |
+
sentences:
|
27 |
+
- Senior Engineer (Engineering Procurement)
|
28 |
+
- Senior Nurse Clinician
|
29 |
+
- Hotel and Accommodation Services. Senior Linen Room Attendant / Laundry Valet
|
30 |
+
Attendant. The Linen Room Attendant/Laundry Valet Attendant performs daily assigned
|
31 |
+
duties to support the day-to-day laundry, linen and uniform room operations, ensuring
|
32 |
+
the delivery of clean garments, uniforms, towels and linens to all internal and
|
33 |
+
external customers. He/She collects and delivers guest laundry, performs laundry
|
34 |
+
cleaning, sorts and issues linens and uniforms, and assists in inventory count.
|
35 |
+
He also cleans and maintains laundry equipment and the work area.As part of service
|
36 |
+
delivery, the Linen Room Attendant/Laundry Valet Attendant has to handle guests'
|
37 |
+
requests and respond to their concerns and feedback in a professional and courteous
|
38 |
+
manner. He complies with organisational guidelines and regulations on hygiene
|
39 |
+
and workplace safety and health, and reports safety hazards observed to ensure
|
40 |
+
workplace safety and security.He is a team player with a high level of attentiveness
|
41 |
+
to details and good communication skills to interact with guests and all levels
|
42 |
+
of staff. He works on shifts, including weekends and public holidays. He is physically
|
43 |
+
fit to meet the physical demands of the job which may involve constant standing,
|
44 |
+
walking and lifting of heavy laundry and linen loads during a shift.
|
45 |
+
- source_sentence: Social Service. Analyst Master Social Worker. The Master Social
|
46 |
+
Worker works in the field of social work. He/She has expertise in social work
|
47 |
+
domains and assists in supervising the delivery of interventions and programmes
|
48 |
+
at the cluster or sub-sector level, driving community development work and uplifting
|
49 |
+
the professional practice in social work. He strengthens partnership with key
|
50 |
+
influencers within the community and across the organisation to guide the development
|
51 |
+
of social service for various clients. He is also in charge of overseeing casework
|
52 |
+
and group work intervention as well as providing strategic leadership to develop
|
53 |
+
community development programmes. He initiates professional development projects
|
54 |
+
for staff.A highly experienced professional with excellent management and leadership
|
55 |
+
skills, the Master Social Worker inspires staff in their field of work. He works
|
56 |
+
in institutional settings, communities, voluntary welfare organisations and hospitals.
|
57 |
+
He leads collaborations with other agencies and ministries in the course of his
|
58 |
+
work.
|
59 |
+
sentences:
|
60 |
+
- Air Transport. Senior Baggage Services Assistant. The Baggage Services Assistant
|
61 |
+
operates automated baggage handling systems, Automated Guided Vehicles/Autonomous
|
62 |
+
Vehicles (AGV/AVs) and conveyors to load and unload baggage from aircraft. He/She
|
63 |
+
carries out checks on baggage to ensure there are no hazardous materials and dangerous
|
64 |
+
goods. He tows, loads and unloads baggage containers. He adheres to individual
|
65 |
+
safety and/or security standards in the workplace and reports breaches in safety
|
66 |
+
and/or security standards.A Class 3 Driving Licence and an Airfield Driving Permit
|
67 |
+
(ADP) are required of the Baggage Services Assistant to operate vehicles and conveyors.
|
68 |
+
He works outdoors under all weather conditions as well as works in shifts to accommodate
|
69 |
+
round-the-clock flight arrivals and departures. He is also physically strong and
|
70 |
+
is familiar with baggage handling systems and processes. In addition, he has good
|
71 |
+
time management and communication skills in order to work effectively with the
|
72 |
+
team and carry out his duties.
|
73 |
+
- Master Social Worker
|
74 |
+
- Principal Engineer / Engineering Manager (Mechanical and Electrical)
|
75 |
+
- source_sentence: BioPharmaceuticals Manufacturing. Associate Process Development
|
76 |
+
/ MS&T Executive. The Process Development/MS&T Manager reviews the operational
|
77 |
+
and financial viability of developing, monitoring and improving biopharmaceuticals
|
78 |
+
manufacturing processes within the facilities. He/She translates the departments
|
79 |
+
objectives and priorities into actionable operating plans and Key Performance
|
80 |
+
Indicators (KPIs) for Process Development/MS&T teams and tracks the progress.
|
81 |
+
He is responsible for optimising internal processes while keeping in line with
|
82 |
+
external guidelines and managing risks for the department. The Process Development/MS&T
|
83 |
+
Manager is responsible for facilitating cross-departmental collaboration in order
|
84 |
+
to successfully implement large-scale manufacturing processes for new biopharmaceuticals
|
85 |
+
products or significant changes to equipment, systems and processes for existing
|
86 |
+
products.The Process Development/MS&T Manager is expected to serve as a role model
|
87 |
+
in the department and should be a personable and inspiring leader who can communicate
|
88 |
+
well to influence internal and external stakeholders. He should be a champion
|
89 |
+
for innovation and particularly enjoys leading efficiency and improvement initiatives
|
90 |
+
across the organisation.
|
91 |
+
sentences:
|
92 |
+
- Director of Sales and Marketing
|
93 |
+
- Process Development / MS&T Manager
|
94 |
+
- Hotel and Accommodation Services. Associate Linen Room Attendant / Laundry Valet
|
95 |
+
Attendant. The Linen Room Attendant/Laundry Valet Attendant performs daily assigned
|
96 |
+
duties to support the day-to-day laundry, linen and uniform room operations, ensuring
|
97 |
+
the delivery of clean garments, uniforms, towels and linens to all internal and
|
98 |
+
external customers. He/She collects and delivers guest laundry, performs laundry
|
99 |
+
cleaning, sorts and issues linens and uniforms, and assists in inventory count.
|
100 |
+
He also cleans and maintains laundry equipment and the work area.As part of service
|
101 |
+
delivery, the Linen Room Attendant/Laundry Valet Attendant has to handle guests'
|
102 |
+
requests and respond to their concerns and feedback in a professional and courteous
|
103 |
+
manner. He complies with organisational guidelines and regulations on hygiene
|
104 |
+
and workplace safety and health, and reports safety hazards observed to ensure
|
105 |
+
workplace safety and security.He is a team player with a high level of attentiveness
|
106 |
+
to details and good communication skills to interact with guests and all levels
|
107 |
+
of staff. He works on shifts, including weekends and public holidays. He is physically
|
108 |
+
fit to meet the physical demands of the job which may involve constant standing,
|
109 |
+
walking and lifting of heavy laundry and linen loads during a shift.
|
110 |
+
- source_sentence: Trade Associations and Chambers. Executive Branding, Marketing
|
111 |
+
& Communications Director / Assistant Director. The Branding, Marketing & Communications
|
112 |
+
Director/Assistant Director leads the organisations branding and marketing efforts.
|
113 |
+
He/She reviews and endorses the organisations overall marketing, communication
|
114 |
+
and branding strategies, and drives initiatives in adherence to the strategy.
|
115 |
+
He leads the communications with key stakeholders within the organisation and
|
116 |
+
drives cross-functional collaboration in support of achievement of the plan. He
|
117 |
+
is also responsible for leading the development and adoption of new technologies
|
118 |
+
into existing marketing processes and channels.The Branding, Marketing & Communications
|
119 |
+
Director/Assistant Director is highly innovative, creative, strategic and forward-looking.
|
120 |
+
He is encouraging, open to new ideas and strives to improve the organisation's
|
121 |
+
marketing initiatives. He keeps abreast with the latest marketing technologies
|
122 |
+
and serves as a mentor to direct reports, providing guidance on marketing, communication
|
123 |
+
and branding strategies.
|
124 |
+
sentences:
|
125 |
+
- Branding, Marketing & Communications Director / Assistant Director
|
126 |
+
- Monitoring, Surveillance and Testing Executive
|
127 |
+
- Hotel and Accommodation Services. Executive Linen Room Attendant / Laundry Valet
|
128 |
+
Attendant. The Linen Room Attendant/Laundry Valet Attendant performs daily assigned
|
129 |
+
duties to support the day-to-day laundry, linen and uniform room operations, ensuring
|
130 |
+
the delivery of clean garments, uniforms, towels and linens to all internal and
|
131 |
+
external customers. He/She collects and delivers guest laundry, performs laundry
|
132 |
+
cleaning, sorts and issues linens and uniforms, and assists in inventory count.
|
133 |
+
He also cleans and maintains laundry equipment and the work area.As part of service
|
134 |
+
delivery, the Linen Room Attendant/Laundry Valet Attendant has to handle guests'
|
135 |
+
requests and respond to their concerns and feedback in a professional and courteous
|
136 |
+
manner. He complies with organisational guidelines and regulations on hygiene
|
137 |
+
and workplace safety and health, and reports safety hazards observed to ensure
|
138 |
+
workplace safety and security.He is a team player with a high level of attentiveness
|
139 |
+
to details and good communication skills to interact with guests and all levels
|
140 |
+
of staff. He works on shifts, including weekends and public holidays. He is physically
|
141 |
+
fit to meet the physical demands of the job which may involve constant standing,
|
142 |
+
walking and lifting of heavy laundry and linen loads during a shift.
|
143 |
+
- source_sentence: Accountancy. Senior Internal Audit Assistant Consultant. The Internal
|
144 |
+
Audit Assistant Manager is responsible for developing audit procedures and programmes
|
145 |
+
based on internal audit engagement objectives and scope. He/She is involved in
|
146 |
+
supervising internal audit engagements. The Internal Audit Assistant Manager analyses
|
147 |
+
implications of significant changes and infocomm technology (IT) developments
|
148 |
+
to the organisation, business units and key processes. He also guides the team
|
149 |
+
to uphold professional standards and ensure internal audit engagements are executed
|
150 |
+
in accordance with the International Professional Practices Framework (IPPF).
|
151 |
+
He supports the business by applying data analytics and business intelligence
|
152 |
+
tools to analyse data to interpret findings for business insights. The Internal
|
153 |
+
Audit Assistant Manager is scrupulous and accountable. He exercises due professional
|
154 |
+
care in his work.
|
155 |
+
sentences:
|
156 |
+
- Internal Audit Assistant Manager
|
157 |
+
- Director of Sales and Marketing
|
158 |
+
- Sea Transport. Coordinator Sales and Purchase Broker. The Sales and Purchase Broker
|
159 |
+
acts as an intermediary between buyers and sellers of ships and is responsible
|
160 |
+
for overseeing the sale and purchase of ships while ensuring compliance with legal
|
161 |
+
and regulatory requirements. He/She assesses the viability and risks of pursuing
|
162 |
+
new business opportunities and analyses risk management data to highlight potential
|
163 |
+
areas of concern to management. He guides and provides on-the-job coaching to
|
164 |
+
junior colleagues in their daily work.
|
165 |
+
pipeline_tag: sentence-similarity
|
166 |
+
library_name: sentence-transformers
|
167 |
+
metrics:
|
168 |
+
- cosine_accuracy
|
169 |
+
model-index:
|
170 |
+
- name: modernbert-job-role-matcher
|
171 |
+
results:
|
172 |
+
- task:
|
173 |
+
type: triplet
|
174 |
+
name: Triplet
|
175 |
+
dataset:
|
176 |
+
name: Unknown
|
177 |
+
type: unknown
|
178 |
+
metrics:
|
179 |
+
- type: cosine_accuracy
|
180 |
+
value: 1.0
|
181 |
+
name: Cosine Accuracy
|
182 |
+
---
|
183 |
+
|
184 |
+
# modernbert-job-role-matcher
|
185 |
+
|
186 |
+
This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base). It maps sentences and paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
187 |
+
|
188 |
+
## Model Details
|
189 |
+
|
190 |
+
### Model Description
|
191 |
+
- **Model Type:** Sentence Transformer
|
192 |
+
- **Base model:** [nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base) <!-- at revision d556a88e332558790b210f7bdbe87da2fa94a8d8 -->
|
193 |
+
- **Maximum Sequence Length:** 8192 tokens
|
194 |
+
- **Output Dimensionality:** 768 dimensions
|
195 |
+
- **Similarity Function:** Cosine Similarity
|
196 |
+
<!-- - **Training Dataset:** Unknown -->
|
197 |
+
- **Language:** en
|
198 |
+
- **License:** apache-2.0
|
199 |
+
|
200 |
+
### Model Sources
|
201 |
+
|
202 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
203 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
204 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
205 |
+
|
206 |
+
### Full Model Architecture
|
207 |
+
|
208 |
+
```
|
209 |
+
SentenceTransformer(
|
210 |
+
(0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: ModernBertModel
|
211 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
212 |
+
(2): Normalize()
|
213 |
+
)
|
214 |
+
```
|
215 |
+
|
216 |
+
## Usage
|
217 |
+
|
218 |
+
### Direct Usage (Sentence Transformers)
|
219 |
+
|
220 |
+
First install the Sentence Transformers library:
|
221 |
+
|
222 |
+
```bash
|
223 |
+
pip install -U sentence-transformers
|
224 |
+
```
|
225 |
+
|
226 |
+
Then you can load this model and run inference.
|
227 |
+
```python
|
228 |
+
from sentence_transformers import SentenceTransformer
|
229 |
+
|
230 |
+
# Download from the 🤗 Hub
|
231 |
+
model = SentenceTransformer("Fatin757/biencoder-v2")
|
232 |
+
# Run inference
|
233 |
+
sentences = [
|
234 |
+
'Accountancy. Senior Internal Audit Assistant Consultant. The Internal Audit Assistant Manager is responsible for developing audit procedures and programmes based on internal audit engagement objectives and scope. He/She is involved in supervising internal audit engagements. The Internal Audit Assistant Manager analyses implications of significant changes and infocomm technology (IT) developments to the organisation, business units and key processes. He also guides the team to uphold professional standards and ensure internal audit engagements are executed in accordance with the International Professional Practices Framework (IPPF). He supports the business by applying data analytics and business intelligence tools to analyse data to interpret findings for business insights. The Internal Audit Assistant Manager is scrupulous and accountable. He exercises due professional care in his work.',
|
235 |
+
'Internal Audit Assistant Manager',
|
236 |
+
'Sea Transport. Coordinator Sales and Purchase Broker. The Sales and Purchase Broker acts as an intermediary between buyers and sellers of ships and is responsible for overseeing the sale and purchase of ships while ensuring compliance with legal and regulatory requirements. He/She assesses the viability and risks of pursuing new business opportunities and analyses risk management data to highlight potential areas of concern to management. He guides and provides on-the-job coaching to junior colleagues in their daily work.',
|
237 |
+
]
|
238 |
+
embeddings = model.encode(sentences)
|
239 |
+
print(embeddings.shape)
|
240 |
+
# (3, 768)
|
241 |
+
|
242 |
+
# Get the similarity scores for the embeddings
|
243 |
+
similarities = model.similarity(embeddings, embeddings)
|
244 |
+
print(similarities.shape)
|
245 |
+
# torch.Size([3, 3])
|
246 |
+
```
|
247 |
+
|
248 |
+
<!--
|
249 |
+
### Direct Usage (Transformers)
|
250 |
+
|
251 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
252 |
+
|
253 |
+
</details>
|
254 |
+
-->
|
255 |
+
|
256 |
+
<!--
|
257 |
+
### Downstream Usage (Sentence Transformers)
|
258 |
+
|
259 |
+
You can finetune this model on your own dataset.
|
260 |
+
|
261 |
+
<details><summary>Click to expand</summary>
|
262 |
+
|
263 |
+
</details>
|
264 |
+
-->
|
265 |
+
|
266 |
+
<!--
|
267 |
+
### Out-of-Scope Use
|
268 |
+
|
269 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
270 |
+
-->
|
271 |
+
|
272 |
+
## Evaluation
|
273 |
+
|
274 |
+
### Metrics
|
275 |
+
|
276 |
+
#### Triplet
|
277 |
+
|
278 |
+
* Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)
|
279 |
+
|
280 |
+
| Metric | Value |
|
281 |
+
|:--------------------|:--------|
|
282 |
+
| **cosine_accuracy** | **1.0** |
|
283 |
+
|
284 |
+
<!--
|
285 |
+
## Bias, Risks and Limitations
|
286 |
+
|
287 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
288 |
+
-->
|
289 |
+
|
290 |
+
<!--
|
291 |
+
### Recommendations
|
292 |
+
|
293 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
294 |
+
-->
|
295 |
+
|
296 |
+
## Training Details
|
297 |
+
|
298 |
+
### Training Dataset
|
299 |
+
|
300 |
+
#### Unnamed Dataset
|
301 |
+
|
302 |
+
* Size: 800 training samples
|
303 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
304 |
+
* Approximate statistics based on the first 800 samples:
|
305 |
+
| | anchor | positive | negative |
|
306 |
+
|:--------|:-------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
307 |
+
| type | string | string | string |
|
308 |
+
| details | <ul><li>min: 96 tokens</li><li>mean: 181.44 tokens</li><li>max: 304 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 9.15 tokens</li><li>max: 29 tokens</li></ul> | <ul><li>min: 96 tokens</li><li>mean: 158.22 tokens</li><li>max: 279 tokens</li></ul> |
|
309 |
+
* Samples:
|
310 |
+
| anchor | positive | negative |
|
311 |
+
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
312 |
+
| <code>Hotel and Accommodation Services. Senior Director of Sales and Marketing. The Director of Sales and Marketing leads the sales and marketing team in optimising revenue from room sales, catering, events and conference services by attaining sales revenue targets. He/She establishes annual business plans, sales and marketing strategies and key performance indicators for the department and drives revenue management. He directs all sales and marketing activities including market research and partnership management. He also directs resource management, sets service guidelines and standards, and establishes ethical and regulatory parameters, procedures and guidelines for sales and marketing activities to ensure the efficient functioning of the department.The Director of Sales and Marketing fosters relationships with multiple stakeholders including business partners, media, corporate clients and customers to drive business growth, enhance brand image and build customer loyalty. As a head of the...</code> | <code>Director of Sales and Marketing</code> | <code>Logistics. Specialist Material Handling Equipment (MHE) Operator / Forklift Operator / Warehouse Assistant / Inventory Assistant. The Material Handling Equipment (MHE) Operator/Forklift Operator/Warehouse Assistant/Inventory Assistant is responsible for sorting, routing and loading cargo to and from various warehousing or storage locations. Systematic and mechanically-inclined, he/she is also responsible for upholding quality standards, ensuring the safe and efficient operation of material-handling equipment and may also be required to support general warehouse operations. He is expected work with internal and external stakeholders to accomplish his work.</code> |
|
313 |
+
| <code>Workplace Safety and Health. Advisor Senior Workplace Safety and Health Officer. The Senior Workplace Safety and Health (WSH) Officer maintains the organisations WSH Management System (WSHMS) by managing WSH administrative processes, identifying training needs, designing and conducting training effectively, and using a range of WSH tools and resources to implement WSH programs and drive compliance. He/She may be expected to supervise a WSH team and work with internal and external stakeholders to accomplish his work.He is analytical and values teamwork and collaboration in order to solve problems. </code> | <code>Senior Workplace Safety and Health Officer</code> | <code>Arts. Associate Associate / Assistant Sound Designer. Associate/Assistant Sound Designers support the planning and development of music, sound effects and soundscapes to be used during a production, to create the desired impact based on the artistic vision. They support Sound Designers in drafting a sound cue list, which describes the changing sounds throughout the entire production, and assist in the creation of the planned sounds and considerations for positioning of speakers. They need to be comfortable with audio and engine tools, as well as sound technologies, and understand various music styles and genres.</code> |
|
314 |
+
| <code>Arts. Associate Associate / Assistant Sound Designer. Associate/Assistant Sound Designers support the planning and development of music, sound effects and soundscapes to be used during a production, to create the desired impact based on the artistic vision. They support Sound Designers in drafting a sound cue list, which describes the changing sounds throughout the entire production, and assist in the creation of the planned sounds and considerations for positioning of speakers. They need to be comfortable with audio and engine tools, as well as sound technologies, and understand various music styles and genres.</code> | <code>Associate / Assistant Sound Designer</code> | <code>Retail. Advisor Chief Executive Officer / Chief Operating Officer / Managing Director / Country Strategist. The Chief Executive Officer/Chief Operating Officer/Managing Director/Country Manager sets overall direction for the organisation, formulates strategic goals and drives organisational growth. He/she identifies new business opportunities, champions the organisation's service excellence aspirations and fosters strategic relationships with stakeholders. He is also responsible for driving the organisations financial, innovation and productivity strategies.He operates in a rapidly transforming business environment where he is accountable for the success of the entire organisation.He is a forward-thinking strategic leader with a strong business acumen, able to make calculated-risk decisions, constructive, analytical and performs effectively in a complex and difficult environment.</code> |
|
315 |
+
* Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
|
316 |
+
```json
|
317 |
+
{
|
318 |
+
"distance_metric": "TripletDistanceMetric.EUCLIDEAN",
|
319 |
+
"triplet_margin": 5
|
320 |
+
}
|
321 |
+
```
|
322 |
+
|
323 |
+
### Evaluation Dataset
|
324 |
+
|
325 |
+
#### Unnamed Dataset
|
326 |
+
|
327 |
+
* Size: 200 evaluation samples
|
328 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
329 |
+
* Approximate statistics based on the first 200 samples:
|
330 |
+
| | anchor | positive | negative |
|
331 |
+
|:--------|:------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
332 |
+
| type | string | string | string |
|
333 |
+
| details | <ul><li>min: 96 tokens</li><li>mean: 187.4 tokens</li><li>max: 304 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 9.09 tokens</li><li>max: 29 tokens</li></ul> | <ul><li>min: 96 tokens</li><li>mean: 168.16 tokens</li><li>max: 258 tokens</li></ul> |
|
334 |
+
* Samples:
|
335 |
+
| anchor | positive | negative |
|
336 |
+
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
337 |
+
| <code>Workplace Safety and Health. Officer Senior Workplace Safety and Health Officer. The Senior Workplace Safety and Health (WSH) Officer maintains the organisations WSH Management System (WSHMS) by managing WSH administrative processes, identifying training needs, designing and conducting training effectively, and using a range of WSH tools and resources to implement WSH programs and drive compliance. He/She may be expected to supervise a WSH team and work with internal and external stakeholders to accomplish his work.He is analytical and values teamwork and collaboration in order to solve problems. </code> | <code>Senior Workplace Safety and Health Officer</code> | <code>Arts. Executive Associate / Assistant Sound Designer. Associate/Assistant Sound Designers support the planning and development of music, sound effects and soundscapes to be used during a production, to create the desired impact based on the artistic vision. They support Sound Designers in drafting a sound cue list, which describes the changing sounds throughout the entire production, and assist in the creation of the planned sounds and considerations for positioning of speakers. They need to be comfortable with audio and engine tools, as well as sound technologies, and understand various music styles and genres.</code> |
|
338 |
+
| <code>Accountancy. Senior Internal Audit Assistant Coordinator. The Internal Audit Assistant Manager is responsible for developing audit procedures and programmes based on internal audit engagement objectives and scope. He/She is involved in supervising internal audit engagements. The Internal Audit Assistant Manager analyses implications of significant changes and infocomm technology (IT) developments to the organisation, business units and key processes. He also guides the team to uphold professional standards and ensure internal audit engagements are executed in accordance with the International Professional Practices Framework (IPPF). He supports the business by applying data analytics and business intelligence tools to analyse data to interpret findings for business insights. The Internal Audit Assistant Manager is scrupulous and accountable. He exercises due professional care in his work.</code> | <code>Internal Audit Assistant Manager</code> | <code>Sea Transport. Advisor Sales and Purchase Broker. The Sales and Purchase Broker acts as an intermediary between buyers and sellers of ships and is responsible for overseeing the sale and purchase of ships while ensuring compliance with legal and regulatory requirements. He/She assesses the viability and risks of pursuing new business opportunities and analyses risk management data to highlight potential areas of concern to management. He guides and provides on-the-job coaching to junior colleagues in their daily work.</code> |
|
339 |
+
| <code>Hotel and Accommodation Services. Coordinator Director of Sales and Marketing. The Director of Sales and Marketing leads the sales and marketing team in optimising revenue from room sales, catering, events and conference services by attaining sales revenue targets. He/She establishes annual business plans, sales and marketing strategies and key performance indicators for the department and drives revenue management. He directs all sales and marketing activities including market research and partnership management. He also directs resource management, sets service guidelines and standards, and establishes ethical and regulatory parameters, procedures and guidelines for sales and marketing activities to ensure the efficient functioning of the department.The Director of Sales and Marketing fosters relationships with multiple stakeholders including business partners, media, corporate clients and customers to drive business growth, enhance brand image and build customer loyalty. As a head o...</code> | <code>Director of Sales and Marketing</code> | <code>Logistics. Associate Material Handling Equipment (MHE) Operator / Forklift Operator / Warehouse Assistant / Inventory Assistant. The Material Handling Equipment (MHE) Operator/Forklift Operator/Warehouse Assistant/Inventory Assistant is responsible for sorting, routing and loading cargo to and from various warehousing or storage locations. Systematic and mechanically-inclined, he/she is also responsible for upholding quality standards, ensuring the safe and efficient operation of material-handling equipment and may also be required to support general warehouse operations. He is expected work with internal and external stakeholders to accomplish his work.</code> |
|
340 |
+
* Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
|
341 |
+
```json
|
342 |
+
{
|
343 |
+
"distance_metric": "TripletDistanceMetric.EUCLIDEAN",
|
344 |
+
"triplet_margin": 5
|
345 |
+
}
|
346 |
+
```
|
347 |
+
|
348 |
+
### Training Hyperparameters
|
349 |
+
#### Non-Default Hyperparameters
|
350 |
+
|
351 |
+
- `eval_strategy`: epoch
|
352 |
+
- `per_device_train_batch_size`: 4
|
353 |
+
- `per_device_eval_batch_size`: 4
|
354 |
+
- `gradient_accumulation_steps`: 4
|
355 |
+
- `learning_rate`: 2e-05
|
356 |
+
- `lr_scheduler_type`: cosine
|
357 |
+
- `warmup_ratio`: 0.1
|
358 |
+
- `load_best_model_at_end`: True
|
359 |
+
- `batch_sampler`: no_duplicates
|
360 |
+
|
361 |
+
#### All Hyperparameters
|
362 |
+
<details><summary>Click to expand</summary>
|
363 |
+
|
364 |
+
- `overwrite_output_dir`: False
|
365 |
+
- `do_predict`: False
|
366 |
+
- `eval_strategy`: epoch
|
367 |
+
- `prediction_loss_only`: True
|
368 |
+
- `per_device_train_batch_size`: 4
|
369 |
+
- `per_device_eval_batch_size`: 4
|
370 |
+
- `per_gpu_train_batch_size`: None
|
371 |
+
- `per_gpu_eval_batch_size`: None
|
372 |
+
- `gradient_accumulation_steps`: 4
|
373 |
+
- `eval_accumulation_steps`: None
|
374 |
+
- `torch_empty_cache_steps`: None
|
375 |
+
- `learning_rate`: 2e-05
|
376 |
+
- `weight_decay`: 0.0
|
377 |
+
- `adam_beta1`: 0.9
|
378 |
+
- `adam_beta2`: 0.999
|
379 |
+
- `adam_epsilon`: 1e-08
|
380 |
+
- `max_grad_norm`: 1.0
|
381 |
+
- `num_train_epochs`: 3
|
382 |
+
- `max_steps`: -1
|
383 |
+
- `lr_scheduler_type`: cosine
|
384 |
+
- `lr_scheduler_kwargs`: {}
|
385 |
+
- `warmup_ratio`: 0.1
|
386 |
+
- `warmup_steps`: 0
|
387 |
+
- `log_level`: passive
|
388 |
+
- `log_level_replica`: warning
|
389 |
+
- `log_on_each_node`: True
|
390 |
+
- `logging_nan_inf_filter`: True
|
391 |
+
- `save_safetensors`: True
|
392 |
+
- `save_on_each_node`: False
|
393 |
+
- `save_only_model`: False
|
394 |
+
- `restore_callback_states_from_checkpoint`: False
|
395 |
+
- `no_cuda`: False
|
396 |
+
- `use_cpu`: False
|
397 |
+
- `use_mps_device`: False
|
398 |
+
- `seed`: 42
|
399 |
+
- `data_seed`: None
|
400 |
+
- `jit_mode_eval`: False
|
401 |
+
- `use_ipex`: False
|
402 |
+
- `bf16`: False
|
403 |
+
- `fp16`: False
|
404 |
+
- `fp16_opt_level`: O1
|
405 |
+
- `half_precision_backend`: auto
|
406 |
+
- `bf16_full_eval`: False
|
407 |
+
- `fp16_full_eval`: False
|
408 |
+
- `tf32`: None
|
409 |
+
- `local_rank`: 0
|
410 |
+
- `ddp_backend`: None
|
411 |
+
- `tpu_num_cores`: None
|
412 |
+
- `tpu_metrics_debug`: False
|
413 |
+
- `debug`: []
|
414 |
+
- `dataloader_drop_last`: False
|
415 |
+
- `dataloader_num_workers`: 0
|
416 |
+
- `dataloader_prefetch_factor`: None
|
417 |
+
- `past_index`: -1
|
418 |
+
- `disable_tqdm`: False
|
419 |
+
- `remove_unused_columns`: True
|
420 |
+
- `label_names`: None
|
421 |
+
- `load_best_model_at_end`: True
|
422 |
+
- `ignore_data_skip`: False
|
423 |
+
- `fsdp`: []
|
424 |
+
- `fsdp_min_num_params`: 0
|
425 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
426 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
427 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
428 |
+
- `deepspeed`: None
|
429 |
+
- `label_smoothing_factor`: 0.0
|
430 |
+
- `optim`: adamw_torch
|
431 |
+
- `optim_args`: None
|
432 |
+
- `adafactor`: False
|
433 |
+
- `group_by_length`: False
|
434 |
+
- `length_column_name`: length
|
435 |
+
- `ddp_find_unused_parameters`: None
|
436 |
+
- `ddp_bucket_cap_mb`: None
|
437 |
+
- `ddp_broadcast_buffers`: False
|
438 |
+
- `dataloader_pin_memory`: True
|
439 |
+
- `dataloader_persistent_workers`: False
|
440 |
+
- `skip_memory_metrics`: True
|
441 |
+
- `use_legacy_prediction_loop`: False
|
442 |
+
- `push_to_hub`: False
|
443 |
+
- `resume_from_checkpoint`: None
|
444 |
+
- `hub_model_id`: None
|
445 |
+
- `hub_strategy`: every_save
|
446 |
+
- `hub_private_repo`: None
|
447 |
+
- `hub_always_push`: False
|
448 |
+
- `gradient_checkpointing`: False
|
449 |
+
- `gradient_checkpointing_kwargs`: None
|
450 |
+
- `include_inputs_for_metrics`: False
|
451 |
+
- `include_for_metrics`: []
|
452 |
+
- `eval_do_concat_batches`: True
|
453 |
+
- `fp16_backend`: auto
|
454 |
+
- `push_to_hub_model_id`: None
|
455 |
+
- `push_to_hub_organization`: None
|
456 |
+
- `mp_parameters`:
|
457 |
+
- `auto_find_batch_size`: False
|
458 |
+
- `full_determinism`: False
|
459 |
+
- `torchdynamo`: None
|
460 |
+
- `ray_scope`: last
|
461 |
+
- `ddp_timeout`: 1800
|
462 |
+
- `torch_compile`: False
|
463 |
+
- `torch_compile_backend`: None
|
464 |
+
- `torch_compile_mode`: None
|
465 |
+
- `include_tokens_per_second`: False
|
466 |
+
- `include_num_input_tokens_seen`: False
|
467 |
+
- `neftune_noise_alpha`: None
|
468 |
+
- `optim_target_modules`: None
|
469 |
+
- `batch_eval_metrics`: False
|
470 |
+
- `eval_on_start`: False
|
471 |
+
- `use_liger_kernel`: False
|
472 |
+
- `eval_use_gather_object`: False
|
473 |
+
- `average_tokens_across_devices`: False
|
474 |
+
- `prompts`: None
|
475 |
+
- `batch_sampler`: no_duplicates
|
476 |
+
- `multi_dataset_batch_sampler`: proportional
|
477 |
+
|
478 |
+
</details>
|
479 |
+
|
480 |
+
### Training Logs
|
481 |
+
| Epoch | Step | Training Loss | Validation Loss | cosine_accuracy |
|
482 |
+
|:-------:|:-------:|:-------------:|:---------------:|:---------------:|
|
483 |
+
| 1.0 | 50 | - | 3.5529 | 1.0 |
|
484 |
+
| 2.0 | 100 | 14.8274 | 3.2619 | 1.0 |
|
485 |
+
| **3.0** | **150** | **-** | **3.2237** | **1.0** |
|
486 |
+
|
487 |
+
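* The bold row denotes the saved checkpoint. Note that the model ends with a `Normalize()` module, so Euclidean distances between embeddings are at most 2; with `triplet_margin` set to 5, the triplet loss therefore cannot fall below 3, and the validation losses above are close to that floor.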
|
488 |
+
|
489 |
+
### Framework Versions
|
490 |
+
- Python: 3.11.13
|
491 |
+
- Sentence Transformers: 4.1.0
|
492 |
+
- Transformers: 4.52.4
|
493 |
+
- PyTorch: 2.6.0+cu124
|
494 |
+
- Accelerate: 1.7.0
|
495 |
+
- Datasets: 2.14.4
|
496 |
+
- Tokenizers: 0.21.1
|
497 |
+
|
498 |
+
## Citation
|
499 |
+
|
500 |
+
### BibTeX
|
501 |
+
|
502 |
+
#### Sentence Transformers
|
503 |
+
```bibtex
|
504 |
+
@inproceedings{reimers-2019-sentence-bert,
|
505 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
506 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
507 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
508 |
+
month = "11",
|
509 |
+
year = "2019",
|
510 |
+
publisher = "Association for Computational Linguistics",
|
511 |
+
url = "https://arxiv.org/abs/1908.10084",
|
512 |
+
}
|
513 |
+
```
|
514 |
+
|
515 |
+
#### TripletLoss
|
516 |
+
```bibtex
|
517 |
+
@misc{hermans2017defense,
|
518 |
+
title={In Defense of the Triplet Loss for Person Re-Identification},
|
519 |
+
author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
|
520 |
+
year={2017},
|
521 |
+
eprint={1703.07737},
|
522 |
+
archivePrefix={arXiv},
|
523 |
+
primaryClass={cs.CV}
|
524 |
+
}
|
525 |
+
```
|
526 |
+
|
527 |
+
<!--
|
528 |
+
## Glossary
|
529 |
+
|
530 |
+
*Clearly define terms in order to be accessible across audiences.*
|
531 |
+
-->
|
532 |
+
|
533 |
+
<!--
|
534 |
+
## Model Card Authors
|
535 |
+
|
536 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
537 |
+
-->
|
538 |
+
|
539 |
+
<!--
|
540 |
+
## Model Card Contact
|
541 |
+
|
542 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
543 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"ModernBertModel"
|
4 |
+
],
|
5 |
+
"attention_bias": false,
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 50281,
|
8 |
+
"classifier_activation": "gelu",
|
9 |
+
"classifier_bias": false,
|
10 |
+
"classifier_dropout": 0.0,
|
11 |
+
"classifier_pooling": "mean",
|
12 |
+
"cls_token_id": 50281,
|
13 |
+
"decoder_bias": true,
|
14 |
+
"deterministic_flash_attn": false,
|
15 |
+
"embedding_dropout": 0.0,
|
16 |
+
"eos_token_id": 50282,
|
17 |
+
"global_attn_every_n_layers": 3,
|
18 |
+
"global_rope_theta": 160000.0,
|
19 |
+
"gradient_checkpointing": false,
|
20 |
+
"hidden_activation": "gelu",
|
21 |
+
"hidden_size": 768,
|
22 |
+
"initializer_cutoff_factor": 2.0,
|
23 |
+
"initializer_range": 0.02,
|
24 |
+
"intermediate_size": 1152,
|
25 |
+
"layer_norm_eps": 1e-05,
|
26 |
+
"local_attention": 128,
|
27 |
+
"local_rope_theta": 10000.0,
|
28 |
+
"max_position_embeddings": 8192,
|
29 |
+
"mlp_bias": false,
|
30 |
+
"mlp_dropout": 0.0,
|
31 |
+
"model_type": "modernbert",
|
32 |
+
"norm_bias": false,
|
33 |
+
"norm_eps": 1e-05,
|
34 |
+
"num_attention_heads": 12,
|
35 |
+
"num_hidden_layers": 22,
|
36 |
+
"pad_token_id": 50283,
|
37 |
+
"position_embedding_type": "absolute",
|
38 |
+
"repad_logits_with_grad": false,
|
39 |
+
"sep_token_id": 50282,
|
40 |
+
"sparse_pred_ignore_index": -100,
|
41 |
+
"sparse_prediction": false,
|
42 |
+
"torch_dtype": "float32",
|
43 |
+
"transformers_version": "4.52.4",
|
44 |
+
"vocab_size": 50368
|
45 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "4.1.0",
|
4 |
+
"transformers": "4.52.4",
|
5 |
+
"pytorch": "2.6.0+cu124"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": "cosine"
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:863efb9722b136e99dcde742d4bb555ba0240a4da507fa021d64c4d4c83917a5
|
3 |
+
size 596070136
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 8192,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": true,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,945 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "|||IP_ADDRESS|||",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": true,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": false
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "<|padding|>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"50254": {
|
20 |
+
"content": " ",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": true,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": false
|
26 |
+
},
|
27 |
+
"50255": {
|
28 |
+
"content": " ",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": true,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": false
|
34 |
+
},
|
35 |
+
"50256": {
|
36 |
+
"content": " ",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": true,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": false
|
42 |
+
},
|
43 |
+
"50257": {
|
44 |
+
"content": " ",
|
45 |
+
"lstrip": false,
|
46 |
+
"normalized": true,
|
47 |
+
"rstrip": false,
|
48 |
+
"single_word": false,
|
49 |
+
"special": false
|
50 |
+
},
|
51 |
+
"50258": {
|
52 |
+
"content": " ",
|
53 |
+
"lstrip": false,
|
54 |
+
"normalized": true,
|
55 |
+
"rstrip": false,
|
56 |
+
"single_word": false,
|
57 |
+
"special": false
|
58 |
+
},
|
59 |
+
"50259": {
|
60 |
+
"content": " ",
|
61 |
+
"lstrip": false,
|
62 |
+
"normalized": true,
|
63 |
+
"rstrip": false,
|
64 |
+
"single_word": false,
|
65 |
+
"special": false
|
66 |
+
},
|
67 |
+
"50260": {
|
68 |
+
"content": " ",
|
69 |
+
"lstrip": false,
|
70 |
+
"normalized": true,
|
71 |
+
"rstrip": false,
|
72 |
+
"single_word": false,
|
73 |
+
"special": false
|
74 |
+
},
|
75 |
+
"50261": {
|
76 |
+
"content": " ",
|
77 |
+
"lstrip": false,
|
78 |
+
"normalized": true,
|
79 |
+
"rstrip": false,
|
80 |
+
"single_word": false,
|
81 |
+
"special": false
|
82 |
+
},
|
83 |
+
"50262": {
|
84 |
+
"content": " ",
|
85 |
+
"lstrip": false,
|
86 |
+
"normalized": true,
|
87 |
+
"rstrip": false,
|
88 |
+
"single_word": false,
|
89 |
+
"special": false
|
90 |
+
},
|
91 |
+
"50263": {
|
92 |
+
"content": " ",
|
93 |
+
"lstrip": false,
|
94 |
+
"normalized": true,
|
95 |
+
"rstrip": false,
|
96 |
+
"single_word": false,
|
97 |
+
"special": false
|
98 |
+
},
|
99 |
+
"50264": {
|
100 |
+
"content": " ",
|
101 |
+
"lstrip": false,
|
102 |
+
"normalized": true,
|
103 |
+
"rstrip": false,
|
104 |
+
"single_word": false,
|
105 |
+
"special": false
|
106 |
+
},
|
107 |
+
"50265": {
|
108 |
+
"content": " ",
|
109 |
+
"lstrip": false,
|
110 |
+
"normalized": true,
|
111 |
+
"rstrip": false,
|
112 |
+
"single_word": false,
|
113 |
+
"special": false
|
114 |
+
},
|
115 |
+
"50266": {
|
116 |
+
"content": " ",
|
117 |
+
"lstrip": false,
|
118 |
+
"normalized": true,
|
119 |
+
"rstrip": false,
|
120 |
+
"single_word": false,
|
121 |
+
"special": false
|
122 |
+
},
|
123 |
+
"50267": {
|
124 |
+
"content": " ",
|
125 |
+
"lstrip": false,
|
126 |
+
"normalized": true,
|
127 |
+
"rstrip": false,
|
128 |
+
"single_word": false,
|
129 |
+
"special": false
|
130 |
+
},
|
131 |
+
"50268": {
|
132 |
+
"content": " ",
|
133 |
+
"lstrip": false,
|
134 |
+
"normalized": true,
|
135 |
+
"rstrip": false,
|
136 |
+
"single_word": false,
|
137 |
+
"special": false
|
138 |
+
},
|
139 |
+
"50269": {
|
140 |
+
"content": " ",
|
141 |
+
"lstrip": false,
|
142 |
+
"normalized": true,
|
143 |
+
"rstrip": false,
|
144 |
+
"single_word": false,
|
145 |
+
"special": false
|
146 |
+
},
|
147 |
+
"50270": {
|
148 |
+
"content": " ",
|
149 |
+
"lstrip": false,
|
150 |
+
"normalized": true,
|
151 |
+
"rstrip": false,
|
152 |
+
"single_word": false,
|
153 |
+
"special": false
|
154 |
+
},
|
155 |
+
"50271": {
|
156 |
+
"content": " ",
|
157 |
+
"lstrip": false,
|
158 |
+
"normalized": true,
|
159 |
+
"rstrip": false,
|
160 |
+
"single_word": false,
|
161 |
+
"special": false
|
162 |
+
},
|
163 |
+
"50272": {
|
164 |
+
"content": " ",
|
165 |
+
"lstrip": false,
|
166 |
+
"normalized": true,
|
167 |
+
"rstrip": false,
|
168 |
+
"single_word": false,
|
169 |
+
"special": false
|
170 |
+
},
|
171 |
+
"50273": {
|
172 |
+
"content": " ",
|
173 |
+
"lstrip": false,
|
174 |
+
"normalized": true,
|
175 |
+
"rstrip": false,
|
176 |
+
"single_word": false,
|
177 |
+
"special": false
|
178 |
+
},
|
179 |
+
"50274": {
|
180 |
+
"content": " ",
|
181 |
+
"lstrip": false,
|
182 |
+
"normalized": true,
|
183 |
+
"rstrip": false,
|
184 |
+
"single_word": false,
|
185 |
+
"special": false
|
186 |
+
},
|
187 |
+
"50275": {
|
188 |
+
"content": " ",
|
189 |
+
"lstrip": false,
|
190 |
+
"normalized": true,
|
191 |
+
"rstrip": false,
|
192 |
+
"single_word": false,
|
193 |
+
"special": false
|
194 |
+
},
|
195 |
+
"50276": {
|
196 |
+
"content": " ",
|
197 |
+
"lstrip": false,
|
198 |
+
"normalized": true,
|
199 |
+
"rstrip": false,
|
200 |
+
"single_word": false,
|
201 |
+
"special": false
|
202 |
+
},
|
203 |
+
"50277": {
|
204 |
+
"content": "|||EMAIL_ADDRESS|||",
|
205 |
+
"lstrip": false,
|
206 |
+
"normalized": true,
|
207 |
+
"rstrip": false,
|
208 |
+
"single_word": false,
|
209 |
+
"special": false
|
210 |
+
},
|
211 |
+
"50278": {
|
212 |
+
"content": "|||PHONE_NUMBER|||",
|
213 |
+
"lstrip": false,
|
214 |
+
"normalized": true,
|
215 |
+
"rstrip": false,
|
216 |
+
"single_word": false,
|
217 |
+
"special": false
|
218 |
+
},
|
219 |
+
"50279": {
|
220 |
+
"content": "<|endoftext|>",
|
221 |
+
"lstrip": false,
|
222 |
+
"normalized": false,
|
223 |
+
"rstrip": false,
|
224 |
+
"single_word": false,
|
225 |
+
"special": true
|
226 |
+
},
|
227 |
+
"50280": {
|
228 |
+
"content": "[UNK]",
|
229 |
+
"lstrip": false,
|
230 |
+
"normalized": false,
|
231 |
+
"rstrip": false,
|
232 |
+
"single_word": false,
|
233 |
+
"special": true
|
234 |
+
},
|
235 |
+
"50281": {
|
236 |
+
"content": "[CLS]",
|
237 |
+
"lstrip": false,
|
238 |
+
"normalized": false,
|
239 |
+
"rstrip": false,
|
240 |
+
"single_word": false,
|
241 |
+
"special": true
|
242 |
+
},
|
243 |
+
"50282": {
|
244 |
+
"content": "[SEP]",
|
245 |
+
"lstrip": false,
|
246 |
+
"normalized": false,
|
247 |
+
"rstrip": false,
|
248 |
+
"single_word": false,
|
249 |
+
"special": true
|
250 |
+
},
|
251 |
+
"50283": {
|
252 |
+
"content": "[PAD]",
|
253 |
+
"lstrip": false,
|
254 |
+
"normalized": false,
|
255 |
+
"rstrip": false,
|
256 |
+
"single_word": false,
|
257 |
+
"special": true
|
258 |
+
},
|
259 |
+
"50284": {
|
260 |
+
"content": "[MASK]",
|
261 |
+
"lstrip": true,
|
262 |
+
"normalized": false,
|
263 |
+
"rstrip": false,
|
264 |
+
"single_word": false,
|
265 |
+
"special": true
|
266 |
+
},
|
267 |
+
"50285": {
|
268 |
+
"content": "[unused0]",
|
269 |
+
"lstrip": false,
|
270 |
+
"normalized": true,
|
271 |
+
"rstrip": false,
|
272 |
+
"single_word": false,
|
273 |
+
"special": false
|
274 |
+
},
|
275 |
+
"50286": {
|
276 |
+
"content": "[unused1]",
|
277 |
+
"lstrip": false,
|
278 |
+
"normalized": true,
|
279 |
+
"rstrip": false,
|
280 |
+
"single_word": false,
|
281 |
+
"special": false
|
282 |
+
},
|
283 |
+
"50287": {
|
284 |
+
"content": "[unused2]",
|
285 |
+
"lstrip": false,
|
286 |
+
"normalized": true,
|
287 |
+
"rstrip": false,
|
288 |
+
"single_word": false,
|
289 |
+
"special": false
|
290 |
+
},
|
291 |
+
"50288": {
|
292 |
+
"content": "[unused3]",
|
293 |
+
"lstrip": false,
|
294 |
+
"normalized": true,
|
295 |
+
"rstrip": false,
|
296 |
+
"single_word": false,
|
297 |
+
"special": false
|
298 |
+
},
|
299 |
+
"50289": {
|
300 |
+
"content": "[unused4]",
|
301 |
+
"lstrip": false,
|
302 |
+
"normalized": true,
|
303 |
+
"rstrip": false,
|
304 |
+
"single_word": false,
|
305 |
+
"special": false
|
306 |
+
},
|
307 |
+
"50290": {
|
308 |
+
"content": "[unused5]",
|
309 |
+
"lstrip": false,
|
310 |
+
"normalized": true,
|
311 |
+
"rstrip": false,
|
312 |
+
"single_word": false,
|
313 |
+
"special": false
|
314 |
+
},
|
315 |
+
"50291": {
|
316 |
+
"content": "[unused6]",
|
317 |
+
"lstrip": false,
|
318 |
+
"normalized": true,
|
319 |
+
"rstrip": false,
|
320 |
+
"single_word": false,
|
321 |
+
"special": false
|
322 |
+
},
|
323 |
+
"50292": {
|
324 |
+
"content": "[unused7]",
|
325 |
+
"lstrip": false,
|
326 |
+
"normalized": true,
|
327 |
+
"rstrip": false,
|
328 |
+
"single_word": false,
|
329 |
+
"special": false
|
330 |
+
},
|
331 |
+
"50293": {
|
332 |
+
"content": "[unused8]",
|
333 |
+
"lstrip": false,
|
334 |
+
"normalized": true,
|
335 |
+
"rstrip": false,
|
336 |
+
"single_word": false,
|
337 |
+
"special": false
|
338 |
+
},
|
339 |
+
"50294": {
|
340 |
+
"content": "[unused9]",
|
341 |
+
"lstrip": false,
|
342 |
+
"normalized": true,
|
343 |
+
"rstrip": false,
|
344 |
+
"single_word": false,
|
345 |
+
"special": false
|
346 |
+
},
|
347 |
+
"50295": {
|
348 |
+
"content": "[unused10]",
|
349 |
+
"lstrip": false,
|
350 |
+
"normalized": true,
|
351 |
+
"rstrip": false,
|
352 |
+
"single_word": false,
|
353 |
+
"special": false
|
354 |
+
},
|
355 |
+
"50296": {
|
356 |
+
"content": "[unused11]",
|
357 |
+
"lstrip": false,
|
358 |
+
"normalized": true,
|
359 |
+
"rstrip": false,
|
360 |
+
"single_word": false,
|
361 |
+
"special": false
|
362 |
+
},
|
363 |
+
"50297": {
|
364 |
+
"content": "[unused12]",
|
365 |
+
"lstrip": false,
|
366 |
+
"normalized": true,
|
367 |
+
"rstrip": false,
|
368 |
+
"single_word": false,
|
369 |
+
"special": false
|
370 |
+
},
|
371 |
+
"50298": {
|
372 |
+
"content": "[unused13]",
|
373 |
+
"lstrip": false,
|
374 |
+
"normalized": true,
|
375 |
+
"rstrip": false,
|
376 |
+
"single_word": false,
|
377 |
+
"special": false
|
378 |
+
},
|
379 |
+
"50299": {
|
380 |
+
"content": "[unused14]",
|
381 |
+
"lstrip": false,
|
382 |
+
"normalized": true,
|
383 |
+
"rstrip": false,
|
384 |
+
"single_word": false,
|
385 |
+
"special": false
|
386 |
+
},
|
387 |
+
"50300": {
|
388 |
+
"content": "[unused15]",
|
389 |
+
"lstrip": false,
|
390 |
+
"normalized": true,
|
391 |
+
"rstrip": false,
|
392 |
+
"single_word": false,
|
393 |
+
"special": false
|
394 |
+
},
|
395 |
+
"50301": {
|
396 |
+
"content": "[unused16]",
|
397 |
+
"lstrip": false,
|
398 |
+
"normalized": true,
|
399 |
+
"rstrip": false,
|
400 |
+
"single_word": false,
|
401 |
+
"special": false
|
402 |
+
},
|
403 |
+
"50302": {
|
404 |
+
"content": "[unused17]",
|
405 |
+
"lstrip": false,
|
406 |
+
"normalized": true,
|
407 |
+
"rstrip": false,
|
408 |
+
"single_word": false,
|
409 |
+
"special": false
|
410 |
+
},
|
411 |
+
"50303": {
|
412 |
+
"content": "[unused18]",
|
413 |
+
"lstrip": false,
|
414 |
+
"normalized": true,
|
415 |
+
"rstrip": false,
|
416 |
+
"single_word": false,
|
417 |
+
"special": false
|
418 |
+
},
|
419 |
+
"50304": {
|
420 |
+
"content": "[unused19]",
|
421 |
+
"lstrip": false,
|
422 |
+
"normalized": true,
|
423 |
+
"rstrip": false,
|
424 |
+
"single_word": false,
|
425 |
+
"special": false
|
426 |
+
},
|
427 |
+
"50305": {
|
428 |
+
"content": "[unused20]",
|
429 |
+
"lstrip": false,
|
430 |
+
"normalized": true,
|
431 |
+
"rstrip": false,
|
432 |
+
"single_word": false,
|
433 |
+
"special": false
|
434 |
+
},
|
435 |
+
"50306": {
|
436 |
+
"content": "[unused21]",
|
437 |
+
"lstrip": false,
|
438 |
+
"normalized": true,
|
439 |
+
"rstrip": false,
|
440 |
+
"single_word": false,
|
441 |
+
"special": false
|
442 |
+
},
|
443 |
+
"50307": {
|
444 |
+
"content": "[unused22]",
|
445 |
+
"lstrip": false,
|
446 |
+
"normalized": true,
|
447 |
+
"rstrip": false,
|
448 |
+
"single_word": false,
|
449 |
+
"special": false
|
450 |
+
},
|
451 |
+
"50308": {
|
452 |
+
"content": "[unused23]",
|
453 |
+
"lstrip": false,
|
454 |
+
"normalized": true,
|
455 |
+
"rstrip": false,
|
456 |
+
"single_word": false,
|
457 |
+
"special": false
|
458 |
+
},
|
459 |
+
"50309": {
|
460 |
+
"content": "[unused24]",
|
461 |
+
"lstrip": false,
|
462 |
+
"normalized": true,
|
463 |
+
"rstrip": false,
|
464 |
+
"single_word": false,
|
465 |
+
"special": false
|
466 |
+
},
|
467 |
+
"50310": {
|
468 |
+
"content": "[unused25]",
|
469 |
+
"lstrip": false,
|
470 |
+
"normalized": true,
|
471 |
+
"rstrip": false,
|
472 |
+
"single_word": false,
|
473 |
+
"special": false
|
474 |
+
},
|
475 |
+
"50311": {
|
476 |
+
"content": "[unused26]",
|
477 |
+
"lstrip": false,
|
478 |
+
"normalized": true,
|
479 |
+
"rstrip": false,
|
480 |
+
"single_word": false,
|
481 |
+
"special": false
|
482 |
+
},
|
483 |
+
"50312": {
|
484 |
+
"content": "[unused27]",
|
485 |
+
"lstrip": false,
|
486 |
+
"normalized": true,
|
487 |
+
"rstrip": false,
|
488 |
+
"single_word": false,
|
489 |
+
"special": false
|
490 |
+
},
|
491 |
+
"50313": {
|
492 |
+
"content": "[unused28]",
|
493 |
+
"lstrip": false,
|
494 |
+
"normalized": true,
|
495 |
+
"rstrip": false,
|
496 |
+
"single_word": false,
|
497 |
+
"special": false
|
498 |
+
},
|
499 |
+
"50314": {
|
500 |
+
"content": "[unused29]",
|
501 |
+
"lstrip": false,
|
502 |
+
"normalized": true,
|
503 |
+
"rstrip": false,
|
504 |
+
"single_word": false,
|
505 |
+
"special": false
|
506 |
+
},
|
507 |
+
"50315": {
|
508 |
+
"content": "[unused30]",
|
509 |
+
"lstrip": false,
|
510 |
+
"normalized": true,
|
511 |
+
"rstrip": false,
|
512 |
+
"single_word": false,
|
513 |
+
"special": false
|
514 |
+
},
|
515 |
+
"50316": {
|
516 |
+
"content": "[unused31]",
|
517 |
+
"lstrip": false,
|
518 |
+
"normalized": true,
|
519 |
+
"rstrip": false,
|
520 |
+
"single_word": false,
|
521 |
+
"special": false
|
522 |
+
},
|
523 |
+
"50317": {
|
524 |
+
"content": "[unused32]",
|
525 |
+
"lstrip": false,
|
526 |
+
"normalized": true,
|
527 |
+
"rstrip": false,
|
528 |
+
"single_word": false,
|
529 |
+
"special": false
|
530 |
+
},
|
531 |
+
"50318": {
|
532 |
+
"content": "[unused33]",
|
533 |
+
"lstrip": false,
|
534 |
+
"normalized": true,
|
535 |
+
"rstrip": false,
|
536 |
+
"single_word": false,
|
537 |
+
"special": false
|
538 |
+
},
|
539 |
+
"50319": {
|
540 |
+
"content": "[unused34]",
|
541 |
+
"lstrip": false,
|
542 |
+
"normalized": true,
|
543 |
+
"rstrip": false,
|
544 |
+
"single_word": false,
|
545 |
+
"special": false
|
546 |
+
},
|
547 |
+
"50320": {
|
548 |
+
"content": "[unused35]",
|
549 |
+
"lstrip": false,
|
550 |
+
"normalized": true,
|
551 |
+
"rstrip": false,
|
552 |
+
"single_word": false,
|
553 |
+
"special": false
|
554 |
+
},
|
555 |
+
"50321": {
|
556 |
+
"content": "[unused36]",
|
557 |
+
"lstrip": false,
|
558 |
+
"normalized": true,
|
559 |
+
"rstrip": false,
|
560 |
+
"single_word": false,
|
561 |
+
"special": false
|
562 |
+
},
|
563 |
+
"50322": {
|
564 |
+
"content": "[unused37]",
|
565 |
+
"lstrip": false,
|
566 |
+
"normalized": true,
|
567 |
+
"rstrip": false,
|
568 |
+
"single_word": false,
|
569 |
+
"special": false
|
570 |
+
},
|
571 |
+
"50323": {
|
572 |
+
"content": "[unused38]",
|
573 |
+
"lstrip": false,
|
574 |
+
"normalized": true,
|
575 |
+
"rstrip": false,
|
576 |
+
"single_word": false,
|
577 |
+
"special": false
|
578 |
+
},
|
579 |
+
"50324": {
|
580 |
+
"content": "[unused39]",
|
581 |
+
"lstrip": false,
|
582 |
+
"normalized": true,
|
583 |
+
"rstrip": false,
|
584 |
+
"single_word": false,
|
585 |
+
"special": false
|
586 |
+
},
|
587 |
+
"50325": {
|
588 |
+
"content": "[unused40]",
|
589 |
+
"lstrip": false,
|
590 |
+
"normalized": true,
|
591 |
+
"rstrip": false,
|
592 |
+
"single_word": false,
|
593 |
+
"special": false
|
594 |
+
},
|
595 |
+
"50326": {
|
596 |
+
"content": "[unused41]",
|
597 |
+
"lstrip": false,
|
598 |
+
"normalized": true,
|
599 |
+
"rstrip": false,
|
600 |
+
"single_word": false,
|
601 |
+
"special": false
|
602 |
+
},
|
603 |
+
"50327": {
|
604 |
+
"content": "[unused42]",
|
605 |
+
"lstrip": false,
|
606 |
+
"normalized": true,
|
607 |
+
"rstrip": false,
|
608 |
+
"single_word": false,
|
609 |
+
"special": false
|
610 |
+
},
|
611 |
+
"50328": {
|
612 |
+
"content": "[unused43]",
|
613 |
+
"lstrip": false,
|
614 |
+
"normalized": true,
|
615 |
+
"rstrip": false,
|
616 |
+
"single_word": false,
|
617 |
+
"special": false
|
618 |
+
},
|
619 |
+
"50329": {
|
620 |
+
"content": "[unused44]",
|
621 |
+
"lstrip": false,
|
622 |
+
"normalized": true,
|
623 |
+
"rstrip": false,
|
624 |
+
"single_word": false,
|
625 |
+
"special": false
|
626 |
+
},
|
627 |
+
"50330": {
|
628 |
+
"content": "[unused45]",
|
629 |
+
"lstrip": false,
|
630 |
+
"normalized": true,
|
631 |
+
"rstrip": false,
|
632 |
+
"single_word": false,
|
633 |
+
"special": false
|
634 |
+
},
|
635 |
+
"50331": {
|
636 |
+
"content": "[unused46]",
|
637 |
+
"lstrip": false,
|
638 |
+
"normalized": true,
|
639 |
+
"rstrip": false,
|
640 |
+
"single_word": false,
|
641 |
+
"special": false
|
642 |
+
},
|
643 |
+
"50332": {
|
644 |
+
"content": "[unused47]",
|
645 |
+
"lstrip": false,
|
646 |
+
"normalized": true,
|
647 |
+
"rstrip": false,
|
648 |
+
"single_word": false,
|
649 |
+
"special": false
|
650 |
+
},
|
651 |
+
"50333": {
|
652 |
+
"content": "[unused48]",
|
653 |
+
"lstrip": false,
|
654 |
+
"normalized": true,
|
655 |
+
"rstrip": false,
|
656 |
+
"single_word": false,
|
657 |
+
"special": false
|
658 |
+
},
|
659 |
+
"50334": {
|
660 |
+
"content": "[unused49]",
|
661 |
+
"lstrip": false,
|
662 |
+
"normalized": true,
|
663 |
+
"rstrip": false,
|
664 |
+
"single_word": false,
|
665 |
+
"special": false
|
666 |
+
},
|
667 |
+
"50335": {
|
668 |
+
"content": "[unused50]",
|
669 |
+
"lstrip": false,
|
670 |
+
"normalized": true,
|
671 |
+
"rstrip": false,
|
672 |
+
"single_word": false,
|
673 |
+
"special": false
|
674 |
+
},
|
675 |
+
"50336": {
|
676 |
+
"content": "[unused51]",
|
677 |
+
"lstrip": false,
|
678 |
+
"normalized": true,
|
679 |
+
"rstrip": false,
|
680 |
+
"single_word": false,
|
681 |
+
"special": false
|
682 |
+
},
|
683 |
+
"50337": {
|
684 |
+
"content": "[unused52]",
|
685 |
+
"lstrip": false,
|
686 |
+
"normalized": true,
|
687 |
+
"rstrip": false,
|
688 |
+
"single_word": false,
|
689 |
+
"special": false
|
690 |
+
},
|
691 |
+
"50338": {
|
692 |
+
"content": "[unused53]",
|
693 |
+
"lstrip": false,
|
694 |
+
"normalized": true,
|
695 |
+
"rstrip": false,
|
696 |
+
"single_word": false,
|
697 |
+
"special": false
|
698 |
+
},
|
699 |
+
"50339": {
|
700 |
+
"content": "[unused54]",
|
701 |
+
"lstrip": false,
|
702 |
+
"normalized": true,
|
703 |
+
"rstrip": false,
|
704 |
+
"single_word": false,
|
705 |
+
"special": false
|
706 |
+
},
|
707 |
+
"50340": {
|
708 |
+
"content": "[unused55]",
|
709 |
+
"lstrip": false,
|
710 |
+
"normalized": true,
|
711 |
+
"rstrip": false,
|
712 |
+
"single_word": false,
|
713 |
+
"special": false
|
714 |
+
},
|
715 |
+
"50341": {
|
716 |
+
"content": "[unused56]",
|
717 |
+
"lstrip": false,
|
718 |
+
"normalized": true,
|
719 |
+
"rstrip": false,
|
720 |
+
"single_word": false,
|
721 |
+
"special": false
|
722 |
+
},
|
723 |
+
"50342": {
|
724 |
+
"content": "[unused57]",
|
725 |
+
"lstrip": false,
|
726 |
+
"normalized": true,
|
727 |
+
"rstrip": false,
|
728 |
+
"single_word": false,
|
729 |
+
"special": false
|
730 |
+
},
|
731 |
+
"50343": {
|
732 |
+
"content": "[unused58]",
|
733 |
+
"lstrip": false,
|
734 |
+
"normalized": true,
|
735 |
+
"rstrip": false,
|
736 |
+
"single_word": false,
|
737 |
+
"special": false
|
738 |
+
},
|
739 |
+
"50344": {
|
740 |
+
"content": "[unused59]",
|
741 |
+
"lstrip": false,
|
742 |
+
"normalized": true,
|
743 |
+
"rstrip": false,
|
744 |
+
"single_word": false,
|
745 |
+
"special": false
|
746 |
+
},
|
747 |
+
"50345": {
|
748 |
+
"content": "[unused60]",
|
749 |
+
"lstrip": false,
|
750 |
+
"normalized": true,
|
751 |
+
"rstrip": false,
|
752 |
+
"single_word": false,
|
753 |
+
"special": false
|
754 |
+
},
|
755 |
+
"50346": {
|
756 |
+
"content": "[unused61]",
|
757 |
+
"lstrip": false,
|
758 |
+
"normalized": true,
|
759 |
+
"rstrip": false,
|
760 |
+
"single_word": false,
|
761 |
+
"special": false
|
762 |
+
},
|
763 |
+
"50347": {
|
764 |
+
"content": "[unused62]",
|
765 |
+
"lstrip": false,
|
766 |
+
"normalized": true,
|
767 |
+
"rstrip": false,
|
768 |
+
"single_word": false,
|
769 |
+
"special": false
|
770 |
+
},
|
771 |
+
"50348": {
|
772 |
+
"content": "[unused63]",
|
773 |
+
"lstrip": false,
|
774 |
+
"normalized": true,
|
775 |
+
"rstrip": false,
|
776 |
+
"single_word": false,
|
777 |
+
"special": false
|
778 |
+
},
|
779 |
+
"50349": {
|
780 |
+
"content": "[unused64]",
|
781 |
+
"lstrip": false,
|
782 |
+
"normalized": true,
|
783 |
+
"rstrip": false,
|
784 |
+
"single_word": false,
|
785 |
+
"special": false
|
786 |
+
},
|
787 |
+
"50350": {
|
788 |
+
"content": "[unused65]",
|
789 |
+
"lstrip": false,
|
790 |
+
"normalized": true,
|
791 |
+
"rstrip": false,
|
792 |
+
"single_word": false,
|
793 |
+
"special": false
|
794 |
+
},
|
795 |
+
"50351": {
|
796 |
+
"content": "[unused66]",
|
797 |
+
"lstrip": false,
|
798 |
+
"normalized": true,
|
799 |
+
"rstrip": false,
|
800 |
+
"single_word": false,
|
801 |
+
"special": false
|
802 |
+
},
|
803 |
+
"50352": {
|
804 |
+
"content": "[unused67]",
|
805 |
+
"lstrip": false,
|
806 |
+
"normalized": true,
|
807 |
+
"rstrip": false,
|
808 |
+
"single_word": false,
|
809 |
+
"special": false
|
810 |
+
},
|
811 |
+
"50353": {
|
812 |
+
"content": "[unused68]",
|
813 |
+
"lstrip": false,
|
814 |
+
"normalized": true,
|
815 |
+
"rstrip": false,
|
816 |
+
"single_word": false,
|
817 |
+
"special": false
|
818 |
+
},
|
819 |
+
"50354": {
|
820 |
+
"content": "[unused69]",
|
821 |
+
"lstrip": false,
|
822 |
+
"normalized": true,
|
823 |
+
"rstrip": false,
|
824 |
+
"single_word": false,
|
825 |
+
"special": false
|
826 |
+
},
|
827 |
+
"50355": {
|
828 |
+
"content": "[unused70]",
|
829 |
+
"lstrip": false,
|
830 |
+
"normalized": true,
|
831 |
+
"rstrip": false,
|
832 |
+
"single_word": false,
|
833 |
+
"special": false
|
834 |
+
},
|
835 |
+
"50356": {
|
836 |
+
"content": "[unused71]",
|
837 |
+
"lstrip": false,
|
838 |
+
"normalized": true,
|
839 |
+
"rstrip": false,
|
840 |
+
"single_word": false,
|
841 |
+
"special": false
|
842 |
+
},
|
843 |
+
"50357": {
|
844 |
+
"content": "[unused72]",
|
845 |
+
"lstrip": false,
|
846 |
+
"normalized": true,
|
847 |
+
"rstrip": false,
|
848 |
+
"single_word": false,
|
849 |
+
"special": false
|
850 |
+
},
|
851 |
+
"50358": {
|
852 |
+
"content": "[unused73]",
|
853 |
+
"lstrip": false,
|
854 |
+
"normalized": true,
|
855 |
+
"rstrip": false,
|
856 |
+
"single_word": false,
|
857 |
+
"special": false
|
858 |
+
},
|
859 |
+
"50359": {
|
860 |
+
"content": "[unused74]",
|
861 |
+
"lstrip": false,
|
862 |
+
"normalized": true,
|
863 |
+
"rstrip": false,
|
864 |
+
"single_word": false,
|
865 |
+
"special": false
|
866 |
+
},
|
867 |
+
"50360": {
|
868 |
+
"content": "[unused75]",
|
869 |
+
"lstrip": false,
|
870 |
+
"normalized": true,
|
871 |
+
"rstrip": false,
|
872 |
+
"single_word": false,
|
873 |
+
"special": false
|
874 |
+
},
|
875 |
+
"50361": {
|
876 |
+
"content": "[unused76]",
|
877 |
+
"lstrip": false,
|
878 |
+
"normalized": true,
|
879 |
+
"rstrip": false,
|
880 |
+
"single_word": false,
|
881 |
+
"special": false
|
882 |
+
},
|
883 |
+
"50362": {
|
884 |
+
"content": "[unused77]",
|
885 |
+
"lstrip": false,
|
886 |
+
"normalized": true,
|
887 |
+
"rstrip": false,
|
888 |
+
"single_word": false,
|
889 |
+
"special": false
|
890 |
+
},
|
891 |
+
"50363": {
|
892 |
+
"content": "[unused78]",
|
893 |
+
"lstrip": false,
|
894 |
+
"normalized": true,
|
895 |
+
"rstrip": false,
|
896 |
+
"single_word": false,
|
897 |
+
"special": false
|
898 |
+
},
|
899 |
+
"50364": {
|
900 |
+
"content": "[unused79]",
|
901 |
+
"lstrip": false,
|
902 |
+
"normalized": true,
|
903 |
+
"rstrip": false,
|
904 |
+
"single_word": false,
|
905 |
+
"special": false
|
906 |
+
},
|
907 |
+
"50365": {
|
908 |
+
"content": "[unused80]",
|
909 |
+
"lstrip": false,
|
910 |
+
"normalized": true,
|
911 |
+
"rstrip": false,
|
912 |
+
"single_word": false,
|
913 |
+
"special": false
|
914 |
+
},
|
915 |
+
"50366": {
|
916 |
+
"content": "[unused81]",
|
917 |
+
"lstrip": false,
|
918 |
+
"normalized": true,
|
919 |
+
"rstrip": false,
|
920 |
+
"single_word": false,
|
921 |
+
"special": false
|
922 |
+
},
|
923 |
+
"50367": {
|
924 |
+
"content": "[unused82]",
|
925 |
+
"lstrip": false,
|
926 |
+
"normalized": true,
|
927 |
+
"rstrip": false,
|
928 |
+
"single_word": false,
|
929 |
+
"special": false
|
930 |
+
}
|
931 |
+
},
|
932 |
+
"clean_up_tokenization_spaces": true,
|
933 |
+
"cls_token": "[CLS]",
|
934 |
+
"extra_special_tokens": {},
|
935 |
+
"mask_token": "[MASK]",
|
936 |
+
"model_input_names": [
|
937 |
+
"input_ids",
|
938 |
+
"attention_mask"
|
939 |
+
],
|
940 |
+
"model_max_length": 8192,
|
941 |
+
"pad_token": "[PAD]",
|
942 |
+
"sep_token": "[SEP]",
|
943 |
+
"tokenizer_class": "PreTrainedTokenizer",
|
944 |
+
"unk_token": "[UNK]"
|
945 |
+
}
|