Add new SentenceTransformer model.
Browse files- 1_Pooling/config.json +10 -0
- README.md +808 -0
- config.json +32 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +57 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": true,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,808 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: BAAI/bge-base-en-v1.5
|
3 |
+
language:
|
4 |
+
- en
|
5 |
+
library_name: sentence-transformers
|
6 |
+
license: apache-2.0
|
7 |
+
metrics:
|
8 |
+
- cosine_accuracy@1
|
9 |
+
- cosine_accuracy@3
|
10 |
+
- cosine_accuracy@5
|
11 |
+
- cosine_accuracy@10
|
12 |
+
- cosine_precision@1
|
13 |
+
- cosine_precision@3
|
14 |
+
- cosine_precision@5
|
15 |
+
- cosine_precision@10
|
16 |
+
- cosine_recall@1
|
17 |
+
- cosine_recall@3
|
18 |
+
- cosine_recall@5
|
19 |
+
- cosine_recall@10
|
20 |
+
- cosine_ndcg@10
|
21 |
+
- cosine_mrr@10
|
22 |
+
- cosine_map@100
|
23 |
+
pipeline_tag: sentence-similarity
|
24 |
+
tags:
|
25 |
+
- sentence-transformers
|
26 |
+
- sentence-similarity
|
27 |
+
- feature-extraction
|
28 |
+
- generated_from_trainer
|
29 |
+
- dataset_size:4887
|
30 |
+
- loss:MatryoshkaLoss
|
31 |
+
- loss:MultipleNegativesRankingLoss
|
32 |
+
widget:
|
33 |
+
- source_sentence: Informations non disponibles
|
34 |
+
sentences:
|
35 |
+
- What are the possible interactions of the plant Canneberge or cranberry with other
|
36 |
+
substances?
|
37 |
+
- What are the possible interactions of the black Sureau plant with other substances?
|
38 |
+
- Quelle est l'action pharmacologique de Crataegus oxyacantha L. subsp. monogyna?
|
39 |
+
- source_sentence: 'L''avis du spécialiste sur le lierre grimpant
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
En France, plusieurs sirops à base de Lierre grimpant sont commercialisés pour
|
45 |
+
la toux grasse de l’adulte et l’enfant de plus de 2 ans. Le lierre grimpant contient
|
46 |
+
des saponines similaires à celles trouvées dans le marron d''Inde ou le petit-houx.
|
47 |
+
Il serait intéressant d''explorer les propriétés d''extraits de lierre grimpant
|
48 |
+
dans le cadre de l''insuffisance veineuse et de ses symptômes (jambes lourdes,
|
49 |
+
hémorroïdes).'
|
50 |
+
sentences:
|
51 |
+
- Quelle partie de Mandragora automnalis Spreng. est utilisée à des fins médicales?
|
52 |
+
- Quel est le nom anglais de Tetraclini articulata (Vahl.) Masters?
|
53 |
+
- Quel est l'avis d'un expert sur la plante Lierre grimpant?
|
54 |
+
- source_sentence: lavande stoechade, lavande pap
|
55 |
+
sentences:
|
56 |
+
- Que pensent les autorités de santé de la plante Gentiane jaune?
|
57 |
+
- What is the French name of Lavandula stoechas L.?
|
58 |
+
- What are the common preparations made from Hyoscyamus muticus L.?
|
59 |
+
- source_sentence: 'œdographie locale: presque toutes les régions phytogéographiques
|
60 |
+
de l''égypte, à l''exception de la région méditerranéenne.régionale: egypte, libya.
|
61 |
+
globale: parties tropicales à sèches de l''afrique, arabia, palestine, w. indies,
|
62 |
+
brazil, columbia et venezuela.'
|
63 |
+
sentences:
|
64 |
+
- Où est géographiquement répartie Calotropis procera?
|
65 |
+
- Quel est le nom anglais de Deverra chlorantus Coss. & Dur.?
|
66 |
+
- Où est Calligonum comosum L Herit. géographiquement distribué?
|
67 |
+
- source_sentence: iconstituents its principal constituent is an alkaloid, tamarixin,along
|
68 |
+
with traces of its aglocone, tamarixetin. theplant also contains a high level
|
69 |
+
of tannin (ellagicand gallic) and quercetol (methyllic esther).
|
70 |
+
sentences:
|
71 |
+
- What are the chemical constituents of Tamarix gallica L.?
|
72 |
+
- How is Myrtus communis L. used in modern and traditional medicine?
|
73 |
+
- Quel est le nom arabe de Tamarix gallica L.?
|
74 |
+
model-index:
|
75 |
+
- name: BGE base Financial Matryoshka
|
76 |
+
results:
|
77 |
+
- task:
|
78 |
+
type: information-retrieval
|
79 |
+
name: Information Retrieval
|
80 |
+
dataset:
|
81 |
+
name: dim 768
|
82 |
+
type: dim_768
|
83 |
+
metrics:
|
84 |
+
- type: cosine_accuracy@1
|
85 |
+
value: 0.15285451197053407
|
86 |
+
name: Cosine Accuracy@1
|
87 |
+
- type: cosine_accuracy@3
|
88 |
+
value: 0.2302025782688766
|
89 |
+
name: Cosine Accuracy@3
|
90 |
+
- type: cosine_accuracy@5
|
91 |
+
value: 0.2965009208103131
|
92 |
+
name: Cosine Accuracy@5
|
93 |
+
- type: cosine_accuracy@10
|
94 |
+
value: 0.3830570902394107
|
95 |
+
name: Cosine Accuracy@10
|
96 |
+
- type: cosine_precision@1
|
97 |
+
value: 0.15285451197053407
|
98 |
+
name: Cosine Precision@1
|
99 |
+
- type: cosine_precision@3
|
100 |
+
value: 0.07673419275629219
|
101 |
+
name: Cosine Precision@3
|
102 |
+
- type: cosine_precision@5
|
103 |
+
value: 0.05930018416206262
|
104 |
+
name: Cosine Precision@5
|
105 |
+
- type: cosine_precision@10
|
106 |
+
value: 0.03830570902394107
|
107 |
+
name: Cosine Precision@10
|
108 |
+
- type: cosine_recall@1
|
109 |
+
value: 0.15285451197053407
|
110 |
+
name: Cosine Recall@1
|
111 |
+
- type: cosine_recall@3
|
112 |
+
value: 0.2302025782688766
|
113 |
+
name: Cosine Recall@3
|
114 |
+
- type: cosine_recall@5
|
115 |
+
value: 0.2965009208103131
|
116 |
+
name: Cosine Recall@5
|
117 |
+
- type: cosine_recall@10
|
118 |
+
value: 0.3830570902394107
|
119 |
+
name: Cosine Recall@10
|
120 |
+
- type: cosine_ndcg@10
|
121 |
+
value: 0.25252421493568566
|
122 |
+
name: Cosine Ndcg@10
|
123 |
+
- type: cosine_mrr@10
|
124 |
+
value: 0.212548598322079
|
125 |
+
name: Cosine Mrr@10
|
126 |
+
- type: cosine_map@100
|
127 |
+
value: 0.2202737635503467
|
128 |
+
name: Cosine Map@100
|
129 |
+
- task:
|
130 |
+
type: information-retrieval
|
131 |
+
name: Information Retrieval
|
132 |
+
dataset:
|
133 |
+
name: dim 512
|
134 |
+
type: dim_512
|
135 |
+
metrics:
|
136 |
+
- type: cosine_accuracy@1
|
137 |
+
value: 0.15101289134438306
|
138 |
+
name: Cosine Accuracy@1
|
139 |
+
- type: cosine_accuracy@3
|
140 |
+
value: 0.23572744014732966
|
141 |
+
name: Cosine Accuracy@3
|
142 |
+
- type: cosine_accuracy@5
|
143 |
+
value: 0.2946593001841621
|
144 |
+
name: Cosine Accuracy@5
|
145 |
+
- type: cosine_accuracy@10
|
146 |
+
value: 0.3885819521178637
|
147 |
+
name: Cosine Accuracy@10
|
148 |
+
- type: cosine_precision@1
|
149 |
+
value: 0.15101289134438306
|
150 |
+
name: Cosine Precision@1
|
151 |
+
- type: cosine_precision@3
|
152 |
+
value: 0.07857581338244321
|
153 |
+
name: Cosine Precision@3
|
154 |
+
- type: cosine_precision@5
|
155 |
+
value: 0.058931860036832415
|
156 |
+
name: Cosine Precision@5
|
157 |
+
- type: cosine_precision@10
|
158 |
+
value: 0.03885819521178637
|
159 |
+
name: Cosine Precision@10
|
160 |
+
- type: cosine_recall@1
|
161 |
+
value: 0.15101289134438306
|
162 |
+
name: Cosine Recall@1
|
163 |
+
- type: cosine_recall@3
|
164 |
+
value: 0.23572744014732966
|
165 |
+
name: Cosine Recall@3
|
166 |
+
- type: cosine_recall@5
|
167 |
+
value: 0.2946593001841621
|
168 |
+
name: Cosine Recall@5
|
169 |
+
- type: cosine_recall@10
|
170 |
+
value: 0.3885819521178637
|
171 |
+
name: Cosine Recall@10
|
172 |
+
- type: cosine_ndcg@10
|
173 |
+
value: 0.25426743863940854
|
174 |
+
name: Cosine Ndcg@10
|
175 |
+
- type: cosine_mrr@10
|
176 |
+
value: 0.21315297144026432
|
177 |
+
name: Cosine Mrr@10
|
178 |
+
- type: cosine_map@100
|
179 |
+
value: 0.2197240149343455
|
180 |
+
name: Cosine Map@100
|
181 |
+
- task:
|
182 |
+
type: information-retrieval
|
183 |
+
name: Information Retrieval
|
184 |
+
dataset:
|
185 |
+
name: dim 256
|
186 |
+
type: dim_256
|
187 |
+
metrics:
|
188 |
+
- type: cosine_accuracy@1
|
189 |
+
value: 0.143646408839779
|
190 |
+
name: Cosine Accuracy@1
|
191 |
+
- type: cosine_accuracy@3
|
192 |
+
value: 0.22467771639042358
|
193 |
+
name: Cosine Accuracy@3
|
194 |
+
- type: cosine_accuracy@5
|
195 |
+
value: 0.285451197053407
|
196 |
+
name: Cosine Accuracy@5
|
197 |
+
- type: cosine_accuracy@10
|
198 |
+
value: 0.3572744014732965
|
199 |
+
name: Cosine Accuracy@10
|
200 |
+
- type: cosine_precision@1
|
201 |
+
value: 0.143646408839779
|
202 |
+
name: Cosine Precision@1
|
203 |
+
- type: cosine_precision@3
|
204 |
+
value: 0.07489257213014118
|
205 |
+
name: Cosine Precision@3
|
206 |
+
- type: cosine_precision@5
|
207 |
+
value: 0.0570902394106814
|
208 |
+
name: Cosine Precision@5
|
209 |
+
- type: cosine_precision@10
|
210 |
+
value: 0.03572744014732965
|
211 |
+
name: Cosine Precision@10
|
212 |
+
- type: cosine_recall@1
|
213 |
+
value: 0.143646408839779
|
214 |
+
name: Cosine Recall@1
|
215 |
+
- type: cosine_recall@3
|
216 |
+
value: 0.22467771639042358
|
217 |
+
name: Cosine Recall@3
|
218 |
+
- type: cosine_recall@5
|
219 |
+
value: 0.285451197053407
|
220 |
+
name: Cosine Recall@5
|
221 |
+
- type: cosine_recall@10
|
222 |
+
value: 0.3572744014732965
|
223 |
+
name: Cosine Recall@10
|
224 |
+
- type: cosine_ndcg@10
|
225 |
+
value: 0.23930186714544474
|
226 |
+
name: Cosine Ndcg@10
|
227 |
+
- type: cosine_mrr@10
|
228 |
+
value: 0.2028713204127569
|
229 |
+
name: Cosine Mrr@10
|
230 |
+
- type: cosine_map@100
|
231 |
+
value: 0.21119367144055487
|
232 |
+
name: Cosine Map@100
|
233 |
+
- task:
|
234 |
+
type: information-retrieval
|
235 |
+
name: Information Retrieval
|
236 |
+
dataset:
|
237 |
+
name: dim 128
|
238 |
+
type: dim_128
|
239 |
+
metrics:
|
240 |
+
- type: cosine_accuracy@1
|
241 |
+
value: 0.13259668508287292
|
242 |
+
name: Cosine Accuracy@1
|
243 |
+
- type: cosine_accuracy@3
|
244 |
+
value: 0.23388581952117865
|
245 |
+
name: Cosine Accuracy@3
|
246 |
+
- type: cosine_accuracy@5
|
247 |
+
value: 0.27440147329650094
|
248 |
+
name: Cosine Accuracy@5
|
249 |
+
- type: cosine_accuracy@10
|
250 |
+
value: 0.3462246777163904
|
251 |
+
name: Cosine Accuracy@10
|
252 |
+
- type: cosine_precision@1
|
253 |
+
value: 0.13259668508287292
|
254 |
+
name: Cosine Precision@1
|
255 |
+
- type: cosine_precision@3
|
256 |
+
value: 0.07796193984039287
|
257 |
+
name: Cosine Precision@3
|
258 |
+
- type: cosine_precision@5
|
259 |
+
value: 0.05488029465930018
|
260 |
+
name: Cosine Precision@5
|
261 |
+
- type: cosine_precision@10
|
262 |
+
value: 0.03462246777163904
|
263 |
+
name: Cosine Precision@10
|
264 |
+
- type: cosine_recall@1
|
265 |
+
value: 0.13259668508287292
|
266 |
+
name: Cosine Recall@1
|
267 |
+
- type: cosine_recall@3
|
268 |
+
value: 0.23388581952117865
|
269 |
+
name: Cosine Recall@3
|
270 |
+
- type: cosine_recall@5
|
271 |
+
value: 0.27440147329650094
|
272 |
+
name: Cosine Recall@5
|
273 |
+
- type: cosine_recall@10
|
274 |
+
value: 0.3462246777163904
|
275 |
+
name: Cosine Recall@10
|
276 |
+
- type: cosine_ndcg@10
|
277 |
+
value: 0.22991680445992715
|
278 |
+
name: Cosine Ndcg@10
|
279 |
+
- type: cosine_mrr@10
|
280 |
+
value: 0.19395407641263987
|
281 |
+
name: Cosine Mrr@10
|
282 |
+
- type: cosine_map@100
|
283 |
+
value: 0.20211208448630702
|
284 |
+
name: Cosine Map@100
|
285 |
+
- task:
|
286 |
+
type: information-retrieval
|
287 |
+
name: Information Retrieval
|
288 |
+
dataset:
|
289 |
+
name: dim 64
|
290 |
+
type: dim_64
|
291 |
+
metrics:
|
292 |
+
- type: cosine_accuracy@1
|
293 |
+
value: 0.1141804788213628
|
294 |
+
name: Cosine Accuracy@1
|
295 |
+
- type: cosine_accuracy@3
|
296 |
+
value: 0.1878453038674033
|
297 |
+
name: Cosine Accuracy@3
|
298 |
+
- type: cosine_accuracy@5
|
299 |
+
value: 0.2430939226519337
|
300 |
+
name: Cosine Accuracy@5
|
301 |
+
- type: cosine_accuracy@10
|
302 |
+
value: 0.32044198895027626
|
303 |
+
name: Cosine Accuracy@10
|
304 |
+
- type: cosine_precision@1
|
305 |
+
value: 0.1141804788213628
|
306 |
+
name: Cosine Precision@1
|
307 |
+
- type: cosine_precision@3
|
308 |
+
value: 0.06261510128913444
|
309 |
+
name: Cosine Precision@3
|
310 |
+
- type: cosine_precision@5
|
311 |
+
value: 0.04861878453038675
|
312 |
+
name: Cosine Precision@5
|
313 |
+
- type: cosine_precision@10
|
314 |
+
value: 0.032044198895027624
|
315 |
+
name: Cosine Precision@10
|
316 |
+
- type: cosine_recall@1
|
317 |
+
value: 0.1141804788213628
|
318 |
+
name: Cosine Recall@1
|
319 |
+
- type: cosine_recall@3
|
320 |
+
value: 0.1878453038674033
|
321 |
+
name: Cosine Recall@3
|
322 |
+
- type: cosine_recall@5
|
323 |
+
value: 0.2430939226519337
|
324 |
+
name: Cosine Recall@5
|
325 |
+
- type: cosine_recall@10
|
326 |
+
value: 0.32044198895027626
|
327 |
+
name: Cosine Recall@10
|
328 |
+
- type: cosine_ndcg@10
|
329 |
+
value: 0.20468493348849215
|
330 |
+
name: Cosine Ndcg@10
|
331 |
+
- type: cosine_mrr@10
|
332 |
+
value: 0.16909658277061598
|
333 |
+
name: Cosine Mrr@10
|
334 |
+
- type: cosine_map@100
|
335 |
+
value: 0.1776805865582346
|
336 |
+
name: Cosine Map@100
|
337 |
+
---
|
338 |
+
|
339 |
+
# BGE base Financial Matryoshka
|
340 |
+
|
341 |
+
This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) on the `json` dataset (4,887 training samples). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
342 |
+
|
343 |
+
## Model Details
|
344 |
+
|
345 |
+
### Model Description
|
346 |
+
- **Model Type:** Sentence Transformer
|
347 |
+
- **Base model:** [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) <!-- at revision a5beb1e3e68b9ab74eb54cfd186867f64f240e1a -->
|
348 |
+
- **Maximum Sequence Length:** 512 tokens
|
349 |
+
- **Output Dimensionality:** 768 dimensions
|
350 |
+
- **Similarity Function:** Cosine Similarity
|
351 |
+
- **Training Dataset:**
|
352 |
+
- json
|
353 |
+
- **Language:** en
|
354 |
+
- **License:** apache-2.0
|
355 |
+
|
356 |
+
### Model Sources
|
357 |
+
|
358 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
359 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
360 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
361 |
+
|
362 |
+
### Full Model Architecture
|
363 |
+
|
364 |
+
```
|
365 |
+
SentenceTransformer(
|
366 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel
|
367 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
368 |
+
(2): Normalize()
|
369 |
+
)
|
370 |
+
```
|
371 |
+
|
372 |
+
## Usage
|
373 |
+
|
374 |
+
### Direct Usage (Sentence Transformers)
|
375 |
+
|
376 |
+
First install the Sentence Transformers library:
|
377 |
+
|
378 |
+
```bash
|
379 |
+
pip install -U sentence-transformers
|
380 |
+
```
|
381 |
+
|
382 |
+
Then you can load this model and run inference.
|
383 |
+
```python
|
384 |
+
from sentence_transformers import SentenceTransformer
|
385 |
+
|
386 |
+
# Download from the 🤗 Hub
|
387 |
+
model = SentenceTransformer("tmmazen/bge-base-financial-matryoshka")
|
388 |
+
# Run inference
|
389 |
+
sentences = [
|
390 |
+
'iconstituents its principal constituent is an alkaloid, tamarixin,along with traces of its aglocone, tamarixetin. theplant also contains a high level of tannin (ellagicand gallic) and quercetol (methyllic esther).',
|
391 |
+
'What are the chemical constituents of Tamarix gallica L.?',
|
392 |
+
'How is Myrtus communis L. used in modern and traditional medicine?',
|
393 |
+
]
|
394 |
+
embeddings = model.encode(sentences)
|
395 |
+
print(embeddings.shape)
|
396 |
+
# [3, 768]
|
397 |
+
|
398 |
+
# Get the similarity scores for the embeddings
|
399 |
+
similarities = model.similarity(embeddings, embeddings)
|
400 |
+
print(similarities.shape)
|
401 |
+
# [3, 3]
|
402 |
+
```
|
403 |
+
|
404 |
+
<!--
|
405 |
+
### Direct Usage (Transformers)
|
406 |
+
|
407 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
408 |
+
|
409 |
+
</details>
|
410 |
+
-->
|
411 |
+
|
412 |
+
<!--
|
413 |
+
### Downstream Usage (Sentence Transformers)
|
414 |
+
|
415 |
+
You can finetune this model on your own dataset.
|
416 |
+
|
417 |
+
<details><summary>Click to expand</summary>
|
418 |
+
|
419 |
+
</details>
|
420 |
+
-->
|
421 |
+
|
422 |
+
<!--
|
423 |
+
### Out-of-Scope Use
|
424 |
+
|
425 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
426 |
+
-->
|
427 |
+
|
428 |
+
## Evaluation
|
429 |
+
|
430 |
+
### Metrics
|
431 |
+
|
432 |
+
#### Information Retrieval
|
433 |
+
* Dataset: `dim_768`
|
434 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
435 |
+
|
436 |
+
| Metric | Value |
|
437 |
+
|:--------------------|:-----------|
|
438 |
+
| cosine_accuracy@1 | 0.1529 |
|
439 |
+
| cosine_accuracy@3 | 0.2302 |
|
440 |
+
| cosine_accuracy@5 | 0.2965 |
|
441 |
+
| cosine_accuracy@10 | 0.3831 |
|
442 |
+
| cosine_precision@1 | 0.1529 |
|
443 |
+
| cosine_precision@3 | 0.0767 |
|
444 |
+
| cosine_precision@5 | 0.0593 |
|
445 |
+
| cosine_precision@10 | 0.0383 |
|
446 |
+
| cosine_recall@1 | 0.1529 |
|
447 |
+
| cosine_recall@3 | 0.2302 |
|
448 |
+
| cosine_recall@5 | 0.2965 |
|
449 |
+
| cosine_recall@10 | 0.3831 |
|
450 |
+
| cosine_ndcg@10 | 0.2525 |
|
451 |
+
| cosine_mrr@10 | 0.2125 |
|
452 |
+
| **cosine_map@100** | **0.2203** |
|
453 |
+
|
454 |
+
#### Information Retrieval
|
455 |
+
* Dataset: `dim_512`
|
456 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
457 |
+
|
458 |
+
| Metric | Value |
|
459 |
+
|:--------------------|:-----------|
|
460 |
+
| cosine_accuracy@1 | 0.151 |
|
461 |
+
| cosine_accuracy@3 | 0.2357 |
|
462 |
+
| cosine_accuracy@5 | 0.2947 |
|
463 |
+
| cosine_accuracy@10 | 0.3886 |
|
464 |
+
| cosine_precision@1 | 0.151 |
|
465 |
+
| cosine_precision@3 | 0.0786 |
|
466 |
+
| cosine_precision@5 | 0.0589 |
|
467 |
+
| cosine_precision@10 | 0.0389 |
|
468 |
+
| cosine_recall@1 | 0.151 |
|
469 |
+
| cosine_recall@3 | 0.2357 |
|
470 |
+
| cosine_recall@5 | 0.2947 |
|
471 |
+
| cosine_recall@10 | 0.3886 |
|
472 |
+
| cosine_ndcg@10 | 0.2543 |
|
473 |
+
| cosine_mrr@10 | 0.2132 |
|
474 |
+
| **cosine_map@100** | **0.2197** |
|
475 |
+
|
476 |
+
#### Information Retrieval
|
477 |
+
* Dataset: `dim_256`
|
478 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
479 |
+
|
480 |
+
| Metric | Value |
|
481 |
+
|:--------------------|:-----------|
|
482 |
+
| cosine_accuracy@1 | 0.1436 |
|
483 |
+
| cosine_accuracy@3 | 0.2247 |
|
484 |
+
| cosine_accuracy@5 | 0.2855 |
|
485 |
+
| cosine_accuracy@10 | 0.3573 |
|
486 |
+
| cosine_precision@1 | 0.1436 |
|
487 |
+
| cosine_precision@3 | 0.0749 |
|
488 |
+
| cosine_precision@5 | 0.0571 |
|
489 |
+
| cosine_precision@10 | 0.0357 |
|
490 |
+
| cosine_recall@1 | 0.1436 |
|
491 |
+
| cosine_recall@3 | 0.2247 |
|
492 |
+
| cosine_recall@5 | 0.2855 |
|
493 |
+
| cosine_recall@10 | 0.3573 |
|
494 |
+
| cosine_ndcg@10 | 0.2393 |
|
495 |
+
| cosine_mrr@10 | 0.2029 |
|
496 |
+
| **cosine_map@100** | **0.2112** |
|
497 |
+
|
498 |
+
#### Information Retrieval
|
499 |
+
* Dataset: `dim_128`
|
500 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
501 |
+
|
502 |
+
| Metric | Value |
|
503 |
+
|:--------------------|:-----------|
|
504 |
+
| cosine_accuracy@1 | 0.1326 |
|
505 |
+
| cosine_accuracy@3 | 0.2339 |
|
506 |
+
| cosine_accuracy@5 | 0.2744 |
|
507 |
+
| cosine_accuracy@10 | 0.3462 |
|
508 |
+
| cosine_precision@1 | 0.1326 |
|
509 |
+
| cosine_precision@3 | 0.078 |
|
510 |
+
| cosine_precision@5 | 0.0549 |
|
511 |
+
| cosine_precision@10 | 0.0346 |
|
512 |
+
| cosine_recall@1 | 0.1326 |
|
513 |
+
| cosine_recall@3 | 0.2339 |
|
514 |
+
| cosine_recall@5 | 0.2744 |
|
515 |
+
| cosine_recall@10 | 0.3462 |
|
516 |
+
| cosine_ndcg@10 | 0.2299 |
|
517 |
+
| cosine_mrr@10 | 0.194 |
|
518 |
+
| **cosine_map@100** | **0.2021** |
|
519 |
+
|
520 |
+
#### Information Retrieval
|
521 |
+
* Dataset: `dim_64`
|
522 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
523 |
+
|
524 |
+
| Metric | Value |
|
525 |
+
|:--------------------|:-----------|
|
526 |
+
| cosine_accuracy@1 | 0.1142 |
|
527 |
+
| cosine_accuracy@3 | 0.1878 |
|
528 |
+
| cosine_accuracy@5 | 0.2431 |
|
529 |
+
| cosine_accuracy@10 | 0.3204 |
|
530 |
+
| cosine_precision@1 | 0.1142 |
|
531 |
+
| cosine_precision@3 | 0.0626 |
|
532 |
+
| cosine_precision@5 | 0.0486 |
|
533 |
+
| cosine_precision@10 | 0.032 |
|
534 |
+
| cosine_recall@1 | 0.1142 |
|
535 |
+
| cosine_recall@3 | 0.1878 |
|
536 |
+
| cosine_recall@5 | 0.2431 |
|
537 |
+
| cosine_recall@10 | 0.3204 |
|
538 |
+
| cosine_ndcg@10 | 0.2047 |
|
539 |
+
| cosine_mrr@10 | 0.1691 |
|
540 |
+
| **cosine_map@100** | **0.1777** |
|
541 |
+
|
542 |
+
<!--
|
543 |
+
## Bias, Risks and Limitations
|
544 |
+
|
545 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
546 |
+
-->
|
547 |
+
|
548 |
+
<!--
|
549 |
+
### Recommendations
|
550 |
+
|
551 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
552 |
+
-->
|
553 |
+
|
554 |
+
## Training Details
|
555 |
+
|
556 |
+
### Training Dataset
|
557 |
+
|
558 |
+
#### json
|
559 |
+
|
560 |
+
* Dataset: json
|
561 |
+
* Size: 4,887 training samples
|
562 |
+
* Columns: <code>positive</code> and <code>anchor</code>
|
563 |
+
* Approximate statistics based on the first 1000 samples:
|
564 |
+
| | positive | anchor |
|
565 |
+
|:--------|:------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
|
566 |
+
| type | string | string |
|
567 |
+
| details | <ul><li>min: 3 tokens</li><li>mean: 102.23 tokens</li><li>max: 512 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 20.63 tokens</li><li>max: 42 tokens</li></ul> |
|
568 |
+
* Samples:
|
569 |
+
| positive | anchor |
|
570 |
+
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------|
|
571 |
+
| <code>The reported side effects of solidification were those with allergy to the plants of the asteraceae family (formerly the family of compounds: chamomile, dandelion, echinacea, armoise, etc.), to which the solidification belongs. Sometimes, solidification can cause heartburn.</code> | <code>What are the side effects and risks of overdose of the European Solidage plant?</code> |
|
572 |
+
| <code>rhumes crâniens, névralgie, problèmes respiratoires, sinusite</code> | <code>À quelles conditions ou fins Pulicaria incisa (Lam.) DC. est-il utilisé?</code> |
|
573 |
+
| <code> La mélisse (Melissa officinalis) est originaire d’Asie Mineure (Turquie et pourtour méditerranéen) où Théophraste et Hippocrate en vantaient déjà la capacité à calmer les maux de ventre. Elle doit son nom au mot grec « melissa » désignant l’abeille (la mélisse est aussi appelée « piment des abeilles »). Elle est traditionnellement utilisée pour ses propriétés apaisantes sur le système nerveux et le système digestif. Son usage a été popularisé par des préparations élaborées dans des monastères (l’Eau de Mélisse des Carmes, par exemple). Cultivée en régions tempérées, la mélisse est une plante de la famille des labiées, tout comme la menthe. Ses feuilles sont récoltées de juin à septembre, puis séchées. La poudre de mélisse est obtenue par broyage des feuilles, dont on peut aussi extraire l’huile essentielle, à usage externe. Des teintures et des extraits liquides sont également obtenus par extraction dans l’alcool. Les autres usages traditionnels de la mélisse<br><br><br>Les décoctions de mélisse sont parfois utilisées en frictions pour soulager les migraines ou les rhumatismes, et en bains en cas de nervosité, d’agitation et de règles douloureuses.</code> | <code>Quelle est l'origine et quels sont les usages de la plante Mélisse?</code> |
|
574 |
+
* Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
|
575 |
+
```json
|
576 |
+
{
|
577 |
+
"loss": "MultipleNegativesRankingLoss",
|
578 |
+
"matryoshka_dims": [
|
579 |
+
768,
|
580 |
+
512,
|
581 |
+
256,
|
582 |
+
128,
|
583 |
+
64
|
584 |
+
],
|
585 |
+
"matryoshka_weights": [
|
586 |
+
1,
|
587 |
+
1,
|
588 |
+
1,
|
589 |
+
1,
|
590 |
+
1
|
591 |
+
],
|
592 |
+
"n_dims_per_step": -1
|
593 |
+
}
|
594 |
+
```
|
595 |
+
|
596 |
+
### Training Hyperparameters
|
597 |
+
#### Non-Default Hyperparameters
|
598 |
+
|
599 |
+
- `eval_strategy`: epoch
|
600 |
+
- `per_device_train_batch_size`: 16
|
601 |
+
- `per_device_eval_batch_size`: 16
|
602 |
+
- `gradient_accumulation_steps`: 16
|
603 |
+
- `learning_rate`: 1e-05
|
604 |
+
- `num_train_epochs`: 4
|
605 |
+
- `lr_scheduler_type`: cosine
|
606 |
+
- `warmup_ratio`: 0.1
|
607 |
+
- `fp16`: True
|
608 |
+
- `load_best_model_at_end`: True
|
609 |
+
- `optim`: adamw_torch_fused
|
610 |
+
- `batch_sampler`: no_duplicates
|
611 |
+
|
612 |
+
#### All Hyperparameters
|
613 |
+
<details><summary>Click to expand</summary>
|
614 |
+
|
615 |
+
- `overwrite_output_dir`: False
|
616 |
+
- `do_predict`: False
|
617 |
+
- `eval_strategy`: epoch
|
618 |
+
- `prediction_loss_only`: True
|
619 |
+
- `per_device_train_batch_size`: 16
|
620 |
+
- `per_device_eval_batch_size`: 16
|
621 |
+
- `per_gpu_train_batch_size`: None
|
622 |
+
- `per_gpu_eval_batch_size`: None
|
623 |
+
- `gradient_accumulation_steps`: 16
|
624 |
+
- `eval_accumulation_steps`: None
|
625 |
+
- `learning_rate`: 1e-05
|
626 |
+
- `weight_decay`: 0.0
|
627 |
+
- `adam_beta1`: 0.9
|
628 |
+
- `adam_beta2`: 0.999
|
629 |
+
- `adam_epsilon`: 1e-08
|
630 |
+
- `max_grad_norm`: 1.0
|
631 |
+
- `num_train_epochs`: 4
|
632 |
+
- `max_steps`: -1
|
633 |
+
- `lr_scheduler_type`: cosine
|
634 |
+
- `lr_scheduler_kwargs`: {}
|
635 |
+
- `warmup_ratio`: 0.1
|
636 |
+
- `warmup_steps`: 0
|
637 |
+
- `log_level`: passive
|
638 |
+
- `log_level_replica`: warning
|
639 |
+
- `log_on_each_node`: True
|
640 |
+
- `logging_nan_inf_filter`: True
|
641 |
+
- `save_safetensors`: True
|
642 |
+
- `save_on_each_node`: False
|
643 |
+
- `save_only_model`: False
|
644 |
+
- `restore_callback_states_from_checkpoint`: False
|
645 |
+
- `no_cuda`: False
|
646 |
+
- `use_cpu`: False
|
647 |
+
- `use_mps_device`: False
|
648 |
+
- `seed`: 42
|
649 |
+
- `data_seed`: None
|
650 |
+
- `jit_mode_eval`: False
|
651 |
+
- `use_ipex`: False
|
652 |
+
- `bf16`: False
|
653 |
+
- `fp16`: True
|
654 |
+
- `fp16_opt_level`: O1
|
655 |
+
- `half_precision_backend`: auto
|
656 |
+
- `bf16_full_eval`: False
|
657 |
+
- `fp16_full_eval`: False
|
658 |
+
- `tf32`: None
|
659 |
+
- `local_rank`: 0
|
660 |
+
- `ddp_backend`: None
|
661 |
+
- `tpu_num_cores`: None
|
662 |
+
- `tpu_metrics_debug`: False
|
663 |
+
- `debug`: []
|
664 |
+
- `dataloader_drop_last`: False
|
665 |
+
- `dataloader_num_workers`: 0
|
666 |
+
- `dataloader_prefetch_factor`: None
|
667 |
+
- `past_index`: -1
|
668 |
+
- `disable_tqdm`: False
|
669 |
+
- `remove_unused_columns`: True
|
670 |
+
- `label_names`: None
|
671 |
+
- `load_best_model_at_end`: True
|
672 |
+
- `ignore_data_skip`: False
|
673 |
+
- `fsdp`: []
|
674 |
+
- `fsdp_min_num_params`: 0
|
675 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
676 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
677 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
678 |
+
- `deepspeed`: None
|
679 |
+
- `label_smoothing_factor`: 0.0
|
680 |
+
- `optim`: adamw_torch_fused
|
681 |
+
- `optim_args`: None
|
682 |
+
- `adafactor`: False
|
683 |
+
- `group_by_length`: False
|
684 |
+
- `length_column_name`: length
|
685 |
+
- `ddp_find_unused_parameters`: None
|
686 |
+
- `ddp_bucket_cap_mb`: None
|
687 |
+
- `ddp_broadcast_buffers`: False
|
688 |
+
- `dataloader_pin_memory`: True
|
689 |
+
- `dataloader_persistent_workers`: False
|
690 |
+
- `skip_memory_metrics`: True
|
691 |
+
- `use_legacy_prediction_loop`: False
|
692 |
+
- `push_to_hub`: False
|
693 |
+
- `resume_from_checkpoint`: None
|
694 |
+
- `hub_model_id`: None
|
695 |
+
- `hub_strategy`: every_save
|
696 |
+
- `hub_private_repo`: False
|
697 |
+
- `hub_always_push`: False
|
698 |
+
- `gradient_checkpointing`: False
|
699 |
+
- `gradient_checkpointing_kwargs`: None
|
700 |
+
- `include_inputs_for_metrics`: False
|
701 |
+
- `eval_do_concat_batches`: True
|
702 |
+
- `fp16_backend`: auto
|
703 |
+
- `push_to_hub_model_id`: None
|
704 |
+
- `push_to_hub_organization`: None
|
705 |
+
- `mp_parameters`:
|
706 |
+
- `auto_find_batch_size`: False
|
707 |
+
- `full_determinism`: False
|
708 |
+
- `torchdynamo`: None
|
709 |
+
- `ray_scope`: last
|
710 |
+
- `ddp_timeout`: 1800
|
711 |
+
- `torch_compile`: False
|
712 |
+
- `torch_compile_backend`: None
|
713 |
+
- `torch_compile_mode`: None
|
714 |
+
- `dispatch_batches`: None
|
715 |
+
- `split_batches`: None
|
716 |
+
- `include_tokens_per_second`: False
|
717 |
+
- `include_num_input_tokens_seen`: False
|
718 |
+
- `neftune_noise_alpha`: None
|
719 |
+
- `optim_target_modules`: None
|
720 |
+
- `batch_eval_metrics`: False
|
721 |
+
- `batch_sampler`: no_duplicates
|
722 |
+
- `multi_dataset_batch_sampler`: proportional
|
723 |
+
|
724 |
+
</details>
|
725 |
+
|
726 |
+
### Training Logs
|
727 |
+
| Epoch | Step | Training Loss | dim_128_cosine_map@100 | dim_256_cosine_map@100 | dim_512_cosine_map@100 | dim_64_cosine_map@100 | dim_768_cosine_map@100 |
|
728 |
+
|:----------:|:------:|:-------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:|:----------------------:|
|
729 |
+
| 0.5229 | 10 | 7.9606 | - | - | - | - | - |
|
730 |
+
| 1.0458 | 20 | 4.6998 | - | - | - | - | - |
|
731 |
+
| 1.5686 | 30 | 0.3577 | - | - | - | - | - |
|
732 |
+
| 1.7778 | 34 | - | 0.1845 | 0.2027 | 0.2041 | 0.1558 | 0.2045 |
|
733 |
+
| 1.2680 | 40 | 2.4714 | - | - | - | - | - |
|
734 |
+
| 1.7908 | 50 | 4.4309 | - | - | - | - | - |
|
735 |
+
| 2.3137 | 60 | 0.7847 | - | - | - | - | - |
|
736 |
+
| **2.7843** | **69** | **-** | **0.2028** | **0.2114** | **0.2197** | **0.1779** | **0.2206** |
|
737 |
+
| 2.0131 | 70 | 0.1189 | - | - | - | - | - |
|
738 |
+
| 2.3268 | 76 | - | 0.2021 | 0.2112 | 0.2197 | 0.1777 | 0.2203 |
|
739 |
+
|
740 |
+
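* The bold row denotes the saved checkpoint. Note that the logged `Epoch` values are not monotonic in `Step` (e.g. step 34 is logged at epoch 1.7778 but step 40 at epoch 1.2680, and step 69 at 2.7843 but step 70 at 2.0131); this is presumably an artifact of how the trainer computes fractional epochs when `gradient_accumulation_steps` is greater than 1, so treat `Step` as the authoritative ordering of the rows.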
|
741 |
+
|
742 |
+
### Framework Versions
|
743 |
+
- Python: 3.10.12
|
744 |
+
- Sentence Transformers: 3.1.0
|
745 |
+
- Transformers: 4.41.2
|
746 |
+
- PyTorch: 2.1.2+cu121
|
747 |
+
- Accelerate: 0.33.0
|
748 |
+
- Datasets: 2.19.1
|
749 |
+
- Tokenizers: 0.19.1
|
750 |
+
|
751 |
+
## Citation
|
752 |
+
|
753 |
+
### BibTeX
|
754 |
+
|
755 |
+
#### Sentence Transformers
|
756 |
+
```bibtex
|
757 |
+
@inproceedings{reimers-2019-sentence-bert,
|
758 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
759 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
760 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
761 |
+
month = "11",
|
762 |
+
year = "2019",
|
763 |
+
publisher = "Association for Computational Linguistics",
|
764 |
+
url = "https://arxiv.org/abs/1908.10084",
|
765 |
+
}
|
766 |
+
```
|
767 |
+
|
768 |
+
#### MatryoshkaLoss
|
769 |
+
```bibtex
|
770 |
+
@misc{kusupati2024matryoshka,
|
771 |
+
title={Matryoshka Representation Learning},
|
772 |
+
author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
|
773 |
+
year={2024},
|
774 |
+
eprint={2205.13147},
|
775 |
+
archivePrefix={arXiv},
|
776 |
+
primaryClass={cs.LG}
|
777 |
+
}
|
778 |
+
```
|
779 |
+
|
780 |
+
#### MultipleNegativesRankingLoss
|
781 |
+
```bibtex
|
782 |
+
@misc{henderson2017efficient,
|
783 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
784 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
785 |
+
year={2017},
|
786 |
+
eprint={1705.00652},
|
787 |
+
archivePrefix={arXiv},
|
788 |
+
primaryClass={cs.CL}
|
789 |
+
}
|
790 |
+
```
|
791 |
+
|
792 |
+
<!--
|
793 |
+
## Glossary
|
794 |
+
|
795 |
+
*Clearly define terms in order to be accessible across audiences.*
|
796 |
+
-->
|
797 |
+
|
798 |
+
<!--
|
799 |
+
## Model Card Authors
|
800 |
+
|
801 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
802 |
+
-->
|
803 |
+
|
804 |
+
<!--
|
805 |
+
## Model Card Contact
|
806 |
+
|
807 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
808 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "BAAI/bge-base-en-v1.5",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"id2label": {
|
13 |
+
"0": "LABEL_0"
|
14 |
+
},
|
15 |
+
"initializer_range": 0.02,
|
16 |
+
"intermediate_size": 3072,
|
17 |
+
"label2id": {
|
18 |
+
"LABEL_0": 0
|
19 |
+
},
|
20 |
+
"layer_norm_eps": 1e-12,
|
21 |
+
"max_position_embeddings": 512,
|
22 |
+
"model_type": "bert",
|
23 |
+
"num_attention_heads": 12,
|
24 |
+
"num_hidden_layers": 12,
|
25 |
+
"pad_token_id": 0,
|
26 |
+
"position_embedding_type": "absolute",
|
27 |
+
"torch_dtype": "float32",
|
28 |
+
"transformers_version": "4.41.2",
|
29 |
+
"type_vocab_size": 2,
|
30 |
+
"use_cache": true,
|
31 |
+
"vocab_size": 30522
|
32 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.1.0",
|
4 |
+
"transformers": "4.41.2",
|
5 |
+
"pytorch": "2.1.2+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98117f70bbecba92636d05a1e5a546c5c3e3c16511cc8537eba5af038d0e97c5
|
3 |
+
size 437951328
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 512,
|
3 |
+
"do_lower_case": true
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"model_max_length": 512,
|
50 |
+
"never_split": null,
|
51 |
+
"pad_token": "[PAD]",
|
52 |
+
"sep_token": "[SEP]",
|
53 |
+
"strip_accents": null,
|
54 |
+
"tokenize_chinese_chars": true,
|
55 |
+
"tokenizer_class": "BertTokenizer",
|
56 |
+
"unk_token": "[UNK]"
|
57 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|