Add new SentenceTransformer model.
Browse files- 1_Pooling/config.json +10 -0
- README.md +879 -0
- config.json +44 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +62 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 1024,
|
3 |
+
"pooling_mode_cls_token": true,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,879 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: Alibaba-NLP/gte-large-en-v1.5
|
3 |
+
datasets: []
|
4 |
+
language:
|
5 |
+
- en
|
6 |
+
library_name: sentence-transformers
|
7 |
+
license: apache-2.0
|
8 |
+
metrics:
|
9 |
+
- cosine_accuracy@1
|
10 |
+
- cosine_accuracy@3
|
11 |
+
- cosine_accuracy@5
|
12 |
+
- cosine_accuracy@10
|
13 |
+
- cosine_precision@1
|
14 |
+
- cosine_precision@3
|
15 |
+
- cosine_precision@5
|
16 |
+
- cosine_precision@10
|
17 |
+
- cosine_recall@1
|
18 |
+
- cosine_recall@3
|
19 |
+
- cosine_recall@5
|
20 |
+
- cosine_recall@10
|
21 |
+
- cosine_ndcg@10
|
22 |
+
- cosine_mrr@10
|
23 |
+
- cosine_map@100
|
24 |
+
pipeline_tag: sentence-similarity
|
25 |
+
tags:
|
26 |
+
- sentence-transformers
|
27 |
+
- sentence-similarity
|
28 |
+
- feature-extraction
|
29 |
+
- generated_from_trainer
|
30 |
+
- dataset_size:3051
|
31 |
+
- loss:MatryoshkaLoss
|
32 |
+
- loss:MultipleNegativesRankingLoss
|
33 |
+
widget:
|
34 |
+
- source_sentence: In the fiscal year of 2022, revenue in Productivity and Business
|
35 |
+
Processes was $53.5 billion, increasing 15%. The increase was driven by higher
|
36 |
+
revenue from LinkedIn, Office 365 commercial and Dynamics 365.
|
37 |
+
sentences:
|
38 |
+
- Who is the CEO of Berkshire Hathaway and how long have they been in the position?
|
39 |
+
- How does Microsoft's Office segment contribute to its overall revenue?
|
40 |
+
- What is the primary revenue stream for McDonald's?
|
41 |
+
- source_sentence: According to the Cisco Systems, Inc. financial reports, the company
|
42 |
+
had cash and cash equivalents worth $30.1 billion at the end of fiscal year 2023.
|
43 |
+
sentences:
|
44 |
+
- How much cash and cash equivalents did Cisco Systems, Inc. hold at the end of
|
45 |
+
fiscal year 2023?
|
46 |
+
- In 2023, how much capital expenditure was done by Microsoft in the Information
|
47 |
+
Technology services sector?
|
48 |
+
- In 2023, how has the acquisition of ABC Corp affected the financial performance
|
49 |
+
of Microsoft corporation?
|
50 |
+
- source_sentence: As of December 31, 2023, Bank of America had total loans and leases
|
51 |
+
in its consumer real estate portfolio amounting to approximately $262 billion.
|
52 |
+
sentences:
|
53 |
+
- How many total loans were there in Bank of America's consumer real estate portfolio
|
54 |
+
as of December 31, 2023?
|
55 |
+
- How did Johnson & Johnson’s pharmaceutical segment perform in the fiscal year
|
56 |
+
2023?
|
57 |
+
- What is the source of most of Amazon's revenue in 2023?
|
58 |
+
- source_sentence: The ongoing trade war between US and China has negatively impacted
|
59 |
+
the performance of Microsoft stock, with a decline of around 7.5% observed in
|
60 |
+
the last quarter.
|
61 |
+
sentences:
|
62 |
+
- What was the profit margin for Amazon in 2025?
|
63 |
+
- What was the growth rate in the consumer sector for Amazon in 2023?
|
64 |
+
- How has the ongoing trade war between US and China affected the performance of
|
65 |
+
the Microsoft stock in the past quarter?
|
66 |
+
- source_sentence: For the fiscal year ending June 30, 2023, Procter & Gamble Company
|
67 |
+
reported a dividend payout ratio of 60%.
|
68 |
+
sentences:
|
69 |
+
- What was the impact of the housing market on Wells Fargo's mortgage banking income
|
70 |
+
in 2023?
|
71 |
+
- What was the dividend payout ratio of Procter & Gamble Company for the fiscal
|
72 |
+
year ended 2023?
|
73 |
+
- What prompted Tesla's stock to surge in 2020?
|
74 |
+
model-index:
|
75 |
+
- name: gte-large-en-v1.5-financial-embeddings-matryoshka
|
76 |
+
results:
|
77 |
+
- task:
|
78 |
+
type: information-retrieval
|
79 |
+
name: Information Retrieval
|
80 |
+
dataset:
|
81 |
+
name: dim 1024
|
82 |
+
type: dim_1024
|
83 |
+
metrics:
|
84 |
+
- type: cosine_accuracy@1
|
85 |
+
value: 0.8436578171091446
|
86 |
+
name: Cosine Accuracy@1
|
87 |
+
- type: cosine_accuracy@3
|
88 |
+
value: 0.967551622418879
|
89 |
+
name: Cosine Accuracy@3
|
90 |
+
- type: cosine_accuracy@5
|
91 |
+
value: 0.9793510324483776
|
92 |
+
name: Cosine Accuracy@5
|
93 |
+
- type: cosine_accuracy@10
|
94 |
+
value: 0.9941002949852508
|
95 |
+
name: Cosine Accuracy@10
|
96 |
+
- type: cosine_precision@1
|
97 |
+
value: 0.8436578171091446
|
98 |
+
name: Cosine Precision@1
|
99 |
+
- type: cosine_precision@3
|
100 |
+
value: 0.32251720747295964
|
101 |
+
name: Cosine Precision@3
|
102 |
+
- type: cosine_precision@5
|
103 |
+
value: 0.19587020648967549
|
104 |
+
name: Cosine Precision@5
|
105 |
+
- type: cosine_precision@10
|
106 |
+
value: 0.09941002949852507
|
107 |
+
name: Cosine Precision@10
|
108 |
+
- type: cosine_recall@1
|
109 |
+
value: 0.8436578171091446
|
110 |
+
name: Cosine Recall@1
|
111 |
+
- type: cosine_recall@3
|
112 |
+
value: 0.967551622418879
|
113 |
+
name: Cosine Recall@3
|
114 |
+
- type: cosine_recall@5
|
115 |
+
value: 0.9793510324483776
|
116 |
+
name: Cosine Recall@5
|
117 |
+
- type: cosine_recall@10
|
118 |
+
value: 0.9941002949852508
|
119 |
+
name: Cosine Recall@10
|
120 |
+
- type: cosine_ndcg@10
|
121 |
+
value: 0.9266864251220158
|
122 |
+
name: Cosine Ndcg@10
|
123 |
+
- type: cosine_mrr@10
|
124 |
+
value: 0.9042480217258979
|
125 |
+
name: Cosine Mrr@10
|
126 |
+
- type: cosine_map@100
|
127 |
+
value: 0.9045732618741468
|
128 |
+
name: Cosine Map@100
|
129 |
+
- task:
|
130 |
+
type: information-retrieval
|
131 |
+
name: Information Retrieval
|
132 |
+
dataset:
|
133 |
+
name: dim 768
|
134 |
+
type: dim_768
|
135 |
+
metrics:
|
136 |
+
- type: cosine_accuracy@1
|
137 |
+
value: 0.8466076696165191
|
138 |
+
name: Cosine Accuracy@1
|
139 |
+
- type: cosine_accuracy@3
|
140 |
+
value: 0.9646017699115044
|
141 |
+
name: Cosine Accuracy@3
|
142 |
+
- type: cosine_accuracy@5
|
143 |
+
value: 0.9793510324483776
|
144 |
+
name: Cosine Accuracy@5
|
145 |
+
- type: cosine_accuracy@10
|
146 |
+
value: 0.9941002949852508
|
147 |
+
name: Cosine Accuracy@10
|
148 |
+
- type: cosine_precision@1
|
149 |
+
value: 0.8466076696165191
|
150 |
+
name: Cosine Precision@1
|
151 |
+
- type: cosine_precision@3
|
152 |
+
value: 0.3215339233038348
|
153 |
+
name: Cosine Precision@3
|
154 |
+
- type: cosine_precision@5
|
155 |
+
value: 0.19587020648967549
|
156 |
+
name: Cosine Precision@5
|
157 |
+
- type: cosine_precision@10
|
158 |
+
value: 0.09941002949852507
|
159 |
+
name: Cosine Precision@10
|
160 |
+
- type: cosine_recall@1
|
161 |
+
value: 0.8466076696165191
|
162 |
+
name: Cosine Recall@1
|
163 |
+
- type: cosine_recall@3
|
164 |
+
value: 0.9646017699115044
|
165 |
+
name: Cosine Recall@3
|
166 |
+
- type: cosine_recall@5
|
167 |
+
value: 0.9793510324483776
|
168 |
+
name: Cosine Recall@5
|
169 |
+
- type: cosine_recall@10
|
170 |
+
value: 0.9941002949852508
|
171 |
+
name: Cosine Recall@10
|
172 |
+
- type: cosine_ndcg@10
|
173 |
+
value: 0.9264205608498619
|
174 |
+
name: Cosine Ndcg@10
|
175 |
+
- type: cosine_mrr@10
|
176 |
+
value: 0.9039893243433065
|
177 |
+
name: Cosine Mrr@10
|
178 |
+
- type: cosine_map@100
|
179 |
+
value: 0.9043029964425071
|
180 |
+
name: Cosine Map@100
|
181 |
+
- task:
|
182 |
+
type: information-retrieval
|
183 |
+
name: Information Retrieval
|
184 |
+
dataset:
|
185 |
+
name: dim 512
|
186 |
+
type: dim_512
|
187 |
+
metrics:
|
188 |
+
- type: cosine_accuracy@1
|
189 |
+
value: 0.8495575221238938
|
190 |
+
name: Cosine Accuracy@1
|
191 |
+
- type: cosine_accuracy@3
|
192 |
+
value: 0.967551622418879
|
193 |
+
name: Cosine Accuracy@3
|
194 |
+
- type: cosine_accuracy@5
|
195 |
+
value: 0.9823008849557522
|
196 |
+
name: Cosine Accuracy@5
|
197 |
+
- type: cosine_accuracy@10
|
198 |
+
value: 0.9941002949852508
|
199 |
+
name: Cosine Accuracy@10
|
200 |
+
- type: cosine_precision@1
|
201 |
+
value: 0.8495575221238938
|
202 |
+
name: Cosine Precision@1
|
203 |
+
- type: cosine_precision@3
|
204 |
+
value: 0.3225172074729597
|
205 |
+
name: Cosine Precision@3
|
206 |
+
- type: cosine_precision@5
|
207 |
+
value: 0.19646017699115043
|
208 |
+
name: Cosine Precision@5
|
209 |
+
- type: cosine_precision@10
|
210 |
+
value: 0.09941002949852507
|
211 |
+
name: Cosine Precision@10
|
212 |
+
- type: cosine_recall@1
|
213 |
+
value: 0.8495575221238938
|
214 |
+
name: Cosine Recall@1
|
215 |
+
- type: cosine_recall@3
|
216 |
+
value: 0.967551622418879
|
217 |
+
name: Cosine Recall@3
|
218 |
+
- type: cosine_recall@5
|
219 |
+
value: 0.9823008849557522
|
220 |
+
name: Cosine Recall@5
|
221 |
+
- type: cosine_recall@10
|
222 |
+
value: 0.9941002949852508
|
223 |
+
name: Cosine Recall@10
|
224 |
+
- type: cosine_ndcg@10
|
225 |
+
value: 0.9284195153657646
|
226 |
+
name: Cosine Ndcg@10
|
227 |
+
- type: cosine_mrr@10
|
228 |
+
value: 0.9066207800721073
|
229 |
+
name: Cosine Mrr@10
|
230 |
+
- type: cosine_map@100
|
231 |
+
value: 0.9069134241700614
|
232 |
+
name: Cosine Map@100
|
233 |
+
- task:
|
234 |
+
type: information-retrieval
|
235 |
+
name: Information Retrieval
|
236 |
+
dataset:
|
237 |
+
name: dim 256
|
238 |
+
type: dim_256
|
239 |
+
metrics:
|
240 |
+
- type: cosine_accuracy@1
|
241 |
+
value: 0.8495575221238938
|
242 |
+
name: Cosine Accuracy@1
|
243 |
+
- type: cosine_accuracy@3
|
244 |
+
value: 0.9646017699115044
|
245 |
+
name: Cosine Accuracy@3
|
246 |
+
- type: cosine_accuracy@5
|
247 |
+
value: 0.9823008849557522
|
248 |
+
name: Cosine Accuracy@5
|
249 |
+
- type: cosine_accuracy@10
|
250 |
+
value: 0.9911504424778761
|
251 |
+
name: Cosine Accuracy@10
|
252 |
+
- type: cosine_precision@1
|
253 |
+
value: 0.8495575221238938
|
254 |
+
name: Cosine Precision@1
|
255 |
+
- type: cosine_precision@3
|
256 |
+
value: 0.3215339233038348
|
257 |
+
name: Cosine Precision@3
|
258 |
+
- type: cosine_precision@5
|
259 |
+
value: 0.19646017699115043
|
260 |
+
name: Cosine Precision@5
|
261 |
+
- type: cosine_precision@10
|
262 |
+
value: 0.0991150442477876
|
263 |
+
name: Cosine Precision@10
|
264 |
+
- type: cosine_recall@1
|
265 |
+
value: 0.8495575221238938
|
266 |
+
name: Cosine Recall@1
|
267 |
+
- type: cosine_recall@3
|
268 |
+
value: 0.9646017699115044
|
269 |
+
name: Cosine Recall@3
|
270 |
+
- type: cosine_recall@5
|
271 |
+
value: 0.9823008849557522
|
272 |
+
name: Cosine Recall@5
|
273 |
+
- type: cosine_recall@10
|
274 |
+
value: 0.9911504424778761
|
275 |
+
name: Cosine Recall@10
|
276 |
+
- type: cosine_ndcg@10
|
277 |
+
value: 0.9279790172476607
|
278 |
+
name: Cosine Ndcg@10
|
279 |
+
- type: cosine_mrr@10
|
280 |
+
value: 0.906915765322845
|
281 |
+
name: Cosine Mrr@10
|
282 |
+
- type: cosine_map@100
|
283 |
+
value: 0.907374745870321
|
284 |
+
name: Cosine Map@100
|
285 |
+
- task:
|
286 |
+
type: information-retrieval
|
287 |
+
name: Information Retrieval
|
288 |
+
dataset:
|
289 |
+
name: dim 128
|
290 |
+
type: dim_128
|
291 |
+
metrics:
|
292 |
+
- type: cosine_accuracy@1
|
293 |
+
value: 0.8348082595870207
|
294 |
+
name: Cosine Accuracy@1
|
295 |
+
- type: cosine_accuracy@3
|
296 |
+
value: 0.9616519174041298
|
297 |
+
name: Cosine Accuracy@3
|
298 |
+
- type: cosine_accuracy@5
|
299 |
+
value: 0.976401179941003
|
300 |
+
name: Cosine Accuracy@5
|
301 |
+
- type: cosine_accuracy@10
|
302 |
+
value: 0.9852507374631269
|
303 |
+
name: Cosine Accuracy@10
|
304 |
+
- type: cosine_precision@1
|
305 |
+
value: 0.8348082595870207
|
306 |
+
name: Cosine Precision@1
|
307 |
+
- type: cosine_precision@3
|
308 |
+
value: 0.3205506391347099
|
309 |
+
name: Cosine Precision@3
|
310 |
+
- type: cosine_precision@5
|
311 |
+
value: 0.19528023598820055
|
312 |
+
name: Cosine Precision@5
|
313 |
+
- type: cosine_precision@10
|
314 |
+
value: 0.09852507374631266
|
315 |
+
name: Cosine Precision@10
|
316 |
+
- type: cosine_recall@1
|
317 |
+
value: 0.8348082595870207
|
318 |
+
name: Cosine Recall@1
|
319 |
+
- type: cosine_recall@3
|
320 |
+
value: 0.9616519174041298
|
321 |
+
name: Cosine Recall@3
|
322 |
+
- type: cosine_recall@5
|
323 |
+
value: 0.976401179941003
|
324 |
+
name: Cosine Recall@5
|
325 |
+
- type: cosine_recall@10
|
326 |
+
value: 0.9852507374631269
|
327 |
+
name: Cosine Recall@10
|
328 |
+
- type: cosine_ndcg@10
|
329 |
+
value: 0.9195345068147046
|
330 |
+
name: Cosine Ndcg@10
|
331 |
+
- type: cosine_mrr@10
|
332 |
+
value: 0.8974551669241934
|
333 |
+
name: Cosine Mrr@10
|
334 |
+
- type: cosine_map@100
|
335 |
+
value: 0.898503034985336
|
336 |
+
name: Cosine Map@100
|
337 |
+
- task:
|
338 |
+
type: information-retrieval
|
339 |
+
name: Information Retrieval
|
340 |
+
dataset:
|
341 |
+
name: dim 64
|
342 |
+
type: dim_64
|
343 |
+
metrics:
|
344 |
+
- type: cosine_accuracy@1
|
345 |
+
value: 0.8259587020648967
|
346 |
+
name: Cosine Accuracy@1
|
347 |
+
- type: cosine_accuracy@3
|
348 |
+
value: 0.9587020648967551
|
349 |
+
name: Cosine Accuracy@3
|
350 |
+
- type: cosine_accuracy@5
|
351 |
+
value: 0.976401179941003
|
352 |
+
name: Cosine Accuracy@5
|
353 |
+
- type: cosine_accuracy@10
|
354 |
+
value: 0.9852507374631269
|
355 |
+
name: Cosine Accuracy@10
|
356 |
+
- type: cosine_precision@1
|
357 |
+
value: 0.8259587020648967
|
358 |
+
name: Cosine Precision@1
|
359 |
+
- type: cosine_precision@3
|
360 |
+
value: 0.319567354965585
|
361 |
+
name: Cosine Precision@3
|
362 |
+
- type: cosine_precision@5
|
363 |
+
value: 0.19528023598820055
|
364 |
+
name: Cosine Precision@5
|
365 |
+
- type: cosine_precision@10
|
366 |
+
value: 0.09852507374631266
|
367 |
+
name: Cosine Precision@10
|
368 |
+
- type: cosine_recall@1
|
369 |
+
value: 0.8259587020648967
|
370 |
+
name: Cosine Recall@1
|
371 |
+
- type: cosine_recall@3
|
372 |
+
value: 0.9587020648967551
|
373 |
+
name: Cosine Recall@3
|
374 |
+
- type: cosine_recall@5
|
375 |
+
value: 0.976401179941003
|
376 |
+
name: Cosine Recall@5
|
377 |
+
- type: cosine_recall@10
|
378 |
+
value: 0.9852507374631269
|
379 |
+
name: Cosine Recall@10
|
380 |
+
- type: cosine_ndcg@10
|
381 |
+
value: 0.9154633119580834
|
382 |
+
name: Cosine Ndcg@10
|
383 |
+
- type: cosine_mrr@10
|
384 |
+
value: 0.8919897457508078
|
385 |
+
name: Cosine Mrr@10
|
386 |
+
- type: cosine_map@100
|
387 |
+
value: 0.8929899218270423
|
388 |
+
name: Cosine Map@100
|
389 |
+
---
|
390 |
+
|
391 |
+
# gte-large-en-v1.5-financial-embeddings-matryoshka
|
392 |
+
|
393 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
394 |
+
|
395 |
+
## Model Details
|
396 |
+
|
397 |
+
### Model Description
|
398 |
+
- **Model Type:** Sentence Transformer
|
399 |
+
- **Base model:** [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5) <!-- at revision a0d6174973604c8ef416d9f6ed0f4c17ab32d78d -->
|
400 |
+
- **Maximum Sequence Length:** 8192 tokens
|
401 |
+
- **Output Dimensionality:** 1024 dimensions
|
402 |
+
- **Similarity Function:** Cosine Similarity
|
403 |
+
<!-- - **Training Dataset:** Unknown -->
|
404 |
+
- **Language:** en
|
405 |
+
- **License:** apache-2.0
|
406 |
+
|
407 |
+
### Model Sources
|
408 |
+
|
409 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
410 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
411 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
412 |
+
|
413 |
+
### Full Model Architecture
|
414 |
+
|
415 |
+
```
|
416 |
+
SentenceTransformer(
|
417 |
+
(0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: NewModel
|
418 |
+
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
419 |
+
)
|
420 |
+
```
|
421 |
+
|
422 |
+
## Usage
|
423 |
+
|
424 |
+
### Direct Usage (Sentence Transformers)
|
425 |
+
|
426 |
+
First install the Sentence Transformers library:
|
427 |
+
|
428 |
+
```bash
|
429 |
+
pip install -U sentence-transformers
|
430 |
+
```
|
431 |
+
|
432 |
+
Then you can load this model and run inference.
|
433 |
+
```python
|
434 |
+
from sentence_transformers import SentenceTransformer
|
435 |
+
|
436 |
+
# Download from the 🤗 Hub
|
437 |
+
model = SentenceTransformer("rbhatia46/gte-large-en-v1.5-financial-rag-matryoshka")
|
438 |
+
# Run inference
|
439 |
+
sentences = [
|
440 |
+
'For the fiscal year ending June 30, 2023, Procter & Gamble Company reported a dividend payout ratio of 60%.',
|
441 |
+
'What was the dividend payout ratio of Procter & Gamble Company for the fiscal year ended 2023?',
|
442 |
+
"What prompted Tesla's stock to surge in 2020?",
|
443 |
+
]
|
444 |
+
embeddings = model.encode(sentences)
|
445 |
+
print(embeddings.shape)
|
446 |
+
# [3, 1024]
|
447 |
+
|
448 |
+
# Get the similarity scores for the embeddings
|
449 |
+
similarities = model.similarity(embeddings, embeddings)
|
450 |
+
print(similarities.shape)
|
451 |
+
# [3, 3]
|
452 |
+
```
|
453 |
+
|
454 |
+
<!--
|
455 |
+
### Direct Usage (Transformers)
|
456 |
+
|
457 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
458 |
+
|
459 |
+
</details>
|
460 |
+
-->
|
461 |
+
|
462 |
+
<!--
|
463 |
+
### Downstream Usage (Sentence Transformers)
|
464 |
+
|
465 |
+
You can finetune this model on your own dataset.
|
466 |
+
|
467 |
+
<details><summary>Click to expand</summary>
|
468 |
+
|
469 |
+
</details>
|
470 |
+
-->
|
471 |
+
|
472 |
+
<!--
|
473 |
+
### Out-of-Scope Use
|
474 |
+
|
475 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
476 |
+
-->
|
477 |
+
|
478 |
+
## Evaluation
|
479 |
+
|
480 |
+
### Metrics
|
481 |
+
|
482 |
+
#### Information Retrieval
|
483 |
+
* Dataset: `dim_1024`
|
484 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
485 |
+
|
486 |
+
| Metric | Value |
|
487 |
+
|:--------------------|:-----------|
|
488 |
+
| cosine_accuracy@1 | 0.8437 |
|
489 |
+
| cosine_accuracy@3 | 0.9676 |
|
490 |
+
| cosine_accuracy@5 | 0.9794 |
|
491 |
+
| cosine_accuracy@10 | 0.9941 |
|
492 |
+
| cosine_precision@1 | 0.8437 |
|
493 |
+
| cosine_precision@3 | 0.3225 |
|
494 |
+
| cosine_precision@5 | 0.1959 |
|
495 |
+
| cosine_precision@10 | 0.0994 |
|
496 |
+
| cosine_recall@1 | 0.8437 |
|
497 |
+
| cosine_recall@3 | 0.9676 |
|
498 |
+
| cosine_recall@5 | 0.9794 |
|
499 |
+
| cosine_recall@10 | 0.9941 |
|
500 |
+
| cosine_ndcg@10 | 0.9267 |
|
501 |
+
| cosine_mrr@10 | 0.9042 |
|
502 |
+
| **cosine_map@100** | **0.9046** |
|
503 |
+
|
504 |
+
#### Information Retrieval
|
505 |
+
* Dataset: `dim_768`
|
506 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
507 |
+
|
508 |
+
| Metric | Value |
|
509 |
+
|:--------------------|:-----------|
|
510 |
+
| cosine_accuracy@1 | 0.8466 |
|
511 |
+
| cosine_accuracy@3 | 0.9646 |
|
512 |
+
| cosine_accuracy@5 | 0.9794 |
|
513 |
+
| cosine_accuracy@10 | 0.9941 |
|
514 |
+
| cosine_precision@1 | 0.8466 |
|
515 |
+
| cosine_precision@3 | 0.3215 |
|
516 |
+
| cosine_precision@5 | 0.1959 |
|
517 |
+
| cosine_precision@10 | 0.0994 |
|
518 |
+
| cosine_recall@1 | 0.8466 |
|
519 |
+
| cosine_recall@3 | 0.9646 |
|
520 |
+
| cosine_recall@5 | 0.9794 |
|
521 |
+
| cosine_recall@10 | 0.9941 |
|
522 |
+
| cosine_ndcg@10 | 0.9264 |
|
523 |
+
| cosine_mrr@10 | 0.904 |
|
524 |
+
| **cosine_map@100** | **0.9043** |
|
525 |
+
|
526 |
+
#### Information Retrieval
|
527 |
+
* Dataset: `dim_512`
|
528 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
529 |
+
|
530 |
+
| Metric | Value |
|
531 |
+
|:--------------------|:-----------|
|
532 |
+
| cosine_accuracy@1 | 0.8496 |
|
533 |
+
| cosine_accuracy@3 | 0.9676 |
|
534 |
+
| cosine_accuracy@5 | 0.9823 |
|
535 |
+
| cosine_accuracy@10 | 0.9941 |
|
536 |
+
| cosine_precision@1 | 0.8496 |
|
537 |
+
| cosine_precision@3 | 0.3225 |
|
538 |
+
| cosine_precision@5 | 0.1965 |
|
539 |
+
| cosine_precision@10 | 0.0994 |
|
540 |
+
| cosine_recall@1 | 0.8496 |
|
541 |
+
| cosine_recall@3 | 0.9676 |
|
542 |
+
| cosine_recall@5 | 0.9823 |
|
543 |
+
| cosine_recall@10 | 0.9941 |
|
544 |
+
| cosine_ndcg@10 | 0.9284 |
|
545 |
+
| cosine_mrr@10 | 0.9066 |
|
546 |
+
| **cosine_map@100** | **0.9069** |
|
547 |
+
|
548 |
+
#### Information Retrieval
|
549 |
+
* Dataset: `dim_256`
|
550 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
551 |
+
|
552 |
+
| Metric | Value |
|
553 |
+
|:--------------------|:-----------|
|
554 |
+
| cosine_accuracy@1 | 0.8496 |
|
555 |
+
| cosine_accuracy@3 | 0.9646 |
|
556 |
+
| cosine_accuracy@5 | 0.9823 |
|
557 |
+
| cosine_accuracy@10 | 0.9912 |
|
558 |
+
| cosine_precision@1 | 0.8496 |
|
559 |
+
| cosine_precision@3 | 0.3215 |
|
560 |
+
| cosine_precision@5 | 0.1965 |
|
561 |
+
| cosine_precision@10 | 0.0991 |
|
562 |
+
| cosine_recall@1 | 0.8496 |
|
563 |
+
| cosine_recall@3 | 0.9646 |
|
564 |
+
| cosine_recall@5 | 0.9823 |
|
565 |
+
| cosine_recall@10 | 0.9912 |
|
566 |
+
| cosine_ndcg@10 | 0.928 |
|
567 |
+
| cosine_mrr@10 | 0.9069 |
|
568 |
+
| **cosine_map@100** | **0.9074** |
|
569 |
+
|
570 |
+
#### Information Retrieval
|
571 |
+
* Dataset: `dim_128`
|
572 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
573 |
+
|
574 |
+
| Metric | Value |
|
575 |
+
|:--------------------|:-----------|
|
576 |
+
| cosine_accuracy@1 | 0.8348 |
|
577 |
+
| cosine_accuracy@3 | 0.9617 |
|
578 |
+
| cosine_accuracy@5 | 0.9764 |
|
579 |
+
| cosine_accuracy@10 | 0.9853 |
|
580 |
+
| cosine_precision@1 | 0.8348 |
|
581 |
+
| cosine_precision@3 | 0.3206 |
|
582 |
+
| cosine_precision@5 | 0.1953 |
|
583 |
+
| cosine_precision@10 | 0.0985 |
|
584 |
+
| cosine_recall@1 | 0.8348 |
|
585 |
+
| cosine_recall@3 | 0.9617 |
|
586 |
+
| cosine_recall@5 | 0.9764 |
|
587 |
+
| cosine_recall@10 | 0.9853 |
|
588 |
+
| cosine_ndcg@10 | 0.9195 |
|
589 |
+
| cosine_mrr@10 | 0.8975 |
|
590 |
+
| **cosine_map@100** | **0.8985** |
|
591 |
+
|
592 |
+
#### Information Retrieval
|
593 |
+
* Dataset: `dim_64`
|
594 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
595 |
+
|
596 |
+
| Metric | Value |
|
597 |
+
|:--------------------|:----------|
|
598 |
+
| cosine_accuracy@1 | 0.826 |
|
599 |
+
| cosine_accuracy@3 | 0.9587 |
|
600 |
+
| cosine_accuracy@5 | 0.9764 |
|
601 |
+
| cosine_accuracy@10 | 0.9853 |
|
602 |
+
| cosine_precision@1 | 0.826 |
|
603 |
+
| cosine_precision@3 | 0.3196 |
|
604 |
+
| cosine_precision@5 | 0.1953 |
|
605 |
+
| cosine_precision@10 | 0.0985 |
|
606 |
+
| cosine_recall@1 | 0.826 |
|
607 |
+
| cosine_recall@3 | 0.9587 |
|
608 |
+
| cosine_recall@5 | 0.9764 |
|
609 |
+
| cosine_recall@10 | 0.9853 |
|
610 |
+
| cosine_ndcg@10 | 0.9155 |
|
611 |
+
| cosine_mrr@10 | 0.892 |
|
612 |
+
| **cosine_map@100** | **0.893** |
|
613 |
+
|
614 |
+
<!--
|
615 |
+
## Bias, Risks and Limitations
|
616 |
+
|
617 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
618 |
+
-->
|
619 |
+
|
620 |
+
<!--
|
621 |
+
### Recommendations
|
622 |
+
|
623 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
624 |
+
-->
|
625 |
+
|
626 |
+
## Training Details
|
627 |
+
|
628 |
+
### Training Dataset
|
629 |
+
|
630 |
+
#### Unnamed Dataset
|
631 |
+
|
632 |
+
|
633 |
+
* Size: 3,051 training samples
|
634 |
+
* Columns: <code>positive</code> and <code>anchor</code>
|
635 |
+
* Approximate statistics based on the first 1000 samples:
|
636 |
+
| | positive | anchor |
|
637 |
+
|:--------|:------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
|
638 |
+
| type | string | string |
|
639 |
+
| details | <ul><li>min: 15 tokens</li><li>mean: 44.56 tokens</li><li>max: 116 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 18.23 tokens</li><li>max: 32 tokens</li></ul> |
|
640 |
+
* Samples:
|
641 |
+
| positive | anchor |
|
642 |
+
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
|
643 |
+
| <code>Amazon had a strong Q1 in 2021, with net sales increasing 44% to $108.5 billion in the first quarter, compared with $75.5 billion in first quarter 2020. Operating income increased to $8.9 billion in the first quarter, compared with operating income of $4.0 billion in first quarter 2020.</code> | <code>What were the key findings of Amazon's Q1 financial report for 2021?</code> |
|
644 |
+
| <code>Apple Inc. reported total revenue of $102.4 billion for the second quarter of fiscal 2023, up 22% from the same quarter in the previous year. This has been attributed to increased sales of iPhones, iPads, and other hardware products as well as the expansion of their services business.</code> | <code>What was the revenue of Apple Inc. in 2023?</code> |
|
645 |
+
| <code>JPMorgan Chase & Co. was formed through the merger of Chase Manhattan Corporation and J.P. Morgan & Co. The origins of the company can be traced back to the founding of The Manhattan Company by Aaron Burr in 1799.</code> | <code>Who founded JPMorgan Chase & Co.?</code> |
|
646 |
+
* Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
|
647 |
+
```json
|
648 |
+
{
|
649 |
+
"loss": "MultipleNegativesRankingLoss",
|
650 |
+
"matryoshka_dims": [
|
651 |
+
1024,
|
652 |
+
768,
|
653 |
+
512,
|
654 |
+
256,
|
655 |
+
128,
|
656 |
+
64
|
657 |
+
],
|
658 |
+
"matryoshka_weights": [
|
659 |
+
1,
|
660 |
+
1,
|
661 |
+
1,
|
662 |
+
1,
|
663 |
+
1,
|
664 |
+
1
|
665 |
+
],
|
666 |
+
"n_dims_per_step": -1
|
667 |
+
}
|
668 |
+
```
|
669 |
+
|
670 |
+
### Training Hyperparameters
|
671 |
+
#### Non-Default Hyperparameters
|
672 |
+
|
673 |
+
- `eval_strategy`: epoch
|
674 |
+
- `per_device_train_batch_size`: 32
|
675 |
+
- `per_device_eval_batch_size`: 16
|
676 |
+
- `gradient_accumulation_steps`: 16
|
677 |
+
- `learning_rate`: 2e-05
|
678 |
+
- `num_train_epochs`: 4
|
679 |
+
- `lr_scheduler_type`: cosine
|
680 |
+
- `warmup_ratio`: 0.1
|
681 |
+
- `bf16`: True
|
682 |
+
- `tf32`: True
|
683 |
+
- `load_best_model_at_end`: True
|
684 |
+
- `optim`: adamw_torch_fused
|
685 |
+
- `batch_sampler`: no_duplicates
|
686 |
+
|
687 |
+
#### All Hyperparameters
|
688 |
+
<details><summary>Click to expand</summary>
|
689 |
+
|
690 |
+
- `overwrite_output_dir`: False
|
691 |
+
- `do_predict`: False
|
692 |
+
- `eval_strategy`: epoch
|
693 |
+
- `prediction_loss_only`: True
|
694 |
+
- `per_device_train_batch_size`: 32
|
695 |
+
- `per_device_eval_batch_size`: 16
|
696 |
+
- `per_gpu_train_batch_size`: None
|
697 |
+
- `per_gpu_eval_batch_size`: None
|
698 |
+
- `gradient_accumulation_steps`: 16
|
699 |
+
- `eval_accumulation_steps`: None
|
700 |
+
- `learning_rate`: 2e-05
|
701 |
+
- `weight_decay`: 0.0
|
702 |
+
- `adam_beta1`: 0.9
|
703 |
+
- `adam_beta2`: 0.999
|
704 |
+
- `adam_epsilon`: 1e-08
|
705 |
+
- `max_grad_norm`: 1.0
|
706 |
+
- `num_train_epochs`: 4
|
707 |
+
- `max_steps`: -1
|
708 |
+
- `lr_scheduler_type`: cosine
|
709 |
+
- `lr_scheduler_kwargs`: {}
|
710 |
+
- `warmup_ratio`: 0.1
|
711 |
+
- `warmup_steps`: 0
|
712 |
+
- `log_level`: passive
|
713 |
+
- `log_level_replica`: warning
|
714 |
+
- `log_on_each_node`: True
|
715 |
+
- `logging_nan_inf_filter`: True
|
716 |
+
- `save_safetensors`: True
|
717 |
+
- `save_on_each_node`: False
|
718 |
+
- `save_only_model`: False
|
719 |
+
- `restore_callback_states_from_checkpoint`: False
|
720 |
+
- `no_cuda`: False
|
721 |
+
- `use_cpu`: False
|
722 |
+
- `use_mps_device`: False
|
723 |
+
- `seed`: 42
|
724 |
+
- `data_seed`: None
|
725 |
+
- `jit_mode_eval`: False
|
726 |
+
- `use_ipex`: False
|
727 |
+
- `bf16`: True
|
728 |
+
- `fp16`: False
|
729 |
+
- `fp16_opt_level`: O1
|
730 |
+
- `half_precision_backend`: auto
|
731 |
+
- `bf16_full_eval`: False
|
732 |
+
- `fp16_full_eval`: False
|
733 |
+
- `tf32`: True
|
734 |
+
- `local_rank`: 0
|
735 |
+
- `ddp_backend`: None
|
736 |
+
- `tpu_num_cores`: None
|
737 |
+
- `tpu_metrics_debug`: False
|
738 |
+
- `debug`: []
|
739 |
+
- `dataloader_drop_last`: False
|
740 |
+
- `dataloader_num_workers`: 0
|
741 |
+
- `dataloader_prefetch_factor`: None
|
742 |
+
- `past_index`: -1
|
743 |
+
- `disable_tqdm`: False
|
744 |
+
- `remove_unused_columns`: True
|
745 |
+
- `label_names`: None
|
746 |
+
- `load_best_model_at_end`: True
|
747 |
+
- `ignore_data_skip`: False
|
748 |
+
- `fsdp`: []
|
749 |
+
- `fsdp_min_num_params`: 0
|
750 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
751 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
752 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
753 |
+
- `deepspeed`: None
|
754 |
+
- `label_smoothing_factor`: 0.0
|
755 |
+
- `optim`: adamw_torch_fused
|
756 |
+
- `optim_args`: None
|
757 |
+
- `adafactor`: False
|
758 |
+
- `group_by_length`: False
|
759 |
+
- `length_column_name`: length
|
760 |
+
- `ddp_find_unused_parameters`: None
|
761 |
+
- `ddp_bucket_cap_mb`: None
|
762 |
+
- `ddp_broadcast_buffers`: False
|
763 |
+
- `dataloader_pin_memory`: True
|
764 |
+
- `dataloader_persistent_workers`: False
|
765 |
+
- `skip_memory_metrics`: True
|
766 |
+
- `use_legacy_prediction_loop`: False
|
767 |
+
- `push_to_hub`: False
|
768 |
+
- `resume_from_checkpoint`: None
|
769 |
+
- `hub_model_id`: None
|
770 |
+
- `hub_strategy`: every_save
|
771 |
+
- `hub_private_repo`: False
|
772 |
+
- `hub_always_push`: False
|
773 |
+
- `gradient_checkpointing`: False
|
774 |
+
- `gradient_checkpointing_kwargs`: None
|
775 |
+
- `include_inputs_for_metrics`: False
|
776 |
+
- `eval_do_concat_batches`: True
|
777 |
+
- `fp16_backend`: auto
|
778 |
+
- `push_to_hub_model_id`: None
|
779 |
+
- `push_to_hub_organization`: None
|
780 |
+
- `mp_parameters`:
|
781 |
+
- `auto_find_batch_size`: False
|
782 |
+
- `full_determinism`: False
|
783 |
+
- `torchdynamo`: None
|
784 |
+
- `ray_scope`: last
|
785 |
+
- `ddp_timeout`: 1800
|
786 |
+
- `torch_compile`: False
|
787 |
+
- `torch_compile_backend`: None
|
788 |
+
- `torch_compile_mode`: None
|
789 |
+
- `dispatch_batches`: None
|
790 |
+
- `split_batches`: None
|
791 |
+
- `include_tokens_per_second`: False
|
792 |
+
- `include_num_input_tokens_seen`: False
|
793 |
+
- `neftune_noise_alpha`: None
|
794 |
+
- `optim_target_modules`: None
|
795 |
+
- `batch_eval_metrics`: False
|
796 |
+
- `batch_sampler`: no_duplicates
|
797 |
+
- `multi_dataset_batch_sampler`: proportional
|
798 |
+
|
799 |
+
</details>
|
800 |
+
|
801 |
+
### Training Logs
|
802 |
+
| Epoch | Step | Training Loss | dim_1024_cosine_map@100 | dim_128_cosine_map@100 | dim_256_cosine_map@100 | dim_512_cosine_map@100 | dim_64_cosine_map@100 | dim_768_cosine_map@100 |
|
803 |
+
|:-------:|:------:|:-------------:|:-----------------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:|:----------------------:|
|
804 |
+
| 1.0 | 6 | - | 0.9015 | 0.8932 | 0.9017 | 0.8958 | 0.8925 | 0.8935 |
|
805 |
+
| 1.6667 | 10 | 0.3905 | - | - | - | - | - | - |
|
806 |
+
| **2.0** | **12** | **-**         | **0.9038**              | **0.9010**             | **0.9018**             | **0.9090**             | **0.8903**            | **0.9039**             |
|
807 |
+
| 3.0 | 18 | - | 0.9046 | 0.8982 | 0.9065 | 0.9055 | 0.8916 | 0.9033 |
|
808 |
+
| 3.3333 | 20 | 0.0493 | - | - | - | - | - | - |
|
809 |
+
| 4.0 | 24 | - | 0.9046 | 0.8985 | 0.9074 | 0.9069 | 0.8930 | 0.9043 |
|
810 |
+
|
811 |
+
* The bold row denotes the saved checkpoint.
|
812 |
+
|
813 |
+
### Framework Versions
|
814 |
+
- Python: 3.10.6
|
815 |
+
- Sentence Transformers: 3.0.1
|
816 |
+
- Transformers: 4.41.2
|
817 |
+
- PyTorch: 2.1.2+cu121
|
818 |
+
- Accelerate: 0.32.1
|
819 |
+
- Datasets: 2.19.1
|
820 |
+
- Tokenizers: 0.19.1
|
821 |
+
|
822 |
+
## Citation
|
823 |
+
|
824 |
+
### BibTeX
|
825 |
+
|
826 |
+
#### Sentence Transformers
|
827 |
+
```bibtex
|
828 |
+
@inproceedings{reimers-2019-sentence-bert,
|
829 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
830 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
831 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
832 |
+
month = "11",
|
833 |
+
year = "2019",
|
834 |
+
publisher = "Association for Computational Linguistics",
|
835 |
+
url = "https://arxiv.org/abs/1908.10084",
|
836 |
+
}
|
837 |
+
```
|
838 |
+
|
839 |
+
#### MatryoshkaLoss
|
840 |
+
```bibtex
|
841 |
+
@misc{kusupati2024matryoshka,
|
842 |
+
title={Matryoshka Representation Learning},
|
843 |
+
author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
|
844 |
+
year={2024},
|
845 |
+
eprint={2205.13147},
|
846 |
+
archivePrefix={arXiv},
|
847 |
+
primaryClass={cs.LG}
|
848 |
+
}
|
849 |
+
```
|
850 |
+
|
851 |
+
#### MultipleNegativesRankingLoss
|
852 |
+
```bibtex
|
853 |
+
@misc{henderson2017efficient,
|
854 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
855 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
856 |
+
year={2017},
|
857 |
+
eprint={1705.00652},
|
858 |
+
archivePrefix={arXiv},
|
859 |
+
primaryClass={cs.CL}
|
860 |
+
}
|
861 |
+
```
|
862 |
+
|
863 |
+
<!--
|
864 |
+
## Glossary
|
865 |
+
|
866 |
+
*Clearly define terms in order to be accessible across audiences.*
|
867 |
+
-->
|
868 |
+
|
869 |
+
<!--
|
870 |
+
## Model Card Authors
|
871 |
+
|
872 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
873 |
+
-->
|
874 |
+
|
875 |
+
<!--
|
876 |
+
## Model Card Contact
|
877 |
+
|
878 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
879 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "Alibaba-NLP/gte-large-en-v1.5",
|
3 |
+
"architectures": [
|
4 |
+
"NewModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.0,
|
7 |
+
"auto_map": {
|
8 |
+
"AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
|
9 |
+
"AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
|
10 |
+
"AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
|
11 |
+
"AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
|
12 |
+
"AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
|
13 |
+
"AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
|
14 |
+
"AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
|
15 |
+
},
|
16 |
+
"classifier_dropout": null,
|
17 |
+
"hidden_act": "gelu",
|
18 |
+
"hidden_dropout_prob": 0.1,
|
19 |
+
"hidden_size": 1024,
|
20 |
+
"initializer_range": 0.02,
|
21 |
+
"intermediate_size": 4096,
|
22 |
+
"layer_norm_eps": 1e-12,
|
23 |
+
"layer_norm_type": "layer_norm",
|
24 |
+
"logn_attention_clip1": false,
|
25 |
+
"logn_attention_scale": false,
|
26 |
+
"max_position_embeddings": 8192,
|
27 |
+
"model_type": "new",
|
28 |
+
"num_attention_heads": 16,
|
29 |
+
"num_hidden_layers": 24,
|
30 |
+
"pack_qkv": true,
|
31 |
+
"pad_token_id": 0,
|
32 |
+
"position_embedding_type": "rope",
|
33 |
+
"rope_scaling": {
|
34 |
+
"factor": 2.0,
|
35 |
+
"type": "ntk"
|
36 |
+
},
|
37 |
+
"rope_theta": 160000,
|
38 |
+
"torch_dtype": "float32",
|
39 |
+
"transformers_version": "4.41.2",
|
40 |
+
"type_vocab_size": 2,
|
41 |
+
"unpad_inputs": false,
|
42 |
+
"use_memory_efficient_attention": false,
|
43 |
+
"vocab_size": 30528
|
44 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.0.1",
|
4 |
+
"transformers": "4.41.2",
|
5 |
+
"pytorch": "2.1.2+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb5caa2f1c0b65eeed7770c657480d99fa8fd2b48d11771885b26587b5adfa42
|
3 |
+
size 1736585680
|
modules.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
}
|
14 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 8192,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": true,
|
47 |
+
"mask_token": "[MASK]",
|
48 |
+
"max_length": 8000,
|
49 |
+
"model_max_length": 8192,
|
50 |
+
"pad_to_multiple_of": null,
|
51 |
+
"pad_token": "[PAD]",
|
52 |
+
"pad_token_type_id": 0,
|
53 |
+
"padding_side": "right",
|
54 |
+
"sep_token": "[SEP]",
|
55 |
+
"stride": 0,
|
56 |
+
"strip_accents": null,
|
57 |
+
"tokenize_chinese_chars": true,
|
58 |
+
"tokenizer_class": "BertTokenizer",
|
59 |
+
"truncation_side": "right",
|
60 |
+
"truncation_strategy": "longest_first",
|
61 |
+
"unk_token": "[UNK]"
|
62 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|