oguuzhansahin
committed on
Commit
•
db2a74d
1
Parent(s):
34afe10
Upload folder using huggingface_hub
Browse files- 1_Pooling/config.json +7 -0
- README.md +142 -0
- config.json +25 -0
- config_sentence_transformers.json +7 -0
- model.safetensors +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +7 -0
- tokenizer.json +0 -0
- tokenizer_config.json +58 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false
|
7 |
+
}
|
README.md
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
pipeline_tag: sentence-similarity
|
3 |
+
tags:
|
4 |
+
- sentence-transformers
|
5 |
+
- feature-extraction
|
6 |
+
- sentence-similarity
|
7 |
+
- retrieval
|
8 |
+
- transformers
|
9 |
+
|
10 |
+
|
11 |
+
---
|
12 |
+
|
13 |
+
# bi-encoder-mnrl-dbmdz-bert-base-turkish-cased-margin_3.0-msmarco-10k-translated
|
14 |
+
|
15 |
+
This is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for tasks like clustering or semantic search.
|
16 |
+
|
17 |
+
<!--- Describe your model here -->
|
18 |
+
|
19 |
+
## Usage (Sentence-Transformers)
|
20 |
+
|
21 |
+
Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
|
22 |
+
|
23 |
+
```
|
24 |
+
pip install -U sentence-transformers
|
25 |
+
```
|
26 |
+
|
27 |
+
Then you can use the model like this:
|
28 |
+
|
29 |
+
```python
|
30 |
+
from sentence_transformers import SentenceTransformer, util
|
31 |
+
|
32 |
+
model = SentenceTransformer('oguuzhansahin/bi-encoder-mnrl-dbmdz-bert-base-turkish-cased-margin_3.0-msmarco-10k-translated')
|
33 |
+
|
34 |
+
query = "İstanbul'un nüfusu kaçtır?"
|
35 |
+
|
36 |
+
sentences = ["İstanbul'da yaşayan insan sayısı 15 milyonu geçmiştir",
|
37 |
+
"Londra'nın nüfusu yaklaşık 9 milyondur.",
|
38 |
+
"İstanbul'da hayat çok zor."]
|
39 |
+
|
40 |
+
query_embedding = model.encode(query, convert_to_tensor=True)
|
41 |
+
sentence_embeddings = model.encode(sentences, show_progress_bar=True, convert_to_tensor=True)
|
42 |
+
|
43 |
+
#Compute dot score between query and all document embeddings
|
44 |
+
scores = util.dot_score(query_embedding, sentence_embeddings)[0].cpu().tolist()
|
45 |
+
|
46 |
+
#Combine docs & scores
|
47 |
+
doc_score_pairs = list(zip(sentences, scores))
|
48 |
+
|
49 |
+
#Sort by decreasing score
|
50 |
+
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
|
51 |
+
|
52 |
+
#Output passages & scores
|
53 |
+
for doc, score in doc_score_pairs:
|
54 |
+
print(score, doc)
|
55 |
+
|
56 |
+
## Expected Output:
|
57 |
+
# 400.1816711425781 | İstanbul'da yaşayan insan sayısı 15 milyonu geçmiştir
|
58 |
+
# 309.97796630859375 | Londra'nın nüfusu yaklaşık 9 milyondur.
|
59 |
+
# 133.04507446289062 | İstanbul'da hayat çok zor.
|
60 |
+
```
|
61 |
+
|
62 |
+
|
63 |
+
|
64 |
+
## Evaluation Results
|
65 |
+
|
66 |
+
<!--- Describe how your model was evaluated -->
|
67 |
+
Evaluated on 10k queries from the translated MS MARCO dev dataset.
|
68 |
+
|
69 |
+
| epoch | steps | cos_sim-Accuracy@1 | cos_sim-Accuracy@3 | cos_sim-Accuracy@5 | cos_sim-Accuracy@10 | cos_sim-Precision@1 | cos_sim-Recall@1 | cos_sim-Precision@3 | cos_sim-Recall@3 | cos_sim-Precision@5 | cos_sim-Recall@5 | cos_sim-Precision@10 | cos_sim-Recall@10 | cos_sim-MRR@10 | cos_sim-NDCG@10 | cos_sim-MAP@100 | dot_score-Accuracy@1 | dot_score-Accuracy@3 | dot_score-Accuracy@5 | dot_score-Accuracy@10 | dot_score-Precision@1 | dot_score-Recall@1 | dot_score-Precision@3 | dot_score-Recall@3 | dot_score-Precision@5 | dot_score-Recall@5 | dot_score-Precision@10 | dot_score-Recall@10 | dot_score-MRR@10 | dot_score-NDCG@10 | dot_score-MAP@100 |
|
70 |
+
|-------|-------|--------------------|--------------------|--------------------|---------------------|---------------------|--------------------|---------------------|--------------------|---------------------|--------------------|----------------------|--------------------|--------------------|--------------------|--------------------|----------------------|----------------------|----------------------|-----------------------|-----------------------|--------------------|-----------------------|--------------------|-----------------------|--------------------|------------------------|---------------------|--------------------|--------------------|--------------------|
|
71 |
+
| 0 | 500 | 0.6525787965616046 | 0.7808022922636103 | 0.8197707736389684 | 0.8611747851002866 | 0.6525787965616046 | 0.6301575931232092 | 0.27277936962750715 | 0.7720630372492837 | 0.17286532951289396 | 0.8130730659025788 | 0.0912320916905444 | 0.8564828080229226 | 0.7247057352071669 | 0.7540179789445202 | 0.7229577384633034 | 0.5883954154727794 | 0.7340974212034383 | 0.7799426934097421 | 0.833810888252149 | 0.5883954154727794 | 0.5672158548233047 | 0.25577841451766953 | 0.7242956064947469 | 0.16421203438395413 | 0.772648042024833 | 0.08810888252148998 | 0.82774594078319 | 0.6712877495792965 | 0.7060157817761727 | 0.6695710889515925 |
|
72 |
+
| 0 | 1000 | 0.6911174785100287 | 0.8101719197707736 | 0.8435530085959886 | 0.8846704871060171 | 0.6911174785100287 | 0.6672397325692454 | 0.2833810888252149 | 0.8015042979942694 | 0.17822349570200574 | 0.837440305635148 | 0.09386819484240688 | 0.880730659025788 | 0.757661117933323 | 0.7848392425365591 | 0.7556074534364394 | 0.6309455587392551 | 0.767621776504298 | 0.8137535816618912 | 0.8601719197707737 | 0.6309455587392551 | 0.6086198662846226 | 0.2671919770773639 | 0.7572349570200573 | 0.17160458452722066 | 0.8068767908309455 | 0.091189111747851 | 0.855336676217765 | 0.7088129349160859 | 0.7412798293491312 | 0.7066932344452895 |
|
73 |
+
| 0 | 1500 | 0.7151862464183381 | 0.8306590257879656 | 0.8608882521489971 | 0.897134670487106 | 0.7151862464183381 | 0.6912488061127029 | 0.29054441260744984 | 0.8222898758357211 | 0.18183381088825215 | 0.8549665711556829 | 0.09510028653295129 | 0.8929560649474689 | 0.7788507981989347 | 0.8039875824511752 | 0.7766051282738895 | 0.6584527220630373 | 0.7901146131805158 | 0.8308022922636104 | 0.8744985673352436 | 0.6584527220630373 | 0.636162846227316 | 0.27569245463228276 | 0.7805157593123209 | 0.1749856733524355 | 0.8235315186246418 | 0.09257879656160459 | 0.8693051575931232 | 0.7328220653113194 | 0.7630103337467442 | 0.7306729678612995 |
|
74 |
+
| 0 | -1 | 0.7299426934097422 | 0.8385386819484241 | 0.8677650429799427 | 0.9012893982808023 | 0.7299426934097422 | 0.705730659025788 | 0.2936007640878701 | 0.8304560649474689 | 0.18349570200573068 | 0.8623089780324737 | 0.09554441260744985 | 0.897015281757402 | 0.7901240505753392 | 0.8135626197561437 | 0.787830493935352 | 0.6787965616045846 | 0.8031518624641834 | 0.8373925501432665 | 0.882378223495702 | 0.6787965616045846 | 0.6561007640878702 | 0.2801814708691499 | 0.7933022922636103 | 0.17653295128939825 | 0.8303724928366762 | 0.09348137535816618 | 0.8777340019102197 | 0.748256185473233 | 0.7767303860204461 | 0.7458413737625622 |
|
75 |
+
| 1 | 500 | 0.7329512893982808 | 0.8422636103151863 | 0.8755014326647564 | 0.9061604584527221 | 0.7329512893982808 | 0.7083810888252149 | 0.2947946513849093 | 0.8340138490926455 | 0.1848710601719198 | 0.8693051575931232 | 0.0961031518624642 | 0.9022683858643744 | 0.7940033883658509 | 0.8177562178760835 | 0.7914392824209506 | 0.6802292263610316 | 0.8078796561604584 | 0.8454154727793697 | 0.8848137535816619 | 0.6802292263610316 | 0.6566738299904489 | 0.282378223495702 | 0.7988658070678127 | 0.17819484240687677 | 0.8380850047755491 | 0.09369627507163324 | 0.8799546322827124 | 0.7516260744985672 | 0.7798058371179187 | 0.7490502934740975 |
|
76 |
+
| 1 | 1000 | 0.729512893982808 | 0.841404011461318 | 0.875214899713467 | 0.9113180515759313 | 0.729512893982808 | 0.7050859598853868 | 0.29450811843361985 | 0.8335124164278891 | 0.18469914040114613 | 0.8690902578796562 | 0.09656160458452723 | 0.9071155682903534 | 0.7932278164369851 | 0.8182219363350265 | 0.7903662439052012 | 0.6843839541547277 | 0.8101719197707736 | 0.8532951289398281 | 0.8925501432664756 | 0.6843839541547277 | 0.6612106017191977 | 0.2825214899713467 | 0.8005969436485195 | 0.17965616045845273 | 0.8460721107927411 | 0.09448424068767908 | 0.8879417382999044 | 0.7560514167462585 | 0.7849882443395625 | 0.7531013420305113 |
|
77 |
+
| 1 | 1500 | 0.7449856733524355 | 0.8524355300859598 | 0.8818051575931232 | 0.9164756446991404 | 0.7449856733524355 | 0.7202960840496656 | 0.298567335243553 | 0.8447110792741165 | 0.18616045845272208 | 0.8759789875835721 | 0.09730659025787966 | 0.9132282712511939 | 0.8056443011779678 | 0.8294354661493777 | 0.8032045174854995 | 0.7004297994269341 | 0.8217765042979943 | 0.8593123209169055 | 0.9007163323782235 | 0.7004297994269341 | 0.6766833810888252 | 0.28705826170009546 | 0.8130014326647564 | 0.181432664756447 | 0.8531518624641834 | 0.09545845272206303 | 0.8966212989493791 | 0.7692333537999718 | 0.7972424077272082 | 0.7664284653213875 |
|
78 |
+
| 1 | -1 | 0.7343839541547278 | 0.8487106017191977 | 0.877650429799427 | 0.9116045845272206 | 0.7343839541547278 | 0.7101599808978032 | 0.29727793696275073 | 0.8408667621776504 | 0.18521489971346708 | 0.8716929321872016 | 0.09663323782234957 | 0.9077722063037249 | 0.7974350752717504 | 0.8218012152154055 | 0.7950416352280592 | 0.6871060171919771 | 0.8141833810888253 | 0.8520057306590257 | 0.8941260744985673 | 0.6871060171919771 | 0.663562559694365 | 0.2840974212034384 | 0.8046561604584527 | 0.17979942693409742 | 0.8456064947468959 | 0.09461318051575933 | 0.8893147086914994 | 0.7588650111429464 | 0.7873455619046803 | 0.7557920076739941 |
|
79 |
+
| 2 | 500 | 0.7253581661891118 | 0.8373925501432665 | 0.872349570200573 | 0.9060171919770774 | 0.7253581661891118 | 0.701098376313276 | 0.29326647564469915 | 0.8291666666666666 | 0.18426934097421205 | 0.8663920725883476 | 0.09597421203438397 | 0.9019102196752626 | 0.788637035520988 | 0.8134114908135215 | 0.7859345726437968 | 0.6726361031518625 | 0.8038681948424069 | 0.844269340974212 | 0.8862464183381089 | 0.6726361031518625 | 0.6494269340974212 | 0.2805157593123209 | 0.79420964660936 | 0.17810888252148996 | 0.8373686723973256 | 0.09383954154727796 | 0.8815902578796562 | 0.7467288056578876 | 0.7764423334792536 | 0.7442459199666945 |
|
80 |
+
| 2 | 1000 | 0.7346704871060172 | 0.845702005730659 | 0.8765042979942693 | 0.9106017191977077 | 0.7346704871060172 | 0.710458452722063 | 0.29613180515759313 | 0.8375477554918815 | 0.18521489971346708 | 0.8708333333333332 | 0.09654727793696276 | 0.9069484240687679 | 0.7967844635466406 | 0.821080594029586 | 0.7944073081188138 | 0.6851002865329513 | 0.8143266475644699 | 0.8484240687679083 | 0.8919770773638969 | 0.6851002865329513 | 0.6620224450811844 | 0.2843361986628462 | 0.8052411652340019 | 0.1791977077363897 | 0.8421561604584528 | 0.09445558739255014 | 0.8876313276026743 | 0.7576274048301268 | 0.786374643230553 | 0.7553079432262348 |
|
81 |
+
| 2 | 1500 | 0.7351002865329513 | 0.8478510028653296 | 0.8796561604584527 | 0.9114613180515759 | 0.7351002865329513 | 0.7105300859598854 | 0.2967526265520534 | 0.8396251193887296 | 0.18578796561604588 | 0.8738419293218719 | 0.09664756446991406 | 0.9078199617956064 | 0.7969524036930906 | 0.8213578318779787 | 0.7944409556338867 | 0.6955587392550143 | 0.8191977077363897 | 0.8571633237822349 | 0.8965616045845273 | 0.6955587392550143 | 0.6719436485195798 | 0.2864374403056351 | 0.8105659025787966 | 0.1806590257879656 | 0.8503939828080229 | 0.09497134670487108 | 0.8924188156638013 | 0.7651858484559056 | 0.7931668058208581 | 0.7625478945876472 |
|
82 |
+
| 2 | -1 | 0.7330945558739255 | 0.8459885386819485 | 0.8796561604584527 | 0.9098853868194843 | 0.7330945558739255 | 0.7082975167144221 | 0.29613180515759313 | 0.8375955109837631 | 0.18564469914040113 | 0.8733882521489972 | 0.09648997134670487 | 0.9058978032473733 | 0.7952313867285201 | 0.8194890404298979 | 0.7924671105318537 | 0.6892550143266476 | 0.8156160458452723 | 0.8537249283667622 | 0.8951289398280803 | 0.6892550143266476 | 0.6661771728748805 | 0.28486150907354346 | 0.8061127029608404 | 0.17994269340974212 | 0.8468003820439349 | 0.09478510028653296 | 0.8905085959885387 | 0.760438554600445 | 0.7890338697308207 | 0.7575932457133956 |
|
83 |
+
| 3 | 500 | 0.7280802292263611 | 0.8458452722063037 | 0.880515759312321 | 0.9094555873925502 | 0.7280802292263611 | 0.703784622731614 | 0.29613180515759313 | 0.8376790830945559 | 0.18595988538681948 | 0.8747254059216809 | 0.09637535816618911 | 0.9053008595988539 | 0.7930108700595786 | 0.8179371983031188 | 0.7906095180992412 | 0.686676217765043 | 0.811461318051576 | 0.8525787965616046 | 0.8967048710601719 | 0.686676217765043 | 0.663932664756447 | 0.28371537726838586 | 0.8026146131805157 | 0.17974212034383952 | 0.8457497612225405 | 0.09492836676217767 | 0.8921203438395415 | 0.7590517123754944 | 0.7884945147622646 | 0.7564213901145882 |
|
84 |
+
| 3 | 1000 | 0.727650429799427 | 0.8452722063037249 | 0.8782234957020058 | 0.9094555873925502 | 0.727650429799427 | 0.7037010506208213 | 0.29574976122254054 | 0.8370702005730659 | 0.1853295128939828 | 0.8717884431709646 | 0.09638968481375358 | 0.9054680038204392 | 0.79231966616637 | 0.8173548182315657 | 0.7897494667720486 | 0.6872492836676217 | 0.8177650429799427 | 0.8541547277936963 | 0.8945558739255014 | 0.6872492836676217 | 0.6640998089780323 | 0.2857688634192932 | 0.8089541547277938 | 0.17997134670487105 | 0.8471466093600765 | 0.09465616045845272 | 0.8896251193887297 | 0.7590795128939827 | 0.7878020986215141 | 0.7562928001653756 |
|
85 |
+
| 3 | 1500 | 0.730945558739255 | 0.8478510028653296 | 0.8787965616045845 | 0.9100286532951289 | 0.730945558739255 | 0.7067215854823303 | 0.2968481375358166 | 0.8397683858643745 | 0.18535816618911177 | 0.8723376313276026 | 0.09643266475644699 | 0.9059216809933142 | 0.7947027220630363 | 0.8191206005600553 | 0.7918770498713639 | 0.6895415472779369 | 0.8153295128939828 | 0.8535816618911175 | 0.8949856733524355 | 0.6895415472779369 | 0.6665353390639923 | 0.2848137535816619 | 0.8062440305635148 | 0.17988538681948424 | 0.8465616045845272 | 0.09462750716332378 | 0.8898758357211078 | 0.7605133715377266 | 0.7888842917894296 | 0.7576483206453933 |
|
86 |
+
| 3 | -1 | 0.7319484240687679 | 0.8492836676217765 | 0.8813753581661891 | 0.9106017191977077 | 0.7319484240687679 | 0.7076528175740209 | 0.2973734479465138 | 0.8414517669531996 | 0.1860458452722063 | 0.8752745940783189 | 0.09650429799426935 | 0.9064469914040114 | 0.7956879064901968 | 0.8201540152375801 | 0.7930877726771091 | 0.6893982808022923 | 0.8177650429799427 | 0.856160458452722 | 0.8977077363896848 | 0.6893982808022923 | 0.6661771728748805 | 0.2854823304680038 | 0.808416905444126 | 0.18037249283667622 | 0.8491762177650429 | 0.09491404011461319 | 0.8925501432664756 | 0.761433460681311 | 0.7901828953258867 | 0.7583172945055513 |
|
87 |
+
| 4 | 500 | 0.729512893982808 | 0.8436962750716333 | 0.876647564469914 | 0.9101719197707736 | 0.729512893982808 | 0.705241165234002 | 0.29541547277936964 | 0.8356733524355301 | 0.18510028653295132 | 0.8706064947468959 | 0.09653295128939827 | 0.9063514804202483 | 0.7933414062855317 | 0.8183534981698449 | 0.7908415471359164 | 0.6862464183381088 | 0.8141833810888253 | 0.8527220630372493 | 0.895272206303725 | 0.6862464183381088 | 0.6631924546322827 | 0.2843839541547278 | 0.805002387774594 | 0.17977077363896848 | 0.8458452722063037 | 0.09471346704871061 | 0.8904250238777458 | 0.7585354982489648 | 0.7875293449629553 | 0.7557095120190159 |
|
88 |
+
| 4 | 1000 | 0.7293696275071633 | 0.8426934097421204 | 0.8772206303724929 | 0.9107449856733524 | 0.7293696275071633 | 0.7051337153772683 | 0.2950334288443171 | 0.8346704871060172 | 0.18518624641833814 | 0.8710840496657115 | 0.09651862464183382 | 0.9066738299904489 | 0.7926533860917803 | 0.8177753364741875 | 0.7898442183283092 | 0.6889684813753582 | 0.8131805157593123 | 0.8531518624641834 | 0.8955587392550143 | 0.6889684813753582 | 0.6657234957020057 | 0.2836676217765043 | 0.803569723018147 | 0.17968481375358167 | 0.8459646609360076 | 0.09471346704871061 | 0.8904727793696275 | 0.760148951653249 | 0.7886659671781766 | 0.7571659283553608 |
|
89 |
+
| 4 | 1500 | 0.7326647564469914 | 0.8435530085959886 | 0.8787965616045845 | 0.9117478510028654 | 0.7326647564469914 | 0.708416905444126 | 0.29546322827125115 | 0.835792741165234 | 0.1854441260744986 | 0.872743553008596 | 0.09659025787965617 | 0.9076528175740209 | 0.7948749260926895 | 0.81981561179438 | 0.7922047206136493 | 0.6889684813753582 | 0.8173352435530086 | 0.8537249283667622 | 0.895272206303725 | 0.6889684813753582 | 0.6657712511938872 | 0.28529130850047757 | 0.8080826170009551 | 0.17988538681948424 | 0.8468839541547278 | 0.09472779369627508 | 0.8905085959885387 | 0.7611654045572382 | 0.7896038729003526 | 0.7582836411869348 |
|
90 |
+
| 4 | -1 | 0.730945558739255 | 0.8429799426934097 | 0.8773638968481375 | 0.9127507163323783 | 0.730945558739255 | 0.7066977077363897 | 0.29531996179560643 | 0.8353629417383 | 0.1851289398280802 | 0.8711318051575931 | 0.09667621776504297 | 0.9084885386819485 | 0.7939326079046694 | 0.8191584665873488 | 0.7910064252106939 | 0.6905444126074498 | 0.8163323782234957 | 0.852865329512894 | 0.8951289398280803 | 0.6905444126074498 | 0.6674426934097422 | 0.2849570200573066 | 0.8070319961795606 | 0.17971346704871058 | 0.8460004775549188 | 0.09469914040114613 | 0.89024594078319 | 0.7616585937144678 | 0.7898879131897266 | 0.7588026826359487 |
|
91 |
+
| 5 | 500 | 0.7292263610315186 | 0.8416905444126075 | 0.877650429799427 | 0.9106017191977077 | 0.7292263610315186 | 0.7048591212989493 | 0.29493791786055396 | 0.8342048710601719 | 0.18530085959885387 | 0.8718361986628461 | 0.09646131805157594 | 0.9063395415472779 | 0.7927187656342373 | 0.817777858898932 | 0.7899427304931261 | 0.6893982808022923 | 0.8153295128939828 | 0.8530085959885387 | 0.8934097421203439 | 0.6893982808022923 | 0.6660100286532952 | 0.28486150907354346 | 0.8064231136580706 | 0.17965616045845273 | 0.845857211079274 | 0.09455587392550144 | 0.8887416427889206 | 0.7603872174466731 | 0.7884751772347413 | 0.7574401114646502 |
| 5 | 1000 | 0.7297994269340974 | 0.8419770773638968 | 0.8769340974212034 | 0.9084527220630373 | 0.7297994269340974 | 0.7054560649474689 | 0.29489016236867244 | 0.8342168099331423 | 0.18518624641833809 | 0.8712870105062083 | 0.09623209169054442 | 0.9041905444126075 | 0.7925137012780262 | 0.817151826387473 | 0.7899257058194014 | 0.6905444126074498 | 0.8177650429799427 | 0.8540114613180516 | 0.8934097421203439 | 0.6905444126074498 | 0.6670487106017192 | 0.2855300859598854 | 0.8086198662846227 | 0.17991404011461318 | 0.8471466093600764 | 0.09454154727793698 | 0.8887774594078318 | 0.7611970141447217 | 0.7890603185208098 | 0.758136309854029 |
|
92 |
+
| 5 | 1500 | 0.7293696275071633 | 0.8408309455587393 | 0.8772206303724929 | 0.9093123209169054 | 0.7293696275071633 | 0.7051695319961795 | 0.29446036294173833 | 0.8329274116523401 | 0.18515759312320915 | 0.871191499522445 | 0.09634670487106017 | 0.9051695319961796 | 0.7923364942920804 | 0.8171934443384362 | 0.7895821477901567 | 0.689971346704871 | 0.8170487106017192 | 0.8537249283667622 | 0.8929799426934097 | 0.689971346704871 | 0.666583094555874 | 0.28529130850047757 | 0.80792741165234 | 0.17982808022922636 | 0.8467645654250239 | 0.09449856733524356 | 0.8882521489971347 | 0.760993769045345 | 0.7887928737935377 | 0.7579401229598806 |
|
93 |
+
| 5 | -1 | 0.729512893982808 | 0.8409742120343839 | 0.8770773638968481 | 0.9093123209169054 | 0.729512893982808 | 0.7053127984718243 | 0.2945081184336199 | 0.8330706781279849 | 0.18515759312320915 | 0.8711198662846227 | 0.09634670487106017 | 0.9051695319961796 | 0.7923945968072029 | 0.8172376494572229 | 0.789643206301542 | 0.689971346704871 | 0.8170487106017192 | 0.8537249283667622 | 0.8928366762177651 | 0.689971346704871 | 0.666583094555874 | 0.28529130850047757 | 0.80792741165234 | 0.17982808022922636 | 0.8467645654250239 | 0.09448424068767908 | 0.88810888252149 | 0.7609504479919952 | 0.788727215652273 | 0.7579130219416423 |
|
94 |
+
|
95 |
+
|
96 |
+
## Training
|
97 |
+
The model was trained with the parameters:
|
98 |
+
|
99 |
+
**DataLoader**:
|
100 |
+
|
101 |
+
`torch.utils.data.dataloader.DataLoader` of length 311 with parameters:
|
102 |
+
```
|
103 |
+
{'batch_size': 32, 'sampler': 'torch.utils.data.sampler.RandomSampler', 'batch_sampler': 'torch.utils.data.sampler.BatchSampler'}
|
104 |
+
```
|
105 |
+
|
106 |
+
**Loss**:
|
107 |
+
|
108 |
+
`sentence_transformers.losses.MultipleNegativesRankingLoss.MultipleNegativesRankingLoss` with parameters:
|
109 |
+
```
|
110 |
+
{'scale': 20.0, 'similarity_fct': 'cos_sim'}
|
111 |
+
```
|
112 |
+
|
113 |
+
Parameters of the fit()-Method:
|
114 |
+
```
|
115 |
+
{
|
116 |
+
"epochs": 5,
|
117 |
+
"evaluation_steps": 500,
|
118 |
+
"evaluator": "sentence_transformers.evaluation.InformationRetrievalEvaluator.InformationRetrievalEvaluator",
|
119 |
+
"max_grad_norm": 1,
|
120 |
+
"optimizer_class": "<class 'torch.optim.adamw.AdamW'>",
|
121 |
+
"optimizer_params": {
|
122 |
+
"lr": 2e-05
|
123 |
+
},
|
124 |
+
"scheduler": "WarmupLinear",
|
125 |
+
"steps_per_epoch": null,
|
126 |
+
"warmup_steps": 1000,
|
127 |
+
"weight_decay": 0.01
|
128 |
+
}
|
129 |
+
```
|
130 |
+
|
131 |
+
|
132 |
+
## Full Model Architecture
|
133 |
+
```
|
134 |
+
SentenceTransformer(
|
135 |
+
(0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
|
136 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
|
137 |
+
)
|
138 |
+
```
|
139 |
+
|
140 |
+
## Citing & Authors
|
141 |
+
|
142 |
+
<!--- Describe where people can find more information -->
|
config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "dbmdz/bert-base-turkish-cased",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 768,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 3072,
|
13 |
+
"layer_norm_eps": 1e-12,
|
14 |
+
"max_position_embeddings": 512,
|
15 |
+
"model_type": "bert",
|
16 |
+
"num_attention_heads": 12,
|
17 |
+
"num_hidden_layers": 12,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"position_embedding_type": "absolute",
|
20 |
+
"torch_dtype": "float32",
|
21 |
+
"transformers_version": "4.35.2",
|
22 |
+
"type_vocab_size": 2,
|
23 |
+
"use_cache": true,
|
24 |
+
"vocab_size": 32000
|
25 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "2.2.2",
|
4 |
+
"transformers": "4.35.2",
|
5 |
+
"pytorch": "2.1.0+cu118"
|
6 |
+
}
|
7 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e2105369d20099e6edf45a5c29997dc90fb0ab448ad77159cac3bbe1d2b62cd
|
3 |
+
size 442491744
|
modules.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
}
|
14 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 256,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"3": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"4": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": false,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"max_len": 512,
|
50 |
+
"model_max_length": 512,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_token": "[PAD]",
|
53 |
+
"sep_token": "[SEP]",
|
54 |
+
"strip_accents": null,
|
55 |
+
"tokenize_chinese_chars": true,
|
56 |
+
"tokenizer_class": "BertTokenizer",
|
57 |
+
"unk_token": "[UNK]"
|
58 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|