nreimers committed
Commit 33dbf06
Parent(s): 3f15a3f
upload
Browse files
- CEBinaryClassificationEvaluator_MS-Marco_results.csv +43 -0
- README.md +34 -0
- config.json +31 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.txt +0 -0
CEBinaryClassificationEvaluator_MS-Marco_results.csv
ADDED
@@ -0,0 +1,43 @@
epoch,steps,Accuracy,Accuracy_Threshold,F1,F1_Threshold,Precision,Recall,Average_Precision
0,5000,0.9297070292970703,0.25256121158599854,0.8307839388145314,0.19771124422550201,0.7957875457875457,0.869,0.8904110467492587
0,10000,0.939006099390061,0.5306986570358276,0.8460807600950118,0.28808051347732544,0.8058823529411765,0.8905,0.910544278892506
0,15000,0.9393060693930607,0.5750397443771362,0.8566081871345029,0.48249387741088867,0.8048351648351648,0.9155,0.9132147986720082
0,20000,0.9405059494050595,0.591253936290741,0.8546298558514537,0.570050835609436,0.8356426182513139,0.8745,0.9073685536522613
0,25000,0.9436056394360564,0.5074090957641602,0.8603960396039605,0.5057582855224609,0.8519607843137255,0.869,0.9167379821993755
0,30000,0.9396060393960604,0.8262588381767273,0.8542471042471043,0.7406325340270996,0.8255597014925373,0.885,0.8979176130668384
0,35000,0.9425057494250575,0.46686679124832153,0.8596070915189268,0.28302955627441406,0.8252069917203312,0.897,0.9163289965092976
0,40000,0.9417058294170583,0.6763133406639099,0.8575602629656682,0.6603987216949463,0.8357854769814903,0.8805,0.9173776247925393
0,45000,0.9426057394260574,0.4643915295600891,0.8605042016806723,0.29147765040397644,0.8277136258660508,0.896,0.9120726077810245
0,50000,0.945005499450055,0.5493776798248291,0.8624535315985131,0.4713650643825531,0.855036855036855,0.87,0.9209400105864155
0,55000,0.9454054594540546,0.6156725287437439,0.864585893339887,0.5604670643806458,0.8501691638472693,0.8795,0.9206262233464874
0,60000,0.9421057894210579,0.39554399251937866,0.8605827112930412,0.3811936378479004,0.8300046446818393,0.8935,0.9193948306076224
0,65000,0.9428057194280572,0.5363738536834717,0.8629682313892841,0.32784485816955566,0.8205590622182146,0.91,0.9227492855045069
0,70000,0.9438056194380562,0.38333064317703247,0.8628501827040195,0.3524332344532013,0.8413301662707838,0.8855,0.9236299441431376
0,75000,0.9468053194680532,0.48936331272125244,0.8696717295443409,0.48936331272125244,0.8525456292026897,0.8875,0.9254413650794524
0,80000,0.9454054594540546,0.3127445578575134,0.8651851851851852,0.3127445578575134,0.8546341463414634,0.876,0.9213706944185774
0,85000,0.9443055694430557,0.31547677516937256,0.8655280250180418,0.21403872966766357,0.8340287436254057,0.8995,0.9237103419372517
0,90000,0.9465053494650535,0.3857932686805725,0.8702401164200824,0.3761560022830963,0.8450306170513424,0.897,0.9258501989030058
0,95000,0.9453054694530547,0.3604514002799988,0.8669713735867213,0.29048818349838257,0.8354195642095503,0.901,0.9226658871253511
0,100000,0.9453054694530547,0.6748594045639038,0.8686288585786074,0.4552273154258728,0.8329508949059201,0.9075,0.9252677323330876
0,105000,0.9435056494350565,0.40062007308006287,0.8639551192145862,0.1210024282336235,0.8112379280070237,0.924,0.9237990563267019
0,110000,0.944905509449055,0.4197750985622406,0.8656429942418427,0.27975988388061523,0.8321033210332104,0.902,0.9247201058651281
0,115000,0.9464053594640536,0.4172205924987793,0.8698167791706846,0.2961992919445038,0.839851024208566,0.902,0.927117403879296
0,120000,0.9474052594740526,0.44686269760131836,0.8712047012732614,0.4383932948112488,0.8536468330134357,0.8895,0.9279628711835812
0,125000,0.945005499450055,0.4358792304992676,0.8655339805825243,0.28539055585861206,0.8410377358490566,0.8915,0.9268525722856882
0,130000,0.9462053794620537,0.21194982528686523,0.8703747911195989,0.16292141377925873,0.8328003654636821,0.9115,0.925512309638313
0,135000,0.9454054594540546,0.2292814701795578,0.8678621991505427,0.11477036774158478,0.82171581769437,0.9195,0.9268551457216524
0,140000,0.9482051794820517,0.31556186079978943,0.8758076094759513,0.26744428277015686,0.8398347865993575,0.915,0.9275073681003255
0,145000,0.9478052194780522,0.3485147953033447,0.8719556305763203,0.12995882332324982,0.8421052631578947,0.904,0.9278250006342896
0,150000,0.9483051694830517,0.32228657603263855,0.8726037369570493,0.21710461378097534,0.8477133427628477,0.899,0.9259328370035781
0,155000,0.9474052594740526,0.1903868019580841,0.8731307284129282,0.18298938870429993,0.8434296365330848,0.905,0.9261096325445609
0,160000,0.9473052694730527,0.5740681886672974,0.872194660996929,0.17134147882461548,0.8266905508284819,0.923,0.927973529121574
0,165000,0.9495050494950505,0.38968273997306824,0.87591956841589,0.34622055292129517,0.8594802694898941,0.893,0.9241440163389828
0,170000,0.9459054094590541,0.47478723526000977,0.8706669854171647,0.11328981816768646,0.8341731562070546,0.9105,0.9289979858500923
0,175000,0.9473052694730527,0.5903739929199219,0.8703747911195989,0.15506823360919952,0.8328003654636821,0.9115,0.9305074303915251
0,180000,0.9463053694630537,0.23235449194908142,0.8702585165498912,0.23235449194908142,0.841982234689107,0.9005,0.9291547676197442
0,185000,0.9478052194780522,0.174373060464859,0.8734852157052836,0.171615868806839,0.8476011288805269,0.901,0.9280170204346545
0,190000,0.949005099490051,0.5715193748474121,0.8747241971071341,0.5108739137649536,0.8581048581048581,0.892,0.9271410745170057
0,195000,0.9461053894610539,0.5194154977798462,0.8679334916864608,0.170893132686615,0.8266968325791855,0.9135,0.9271023702066649
0,200000,0.9468053194680532,0.3094758987426758,0.8707931277947754,0.11578939855098724,0.82258781680747,0.925,0.9290083868621436
0,205000,0.9461053894610539,0.6028298139572144,0.8679067577113257,0.13052904605865479,0.8202047174009791,0.9215,0.9276186176796931
0,210000,0.9459054094590541,0.49049288034439087,0.8694616484040019,0.16249723732471466,0.8303002729754322,0.9125,0.9285170114050436
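The CSV above logs binary-classification metrics on the MS Marco dev set every 5,000 training steps. As a minimal sketch (assuming pandas is installed and the CSV sits in the working directory), one might pick the strongest checkpoint by Average_Precision like this:

```python
import pandas as pd

# Load the evaluator log written during training (local path is an assumption).
log = pd.read_csv("CEBinaryClassificationEvaluator_MS-Marco_results.csv")

# Each row is one evaluation, run every 5,000 training steps.
best = log.loc[log["Average_Precision"].idxmax()]
print(f"Best checkpoint: step {int(best['steps'])}, "
      f"AP={best['Average_Precision']:.4f}, F1={best['F1']:.4f}")
```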
README.md
ADDED
@@ -0,0 +1,34 @@
# Cross-Encoder for MS Marco

This model uses [Electra-base](https://huggingface.co/google/electra-base-discriminator).

It was trained on the [MS Marco Passage Ranking](https://github.com/microsoft/MSMARCO-Passage-Ranking) task.

The model can be used for Information Retrieval: given a query, encode the query together with all candidate passages (e.g. retrieved with ElasticSearch), then sort the passages in decreasing order of score. See [SBERT.net Information Retrieval](https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/information-retrieval) for more details. The training code is available here: [SBERT.net Training MS Marco](https://github.com/UKPLab/sentence-transformers/tree/master/examples/training/ms_marco)

## Usage and Performance

Pre-trained models can be used like this:
```python
from sentence_transformers import CrossEncoder

model = CrossEncoder('model_name', max_length=512)
scores = model.predict([('Query', 'Paragraph1'), ('Query', 'Paragraph2'), ('Query', 'Paragraph3')])
```

In the following table, we provide various pre-trained Cross-Encoders together with their performance on the [TREC Deep Learning 2019](https://microsoft.github.io/TREC-2019-Deep-Learning/) and the [MS Marco Passage Reranking](https://github.com/microsoft/MSMARCO-Passage-Ranking/) datasets.

| Model-Name | NDCG@10 (TREC DL 19) | MRR@10 (MS Marco Dev) | Docs / Sec (BertTokenizerFast) | Docs / Sec |
| ------------- | :------------- | ----- | --- | --- |
| cross-encoder/ms-marco-TinyBERT-L-2 | 67.43 | 30.15 | 9000 | 780 |
| cross-encoder/ms-marco-TinyBERT-L-4 | 68.09 | 34.50 | 2900 | 760 |
| cross-encoder/ms-marco-TinyBERT-L-6 | 69.57 | 36.13 | 680 | 660 |
| cross-encoder/ms-marco-electra-base | 71.99 | 36.41 | 340 | 340 |
| *Other models* | | | | |
| nboost/pt-tinybert-msmarco | 63.63 | 28.80 | 2900 | 760 |
| nboost/pt-bert-base-uncased-msmarco | 70.94 | 34.75 | 340 | 340 |
| nboost/pt-bert-large-msmarco | 73.36 | 36.48 | 100 | 100 |
| Capreolus/electra-base-msmarco | 71.23 | | 340 | 340 |
| amberoad/bert-multilingual-passage-reranking-msmarco | 68.40 | | 330 | 330 |

Note: Runtime was computed on a V100 GPU. A bottleneck for the smaller models is the standard Python tokenizer from Huggingface v3; replacing it with the Rust-based fast tokenizer significantly improves throughput, as shown in the Docs / Sec (BertTokenizerFast) column.
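To make the re-ranking workflow described above concrete, here is a minimal sketch (the query and passages are made-up examples; 'cross-encoder/ms-marco-electra-base' is one of the model names from the table) that scores query-passage pairs and sorts the passages by decreasing score:

```python
from sentence_transformers import CrossEncoder

# Load the cross-encoder re-ranker (model name taken from the table above).
model = CrossEncoder('cross-encoder/ms-marco-electra-base', max_length=512)

query = "How many people live in Berlin?"  # illustrative query
passages = [
    "Berlin had a population of 3.6 million registered inhabitants.",
    "Berlin is well known for its museums.",
]

# Score every (query, passage) pair, then sort passages by decreasing score.
scores = model.predict([(query, p) for p in passages])
ranked = sorted(zip(passages, scores), key=lambda x: x[1], reverse=True)
for passage, score in ranked:
    print(f"{score:.4f}\t{passage}")
```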
config.json
ADDED
@@ -0,0 +1,31 @@
{
  "_name_or_path": "google/electra-base-discriminator",
  "architectures": [
    "ElectraForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 768,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "type_vocab_size": 2,
  "vocab_size": 30522
}
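The single entry in `id2label` means the sequence-classification head outputs one logit per query-passage pair. As a hedged sketch of loading this checkpoint directly with the transformers library ('model_name' is a placeholder for this repository's id; applying a sigmoid to the single logit is an assumption that mirrors what sentence-transformers' CrossEncoder does for single-label models):

```python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# 'model_name' is a placeholder for this repository's id.
tokenizer = AutoTokenizer.from_pretrained('model_name')
model = AutoModelForSequenceClassification.from_pretrained('model_name')
model.eval()

# Tokenize one (query, passage) pair; max_position_embeddings is 512 per config.json.
features = tokenizer("How many people live in Berlin?",
                     "Berlin had a population of 3.6 million.",
                     truncation=True, max_length=512, return_tensors="pt")

with torch.no_grad():
    logits = model(**features).logits  # shape (1, 1): single-label head

# Sigmoid over the single logit yields a relevance score in [0, 1]
# (assumed reading of the score, matching CrossEncoder's default).
score = torch.sigmoid(logits.squeeze()).item()
print(score)
```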
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c554473d61458bf2969566b1bb464eb280ef7de9cacb6ec787b4fe7f0a9a80d9
size 438022601
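pytorch_model.bin is stored as a Git LFS pointer: `oid` is the SHA-256 digest of the actual weights file and `size` is its byte length. As a small sketch (the local file path is an assumption), a downloaded copy can be verified against this pointer:

```python
import hashlib

# Expected values copied from the LFS pointer above.
EXPECTED_OID = "c554473d61458bf2969566b1bb464eb280ef7de9cacb6ec787b4fe7f0a9a80d9"
EXPECTED_SIZE = 438022601

# Stream the downloaded file (path is an assumption) through SHA-256.
sha256 = hashlib.sha256()
size = 0
with open("pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)
        size += len(chunk)

assert size == EXPECTED_SIZE, f"size mismatch: {size}"
assert sha256.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("weights match the LFS pointer")
```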
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
{"do_lower_case": true, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "name_or_path": "google/electra-base-discriminator"}
vocab.txt
ADDED
The diff for this file is too large to render.