nreimers committed on
Commit
d7a7bc1
1 Parent(s): e60ab70
CEBinaryClassificationEvaluator_Quora-dev_results.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,steps,Accuracy,Accuracy_Threshold,F1,F1_Threshold,Precision,Recall,Average_Precision
2
+ 0,5000,0.8683833274728842,0.5240041017532349,0.8198757763975154,0.3768240511417389,0.7744990392533626,0.8709005324484914,0.8731992606250942
3
+ 0,10000,0.8685997132887939,0.6672844886779785,0.8178615982974976,0.4671083688735962,0.7796432318992654,0.8600200632764874,0.8729835294247004
4
+ 0,15000,0.867139109031403,0.1811354160308838,0.8167750946447605,0.10655976086854935,0.7798287478944413,0.8573964040435219,0.8630894015038928
5
+ 0,20000,0.8652457331421926,0.5867656469345093,0.810907497992261,0.042630940675735474,0.7694492552823,0.8570877382514083,0.8491018647322772
6
+ 0,25000,0.8699791728652186,0.19235554337501526,0.8189171463965605,0.11564554274082184,0.7757299647960241,0.8671965429431283,0.8779901051208325
7
+ 0,30000,0.8738470693246058,0.1700415164232254,0.8265335858953811,0.12816089391708374,0.7827916925412268,0.8754533528821669,0.880091749112012
8
+ 0,-1,0.8736036352817073,0.24405449628829956,0.8273541780947156,0.17230889201164246,0.7863200333657723,0.8729068600972297,0.8822842182205346
9
+ 1,5000,0.8749830948581321,0.18700861930847168,0.8258481907771007,0.1650511920452118,0.8056065164746459,0.847133266455745,0.8808449128337521
10
+ 1,10000,0.8773362906061508,0.056389499455690384,0.8286927441475971,0.05023118108510971,0.8076021678628973,0.8509144224091365,0.8806352233982097
11
+ 1,15000,0.8772551459251846,0.08140122890472412,0.8288128056914184,0.040914058685302734,0.7972057880105495,0.8630295547495949,0.8724934608981681
12
+ 1,20000,0.876146168618647,0.02961307018995285,0.8269898751016186,0.008039627224206924,0.7934482025101043,0.8634925534377652,0.8820982567462179
13
+ 1,25000,0.8766871331584215,0.018407132476568222,0.8281409975135293,0.006738942116498947,0.7869900618528042,0.8738328574735705,0.8403278253386028
14
+ 1,30000,0.8772551459251846,0.03199715167284012,0.8317026152111446,0.014339910820126534,0.7866382276042383,0.8822440003086658,0.8802174694024198
15
+ 1,-1,0.8785534608206432,0.03839807212352753,0.8313170836773521,0.012577023357152939,0.7772184185796751,0.8935103017208118,0.8790503615896275
README.md ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Cross-Encoder for Quora Duplicate Questions Detection
2
+ This model was trained using [SentenceTransformers](https://sbert.net) [Cross-Encoder](https://www.sbert.net/examples/applications/cross-encoder/README.html) class.
3
+
4
+ ## Training Data
5
+ This model was trained on the [Quora Duplicate Questions](https://www.quora.com/q/quoradata/First-Quora-Dataset-Release-Question-Pairs) dataset. The model predicts a score between 0 and 1 indicating how likely it is that the two given questions are duplicates.
6
+
7
+ Note: The model is not suitable for estimating the similarity of questions; e.g. the two questions "How to learn Java" and "How to learn Python" will result in a rather low score, as they are not duplicates.
8
+
9
+ ## Usage and Performance
10
+
11
+ Pre-trained models can be used like this:
12
+ ```
13
+ from sentence_transformers import CrossEncoder
14
+ model = CrossEncoder('model_name')
15
+ scores = model.predict([('Question 1', 'Question 2'), ('Question 3', 'Question 4')])
16
+ ```
17
+
18
+ You can also use this model without sentence_transformers, by just using the Transformers ``AutoModel`` class.
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_num_labels": 3,
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 1024,
13
+ "id2label": {
14
+ "0": "LABEL_0"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 4096,
18
+ "label2id": {
19
+ "LABEL_0": 0
20
+ },
21
+ "layer_norm_eps": 1e-05,
22
+ "max_position_embeddings": 514,
23
+ "model_type": "roberta",
24
+ "num_attention_heads": 16,
25
+ "num_hidden_layers": 24,
26
+ "pad_token_id": 1,
27
+ "type_vocab_size": 1,
28
+ "vocab_size": 50265
29
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a01c9ec97da7dcd86815855a2d91b43c96db6518c7da0f917bcbe00ced5d13e3
3
+ size 1421616585
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_max_length": 512, "special_tokens_map_file": "input-model/roberta-large-mnli/special_tokens_map.json", "full_tokenizer_file": null}
vocab.json ADDED
The diff for this file is too large to render. See raw diff