nreimers committed on
Commit
480c0d6
1 Parent(s): 54cbe90
CESoftmaxAccuracyEvaluator_AllNLI-dev_results.csv ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,steps,Accuracy
2
+ 0,10000,0.8450424785063845
3
+ 0,20000,0.8602533448644248
4
+ 0,30000,0.8755659561479371
5
+ 0,40000,0.8786691763748283
6
+ 0,50000,0.8844686371267233
7
+ 0,-1,0.8870631327262553
8
+ 1,10000,0.8886401790710688
9
+ 1,20000,0.8880297095182378
10
+ 1,30000,0.8925573587017348
11
+ 1,40000,0.8941852775092842
12
+ 1,50000,0.8943887673602279
13
+ 1,-1,0.8943887673602279
14
+ 2,10000,0.896271048481457
15
+ 2,20000,0.8971867528107036
16
+ 2,30000,0.8961184310932492
17
+ 2,40000,0.8954570890776823
18
+ 2,50000,0.8955079615404181
19
+ 2,-1,0.8961184310932492
20
+ 3,10000,0.8958640687795696
21
+ 3,20000,0.8985603093045734
22
+ 3,30000,0.8993742687083481
23
+ 3,40000,0.8986111817673094
24
+ 3,50000,0.8986111817673094
25
+ 3,-1,0.8983059469908938
README.md ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ pipeline_tag: zero-shot-classification
4
+ tags:
5
+ - microsoft/deberta-v3-xsmall
6
+ datasets:
7
+ - multi_nli
8
+ - snli
9
+ metrics:
10
+ - accuracy
11
+ license: apache-2.0
12
+ ---
13
+
14
+ # Cross-Encoder for Natural Language Inference
15
+ This model was trained using the [SentenceTransformers](https://sbert.net) [Cross-Encoder](https://www.sbert.net/examples/applications/cross-encoder/README.html) class. It is based on [microsoft/deberta-v3-xsmall](https://huggingface.co/microsoft/deberta-v3-xsmall).
16
+
17
+ ## Training Data
18
+ The model was trained on the [SNLI](https://nlp.stanford.edu/projects/snli/) and [MultiNLI](https://cims.nyu.edu/~sbowman/multinli/) datasets. For a given sentence pair, it will output three scores corresponding to the labels: contradiction, entailment, neutral.
19
+
20
+ ## Performance
21
+ - Accuracy on SNLI-test dataset: 91.64
22
+ - Accuracy on MNLI mismatched set: 87.77
23
+
24
+ For further evaluation results, see [SBERT.net - Pretrained Cross-Encoder](https://www.sbert.net/docs/pretrained_cross-encoders.html#nli).
25
+
26
+ ## Usage
27
+
28
+ Pre-trained models can be used like this:
29
+ ```python
30
+ from sentence_transformers import CrossEncoder
31
+ model = CrossEncoder('cross-encoder/nli-deberta-v3-xsmall')
32
+ scores = model.predict([('A man is eating pizza', 'A man eats something'), ('A black race car starts up in front of a crowd of people.', 'A man is driving down a lonely road.')])
33
+
34
+ #Convert scores to labels
35
+ label_mapping = ['contradiction', 'entailment', 'neutral']
36
+ labels = [label_mapping[score_max] for score_max in scores.argmax(axis=1)]
37
+ ```
38
+
39
+ ## Usage with Transformers AutoModel
40
+ You can also use the model directly with the Transformers library (without the SentenceTransformers library):
41
+ ```python
42
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
43
+ import torch
44
+
45
+ model = AutoModelForSequenceClassification.from_pretrained('cross-encoder/nli-deberta-v3-xsmall')
46
+ tokenizer = AutoTokenizer.from_pretrained('cross-encoder/nli-deberta-v3-xsmall')
47
+
48
+ features = tokenizer(['A man is eating pizza', 'A black race car starts up in front of a crowd of people.'], ['A man eats something', 'A man is driving down a lonely road.'], padding=True, truncation=True, return_tensors="pt")
49
+
50
+ model.eval()
51
+ with torch.no_grad():
52
+     scores = model(**features).logits
53
+ label_mapping = ['contradiction', 'entailment', 'neutral']
54
+ labels = [label_mapping[score_max] for score_max in scores.argmax(dim=1)]
55
+ print(labels)
56
+ ```
57
+
58
+ ## Zero-Shot Classification
59
+ This model can also be used for zero-shot classification:
60
+ ```python
61
+ from transformers import pipeline
62
+
63
+ classifier = pipeline("zero-shot-classification", model='cross-encoder/nli-deberta-v3-xsmall')
64
+
65
+ sent = "Apple just announced the newest iPhone X"
66
+ candidate_labels = ["technology", "sports", "politics"]
67
+ res = classifier(sent, candidate_labels)
68
+ print(res)
69
+ ```
added_tokens.json ADDED
@@ -0,0 +1 @@
 
1
+ {"[MASK]": 128000}
config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-xsmall",
3
+ "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 384,
10
+ "id2label": {
11
+ "0": "LABEL_0",
12
+ "1": "LABEL_1",
13
+ "2": "LABEL_2"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 1536,
17
+ "label2id": {
18
+ "LABEL_0": 0,
19
+ "LABEL_1": 1,
20
+ "LABEL_2": 2
21
+ },
22
+ "layer_norm_eps": 1e-07,
23
+ "max_position_embeddings": 512,
24
+ "max_relative_positions": -1,
25
+ "model_type": "deberta-v2",
26
+ "norm_rel_ebd": "layer_norm",
27
+ "num_attention_heads": 6,
28
+ "num_hidden_layers": 12,
29
+ "pad_token_id": 0,
30
+ "pooler_dropout": 0,
31
+ "pooler_hidden_act": "gelu",
32
+ "pooler_hidden_size": 384,
33
+ "pos_att_type": [
34
+ "p2c",
35
+ "c2p"
36
+ ],
37
+ "position_biased_input": false,
38
+ "position_buckets": 256,
39
+ "relative_attention": true,
40
+ "share_att_key": true,
41
+ "torch_dtype": "float32",
42
+ "transformers_version": "4.11.3",
43
+ "type_vocab_size": 0,
44
+ "vocab_size": 128100
45
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bfaa3c5238c1cddcf8a8f76bc82686a653473218f880452dded766a93f41461
3
+ size 283416722
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"do_lower_case": false, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "split_by_punct": false, "sp_model_kwargs": {}, "vocab_type": "spm", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "microsoft/deberta-v3-xsmall", "tokenizer_class": "DebertaV2Tokenizer", "model_max_length": 512}