Ceshine Lee committed on
Commit
92f50da
1 Parent(s): 5e726da

First version

Browse files
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # TinyBERT_L-4_H-312_v2 English Sentence Encoder
2
+
3
+ This is distilled from the `distilbert-base-nli-stsb-mean-tokens` pre-trained model from [Sentence-Transformers](https://sbert.net/).
4
+
5
+ The embedding vector is obtained by mean/average pooling of the last layer's hidden states.
6
+
7
+ ## Model Comparison
8
+
9
+ We compute cosine similarity scores of the embeddings of each sentence pair to obtain the Spearman correlation on the STS benchmark (higher is better):
10
+
11
+ | | Dev | Test |
12
+ | ------------------------------------ | ----- | ----- |
13
+ | distilbert-base-nli-stsb-mean-tokens | .8667 | .8516 |
14
+ | TinyBERT_L-4_H-312_v2-distill-AllNLI | .8587 | .8283 |
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "nreimers/TinyBERT_L-4_H-312_v2",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 312,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 1200,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 4,
18
+ "output_hidden_states": true,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "transformers_version": "4.3.3",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
oggdo_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 256,
3
+ "do_lower_case": true
4
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55cf95c30a8ab86ae85cb790b49781ff8398d664fd7be6f5815ce5aa0e0dd5f6
3
+ size 57432312
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": "/home/ceshine/.cache/huggingface/transformers/f96b11e14fec8f4be06121e7f6bbe07f82216bf7d75ad76fe3a81251e8895d69.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "name_or_path": "nreimers/TinyBERT_L-4_H-312_v2", "do_basic_tokenize": true, "never_split": null}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff