sliderSun committed
Commit • 128a1cc
1 parent: 1f3fcd0

Commit message: commit

Files changed:
- README.md +49 -0
- config.json +29 -0
- pytorch_model.bin +3 -0
- vocab.txt +0 -0
README.md
CHANGED
@@ -1,3 +1,52 @@
---
language:
- zh
license: apache-2.0

tags:
- bert
- NLU
- NLI

inference: true

widget:
- text: "今天心情不好[SEP]今天很开心"

---

# Erlangshen-Roberta-110M-Similarity, a Chinese similarity model, one of the [Fengshenbang-LM](https://github.com/IDEA-CCNL/Fengshenbang-LM) models.

We collected 20 paraphrase datasets in the Chinese domain for fine-tuning, 2,773,880 samples in total. Our model is mainly based on [roberta](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large).

## Usage
```python
from transformers import BertForSequenceClassification, BertTokenizer
import torch

tokenizer = BertTokenizer.from_pretrained('IDEA-CCNL/Erlangshen-Roberta-110M-Similarity')
model = BertForSequenceClassification.from_pretrained('IDEA-CCNL/Erlangshen-Roberta-110M-Similarity')

texta = '今天的饭不好吃'  # "Today's food doesn't taste good."
textb = '今天心情不好'    # "I'm in a bad mood today."

# Encode the sentence pair as a single sequence and score it with the classification head.
output = model(torch.tensor([tokenizer.encode(texta, textb)]))
print(torch.nn.functional.softmax(output.logits, dim=-1))
```
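
Per the `id2label` mapping in this commit's `config.json` ({"1": "similar", "0": "not similar"}), index 1 of the softmax output is the probability that the two sentences are paraphrases. A minimal sketch of reading off the predicted label (continuing from the code above; variable names are illustrative):

```python
probs = torch.nn.functional.softmax(output.logits, dim=-1)
pred_id = probs.argmax(dim=-1).item()
# transformers exposes the config's id2label with integer keys after loading.
print(model.config.id2label[pred_id], probs[0, pred_id].item())
```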

## Scores on downstream Chinese tasks (the dev sets of BUSTM and AFQMC may overlap with the training set)

| Model | BQ | BUSTM | AFQMC |
| :--------: | :-----: | :----: | :-----: |
| Erlangshen-Roberta-110M-Similarity | 85.41 | 95.18 | 81.72 |
| Erlangshen-Roberta-330M-Similarity | 86.21 | 99.29 | 93.89 |
| Erlangshen-MegatronBert-1.3B-Similarity | 86.31 | - | - |

## Citation

If you find this resource useful, please cite the following website in your paper.

```
@misc{Fengshenbang-LM,
  title={Fengshenbang-LM},
  author={IDEA-CCNL},
  year={2021},
  howpublished={\url{https://github.com/IDEA-CCNL/Fengshenbang-LM}},
}
```
config.json
ADDED
@@ -0,0 +1,29 @@
{
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "directionality": "bidi",
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {"1": "similar", "0": "not similar"},
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "type_vocab_size": 2,
  "vocab_size": 21128
}
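
The two-entry `id2label` map implies a binary classification head (`num_labels` = 2). As a hedged check, loading this config through the standard `transformers` loader should normalize the JSON's string keys to integers:

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained('IDEA-CCNL/Erlangshen-Roberta-110M-Similarity')
print(config.num_labels)  # 2, derived from the id2label map
print(config.id2label)    # {1: 'similar', 0: 'not similar'}
```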
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1db1f280d11dedabeb6b1af2182415839559ae2199fb4c6f5c125cef85a8f33c
size 409160877
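
The file above is a Git LFS pointer: the actual weights live in LFS storage and are identified by a SHA-256 object id and byte size. A minimal sketch (assuming `pytorch_model.bin` has already been downloaded locally; the path is illustrative) for checking the download against the pointer:

```python
import hashlib
import os

# Values copied from the LFS pointer above.
EXPECTED_OID = "1db1f280d11dedabeb6b1af2182415839559ae2199fb4c6f5c125cef85a8f33c"
EXPECTED_SIZE = 409160877

path = "pytorch_model.bin"  # assumed local download path
sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"
assert sha.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("pytorch_model.bin matches the LFS pointer")
```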
vocab.txt
ADDED
The diff for this file is too large to render.