Update README.md
Browse files
README.md
CHANGED
@@ -5,13 +5,11 @@ tags:
|
|
5 |
inference: False
|
6 |
---
|
7 |
|
8 |
-
# GlyceBert_pytorch
|
9 |
-
|
10 |
-
|
11 |
-
本项目主要自定义了tokenization_glycebert_fast.py文件中的GlyceBertTokenizerFast代码，从而可以从huggingface.co调用。
|
12 |
```python
|
13 |
-
pretrained_tokenizer_name = "junnyu/ChineseBERT-base"
|
14 |
-
tokenizer = GlyceBertTokenizerFast.from_pretrained(pretrained_tokenizer_name)
|
15 |
```
|
16 |
|
17 |
# Paper
|
@@ -20,32 +18,29 @@ tokenizer = GlyceBertTokenizerFast.from_pretrained(pretrained_tokenizer_name)
|
|
20 |
|
21 |
# Install
|
22 |
```bash
|
23 |
-
pip install glycebert
|
24 |
or
|
25 |
-
pip install git+https://github.com/JunnYu/GlyceBert_pytorch.git
|
26 |
```
|
27 |
|
28 |
# Usage
|
29 |
```python
|
30 |
import torch
|
31 |
-
from transformers import BertConfig as GlyceBertConfig
|
|
|
32 |
|
33 |
-
from glycebert import GlyceBertForMaskedLM, GlyceBertTokenizerFast
|
34 |
-
|
35 |
-
# 使用我这个里面的tokenizer config和model config
|
36 |
pretrained_tokenizer_name = "junnyu/ChineseBERT-large"
|
37 |
pretrained_model_name = "ShannonAI/ChineseBERT-large"
|
38 |
|
39 |
-
tokenizer = GlyceBertTokenizerFast.from_pretrained(pretrained_tokenizer_name)
|
40 |
-
config = GlyceBertConfig.from_pretrained(pretrained_tokenizer_name)
|
41 |
-
chinese_bert = GlyceBertForMaskedLM.from_pretrained(
|
42 |
-
pretrained_model_name, config=config
|
43 |
-
)
|
44 |
|
45 |
text = "北京是[MASK]国的首都。"
|
46 |
inputs = tokenizer(text, return_tensors="pt")
|
47 |
print(inputs)
|
48 |
maskpos = 4
|
|
|
49 |
with torch.no_grad():
|
50 |
o = chinese_bert(**inputs)
|
51 |
value, index = o.logits.softmax(-1)[0, maskpos].topk(10)
|
|
|
5 |
inference: False
|
6 |
---
|
7 |
|
8 |
+
# ChineseBert_pytorch
|
9 |
+
本项目主要自定义了tokenization_chinesebert_fast.py文件中的ChineseBertTokenizerFast代码，从而可以从huggingface.co调用。
|
|
|
|
|
10 |
```python
|
11 |
+
pretrained_tokenizer_name = "junnyu/ChineseBERT-base"
|
12 |
+
tokenizer = ChineseBertTokenizerFast.from_pretrained(pretrained_tokenizer_name)
|
13 |
```
|
14 |
|
15 |
# Paper
|
|
|
18 |
|
19 |
# Install
|
20 |
```bash
|
21 |
+
pip install chinesebert
|
22 |
or
|
23 |
+
pip install git+https://github.com/JunnYu/ChineseBert_pytorch.git
|
24 |
```
|
25 |
|
26 |
# Usage
|
27 |
```python
|
28 |
import torch
|
29 |
+
from transformers import BertConfig as ChineseBertConfig
|
30 |
+
from chinesebert import ChineseBertForMaskedLM, ChineseBertTokenizerFast
|
31 |
|
|
|
|
|
|
|
32 |
pretrained_tokenizer_name = "junnyu/ChineseBERT-large"
|
33 |
pretrained_model_name = "ShannonAI/ChineseBERT-large"
|
34 |
|
35 |
+
tokenizer = ChineseBertTokenizerFast.from_pretrained(pretrained_tokenizer_name)
|
36 |
+
config = ChineseBertConfig.from_pretrained(pretrained_tokenizer_name)
|
37 |
+
chinese_bert = ChineseBertForMaskedLM.from_pretrained(pretrained_model_name, config=config)
|
|
|
|
|
38 |
|
39 |
text = "北京是[MASK]国的首都。"
|
40 |
inputs = tokenizer(text, return_tensors="pt")
|
41 |
print(inputs)
|
42 |
maskpos = 4
|
43 |
+
|
44 |
with torch.no_grad():
|
45 |
o = chinese_bert(**inputs)
|
46 |
value, index = o.logits.softmax(-1)[0, maskpos].topk(10)
|