Update README file
README.md CHANGED

````diff
@@ -34,7 +34,7 @@
 Note that this model is primarily aimed at predicting whether a Classical Chinese sentence is a letter title (书信标题) or not.
 
 Here is how to use this model to get the features of a given text in PyTorch:
-
+1. Import model
 ```python
 from transformers import BertTokenizer
 from transformers import BertForSequenceClassification
@@ -42,15 +42,19 @@ import torch
 from numpy import exp
 
 tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
-
-model = BertForSequenceClassification.from_pretrained(model_path,
+model = BertForSequenceClassification.from_pretrained('cbdb/ClassicalChineseLetterClassification',
                                                       output_attentions=False,
                                                       output_hidden_states=False)
+```
 
-
-
-
+2. Make a prediction
+```python
+max_seq_len = 512
 
+def softmax(vector):
+    e = exp(vector)
+    return e / e.sum()
+
 def predict_class(test_sen):
     tokens_test = tokenizer.encode_plus(
         test_sen,
@@ -74,11 +78,19 @@ def predict_class(test_sen):
     pred_class_dict = {k:v for k, v in zip(label2idx.keys(), softmax_score[0])}
     return pred_class_dict
 
-max_seq_len = 512
 label2idx = {'not-letter': 0,'letter': 1}
 idx2label = {v:k for k,v in label2idx.items()}
+```
 
+3. Change your sentence here
+```python
 test_sen = '上丞相康思公書'
+pred_class_proba = predict_class(test_sen)
+print(f'The predicted probability for the {list(pred_class_proba.keys())[0]} class: {list(pred_class_proba.values())[0]}')
+print(f'The predicted probability for the {list(pred_class_proba.keys())[1]} class: {list(pred_class_proba.values())[1]}')
+
+pred_class = idx2label[np.argmax(list(pred_class_proba.values()))]
+print(f'The predicted class is: {pred_class}')
 pred_class_dict = predict_class(test_sen)
 print(pred_class_dict)
 ```
````
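
Taken together, the updated README describes a three-step workflow: load the fine-tuned classifier, call `predict_class` to get per-class probabilities, and map the argmax back to a label. The diff above does not show the middle of `predict_class`, and the added snippet calls `np.argmax` without a visible `import numpy as np`, so the following is only a minimal, self-contained sketch of that workflow under stated assumptions: the `encode_plus` arguments, `model.eval()`, the `torch.no_grad()` inference call, and the `output.logits` handling are guesses rather than the repository's exact code.

```python
import numpy as np
import torch
from numpy import exp
from transformers import BertForSequenceClassification, BertTokenizer

# 1. Import model (repo id taken from the diff above)
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
model = BertForSequenceClassification.from_pretrained(
    'cbdb/ClassicalChineseLetterClassification',
    output_attentions=False,
    output_hidden_states=False)
model.eval()  # assumed: inference mode

max_seq_len = 512
label2idx = {'not-letter': 0, 'letter': 1}
idx2label = {v: k for k, v in label2idx.items()}

def softmax(vector):
    # Same helper as in the updated README: plain softmax via numpy.
    e = exp(vector)
    return e / e.sum()

def predict_class(test_sen):
    # The body between the visible hunks is not shown in the diff;
    # these encoding and inference steps are assumptions.
    tokens_test = tokenizer.encode_plus(
        test_sen,
        max_length=max_seq_len,
        truncation=True,
        padding='max_length',
        return_tensors='pt')
    with torch.no_grad():
        output = model(input_ids=tokens_test['input_ids'],
                       attention_mask=tokens_test['attention_mask'])
    softmax_score = softmax(output.logits.numpy())
    # Map class probabilities onto the label names.
    pred_class_dict = {k: v for k, v in zip(label2idx.keys(), softmax_score[0])}
    return pred_class_dict

# 2.-3. Make a prediction on your own sentence
test_sen = '上丞相康思公書'
pred_class_proba = predict_class(test_sen)
for label, proba in pred_class_proba.items():
    print(f'The predicted probability for the {label} class: {proba}')
pred_class = idx2label[np.argmax(list(pred_class_proba.values()))]
print(f'The predicted class is: {pred_class}')
```

Looping over `pred_class_proba.items()` is equivalent to the README's positional `list(...)[0]` / `list(...)[1]` indexing, since Python dictionaries preserve insertion order.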