cbdb committed on
Commit
57767ac
·
1 Parent(s): c2ab321

Update README file

Browse files
Files changed (1) hide show
  1. README.md +19 -7
README.md CHANGED
@@ -34,7 +34,7 @@ Note that this model is primarily aimed at predicting whether a Classical Chines
34
  Note that this model is primarily aimed at predicting whether a Classical Chinese sentence is a letter title (书信标题) or not.
35
 
36
  Here is how to use this model to get the features of a given text in PyTorch:
37
-
38
  ```python
39
  from transformers import BertTokenizer
40
  from transformers import BertForSequenceClassification
@@ -42,15 +42,19 @@ import torch
42
  from numpy import exp
43
 
44
  tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
45
- model_path = '/content/drive/MyDrive/CBDB/Letter_Classifier/model/letter_classifer_epoch2' # here
46
- model = BertForSequenceClassification.from_pretrained(model_path,
47
  output_attentions=False,
48
  output_hidden_states=False)
 
49
 
50
- def softmax(vector):
51
- e = exp(vector)
52
- return e / e.sum()
53
 
 
 
 
 
54
  def predict_class(test_sen):
55
  tokens_test = tokenizer.encode_plus(
56
  test_sen,
@@ -74,11 +78,19 @@ def predict_class(test_sen):
74
  pred_class_dict = {k:v for k, v in zip(label2idx.keys(), softmax_score[0])}
75
  return pred_class_dict
76
 
77
- max_seq_len = 512
78
  label2idx = {'not-letter': 0,'letter': 1}
79
  idx2label = {v:k for k,v in label2idx.items()}
 
80
 
 
 
81
  test_sen = '上丞相康思公書'
 
 
 
 
 
 
82
  pred_class_dict = predict_class(test_sen)
83
  print(pred_class_dict)
84
  ```
 
34
  Note that this model is primarily aimed at predicting whether a Classical Chinese sentence is a letter title (书信标题) or not.
35
 
36
  Here is how to use this model to get the features of a given text in PyTorch:
37
+ 1. Import model
38
  ```python
39
  from transformers import BertTokenizer
40
  from transformers import BertForSequenceClassification
 
42
  from numpy import exp
43
 
44
  tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
45
+ model = BertForSequenceClassification.from_pretrained('cbdb/ClassicalChineseLetterClassification',
 
46
  output_attentions=False,
47
  output_hidden_states=False)
48
+ ```
49
 
50
+ 2. Make a prediction
51
+ ```python
52
+ max_seq_len = 512
53
 
54
+ def softmax(vector):
55
+ e = exp(vector)
56
+ return e / e.sum()
57
+
58
  def predict_class(test_sen):
59
  tokens_test = tokenizer.encode_plus(
60
  test_sen,
 
78
  pred_class_dict = {k:v for k, v in zip(label2idx.keys(), softmax_score[0])}
79
  return pred_class_dict
80
 
 
81
  label2idx = {'not-letter': 0,'letter': 1}
82
  idx2label = {v:k for k,v in label2idx.items()}
83
+ ```
84
 
85
+ 3. Change your sentence here
86
+ ```python
87
  test_sen = '上丞相康思公書'
88
+ pred_class_proba = predict_class(test_sen)
89
+ print(f'The predicted probability for the {list(pred_class_proba.keys())[0]} class: {list(pred_class_proba.values())[0]}')
90
+ print(f'The predicted probability for the {list(pred_class_proba.keys())[1]} class: {list(pred_class_proba.values())[1]}')
91
+
92
+ pred_class = idx2label[np.argmax(list(pred_class_proba.values()))]
93
+ print(f'The predicted class is: {pred_class}')
+ test_sen = '上丞相康思公書'
94
  pred_class_dict = predict_class(test_sen)
95
  print(pred_class_dict)
96
  ```