Update README file
README.md CHANGED

````diff
@@ -34,7 +34,7 @@
 Note that this model is primarily aimed at predicting whether a Classical Chinese sentence is a letter title (书信标题) or not.
 
 Here is how to use this model to get the features of a given text in PyTorch:
-
+1. Import model
 ```python
 from transformers import BertTokenizer
 from transformers import BertForSequenceClassification
@@ -42,15 +42,19 @@ import torch
 from numpy import exp
 
 tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
-
-model = BertForSequenceClassification.from_pretrained(model_path,
+model = BertForSequenceClassification.from_pretrained('cbdb/ClassicalChineseLetterClassification',
                                                       output_attentions=False,
                                                       output_hidden_states=False)
+```
 
-
-
-
+2. Make a prediction
+```python
+max_seq_len = 512
 
+def softmax(vector):
+    e = exp(vector)
+    return e / e.sum()
+
 def predict_class(test_sen):
     tokens_test = tokenizer.encode_plus(
         test_sen,
@@ -74,11 +78,19 @@ def predict_class(test_sen):
     pred_class_dict = {k:v for k, v in zip(label2idx.keys(), softmax_score[0])}
     return pred_class_dict
 
-max_seq_len = 512
 label2idx = {'not-letter': 0,'letter': 1}
 idx2label = {v:k for k,v in label2idx.items()}
+```
 
+3. Change your sentence here
+```python
 test_sen = '上丞相康思公書'
+pred_class_proba = predict_class(test_sen)
+print(f'The predicted probability for the {list(pred_class_proba.keys())[0]} class: {list(pred_class_proba.values())[0]}')
+print(f'The predicted probability for the {list(pred_class_proba.keys())[1]} class: {list(pred_class_proba.values())[1]}')
+
+pred_class = idx2label[np.argmax(list(pred_class_proba.values()))]
+print(f'The predicted class is: {pred_class}')
 pred_class_dict = predict_class(test_sen)
 print(pred_class_dict)
 ```
````
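
Taken together, the updated README describes a three-step workflow: load the fine-tuned classifier, call `predict_class` to get per-class probabilities, and map the argmax back to a label. The diff above does not show the middle of `predict_class`, and the added snippet calls `np.argmax` without a visible `import numpy as np`, so the following is only a minimal, self-contained sketch of that workflow under stated assumptions: the `encode_plus` arguments, `model.eval()`, the `torch.no_grad()` inference call, and the `output.logits` handling are guesses rather than the repository's exact code.

```python
import numpy as np
import torch
from numpy import exp
from transformers import BertForSequenceClassification, BertTokenizer

# 1. Import model (repo id taken from the diff above)
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
model = BertForSequenceClassification.from_pretrained(
    'cbdb/ClassicalChineseLetterClassification',
    output_attentions=False,
    output_hidden_states=False)
model.eval()  # assumed: inference mode

max_seq_len = 512
label2idx = {'not-letter': 0, 'letter': 1}
idx2label = {v: k for k, v in label2idx.items()}

def softmax(vector):
    # Same helper as in the updated README: plain softmax via numpy.
    e = exp(vector)
    return e / e.sum()

def predict_class(test_sen):
    # The body between the visible hunks is not shown in the diff;
    # these encoding and inference steps are assumptions.
    tokens_test = tokenizer.encode_plus(
        test_sen,
        max_length=max_seq_len,
        truncation=True,
        padding='max_length',
        return_tensors='pt')
    with torch.no_grad():
        output = model(input_ids=tokens_test['input_ids'],
                       attention_mask=tokens_test['attention_mask'])
    softmax_score = softmax(output.logits.numpy())
    # Map class probabilities onto the label names.
    pred_class_dict = {k: v for k, v in zip(label2idx.keys(), softmax_score[0])}
    return pred_class_dict

# 2.-3. Make a prediction on your own sentence
test_sen = '上丞相康思公書'
pred_class_proba = predict_class(test_sen)
for label, proba in pred_class_proba.items():
    print(f'The predicted probability for the {label} class: {proba}')
pred_class = idx2label[np.argmax(list(pred_class_proba.values()))]
print(f'The predicted class is: {pred_class}')
```

Looping over `pred_class_proba.items()` is equivalent to the README's positional `list(...)[0]` / `list(...)[1]` indexing, since Python dictionaries preserve insertion order.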