Edit model card

Example Usage

import torch
import jieba
import numpy as np
from classifier import BertForMaskClassification
from transformers import AutoTokenizer, AutoConfig, BertForTokenClassification

label_list = ["O","COMMA","PERIOD","COLON"]

label2punct = {
    "COMMA": ",",
    "PERIOD": "。",
    "COLON":":",
}

model_name_or_path = "pmp-h312"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model = BertForMaskClassification.from_pretrained(model_name_or_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def punct(text):

    tokenize_words = jieba.lcut(''.join(text))
    mask_tokens = []
    for word in tokenize_words:
        mask_tokens.extend(word)
        mask_tokens.append("[MASK]")
    tokenized_inputs = tokenizer(mask_tokens,is_split_into_words=True, return_tensors="pt")
    with torch.no_grad():   
        logits = model(**tokenized_inputs).logits
    predictions = logits.argmax(-1).tolist()
    predictions = predictions[0]
    tokens = tokenizer.convert_ids_to_tokens(tokenized_inputs["input_ids"][0])

    result =[]
    print(tokens)
    print(predictions)
    for token, prediction in zip(tokens, predictions):
        if token =="[CLS]" or token =="[SEP]":
            continue
        if token == "[MASK]":
            label = label_list[prediction]
            if label != "O":
                punct = label2punct[label]
                result.append(punct)
        else:
            result.append(token)

    return "".join(result)

text = '肝浊音界正常肝上界位于锁骨中线第五肋间移动浊音阴性肾区无叩痛'
print(punct(text))

# 肝浊音界正常,肝上界位于锁骨中线第五肋间,移动浊音阴性,肾区无叩痛。

Acknowledgments

This work was in part supported by Shenzhen Science and Technology Program (No:JCYJ20210324135809025).

Citations

Coming Soon

License

MIT

Downloads last month
2
Inference API
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Model tree for rickltt/pmp-h312

Finetuned
rickltt/pmp-h256
Finetuned
this model