Update README.md
Browse files
README.md
CHANGED
@@ -30,7 +30,7 @@ class LongCiteModel:
|
|
30 |
def text_split_by_punctuation(original_text, return_dict=False):
|
31 |
# text = re.sub(r'([a-z])\.([A-Z])', r'\1. \2', original_text) # separate period without space
|
32 |
text = original_text
|
33 |
-
custom_sent_tokenizer = PunktSentenceTokenizer(
|
34 |
punctuations = r"([。;!?])" # For Chinese support
|
35 |
|
36 |
separated = custom_sent_tokenizer.tokenize(text)
|
|
|
30 |
def text_split_by_punctuation(original_text, return_dict=False):
|
31 |
# text = re.sub(r'([a-z])\.([A-Z])', r'\1. \2', original_text) # separate period without space
|
32 |
text = original_text
|
33 |
+
custom_sent_tokenizer = PunktSentenceTokenizer()
|
34 |
punctuations = r"([。;!?])" # For Chinese support
|
35 |
|
36 |
separated = custom_sent_tokenizer.tokenize(text)
|