File size: 585 Bytes
576d564 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
import re
# Sentence-boundary pattern. It is assembled from raw-string pieces so that
# regex escapes such as \w, \s and \. reach the regex engine untouched instead
# of being parsed as (invalid) string-literal escapes.
#
# NOTE(review): the duplicated members in the character classes below
# ("A-zA-z0-90-9", "..??!!") look like full-width characters that were
# normalised to ASCII at some point -- confirm against the upstream source.
# They are kept verbatim; duplicates inside a class do not change matching.
_SENTENCE_BOUNDARY = re.compile(
    # Alternative 1: whitespace (plus any following newlines) that comes
    # right after ".", "?" or "!" ...
    r"(?<!\w\.\w.)"       # ... unless preceded by an "x.y."-shaped token ("e.g.")
    r"(?<![A-Z][a-z]\.)"  # ... or by Capital + lowercase + "." ("Mr.", "Dr.")
    r"(?<=[.?!])\s\n*"
    r"|"
    # Alternative 2: a (possibly zero-width) boundary right after one of
    # "。", ".", "?" or "!", for text that has no space between sentences.
    # The character two positions back must not be a space, a digit, or
    # anything in the ASCII range A-z (letters plus "[", "\", "]", "^", "_"
    # and "`"), and the next character must not be "." or a closing "」".
    r"(?<=[^A-zA-z0-90-9 ].)"
    r"(?<=[。..??!!])"
    r"(?![\.」])\n*"
)


def split_sentences(text: str) -> list:
    """Split a string into a list of sentences.

    Boundaries are found with ``_SENTENCE_BOUNDARY``. The terminating
    punctuation is only inspected through lookbehinds, so it stays attached
    to the sentence it ends; the whitespace/newlines matched at a boundary
    are consumed. The second alternative of the pattern can match the empty
    string, and ``re.split`` only splits on empty matches from Python 3.7 on.

    :param text: The string to be processed.
    :return: The list of split sentences, each stripped of surrounding
        whitespace, with empty pieces removed. An empty or
        whitespace-only input yields an empty list.
    """
    pieces = _SENTENCE_BOUNDARY.split(text)
    # Strip each piece once and drop those that end up empty (for example the
    # trailing piece produced by a zero-width boundary at the end of the text).
    return [sentence for sentence in map(str.strip, pieces) if sentence]
|