File size: 585 Bytes
576d564
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import re


def split_sentences(text):
    """Split a string into a list of sentences.

    Two kinds of boundary are recognised:

    * ``.``, ``?`` or ``!`` followed by a whitespace character, unless the
      text just before the boundary is shaped like ``e.g.`` (word char, dot,
      word char, any char) or ``Mr.`` (upper-case letter, lower-case letter,
      dot).
    * A sentence-ending mark (``。``, ``.``, ``?`` or ``!``) whose preceding
      character is outside the ``A-z`` / ``0-9`` / space set, provided the
      mark is not followed by ``.`` or ``」``. No whitespace is required
      here, so text without spaces between sentences is still split.

    :param text: The string to be processed.
    :return: The list of split sentences, each stripped of surrounding
        whitespace, with empty entries removed.
    """
    # Raw strings keep ``\w``, ``\.`` and ``\s`` as regex escapes instead of
    # invalid string escapes (a SyntaxWarning on recent Python versions).
    # ``\n`` in a raw pattern is still read by ``re`` as a newline.
    #
    # The second alternative can match the empty string, so splitting on it
    # relies on ``re.split`` splitting at zero-width matches (Python 3.7+).
    #
    # NOTE(review): the character classes contain redundant duplicates
    # (``..??!!``, ``A-zA-z0-90-9``); they may originally have been fullwidth
    # variants -- confirm against the upstream source before changing them.
    boundary = (
        r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=[.?!])\s\n*"
        r"|(?<=[^A-zA-z0-90-9 ].)(?<=[。..??!!])(?![\.」])\n*"
    )
    pieces = (piece.strip() for piece in re.split(boundary, text))
    # Zero-width and trailing matches leave empty strings behind; drop them.
    return [piece for piece in pieces if piece]