File size: 1,352 Bytes
751936e
814ee6b
751936e
814ee6b
751936e
 
 
 
 
 
 
814ee6b
751936e
814ee6b
f4973d4
 
 
814ee6b
 
 
 
 
 
 
 
 
f4973d4
 
814ee6b
 
 
 
 
 
 
 
 
 
f4973d4
 
 
 
 
 
 
 
814ee6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78

from zhon.hanzi import punctuation as zh_punc

def is_zh_char(uchar):
    """Return True if ``uchar`` is a single Chinese (CJK ideograph) character.

    The accepted code point range is U+4E00..U+9FA5, inclusive. For
    comparison, the jieba tokenizer pattern at
    https://github.com/fxsjy/jieba/blob/master/jieba/__init__.py#L48
    uses the wider upper bound U+9FD5; this function keeps U+9FA5.

    Args:
        uchar: A string expected to hold exactly one character.

    Returns:
        bool: True only for a one-character string inside the range;
        False for the empty string and for longer strings.
    """
    # String comparison is lexicographic, so without the length guard a
    # multi-character string would be judged on its first character alone.
    return len(uchar) == 1 and u'\u4e00' <= uchar <= u'\u9fa5'


def has_zh(text):
    """Return True if at least one character of ``text`` is Chinese."""
    for candidate in text:
        if is_zh_char(candidate):
            # One hit is enough; stop scanning.
            return True
    return False


def get_zh_count(text):
    """Return how many characters of ``text`` are Chinese."""
    total = 0
    for candidate in text:
        if is_zh_char(candidate):
            total += 1
    return total


def is_all_zh(text):
    """Return True if ``text`` is non-empty and every character is Chinese.

    Args:
        text: The string to inspect.

    Returns:
        bool: False for empty input, otherwise whether every character
        satisfies ``is_zh_char``.
    """
    # ``all`` over an empty sequence is vacuously True; reject empty input
    # explicitly so the result matches ``is_all_en("")``, which is False.
    return bool(text) and all(is_zh_char(char) for char in text)


def is_all_en(text):
    """Return True if ``text`` is non-empty and made only of ASCII letters.

    The check runs on the UTF-8 encoded bytes: ``bytes.isalpha`` only
    recognises ASCII letters, so any non-ASCII character (which encodes to
    bytes >= 0x80) makes the result False.
    """
    encoded = text.encode('utf-8')
    return encoded.isalpha()


def is_digit_char(uchar):
    """Return True if ``uchar`` is a single ASCII decimal digit (0-9).

    Args:
        uchar: A string expected to hold exactly one character.

    Returns:
        bool: True only for one of the ten characters "0" through "9";
        False for the empty string and for longer strings.
    """
    # ``in`` on a string is a substring test, so without the length guard
    # "" and runs such as "12" would be reported as digit characters.
    return len(uchar) == 1 and uchar in "0123456789"


def has_digit(text):
    """Return True if at least one character of ``text`` is a digit."""
    for candidate in text:
        if is_digit_char(candidate):
            # One hit is enough; stop scanning.
            return True
    return False


def is_all_digit(text):
    """Return True if ``text`` is non-empty and every character is a digit.

    Args:
        text: The string to inspect.

    Returns:
        bool: False for empty input, otherwise whether every character
        satisfies ``is_digit_char``.
    """
    # ``all`` over an empty sequence is vacuously True; reject empty input
    # explicitly so the result matches ``is_all_en("")`` and the built-in
    # ``"".isdigit()``, both of which are False.
    return bool(text) and all(is_digit_char(char) for char in text)


def get_digit_count(text):
    """Return how many characters of ``text`` are ASCII digits (0-9)."""
    ascii_digits = "0123456789"
    return sum(1 for ch in text if ch in ascii_digits)



def has_zh_punc(text):
    """
    Return True if ``text`` contains any Chinese punctuation mark.

    Membership is tested against ``zh_punc`` (``zhon.hanzi.punctuation``).
    """
    for candidate in text:
        if candidate in zh_punc:
            return True
    return False



def is_space_char(uchar):
    """Return True if ``uchar`` is a single Unicode whitespace character.

    Whitespace is whatever ``str.isspace`` accepts, which is the same set
    that ``str.strip`` removes (the test ``get_space_count`` relies on).
    Invisible characters that Unicode does not classify as whitespace,
    such as the zero-width space U+200B, are not counted; see
    https://emptycharacter.com/ for a catalogue of such characters.

    Args:
        uchar: A string expected to hold exactly one character.

    Returns:
        bool: True only for a one-character whitespace string.
    """
    return len(uchar) == 1 and uchar.isspace()


def has_space(text):
    """Return True if at least one character of ``text`` is whitespace.

    Whitespace is defined by ``str.isspace`` (Unicode-aware, so it covers
    e.g. the ideographic space U+3000 as well as ASCII blanks).

    Args:
        text: The string to inspect.

    Returns:
        bool: False for empty input or when no whitespace is present.
    """
    return any(ch.isspace() for ch in text)

def is_all_space(text):
    """Return True if ``text`` is non-empty and consists only of whitespace.

    Whitespace is defined by ``str.isspace`` (Unicode-aware).

    Args:
        text: The string to inspect.

    Returns:
        bool: False for empty input, otherwise whether every character is
        whitespace.
    """
    # Reject empty input explicitly: ``all`` over nothing is vacuously True.
    return bool(text) and all(ch.isspace() for ch in text)

def get_space_count(text):
    """Return how many characters of ``text`` are whitespace.

    A character counts when stripping it leaves an empty string.
    """
    return sum(1 for ch in text if not ch.strip())