File size: 625 Bytes
751936e
 
 
f4973d4
 
751936e
 
 
 
 
 
 
 
 
 
 
f4973d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34





def is_chinese(uchar):
    """Return True if ``uchar`` lies within U+4E00..U+9FA5, bounds included.

    The test is a plain string comparison against the two boundary
    characters, so the function is meant to be called with one character
    at a time; a longer string would be compared lexicographically.

    Reference pattern from jieba:
    https://github.com/fxsjy/jieba/blob/master/jieba/__init__.py#L48
    NOTE: that pattern's upper bound is U+9FD5, whereas this function
    stops at U+9FA5.
    """
    lower_bound = u'\u4e00'
    upper_bound = u'\u9fa5'
    return lower_bound <= uchar and uchar <= upper_bound



def has_chinese(text):
    """Return True if at least one item of ``text`` passes ``is_chinese``.

    Iteration stops at the first match. An empty ``text`` yields False.
    """
    for symbol in text:
        if is_chinese(symbol):
            return True
    return False


def get_zh_count(text):
    """Count the items of ``text`` for which ``is_chinese`` is true.

    Returns 0 when ``text`` is empty.
    """
    total = 0
    for symbol in text:
        # A True result contributes 1 to the tally, a False result 0.
        total += is_chinese(symbol)
    return total


def is_all_chinese(text):
    """Return True if every item of ``text`` passes ``is_chinese``.

    Iteration stops at the first item that fails. An empty ``text``
    yields True, because there is no item that fails the check.
    """
    for symbol in text:
        if not is_chinese(symbol):
            return False
    return True


def get_digit_count(text):
    """Return how many characters of ``text`` are one of the digits 0-9.

    Only the ten ASCII digits are counted; other numeric characters
    (for example full-width digits) are not.
    """
    ascii_digits = "0123456789"
    return sum(1 for symbol in text if symbol in ascii_digits)