File size: 791 Bytes
f25c867
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse

import spacy

from project_settings import project_path


def get_args():
    """Parse the command-line options for this script.

    Options:
        --txt_file: path of the text file to split into sentences.
            Defaults to ``data/e_book/confucianism/the_analects.txt``
            under ``project_path``, rendered as a POSIX-style string.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """
    arg_parser = argparse.ArgumentParser()

    default_txt_file = project_path / "data/e_book/confucianism/the_analects.txt"
    arg_parser.add_argument(
        "--txt_file",
        type=str,
        default=default_txt_file.as_posix(),
    )

    return arg_parser.parse_args()


def main():
    """Split the input text file into sentences and print them.

    Reads the whole file named by ``--txt_file`` as UTF-8, runs it through
    the spaCy pipeline ``zh_core_web_sm``, and prints each non-empty
    (whitespace-stripped) sentence followed by a line of 150 dashes.
    """
    cli_args = get_args()

    with open(cli_args.txt_file, "r", encoding="utf-8") as txt_fp:
        raw_text = txt_fp.read()

    pipeline = spacy.load("zh_core_web_sm")
    parsed = pipeline(raw_text)

    # Visual divider printed after every sentence.
    separator = "-" * 150

    for span in parsed.sents:
        sentence_text = span.text.strip()
        # Whitespace-only spans carry no content; skip them.
        if not sentence_text:
            continue
        print(sentence_text)
        print(separator)


# Run the sentence splitter only when executed as a script, not on import.
if __name__ == '__main__':
    main()