#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Split a piece of text into sentences with a Stanza pipeline.

The script downloads the Stanza resources for the requested language,
builds a pipeline, runs it on the given text and prints the elapsed
processing time followed by the list of sentence strings.
"""
import argparse
import os
import warnings
import time

# Silence FutureWarning noise before the heavier imports below run.
warnings.simplefilter(action='ignore', category=FutureWarning)

from project_settings import project_path

# Point Stanza at the project-local resources directory.
# NOTE(review): this assignment is deliberately kept ahead of
# `import stanza`; presumably Stanza reads the variable at import time,
# so do not reorder these lines without confirming.
os.environ["STANZA_RESOURCES_DIR"] = (project_path / "data/stanza_resources").as_posix()

import stanza


def get_args():
    """Parse the command-line arguments.

    Returns:
        argparse.Namespace with two string attributes:
        ``text`` (the text to split) and ``language`` (the language code
        handed to Stanza).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--text",
        type=str,
        default="Mr. Honey Tian. How are you.",
        help="text to split into sentences",
    )
    parser.add_argument(
        "--language",
        type=str,
        default="en",
        help="language code passed to stanza.download and stanza.Pipeline",
    )
    args = parser.parse_args()
    return args


# Language codes, see https://huggingface.co/stanfordnlp
# Kept for reference; it is not used to validate --language in the code
# visible here.
languages = [
    "ru", "pl", "cs", "hi", "fr", "es", "en", "de", "ca",
    "zh-hant", "zh-hans", "xcl", "wo", "vi", "ur", "uk", "ug",
    "tr", "th", "te", "ta", "sv", "sr", "sme", "sl", "sk", "si",
    "sd", "sa", "ro", "qtd", "qpm", "qaf", "pt", "pcm", "orv",
    "nn", "nl", "nb", "myv", "my", "multilingual", "mt", "mr",
    "ml", "lzh", "lv", "lt", "lij", "la", "ky", "ko", "kmr", "kk",
    "ja", "it", "is", "id", "hyw", "hy", "hu", "hsb", "hr", "he",
    "hbo", "gv", "grc", "got", "gl", "fro", "fo", "fi", "fa", "eu",
    "et", "el", "da", "cy", "cu", "cop", "bxr", "hn", "bg", "be",
    "ar", "ang", "af", "swl", "no"
]


def main():
    """Run the pipeline on the CLI text and print the timing and sentences."""
    args = get_args()

    stanza.download(args.language)
    stanza_nlp = stanza.Pipeline(args.language)

    # Only the pipeline call and sentence extraction are timed; download
    # and pipeline construction are excluded. perf_counter is monotonic,
    # so the interval cannot be skewed by system clock adjustments.
    begin_time = time.perf_counter()
    doc = stanza_nlp(args.text)
    sentences = [sentence.text for sentence in doc.sentences]
    cost = time.perf_counter() - begin_time

    print(f"time cost: {cost}")
    print(sentences)


if __name__ == "__main__":
    main()