File size: 1,667 Bytes
974e1e6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import os
import warnings
import time
# Ignore FutureWarning. This runs ahead of the imports below, presumably to
# silence warnings they emit at import time -- confirm before moving it.
warnings.simplefilter(action='ignore', category=FutureWarning)
from project_settings import project_path
# Point STANZA_RESOURCES_DIR at <project_path>/data/stanza_resources.
# NOTE(review): this is set before `import stanza`, presumably because stanza
# reads the variable when it is imported -- confirm before reordering.
os.environ["STANZA_RESOURCES_DIR"] = (project_path / "data/stanza_resources").as_posix()
import stanza
def get_args():
    """Parse this script's command-line options.

    Options:
        --text: string to process; defaults to "Mr. Honey Tian. How are you."
        --language: language code string; defaults to "en".

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``, carrying the
        ``text`` and ``language`` attributes.
    """
    cli = argparse.ArgumentParser()

    # (flag, default) pairs; both options are plain strings.
    string_options = (
        ("--text", "Mr. Honey Tian. How are you."),
        ("--language", "en"),
    )
    for flag, fallback in string_options:
        cli.add_argument(flag, type=str, default=fallback)

    return cli.parse_args()
# Language codes, apparently mirroring the models listed at
# https://huggingface.co/stanfordnlp
# NOTE(review): confirm the list is still current; nothing in the visible
# part of this file reads it.
languages = [
    "ru", "pl", "cs", "hi", "fr", "es", "en", "de", "ca", "zh-hant",
    "zh-hans", "xcl", "wo", "vi", "ur", "uk", "ug", "tr", "th", "te",
    "ta", "sv", "sr", "sme", "sl", "sk", "si", "sd", "sa", "ro",
    "qtd", "qpm", "qaf", "pt", "pcm", "orv", "nn", "nl", "nb", "myv",
    "my", "multilingual", "mt", "mr", "ml", "lzh", "lv", "lt", "lij", "la",
    "ky", "ko", "kmr", "kk", "ja", "it", "is", "id", "hyw", "hy",
    "hu", "hsb", "hr", "he", "hbo", "gv", "grc", "got", "gl", "fro",
    "fo", "fi", "fa", "eu", "et", "el", "da", "cy", "cu", "cop",
    "bxr", "hn", "bg", "be", "ar", "ang", "af", "swl", "no",
]
def main():
    """Run a Stanza pipeline on ``--text`` and print its sentences.

    Parses the command-line options, calls ``stanza.download`` for the
    requested language, builds a ``stanza.Pipeline`` for it, runs the
    pipeline on the text, and prints:

    1. ``time cost: <seconds>`` -- elapsed time of the pipeline call and the
       sentence-text extraction only (the download and pipeline construction
       are not timed);
    2. the list of sentence texts from the resulting document.
    """
    args = get_args()

    stanza.download(args.language)
    stanza_nlp = stanza.Pipeline(args.language)

    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments the way a
    # time.time() difference can.
    begin_time = time.perf_counter()
    doc = stanza_nlp(args.text)
    sentences = [sentence.text for sentence in doc.sentences]
    cost = time.perf_counter() - begin_time

    print(f"time cost: {cost}")
    print(sentences)


if __name__ == "__main__":
    main()
|