import re
from MHGTagger.rnn_annotate import annotate
from Tagset_Mappings.tag_mapping import map_tags
from parsing.src.parse import run_parse
from nltk import word_tokenize


def parse_text(text):
    """Tokenize, tag and parse a piece of raw text.

    The text is split with ``tokenize``, the resulting tokens are passed to
    ``annotate``, the tags it returns are converted with ``map_tags``, and
    the tokens plus converted tags are handed to ``run_parse``.

    Returns:
        A 4-tuple ``(tokens, tags, probs, parse_tree)`` where ``tokens`` and
        ``probs`` are what ``annotate`` returned, ``tags`` is the output of
        ``map_tags``, and ``parse_tree`` is the first element of the
        ``run_parse`` result.
    """
    raw_tokens = tokenize(text)
    tokens, raw_tags, probs = annotate(raw_tokens)
    tags = map_tags(raw_tags)
    # run_parse returns an indexable result; only its first item is used.
    parse_tree = run_parse(tokens, tags)[0]
    return tokens, tags, probs, parse_tree


def tokenize(text: str):
    """Pad punctuation with spaces, then split the text with NLTK.

    Any of the marks ``. , ; : ? ! "`` that is followed by whitespace is
    rewritten as the mark surrounded by single spaces (whitespace in front
    of the mark is absorbed) before ``word_tokenize`` is applied.

    Returns:
        The token sequence produced by ``word_tokenize``.
    """
    # Two passes are needed because re.sub matches do not overlap: a mark
    # directly followed by another mark (e.g. `,"`) only gains trailing
    # whitespace once the first pass has padded its neighbour, so the
    # second pass is what separates it.
    for pattern in (r'\s*([.,;:?!"])\s', r'\s*([.,;:?!"]) '):
        text = re.sub(pattern, r' \1 ', text)
    return word_tokenize(text)