# File size: 942 Bytes
#
# 8d1b471

from constituent_treelib import ConstituentTree

# Example: parse one sentence with constituent_treelib and show its phrases.

# First, provide the sentence that should be parsed.
sentence = "I've got a machine learning task involving a large amount of text data."

# Then, define the language to be considered with respect to the underlying models.
language = ConstituentTree.Language.English

# Optionally specify the desired model size for the language
# ("Small" is selected by default).
spacy_model_size = ConstituentTree.SpacyModelSize.Medium

# Next, create the necessary NLP pipeline.
# If you wish, you can instruct the library to download and install the models
# automatically by additionally passing download_models=True.
nlp = ConstituentTree.create_pipeline(language, spacy_model_size)

# Now instantiate a ConstituentTree object from the sentence and the NLP pipeline.
tree = ConstituentTree(sentence, nlp)

# Finally, extract the phrases. The result is bound to a name and printed
# explicitly: a bare expression statement is only echoed in an interactive
# session, so when this file runs as a script the value would otherwise be
# silently discarded.
phrases = tree.extract_all_phrases()
print(phrases)