"""Example: extract all phrases from a sentence with constituent-treelib.

Builds an NLP pipeline (benepar + spaCy under the hood), parses one
English sentence into a constituent tree, and prints every phrase.
"""

from constituent_treelib import ConstituentTree

# First, we have to provide a sentence that should be parsed
sentence = "I've got a machine learning task involving a large amount of text data."

# Then, we define the language that should be considered with respect to the underlying models
language = ConstituentTree.Language.English

# You can also specify the desired model for the language ("Small" is selected by default)
spacy_model_size = ConstituentTree.SpacyModelSize.Medium

# Next, we must create the necessary NLP pipeline.
# If you wish, you can instruct the library to download and install the models automatically
nlp = ConstituentTree.create_pipeline(language, spacy_model_size)  # , download_models=True

# Now, we can instantiate a ConstituentTree object and pass it the sentence and the NLP pipeline
tree = ConstituentTree(sentence, nlp)

# Finally, we can extract the phrases; print them so the example shows its output
phrases = tree.extract_all_phrases()
print(phrases)