from constituent_treelib import ConstituentTree

# First, we have to provide a sentence that should be parsed
sentence = "I've got a machine learning task involving a large amount of text data."

# Then, we define the language that should be considered with respect to the underlying models
language = ConstituentTree.Language.English

# You can also specify the desired model for the language ("Small" is selected by default)
spacy_model_size = ConstituentTree.SpacyModelSize.Medium

# Next, we must create the necessary NLP pipeline.
# If you wish, you can instruct the library to download and install the models automatically
nlp = ConstituentTree.create_pipeline(language, spacy_model_size)  # , download_models=True

# Now, we can instantiate a ConstituentTree object and pass it the sentence and the NLP pipeline
tree = ConstituentTree(sentence, nlp)

# Finally, we can extract the phrases
tree.extract_all_phrases()
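
# Optional: a minimal sketch for inspecting the result, assuming (as suggested by
# the library's documentation) that extract_all_phrases() returns a dictionary
# mapping phrase labels (e.g. "NP", "VP") to lists of extracted phrases.
phrases = tree.extract_all_phrases()
for label, items in phrases.items():
    print(f"{label}: {items}")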