ToS-Summarization / keyphrase_extraction.py
EE21's picture
Update keyphrase_extraction.py
f873e82
raw
history blame
No virus
964 Bytes
# One-time setup (run in a shell, not from this module): install the spaCy
# model that spacy.load() below expects. The original "!pip ..." line is
# IPython-only syntax and is a SyntaxError in a plain .py file.
#   pip install https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
import spacy
# Load the en_core_web_sm English pipeline once, at import time;
# extract_keyphrase reuses this shared `nlp` object to parse its input.
nlp = spacy.load("en_core_web_sm")
# Terms that flag a sentence as stating an obligation; extract_keyphrase
# checks every sentence against this list.
obligation_words = [
    "must",
    "will",
    "use",
    "may",
    "provides",
    "is obliged to",
    "has to",
    "needs to",
    "is required to",
    "shall",
    "should",
    "ought to",
    "required",
    "obligated",
    "duty",
]
def _has_token_run(tokens, phrase):
    """Return True if *phrase* occurs as consecutive items of *tokens*."""
    width = len(phrase)
    return any(
        tuple(tokens[start:start + width]) == phrase
        for start in range(len(tokens) - width + 1)
    )


def extract_keyphrase(text):
    """Return the sentences of *text* that contain an obligation term.

    The text is parsed with the module-level spaCy pipeline ``nlp`` and split
    into sentences. A sentence is kept when any entry of ``obligation_words``
    occurs in it, compared case-insensitively on token boundaries.

    Multi-word entries such as "has to" are matched as runs of consecutive
    tokens. A per-token membership test alone can never match them, because
    no single token contains a space.

    Args:
        text: The raw text to scan.

    Returns:
        A list with the text of every matching sentence, in document order.
    """
    # Split the terms once per call: single words get a set lookup,
    # multi-word phrases are kept as token tuples for sequence matching.
    single_words = set()
    phrases = []
    for term in obligation_words:
        parts = tuple(term.lower().split())
        if len(parts) == 1:
            single_words.add(parts[0])
        elif parts:
            phrases.append(parts)

    obligation_sentences = []
    for sentence in nlp(text).sents:
        # Drop whitespace tokens (e.g. line breaks) so they cannot split a
        # phrase whose words are otherwise adjacent.
        tokens = [
            token.text.lower() for token in sentence if not token.is_space
        ]
        if any(token in single_words for token in tokens) or any(
            _has_token_run(tokens, phrase) for phrase in phrases
        ):
            obligation_sentences.append(sentence.text)
    return obligation_sentences