Spaces:
Runtime error
Runtime error
File size: 1,495 Bytes
210facd d7485e8 8dcb3a7 d7485e8 210facd d7485e8 210facd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
from rake_nltk import Rake
import re
# Words and phrases that signal an obligation, in English and German.
#
# Every entry is stored in lowercase (including German nouns such as
# "pflicht") because the matcher compares against lowercased text; a
# capitalised entry could never match. Each term appears exactly once --
# several English phrases translate to the same German word ("muss",
# "sollte"), and repeating it only adds redundant comparisons.
obligation_words = [
    # English words
    "must", "will", "use", "may", "provides", "is obliged to",
    "has to", "needs to", "is required to", "shall",
    "should", "ought to", "required", "obligated", "duty",
    "agrees to", "has a duty to", "is expected to", "commits to",
    # German words
    "muss", "wird", "nutzen", "darf", "stellt bereit", "ist verpflichtet",
    "ist erforderlich", "soll",
    "sollte", "erforderlich", "verpflichtet", "pflicht",
    "stimmt zu", "hat die pflicht", "wird erwartet", "verpflichtet sich",
]
def extract_sentences_with_obligations(text, words=None):
    """Return the sentences of *text* that contain an obligation term.

    The text is split into sentences after ``.``, ``!`` or ``?`` followed by
    whitespace. A sentence is kept when any term occurs in it as a whole
    word or phrase, ignoring case.

    Terms are matched against the sentence itself rather than against RAKE
    keyphrases: RAKE breaks phrases at stopwords and punctuation, so terms
    that are stopwords ("will", "should") or contain one ("has to",
    "is required to") would never show up in a keyphrase and the sentence
    would be silently dropped.

    Args:
        text: The text to scan. ``None`` or an empty string yields ``[]``.
        words: Optional iterable of obligation terms. Defaults to the
            module-level ``obligation_words`` list.

    Returns:
        A list of the matching sentences, in their original order.
    """
    if not text:
        return []
    if words is None:
        words = obligation_words

    # One regex alternative per distinct term. Inner whitespace is made
    # flexible so multi-word phrases still match across line breaks or
    # repeated spaces.
    alternatives = list(dict.fromkeys(
        r"\s+".join(re.escape(part) for part in term.split())
        for term in words
        if term.strip()
    ))
    if not alternatives:
        return []

    # The lookarounds enforce whole-word matches, so "use" does not fire on
    # "house" and "may" does not fire on "mayor". Inflected forms have to be
    # listed explicitly as their own terms.
    term_pattern = re.compile(
        r"(?<!\w)(?:" + "|".join(alternatives) + r")(?!\w)",
        re.IGNORECASE,
    )

    # Split the text into sentences
    sentences = re.split(r'(?<=[.!?])\s+', text)
    return [sentence for sentence in sentences if term_pattern.search(sentence)]