File size: 1,495 Bytes
210facd
 
d7485e8
 
8dcb3a7
 
 
 
 
 
 
 
 
 
 
 
 
 
d7485e8
210facd
 
 
 
 
 
 
d7485e8
 
210facd
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from rake_nltk import Rake
import re

# Words and phrases that signal an obligation. Each entry is listed once;
# the German section mirrors the English one, and several English
# phrasings share a single German translation (e.g. "must", "has to" and
# "needs to" all map to "muss").
obligation_words = [
    # English words
    "must", "will", "use", "may", "provides", "is obliged to",
    "has to", "needs to", "is required to", "shall",
    "should", "ought to", "required", "obligated", "duty",
    "agrees to", "has a duty to", "is expected to", "commits to",

    # German words
    "muss", "wird", "nutzen", "darf", "stellt bereit", "ist verpflichtet",
    "ist erforderlich", "soll",
    "sollte", "erforderlich", "verpflichtet", "Pflicht",
    "stimmt zu", "hat die Pflicht", "wird erwartet", "verpflichtet sich",
]


def extract_sentences_with_obligations(text, keywords=None):
    """Return the sentences of ``text`` that contain an obligation term.

    The text is split into sentences after ``.``, ``!`` or ``?`` followed
    by whitespace. A sentence is kept when it contains at least one term
    as a whole word or phrase, compared case-insensitively.

    Args:
        text: The text to scan. ``None`` or an empty string yields ``[]``.
        keywords: Optional iterable of terms to look for. When omitted,
            the module-level ``obligation_words`` list is used.

    Returns:
        A list of the matching sentences, in their original order and
        with their original spelling.
    """
    if not text:
        return []

    terms = obligation_words if keywords is None else keywords

    # Terms are matched against the raw sentence instead of RAKE
    # keyphrases: keyphrase extraction splits phrases at stopwords, so
    # terms built from stopwords ("will", "should", "has to", ...) would
    # never be found in its output.
    #
    # Inside a multi-word term any run of whitespace is accepted, so a
    # line break in the middle of "has to" does not hide the match.
    alternatives = [
        r"\s+".join(re.escape(part) for part in term.split())
        for term in terms
        if term.strip()
    ]
    if not alternatives:
        # An empty alternation would match every sentence.
        return []

    # The lookarounds enforce whole-word matches, so "use" does not fire
    # on "because" and "may" does not fire on "mayor". IGNORECASE lets
    # capitalised terms such as "Pflicht" match and catches "MUST".
    term_pattern = re.compile(
        r"(?<!\w)(?:" + "|".join(alternatives) + r")(?!\w)",
        re.IGNORECASE,
    )

    # Split the text into sentences
    sentences = re.split(r'(?<=[.!?])\s+', text)

    return [sentence for sentence in sentences if term_pattern.search(sentence)]