EE21 committed on
Commit
210facd
1 Parent(s): f873e82

Update keyphrase_extraction.py

Browse files
Files changed (1) hide show
  1. keyphrase_extraction.py +23 -18
keyphrase_extraction.py CHANGED
@@ -1,26 +1,31 @@
1
- !pip install https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.wh
2
-
3
- import spacy
4
-
5
- # Load the English language model
6
- nlp = spacy.load("en_core_web_sm")
7
 
8
  # Define a list of obligation words
9
  obligation_words = ["must", "will", "use", "may", "provides", 'is obliged to',
10
  'has to', 'needs to', 'is required to',
11
  "shall", "should", "ought to", "required", "obligated", "duty"]
12
 
13
- def extract_keyphrase(text):
14
- # Parse the input text with SpaCy
15
- doc = nlp(text)
16
-
 
 
 
17
  # Initialize a list to store sentences with obligation words
18
  obligation_sentences = []
19
-
20
- # Iterate through the sentences in the document
21
- for sentence in doc.sents:
22
- # Check if any of the obligation words appear in the sentence
23
- if any(word.text.lower() in obligation_words for word in sentence):
24
- obligation_sentences.append(sentence.text)
25
-
26
- return obligation_sentences
 
 
 
 
 
 
 
1
+ from rake_nltk import Rake
2
+ import re
 
 
 
 
3
 
4
  # Define a list of obligation words
5
  obligation_words = ["must", "will", "use", "may", "provides", 'is obliged to',
6
  'has to', 'needs to', 'is required to',
7
  "shall", "should", "ought to", "required", "obligated", "duty"]
8
 
9
+ def extract_sentences_with_obligations(text):
10
+ # Initialize Rake with its default settings (no arguments are passed, so the library's default stopword list still applies)
11
+ rake = Rake()
12
+
13
+ # Split the text into sentences
14
+ sentences = re.split(r'(?<=[.!?])\s+', text)
15
+
16
  # Initialize a list to store sentences with obligation words
17
  obligation_sentences = []
18
+
19
+ # Iterate through the sentences
20
+ for sentence in sentences:
21
+ # Extract keyphrases from the sentence
22
+ rake.extract_keywords_from_text(sentence)
23
+
24
+ # Get the ranked keyphrases
25
+ ranked_keyphrases = rake.get_ranked_phrases()
26
+
27
+ # Check if any of the ranked keyphrases contain obligation words
28
+ if any(any(word in kp.lower() for word in obligation_words) for kp in ranked_keyphrases):
29
+ obligation_sentences.append(sentence)
30
+
31
+ return obligation_sentences