shivakerur99 committed on
Commit
bd059eb
1 Parent(s): d5c7129

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +11 -14
main.py CHANGED
@@ -9,16 +9,15 @@ from textblob import TextBlob
9
 
10
  import whisperx
11
  import gc
12
- import openai
13
- import time
14
-
15
 
16
- import spacy
17
 
18
- # Download the model
19
- spacy.cli.download("en_core_web_sm")
20
-
21
- nlp = spacy.load("en_core_web_sm")
 
 
 
22
 
23
  openai.api_key = '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'
24
 
@@ -90,13 +89,11 @@ def analyze_conversation_sentiment(conversation):
90
  def parse_conversation(content):
91
  return content.strip().split('\n')
92
 
 
93
  def extract_active_words(text):
94
- # Process the text with spaCy
95
- doc = nlp(text)
96
-
97
- # Extract tokens that are not stopwords
98
- active_words = [re.sub(r'[^\w\s]', '', token.text) for token in doc if not token.is_stop and not token.is_digit and not token.is_punct and token.text != "Speaker"]
99
-
100
  return active_words
101
 
102
 
 
9
 
10
  import whisperx
11
  import gc
 
 
 
12
 
 
13
 
14
+ import nltk
15
+ from nltk.tokenize import word_tokenize
16
+ from nltk.corpus import stopwords
17
+ import openai
18
+ import time
19
+ nltk.download('punkt')
20
+ nltk.download('stopwords')
21
 
22
  openai.api_key = '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'
23
 
 
89
  def parse_conversation(content):
90
  return content.strip().split('\n')
91
 
92
+
93
  def extract_active_words(text):
94
+ tokens = word_tokenize(text)
95
+ stop_words = set(stopwords.words('english'))
96
+ active_words = [word for word in tokens if word.isalnum() and word.lower() not in stop_words]
 
 
 
97
  return active_words
98
 
99