Spaces:
Sleeping
Sleeping
shivakerur99
committed on
Commit
•
bd059eb
1
Parent(s):
d5c7129
Update main.py
Browse files
main.py
CHANGED
@@ -9,16 +9,15 @@ from textblob import TextBlob
|
|
9 |
|
10 |
import whisperx
|
11 |
import gc
|
12 |
-
import openai
|
13 |
-
import time
|
14 |
-
|
15 |
|
16 |
-
import spacy
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
22 |
|
23 |
# SECURITY(review): a real OpenAI API key is hard-coded and committed to a
# public repository — it must be treated as compromised and revoked.
# Load credentials from an environment variable (e.g. os.environ["OPENAI_API_KEY"])
# instead of embedding them in source.
openai.api_key = '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'
|
24 |
|
@@ -90,13 +89,11 @@ def analyze_conversation_sentiment(conversation):
|
|
90 |
def parse_conversation(content):
    """Split a raw transcript into a list of lines.

    Surrounding whitespace is stripped first, so a trailing newline does
    not yield an empty final entry. An empty string returns [''].
    """
    return content.strip().split('\n')
|
92 |
|
|
|
93 |
def extract_active_words(text):
    """Return token texts (punctuation stripped) that are not stopwords,
    digits, punctuation, or the literal word "Speaker".

    NOTE(review): `doc` is not defined anywhere in this (pre-change)
    version as shown — the line that built it (presumably a spaCy call
    like `doc = nlp(text)`) appears among the removed diff lines. As
    written this function raises NameError; verify against the full
    file history.
    """
    # Extract tokens that are not stopwords
    active_words = [re.sub(r'[^\w\s]', '', token.text) for token in doc if not token.is_stop and not token.is_digit and not token.is_punct and token.text != "Speaker"]
    return active_words
|
101 |
|
102 |
|
|
|
9 |
|
10 |
import whisperx
|
11 |
import gc
|
|
|
|
|
|
|
12 |
|
|
|
13 |
|
14 |
+
import nltk
|
15 |
+
from nltk.tokenize import word_tokenize
|
16 |
+
from nltk.corpus import stopwords
|
17 |
+
import openai
|
18 |
+
import time
|
19 |
+
nltk.download('punkt')
|
20 |
+
nltk.download('stopwords')
|
21 |
|
22 |
# SECURITY(review): same hard-coded OpenAI API key as the pre-change version,
# now published in a public commit — revoke this key immediately and read it
# from configuration or the environment rather than source code.
openai.api_key = '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'
|
23 |
|
|
|
89 |
def parse_conversation(content):
    """Break a conversation transcript into one entry per line.

    Leading and trailing whitespace is removed before splitting, so a
    transcript ending in a newline does not produce a trailing empty
    entry. An empty input yields [''].
    """
    trimmed = content.strip()
    return trimmed.split('\n')
|
91 |
|
92 |
+
|
93 |
def extract_active_words(text):
    """Tokenize *text* with NLTK and keep only the "active" words.

    A token is kept when it is purely alphanumeric and its lowercase
    form is not in the English NLTK stopword list. Returned tokens
    preserve their original casing and order.
    """
    stop_words = set(stopwords.words('english'))
    active_words = []
    for token in word_tokenize(text):
        if token.isalnum() and token.lower() not in stop_words:
            active_words.append(token)
    return active_words
|
98 |
|
99 |
|