Spaces:

shivakerur99
/

alindor_grandmaster

Sleeping

shivakerur99 committed on Mar 26

Commit

bd059eb

•

1 Parent(s): d5c7129

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -9,16 +9,15 @@ from textblob import TextBlob
 import whisperx
 import gc
-import openai
-import time
-import spacy
-# Download the model
-spacy.cli.download("en_core_web_sm")
-nlp = spacy.load("en_core_web_sm")
 import os

 # Take the OpenAI credential from the environment rather than a literal in
 # source: a key committed to the repository is readable by anyone who can
 # view it and must be treated as compromised and rotated.
 openai.api_key = os.environ.get("OPENAI_API_KEY")
@@ -90,13 +89,11 @@ def analyze_conversation_sentiment(conversation):
 def parse_conversation(content):
     """Return the lines of *content* after trimming surrounding whitespace."""
     trimmed = content.strip()
     return trimmed.split('\n')
 def extract_active_words(text):
     """Return the non-stop-word tokens of *text* with punctuation characters removed.

     The text is run through the module-level spaCy pipeline ``nlp``. Tokens
     flagged as stop words, digits or punctuation are dropped, as is the
     literal token "Speaker". Each kept token has every character that is
     neither a word character nor whitespace stripped from it.
     """
     kept = []
     for tok in nlp(text):
         # Skip anything the original filter rejected, in the same test order.
         if tok.is_stop or tok.is_digit or tok.is_punct or tok.text == "Speaker":
             continue
         kept.append(re.sub(r'[^\w\s]', '', tok.text))
     return kept

 import whisperx
 import gc
+import nltk
+from nltk.tokenize import word_tokenize
+from nltk.corpus import stopwords
+import openai
+import time
+nltk.download('punkt')
+nltk.download('stopwords')
 import os

 # Take the OpenAI credential from the environment rather than a literal in
 # source: a key committed to the repository is readable by anyone who can
 # view it and must be treated as compromised and rotated.
 openai.api_key = os.environ.get("OPENAI_API_KEY")
 def parse_conversation(content):
     """Strip leading/trailing whitespace from *content* and split it on newlines."""
     body = content.strip()
     lines = body.split('\n')
     return lines
 def extract_active_words(text):
     """Return the alphanumeric, non-stop-word tokens of *text* in order.

     The text is tokenized with NLTK's ``word_tokenize``. A token is kept
     when ``str.isalnum()`` is true for it and its lowercase form is not in
     NLTK's English stop-word list. Kept tokens retain their original casing.
     """
     tokens = word_tokenize(text)
     english_stops = set(stopwords.words('english'))
     kept = []
     for candidate in tokens:
         if not candidate.isalnum():
             continue
         if candidate.lower() in english_stops:
             continue
         kept.append(candidate)
     return kept