Spaces:
Sleeping
Sleeping
shivakerur99
committed on
Commit
•
1387d41
1
Parent(s):
a1a449a
Update main.py
Browse files
main.py
CHANGED
@@ -6,24 +6,25 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
6 |
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String
|
7 |
from databases import Database
|
8 |
from textblob import TextBlob
|
9 |
-
import os
|
10 |
|
11 |
import whisperx
|
12 |
import gc
|
13 |
-
import re
|
14 |
-
import openai
|
15 |
-
import time
|
16 |
|
17 |
-
import spacy
|
18 |
-
|
19 |
-
# Load the English tokenizer, tagger, parser, NER, and word vectors
|
20 |
-
nlp = spacy.load("en_core_web_sm")
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
openai.api_key = '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'
|
24 |
|
25 |
app = FastAPI()
|
26 |
|
|
|
|
|
27 |
import requests
|
28 |
import json
|
29 |
|
@@ -90,12 +91,9 @@ def parse_conversation(content):
|
|
90 |
|
91 |
|
92 |
def extract_active_words(text):
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
# Extract tokens that are not stopwords
|
97 |
-
active_words = [re.sub(r'[^\w\s]', '', token.text) for token in doc if not token.is_stop and not token.is_digit and not token.is_punct and token.text != "Speaker"]
|
98 |
-
|
99 |
return active_words
|
100 |
|
101 |
|
|
|
6 |
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String
|
7 |
from databases import Database
|
8 |
from textblob import TextBlob
|
|
|
9 |
|
10 |
import whisperx
|
11 |
import gc
|
|
|
|
|
|
|
12 |
|
|
|
|
|
|
|
|
|
13 |
|
14 |
+
import nltk
|
15 |
+
from nltk.tokenize import word_tokenize
|
16 |
+
from nltk.corpus import stopwords
|
17 |
+
import openai
|
18 |
+
import time
|
19 |
+
nltk.download('punkt')
|
20 |
+
nltk.download('stopwords')
|
21 |
|
22 |
openai.api_key = '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'
|
23 |
|
24 |
app = FastAPI()
|
25 |
|
26 |
+
import os
|
27 |
+
|
28 |
import requests
|
29 |
import json
|
30 |
|
|
|
91 |
|
92 |
|
93 |
def extract_active_words(text):
|
94 |
+
tokens = word_tokenize(text)
|
95 |
+
stop_words = set(stopwords.words('english'))
|
96 |
+
active_words = [word for word in tokens if word.isalnum() and word.lower() not in stop_words]
|
|
|
|
|
|
|
97 |
return active_words
|
98 |
|
99 |
|