Spaces:
Sleeping
Sleeping
Added preprocessing steps to app.py.
Browse files
app.py
CHANGED
@@ -2,14 +2,38 @@
|
|
2 |
import gradio as gr
|
3 |
from sklearn.linear_model import LogisticRegression
|
4 |
import pickle5 as pickle
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
# file name
|
7 |
lr_filename = 'logistic_regression.pkl'
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
# Load model from pickle file
|
10 |
# Open the pickle inside a context manager so the file handle is closed
# as soon as the model has been deserialized.
# NOTE(review): unpickling executes arbitrary code; only load a trusted file.
with open(lr_filename, 'rb') as model_file:
    model = pickle.load(model_file)
|
11 |
|
12 |
-
|
13 |
def predict(text):
    """Return the loaded model's prediction for a single input.

    The input is wrapped in a one-element list so that ``model.predict``
    receives a batch containing exactly one sample.
    """
    batch = [text]
    return model.predict(batch)
|
15 |
|
|
|
2 |
import gradio as gr
|
3 |
from sklearn.linear_model import LogisticRegression
|
4 |
import pickle5 as pickle
|
5 |
+
import re
|
6 |
+
import string
|
7 |
+
import nltk
|
8 |
+
import re
|
9 |
+
from nltk.corpus import stopwords
|
10 |
+
nltk.download('stopwords')
|
11 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
12 |
|
13 |
# file name
|
14 |
lr_filename = 'logistic_regression.pkl'
|
15 |
|
16 |
+
# Process input text, including removing stopwords, converting to lowercase, and removing punctuation
|
17 |
+
def process_text(text):
    """Normalize raw input text before it is vectorized.

    The text is lowercased, every punctuation character is replaced by a
    space, English stopwords are dropped, and runs of whitespace are
    collapsed to single spaces.

    Args:
        text: The raw input. It is coerced with ``str()`` before processing.

    Returns:
        The cleaned text as a single string of space-separated tokens.
    """
    # The stopword list is a filter applied to the input; it must never be
    # assigned over the input itself.
    stop_words = set(stopwords.words('english'))

    # Lowercase first: the stopword list is compared against lowercase tokens.
    text = str(text).lower()

    # Replace punctuation with spaces so that joined tokens are split apart.
    text = re.sub(f"[{re.escape(string.punctuation)}]", " ", text)

    # split() with no arguments also collapses repeated whitespace.
    return " ".join(word for word in text.split() if word not in stop_words)
|
25 |
+
|
26 |
+
# Vectorize input text
|
27 |
+
vectorizer = CountVectorizer()


def vectorize_text(text):
    """Clean ``text`` with ``process_text`` and transform it with ``vectorizer``.

    The cleaned string is passed to ``vectorizer.transform`` as a one-element
    list, and the result of that call is returned unchanged.

    NOTE(review): ``vectorizer`` is created above as a bare
    ``CountVectorizer()`` and no ``fit`` call is visible in this file.
    scikit-learn is expected to reject ``transform`` on an unfitted
    vectorizer; confirm that the fitted vectorizer used at training time
    is loaded somewhere before this function is called.
    """
    cleaned = process_text(text)
    return vectorizer.transform([cleaned])
|
32 |
+
|
33 |
# Load model from pickle file
|
34 |
# Open the pickle inside a context manager so the file handle is closed
# as soon as the model has been deserialized.
# NOTE(review): unpickling executes arbitrary code; only load a trusted file.
with open(lr_filename, 'rb') as model_file:
    model = pickle.load(model_file)
|
35 |
|
36 |
+
|
37 |
def predict(text):
    """Return the loaded model's prediction for a single input.

    The input is wrapped in a one-element list so that ``model.predict``
    receives a batch containing exactly one sample.

    NOTE(review): the raw ``text`` is passed to the model as-is; this
    function does not call ``process_text`` or ``vectorize_text``. Confirm
    whether the pickled model expects raw strings or vectorized features.
    """
    batch = [text]
    return model.predict(batch)
|
39 |
|