curtpond committed on
Commit
0f34ca3
1 Parent(s): 8d08fb9

Added preprocessing steps to app.py.

Browse files
Files changed (1) hide show
  1. app.py +25 -1
app.py CHANGED
@@ -2,14 +2,38 @@
2
  import gradio as gr
3
  from sklearn.linear_model import LogisticRegression
4
  import pickle5 as pickle
 
 
 
 
 
 
 
5
 
6
  # file name
7
  lr_filename = 'logistic_regression.pkl'
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  # Load model from pickle file
10
  model = pickle.load(open(lr_filename, 'rb'))
11
 
12
- # Define function to make a prediction with the model
13
  def predict(text):
14
  return model.predict([text])
15
 
 
2
  import gradio as gr
3
  from sklearn.linear_model import LogisticRegression
4
  import pickle5 as pickle
5
+ import re
6
+ import string
7
+ import nltk
8
+ import re
9
+ from nltk.corpus import stopwords
10
+ nltk.download('stopwords')
11
+ from sklearn.feature_extraction.text import CountVectorizer
12
 
13
  # file name
14
  lr_filename = 'logistic_regression.pkl'
15
 
16
# Process input text: lowercase, strip punctuation, and remove English stopwords.
def process_text(text):
    """Normalize raw input text.

    The input is converted to ``str`` and lowercased, every character in
    ``string.punctuation`` is replaced by a space, whitespace is collapsed,
    and tokens found in NLTK's English stopword list are dropped.

    Args:
        text: Raw input; any object is accepted and passed through ``str``.

    Returns:
        A single space-separated string of the remaining lowercase tokens
        (an empty string if nothing remains).
    """
    # The stopword list is only a filter; it must never be assigned to
    # ``text``, which would discard the caller's input.
    stop_words = set(stopwords.words('english'))
    lowered = str(text).lower()
    no_punct = re.sub(f"[{re.escape(string.punctuation)}]", " ", lowered)
    # split() with no arguments also collapses runs of whitespace.
    return " ".join(tok for tok in no_punct.split() if tok not in stop_words)
25
+
26
# Vectorize input text
vectorizer = CountVectorizer()


def vectorize_text(text):
    """Clean *text* with ``process_text`` and transform it with ``vectorizer``.

    Args:
        text: Raw input text.

    Returns:
        The result of ``vectorizer.transform`` for a one-document batch.
    """
    # NOTE(review): no fit (or load of a fitted vocabulary) for ``vectorizer``
    # is visible in this file -- confirm it is fitted before this is called.
    cleaned = process_text(text)
    # transform() is given a batch, so wrap the single document in a list.
    return vectorizer.transform([cleaned])
32
+
33
  # Load model from pickle file
34
  model = pickle.load(open(lr_filename, 'rb'))
35
 
36
+
37
  def predict(text):
38
  return model.predict([text])
39