ByteBlaze committed on
Commit
25842ad
1 Parent(s): abd5914

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -1
app.py CHANGED
@@ -1,13 +1,26 @@
1
  import gradio as gr
2
- from sklearn.feature_extraction.text import CountVectorizer
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
  import joblib
 
 
5
 
6
  # import warnings
7
  # from sklearn.exceptions import InconsistentVersionWarning
8
  # warnings.filterwarnings("ignore", category=InconsistentVersionWarning)
9
 
 
10
 
 
 
 
 
 
 
 
 
 
 
11
  vectorizer = joblib.load('./vectorizer.pkl')
12
  nb_classifier = joblib.load('./nb_classifier.pkl')
13
  tfidf_vectorizer = joblib.load('./tfidf_vectorizer.pkl')
@@ -16,6 +29,7 @@ random_forest = joblib.load('./random_forest.pkl')
16
  def classify(text,choice):
17
  corpus=[text]
18
  if(choice == 1):
 
19
  features = vectorizer.transform(corpus).toarray()
20
  prediction = nb_classifier.predict(features)
21
  elif(choice == 2):
 
1
  import gradio as gr
2
+ from sklearn.feature_extraction.text import CountVectorizer
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
  import joblib
5
+ from nltk.stem.porter import PorterStemmer
6
+ import re
7
 
8
  # import warnings
9
  # from sklearn.exceptions import InconsistentVersionWarning
10
  # warnings.filterwarnings("ignore", category=InconsistentVersionWarning)
11
 
12
+ ps = PorterStemmer()
13
 
14
def preprocess_for_bow(text):
    """Normalize raw text into the single-document corpus fed to the BoW vectorizer.

    Steps: replace every character outside ``a-zA-Z0-9$£€¥%`` with a space,
    lowercase, split on whitespace, drop English stop words, and stem the
    remaining tokens with the module-level Porter stemmer ``ps``.

    Args:
        text: The raw input string to classify.

    Returns:
        A one-element list containing the cleaned, space-joined tokens.

    Note:
        Requires the NLTK ``stopwords`` corpus to be installed; NLTK raises
        ``LookupError`` when the corpus data has not been downloaded.
    """
    # Imported here because the module never imports ``stopwords`` at the top;
    # without this the original body failed with NameError on first call.
    from nltk.corpus import stopwords

    # Build the stop-word set once per call instead of re-reading the corpus
    # list for every token (the original did an O(n) list scan per token).
    english_stopwords = set(stopwords.words('english'))

    cleaned = re.sub(r'[^a-zA-Z0-9$£€¥%]', ' ', text).lower()
    stemmed_tokens = [
        ps.stem(token)
        for token in cleaned.split()
        if token not in english_stopwords
    ]
    return [' '.join(stemmed_tokens)]
23
+
24
  vectorizer = joblib.load('./vectorizer.pkl')
25
  nb_classifier = joblib.load('./nb_classifier.pkl')
26
  tfidf_vectorizer = joblib.load('./tfidf_vectorizer.pkl')
 
29
  def classify(text,choice):
30
  corpus=[text]
31
  if(choice == 1):
32
+ corpus = preprocess_for_bow(text)
33
  features = vectorizer.transform(corpus).toarray()
34
  prediction = nb_classifier.predict(features)
35
  elif(choice == 2):