ruslanruslanruslan committed on
Commit
186a961
1 Parent(s): 60cb352

errors fixed: download the NLTK 'wordnet' and 'stopwords' data and re-enable stop-word filtering

Browse files
Files changed (1) hide show
  1. pages/Film reviews classifier.py +5 -2
pages/Film reviews classifier.py CHANGED
@@ -14,7 +14,10 @@ import transformers
14
  from collections import Counter
15
  from nltk.corpus import stopwords
16
  from nltk.stem import WordNetLemmatizer
17
- # stop_words = set(stopwords.words('english'))
 
 
 
18
 
19
  def preprocess_single_string(input_string: str, seq_len: int, vocab_to_int: dict):
20
  preprocessed_string = data_preprocessing(input_string)
@@ -46,7 +49,7 @@ def data_preprocessing(text: str):
46
  text = text.lower()
47
  text = re.sub('<.*?>', '', text)
48
  text = ''.join([c for c in text if c not in string.punctuation])
49
- text = [wn_lemmatizer.lemmatize(word) for word in text.split()] #if word not in stop_words]
50
  text = ' '.join(text)
51
  return text
52
 
 
14
  from collections import Counter
15
  from nltk.corpus import stopwords
16
  from nltk.stem import WordNetLemmatizer
17
+
18
+ nltk.download('wordnet')
19
+ nltk.download('stopwords')
20
+ stop_words = set(stopwords.words('english'))
21
 
22
  def preprocess_single_string(input_string: str, seq_len: int, vocab_to_int: dict):
23
  preprocessed_string = data_preprocessing(input_string)
 
49
  text = text.lower()
50
  text = re.sub('<.*?>', '', text)
51
  text = ''.join([c for c in text if c not in string.punctuation])
52
+ text = [wn_lemmatizer.lemmatize(word) for word in text.split() if word not in stop_words]
53
  text = ' '.join(text)
54
  return text
55