xymeow7 committed on
Commit
2b0e8d0
1 Parent(s): 7f1e465

Update svm_model.py

Browse files
Files changed (1) hide show
  1. svm_model.py +20 -5
svm_model.py CHANGED
@@ -15,7 +15,7 @@ except AttributeError:
15
  else:
16
  ssl._create_default_https_context = _create_unverified_https_context
17
  # print(f"nltk version: {nltk.__version__}")
18
- nltk.download('stopwords')
19
 
20
  #
21
  class SVMModel:
@@ -89,13 +89,21 @@ class SVM:
89
  self.data_folder = '.'
90
 
91
  print(f"Start loading data")
92
- self._load_data()
93
 
94
  print(f"Setting vectorizer")
95
- self.vectorizer = TfidfVectorizer(max_features=4000, min_df=7, max_df=0.8, stop_words=stopwords.words('english'))
 
 
96
 
97
- print(f"Start preprocessing data")
98
- self._preprocess_data()
 
 
 
 
 
 
99
 
100
  # self.setup_model()
101
  self.setup_model_ours()
@@ -132,8 +140,15 @@ class SVM:
132
 
133
  def _preprocess_data(self, ):
134
  self.X_train = self.vectorizer.fit_transform(self.x_train).toarray()
 
 
 
 
 
 
135
  self.X_test = self.vectorizer.transform(self.x_test).toarray()
136
 
 
137
 
138
 
139
 
 
15
  else:
16
  ssl._create_default_https_context = _create_unverified_https_context
17
  # print(f"nltk version: {nltk.__version__}")
18
+ # nltk.download('stopwords')
19
 
20
  #
21
  class SVMModel:
 
89
  self.data_folder = '.'
90
 
91
  print(f"Start loading data")
92
+ # self._load_data()
93
 
94
  print(f"Setting vectorizer")
95
+ # self.vectorizer = TfidfVectorizer(max_features=4000, min_df=7, max_df=0.8, stop_words=stopwords.words('english'))
96
+ # parmas_dict = np.load("svm_vectorizer.npy", allow_pickle=True).item()
97
+ # print(f"parmas_dict: {parmas_dict.keys()}")
98
 
99
+
100
+ # self.vectorizer.set_params(**parmas_dict)
101
+
102
+ import pickle
103
+ self.vectorizer = pickle.load(open("tfidf.pickle", "rb"))
104
+
105
+ # print(f"Start preprocessing data")
106
+ # self._preprocess_data()
107
 
108
  # self.setup_model()
109
  self.setup_model_ours()
 
140
 
141
  def _preprocess_data(self, ):
142
  self.X_train = self.vectorizer.fit_transform(self.x_train).toarray()
143
+
144
+ import pickle
145
+ # self.vectorizer_params = self.vectorizer.get_params()
146
+ # np.save("svm_vectorizer.npy", self.vectorizer_params)
147
+ pickle.dump(self.vectorizer, open("tfidf.pickle", "wb"))
148
+
149
  self.X_test = self.vectorizer.transform(self.x_test).toarray()
150
 
151
+ # self.X_train = self.vectorizer.transform
152
 
153
 
154