Alec committed on
Commit
8101eff
1 Parent(s): 284240f

update to gridsearch

Browse files
Files changed (1) hide show
  1. baseline.py +8 -5
baseline.py CHANGED
@@ -5,8 +5,8 @@ import fire
5
  import numpy as np
6
  from scipy import sparse
7
 
8
- from sklearn.model_selection import PredefinedSplit
9
- from sklearn.linear_model import LogisticRegressionCV
10
  from sklearn.feature_extraction.text import TfidfVectorizer
11
 
12
  def _load_split(data_dir, source, split, n=np.inf):
@@ -35,10 +35,13 @@ def main(data_dir, log_dir, source='xl-1542M-k40', n_train=500000, n_valid=10000
35
  valid_features = vect.transform(valid_texts)
36
  test_features = vect.transform(test_texts)
37
 
38
- Cs = [1/64, 1/32, 1/16, 1/8, 1/4, 1/2, 1, 2, 4, 8, 16, 32, 64]
 
39
  split = PredefinedSplit([-1]*n_train+[0]*n_valid)
40
- model = LogisticRegressionCV(Cs=Cs, cv=split, solver='liblinear', n_jobs=n_jobs, verbose=verbose, refit=False)
41
- model.fit(sparse.vstack([train_features, valid_features]), train_labels+valid_labels)
 
 
42
  valid_accuracy = model.score(valid_features, valid_labels)*100.
43
  test_accuracy = model.score(test_features, test_labels)*100.
44
  data = {
 
5
  import numpy as np
6
  from scipy import sparse
7
 
8
+ from sklearn.model_selection import PredefinedSplit, GridSearchCV
9
+ from sklearn.linear_model import LogisticRegression
10
  from sklearn.feature_extraction.text import TfidfVectorizer
11
 
12
  def _load_split(data_dir, source, split, n=np.inf):
 
35
  valid_features = vect.transform(valid_texts)
36
  test_features = vect.transform(test_texts)
37
 
38
+ model = LogisticRegression(solver='liblinear')
39
+ params = {'C': [1/64, 1/32, 1/16, 1/8, 1/4, 1/2, 1, 2, 4, 8, 16, 32, 64]}
40
  split = PredefinedSplit([-1]*n_train+[0]*n_valid)
41
+ search = GridSearchCV(model, params, cv=split, n_jobs=n_jobs, verbose=verbose, refit=False)
42
+ search.fit(sparse.vstack([train_features, valid_features]), train_labels+valid_labels)
43
+ model = model.set_params(**search.best_params_)
44
+ model.fit(train_features, train_labels)
45
  valid_accuracy = model.score(valid_features, valid_labels)*100.
46
  test_accuracy = model.score(test_features, test_labels)*100.
47
  data = {