sidphbot committed on
Commit
f81b1b5
1 Parent(s): de65586
Files changed (1) hide show
  1. src/Surveyor.py +20 -19
src/Surveyor.py CHANGED
@@ -18,7 +18,7 @@ except:
18
 
19
  from src.defaults import DEFAULTS_CPU_COMPAT, DEFAULTS_HIGH_GPU
20
 
21
- DEFAULTS = DEFAULTS_CPU_COMPAT
22
 
23
  class Surveyor:
24
  '''
@@ -79,15 +79,16 @@ class Surveyor:
79
  spacy.require_gpu()
80
 
81
  self.high_gpu = high_gpu
 
82
  if self.high_gpu:
83
- DEFAULTS = DEFAULTS_HIGH_GPU
84
 
85
  if not kw_model_name:
86
- kw_model_name = DEFAULTS["kw_model_name"]
87
- self.num_papers = DEFAULTS['num_papers']
88
- self.max_search = DEFAULTS['max_search']
89
  if not models_dir:
90
- models_dir = DEFAULTS['models_dir']
91
 
92
  models_found = False
93
  if os.path.exists(models_dir) and not no_save_models:
@@ -95,17 +96,17 @@ class Surveyor:
95
  models_found = True
96
 
97
  if not title_model_name:
98
- title_model_name = DEFAULTS["title_model_name"]
99
  if not ex_summ_model_name:
100
- ex_summ_model_name = DEFAULTS["ex_summ_model_name"]
101
  if not ledmodel_name:
102
- ledmodel_name = DEFAULTS["ledmodel_name"]
103
  if not embedder_name:
104
- embedder_name = DEFAULTS["embedder_name"]
105
  if not nlp_name:
106
- nlp_name = DEFAULTS["nlp_name"]
107
  if not similarity_nlp_name:
108
- similarity_nlp_name = DEFAULTS["similarity_nlp_name"]
109
 
110
  if refresh_models or not models_found:
111
  print(f'\nInitializing models {"and saving (about 5GB)" if not no_save_models else ""}')
@@ -183,27 +184,27 @@ class Surveyor:
183
  if pdf_dir:
184
  self.pdf_dir = pdf_dir
185
  else:
186
- self.pdf_dir = DEFAULTS["pdf_dir"]
187
 
188
  if txt_dir:
189
  self.txt_dir = txt_dir
190
  else:
191
- self.txt_dir = DEFAULTS["txt_dir"]
192
 
193
  if img_dir:
194
  self.img_dir = img_dir
195
  else:
196
- self.img_dir = DEFAULTS["img_dir"]
197
 
198
  if tab_dir:
199
  self.tab_dir = tab_dir
200
  else:
201
- self.tab_dir = DEFAULTS["tab_dir"]
202
 
203
  if dump_dir:
204
  self.dump_dir = dump_dir
205
  else:
206
- self.dump_dir = DEFAULTS["dump_dir"]
207
 
208
  dirs = [self.pdf_dir, self.txt_dir, self.img_dir, self.tab_dir, self.dump_dir]
209
  if sum([True for dir in dirs if 'arxiv_data/' in dir]):
@@ -1337,9 +1338,9 @@ class Surveyor:
1337
  import joblib
1338
  import os, shutil
1339
  if not max_search:
1340
- max_search = DEFAULTS['max_search']
1341
  if not num_papers:
1342
- num_papers = DEFAULTS['num_papers']
1343
  # arxiv api relevance search and data preparation
1344
  print("\nsearching arXiv for top 100 papers.. ")
1345
  results, searched_papers = self.search(query, max_search=max_search)
 
18
 
19
  from src.defaults import DEFAULTS_CPU_COMPAT, DEFAULTS_HIGH_GPU
20
 
21
+
22
 
23
  class Surveyor:
24
  '''
 
79
  spacy.require_gpu()
80
 
81
  self.high_gpu = high_gpu
82
+ self.DEFAULTS = DEFAULTS_CPU_COMPAT
83
  if self.high_gpu:
84
+ self.DEFAULTS = DEFAULTS_HIGH_GPU
85
 
86
  if not kw_model_name:
87
+ kw_model_name = self.DEFAULTS["kw_model_name"]
88
+ self.num_papers = self.DEFAULTS['num_papers']
89
+ self.max_search = self.DEFAULTS['max_search']
90
  if not models_dir:
91
+ models_dir = self.DEFAULTS['models_dir']
92
 
93
  models_found = False
94
  if os.path.exists(models_dir) and not no_save_models:
 
96
  models_found = True
97
 
98
  if not title_model_name:
99
+ title_model_name = self.DEFAULTS["title_model_name"]
100
  if not ex_summ_model_name:
101
+ ex_summ_model_name = self.DEFAULTS["ex_summ_model_name"]
102
  if not ledmodel_name:
103
+ ledmodel_name = self.DEFAULTS["ledmodel_name"]
104
  if not embedder_name:
105
+ embedder_name = self.DEFAULTS["embedder_name"]
106
  if not nlp_name:
107
+ nlp_name = self.DEFAULTS["nlp_name"]
108
  if not similarity_nlp_name:
109
+ similarity_nlp_name = self.DEFAULTS["similarity_nlp_name"]
110
 
111
  if refresh_models or not models_found:
112
  print(f'\nInitializing models {"and saving (about 5GB)" if not no_save_models else ""}')
 
184
  if pdf_dir:
185
  self.pdf_dir = pdf_dir
186
  else:
187
+ self.pdf_dir = self.DEFAULTS["pdf_dir"]
188
 
189
  if txt_dir:
190
  self.txt_dir = txt_dir
191
  else:
192
+ self.txt_dir = self.DEFAULTS["txt_dir"]
193
 
194
  if img_dir:
195
  self.img_dir = img_dir
196
  else:
197
+ self.img_dir = self.DEFAULTS["img_dir"]
198
 
199
  if tab_dir:
200
  self.tab_dir = tab_dir
201
  else:
202
+ self.tab_dir = self.DEFAULTS["tab_dir"]
203
 
204
  if dump_dir:
205
  self.dump_dir = dump_dir
206
  else:
207
+ self.dump_dir = self.DEFAULTS["dump_dir"]
208
 
209
  dirs = [self.pdf_dir, self.txt_dir, self.img_dir, self.tab_dir, self.dump_dir]
210
  if sum([True for dir in dirs if 'arxiv_data/' in dir]):
 
1338
  import joblib
1339
  import os, shutil
1340
  if not max_search:
1341
+ max_search = self.DEFAULTS['max_search']
1342
  if not num_papers:
1343
+ num_papers = self.DEFAULTS['num_papers']
1344
  # arxiv api relevance search and data preparation
1345
  print("\nsearching arXiv for top 100 papers.. ")
1346
  results, searched_papers = self.search(query, max_search=max_search)