sidphbot committed on
Commit
b0ec94c
1 Parent(s): 5e4be81

download analysis

Browse files
Files changed (1) hide show
  1. src/Surveyor.py +13 -9
src/Surveyor.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from arxiv_public_data.fulltext import convert_directory_parallel
2
  from arxiv_public_data import internal_citations
3
  import torch
@@ -1329,7 +1330,7 @@ class Surveyor:
1329
  self.print_fn("outputs: " + outputs)
1330
  return outputs
1331
 
1332
- def zip_outputs(self, dump_dir, query):
1333
  import zipfile
1334
  def zipdir(path, ziph):
1335
  # ziph is zipfile handle
@@ -1339,10 +1340,9 @@ class Surveyor:
1339
  os.path.relpath(os.path.join(root, file),
1340
  os.path.join(path, '../..')))
1341
 
1342
- zip_name = 'arxiv_dumps_'+query.replace(' ', '_')+'.zip'
1343
  zipf = zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED)
1344
  zipdir(dump_dir, zipf)
1345
- return zip_name
1346
 
1347
  def survey(self, query=None, id_list=None, max_search=None, num_papers=None, debug=False, weigh_authors=False):
1348
  import joblib
@@ -1468,17 +1468,21 @@ class Surveyor:
1468
  query = self.generate_title(' '.join([v for v in clustered_sections.values()]))
1469
 
1470
  survey_file = 'A_Survey_on_' + query.replace(' ', '_') + '.txt'
1471
- self.build_doc(clustered_sections, papers_standardized, query=query, filename=self.dump_dir + survey_file)
 
1472
 
1473
  self.survey_print_fn("\n-citation-network: ")
1474
  self.survey_print_fn(cites)
1475
 
1476
  shutil.copytree('arxiv_data/', self.dump_dir + '/arxiv_data/')
1477
- shutil.copy(self.dump_dir + survey_file, survey_file)
1478
  assert (os.path.exists(survey_file))
 
 
 
 
 
 
 
1479
  self.print_fn(str(list(Path(self.dump_dir).resolve().glob('*'))))
1480
- output_zip = self.zip_outputs(self.dump_dir, query)
1481
- self.print_fn("\n- Survey complete.. \nSurvey file path :" + os.path.abspath(
1482
- survey_file) + "\nAll outputs zip path :" + os.path.abspath(self.dump_dir + output_zip))
1483
 
1484
- return os.path.abspath(self.dump_dir + output_zip), os.path.abspath(survey_file)
 
1
+ from pathlib import Path
2
  from arxiv_public_data.fulltext import convert_directory_parallel
3
  from arxiv_public_data import internal_citations
4
  import torch
 
1330
  self.print_fn("outputs: " + outputs)
1331
  return outputs
1332
 
1333
+ def zip_outputs(self, dump_dir, zip_name):
1334
  import zipfile
1335
  def zipdir(path, ziph):
1336
  # ziph is zipfile handle
 
1340
  os.path.relpath(os.path.join(root, file),
1341
  os.path.join(path, '../..')))
1342
 
1343
+
1344
  zipf = zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED)
1345
  zipdir(dump_dir, zipf)
 
1346
 
1347
  def survey(self, query=None, id_list=None, max_search=None, num_papers=None, debug=False, weigh_authors=False):
1348
  import joblib
 
1468
  query = self.generate_title(' '.join([v for v in clustered_sections.values()]))
1469
 
1470
  survey_file = 'A_Survey_on_' + query.replace(' ', '_') + '.txt'
1471
+ survey_file = Path(self.dump_dir).resolve() / survey_file
1472
+ self.build_doc(clustered_sections, papers_standardized, query=query, filename=str(survey_file))
1473
 
1474
  self.survey_print_fn("\n-citation-network: ")
1475
  self.survey_print_fn(cites)
1476
 
1477
  shutil.copytree('arxiv_data/', self.dump_dir + '/arxiv_data/')
 
1478
  assert (os.path.exists(survey_file))
1479
+
1480
+ zip_name = 'arxiv_dumps_'+query.replace(' ', '_')+'.zip'
1481
+ zip_name = Path(self.dump_dir).parent.resolve() / zip_name
1482
+ self.zip_outputs(self.dump_dir, str(zip_name))
1483
+ self.print_fn("\n- Survey complete.. \nSurvey file path :" + str(survey_file) +
1484
+ "\nAll outputs zip path :" + str(zipname))
1485
+
1486
  self.print_fn(str(list(Path(self.dump_dir).resolve().glob('*'))))
 
 
 
1487
 
1488
+ return str(zip_name.resolve()), str(zipname.resolve())