Spaces:
Build error
Build error
download analysis
Browse files- src/Surveyor.py +13 -9
src/Surveyor.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
from arxiv_public_data.fulltext import convert_directory_parallel
|
2 |
from arxiv_public_data import internal_citations
|
3 |
import torch
|
@@ -1329,7 +1330,7 @@ class Surveyor:
|
|
1329 |
self.print_fn("outputs: " + outputs)
|
1330 |
return outputs
|
1331 |
|
1332 |
-
def zip_outputs(self, dump_dir,
|
1333 |
import zipfile
|
1334 |
def zipdir(path, ziph):
|
1335 |
# ziph is zipfile handle
|
@@ -1339,10 +1340,9 @@ class Surveyor:
|
|
1339 |
os.path.relpath(os.path.join(root, file),
|
1340 |
os.path.join(path, '../..')))
|
1341 |
|
1342 |
-
|
1343 |
zipf = zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED)
|
1344 |
zipdir(dump_dir, zipf)
|
1345 |
-
return zip_name
|
1346 |
|
1347 |
def survey(self, query=None, id_list=None, max_search=None, num_papers=None, debug=False, weigh_authors=False):
|
1348 |
import joblib
|
@@ -1468,17 +1468,21 @@ class Surveyor:
|
|
1468 |
query = self.generate_title(' '.join([v for v in clustered_sections.values()]))
|
1469 |
|
1470 |
survey_file = 'A_Survey_on_' + query.replace(' ', '_') + '.txt'
|
1471 |
-
|
|
|
1472 |
|
1473 |
self.survey_print_fn("\n-citation-network: ")
|
1474 |
self.survey_print_fn(cites)
|
1475 |
|
1476 |
shutil.copytree('arxiv_data/', self.dump_dir + '/arxiv_data/')
|
1477 |
-
shutil.copy(self.dump_dir + survey_file, survey_file)
|
1478 |
assert (os.path.exists(survey_file))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1479 |
self.print_fn(str(list(Path(self.dump_dir).resolve().glob('*'))))
|
1480 |
-
output_zip = self.zip_outputs(self.dump_dir, query)
|
1481 |
-
self.print_fn("\n- Survey complete.. \nSurvey file path :" + os.path.abspath(
|
1482 |
-
survey_file) + "\nAll outputs zip path :" + os.path.abspath(self.dump_dir + output_zip))
|
1483 |
|
1484 |
-
return
|
|
|
1 |
+
from pathlib import Path
|
2 |
from arxiv_public_data.fulltext import convert_directory_parallel
|
3 |
from arxiv_public_data import internal_citations
|
4 |
import torch
|
|
|
1330 |
self.print_fn("outputs: " + outputs)
|
1331 |
return outputs
|
1332 |
|
1333 |
+
def zip_outputs(self, dump_dir, zip_name):
|
1334 |
import zipfile
|
1335 |
def zipdir(path, ziph):
|
1336 |
# ziph is zipfile handle
|
|
|
1340 |
os.path.relpath(os.path.join(root, file),
|
1341 |
os.path.join(path, '../..')))
|
1342 |
|
1343 |
+
|
1344 |
zipf = zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED)
|
1345 |
zipdir(dump_dir, zipf)
|
|
|
1346 |
|
1347 |
def survey(self, query=None, id_list=None, max_search=None, num_papers=None, debug=False, weigh_authors=False):
|
1348 |
import joblib
|
|
|
1468 |
query = self.generate_title(' '.join([v for v in clustered_sections.values()]))
|
1469 |
|
1470 |
survey_file = 'A_Survey_on_' + query.replace(' ', '_') + '.txt'
|
1471 |
+
survey_file = Path(self.dump_dir).resolve() / survey_file
|
1472 |
+
self.build_doc(clustered_sections, papers_standardized, query=query, filename=str(survey_file))
|
1473 |
|
1474 |
self.survey_print_fn("\n-citation-network: ")
|
1475 |
self.survey_print_fn(cites)
|
1476 |
|
1477 |
shutil.copytree('arxiv_data/', self.dump_dir + '/arxiv_data/')
|
|
|
1478 |
assert (os.path.exists(survey_file))
|
1479 |
+
|
1480 |
+
zip_name = 'arxiv_dumps_'+query.replace(' ', '_')+'.zip'
|
1481 |
+
zip_name = Path(self.dump_dir).parent.resolve() / zip_name
|
1482 |
+
self.zip_outputs(self.dump_dir, str(zip_name))
|
1483 |
+
self.print_fn("\n- Survey complete.. \nSurvey file path :" + str(survey_file) +
|
1484 |
+
"\nAll outputs zip path :" + str(zip_name))
|
1485 |
+
|
1486 |
self.print_fn(str(list(Path(self.dump_dir).resolve().glob('*'))))
|
|
|
|
|
|
|
1487 |
|
1488 |
+
return str(zip_name.resolve()), str(zip_name.resolve())
|