Spaces:
Runtime error
Runtime error
gamingflexer
committed on
Commit
•
69151cb
1
Parent(s):
9269cc3
Refactor get_paper_details_batch method to store metadata
Browse files
- src/scrapper/main.py +6 -3
src/scrapper/main.py
CHANGED
@@ -34,16 +34,19 @@ class ArxivPaper:
|
|
34 |
|
35 |
def get_paper_details_batch(self, paper_ids: list, path: str = "./data/papers"):
|
36 |
path_author = os.path.join(path, self.author_name.replace(" ", "_"))
|
37 |
-
data =
|
38 |
for i in tqdm(paper_ids):
|
39 |
try:
|
40 |
paper = Arxiv(i)
|
41 |
paper.load(path_author)
|
42 |
-
paper.get_meta()
|
43 |
refs = paper.get_refs(
|
44 |
extractor=self.extractor,
|
45 |
text_splitter=self.text_splitter,)
|
46 |
paper.chunker()
|
47 |
paper.save_chunks(include_metadata=True, path=path_author)
|
|
|
48 |
except Exception as e:
|
49 |
-
print(f"Error processing paper {i}: {e}")
|
|
|
|
|
|
34 |
|
35 |
def get_paper_details_batch(self, paper_ids: list, path: str = "./data/papers"):
|
36 |
path_author = os.path.join(path, self.author_name.replace(" ", "_"))
|
37 |
+
data = []
|
38 |
for i in tqdm(paper_ids):
|
39 |
try:
|
40 |
paper = Arxiv(i)
|
41 |
paper.load(path_author)
|
42 |
+
metadata = paper.get_meta()
|
43 |
refs = paper.get_refs(
|
44 |
extractor=self.extractor,
|
45 |
text_splitter=self.text_splitter,)
|
46 |
paper.chunker()
|
47 |
paper.save_chunks(include_metadata=True, path=path_author)
|
48 |
+
data.append(metadata)
|
49 |
except Exception as e:
|
50 |
+
print(f"Error processing paper {i}: {e}")
|
51 |
+
return data
|
52 |
+
|