Spaces:
Sleeping
Sleeping
refresh
Browse files
main.py
CHANGED
@@ -66,7 +66,7 @@ def get_zotero_items(debug=False):
|
|
66 |
print(f"# items fetched {len(items)}")
|
67 |
|
68 |
if debug:
|
69 |
-
if len(items) >
|
70 |
break
|
71 |
|
72 |
return items
|
@@ -419,21 +419,24 @@ def upload_to_hf(abstract_df, contents_df, processed_arxiv_ids):
|
|
419 |
)
|
420 |
|
421 |
# upload image dataset
|
422 |
-
|
423 |
-
|
|
|
424 |
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
|
|
|
|
437 |
|
438 |
|
439 |
########################################################
|
|
|
66 |
print(f"# items fetched {len(items)}")
|
67 |
|
68 |
if debug:
|
69 |
+
if len(items) > 500:
|
70 |
break
|
71 |
|
72 |
return items
|
|
|
419 |
)
|
420 |
|
421 |
# upload image dataset
|
422 |
+
try:
|
423 |
+
img_ds = create_hf_image_dataset("data/arxiv_images")
|
424 |
+
img_ds.push_to_hub(repo_id, "images", token=os.environ.get("HF_TOKEN"))
|
425 |
|
426 |
+
# push id_to_abstract
|
427 |
+
abstract_ds = Dataset.from_pandas(abstract_df)
|
428 |
+
abstract_ds.push_to_hub(repo_id, "abstracts", token=os.environ.get("HF_TOKEN"))
|
429 |
|
430 |
+
# push arxiv_items
|
431 |
+
arxiv_ds = Dataset.from_pandas(contents_df)
|
432 |
+
arxiv_ds.push_to_hub(repo_id, "articles", token=os.environ.get("HF_TOKEN"))
|
433 |
|
434 |
+
# push processed_arxiv_ids
|
435 |
+
processed_arxiv_ids = [{"arxiv_id": arxiv_id} for arxiv_id in processed_arxiv_ids]
|
436 |
+
processed_arxiv_ids_ds = Dataset.from_list(processed_arxiv_ids)
|
437 |
+
processed_arxiv_ids_ds.push_to_hub(repo_id, "processed_arxiv_ids", token=os.environ.get("HF_TOKEN"))
|
438 |
+
except Exception as e:
|
439 |
+
print(e)
|
440 |
|
441 |
|
442 |
########################################################
|