donb-hf committed on
Commit
778b735
1 Parent(s): edd8809

initialize dataset script

Browse files
Files changed (1) hide show
  1. initialize_dataset.py +24 -0
initialize_dataset.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from datasets import Dataset
from config import DATASET_NAME
import huggingface_hub

# Seed script: builds a zero-row dataset that carries only the expected
# column names, then publishes it to the Hub under DATASET_NAME.

# Every column starts out as its own empty list; the tuple fixes the
# column order of the resulting dataset.
initial_data = {
    column: []
    for column in (
        "id",
        "title",
        "authors",
        "published",
        "updated",
        "pdf_url",
        "entry_id",
        "summary",
        "categories",
        "primary_category",
        "html_url",
    )
}

# Materialize the (empty) dataset from the column mapping.
# NOTE(review): with no rows there are no values to infer column types
# from -- confirm the resulting features match what later writers expect.
dataset = Dataset.from_dict(initial_data)

# Upload the empty dataset to the Hub as the "train" split.
dataset.push_to_hub(DATASET_NAME, split="train")