# NOTE: page-header residue from a web capture ("Spaces: Paused"); not part of the program.
# Standard library
import logging

# Third-party
from datasets import Dataset
from huggingface_hub import HfApi  # not referenced in the visible part of this file

# Project-local
from config import DATASET_NAME

# Configure the root logger once, at import time: INFO level and a
# "timestamp - level - message" record layout.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
def initialize_dataset():
    """Create an empty dataset with the expected columns and push it to the Hub.

    Every column starts out as an empty list, so the ``train`` split pushed
    under ``DATASET_NAME`` contains zero rows.

    Raises:
        Exception: Any error raised while pushing is logged and then
            re-raised unchanged.
    """
    # Column order matters: it is the key order of the dict handed to
    # ``Dataset.from_dict``.
    column_names = (
        "entry_id",
        "title",
        "authors",
        "published",
        "updated",
        "pdf_url",
        "summary",
        "categories",
        "primary_category",
        "html_url",
    )
    # Each column gets its own fresh empty list.
    empty_columns = {name: [] for name in column_names}
    empty_dataset = Dataset.from_dict(empty_columns)

    try:
        # Publish the empty table as the 'train' split.
        empty_dataset.push_to_hub(DATASET_NAME, split="train")
        logging.info(f"Dataset {DATASET_NAME} initialized successfully with 'train' split.")
    except Exception as exc:
        # Record the failure, then let the caller see the original error.
        logging.error(f"Failed to initialize dataset: {exc!s}")
        raise
# Script entry point: create and push the empty dataset only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    initialize_dataset()