article_classifier / dataset.py
Hacker1337's picture
added loading of my model
2904d0e
raw
history blame
940 Bytes
# Short category codes; a code's position in this list is its integer class id.
labels = ["CV", "AI", "ML", "NE", "CL"]

# Integer class id -> short code, and the inverse lookup.
id2label = dict(enumerate(labels))
label2id = {code: index for index, code in id2label.items()}

# Short code -> human-readable category name.
categorie2human = {
    "CV": "Computer Vision",
    "AI": "Artificial Intelligence",
    "ML": "Machine Learning",
    "NE": "Neural and Evolutionary Computing",
    "CL": "Computation and Language",
}
def load_arxiv_dataset():
    """Download the arXiv abstracts CSV from Kaggle and load it as a dataset.

    Fetches the ``spsayakpaul/arxiv-paper-abstracts`` dataset through
    ``kagglehub``, reads ``arxiv_data.csv`` from the downloaded directory
    with the Hugging Face ``datasets`` CSV loader (``train`` split), and
    replaces the string-valued ``terms`` column with the Python object
    obtained by evaluating each string as a literal.

    Returns:
        The loaded dataset with ``terms`` parsed (presumably into lists of
        category strings -- confirm against the CSV contents).
    """
    # Imports are kept local so that importing this module stays cheap and
    # does not require kagglehub / datasets unless the data is requested.
    import ast
    import os

    import kagglehub
    from datasets import load_dataset

    # Download the latest version; the call returns the local directory.
    download_dir = kagglehub.dataset_download("spsayakpaul/arxiv-paper-abstracts")
    csv_path = os.path.join(download_dir, "arxiv_data.csv")

    raw = load_dataset(
        "csv",
        data_files=csv_path,
        encoding="utf-8",
        split="train",
    )

    def _terms_to_literal(row):
        # "terms" is stored in the CSV as the text of a Python literal;
        # returning just this key updates that column and keeps the rest.
        return {"terms": ast.literal_eval(row["terms"])}

    return raw.map(_terms_to_literal)