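# NOTE(review): the original first lines here read "Spaces: / Sleeping / Sleeping" —
# this looks like status-banner residue from a web-page capture, not program text.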
# Short category codes used as classification labels. The list order is
# significant: a label's position is its integer class id.
# NOTE(review): codes presumably correspond to arXiv cs.* categories — confirm.
labels = ["CV", "AI", "ML", "NE", "CL"]

# Integer id -> label code, and the inverse mapping, both derived from the
# order of `labels` so the two lookups can never disagree.
id2label = dict(enumerate(labels))
label2id = {label: i for i, label in enumerate(labels)}

# Label code -> human-readable category name (for display purposes).
category2human = {
    "CV": "Computer Vision",
    "AI": "Artificial Intelligence",
    "ML": "Machine Learning",
    "NE": "Neural and Evolutionary Computing",
    "CL": "Computation and Language",
}
def load_arxiv_dataset():
    """
    Download the arXiv paper-abstracts dataset from Kaggle and load it.

    The Kaggle dataset "spsayakpaul/arxiv-paper-abstracts" is fetched with
    ``kagglehub``; its ``arxiv_data.csv`` file is then loaded through the
    ``datasets`` CSV loader as the "train" split. The ``terms`` column is
    stored in the CSV as the string form of a Python literal, so each row is
    parsed back into a Python object with ``ast.literal_eval``.

    Returns:
        The loaded dataset with the ``terms`` column parsed.

    Raises:
        ValueError, SyntaxError: propagated from ``ast.literal_eval`` if a
            ``terms`` value is not a valid Python literal.
    """
    # Imported inside the function so that merely importing this module does
    # not require kagglehub / datasets to be importable.
    import ast
    import os

    import kagglehub
    from datasets import load_dataset

    # Download latest version
    path = kagglehub.dataset_download("spsayakpaul/arxiv-paper-abstracts")
    dataset = load_dataset(
        "csv",
        data_files=os.path.join(path, "arxiv_data.csv"),
        encoding="utf-8",
        split="train",
    )

    # Convert the string-encoded `terms` column into real Python objects.
    # literal_eval only accepts literals, so it does not execute arbitrary code.
    def parse_terms(example):
        example["terms"] = ast.literal_eval(example["terms"])
        return example

    dataset = dataset.map(parse_terms)
    return dataset
def create_prompt(title: str, summary: str) -> str:
    """
    Create a prompt for the model from the title and summary.

    The result is a four-line string: a ``# title:`` header, the title, an
    ``# abstract:`` header, and the summary. Inputs are inserted verbatim
    (no stripping or escaping).

    Args:
        title: The paper title.
        summary: The paper abstract / summary text.

    Returns:
        The formatted prompt string.
    """
    return f"# title:\n{title}\n# abstract:\n{summary}"