Spaces:
Runtime error
Runtime error
import yaml | |
from datasets import load_dataset | |
import pandas as pd | |
import os | |
import pprint | |
def make_dataset(dataset="cnn_dailymail", split="train", config_name="3.0.0",
                 output_dir="data/raw"):
    """Download one split of a summarisation dataset and save it as CSV.

    The split is loaded with ``datasets.load_dataset`` and its ``article``
    and ``highlights`` columns are written to ``<output_dir>/<split>.csv``.

    Args:
        dataset: Dataset identifier passed to ``load_dataset``.
        split: Name of the split to load; also used as the CSV file name.
        config_name: Dataset configuration passed as the second positional
            argument to ``load_dataset``. Defaults to ``"3.0.0"``, the value
            that was previously hard-coded.
        output_dir: Directory that receives the CSV file. It is created if
            it does not exist. Defaults to ``"data/raw"``.
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # Use a separate name so the ``dataset`` argument is not shadowed by the
    # loaded object.
    loaded_split = load_dataset(dataset, config_name, split=split)

    df = pd.DataFrame(
        {
            "article": loaded_split["article"],
            "highlights": loaded_split["highlights"],
        }
    )
    # The DataFrame index is kept in the CSV (pandas default) so the file
    # layout stays the same as before.
    df.to_csv(os.path.join(output_dir, "{}.csv".format(split)))
if __name__ == "__main__":
    # Load the pipeline parameters and echo them for the run log.
    with open("params.yml") as params_file:
        params = yaml.safe_load(params_file)
    pprint.pprint(params)

    # Export every split of the configured dataset, in this order.
    for split_name in ("train", "test", "validation"):
        make_dataset(dataset=params["data"], split=split_name)