Spaces:
Runtime error
Runtime error
File size: 771 Bytes
1bf52db 1482fe1 fdcca24 c5a3a4d 504cec1 5fade0e d379f83 93aba5e 3424db2 9d3af5f 483a16d 809df28 5e833ac bc51ebd 8d55377 43c82bc 9116abe c990a96 7654c8a 629049e 7be31bf bf1265d ca265c0 702ccc8 5e8234e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
import yaml
from datasets import load_dataset
import pandas as pd
import os
import pprint
def make_dataset(dataset="cnn_dailymail", split="train"):
    """Load one split of a summarisation dataset and save it as a CSV file.

    The ``article`` and ``highlights`` columns of the loaded split are
    written to ``data/raw/<split>.csv``. The output directory is created
    if it does not exist yet.

    Args:
        dataset: Dataset identifier forwarded to ``load_dataset``.
        split: Name of the split to load; also used as the CSV file stem.
    """
    # exist_ok=True replaces the check-then-create pair, which could raise
    # FileExistsError if the directory appeared between the two calls.
    os.makedirs("data/raw", exist_ok=True)

    # "3.0.0" is passed as load_dataset's second positional argument
    # (the configuration name in the datasets API).
    # NOTE(review): this value is hard-coded, so other datasets must expose
    # a configuration with the same name -- confirm before changing params.
    loaded = load_dataset(dataset, "3.0.0", split=split)

    frame = pd.DataFrame(
        {
            "article": loaded["article"],
            "highlights": loaded["highlights"],
        }
    )
    # The default to_csv settings are kept, so the row index is still
    # written as the first (unnamed) column, as before.
    frame.to_csv(f"data/raw/{split}.csv")
if __name__ == "__main__":
    # Read the run configuration. The encoding is explicit so parsing does
    # not depend on the platform's locale default.
    with open("params.yml", encoding="utf-8") as f:
        params = yaml.safe_load(f)
    pprint.pprint(params)

    # Export the three splits, in the same order as the original calls.
    for split_name in ("train", "test", "validation"):
        make_dataset(dataset=params["data"], split=split_name)
|