File size: 469 Bytes
1482fe1
fdcca24
5fade0e
d379f83
39fb052
3424db2
22275d2
5e833ac
8cab515
eb71d14
410c6a8
9116abe
e7b5de5
5bb46d4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
from datasets import load_dataset
import pandas as pd


def make_dataset(dataset='cnn_dailymail', split='train', version="3.0.0",
                 input_column=None, output_column=None):
    """Load a dataset split and return it as an input/output text DataFrame.

    Args:
        dataset: Dataset identifier passed to ``load_dataset``.
        split: Split passed to ``load_dataset`` (e.g. ``'train'``).
        version: Value forwarded to ``load_dataset`` as ``script_version``.
        input_column: Name of the column copied into ``input_text``. When
            ``None`` it is detected from the loaded columns.
        output_column: Name of the column copied into ``output_text``. When
            ``None`` it is detected from the loaded columns.

    Returns:
        A ``pandas.DataFrame`` with two columns, ``input_text`` and
        ``output_text``.

    Raises:
        KeyError: If a column has to be detected and the loaded split
            contains none of the known input/output column pairs.
    """
    # NOTE(review): later `datasets` releases replaced `script_version` with
    # `revision`, and for cnn_dailymail "3.0.0" looks like a config name
    # rather than a script revision -- confirm against the pinned version.
    loaded = load_dataset(dataset, split=split, script_version=version)

    if input_column is None or output_column is None:
        # The originally hard-coded pair is tried first so callers that
        # already worked keep getting exactly the same columns; the second
        # pair covers the default summarisation dataset, which has no
        # 'concepts'/'target' columns.
        known_pairs = (('concepts', 'target'), ('article', 'highlights'))
        available = set(loaded.column_names)
        for source_name, target_name in known_pairs:
            if source_name in available and target_name in available:
                break
        else:
            raise KeyError(
                "could not detect input/output columns among "
                f"{sorted(available)}; pass input_column and output_column"
            )
        if input_column is None:
            input_column = source_name
        if output_column is None:
            output_column = target_name

    return pd.DataFrame({
        'input_text': loaded[input_column],
        'output_text': loaded[output_column],
    })

if __name__ == '__main__':
    # Script entry point: build the CNN/DailyMail training frame once.
    # The returned DataFrame is not stored or printed here.
    make_dataset('cnn_dailymail', 'train', "3.0.0")