Spaces:
Runtime error
Runtime error
File size: 618 Bytes
1482fe1 fdcca24 5fade0e d379f83 4087b07 3424db2 24a726e 5e833ac 17499bc 06d3c94 370f001 9116abe c990a96 e7b5de5 6803e26 44d7069 7b19a88 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
from datasets import load_dataset
import pandas as pd
def make_dataset(
    dataset='cnn_dailymail',
    split='train',
    output_dir='C:/Users/gbhat/Documents/GitHub/summarization/data/raw',
    version='3.0.0',
):
    """Export one split of a summarisation dataset to a CSV file.

    The split is loaded with ``datasets.load_dataset`` and its ``article``
    and ``highlights`` columns are written to ``<output_dir>/<split>.csv``.

    Args:
        dataset: Name of the dataset passed to ``load_dataset``.
        split: Split to load; also used as the CSV file name (without the
            extension).
        output_dir: Directory that receives the CSV file. It is created
            (including parents) when it does not exist yet. The default is
            the location the script has always written to.
        version: Dataset configuration passed as the second positional
            argument to ``load_dataset``.
    """
    # Imported locally so this function does not depend on a file-level import.
    from pathlib import Path

    # Use a distinct name instead of rebinding the ``dataset`` parameter.
    records = load_dataset(dataset, version, split=split)

    frame = pd.DataFrame({
        'article': records['article'],
        'highlights': records['highlights'],
    })

    # ``to_csv`` does not create missing directories, so make sure the
    # destination exists before writing.
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # The index column is kept (pandas default) so the output format is
    # unchanged for existing consumers of these files.
    frame.to_csv(target_dir / '{}.csv'.format(split))
if __name__ == '__main__':
    # Export each split in turn: train, then test, then validation.
    for split_name in ('train', 'test', 'validation'):
        make_dataset(dataset='cnn_dailymail', split=split_name)
|