File size: 466 Bytes
1c6ec0a
33908fd
987e02d
043f19f
91ed5d6
3e637e3
3c2d31d
5483f1b
e83f5ad
d9476c0
71e4eea
7b25728
7285843
8ab6d60
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
from pathlib import Path

import pandas as pd


# Default locations match the paths this script was originally hard-coded to.
_DEFAULT_RAW_DIR = 'C:/Users/gbhat/Documents/GitHub/summarization/data/raw'
_DEFAULT_PROCESSED_DIR = 'C:/Users/gbhat/Documents/GitHub/summarization/data/processed'


def process_data(split='train', raw_dir=_DEFAULT_RAW_DIR,
                 processed_dir=_DEFAULT_PROCESSED_DIR):
    """Rename the columns of one raw split CSV and write it to the processed folder.

    Reads ``<raw_dir>/<split>.csv``, replaces its column labels with
    ``['Unnamed: 0', 'input_text', 'output_text']``, prints the new labels and
    writes the frame to ``<processed_dir>/<split>.csv``.

    Args:
        split: Base name (without ``.csv``) of the file to convert.
        raw_dir: Directory holding the raw CSV files (str or path-like).
        processed_dir: Directory receiving the converted CSV (str or
            path-like). It is created if it does not exist yet.

    Raises:
        FileNotFoundError: If the raw CSV for ``split`` does not exist.
        ValueError: If the raw CSV does not have exactly three columns.
    """
    source_path = Path(raw_dir) / f'{split}.csv'
    target_path = Path(processed_dir) / f'{split}.csv'

    df = pd.read_csv(source_path)
    df.columns = ['Unnamed: 0', 'input_text', 'output_text']
    print(df.columns)

    # Without this, to_csv fails when the output folder has not been created.
    target_path.parent.mkdir(parents=True, exist_ok=True)

    # NOTE(review): to_csv writes the row index by default, so the output has
    # an extra leading index column in addition to 'Unnamed: 0'. Kept as-is
    # because consumers of the processed files may rely on this layout;
    # confirm before switching to index=False.
    df.to_csv(target_path)


if __name__ == '__main__':
    # Convert every dataset partition, in order: train, test, validation.
    for split_name in ('train', 'test', 'validation'):
        process_data(split=split_name)