# T5-Summarization/src/data/process_data.py
# Gagan Bhatia
# Update process_data.py
# 8ab6d60
# raw
# history blame
# 466 Bytes
import pandas as pd
def process_data(
    split='train',
    raw_dir='C:/Users/gbhat/Documents/GitHub/summarization/data/raw',
    processed_dir='C:/Users/gbhat/Documents/GitHub/summarization/data/processed',
):
    """Rename the columns of one raw dataset split and save the result.

    Reads ``<raw_dir>/<split>.csv``, relabels its three columns as
    ``'Unnamed: 0'``, ``'input_text'`` and ``'output_text'``, prints the new
    column index, and writes the frame to ``<processed_dir>/<split>.csv``.

    Args:
        split: Base name (without ``.csv``) of the file to process.
        raw_dir: Directory holding the raw CSV files. Defaults to the
            original hard-coded location so existing callers are unaffected.
        processed_dir: Directory the processed CSV is written to. It is
            created if it does not exist yet.

    Raises:
        FileNotFoundError: If the raw CSV for ``split`` does not exist.
        ValueError: If the raw CSV does not have exactly three columns
            (raised by pandas when assigning the new column labels).
    """
    # Function-scope import keeps this block self-contained.
    import os

    raw_path = os.path.join(raw_dir, '{}.csv'.format(split))
    processed_path = os.path.join(processed_dir, '{}.csv'.format(split))

    df = pd.read_csv(raw_path)
    df.columns = ['Unnamed: 0', 'input_text', 'output_text']
    print(df.columns)

    # Avoid an OSError from to_csv when the output directory is missing.
    os.makedirs(processed_dir, exist_ok=True)

    # NOTE(review): to_csv writes the DataFrame index by default, so the
    # output gains an extra leading column in addition to 'Unnamed: 0'.
    # Kept as-is because downstream readers may depend on this layout.
    df.to_csv(processed_path)
if __name__ == '__main__':
    # When run as a script, process each dataset split in turn.
    for split_name in ('train', 'test', 'validation'):
        process_data(split=split_name)