Spaces:
Runtime error
Runtime error
File size: 623 Bytes
1c6ec0a 2c042c1 52aeedb 33908fd 987e02d 043f19f 1b154c5 9951e30 27edbdc e1f665f 1528dbf 3e637e3 684ff8e fc0bcb4 255f974 66ca39d e83f5ad d9476c0 71e4eea 7b25728 7285843 8ab6d60 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
import pandas as pd
import yaml
import os
def process_data(split: str = 'train') -> None:
    """Resample one raw CSV split and move it into the processed directory.

    Reads the sampling fraction from the ``split`` key of ``params.yml``,
    loads ``data/raw/<split>.csv``, relabels its columns, draws a
    fixed-seed sample with replacement, writes the result to
    ``data/processed/<split>.csv`` and finally deletes the raw file.

    Args:
        split: Base name (without ``.csv``) of the file to process.

    Raises:
        FileNotFoundError: If ``params.yml`` or the raw CSV is missing.
        KeyError: If the mapping loaded from ``params.yml`` has no
            ``split`` key.
        ValueError: If the raw CSV does not have exactly three columns.
    """
    with open("params.yml") as f:
        params = yaml.safe_load(f)

    raw_path = f'data/raw/{split}.csv'
    processed_dir = 'data/processed'
    processed_path = f'{processed_dir}/{split}.csv'

    df = pd.read_csv(raw_path)
    # Assigning a 3-item list requires the CSV to have exactly three columns.
    df.columns = ['Unnamed: 0', 'input_text', 'output_text']
    # Sampling is done with replacement, so rows may repeat; the fixed seed
    # keeps the selection reproducible between runs.
    df = df.sample(frac=params['split'], replace=True, random_state=1)

    # Make sure the destination exists; to_csv does not create directories.
    os.makedirs(processed_dir, exist_ok=True)
    # Write the output BEFORE deleting the input so that a failed write
    # cannot leave us with neither the raw nor the processed file.
    df.to_csv(processed_path)

    # NOTE: this step is destructive -- once the raw file is gone, calling
    # this function again for the same split fails at read_csv.
    try:
        os.remove(raw_path)
    except FileNotFoundError:
        # Already removed by something else; nothing left to clean up.
        pass
if __name__ == '__main__':
    # Script entry point: process each dataset split in turn, in the
    # order train -> test -> validation.
    for split_name in ('train', 'test', 'validation'):
        process_data(split=split_name)
|