Spaces:
Runtime error
Runtime error
import pandas as pd | |
import yaml | |
import os | |
def process_data(split="train"):
    """Resample one raw CSV split and write it to ``data/processed``.

    Reads ``data/raw/<split>.csv``, renames its columns to ``"Unnamed: 0"``,
    ``"input_text"`` and ``"output_text"``, draws a sample with replacement
    (``random_state=1``) whose size is the fraction stored under the
    ``"split"`` key of ``params.yml``, writes the sample to
    ``data/processed/<split>.csv`` and finally deletes the raw file.

    Args:
        split: Base name (without ``.csv``) of the file to process.

    Raises:
        FileNotFoundError: If ``params.yml`` or the raw CSV cannot be opened.
        KeyError: If ``params.yml`` has no ``"split"`` key.
        ValueError: If the raw CSV does not have exactly three columns.
    """
    with open("params.yml") as f:
        params = yaml.safe_load(f)

    raw_path = "data/raw/{}.csv".format(split)
    processed_path = "data/processed/{}.csv".format(split)

    df = pd.read_csv(raw_path)
    df.columns = ["Unnamed: 0", "input_text", "output_text"]
    # The "split" entry of params.yml is used as the sampling fraction.
    df = df.sample(frac=params["split"], replace=True, random_state=1)

    # Write the processed file BEFORE deleting the raw one: if the write
    # fails, the raw data must still be on disk so the step can be re-run.
    os.makedirs(os.path.dirname(processed_path), exist_ok=True)
    df.to_csv(processed_path)

    # The raw file is consumed by this step; tolerate it having been removed
    # concurrently rather than failing after the output was written.
    try:
        os.remove(raw_path)
    except FileNotFoundError:
        pass
if __name__ == "__main__":
    # Process every dataset split in turn when run as a script.
    for split_name in ("train", "test", "validation"):
        process_data(split=split_name)