HackHPC / preprocess.py
Seanyoon's picture
Create preprocess.py
87c4f73
raw
history blame contribute delete
445 Bytes
import pandas as pd
def preprocess_data(data: pd.DataFrame, *, threshold: float = 0.5) -> pd.DataFrame:
    """Drop columns whose values are mostly distinct.

    For every column the ratio ``distinct values / number of rows`` is
    computed (missing values count as a value, as ``Series.unique`` reports
    them). Columns whose ratio is strictly greater than ``threshold`` are
    removed; the original code refers to these as "sensitive" columns.

    Args:
        data: Input frame. It is not modified.
        threshold: Upper bound (inclusive) on the distinct-value ratio a
            column may have and still be kept. Defaults to ``0.5``.

    Returns:
        A new DataFrame containing only the retained columns, in their
        original order.
    """
    row_count = len(data.index)
    if row_count == 0:
        # With no rows the ratio is undefined (division by zero), so no
        # column can qualify for removal; hand back an unmodified copy.
        return data.copy()

    # Work with column positions rather than labels so that frames with
    # duplicate column names only lose the columns that actually exceed the
    # threshold, not every column sharing that label.
    keep_positions = [
        pos
        for pos in range(len(data.columns))
        if len(data.iloc[:, pos].unique()) / row_count <= threshold
    ]
    return data.iloc[:, keep_positions]