terra_faction_bot / faction-picker-bot /create_individual_datasets.py
guyar's picture
initial commit
654e9ae
raw
history blame contribute delete
No virus
3.33 kB
import pandas as pd
import argparse
import pickle
import yaml
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
import numpy as np
def _dense_onehot_encoder():
    """Return a OneHotEncoder that emits dense arrays on old and new scikit-learn."""
    try:
        # scikit-learn >= 1.2 spells the option `sparse_output`.
        return OneHotEncoder(sparse_output=False)
    except TypeError:
        # Older releases only accept the original `sparse` keyword.
        return OneHotEncoder(sparse=False)


def _onehot_feature_names(encoder):
    """Return the output column names of a fitted OneHotEncoder."""
    # Prefer the legacy accessor while it exists so the generated names stay
    # exactly what this script produced before; fall back to the replacement
    # API on releases where the legacy method has been removed.
    legacy_names = getattr(encoder, 'get_feature_names', None)
    if legacy_names is not None:
        return legacy_names()
    return encoder.get_feature_names_out()


def _encode_group(group, encoding):
    """Encode one group of columns and keep the group's row index.

    Args:
        group: DataFrame slice holding the columns to encode.
        encoding: 'ordinal' selects OrdinalEncoder; any other value selects
            one-hot encoding (same fallback as the original if/else).

    Returns:
        A DataFrame with the encoded values, indexed like ``group``.
    """
    if encoding == 'ordinal':
        encoded = OrdinalEncoder().fit_transform(group)
        columns = group.columns
    else:  # one-hot
        encoder = _dense_onehot_encoder()
        encoded = encoder.fit_transform(group)
        columns = _onehot_feature_names(encoder)
    # Reuse the source index: the untouched column groups keep it too, and
    # pd.concat(axis=1) aligns rows by index label, not by position. A fresh
    # RangeIndex here would attach encoded values to the wrong rows whenever
    # the caller passes a reordered or filtered frame.
    return pd.DataFrame(data=encoded, columns=columns, index=group.index)


def featurise_features(featdf, params):
    """Adjust the features dataset for the encodings chosen in ``params``.

    The frame is split positionally into column groups:
    column 0 (kept as ``game``), columns 1-6 (round data), columns 7-16
    (``bontiles``), column 17 (player data), columns 18-24 (colours) and the
    last column (map data). Round, player and map groups are re-encoded; the
    others are passed through unchanged. Any columns between position 25 and
    the last column are not included in the result.

    Args:
        featdf: Features DataFrame laid out as described above.
        params: Parsed parameters; reads ``params['prepare-step2']`` keys
            'round-features', 'playercount-features' and 'map-features'.
            The value 'ordinal' selects ordinal encoding, anything else
            selects one-hot encoding.

    Returns:
        A DataFrame with the groups concatenated column-wise in the order
        game, round, bontiles, player, colours, map, sharing ``featdf``'s
        row index.
    """
    step_params = params['prepare-step2']

    game = featdf.iloc[:, :1]
    rounddata = _encode_group(featdf.iloc[:, 1:7], step_params['round-features'])
    bontiles = featdf.iloc[:, 7:17]
    playerdata = _encode_group(
        featdf.iloc[:, 17:18], step_params['playercount-features']
    )
    colours = featdf.iloc[:, 18:25]
    mapdata = _encode_group(featdf.iloc[:, -1:], step_params['map-features'])

    return pd.concat(
        [game, rounddata, bontiles, playerdata, colours, mapdata], axis=1
    )
def main(params):
    """Build one (vp, features) dataset per faction and pickle the collection.

    Reads the victory-point CSV and the features CSV named in
    ``params['prepare']``, encodes the features with ``featurise_features``,
    and for every faction column of the victory-point table keeps only the
    rows where that faction has a value. The result is a dict mapping faction
    name to ``{'vp': Series, 'features': DataFrame}``, both indexed by game,
    written with pickle to ``params['prepare-step2']['pickle-dir']``.

    Rows of the two CSVs are matched through their shared row index labels,
    so both files are expected to list the games in the same row order.

    Args:
        params: Parsed parameters; reads ``params['prepare']['vp-data-dir']``,
            ``params['prepare']['feature-data-dir']`` and
            ``params['prepare-step2']['pickle-dir']`` (each used as a file
            path), and is forwarded to ``featurise_features``.

    Raises:
        KeyError: If the features CSV has no 'Unnamed: 0' column to drop, or
            a required params key is missing.
    """
    vp_path = params['prepare']['vp-data-dir']
    feature_path = params['prepare']['feature-data-dir']
    pickle_path = params['prepare-step2']['pickle-dir']

    vpdf = pd.read_csv(vp_path).sort_values('game')
    featdf = pd.read_csv(feature_path).sort_values('game')
    featdf = featdf.drop(columns=['Unnamed: 0'])
    featdf = featurise_features(featdf, params)

    # Put both frames back into index-label order once, up front. Neither
    # frame is modified inside the loop, so re-sorting per faction (as the
    # code used to do) was repeated work with the same outcome.
    vpdf = vpdf.sort_index()
    featdf = featdf.sort_index()

    factions = [col for col in vpdf.columns if col not in ('game', 'Unnamed: 0')]

    each_faction_dataset = {}
    for faction in factions:
        # Rows where this faction has a recorded value.
        has_value = vpdf[faction].notna()

        vpdata = pd.Series(
            index=vpdf['game'][has_value],
            data=vpdf[faction][has_value].values,
        )
        # The boolean mask is aligned to featdf by index label.
        featdata = featdf[has_value].set_index('game')

        each_faction_dataset[faction] = {'vp': vpdata, 'features': featdata}

    with open(pickle_path, 'wb') as pklfile:
        pickle.dump(each_faction_dataset, pklfile)
if __name__ == '__main__':
    # Command-line entry point: read the YAML file given via --params and
    # hand the parsed parameters to main().
    cli = argparse.ArgumentParser(description='Input DVC params.')
    cli.add_argument('--params', type=str)
    params_path = cli.parse_args().params

    with open(params_path, 'r') as params_file:
        loaded_params = yaml.safe_load(params_file)

    main(loaded_params)