Malware-Prediction / Malware-Prediction.py
khulnasoft's picture
Create Malware-Prediction.py
589c7b6 verified
raw
history blame contribute delete
679 Bytes
# Stack the two input frames into a single frame; the encodings built further
# down are computed from this combined `df`.
frames = [train, test]
df = pd.concat(objs=frames)

# Columns that the loop below replaces with a frequency-rank code.
list_frequency_encoding = [
    'AppVersion',
    'AvSigVersion',
    'Census_OSVersion',
    'EngineVersion',
    'OsBuildLab',
]
def frequency_encoding(feature, frame=None):
    """Build a value -> frequency-rank mapping for one column.

    Distinct values are ranked in the order returned by ``value_counts()``
    (most frequent first, starting at rank 0).  Every value that occurs
    exactly once is collapsed into a single shared label, one above the
    highest rank that was kept, so rare values do not each get their own
    code.

    Args:
        feature: Name of the column to encode.
        frame: DataFrame to read the column from.  Defaults to the
            module-level ``df``.

    Returns:
        dict mapping each distinct value counted by ``value_counts()``
        (missing values are not counted) to its integer label.  Empty when
        nothing was counted.
    """
    source = df if frame is None else frame
    counts = source[feature].value_counts()

    # Work on the counts Series directly rather than on the column names
    # produced by ``reset_index()``: those names changed in pandas 2.0
    # ("index"/<feature> became <feature>/"count"), which broke both the
    # singleton test and the re-indexing step.
    values = counts.index.tolist()
    singleton_flags = (counts == 1).tolist()

    kept_ranks = [
        rank for rank, single in enumerate(singleton_flags) if not single
    ]
    # When no rank is kept (every value occurs once) fall back to label 0
    # instead of a NaN label that could not be cast to an integer later.
    shared_label = max(kept_ranks) + 1 if kept_ranks else 0

    return {
        value: shared_label if single else rank
        for rank, (value, single) in enumerate(zip(values, singleton_flags))
    }
# Overwrite each listed column of `df` with its frequency-rank code.
for feature in tqdm(list_frequency_encoding):
    freq_enc_dict = frequency_encoding(feature)

    # Look every cell up in the mapping; anything absent from it becomes NaN.
    df[feature] = df[feature].map(
        lambda value: freq_enc_dict.get(value, np.nan)
    )

    # Store the codes as 64-bit integers.
    # NOTE(review): a NaN left by the lookup above would make this cast
    # raise -- confirm the listed columns contain no missing values.
    df[feature] = df[feature].astype('int64')