ASCARIS / code /standard.py
fatmacankara's picture
Update code/standard.py
6e2e202
raw
history blame
No virus
1.52 kB
import numpy as np
def standardize(df, get_columns):
    """Blank out the annotation columns and align the frame to a template.

    Every column named in the fixed list below is overwritten (or created)
    on ``df`` with the literal string ``'nan'``. Per the original author's
    note, these columns hold 3D distances elsewhere in the pipeline, and no
    distance is calculated on this code path.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to standardize. It is modified in place by the column
        overwrite before the selection is taken.
    get_columns : pandas.DataFrame
        Template whose ``.columns`` gives the columns (and their order) of
        the returned frame.

    Returns
    -------
    pandas.DataFrame
        ``df`` restricted to ``get_columns.columns``.
    """
    placeholder = 'nan'
    blanked_columns = (
        'sasa', 'domaindistance3D', 'disulfide', 'intMet', 'intramembrane',
        'naturalVariant', 'dnaBinding', 'activeSite', 'nucleotideBinding',
        'lipidation', 'site', 'transmembrane', 'crosslink', 'mutagenesis',
        'strand', 'helix', 'turn', 'metalBinding', 'repeat', 'caBinding',
        'topologicalDomain', 'bindingSite', 'region', 'signalPeptide',
        'modifiedResidue', 'zincFinger', 'motif', 'coiledCoil', 'peptide',
        'transitPeptide', 'glycosylation', 'propeptide',
    )
    # Assign one column at a time so columns missing from df are created.
    for column_name in blanked_columns:
        df[column_name] = placeholder
    return df[get_columns.columns]
def finalTouch(data):
    """Clean the 'sasa' column and label each row as core or surface.

    Steps:
      1. For string 'sasa' cells, keep only the text before the first '*'.
      2. Turn the placeholders 'N/A' (in 'sasa') and ' N/A', 'None', ''
         (anywhere in the frame) into NaN.
      3. Cast 'sasa' to float.
      4. Convert the whole frame to strings and add a 'trsh4' column:
         'core' when sasa < 5, 'surface' when sasa >= 5, and 'nan' when
         sasa is missing.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'sasa' column. Steps 1-3 modify this frame in place.

    Returns
    -------
    pandas.DataFrame
        A new frame in which every value is a string (missing values are
        the string 'nan'), with the 'trsh4' column set for every row.

    Raises
    ------
    ValueError
        If a 'sasa' value left after cleaning cannot be converted to float.
    """
    # np.NaN was removed in NumPy 2.0; np.nan works on every version.
    missing = np.nan

    # Only string cells can carry a '*' suffix. Numeric or NaN cells are
    # passed through untouched instead of raising on the `in` test.
    data['sasa'] = data['sasa'].map(
        lambda value: value.split('*')[0] if isinstance(value, str) else value
    )

    data['sasa'] = data['sasa'].replace({'N/A': missing})
    data.replace({' N/A': missing}, inplace=True)
    data.replace({'None': missing, '': missing}, inplace=True)

    sasa = data['sasa'].astype(float)
    data['sasa'] = sasa

    # Classify on the numeric column so a missing value is detected directly
    # rather than by comparing against its string rendering.
    labels = np.where(
        sasa.isna(),
        'nan',
        np.where(sasa < 5, 'core', 'surface'),
    )

    # Newer pandas releases may keep missing values as NaN under astype(str);
    # fill them so the output consistently uses the string 'nan'.
    data = data.astype(str).fillna('nan')
    data['trsh4'] = labels
    return data