File size: 1,519 Bytes
6e2e202
 
c2a02c6
 
 
 
 
 
 
 
 
 
 
 
 
debc743
6e2e202
debc743
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import numpy as np

def standardize(df, get_columns):
    """Blank out the annotation columns of ``df`` and align it to a reference layout.

    Each column listed below is overwritten with the string ``'nan'`` (and
    created if it does not exist yet).  The assignment is done on ``df``
    itself, so the caller's frame is modified as a side effect.

    Args:
        df: DataFrame to normalise; modified in place.
        get_columns: Object exposing a ``columns`` attribute (used as
            ``get_columns.columns``) that gives the labels, and their order,
            of the returned frame.

    Returns:
        ``df`` restricted to ``get_columns.columns``, in that order.

    Raises:
        KeyError: If ``get_columns.columns`` names a column that ``df`` does
            not have after the placeholder columns were added.
    """
    placeholder = 'nan'
    annotation_columns = (
        'sasa', 'domaindistance3D', 'disulfide', 'intMet', 'intramembrane',
        'naturalVariant', 'dnaBinding', 'activeSite', 'nucleotideBinding',
        'lipidation', 'site', 'transmembrane', 'crosslink', 'mutagenesis',
        'strand', 'helix', 'turn', 'metalBinding', 'repeat', 'caBinding',
        'topologicalDomain', 'bindingSite', 'region', 'signalPeptide',
        'modifiedResidue', 'zincFinger', 'motif', 'coiledCoil', 'peptide',
        'transitPeptide', 'glycosylation', 'propeptide',
    )
    # Per the original author's note: elsewhere these columns hold 3D
    # distances, but no distance is calculated on this path, so every one of
    # them is filled with the same placeholder.
    for name in annotation_columns:
        df[name] = placeholder

    return df[get_columns.columns]


def finalTouch(data):
    """Clean the ``sasa`` column and label every row as core or surface.

    Steps, in order:

    1. For string ``sasa`` entries, keep only the text before the first
       ``'*'``.  Non-string entries (e.g. a value that is already NaN) are
       left untouched instead of raising ``TypeError``.
    2. Replace the placeholders ``'N/A'`` (``sasa`` only) and ``'   N/A'``,
       ``'None'``, ``''`` (whole frame) with NaN.
    3. Convert ``sasa`` to float.
    4. Build a string-typed copy of the frame and add/overwrite ``trsh4``:
       ``'core'`` when ``sasa < 5``, ``'surface'`` when ``sasa >= 5`` and
       ``'nan'`` when ``sasa`` is missing.

    Steps 1-3 are applied to ``data`` itself, so the caller's frame is
    modified; only step 4 works on a copy.

    Args:
        data: DataFrame with a ``sasa`` column.  Modified in place by the
            cleaning steps.

    Returns:
        A new DataFrame, cast with ``astype(str)``, that always contains a
        ``trsh4`` column (also when ``data`` has no rows).

    Raises:
        KeyError: If ``data`` has no ``sasa`` column.
        ValueError: If a cleaned ``sasa`` entry cannot be parsed as a float.
    """
    # Only strings can carry the '*' suffix; split('*')[0] is a no-op for
    # strings without it, so no separate membership test is needed.
    data['sasa'] = data['sasa'].map(
        lambda value: value.split('*')[0] if isinstance(value, str) else value
    )

    # np.nan rather than np.NaN: the latter alias was removed in NumPy 2.0.
    data['sasa'] = data['sasa'].replace({'N/A': np.nan})
    data.replace({'   N/A': np.nan, 'None': np.nan, '': np.nan}, inplace=True)
    data['sasa'] = data['sasa'].astype(float)

    # Classify from the numeric column *before* stringifying, so the result
    # does not depend on how astype(str) renders missing values.
    sasa = data['sasa']
    labels = np.where(
        sasa.isna(), 'nan', np.where(sasa < 5, 'core', 'surface')
    )

    result = data.astype(str)
    # Positional assignment: works for duplicated index labels and creates
    # the column even when there are no rows.
    result['trsh4'] = labels
    return result