File size: 4,980 Bytes
70b95b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/python
################################################################################
# retrieveBRENDA
# Accesses the BRENDA web client and retrieves all EC data from BRENDA. Creates
# files with the BRENDA output for all organisms and EC numbers that have data.
#
# Benjamin Sanchez. Last edited: 2018-04-10
################################################################################

# Updated by:
# Author: LE YUAN
# This code should be run under the Python 2.7 environment


#INPUTS:
#1) Directory in which all BRENDA query results will be stored, for instance
#   output_path = '/Users/.../brenda_parser/raw_data'
output_path = "../../Data/database/brenda_ec"
#2) Field that was being processed when a previous run was interrupted
#   (e.g. 'KM'). Leave empty to start from scratch:
last_field = ""
#3) EC number that was being processed when a previous run was interrupted
#   (e.g. '1.2.3.4'). Leave empty to start from scratch:
last_EC = ""
#4) E-mail of the BRENDA account:
email = "youremail"
#5) Password of the BRENDA account:
password = "yourpassword"

################################################################################

#extract_field: Function that extracts all BRENDA data from a specific field.
def extract_field(field,last):
    """Retrieve all BRENDA data of one field and store it in txt files.

    The list of EC numbers for which BRENDA has data in the given field is
    requested first. Then, for each EC number, the data of all organisms is
    queried and, if the answer is not empty, written to
    'EC<number>_<field>.txt' in the current working directory.

    Uses the module-level names client, credentials and time.

    Args:
        field: One of 'KM', 'MW', 'PATH', 'SEQ', 'SA' or 'KCAT'.
        last: EC number from which the retrieval is resumed (that EC number
            is queried again). If empty, every EC number is processed. If
            it is not empty and does not appear in the list returned by
            BRENDA, nothing is retrieved.

    Raises:
        ValueError: If field is not one of the supported fields.
    """
    #SOAP method names for each field: the first one lists the EC numbers for
    #which there is data on BRENDA, the second one retrieves the data itself.
    methods = {
        'KM':   ('getEcNumbersFromKmValue',          'getKmValue'),
        'MW':   ('getEcNumbersFromMolecularWeight',  'getMolecularWeight'),
        'PATH': ('getEcNumbersFromPathway',          'getPathway'),
        'SEQ':  ('getEcNumbersFromSequence',         'getSequence'),
        'SA':   ('getEcNumbersFromSpecificActivity', 'getSpecificActivity'),
        'KCAT': ('getEcNumbersFromTurnoverNumber',   'getTurnoverNumber'),
    }

    #Fail early and explicitly instead of hitting an unbound variable later:
    if field not in methods:
        raise ValueError('Unknown BRENDA field: ' + repr(field))
    list_method, data_method = methods[field]

    #Construct list of EC numbers, based on the enzymes for which there is
    #data on BRENDA (the server answers with a '!'-separated string):
    ECstring = getattr(client, list_method)(credentials)
    EClist = ECstring.split('!')

    #Loop that retrieves data from BRENDA and saves it in txt files. Starts
    #from the last EC number queried:
    start = False
    for ECnumber in EClist:

        #Detects the starting point (the last EC number queried):
        if not start and (ECnumber == last or last == ''):
            start = True

        if not start:
            continue

        #The code will retrieve data for all organisms:
        query     = credentials + ',ecNumber*' + ECnumber + '#organism*'
        file_name = 'EC' + ECnumber + '_' + field

        #The try/except block inside the loop is to avoid timeout PROXY and
        #encoding errors. Each query is attempted at most 10 times:
        for attempt in range(10):
            try:
                print(file_name)
                data = getattr(client, data_method)(query)

                #Once the query was performed successfully, the data is
                #copied in txt files ('with' closes the file even if the
                #write fails):
                if data:
                    with open(file_name + '.txt','w') as fid:
                        fid.write(data.decode('ascii','ignore'))

                break

            except Exception:
                #Only regular errors are retried, so that Ctrl-C and
                #sys.exit() can still stop the program. Let the server cool
                #off for a bit. If after 10 times it still fails, the query
                #is discarded:
                time.sleep(1)

################################################################################

#Main script
                    
#Modules required by the script (time is used inside extract_field):
import os
import string
import hashlib
import time
from SOAPpy import SOAPProxy ## for usage without WSDL file

#Construct BRENDA client. The password is replaced by its SHA-256 hex digest
#before being joined with the e-mail into the credentials string:
endpointURL = "https://www.brenda-enzymes.org/soap/brenda_server.php"
client      = SOAPProxy(endpointURL)
password    = hashlib.sha256(password).hexdigest()
credentials = email + ',' + password

#Information to retrieve. extract_field supports km, M.W., pathway, sequence,
#specific activity and kcat ('KM','MW','PATH','SEQ','SA','KCAT'); currently
#only kcat is retrieved:
# fields = ['KM','MW','PATH','SA','KCAT']
fields = ['KCAT']

#Change path, so that all output files are created inside output_path:
prev_path = os.getcwd()
os.chdir(output_path)

try:
    #Loop that retrieves all fields. Starts by the last one queried:
    start     = 0
    resume_EC = last_EC
    for field in fields:
        if not start and (field == last_field or last_field == ''):
            start = 1

        if start:
            extract_field(field,resume_EC)
            #The EC number to resume from only applies to the first field
            #processed; the following fields must start from scratch,
            #otherwise their EC numbers before last_EC would be skipped:
            resume_EC = ''

finally:
    #Go back to the original path, even if the retrieval was aborted:
    os.chdir(prev_path)

################################################################################