File size: 4,980 Bytes
70b95b8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
#!/usr/bin/python
################################################################################
# retrieveBRENDA
# Accesses the BRENDA web client and retrieves all EC data. Creates files with
# BRENDA output for all organisms and EC numbers for which there is data.
#
# Benjamin Sanchez. Last edited: 2018-04-10
################################################################################
# Updated by:
# Author: LE YUAN
# This code should be run under the Python 2.7 environment
#INPUTS (edit these values before running the script):
#1) Path in which you wish to store all BRENDA queries. The main script changes
#   into this directory with os.chdir before any file is written, so it must
#   already exist. Example of an absolute path:
# output_path = '/Users/.../brenda_parser/raw_data'
output_path = '../../Data/database/brenda_ec'
#2) Last field processed (if the program was interrupted), e.g. 'KM'. It is
#   compared against the entries of the 'fields' list in the main script; if it
#   matches none of them, no field is retrieved. If you want to start from
#   scratch, leave empty:
last_field = ''
#3) Last EC number processed (if the program was interrupted), e.g. '1.2.3.4'.
#   Retrieval resumes at this EC number, which is itself queried again. If you
#   want to start from scratch, leave empty:
last_EC = ''
#4) E-mail in BRENDA (first part of the credentials string used in queries):
email = 'youremail'
#5) Password in BRENDA. It is written here in plain text; the main script
#   replaces it by its SHA-256 hex digest before building the credentials:
password = 'yourpassword'
################################################################################
#extract_field: Function that extracts all BRENDA data from a specific field.
def extract_field(field,last):
    """Retrieve all BRENDA data of one field and store it in txt files.

    For every EC number that BRENDA reports for the given field, the data of
    all organisms is queried and, if anything is returned, written to
    'EC<number>_<field>.txt' in the current working directory.

    Relies on the module-level names 'client' (SOAP proxy), 'credentials'
    ('email,hashed_password') and 'time', which the main script defines
    before calling this function.

    Args:
        field: One of 'KM', 'MW', 'PATH', 'SEQ', 'SA' or 'KCAT'.
        last: EC number at which to resume (it is queried again). An empty
            value means starting from the first EC number. If it is not in
            the list returned by BRENDA, nothing is retrieved.

    Raises:
        ValueError: If 'field' is not one of the supported fields.
    """
    #SOAP method names for each field: the first one lists the EC numbers for
    #which there is data on BRENDA, the second one retrieves that data:
    soap_methods = {
        'KM': ('getEcNumbersFromKmValue', 'getKmValue'),
        'MW': ('getEcNumbersFromMolecularWeight', 'getMolecularWeight'),
        'PATH': ('getEcNumbersFromPathway', 'getPathway'),
        'SEQ': ('getEcNumbersFromSequence', 'getSequence'),
        'SA': ('getEcNumbersFromSpecificActivity', 'getSpecificActivity'),
        'KCAT': ('getEcNumbersFromTurnoverNumber', 'getTurnoverNumber'),
    }
    #Fail early and explicitly instead of hitting an unbound variable later:
    if field not in soap_methods:
        raise ValueError('Unknown BRENDA field: ' + repr(field))
    list_method, data_method = soap_methods[field]

    #Construct list of EC numbers, based on the enzymes for which there is
    #data on BRENDA ('!' separates the EC numbers in the answer):
    ECstring = getattr(client, list_method)(credentials)
    EClist = ECstring.split('!')

    #Loop that retrieves data from BRENDA and saves it in txt files. Starts
    #from the last EC number queried:
    max_attempts = 10
    started = False
    for ECnumber in EClist:
        #Detects the starting point (the last EC number queried):
        if not started:
            if last and ECnumber != last:
                continue
            started = True

        #The code will retrieve data for all organisms:
        query = credentials + ',ecNumber*' + ECnumber + '#organism*'
        file_name = 'EC' + ECnumber + '_' + field
        last_error = None

        #The try/except block inside the loop is to avoid timeout, PROXY and
        #encoding errors. Only Exception is caught, so that Ctrl-C still
        #stops the script:
        for _ in range(max_attempts):
            try:
                print(file_name)
                data = getattr(client, data_method)(query)
                #Once the query was performed successfully, the data is
                #copied in a txt file (nothing is written for empty answers):
                if data:
                    with open(file_name + '.txt', 'w') as fid:
                        fid.write(data.decode('ascii', 'ignore'))
                break
            except Exception as error:
                #Let the server cool off for a bit before trying again:
                last_error = error
                time.sleep(1)
        else:
            #If after all attempts it still fails, the query is discarded,
            #but the failure is reported instead of being silently dropped:
            print('Discarded ' + file_name + ' after ' + str(max_attempts) +
                  ' failed attempts: ' + repr(last_error))
################################################################################
#Main script
#Modules used by the main script and by extract_field:
import hashlib
import os
import string
import time

from SOAPpy import SOAPProxy ## for usage without WSDL file

#Change path: all output files are written with relative names, so the script
#works from inside output_path and goes back to the previous directory when it
#finishes, even if the retrieval fails or is interrupted:
prev_path = os.getcwd()
os.chdir(output_path)
try:
    #Construct BRENDA client:
    endpointURL = "https://www.brenda-enzymes.org/soap/brenda_server.php"
    client = SOAPProxy(endpointURL)
    #The credentials carry the SHA-256 hex digest of the password, not the
    #plain text:
    password = hashlib.sha256(password).hexdigest()
    credentials = email + ',' + password

    #Information to retrieve: km, M.W., pathway, sequence, specific activity
    #and kcat. Full list of supported fields:
    # fields = ['KM','MW','PATH','SA','KCAT']
    fields = ['KCAT']

    #Loop that retrieves all fields. Starts by the last one queried:
    start = 0
    #last_EC only applies to the field that was interrupted; the fields that
    #follow it must be retrieved from their first EC number:
    resume_EC = last_EC
    for field in fields:
        if not start and (field == last_field or last_field == ''):
            start = 1
        if start:
            extract_field(field,resume_EC)
            resume_EC = ''
finally:
    os.chdir(prev_path)
################################################################################
|