jie1's picture
Upload 17 files
70b95b8
#!/usr/bin/python
################################################################################
# retrieveBRENDA
# Acces the web client and retrieves all EC data from BRENDA. Creates files with
# BRENDA output for all organisms and EC numbers for which there is data.
#
# Benjamin Sanchez. Last edited: 2018-04-10
################################################################################
# Updated by:
# Author: LE YUAN
# This code should be run under the Python 2.7 environment
#INPUTS:
#1) Path in which you wish to store all BRENDA queries:
# output_path = '/Users/.../brenda_parser/raw_data'
output_path = '../../Data/database/brenda_ec'
#2) Last field processed (if the program was interrupted), e.g. 'KM'. If you
# want to start from scratch, leave empty:
last_field = ''
#3) Last EC number processed (if the program was interrupted), e.g. '1.2.3.4'.
# If you want to start from scratch, leave empty:
last_EC = ''
#4) E-mail in BRENDA:
email = 'youremail'
#5) Password in BRENDA:
password = 'yourpassword'
################################################################################
#extract_field: Function that extracts all BRENDA data from a specific field.
def extract_field(field,last):
#Construct list of EC numbers, based on the enzymes for which there is
#data on BRENDA:
# if field == 'KCAT':
# ECstring = client.getEcNumbersFromTurnoverNumber(credentials)
if field == 'KM':
ECstring = client.getEcNumbersFromKmValue(credentials)
elif field == 'MW':
ECstring = client.getEcNumbersFromMolecularWeight(credentials)
elif field == 'PATH':
ECstring = client.getEcNumbersFromPathway(credentials)
elif field == 'SEQ':
ECstring = client.getEcNumbersFromSequence(credentials)
elif field == 'SA':
ECstring = client.getEcNumbersFromSpecificActivity(credentials)
elif field == 'KCAT':
ECstring = client.getEcNumbersFromTurnoverNumber(credentials)
EClist = ECstring.split('!')
#Loop that retrieves data from BRENDA and saves it in txt files. Starts
#from the last EC number queried:
start = 0
for ECnumber in EClist:
#Detects the starting point (the last EC number queried):
if not start and (ECnumber == last or last == ''):
start = 1
if start:
#The code will retrieve data for all organisms:
query = credentials + ',ecNumber*' + ECnumber + '#organism*'
succes = 0
#The try/except block inside the while is to avoid timeout PROXY
#and encoding errors:
while succes < 10:
try:
file_name = 'EC' + ECnumber + '_' + field
print(file_name)
if field == 'KM':
data = client.getKmValue(query)
elif field == 'MW':
data = client.getMolecularWeight(query)
elif field == 'PATH':
data = client.getPathway(query)
elif field == 'SEQ':
data = client.getSequence(query)
elif field == 'SA':
data = client.getSpecificActivity(query)
elif field == 'KCAT':
data = client.getTurnoverNumber(query)
#Once the querie was performed succesfully, the data is
#copied in txt files:
if data:
fid = open(file_name + '.txt','w')
fid.write(data.decode('ascii','ignore'))
fid.close()
succes = 10
except:
#Let the server cool of for a bit. If after 10 times it
#still fails, the query is discarded:
time.sleep(1)
succes += 1
################################################################################
#Main script
#Change path:
import os
prev_path = os.getcwd()
os.chdir(output_path)
#Construct BRENDA client:
import string
import hashlib
from SOAPpy import SOAPProxy ## for usage without WSDL file
endpointURL = "https://www.brenda-enzymes.org/soap/brenda_server.php"
client = SOAPProxy(endpointURL)
password = hashlib.sha256(password).hexdigest()
credentials = email + ',' + password
#Information to retrieve: km, M.W., pathway, sequence, specific activity and kcat.
# fields = ['KM','MW','PATH','SA','KCAT']
fields = ['KCAT']
import time
#Loop that retrieves all fields. Starts by the last one queried:
start = 0
for field in fields:
if not start and (field == last_field or last_field == ''):
start = 1
if start:
extract_field(field,last_EC)
os.chdir(prev_path)
################################################################################