Spaces:

jie1
/

succ1

Build error

File size: 4,980 Bytes

70b95b8

#!/usr/bin/python
################################################################################
# retrieveBRENDA
# Accesses the web client and retrieves all EC data from BRENDA. Creates files with
# BRENDA output for all organisms and EC numbers for which there is data.
#
# Benjamin Sanchez. Last edited: 2018-04-10
################################################################################

# Updated by:
# Author: LE YUAN
# This code should be run under the Python 2.7 environment


#INPUTS:
#1) Path in which you wish to store all BRENDA queries. The main script changes
#   the working directory to this path before any file is written, e.g.:
# output_path = '/Users/.../brenda_parser/raw_data'
output_path = '../../Data/database/brenda_ec'
#2) Last field processed (if the program was interrupted), e.g. 'KM'. Fields
#   listed before it are skipped. If you want to start from scratch, leave
#   empty:
last_field = ''
#3) Last EC number processed (if the program was interrupted), e.g. '1.2.3.4'.
#   Retrieval restarts at this EC number (it is queried again). If you want to
#   start from scratch, leave empty:
last_EC = ''
#4) E-mail in BRENDA (first part of the credentials string sent with each
#   query):
email = 'youremail'
#5) Password in BRENDA, in plain text. The main script replaces it by its
#   SHA-256 hex digest before building the credentials string:
password = 'yourpassword'

################################################################################

#extract_field: Function that extracts all BRENDA data from a specific field.

#Maps each supported field code to the pair of BRENDA client methods used for
#it: (method listing the EC numbers that have data, method retrieving the data).
_BRENDA_METHODS = {
    'KM':   ('getEcNumbersFromKmValue',          'getKmValue'),
    'MW':   ('getEcNumbersFromMolecularWeight',  'getMolecularWeight'),
    'PATH': ('getEcNumbersFromPathway',          'getPathway'),
    'SEQ':  ('getEcNumbersFromSequence',         'getSequence'),
    'SA':   ('getEcNumbersFromSpecificActivity', 'getSpecificActivity'),
    'KCAT': ('getEcNumbersFromTurnoverNumber',   'getTurnoverNumber'),
}


def _ascii_text(data):
    """Return a BRENDA reply as text, dropping every non-ASCII character."""
    if isinstance(data, bytes):
        return data.decode('ascii','ignore')
    #Text (unicode) replies cannot be decoded directly: calling decode on them
    #raises as soon as a non-ASCII character is present, so encode first.
    return data.encode('ascii','ignore').decode('ascii')


def extract_field(field,last,max_attempts=10,retry_delay=1):
    """Retrieve all BRENDA data of one field and save it in txt files.

    For every EC number that BRENDA reports for the field, the data of all
    organisms is queried and, if the reply is not empty, written to
    'EC<ECnumber>_<field>.txt' in the current working directory.

    Relies on the module-level names 'client' (BRENDA SOAP proxy),
    'credentials' ('<email>,<hashed password>') and 'time'.

    Arguments:
    field        -- one of 'KM', 'MW', 'PATH', 'SEQ', 'SA' or 'KCAT'.
    last         -- EC number at which retrieval starts (it is queried again);
                    '' starts from the first EC number. If it is not in the
                    list returned by BRENDA, nothing is retrieved.
    max_attempts -- number of times a query is tried before it is discarded.
    retry_delay  -- seconds to wait after each failed attempt.

    Raises:
    ValueError -- if field is not one of the supported field codes.
    """
    if field not in _BRENDA_METHODS:
        raise ValueError('Unknown BRENDA field %r, expected one of: %s'
                         % (field, ', '.join(sorted(_BRENDA_METHODS))))
    list_method, data_method = _BRENDA_METHODS[field]

    #Construct list of EC numbers, based on the enzymes for which there is
    #data on BRENDA. Empty entries (e.g. from an empty reply) are dropped so
    #that no query is sent for a blank EC number:
    ECstring = getattr(client, list_method)(credentials)
    EClist = [ec for ec in ECstring.split('!') if ec]

    #Loop that retrieves data from BRENDA and saves it in txt files. Starts
    #from the last EC number queried:
    start = False
    for ECnumber in EClist:

        #Detects the starting point (the last EC number queried):
        if not start and (ECnumber == last or last == ''):
            start = True
        if not start:
            continue

        file_name = 'EC' + ECnumber + '_' + field
        print(file_name)

        #The code will retrieve data for all organisms:
        query = credentials + ',ecNumber*' + ECnumber + '#organism*'

        #Only the remote call is retried, to ride out timeout/PROXY errors.
        #Exception (not a bare except) keeps Ctrl-C and SystemExit working:
        for _ in range(max_attempts):
            try:
                data = getattr(client, data_method)(query)
                break
            except Exception:
                #Let the server cool off for a bit before the next attempt:
                time.sleep(retry_delay)
        else:
            #Every attempt failed: the query is discarded, but no longer
            #silently.
            print('Discarded ' + file_name + ' after ' + str(max_attempts)
                  + ' failed attempts')
            continue

        if not data:
            continue

        try:
            text = _ascii_text(data)
        except AttributeError:
            #Reply is neither bytes nor text; skip it instead of aborting the
            #whole retrieval.
            print('Discarded ' + file_name + ': unexpected reply type '
                  + type(data).__name__)
            continue

        #Once the query was performed successfully, the data is copied in a
        #txt file (closed even if the write fails):
        with open(file_name + '.txt','w') as fid:
            fid.write(text)

################################################################################

#Main script
#Runs at module level: moves into output_path, builds the BRENDA client and
#credentials used by extract_field, retrieves every requested field and
#finally restores the original working directory.

import hashlib
import os
import string
import time

from SOAPpy import SOAPProxy ## for usage without WSDL file

#Change path:
prev_path = os.getcwd()
os.chdir(output_path)

try:
    #Construct BRENDA client:
    endpointURL = "https://www.brenda-enzymes.org/soap/brenda_server.php"
    client      = SOAPProxy(endpointURL)
    #The password is combined with the e-mail as its SHA-256 hex digest:
    password    = hashlib.sha256(password).hexdigest()
    credentials = email + ',' + password

    #Information to retrieve: km, M.W., pathway, sequence, specific activity
    #and kcat. Only kcat is retrieved at the moment; use
    #['KM','MW','PATH','SA','KCAT'] to retrieve the other fields as well.
    fields = ['KCAT']

    #Loop that retrieves all fields. Starts by the last one queried:
    start = 0
    for field in fields:
        if not start:
            if field == last_field or last_field == '':
                #First field processed in this run: resume from last_EC.
                start = 1
                extract_field(field,last_EC)
        else:
            #last_EC only marks the interruption point inside the resumed
            #field; every later field must be retrieved from its first EC
            #number, otherwise its earlier EC numbers would be skipped.
            extract_field(field,'')
finally:
    #Restore the working directory even if the retrieval fails:
    os.chdir(prev_path)

################################################################################