Spaces:

jie1
/

succ1

Build error

App Files Files Community

succ1 / DLKcat /DeeplearningApproach /Code /preprocess /brenda_retrieve.py

jie1

Upload 17 files

70b95b8 over 2 years ago

raw

history blame contribute delete

4.98 kB

	#!/usr/bin/python
	################################################################################
	# retrieveBRENDA
	# Acces the web client and retrieves all EC data from BRENDA. Creates files with
	# BRENDA output for all organisms and EC numbers for which there is data.
	#
	# Benjamin Sanchez. Last edited: 2018-04-10
	################################################################################

	# Updated by:
	# Author: LE YUAN
	# This code should be run under the Python 2.7 environment


	#INPUTS:
	#1) Path in which you wish to store all BRENDA queries:
	# output_path = '/Users/.../brenda_parser/raw_data'
	output_path = '../../Data/database/brenda_ec'
	#2) Last field processed (if the program was interrupted), e.g. 'KM'. If you
	# want to start from scratch, leave empty:
	last_field = ''
	#3) Last EC number processed (if the program was interrupted), e.g. '1.2.3.4'.
	# If you want to start from scratch, leave empty:
	last_EC = ''
	#4) E-mail in BRENDA:
	email = 'youremail'
	#5) Password in BRENDA:
	password = 'yourpassword'

	################################################################################

	#extract_field: Function that extracts all BRENDA data from a specific field.
	def extract_field(field,last):

	#Construct list of EC numbers, based on the enzymes for which there is
	#data on BRENDA:
	# if field == 'KCAT':
	# ECstring = client.getEcNumbersFromTurnoverNumber(credentials)

	if field == 'KM':
	ECstring = client.getEcNumbersFromKmValue(credentials)

	elif field == 'MW':
	ECstring = client.getEcNumbersFromMolecularWeight(credentials)

	elif field == 'PATH':
	ECstring = client.getEcNumbersFromPathway(credentials)

	elif field == 'SEQ':
	ECstring = client.getEcNumbersFromSequence(credentials)

	elif field == 'SA':
	ECstring = client.getEcNumbersFromSpecificActivity(credentials)

	elif field == 'KCAT':
	ECstring = client.getEcNumbersFromTurnoverNumber(credentials)

	EClist = ECstring.split('!')

	#Loop that retrieves data from BRENDA and saves it in txt files. Starts
	#from the last EC number queried:
	start = 0
	for ECnumber in EClist:

	#Detects the starting point (the last EC number queried):
	if not start and (ECnumber == last or last == ''):
	start = 1

	if start:
	#The code will retrieve data for all organisms:
	query = credentials + ',ecNumber' + ECnumber + '#organism'
	succes = 0

	#The try/except block inside the while is to avoid timeout PROXY
	#and encoding errors:
	while succes < 10:
	try:
	file_name = 'EC' + ECnumber + '_' + field
	print(file_name)

	if field == 'KM':
	data = client.getKmValue(query)

	elif field == 'MW':
	data = client.getMolecularWeight(query)

	elif field == 'PATH':
	data = client.getPathway(query)

	elif field == 'SEQ':
	data = client.getSequence(query)

	elif field == 'SA':
	data = client.getSpecificActivity(query)

	elif field == 'KCAT':
	data = client.getTurnoverNumber(query)

	#Once the querie was performed succesfully, the data is
	#copied in txt files:
	if data:
	fid = open(file_name + '.txt','w')
	fid.write(data.decode('ascii','ignore'))
	fid.close()

	succes = 10

	except:
	#Let the server cool of for a bit. If after 10 times it
	#still fails, the query is discarded:
	time.sleep(1)
	succes += 1

	################################################################################

	#Main script

	#Change path:
	import os
	prev_path = os.getcwd()
	os.chdir(output_path)

	#Construct BRENDA client:
	import string
	import hashlib
	from SOAPpy import SOAPProxy ## for usage without WSDL file
	endpointURL = "https://www.brenda-enzymes.org/soap/brenda_server.php"
	client = SOAPProxy(endpointURL)
	password = hashlib.sha256(password).hexdigest()
	credentials = email + ',' + password

	#Information to retrieve: km, M.W., pathway, sequence, specific activity and kcat.
	# fields = ['KM','MW','PATH','SA','KCAT']
	fields = ['KCAT']
	import time

	#Loop that retrieves all fields. Starts by the last one queried:
	start = 0
	for field in fields:
	if not start and (field == last_field or last_field == ''):
	start = 1

	if start:
	extract_field(field,last_EC)

	os.chdir(prev_path)

	################################################################################