File size: 2,148 Bytes
70b95b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/python
# coding: utf-8

# Author: LE YUAN
# Date: 2020-06-16


# BRENDA account login (placeholder values).
# Registered e-mail address:
email = "youremail"
# Account password in plain text:
password = "yourpassword"


# Construct BRENDA client:
import string
import hashlib
import os
import json
from SOAPpy import SOAPProxy ## for usage without WSDL file
# Swap the plain-text password for its SHA-256 hex digest, then build the
# "<email>,<digest>" prefix that is prepended to every request string.
password = hashlib.sha256(password).hexdigest()
credentials = ','.join((email, password))

# SOAP proxy bound directly to the BRENDA endpoint URL (no WSDL file).
endpointURL = "https://www.brenda-enzymes.org/soap/brenda_server.php"
client = SOAPProxy(endpointURL)


# Directory holding one tab-separated kcat table per EC number
# (the original run listed 1741 EC files here).
KCAT_DIR = '../../Data/database/Kcat_brenda'
# NOTE: "organims" is misspelled, but the path is kept unchanged because
# other scripts may read this exact file name.
OUTPUT_PATH = '../../Data/database/brenda_EC_organims_try.json'


def _read_organisms(path):
    """Return the sorted, de-duplicated organism names listed in one kcat file.

    The first line is skipped (treated as a header) and the organism name is
    taken from the second tab-separated column of every remaining line.
    Lines with fewer than two columns (e.g. a trailing blank line) are
    ignored instead of raising IndexError.
    """
    organisms = set()
    with open(path, 'r') as handle:
        for line in handle.readlines()[1:]:
            fields = line.strip().split('\t')
            if len(fields) > 1:
                organisms.add(fields[1])
    # Sorting makes the query order (and the progress output) reproducible.
    return sorted(organisms)


def _count_sequences(response):
    """Return the number of sequence entries in a getSequence reply string.

    Entries are separated by '#!'.  Empty pieces are not counted, so an
    empty reply yields 0 rather than 1 (''.split('#!') returns ['']).
    """
    return sum(1 for entry in response.strip().split('#!') if entry.strip())


filenames = os.listdir(KCAT_DIR)

# Maps EC number -> {organism name -> number of sequences reported by BRENDA}.
EC_organisms = dict()
for filename in filenames:
    # Skip the macOS folder-metadata file entirely; previously only the file
    # read was skipped, so the loop body still ran with stale (or undefined)
    # data and produced a bogus 'S_S' entry.
    if filename == '.DS_Store':
        continue

    # Drops the first two and last four characters of the file name;
    # presumably names look like "EC<number>.txt" -- TODO confirm.
    EC = filename[2:-4]
    print(EC)

    organism_seqcounts = dict()
    for organism in _read_organisms(os.path.join(KCAT_DIR, filename)):
        print(organism)

        # Request string format:
        #   "<email>,<hashed password>,ecNumber*1.1.1.1#organism*Homo sapiens"
        query = "ecNumber*%s#organism*%s" % (EC, organism)
        parameters = credentials + "," + query
        sequence = client.getSequence(parameters)
        organism_seqcounts[organism] = _count_sequences(sequence)

    EC_organisms[EC] = organism_seqcounts

print(EC_organisms)
with open(OUTPUT_PATH, 'w') as outfile:
    json.dump(EC_organisms, outfile, indent=4)