Spaces:
Running
Running
Erva Ulusoy
committed on
Commit
·
873150b
1
Parent(s):
4cc17e2
merge overlapping domain locations
Browse files- run_domain2go_app.py +14 -5
run_domain2go_app.py
CHANGED
@@ -4,6 +4,7 @@ from Bio import SeqIO
|
|
4 |
import os
|
5 |
import time
|
6 |
import pandas as pd
|
|
|
7 |
|
8 |
def find_domains(email, sequence, name):
|
9 |
|
@@ -72,10 +73,10 @@ def find_domains(email, sequence, name):
|
|
72 |
entries[entry['accession']]['locations'].extend(location_list)
|
73 |
|
74 |
entries[entry['accession']]['locations'] = list(set(entries[entry['accession']]['locations']))
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
if entries:
|
80 |
result_text = 'Domains found.'
|
81 |
|
@@ -92,6 +93,14 @@ def find_domains(email, sequence, name):
|
|
92 |
|
93 |
# generate protein function predictions based on domain2go mappings
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
def generate_function_predictions(domains_df, mapping_path):
|
96 |
|
97 |
# read domain2go mappings
|
@@ -115,4 +124,4 @@ def generate_function_predictions(domains_df, mapping_path):
|
|
115 |
# save protein function predictions
|
116 |
protein_name = domains_df['protein_name'].iloc[0]
|
117 |
result_text= 'Function predictions found.'
|
118 |
-
return [result_text, merged_df]
|
|
|
4 |
import os
|
5 |
import time
|
6 |
import pandas as pd
|
7 |
+
import intervaltree
|
8 |
|
9 |
def find_domains(email, sequence, name):
|
10 |
|
|
|
73 |
entries[entry['accession']]['locations'].extend(location_list)
|
74 |
|
75 |
entries[entry['accession']]['locations'] = list(set(entries[entry['accession']]['locations']))
|
76 |
+
if len(entries[entry['accession']]['locations']) > 1:
|
77 |
+
entries[entry['accession']]['locations'] = merge_locations(entries[entry['accession']]['locations'])
|
78 |
+
entries[entry['accession']]['locations'] = sorted([i.split('-') for i in entries[entry['accession']]['locations']], key=lambda x: (int(x[0]), int(x[1])))
|
79 |
+
entries[entry['accession']]['locations'] = ['-'.join(i) for i in entries[entry['accession']]['locations']]
|
80 |
if entries:
|
81 |
result_text = 'Domains found.'
|
82 |
|
|
|
93 |
|
94 |
# generate protein function predictions based on domain2go mappings
|
95 |
|
96 |
+
|
97 |
+
def merge_locations(locations):
    """Collapse overlapping 'start-end' location strings into merged ranges.

    Coordinates are parsed as integers before comparison. Comparing the raw
    string pieces would order them lexicographically (e.g. '100' < '20') and
    would treat a range such as '5-10' as empty because '5' >= '10'.

    Args:
        locations: iterable of strings formatted as '<start>-<end>', where
            both parts are integers.

    Returns:
        A list of '<start>-<end>' strings sorted by ascending start, in which
        every group of overlapping input ranges is replaced by one range
        spanning the whole group. An empty input yields an empty list.

    Raises:
        ValueError: if an entry does not consist of exactly two integer
            parts separated by '-'.
    """
    spans = []
    for location in locations:
        start, end = location.split('-')
        spans.append((int(start), int(end)))

    # Sorting by start lets a single left-to-right sweep find every overlap.
    spans.sort()

    merged = []
    for start, end in spans:
        # NOTE(review): coordinates are treated as inclusive positions, so two
        # ranges that share an endpoint (e.g. 1-10 and 10-20) are merged --
        # confirm against the upstream location format.
        if merged and start <= merged[-1][1]:
            if end > merged[-1][1]:
                merged[-1][1] = end
        else:
            merged.append([start, end])

    return ['{}-{}'.format(start, end) for start, end in merged]
|
103 |
+
|
104 |
def generate_function_predictions(domains_df, mapping_path):
|
105 |
|
106 |
# read domain2go mappings
|
|
|
124 |
# save protein function predictions
|
125 |
protein_name = domains_df['protein_name'].iloc[0]
|
126 |
result_text= 'Function predictions found.'
|
127 |
+
return [result_text, merged_df]
|