aksell committed on
Commit
4d025a2
1 Parent(s): 6a8b1e7

Cache parsing and folding

Browse files

Caching mainly benefits folding: re-submitting the same
sequence is faster because the cached result is reused.

Files changed (1) hide show
  1. app.py +27 -17
app.py CHANGED
@@ -19,38 +19,48 @@ MGLTTSGGARGFCSLAVLQELVPRPELLFVIDRAFHSGKHAVDMQVVDQEGLGDGVATLLYAHQGLYTCLLQAEARLLGR
19
  MGLTTSGGARGFCSLAVLQELVPRPELLFVIDRAFHSGKHAVDMQVVDQEGLGDGVATLLYAHQGLYTCLLQAEARLLGREWAAVPALEPNF
20
  MGAAGYTGSLILAALKQNPDIAVYALNRNDEKLKDVCGQYSNLKGQVCDLSNESQVEALLSGPRKTVVNLVGPYSFYGSRVLNACIEANCHY
21
  """
22
- input_sequence = st.text_area("Sequences separated by a newline (max 400 resis each)", default_sequences)
23
- sequences = []
24
-
25
- # Parse and clean input sequences
26
- for seq in input_sequence.split("\n"):
27
- seq = seq.strip()
28
- if len(seq) > 400:
29
- seq = seq[:400]
30
- seq = re.sub("[^ACDEFGHIKLMNPQRSTVWY]", "", seq)
31
- if len(seq) > 0:
32
- sequences.append(seq)
 
 
 
 
 
33
  st.write(f"Found {len(sequences)} valid sequences")
34
 
35
 
36
  pdb_strings = []
37
  url = "https://api.esmatlas.com/foldSequence/v1/pdb/"
38
 
39
- # Fold sequences with ESMfold
40
- for seq in sequences:
41
  retries = 0
42
  pdb_str = None
43
  while retries < 3 and pdb_str is None:
44
- response = requests.post(url, data=seq)
45
  pdb_str = response.text
46
  if pdb_str == "INTERNAL SERVER ERROR":
47
  retries += 1
48
  time.sleep(0.1)
49
  pdb_str = None
50
- if pdb_str is not None:
51
- pdb_strings.append(pdb_str)
 
 
 
 
 
52
  else:
53
- st.write("Failed to retrieve PDB structure after 3 retries")
54
 
55
 
56
 
 
19
  MGLTTSGGARGFCSLAVLQELVPRPELLFVIDRAFHSGKHAVDMQVVDQEGLGDGVATLLYAHQGLYTCLLQAEARLLGREWAAVPALEPNF
20
  MGAAGYTGSLILAALKQNPDIAVYALNRNDEKLKDVCGQYSNLKGQVCDLSNESQVEALLSGPRKTVVNLVGPYSFYGSRVLNACIEANCHY
21
  """
22
+ input_sequences = st.text_area("Sequences separated by a newline (max 400 resis each)", default_sequences)
23
+
24
+ @st.cache_data
25
+ def get_sequences(sequences_string):
26
+ sequences = []
27
+ # Parse and clean input sequences
28
+ for seq in sequences_string.split("\n"):
29
+ seq = seq.strip()
30
+ if len(seq) > 400:
31
+ seq = seq[:400]
32
+ seq = re.sub("[^ACDEFGHIKLMNPQRSTVWY]", "", seq)
33
+ if len(seq) > 0:
34
+ sequences.append(seq)
35
+ return sequences
36
+
37
+ sequences = get_sequences(input_sequences)
38
  st.write(f"Found {len(sequences)} valid sequences")
39
 
40
 
41
  pdb_strings = []
42
  url = "https://api.esmatlas.com/foldSequence/v1/pdb/"
43
 
44
+ @st.cache_data
45
+ def get_pdb(sequence):
46
  retries = 0
47
  pdb_str = None
48
  while retries < 3 and pdb_str is None:
49
+ response = requests.post(url, data=sequence)
50
  pdb_str = response.text
51
  if pdb_str == "INTERNAL SERVER ERROR":
52
  retries += 1
53
  time.sleep(0.1)
54
  pdb_str = None
55
+ return pdb_str
56
+
57
+
58
+ # Fold sequences with ESMfold
59
+ for seq in sequences:
60
+ if pdb := get_pdb(seq):
61
+ pdb_strings.append(pdb)
62
  else:
63
+ st.write(f"Failed to retrieve PDB structure from ESMFold for {seq}")
64
 
65
 
66