Simon Duerr committed on
Commit
85bd48b
1 Parent(s): e65166b

add fast af

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. LICENSE +1 -1
  2. af_backprop/README.md +6 -0
  3. af_backprop/alphafold/__init__.py +14 -0
  4. af_backprop/alphafold/common/__init__.py +14 -0
  5. af_backprop/alphafold/common/confidence.py +155 -0
  6. af_backprop/alphafold/common/protein.py +229 -0
  7. af_backprop/alphafold/common/residue_constants.py +911 -0
  8. af_backprop/alphafold/data/__init__.py +14 -0
  9. af_backprop/alphafold/data/mmcif_parsing.py +384 -0
  10. af_backprop/alphafold/data/parsers.py +364 -0
  11. af_backprop/alphafold/data/pipeline.py +209 -0
  12. af_backprop/alphafold/data/prep_inputs.py +133 -0
  13. af_backprop/alphafold/data/templates.py +910 -0
  14. af_backprop/alphafold/data/tools/__init__.py +14 -0
  15. af_backprop/alphafold/data/tools/hhblits.py +155 -0
  16. af_backprop/alphafold/data/tools/hhsearch.py +91 -0
  17. af_backprop/alphafold/data/tools/hmmbuild.py +138 -0
  18. af_backprop/alphafold/data/tools/hmmsearch.py +90 -0
  19. af_backprop/alphafold/data/tools/jackhmmer.py +198 -0
  20. af_backprop/alphafold/data/tools/kalign.py +104 -0
  21. af_backprop/alphafold/data/tools/utils.py +40 -0
  22. af_backprop/alphafold/model/__init__.py +14 -0
  23. af_backprop/alphafold/model/all_atom.py +1155 -0
  24. af_backprop/alphafold/model/common_modules.py +84 -0
  25. af_backprop/alphafold/model/config.py +412 -0
  26. af_backprop/alphafold/model/data.py +39 -0
  27. af_backprop/alphafold/model/features.py +102 -0
  28. af_backprop/alphafold/model/folding.py +1016 -0
  29. af_backprop/alphafold/model/layer_stack.py +274 -0
  30. af_backprop/alphafold/model/lddt.py +88 -0
  31. af_backprop/alphafold/model/mapping.py +218 -0
  32. af_backprop/alphafold/model/model.py +145 -0
  33. af_backprop/alphafold/model/modules.py +2164 -0
  34. af_backprop/alphafold/model/prng.py +70 -0
  35. af_backprop/alphafold/model/quat_affine.py +459 -0
  36. af_backprop/alphafold/model/r3.py +320 -0
  37. af_backprop/alphafold/model/tf/__init__.py +14 -0
  38. af_backprop/alphafold/model/tf/data_transforms.py +625 -0
  39. af_backprop/alphafold/model/tf/input_pipeline.py +166 -0
  40. af_backprop/alphafold/model/tf/protein_features.py +129 -0
  41. af_backprop/alphafold/model/tf/proteins_dataset.py +166 -0
  42. af_backprop/alphafold/model/tf/shape_helpers.py +47 -0
  43. af_backprop/alphafold/model/tf/shape_placeholders.py +20 -0
  44. af_backprop/alphafold/model/tf/utils.py +47 -0
  45. af_backprop/alphafold/model/utils.py +81 -0
  46. af_backprop/examples/AlphaFold_single.ipynb +311 -0
  47. af_backprop/examples/af_design.ipynb +41 -0
  48. af_backprop/examples/fixbb_design.ipynb +29 -0
  49. af_backprop/examples/sc_hall/1QJG.pdb +1156 -0
  50. af_backprop/examples/sc_hall/1QJS_starting.pdb +880 -0
LICENSE CHANGED
@@ -1,6 +1,6 @@
1
  MIT License
2
 
3
- Copyright (c) 2022 Justas Dauparas, Simon Duerr
4
 
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
  of this software and associated documentation files (the "Software"), to deal
 
1
  MIT License
2
 
3
+ Copyright (c) 2022 Justas Dauparas, Sergey Ovchinnikov, Simon Duerr
4
 
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
  of this software and associated documentation files (the "Software"), to deal
af_backprop/README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # af_backprop
2
+ Various modifications to AlphaFold to allow backpropagation through the model.
3
+
4
+ ### projects that use af_backprop
5
+ - [SMURF](https://github.com/spetti/SMURF): End-to-end learning of multiple sequence alignments with differentiable Smith-Waterman
6
+ - [ColabDesign](https://github.com/sokrypton/ColabDesign): Making Protein Design accessible to all via Google Colab!
af_backprop/alphafold/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """An implementation of the inference pipeline of AlphaFold v2.0."""
af_backprop/alphafold/common/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Common data types and constants used within Alphafold."""
af_backprop/alphafold/common/confidence.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Functions for processing confidence metrics."""
16
+
17
+ from typing import Dict, Optional, Tuple
18
+ import numpy as np
19
+ import scipy.special
20
+
21
+
22
def compute_plddt(logits: np.ndarray) -> np.ndarray:
    """Turns predicted-lDDT logits into a per-residue pLDDT score.

    The last axis of `logits` is treated as a histogram over equally wide
    bins covering [0, 1]; the score is the softmax-weighted mean of the bin
    centers, rescaled to the 0-100 range.

    Args:
      logits: [num_res, num_bins] output from the PredictedLDDTHead.

    Returns:
      plddt: [num_res] per-residue pLDDT.
    """
    n_bins = logits.shape[-1]
    width = 1.0 / n_bins
    # Bin centers sit half a bin width above each lower edge.
    centers = np.arange(start=0.5 * width, stop=1.0, step=width)
    bin_probs = scipy.special.softmax(logits, axis=-1)
    expected_lddt = np.sum(bin_probs * centers[None, :], axis=-1)
    return expected_lddt * 100
37
+
38
+
39
+ def _calculate_bin_centers(breaks: np.ndarray):
40
+ """Gets the bin centers from the bin edges.
41
+
42
+ Args:
43
+ breaks: [num_bins - 1] the error bin edges.
44
+
45
+ Returns:
46
+ bin_centers: [num_bins] the error bin centers.
47
+ """
48
+ step = (breaks[1] - breaks[0])
49
+
50
+ # Add half-step to get the center
51
+ bin_centers = breaks + step / 2
52
+ # Add a catch-all bin at the end.
53
+ bin_centers = np.concatenate([bin_centers, [bin_centers[-1] + step]],
54
+ axis=0)
55
+ return bin_centers
56
+
57
+
58
def _calculate_expected_aligned_error(
        alignment_confidence_breaks: np.ndarray,
        aligned_distance_error_probs: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    """Computes the expected aligned distance error for each residue pair.

    Args:
      alignment_confidence_breaks: [num_bins - 1] the error bin edges.
      aligned_distance_error_probs: [num_res, num_res, num_bins] the predicted
        probs for each error bin, for each pair of residues.

    Returns:
      predicted_aligned_error: [num_res, num_res] the expected aligned distance
        error for each pair of residues.
      max_predicted_aligned_error: The maximum predicted error possible, i.e.
        the center of the last (catch-all) bin.
    """
    centers = _calculate_bin_centers(alignment_confidence_breaks)

    # Expectation over bins: probability-weighted sum of the bin centers.
    expected_error = np.sum(aligned_distance_error_probs * centers, axis=-1)
    largest_error = np.asarray(centers[-1])
    return expected_error, largest_error
78
+
79
+
80
def compute_predicted_aligned_error(
        logits: np.ndarray,
        breaks: np.ndarray) -> Dict[str, np.ndarray]:
    """Derives aligned-error confidence metrics from logits.

    Args:
      logits: [num_res, num_res, num_bins] the logits output from
        PredictedAlignedErrorHead.
      breaks: [num_bins - 1] the error bin edges.

    Returns:
      A dict with the following entries:
        aligned_confidence_probs: [num_res, num_res, num_bins] the predicted
          aligned error probabilities over bins for each residue pair.
        predicted_aligned_error: [num_res, num_res] the expected aligned
          distance error for each pair of residues.
        max_predicted_aligned_error: The maximum predicted error possible.
    """
    bin_probs = scipy.special.softmax(logits, axis=-1)
    expected_error, largest_error = _calculate_expected_aligned_error(
        alignment_confidence_breaks=breaks,
        aligned_distance_error_probs=bin_probs)

    metrics = {}
    metrics['aligned_confidence_probs'] = bin_probs
    metrics['predicted_aligned_error'] = expected_error
    metrics['max_predicted_aligned_error'] = largest_error
    return metrics
109
+
110
+
111
def predicted_tm_score(
        logits: np.ndarray,
        breaks: np.ndarray,
        residue_weights: Optional[np.ndarray] = None) -> np.ndarray:
    """Computes the predicted TM alignment score (pTM).

    Args:
      logits: [num_res, num_res, num_bins] the logits output from
        PredictedAlignedErrorHead.
      breaks: [num_bins - 1] the error bin edges.
      residue_weights: [num_res] the per residue weights to use for the
        expectation. Defaults to a weight of one for every residue.

    Returns:
      ptm_score: the predicted TM alignment score, taken from the alignment
        with the highest weighted score.
    """
    # Weights are expected to lie in [0, 1] but need not be binary, e.g. the
    # experimentally-resolved head's probabilities.
    if residue_weights is None:
        residue_weights = np.ones(logits.shape[0])

    error_centers = _calculate_bin_centers(breaks)

    # Clip the effective length so that d0 below stays positive/defined.
    effective_len = max(np.sum(residue_weights), 19)

    # d_0(num_res) as defined by TM-score, eqn. (5) in
    # http://zhanglab.ccmb.med.umich.edu/papers/2004_3.pdf
    # Yang & Skolnick "Scoring function for automated
    # assessment of protein structure template quality" 2004
    d0 = 1.24 * (effective_len - 15) ** (1./3) - 1.8

    bin_probs = scipy.special.softmax(logits, axis=-1)

    # TM-score contribution of each error bin, then its expectation over bins.
    tm_of_bin = 1. / (1 + np.square(error_centers) / np.square(d0))
    expected_tm = np.sum(bin_probs * tm_of_bin, axis=-1)

    # Weighted average over the last residue axis gives one score per
    # alignment (one per entry of the first axis).
    weight_fractions = residue_weights / (1e-8 + residue_weights.sum())
    tm_per_alignment = np.sum(expected_tm * weight_fractions, axis=-1)

    best_alignment = (tm_per_alignment * residue_weights).argmax()
    return np.asarray(tm_per_alignment[best_alignment])
af_backprop/alphafold/common/protein.py ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Protein data type."""
16
+ import dataclasses
17
+ import io
18
+ from typing import Any, Mapping, Optional
19
+ from alphafold.common import residue_constants
20
+ from Bio.PDB import PDBParser
21
+ import numpy as np
22
+
23
+ FeatureDict = Mapping[str, np.ndarray]
24
+ ModelOutput = Mapping[str, Any] # Is a nested dict.
25
+
26
+
27
@dataclasses.dataclass(frozen=True)
class Protein:
    """Immutable container for a protein structure stored as numpy arrays."""

    # Cartesian atom coordinates in angstroms. The atom-type axis follows the
    # ordering of residue_constants.atom_types.
    atom_positions: np.ndarray  # [num_res, num_atom_type, 3]

    # Integer amino-acid type per residue, in the range 0..20 where 20
    # denotes 'X'.
    aatype: np.ndarray  # [num_res]

    # Float presence mask per atom: 1.0 where the atom exists, 0.0 where it
    # does not. Intended for masking losses.
    atom_mask: np.ndarray  # [num_res, num_atom_type]

    # Residue numbering as found in the PDB; may have gaps and need not start
    # at zero.
    residue_index: np.ndarray  # [num_res]

    # B-factors (temperature factors), in square angstroms, describing the
    # displacement from the ground-truth mean position.
    b_factors: np.ndarray  # [num_res, num_atom_type]
50
+
51
+
52
def from_pdb_string(pdb_str: str, chain_id: Optional[str] = None) -> Protein:
    """Parses PDB text into a `Protein`.

    WARNING: Residue names without a known one-letter code are mapped to
    'X' (unknown), and atoms whose names are not in
    residue_constants.atom_types are dropped. Residues left with no known
    atoms are skipped entirely.

    Args:
      pdb_str: The contents of the pdb file
      chain_id: If None, then the pdb file must contain a single chain (which
        will be parsed). If chain_id is specified (e.g. A), then only that
        chain is parsed.

    Returns:
      A new `Protein` parsed from the pdb contents.

    Raises:
      ValueError: If the file does not hold exactly one model, if `chain_id`
        is None and the model does not hold exactly one chain, or if a
        residue carries an insertion code.
    """
    handle = io.StringIO(pdb_str)
    pdb_parser = PDBParser(QUIET=True)
    structure = pdb_parser.get_structure('none', handle)

    models = list(structure.get_models())
    if len(models) != 1:
        raise ValueError(
            f'Only single model PDBs are supported. Found {len(models)} models.')
    model = models[0]

    if chain_id is None:
        chains = list(model.get_chains())
        if len(chains) != 1:
            raise ValueError(
                'Only single chain PDBs are supported when chain_id not specified. '
                f'Found {len(chains)} chains.')
        chain = chains[0]
    else:
        chain = model[chain_id]

    num_atom_types = residue_constants.atom_type_num
    all_positions, all_masks, all_b_factors = [], [], []
    all_aatypes, all_res_indices = [], []

    for res in chain:
        # res.id[2] is the insertion code; a blank means "none".
        if res.id[2] != ' ':
            raise ValueError(
                f'PDB contains an insertion code at chain {chain.id} and residue '
                f'index {res.id[1]}. These are not supported.')

        one_letter = residue_constants.restype_3to1.get(res.resname, 'X')
        restype_idx = residue_constants.restype_order.get(
            one_letter, residue_constants.restype_num)

        # Fixed-size per-residue slots, one row per known atom type.
        pos = np.zeros((num_atom_types, 3))
        mask = np.zeros((num_atom_types,))
        res_b_factors = np.zeros((num_atom_types,))
        for atom in res:
            if atom.name not in residue_constants.atom_types:
                continue
            slot = residue_constants.atom_order[atom.name]
            pos[slot] = atom.coord
            mask[slot] = 1.
            res_b_factors[slot] = atom.bfactor

        # The mask only ever holds 0. or 1., so "no entry set" means the
        # residue has no known atoms and is skipped.
        if not mask.any():
            continue

        all_aatypes.append(restype_idx)
        all_positions.append(pos)
        all_masks.append(mask)
        all_res_indices.append(res.id[1])
        all_b_factors.append(res_b_factors)

    return Protein(
        atom_positions=np.array(all_positions),
        atom_mask=np.array(all_masks),
        aatype=np.array(all_aatypes),
        residue_index=np.array(all_res_indices),
        b_factors=np.array(all_b_factors))
125
+
126
+
127
def to_pdb(prot: Protein) -> str:
    """Converts a `Protein` instance to a single-model, single-chain PDB string.

    PDB is a fixed-column format, so the blank gaps between fields are built
    from explicit widths instead of being typed as runs of spaces; this keeps
    coordinates, element symbols and the TER residue fields in the columns
    that PDB readers expect.

    Args:
      prot: The protein to convert to PDB.

    Returns:
      PDB string: a MODEL record, one ATOM record for every atom whose mask
      value is at least 0.5, a TER record, ENDMDL, END and a trailing newline.
      Every residue is written to chain 'A'.

    Raises:
      ValueError: If any entry of `prot.aatype` exceeds
        residue_constants.restype_num.
    """
    restypes = residue_constants.restypes + ['X']
    atom_types = residue_constants.atom_types

    def res_1to3(restype_idx):
        # 3-letter residue name for an aatype index; 'UNK' when unmapped.
        return residue_constants.restype_1to3.get(restypes[restype_idx], 'UNK')

    atom_mask = prot.atom_mask
    aatype = prot.aatype
    atom_positions = prot.atom_positions
    residue_index = prot.residue_index.astype(np.int32)
    b_factors = prot.b_factors

    if np.any(aatype > residue_constants.restype_num):
        raise ValueError('Invalid aatypes.')

    # Blank column runs required by the fixed-width record layouts.
    coord_gap = ' ' * 3      # ATOM columns 28-30, before x at column 31.
    element_gap = ' ' * 10   # ATOM columns 67-76, before element at 77-78.
    ter_gap = ' ' * 6        # TER columns 12-17, before resName at 18-20.

    pdb_lines = []
    # Model serial number starts at column 11.
    pdb_lines.append('MODEL' + ' ' * 5 + '1')

    atom_index = 1
    chain_id = 'A'
    record_type = 'ATOM'
    alt_loc = ''
    insertion_code = ''
    occupancy = 1.00
    charge = ''

    # Add all atom sites.
    for i in range(aatype.shape[0]):
        res_name_3 = res_1to3(aatype[i])
        for atom_name, pos, mask, b_factor in zip(
                atom_types, atom_positions[i], atom_mask[i], b_factors[i]):
            if mask < 0.5:
                continue

            # Names shorter than 4 characters start one column to the right.
            name = atom_name if len(atom_name) == 4 else f' {atom_name}'
            # Protein supports only C, N, O, S, so the first letter of the
            # atom name is the element symbol.
            element = atom_name[0]
            atom_line = (
                f'{record_type:<6}{atom_index:>5} {name:<4}{alt_loc:>1}'
                f'{res_name_3:>3} {chain_id:>1}'
                f'{residue_index[i]:>4}{insertion_code:>1}{coord_gap}'
                f'{pos[0]:>8.3f}{pos[1]:>8.3f}{pos[2]:>8.3f}'
                f'{occupancy:>6.2f}{b_factor:>6.2f}{element_gap}'
                f'{element:>2}{charge:>2}')
            pdb_lines.append(atom_line)
            atom_index += 1

    # Close the chain; the TER record names the last residue written.
    chain_end = 'TER'
    chain_termination_line = (
        f'{chain_end:<6}{atom_index:>5}{ter_gap}{res_1to3(aatype[-1]):>3} '
        f'{chain_id:>1}{residue_index[-1]:>4}')
    pdb_lines.append(chain_termination_line)
    pdb_lines.append('ENDMDL')

    pdb_lines.append('END')
    # Empty final element so the joined string ends with a newline.
    pdb_lines.append('')
    return '\n'.join(pdb_lines)
190
+
191
+
192
def ideal_atom_mask(prot: Protein) -> np.ndarray:
    """Returns the atom mask implied by the residue types alone.

    `Protein.atom_mask` typically reflects which atoms were actually reported
    in the PDB. This function instead looks up, for each residue type in
    `prot.aatype`, the corresponding row of
    residue_constants.STANDARD_ATOM_MASK, i.e. the heavy atoms that should be
    present for that amino acid.

    Args:
      prot: `Protein` whose fields are `numpy.ndarray` objects.

    Returns:
      An ideal atom mask.
    """
    standard_mask = residue_constants.STANDARD_ATOM_MASK
    return standard_mask[prot.aatype]
206
+
207
+
208
def from_prediction(features: FeatureDict, result: ModelOutput,
                    b_factors: Optional[np.ndarray] = None) -> Protein:
    """Builds a `Protein` from model inputs and the model's prediction.

    Args:
      features: Dictionary holding model inputs.
      result: Dictionary holding model outputs.
      b_factors: (Optional) B-factors to use for the protein. When omitted,
        zeros shaped like the predicted atom mask are used.

    Returns:
      A protein instance.
    """
    structure_out = result['structure_module']
    if b_factors is None:
        b_factors = np.zeros_like(structure_out['final_atom_mask'])

    # Only the first entry along the leading axis of the input features is
    # used; residue_index is shifted up by one.
    protein_fields = dict(
        aatype=features['aatype'][0],
        atom_positions=structure_out['final_atom_positions'],
        atom_mask=structure_out['final_atom_mask'],
        residue_index=features['residue_index'][0] + 1,
        b_factors=b_factors)
    return Protein(**protein_fields)
af_backprop/alphafold/common/residue_constants.py ADDED
@@ -0,0 +1,911 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Constants used in AlphaFold."""
16
+
17
+ import collections
18
+ import functools
19
+ from typing import List, Mapping, Tuple
20
+
21
+ import numpy as np
22
+ import tree
23
+
24
+ # Internal import (35fd).
25
+
26
+
27
+ # Distance from one CA to next CA [trans configuration: omega = 180].
28
+ ca_ca = 3.80209737096
29
+
30
+ # Format: The list for each AA type contains chi1, chi2, chi3, chi4 in
31
+ # this order (or a relevant subset from chi1 onwards). ALA and GLY don't have
32
+ # chi angles so their chi angle lists are empty.
33
+ chi_angles_atoms = {
34
+ 'ALA': [],
35
+ # Chi5 in arginine is always 0 +- 5 degrees, so ignore it.
36
+ 'ARG': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
37
+ ['CB', 'CG', 'CD', 'NE'], ['CG', 'CD', 'NE', 'CZ']],
38
+ 'ASN': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']],
39
+ 'ASP': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']],
40
+ 'CYS': [['N', 'CA', 'CB', 'SG']],
41
+ 'GLN': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
42
+ ['CB', 'CG', 'CD', 'OE1']],
43
+ 'GLU': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
44
+ ['CB', 'CG', 'CD', 'OE1']],
45
+ 'GLY': [],
46
+ 'HIS': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'ND1']],
47
+ 'ILE': [['N', 'CA', 'CB', 'CG1'], ['CA', 'CB', 'CG1', 'CD1']],
48
+ 'LEU': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
49
+ 'LYS': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
50
+ ['CB', 'CG', 'CD', 'CE'], ['CG', 'CD', 'CE', 'NZ']],
51
+ 'MET': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'SD'],
52
+ ['CB', 'CG', 'SD', 'CE']],
53
+ 'PHE': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
54
+ 'PRO': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD']],
55
+ 'SER': [['N', 'CA', 'CB', 'OG']],
56
+ 'THR': [['N', 'CA', 'CB', 'OG1']],
57
+ 'TRP': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
58
+ 'TYR': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
59
+ 'VAL': [['N', 'CA', 'CB', 'CG1']],
60
+ }
61
+
62
+ # If chi angles given in fixed-length array, this matrix determines how to mask
63
+ # them for each AA type. The order is as per restype_order (see below).
64
+ chi_angles_mask = [
65
+ [0.0, 0.0, 0.0, 0.0], # ALA
66
+ [1.0, 1.0, 1.0, 1.0], # ARG
67
+ [1.0, 1.0, 0.0, 0.0], # ASN
68
+ [1.0, 1.0, 0.0, 0.0], # ASP
69
+ [1.0, 0.0, 0.0, 0.0], # CYS
70
+ [1.0, 1.0, 1.0, 0.0], # GLN
71
+ [1.0, 1.0, 1.0, 0.0], # GLU
72
+ [0.0, 0.0, 0.0, 0.0], # GLY
73
+ [1.0, 1.0, 0.0, 0.0], # HIS
74
+ [1.0, 1.0, 0.0, 0.0], # ILE
75
+ [1.0, 1.0, 0.0, 0.0], # LEU
76
+ [1.0, 1.0, 1.0, 1.0], # LYS
77
+ [1.0, 1.0, 1.0, 0.0], # MET
78
+ [1.0, 1.0, 0.0, 0.0], # PHE
79
+ [1.0, 1.0, 0.0, 0.0], # PRO
80
+ [1.0, 0.0, 0.0, 0.0], # SER
81
+ [1.0, 0.0, 0.0, 0.0], # THR
82
+ [1.0, 1.0, 0.0, 0.0], # TRP
83
+ [1.0, 1.0, 0.0, 0.0], # TYR
84
+ [1.0, 0.0, 0.0, 0.0], # VAL
85
+ ]
86
+
87
+ # The following chi angles are pi periodic: they can be rotated by a multiple
88
+ # of pi without affecting the structure.
89
+ chi_pi_periodic = [
90
+ [0.0, 0.0, 0.0, 0.0], # ALA
91
+ [0.0, 0.0, 0.0, 0.0], # ARG
92
+ [0.0, 0.0, 0.0, 0.0], # ASN
93
+ [0.0, 1.0, 0.0, 0.0], # ASP
94
+ [0.0, 0.0, 0.0, 0.0], # CYS
95
+ [0.0, 0.0, 0.0, 0.0], # GLN
96
+ [0.0, 0.0, 1.0, 0.0], # GLU
97
+ [0.0, 0.0, 0.0, 0.0], # GLY
98
+ [0.0, 0.0, 0.0, 0.0], # HIS
99
+ [0.0, 0.0, 0.0, 0.0], # ILE
100
+ [0.0, 0.0, 0.0, 0.0], # LEU
101
+ [0.0, 0.0, 0.0, 0.0], # LYS
102
+ [0.0, 0.0, 0.0, 0.0], # MET
103
+ [0.0, 1.0, 0.0, 0.0], # PHE
104
+ [0.0, 0.0, 0.0, 0.0], # PRO
105
+ [0.0, 0.0, 0.0, 0.0], # SER
106
+ [0.0, 0.0, 0.0, 0.0], # THR
107
+ [0.0, 0.0, 0.0, 0.0], # TRP
108
+ [0.0, 1.0, 0.0, 0.0], # TYR
109
+ [0.0, 0.0, 0.0, 0.0], # VAL
110
+ [0.0, 0.0, 0.0, 0.0], # UNK
111
+ ]
112
+
113
+ # Atoms positions relative to the 8 rigid groups, defined by the pre-omega, phi,
114
+ # psi and chi angles:
115
+ # 0: 'backbone group',
116
+ # 1: 'pre-omega-group', (empty)
117
+ # 2: 'phi-group', (currently empty, because it defines only hydrogens)
118
+ # 3: 'psi-group',
119
+ # 4,5,6,7: 'chi1,2,3,4-group'
120
+ # The atom positions are relative to the axis-end-atom of the corresponding
121
+ # rotation axis. The x-axis is in direction of the rotation axis, and the y-axis
122
+ # is defined such that the dihedral-angle-defining atom (the last entry in
123
+ # chi_angles_atoms above) is in the xy-plane (with a positive y-coordinate).
124
+ # format: [atomname, group_idx, rel_position]
125
+ rigid_group_atom_positions = {
126
+ 'ALA': [
127
+ ['N', 0, (-0.525, 1.363, 0.000)],
128
+ ['CA', 0, (0.000, 0.000, 0.000)],
129
+ ['C', 0, (1.526, -0.000, -0.000)],
130
+ ['CB', 0, (-0.529, -0.774, -1.205)],
131
+ ['O', 3, (0.627, 1.062, 0.000)],
132
+ ],
133
+ 'ARG': [
134
+ ['N', 0, (-0.524, 1.362, -0.000)],
135
+ ['CA', 0, (0.000, 0.000, 0.000)],
136
+ ['C', 0, (1.525, -0.000, -0.000)],
137
+ ['CB', 0, (-0.524, -0.778, -1.209)],
138
+ ['O', 3, (0.626, 1.062, 0.000)],
139
+ ['CG', 4, (0.616, 1.390, -0.000)],
140
+ ['CD', 5, (0.564, 1.414, 0.000)],
141
+ ['NE', 6, (0.539, 1.357, -0.000)],
142
+ ['NH1', 7, (0.206, 2.301, 0.000)],
143
+ ['NH2', 7, (2.078, 0.978, -0.000)],
144
+ ['CZ', 7, (0.758, 1.093, -0.000)],
145
+ ],
146
+ 'ASN': [
147
+ ['N', 0, (-0.536, 1.357, 0.000)],
148
+ ['CA', 0, (0.000, 0.000, 0.000)],
149
+ ['C', 0, (1.526, -0.000, -0.000)],
150
+ ['CB', 0, (-0.531, -0.787, -1.200)],
151
+ ['O', 3, (0.625, 1.062, 0.000)],
152
+ ['CG', 4, (0.584, 1.399, 0.000)],
153
+ ['ND2', 5, (0.593, -1.188, 0.001)],
154
+ ['OD1', 5, (0.633, 1.059, 0.000)],
155
+ ],
156
+ 'ASP': [
157
+ ['N', 0, (-0.525, 1.362, -0.000)],
158
+ ['CA', 0, (0.000, 0.000, 0.000)],
159
+ ['C', 0, (1.527, 0.000, -0.000)],
160
+ ['CB', 0, (-0.526, -0.778, -1.208)],
161
+ ['O', 3, (0.626, 1.062, -0.000)],
162
+ ['CG', 4, (0.593, 1.398, -0.000)],
163
+ ['OD1', 5, (0.610, 1.091, 0.000)],
164
+ ['OD2', 5, (0.592, -1.101, -0.003)],
165
+ ],
166
+ 'CYS': [
167
+ ['N', 0, (-0.522, 1.362, -0.000)],
168
+ ['CA', 0, (0.000, 0.000, 0.000)],
169
+ ['C', 0, (1.524, 0.000, 0.000)],
170
+ ['CB', 0, (-0.519, -0.773, -1.212)],
171
+ ['O', 3, (0.625, 1.062, -0.000)],
172
+ ['SG', 4, (0.728, 1.653, 0.000)],
173
+ ],
174
+ 'GLN': [
175
+ ['N', 0, (-0.526, 1.361, -0.000)],
176
+ ['CA', 0, (0.000, 0.000, 0.000)],
177
+ ['C', 0, (1.526, 0.000, 0.000)],
178
+ ['CB', 0, (-0.525, -0.779, -1.207)],
179
+ ['O', 3, (0.626, 1.062, -0.000)],
180
+ ['CG', 4, (0.615, 1.393, 0.000)],
181
+ ['CD', 5, (0.587, 1.399, -0.000)],
182
+ ['NE2', 6, (0.593, -1.189, -0.001)],
183
+ ['OE1', 6, (0.634, 1.060, 0.000)],
184
+ ],
185
+ 'GLU': [
186
+ ['N', 0, (-0.528, 1.361, 0.000)],
187
+ ['CA', 0, (0.000, 0.000, 0.000)],
188
+ ['C', 0, (1.526, -0.000, -0.000)],
189
+ ['CB', 0, (-0.526, -0.781, -1.207)],
190
+ ['O', 3, (0.626, 1.062, 0.000)],
191
+ ['CG', 4, (0.615, 1.392, 0.000)],
192
+ ['CD', 5, (0.600, 1.397, 0.000)],
193
+ ['OE1', 6, (0.607, 1.095, -0.000)],
194
+ ['OE2', 6, (0.589, -1.104, -0.001)],
195
+ ],
196
+ 'GLY': [
197
+ ['N', 0, (-0.572, 1.337, 0.000)],
198
+ ['CA', 0, (0.000, 0.000, 0.000)],
199
+ ['C', 0, (1.517, -0.000, -0.000)],
200
+ ['O', 3, (0.626, 1.062, -0.000)],
201
+ ],
202
+ 'HIS': [
203
+ ['N', 0, (-0.527, 1.360, 0.000)],
204
+ ['CA', 0, (0.000, 0.000, 0.000)],
205
+ ['C', 0, (1.525, 0.000, 0.000)],
206
+ ['CB', 0, (-0.525, -0.778, -1.208)],
207
+ ['O', 3, (0.625, 1.063, 0.000)],
208
+ ['CG', 4, (0.600, 1.370, -0.000)],
209
+ ['CD2', 5, (0.889, -1.021, 0.003)],
210
+ ['ND1', 5, (0.744, 1.160, -0.000)],
211
+ ['CE1', 5, (2.030, 0.851, 0.002)],
212
+ ['NE2', 5, (2.145, -0.466, 0.004)],
213
+ ],
214
+ 'ILE': [
215
+ ['N', 0, (-0.493, 1.373, -0.000)],
216
+ ['CA', 0, (0.000, 0.000, 0.000)],
217
+ ['C', 0, (1.527, -0.000, -0.000)],
218
+ ['CB', 0, (-0.536, -0.793, -1.213)],
219
+ ['O', 3, (0.627, 1.062, -0.000)],
220
+ ['CG1', 4, (0.534, 1.437, -0.000)],
221
+ ['CG2', 4, (0.540, -0.785, -1.199)],
222
+ ['CD1', 5, (0.619, 1.391, 0.000)],
223
+ ],
224
+ 'LEU': [
225
+ ['N', 0, (-0.520, 1.363, 0.000)],
226
+ ['CA', 0, (0.000, 0.000, 0.000)],
227
+ ['C', 0, (1.525, -0.000, -0.000)],
228
+ ['CB', 0, (-0.522, -0.773, -1.214)],
229
+ ['O', 3, (0.625, 1.063, -0.000)],
230
+ ['CG', 4, (0.678, 1.371, 0.000)],
231
+ ['CD1', 5, (0.530, 1.430, -0.000)],
232
+ ['CD2', 5, (0.535, -0.774, 1.200)],
233
+ ],
234
+ 'LYS': [
235
+ ['N', 0, (-0.526, 1.362, -0.000)],
236
+ ['CA', 0, (0.000, 0.000, 0.000)],
237
+ ['C', 0, (1.526, 0.000, 0.000)],
238
+ ['CB', 0, (-0.524, -0.778, -1.208)],
239
+ ['O', 3, (0.626, 1.062, -0.000)],
240
+ ['CG', 4, (0.619, 1.390, 0.000)],
241
+ ['CD', 5, (0.559, 1.417, 0.000)],
242
+ ['CE', 6, (0.560, 1.416, 0.000)],
243
+ ['NZ', 7, (0.554, 1.387, 0.000)],
244
+ ],
245
+ 'MET': [
246
+ ['N', 0, (-0.521, 1.364, -0.000)],
247
+ ['CA', 0, (0.000, 0.000, 0.000)],
248
+ ['C', 0, (1.525, 0.000, 0.000)],
249
+ ['CB', 0, (-0.523, -0.776, -1.210)],
250
+ ['O', 3, (0.625, 1.062, -0.000)],
251
+ ['CG', 4, (0.613, 1.391, -0.000)],
252
+ ['SD', 5, (0.703, 1.695, 0.000)],
253
+ ['CE', 6, (0.320, 1.786, -0.000)],
254
+ ],
255
+ 'PHE': [
256
+ ['N', 0, (-0.518, 1.363, 0.000)],
257
+ ['CA', 0, (0.000, 0.000, 0.000)],
258
+ ['C', 0, (1.524, 0.000, -0.000)],
259
+ ['CB', 0, (-0.525, -0.776, -1.212)],
260
+ ['O', 3, (0.626, 1.062, -0.000)],
261
+ ['CG', 4, (0.607, 1.377, 0.000)],
262
+ ['CD1', 5, (0.709, 1.195, -0.000)],
263
+ ['CD2', 5, (0.706, -1.196, 0.000)],
264
+ ['CE1', 5, (2.102, 1.198, -0.000)],
265
+ ['CE2', 5, (2.098, -1.201, -0.000)],
266
+ ['CZ', 5, (2.794, -0.003, -0.001)],
267
+ ],
268
+ 'PRO': [
269
+ ['N', 0, (-0.566, 1.351, -0.000)],
270
+ ['CA', 0, (0.000, 0.000, 0.000)],
271
+ ['C', 0, (1.527, -0.000, 0.000)],
272
+ ['CB', 0, (-0.546, -0.611, -1.293)],
273
+ ['O', 3, (0.621, 1.066, 0.000)],
274
+ ['CG', 4, (0.382, 1.445, 0.0)],
275
+ # ['CD', 5, (0.427, 1.440, 0.0)],
276
+ ['CD', 5, (0.477, 1.424, 0.0)], # manually made angle 2 degrees larger
277
+ ],
278
+ 'SER': [
279
+ ['N', 0, (-0.529, 1.360, -0.000)],
280
+ ['CA', 0, (0.000, 0.000, 0.000)],
281
+ ['C', 0, (1.525, -0.000, -0.000)],
282
+ ['CB', 0, (-0.518, -0.777, -1.211)],
283
+ ['O', 3, (0.626, 1.062, -0.000)],
284
+ ['OG', 4, (0.503, 1.325, 0.000)],
285
+ ],
286
+ 'THR': [
287
+ ['N', 0, (-0.517, 1.364, 0.000)],
288
+ ['CA', 0, (0.000, 0.000, 0.000)],
289
+ ['C', 0, (1.526, 0.000, -0.000)],
290
+ ['CB', 0, (-0.516, -0.793, -1.215)],
291
+ ['O', 3, (0.626, 1.062, 0.000)],
292
+ ['CG2', 4, (0.550, -0.718, -1.228)],
293
+ ['OG1', 4, (0.472, 1.353, 0.000)],
294
+ ],
295
+ 'TRP': [
296
+ ['N', 0, (-0.521, 1.363, 0.000)],
297
+ ['CA', 0, (0.000, 0.000, 0.000)],
298
+ ['C', 0, (1.525, -0.000, 0.000)],
299
+ ['CB', 0, (-0.523, -0.776, -1.212)],
300
+ ['O', 3, (0.627, 1.062, 0.000)],
301
+ ['CG', 4, (0.609, 1.370, -0.000)],
302
+ ['CD1', 5, (0.824, 1.091, 0.000)],
303
+ ['CD2', 5, (0.854, -1.148, -0.005)],
304
+ ['CE2', 5, (2.186, -0.678, -0.007)],
305
+ ['CE3', 5, (0.622, -2.530, -0.007)],
306
+ ['NE1', 5, (2.140, 0.690, -0.004)],
307
+ ['CH2', 5, (3.028, -2.890, -0.013)],
308
+ ['CZ2', 5, (3.283, -1.543, -0.011)],
309
+ ['CZ3', 5, (1.715, -3.389, -0.011)],
310
+ ],
311
+ 'TYR': [
312
+ ['N', 0, (-0.522, 1.362, 0.000)],
313
+ ['CA', 0, (0.000, 0.000, 0.000)],
314
+ ['C', 0, (1.524, -0.000, -0.000)],
315
+ ['CB', 0, (-0.522, -0.776, -1.213)],
316
+ ['O', 3, (0.627, 1.062, -0.000)],
317
+ ['CG', 4, (0.607, 1.382, -0.000)],
318
+ ['CD1', 5, (0.716, 1.195, -0.000)],
319
+ ['CD2', 5, (0.713, -1.194, -0.001)],
320
+ ['CE1', 5, (2.107, 1.200, -0.002)],
321
+ ['CE2', 5, (2.104, -1.201, -0.003)],
322
+ ['OH', 5, (4.168, -0.002, -0.005)],
323
+ ['CZ', 5, (2.791, -0.001, -0.003)],
324
+ ],
325
+ 'VAL': [
326
+ ['N', 0, (-0.494, 1.373, -0.000)],
327
+ ['CA', 0, (0.000, 0.000, 0.000)],
328
+ ['C', 0, (1.527, -0.000, -0.000)],
329
+ ['CB', 0, (-0.533, -0.795, -1.213)],
330
+ ['O', 3, (0.627, 1.062, -0.000)],
331
+ ['CG1', 4, (0.540, 1.429, -0.000)],
332
+ ['CG2', 4, (0.533, -0.776, 1.203)],
333
+ ],
334
+ }
335
+
336
+ # A list of atoms (excluding hydrogen) for each AA type. PDB naming convention.
337
+ residue_atoms = {
338
+ 'ALA': ['C', 'CA', 'CB', 'N', 'O'],
339
+ 'ARG': ['C', 'CA', 'CB', 'CG', 'CD', 'CZ', 'N', 'NE', 'O', 'NH1', 'NH2'],
340
+ 'ASP': ['C', 'CA', 'CB', 'CG', 'N', 'O', 'OD1', 'OD2'],
341
+ 'ASN': ['C', 'CA', 'CB', 'CG', 'N', 'ND2', 'O', 'OD1'],
342
+ 'CYS': ['C', 'CA', 'CB', 'N', 'O', 'SG'],
343
+ 'GLU': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'O', 'OE1', 'OE2'],
344
+ 'GLN': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'NE2', 'O', 'OE1'],
345
+ 'GLY': ['C', 'CA', 'N', 'O'],
346
+ 'HIS': ['C', 'CA', 'CB', 'CG', 'CD2', 'CE1', 'N', 'ND1', 'NE2', 'O'],
347
+ 'ILE': ['C', 'CA', 'CB', 'CG1', 'CG2', 'CD1', 'N', 'O'],
348
+ 'LEU': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'N', 'O'],
349
+ 'LYS': ['C', 'CA', 'CB', 'CG', 'CD', 'CE', 'N', 'NZ', 'O'],
350
+ 'MET': ['C', 'CA', 'CB', 'CG', 'CE', 'N', 'O', 'SD'],
351
+ 'PHE': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'N', 'O'],
352
+ 'PRO': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'O'],
353
+ 'SER': ['C', 'CA', 'CB', 'N', 'O', 'OG'],
354
+ 'THR': ['C', 'CA', 'CB', 'CG2', 'N', 'O', 'OG1'],
355
+ 'TRP': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE2', 'CE3', 'CZ2', 'CZ3',
356
+ 'CH2', 'N', 'NE1', 'O'],
357
+ 'TYR': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'N', 'O',
358
+ 'OH'],
359
+ 'VAL': ['C', 'CA', 'CB', 'CG1', 'CG2', 'N', 'O']
360
+ }
361
+
362
+ # Naming swaps for ambiguous atom names.
363
+ # Due to symmetries in the amino acids the naming of atoms is ambiguous in
364
+ # 4 of the 20 amino acids.
365
+ # (The LDDT paper lists 7 amino acids as ambiguous, but the naming ambiguities
366
+ # in LEU, VAL and ARG can be resolved by using the 3d constellations of
367
+ # the 'ambiguous' atoms and their neighbours)
368
+ residue_atom_renaming_swaps = {
369
+ 'ASP': {'OD1': 'OD2'},
370
+ 'GLU': {'OE1': 'OE2'},
371
+ 'PHE': {'CD1': 'CD2', 'CE1': 'CE2'},
372
+ 'TYR': {'CD1': 'CD2', 'CE1': 'CE2'},
373
+ }
374
+
375
+ # Van der Waals radii [Angstroem] of the atoms (from Wikipedia)
376
+ van_der_waals_radius = {
377
+ 'C': 1.7,
378
+ 'N': 1.55,
379
+ 'O': 1.52,
380
+ 'S': 1.8,
381
+ }
382
+
383
# Lightweight records for the literature bond statistics parsed below.
Bond = collections.namedtuple(
    'Bond', ['atom1_name', 'atom2_name', 'length', 'stddev'])
BondAngle = collections.namedtuple(
    'BondAngle',
    ['atom1_name', 'atom2_name', 'atom3name', 'angle_rad', 'stddev'])


@functools.lru_cache(maxsize=None)
def load_stereo_chemical_props() -> Tuple[Mapping[str, List[Bond]],
                                          Mapping[str, List[Bond]],
                                          Mapping[str, List[BondAngle]]]:
  """Load stereo_chemical_props.txt into a nice structure.

  Load literature values for bond lengths and bond angles and translate
  bond angles into the length of the opposite edge of the triangle
  ("residue_virtual_bonds").

  The file is looked up at 'alphafold/common/stereo_chemical_props.txt'
  relative to the current working directory first. If it is not there, a
  'stereo_chemical_props.txt' sitting next to this module is used instead, so
  that loading does not depend on the directory the process was started from.
  The result is cached, so the file is read at most once per process.

  Returns:
    residue_bonds: dict that maps resname --> list of Bond tuples
    residue_virtual_bonds: dict that maps resname --> list of Bond tuples
    residue_bond_angles: dict that maps resname --> list of BondAngle tuples

  Raises:
    FileNotFoundError: If the file exists in neither location.
  """
  import os  # Local import: only needed for the path fallback below.

  stereo_chemical_props_path = (
      'alphafold/common/stereo_chemical_props.txt')
  if not os.path.exists(stereo_chemical_props_path):
    # Fall back to the copy shipped next to this module (if any). When it is
    # missing too, keep the original path so the error message is unchanged.
    module_file = globals().get('__file__')
    if module_file:
      candidate = os.path.join(
          os.path.dirname(os.path.abspath(module_file)),
          'stereo_chemical_props.txt')
      if os.path.exists(candidate):
        stereo_chemical_props_path = candidate

  with open(stereo_chemical_props_path, 'rt') as f:
    stereo_chemical_props = f.read()
  lines_iter = iter(stereo_chemical_props.splitlines())

  # Load bond lengths. The section is terminated by a line holding only '-'.
  residue_bonds = {}
  next(lines_iter)  # Skip header line.
  for line in lines_iter:
    if line.strip() == '-':
      break
    bond, resname, length, stddev = line.split()
    atom1, atom2 = bond.split('-')
    residue_bonds.setdefault(resname, []).append(
        Bond(atom1, atom2, float(length), float(stddev)))
  residue_bonds['UNK'] = []

  # Load bond angles (same layout; the file stores degrees, we keep radians).
  residue_bond_angles = {}
  next(lines_iter)  # Skip empty line.
  next(lines_iter)  # Skip header line.
  for line in lines_iter:
    if line.strip() == '-':
      break
    bond, resname, angle_degree, stddev_degree = line.split()
    atom1, atom2, atom3 = bond.split('-')
    residue_bond_angles.setdefault(resname, []).append(
        BondAngle(atom1, atom2, atom3,
                  float(angle_degree) / 180. * np.pi,
                  float(stddev_degree) / 180. * np.pi))
  residue_bond_angles['UNK'] = []

  def make_bond_key(atom1_name, atom2_name):
    """Unique key to lookup bonds."""
    return '-'.join(sorted([atom1_name, atom2_name]))

  # Translate bond angles into distances ("virtual bonds").
  residue_virtual_bonds = {}
  for resname, bond_angles in residue_bond_angles.items():
    # Create a fast lookup dict for bond lengths.
    bond_cache = {
        make_bond_key(b.atom1_name, b.atom2_name): b
        for b in residue_bonds[resname]
    }
    residue_virtual_bonds[resname] = []
    for ba in bond_angles:
      bond1 = bond_cache[make_bond_key(ba.atom1_name, ba.atom2_name)]
      bond2 = bond_cache[make_bond_key(ba.atom2_name, ba.atom3name)]

      # Compute distance between atom1 and atom3 using the law of cosines
      # c^2 = a^2 + b^2 - 2ab*cos(gamma).
      gamma = ba.angle_rad
      length = np.sqrt(bond1.length**2 + bond2.length**2
                       - 2 * bond1.length * bond2.length * np.cos(gamma))

      # Propagation of uncertainty assuming uncorrelated errors.
      dl_outer = 0.5 / length
      dl_dgamma = (2 * bond1.length * bond2.length * np.sin(gamma)) * dl_outer
      dl_db1 = (2 * bond1.length - 2 * bond2.length * np.cos(gamma)) * dl_outer
      dl_db2 = (2 * bond2.length - 2 * bond1.length * np.cos(gamma)) * dl_outer
      stddev = np.sqrt((dl_dgamma * ba.stddev)**2 +
                       (dl_db1 * bond1.stddev)**2 +
                       (dl_db2 * bond2.stddev)**2)
      residue_virtual_bonds[resname].append(
          Bond(ba.atom1_name, ba.atom3name, length, stddev))

  return (residue_bonds,
          residue_virtual_bonds,
          residue_bond_angles)
477
+
478
+
479
+ # Between-residue bond lengths for general bonds (first element) and for Proline
480
+ # (second element).
481
+ between_res_bond_length_c_n = [1.329, 1.341]
482
+ between_res_bond_length_stddev_c_n = [0.014, 0.016]
483
+
484
+ # Between-residue cos_angles.
485
+ between_res_cos_angles_c_n_ca = [-0.5203, 0.0353] # degrees: 121.352 +- 2.315
486
+ between_res_cos_angles_ca_c_n = [-0.4473, 0.0311] # degrees: 116.568 +- 1.995
487
+
488
+ # This mapping is used when we need to store atom data in a format that requires
489
+ # fixed atom data size for every residue (e.g. a numpy array).
490
+ atom_types = [
491
+ 'N', 'CA', 'C', 'CB', 'O', 'CG', 'CG1', 'CG2', 'OG', 'OG1', 'SG', 'CD',
492
+ 'CD1', 'CD2', 'ND1', 'ND2', 'OD1', 'OD2', 'SD', 'CE', 'CE1', 'CE2', 'CE3',
493
+ 'NE', 'NE1', 'NE2', 'OE1', 'OE2', 'CH2', 'NH1', 'NH2', 'OH', 'CZ', 'CZ2',
494
+ 'CZ3', 'NZ', 'OXT'
495
+ ]
496
+ atom_order = {atom_type: i for i, atom_type in enumerate(atom_types)}
497
+ atom_type_num = len(atom_types) # := 37.
498
+
499
+ # A compact atom encoding with 14 columns
500
+ # pylint: disable=line-too-long
501
+ # pylint: disable=bad-whitespace
502
+ restype_name_to_atom14_names = {
503
+ 'ALA': ['N', 'CA', 'C', 'O', 'CB', '', '', '', '', '', '', '', '', ''],
504
+ 'ARG': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'NE', 'CZ', 'NH1', 'NH2', '', '', ''],
505
+ 'ASN': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'ND2', '', '', '', '', '', ''],
506
+ 'ASP': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'OD2', '', '', '', '', '', ''],
507
+ 'CYS': ['N', 'CA', 'C', 'O', 'CB', 'SG', '', '', '', '', '', '', '', ''],
508
+ 'GLN': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'NE2', '', '', '', '', ''],
509
+ 'GLU': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'OE2', '', '', '', '', ''],
510
+ 'GLY': ['N', 'CA', 'C', 'O', '', '', '', '', '', '', '', '', '', ''],
511
+ 'HIS': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'ND1', 'CD2', 'CE1', 'NE2', '', '', '', ''],
512
+ 'ILE': ['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', 'CD1', '', '', '', '', '', ''],
513
+ 'LEU': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', '', '', '', '', '', ''],
514
+ 'LYS': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'CE', 'NZ', '', '', '', '', ''],
515
+ 'MET': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'SD', 'CE', '', '', '', '', '', ''],
516
+ 'PHE': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', '', '', ''],
517
+ 'PRO': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', '', '', '', '', '', '', ''],
518
+ 'SER': ['N', 'CA', 'C', 'O', 'CB', 'OG', '', '', '', '', '', '', '', ''],
519
+ 'THR': ['N', 'CA', 'C', 'O', 'CB', 'OG1', 'CG2', '', '', '', '', '', '', ''],
520
+ 'TRP': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'NE1', 'CE2', 'CE3', 'CZ2', 'CZ3', 'CH2'],
521
+ 'TYR': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'OH', '', ''],
522
+ 'VAL': ['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', '', '', '', '', '', '', ''],
523
+ 'UNK': ['', '', '', '', '', '', '', '', '', '', '', '', '', ''],
524
+
525
+ }
526
+ # pylint: enable=line-too-long
527
+ # pylint: enable=bad-whitespace
528
+
529
+
530
+ # This is the standard residue order when coding AA type as a number.
531
+ # Reproduce it by taking 3-letter AA codes and sorting them alphabetically.
532
+ restypes = [
533
+ 'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P',
534
+ 'S', 'T', 'W', 'Y', 'V'
535
+ ]
536
+ restype_order = {restype: i for i, restype in enumerate(restypes)}
537
+ restype_num = len(restypes) # := 20.
538
+ unk_restype_index = restype_num # Catch-all index for unknown restypes.
539
+
540
+ restypes_with_x = restypes + ['X']
541
+ restype_order_with_x = {restype: i for i, restype in enumerate(restypes_with_x)}
542
+
543
+
544
def sequence_to_onehot(
    sequence: str,
    mapping: Mapping[str, int],
    map_unknown_to_x: bool = False) -> np.ndarray:
  """One-hot encodes an amino acid sequence.

  Args:
    sequence: The amino acid sequence to encode, one character per residue.
    mapping: Maps each amino acid character to its column index.
    map_unknown_to_x: When True, upper-case letters missing from `mapping` are
      encoded as 'X' (which must then be a key of `mapping`), and any
      character that is not an upper-case letter is rejected. When False,
      every character has to be a key of `mapping`.

  Returns:
    An int32 numpy array of shape (seq_len, num_unique_aas) holding a single 1
    per row.

  Raises:
    ValueError: If the values of `mapping` are not exactly 0 ..
      num_unique_aas - 1, or if `map_unknown_to_x` is set and the sequence
      contains a character that is not an upper-case letter.
    KeyError: If a character (or 'X', when it is needed) is not in `mapping`.
  """
  num_entries = max(mapping.values()) + 1

  # The indices must form a dense range, otherwise columns would be unused.
  if sorted(set(mapping.values())) != list(range(num_entries)):
    raise ValueError('The mapping must have values from 0 to num_unique_aas-1 '
                     'without any gaps. Got: %s' % sorted(mapping.values()))

  one_hot_arr = np.zeros((len(sequence), num_entries), dtype=np.int32)

  for position, residue in enumerate(sequence):
    if not map_unknown_to_x:
      column = mapping[residue]
    elif residue.isalpha() and residue.isupper():
      column = mapping.get(residue, mapping['X'])
    else:
      raise ValueError(f'Invalid character in the sequence: {residue}')
    one_hot_arr[position, column] = 1

  return one_hot_arr
585
+
586
+
587
+ restype_1to3 = {
588
+ 'A': 'ALA',
589
+ 'R': 'ARG',
590
+ 'N': 'ASN',
591
+ 'D': 'ASP',
592
+ 'C': 'CYS',
593
+ 'Q': 'GLN',
594
+ 'E': 'GLU',
595
+ 'G': 'GLY',
596
+ 'H': 'HIS',
597
+ 'I': 'ILE',
598
+ 'L': 'LEU',
599
+ 'K': 'LYS',
600
+ 'M': 'MET',
601
+ 'F': 'PHE',
602
+ 'P': 'PRO',
603
+ 'S': 'SER',
604
+ 'T': 'THR',
605
+ 'W': 'TRP',
606
+ 'Y': 'TYR',
607
+ 'V': 'VAL',
608
+ }
609
+
610
+
611
+ # NB: restype_3to1 differs from Bio.PDB.protein_letters_3to1 by being a simple
612
+ # 1-to-1 mapping of 3 letter names to one letter names. The latter contains
613
+ # many more, and less common, three letter names as keys and maps many of these
614
+ # to the same one letter name (including 'X' and 'U' which we don't use here).
615
+ restype_3to1 = {v: k for k, v in restype_1to3.items()}
616
+
617
+ # Define a restype name for all unknown residues.
618
+ unk_restype = 'UNK'
619
+
620
+ resnames = [restype_1to3[r] for r in restypes] + [unk_restype]
621
+ resname_to_idx = {resname: i for i, resname in enumerate(resnames)}
622
+
623
+
624
+ # The mapping here uses hhblits convention, so that B is mapped to D, J and O
625
+ # are mapped to X, U is mapped to C, and Z is mapped to E. Other than that the
626
+ # remaining 20 amino acids are kept in alphabetical order.
627
+ # There are 2 non-amino acid codes, X (representing any amino acid) and
628
+ # "-" representing a missing amino acid in an alignment. The id for these
629
+ # codes is put at the end (20 and 21) so that they can easily be ignored if
630
+ # desired.
631
+ HHBLITS_AA_TO_ID = {
632
+ 'A': 0,
633
+ 'B': 2,
634
+ 'C': 1,
635
+ 'D': 2,
636
+ 'E': 3,
637
+ 'F': 4,
638
+ 'G': 5,
639
+ 'H': 6,
640
+ 'I': 7,
641
+ 'J': 20,
642
+ 'K': 8,
643
+ 'L': 9,
644
+ 'M': 10,
645
+ 'N': 11,
646
+ 'O': 20,
647
+ 'P': 12,
648
+ 'Q': 13,
649
+ 'R': 14,
650
+ 'S': 15,
651
+ 'T': 16,
652
+ 'U': 1,
653
+ 'V': 17,
654
+ 'W': 18,
655
+ 'X': 20,
656
+ 'Y': 19,
657
+ 'Z': 3,
658
+ '-': 21,
659
+ }
660
+
661
+ # Partial inversion of HHBLITS_AA_TO_ID.
662
+ ID_TO_HHBLITS_AA = {
663
+ 0: 'A',
664
+ 1: 'C', # Also U.
665
+ 2: 'D', # Also B.
666
+ 3: 'E', # Also Z.
667
+ 4: 'F',
668
+ 5: 'G',
669
+ 6: 'H',
670
+ 7: 'I',
671
+ 8: 'K',
672
+ 9: 'L',
673
+ 10: 'M',
674
+ 11: 'N',
675
+ 12: 'P',
676
+ 13: 'Q',
677
+ 14: 'R',
678
+ 15: 'S',
679
+ 16: 'T',
680
+ 17: 'V',
681
+ 18: 'W',
682
+ 19: 'Y',
683
+ 20: 'X', # Includes J and O.
684
+ 21: '-',
685
+ }
686
+
687
+ restypes_with_x_and_gap = restypes + ['X', '-']
688
+ MAP_HHBLITS_AATYPE_TO_OUR_AATYPE = tuple(
689
+ restypes_with_x_and_gap.index(ID_TO_HHBLITS_AA[i])
690
+ for i in range(len(restypes_with_x_and_gap)))
691
+
692
+
693
def _make_standard_atom_mask() -> np.ndarray:
  """Builds the [num_res_types, num_atom_types] atom presence mask.

  Row i is 1 at every atom type listed in `residue_atoms` for residue type i
  (in `restypes` order). The extra last row, for the unknown residue type,
  stays all zeros.
  """
  # +1 row for the unknown residue type.
  mask = np.zeros([restype_num + 1, atom_type_num], dtype=np.int32)
  for row, letter in enumerate(restypes):
    columns = [atom_order[name] for name in residue_atoms[restype_1to3[letter]]]
    mask[row, columns] = 1
  return mask
704
+
705
+
706
+ STANDARD_ATOM_MASK = _make_standard_atom_mask()
707
+
708
+
709
# One-hot encodings of the atoms that define the rotation axis of each
# chi-angle, for every residue type.
def chi_angle_atom(atom_index: int) -> np.ndarray:
  """Returns one-hot encodings of one atom of every chi-angle definition.

  Args:
    atom_index: Which atom of each chi-angle atom tuple in `chi_angles_atoms`
      to encode.

  Returns:
    Array of shape [len(restypes) + 1, atom_type_num, 4]; the last axis runs
    over the (up to) four chi angles and the final row (residue `X`) is zero.
  """
  identity = np.eye(atom_type_num)

  padded_indices = {}
  for resname, chi_atoms in chi_angles_atoms.items():
    picked = [atom_types.index(group[atom_index]) for group in chi_atoms]
    # Residues with fewer than 4 chi angles are padded with -1.
    # NOTE(review): indexing `identity` with -1 selects its last row, so the
    # padded slots become a one-hot on the last atom type rather than zeros.
    # Confirm this is intended before relying on the padding.
    padded_indices[resname] = picked + [-1] * (4 - len(picked))

  per_residue = [identity[padded_indices[restype_1to3[r]]] for r in restypes]
  per_residue.append(np.zeros([4, atom_type_num]))  # Add zeros for residue `X`.

  # [residue, chi, atom_type] -> [residue, atom_type, chi]
  return np.transpose(np.stack(per_residue, axis=0), [0, 2, 1])
731
+
732
+ chi_atom_1_one_hot = chi_angle_atom(1)
733
+ chi_atom_2_one_hot = chi_angle_atom(2)
734
+
735
+ # An array like chi_angles_atoms but using indices rather than names.
736
+ chi_angles_atom_indices = [chi_angles_atoms[restype_1to3[r]] for r in restypes]
737
+ chi_angles_atom_indices = tree.map_structure(
738
+ lambda atom_name: atom_order[atom_name], chi_angles_atom_indices)
739
+ chi_angles_atom_indices = np.array([
740
+ chi_atoms + ([[0, 0, 0, 0]] * (4 - len(chi_atoms)))
741
+ for chi_atoms in chi_angles_atom_indices])
742
+
743
+ # Mapping from (res_name, atom_name) pairs to the atom's chi group index
744
+ # and atom index within that group.
745
+ chi_groups_for_atom = collections.defaultdict(list)
746
+ for res_name, chi_angle_atoms_for_res in chi_angles_atoms.items():
747
+ for chi_group_i, chi_group in enumerate(chi_angle_atoms_for_res):
748
+ for atom_i, atom in enumerate(chi_group):
749
+ chi_groups_for_atom[(res_name, atom)].append((chi_group_i, atom_i))
750
+ chi_groups_for_atom = dict(chi_groups_for_atom)
751
+
752
+
753
+ def _make_rigid_transformation_4x4(ex, ey, translation):
754
+ """Create a rigid 4x4 transformation matrix from two axes and transl."""
755
+ # Normalize ex.
756
+ ex_normalized = ex / np.linalg.norm(ex)
757
+
758
+ # make ey perpendicular to ex
759
+ ey_normalized = ey - np.dot(ey, ex_normalized) * ex_normalized
760
+ ey_normalized /= np.linalg.norm(ey_normalized)
761
+
762
+ # compute ez as cross product
763
+ eznorm = np.cross(ex_normalized, ey_normalized)
764
+ m = np.stack([ex_normalized, ey_normalized, eznorm, translation]).transpose()
765
+ m = np.concatenate([m, [[0., 0., 0., 1.]]], axis=0)
766
+ return m
767
+
768
+
769
# create an array with (restype, atomtype) --> rigid_group_idx
# and an array with (restype, atomtype, coord) for the atom positions
# and compute affine transformation matrices (4,4) from one rigid group to the
# previous group
#
# Leading axis: 21 rows = 20 residue types + 1 unknown. 37 / 14 are the sizes
# of the atom37 / atom14 encodings and 8 is the number of rigid groups per
# residue. The arrays are allocated as zeros here and filled in place by
# _make_rigid_group_constants().
#
# The integer tables use the builtin `int` as dtype: the `np.int` alias it
# replaces meant exactly that, and was removed in NumPy 1.24.
restype_atom37_to_rigid_group = np.zeros([21, 37], dtype=int)
restype_atom37_mask = np.zeros([21, 37], dtype=np.float32)
restype_atom37_rigid_group_positions = np.zeros([21, 37, 3], dtype=np.float32)
restype_atom14_to_rigid_group = np.zeros([21, 14], dtype=int)
restype_atom14_mask = np.zeros([21, 14], dtype=np.float32)
restype_atom14_rigid_group_positions = np.zeros([21, 14, 3], dtype=np.float32)
restype_rigid_group_default_frame = np.zeros([21, 8, 4, 4], dtype=np.float32)
780
+
781
###############################################
# Index translation tables between the atom14 and atom37 encodings, one row
# per residue type in `restypes` order plus a trailing all-zero row for the
# unknown residue type. Slots without a counterpart map to index 0.
restype_atom14_to_atom37 = []
restype_atom37_to_atom14 = []
for rt in restypes:
  atom_names = restype_name_to_atom14_names[restype_1to3[rt]]
  atom_name_to_idx14 = {name: i for i, name in enumerate(atom_names)}
  # Empty atom14 slots ('' names) point at atom37 index 0.
  restype_atom14_to_atom37.append(
      [atom_order[name] if name else 0 for name in atom_names])
  # Atom types this residue does not have point at atom14 index 0.
  restype_atom37_to_atom14.append(
      [atom_name_to_idx14.get(name, 0) for name in atom_types])
restype_atom14_to_atom37.append([0] * 14)
restype_atom37_to_atom14.append([0] * 37)
restype_atom14_to_atom37 = np.array(restype_atom14_to_atom37, dtype=np.int32)
restype_atom37_to_atom14 = np.array(restype_atom37_to_atom14, dtype=np.int32)
################################################
794
+
795
def _make_rigid_group_constants():
  """Fill the arrays above.

  Populates, in place, the module-level `restype_atom37_*`, `restype_atom14_*`
  and `restype_rigid_group_default_frame` arrays from
  `rigid_group_atom_positions`, `chi_angles_atoms` and `chi_angles_mask`.
  Rows of the unknown residue type are left untouched (all zeros).
  """
  # Pass 1: per-atom rigid group index, mask and position within the group,
  # stored in both the atom37 and the atom14 layout.
  for restype, restype_letter in enumerate(restypes):
    resname = restype_1to3[restype_letter]
    for atomname, group_idx, atom_position in rigid_group_atom_positions[resname]:
      atomtype = atom_order[atomname]
      restype_atom37_to_rigid_group[restype, atomtype] = group_idx
      restype_atom37_mask[restype, atomtype] = 1
      restype_atom37_rigid_group_positions[restype, atomtype, :] = atom_position

      atom14idx = restype_name_to_atom14_names[resname].index(atomname)
      restype_atom14_to_rigid_group[restype, atom14idx] = group_idx
      restype_atom14_mask[restype, atom14idx] = 1
      restype_atom14_rigid_group_positions[restype, atom14idx, :] = atom_position

  # Pass 2: default 4x4 frame of every rigid group, expressed relative to the
  # previous group (backbone for groups 0-4, the preceding chi frame after).
  for restype, restype_letter in enumerate(restypes):
    resname = restype_1to3[restype_letter]
    atom_positions = {name: np.array(pos) for name, _, pos
                      in rigid_group_atom_positions[resname]}

    # backbone to backbone is the identity transform
    restype_rigid_group_default_frame[restype, 0, :, :] = np.eye(4)

    # pre-omega-frame to backbone (currently dummy identity matrix)
    restype_rigid_group_default_frame[restype, 1, :, :] = np.eye(4)

    # phi-frame to backbone
    mat = _make_rigid_transformation_4x4(
        ex=atom_positions['N'] - atom_positions['CA'],
        ey=np.array([1., 0., 0.]),
        translation=atom_positions['N'])
    restype_rigid_group_default_frame[restype, 2, :, :] = mat

    # psi-frame to backbone
    mat = _make_rigid_transformation_4x4(
        ex=atom_positions['C'] - atom_positions['CA'],
        ey=atom_positions['CA'] - atom_positions['N'],
        translation=atom_positions['C'])
    restype_rigid_group_default_frame[restype, 3, :, :] = mat

    # chi1-frame to backbone
    if chi_angles_mask[restype][0]:
      base_atom_names = chi_angles_atoms[resname][0]
      base_atom_positions = [atom_positions[name] for name in base_atom_names]
      mat = _make_rigid_transformation_4x4(
          ex=base_atom_positions[2] - base_atom_positions[1],
          ey=base_atom_positions[0] - base_atom_positions[1],
          translation=base_atom_positions[2])
      restype_rigid_group_default_frame[restype, 4, :, :] = mat

    # chi2-frame to chi1-frame
    # chi3-frame to chi2-frame
    # chi4-frame to chi3-frame
    # luckily all rotation axes for the next frame start at (0,0,0) of the
    # previous frame
    for chi_idx in range(1, 4):
      if chi_angles_mask[restype][chi_idx]:
        axis_end_atom_name = chi_angles_atoms[resname][chi_idx][2]
        axis_end_atom_position = atom_positions[axis_end_atom_name]
        mat = _make_rigid_transformation_4x4(
            ex=axis_end_atom_position,
            ey=np.array([-1., 0., 0.]),
            translation=axis_end_atom_position)
        restype_rigid_group_default_frame[restype, 4 + chi_idx, :, :] = mat
864
+
865
+
866
+ _make_rigid_group_constants()
867
+
868
+
869
def make_atom14_dists_bounds(overlap_tolerance=1.5,
                             bond_length_tolerance_factor=15):
  """Computes per-residue distance bounds used to assess violations.

  Args:
    overlap_tolerance: Amount subtracted from the sum of two Van der Waals
      radii to obtain the clash (lower) bound of a non-bonded atom pair.
    bond_length_tolerance_factor: Number of standard deviations a bond or
      virtual bond (bond angle) length may deviate from its literature value.

  Returns:
    Dict with float32 arrays 'lower_bound', 'upper_bound' and 'stddev', each
    of shape (21, 14, 14) and indexed by (restype, atom14, atom14).
  """
  table_shape = [21, 14, 14]
  lower_bounds = np.zeros(table_shape, np.float32)
  upper_bounds = np.zeros(table_shape, np.float32)
  bond_stddevs = np.zeros(table_shape, np.float32)
  residue_bonds, residue_virtual_bonds, _ = load_stereo_chemical_props()

  def _assign_symmetric(table, res_idx, i, j, value):
    """Writes `value` at both (i, j) and (j, i) of one residue's matrix."""
    table[res_idx, i, j] = value
    table[res_idx, j, i] = value

  for res_idx, letter in enumerate(restypes):
    resname = restype_1to3[letter]
    atom_list = restype_name_to_atom14_names[resname]
    # Empty names are padding slots of the atom14 layout.
    present = [(idx, name) for idx, name in enumerate(atom_list) if name]

    # Generic clash bounds for every pair of distinct atoms; the radius is
    # looked up by element, i.e. the first character of the atom name.
    for i, name_i in present:
      radius_i = van_der_waals_radius[name_i[0]]
      for j, name_j in present:
        if i == j:
          continue
        radius_j = van_der_waals_radius[name_j[0]]
        _assign_symmetric(lower_bounds, res_idx, i, j,
                          radius_i + radius_j - overlap_tolerance)
        _assign_symmetric(upper_bounds, res_idx, i, j, 1e10)

    # Bonded pairs and pairs spanning a bond angle override the clash bounds.
    for bond in residue_bonds[resname] + residue_virtual_bonds[resname]:
      i = atom_list.index(bond.atom1_name)
      j = atom_list.index(bond.atom2_name)
      _assign_symmetric(
          lower_bounds, res_idx, i, j,
          bond.length - bond_length_tolerance_factor * bond.stddev)
      _assign_symmetric(
          upper_bounds, res_idx, i, j,
          bond.length + bond_length_tolerance_factor * bond.stddev)
      _assign_symmetric(bond_stddevs, res_idx, i, j, bond.stddev)

  return {'lower_bound': lower_bounds,  # shape (21,14,14)
          'upper_bound': upper_bounds,  # shape (21,14,14)
          'stddev': bond_stddevs,  # shape (21,14,14)
         }
af_backprop/alphafold/data/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Data pipeline for model features."""
af_backprop/alphafold/data/mmcif_parsing.py ADDED
@@ -0,0 +1,384 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Parses the mmCIF file format."""
16
+ import collections
17
+ import dataclasses
18
+ import io
19
+ from typing import Any, Mapping, Optional, Sequence, Tuple
20
+
21
+ from absl import logging
22
+ from Bio import PDB
23
+ from Bio.Data import SCOPData
24
+
25
+ # Type aliases:
26
+ ChainId = str
27
+ PdbHeader = Mapping[str, Any]
28
+ PdbStructure = PDB.Structure.Structure
29
+ SeqRes = str
30
+ MmCIFDict = Mapping[str, Sequence[str]]
31
+
32
+
33
@dataclasses.dataclass(frozen=True)
class Monomer:
  """Immutable (id, num) record for a single monomer."""
  # NOTE(review): presumably the monomer (residue) identifier and its position
  # in the polymer sequence - confirm against the code that builds these.
  id: str
  num: int
37
+
38
+
39
# Note - the mmCIF format gives no guarantee about the type of author-assigned
# sequence numbers; they need not be integers. That is why `author_seq_num`
# is kept as a string while `mmcif_seq_num` is an int.
@dataclasses.dataclass(frozen=True)
class AtomSite:
  """Immutable record of the per-atom identifiers of one atom site."""
  residue_name: str
  author_chain_id: str
  mmcif_chain_id: str
  author_seq_num: str
  mmcif_seq_num: int
  insertion_code: str
  hetatm_atom: str
  model_num: int
51
+
52
+
53
# Used to map a SEQRES index to a residue in the structure.
@dataclasses.dataclass(frozen=True)
class ResiduePosition:
  """Immutable address of a residue: chain, residue number, insertion code."""
  chain_id: str
  residue_number: int
  insertion_code: str
59
+
60
+
61
@dataclasses.dataclass(frozen=True)
class ResidueAtPosition:
  """Immutable description of the residue found at one sequence position."""
  # `position` is Optional.
  # NOTE(review): presumably None when `is_missing` is True - confirm against
  # the parser that creates these records.
  position: Optional[ResiduePosition]
  name: str
  is_missing: bool
  hetflag: str
67
+
68
+
69
@dataclasses.dataclass(frozen=True)
class MmcifObject:
  """A parsed mmCIF file.

  Attributes:
    file_id: A meaningful name for the file, e.g. a pdb_id. It should be
      unique amongst all files being processed.
    header: The Biopython header.
    structure: The Biopython structure.
    chain_to_seqres: Maps each chain_id to its 1 letter amino acid sequence,
      e.g. {'A': 'ABCDEFG'}.
    seqres_to_structure: For each chain_id, a mapping from SEQRES index to a
      ResidueAtPosition, e.g. {'A': {0: ResidueAtPosition,
                                     1: ResidueAtPosition,
                                     ...}}.
    raw_string: The raw string the MmcifObject was constructed from.
  """
  file_id: str
  header: PdbHeader
  structure: PdbStructure
  chain_to_seqres: Mapping[ChainId, SeqRes]
  seqres_to_structure: Mapping[ChainId, Mapping[int, ResidueAtPosition]]
  raw_string: Any
92
+
93
+
94
@dataclasses.dataclass(frozen=True)
class ParsingResult:
  """The value returned by the parse function.

  Attributes:
    mmcif_object: The parsed MmcifObject; may be None if no chain could be
      successfully parsed.
    errors: Maps (file_id, chain_id) to any exception that was generated.
  """
  mmcif_object: Optional[MmcifObject]
  errors: Mapping[Tuple[str, str], Any]
105
+
106
+
107
class ParseError(Exception):
  """An error indicating that an mmCIF file could not be parsed."""
  # NOTE(review): not raised anywhere in the portion of the module reviewed
  # here; presumably used by callers or other modules - confirm before removal.
109
+
110
+
111
def mmcif_loop_to_list(prefix: str,
                       parsed_info: MmCIFDict) -> Sequence[Mapping[str, str]]:
  """Returns the rows of one mmCIF loop as a list of dicts.

  Reference for loop_ in mmCIF:
    http://mmcif.wwpdb.org/docs/tutorials/mechanics/pdbx-mmcif-syntax.html

  Args:
    prefix: Common prefix of the loop's data items, including the trailing
      period, e.g. '_entity_poly_seq.' for _entity_poly_seq.num and
      _entity_poly_seq.mon_id.
    parsed_info: Parsed mmCIF data, e.g. _mmcif_dict from a Biopython parser.

  Returns:
    One dict per loop row, keyed by the full data-item names. Empty when no
    data item starts with `prefix`.
  """
  matching = [(name, values) for name, values in parsed_info.items()
              if name.startswith(prefix)]
  column_names = [name for name, _ in matching]
  columns = [values for _, values in matching]

  # Every column of a loop must hold the same number of values.
  distinct_lengths = {len(values) for values in columns}
  assert len(distinct_lengths) <= 1, (
      'mmCIF error: Not all loops are the same length: %s' % column_names)

  # Transpose the columns into rows.
  return [dict(zip(column_names, row)) for row in zip(*columns)]
139
+
140
+
141
def mmcif_loop_to_dict(prefix: str,
                       index: str,
                       parsed_info: MmCIFDict,
                       ) -> Mapping[str, Mapping[str, str]]:
  """Returns the rows of one mmCIF loop, keyed by one of its columns.

  Args:
    prefix: Common prefix of the loop's data items, including the trailing
      period, e.g. '_entity_poly_seq.'.
    index: Full name of the data item whose value becomes each row's key.
    parsed_info: Parsed mmCIF data, e.g. _mmcif_dict from a Biopython parser.

  Returns:
    A dict mapping the `index` value of each row to the row's dict. When
    several rows share an index value, the last one wins.
  """
  rows_by_key = {}
  for row in mmcif_loop_to_list(prefix, parsed_info):
    rows_by_key[row[index]] = row
  return rows_by_key
161
+
162
+
163
def parse(*,
          file_id: str,
          mmcif_string: str,
          catch_all_errors: bool = True) -> ParsingResult:
  """Entry point, parses an mmcif_string.

  Only atoms belonging to model '1' are used to build the residue mappings, and
  only the first model of the Biopython structure is kept.

  Args:
    file_id: A string identifier for this file. Should be unique within the
      collection of files being processed.
    mmcif_string: Contents of an mmCIF file.
    catch_all_errors: If True, all exceptions are caught and error messages are
      returned as part of the ParsingResult. If False exceptions will be allowed
      to propagate.

  Returns:
    A ParsingResult. Its `mmcif_object` is None when no protein chains are found
    or when an exception was caught.
  """
  errors = {}
  try:
    parser = PDB.MMCIFParser(QUIET=True)
    handle = io.StringIO(mmcif_string)
    full_structure = parser.get_structure('', handle)
    first_model_structure = _get_first_model(full_structure)
    # Extract the _mmcif_dict from the parser, which contains useful fields not
    # reflected in the Biopython structure.
    parsed_info = parser._mmcif_dict  # pylint:disable=protected-access

    # Ensure all values are lists, even if singletons.
    # Only existing keys are reassigned, so the dict is not resized while it is
    # being iterated.
    for key, value in parsed_info.items():
      if not isinstance(value, list):
        parsed_info[key] = [value]

    header = _get_header(parsed_info)

    # Determine the protein chains, and their start numbers according to the
    # internal mmCIF numbering scheme (likely but not guaranteed to be 1).
    valid_chains = _get_protein_chains(parsed_info=parsed_info)
    if not valid_chains:
      return ParsingResult(
          None, {(file_id, ''): 'No protein chains found in this file.'})
    seq_start_num = {chain_id: min([monomer.num for monomer in seq])
                     for chain_id, seq in valid_chains.items()}

    # Loop over the atoms for which we have coordinates. Populate two mappings:
    # -mmcif_to_author_chain_id (maps internal mmCIF chain ids to chain ids used
    # by the authors / Biopython).
    # -seq_to_structure_mappings (maps idx into sequence to ResidueAtPosition).
    mmcif_to_author_chain_id = {}
    seq_to_structure_mappings = {}
    for atom in _get_atom_site_list(parsed_info):
      # model_num is the raw mmCIF string, hence the comparison with '1'.
      if atom.model_num != '1':
        # We only process the first model at the moment.
        continue

      mmcif_to_author_chain_id[atom.mmcif_chain_id] = atom.author_chain_id

      if atom.mmcif_chain_id in valid_chains:
        hetflag = ' '
        if atom.hetatm_atom == 'HETATM':
          # Water atoms are assigned a special hetflag of W in Biopython. We
          # need to do the same, so that this hetflag can be used to fetch
          # a residue from the Biopython structure by id.
          if atom.residue_name in ('HOH', 'WAT'):
            hetflag = 'W'
          else:
            hetflag = 'H_' + atom.residue_name
        insertion_code = atom.insertion_code
        if not _is_set(atom.insertion_code):
          insertion_code = ' '
        # int() raises ValueError for non-integer author sequence numbers; that
        # error is handled by the enclosing try/except.
        position = ResiduePosition(chain_id=atom.author_chain_id,
                                   residue_number=int(atom.author_seq_num),
                                   insertion_code=insertion_code)
        # Zero-based index into the chain's sequence.
        seq_idx = int(atom.mmcif_seq_num) - seq_start_num[atom.mmcif_chain_id]
        current = seq_to_structure_mappings.get(atom.author_chain_id, {})
        # Every atom of a residue rewrites the same entry with the same value.
        current[seq_idx] = ResidueAtPosition(position=position,
                                             name=atom.residue_name,
                                             is_missing=False,
                                             hetflag=hetflag)
        seq_to_structure_mappings[atom.author_chain_id] = current

    # Add missing residue information to seq_to_structure_mappings.
    # A valid chain without any model-1 atom raises KeyError here, which is
    # handled by the enclosing try/except.
    for chain_id, seq_info in valid_chains.items():
      author_chain = mmcif_to_author_chain_id[chain_id]
      current_mapping = seq_to_structure_mappings[author_chain]
      for idx, monomer in enumerate(seq_info):
        if idx not in current_mapping:
          current_mapping[idx] = ResidueAtPosition(position=None,
                                                   name=monomer.id,
                                                   is_missing=True,
                                                   hetflag=' ')

    # Build the one-letter sequence of every chain, keyed by author chain id.
    author_chain_to_sequence = {}
    for chain_id, seq_info in valid_chains.items():
      author_chain = mmcif_to_author_chain_id[chain_id]
      seq = []
      for monomer in seq_info:
        # Unknown residues, and residues whose code is not a single letter,
        # become 'X'.
        code = SCOPData.protein_letters_3to1.get(monomer.id, 'X')
        seq.append(code if len(code) == 1 else 'X')
      seq = ''.join(seq)
      author_chain_to_sequence[author_chain] = seq

    mmcif_object = MmcifObject(
        file_id=file_id,
        header=header,
        structure=first_model_structure,
        chain_to_seqres=author_chain_to_sequence,
        seqres_to_structure=seq_to_structure_mappings,
        raw_string=parsed_info)

    return ParsingResult(mmcif_object=mmcif_object, errors=errors)
  except Exception as e:  # pylint:disable=broad-except
    # Record the failure under an empty chain id, then either re-raise or
    # report it through the result, depending on catch_all_errors.
    errors[(file_id, '')] = e
    if not catch_all_errors:
      raise
    return ParsingResult(mmcif_object=None, errors=errors)
278
+
279
+
280
+ def _get_first_model(structure: PdbStructure) -> PdbStructure:
281
+ """Returns the first model in a Biopython structure."""
282
+ return next(structure.get_models())
283
+
284
# NOTE(review): not referenced in the portion of the module reviewed here;
# presumably a minimum chain length for peptide filtering - confirm usage.
_MIN_LENGTH_OF_CHAIN_TO_BE_COUNTED_AS_PEPTIDE = 21
285
+
286
+
287
def get_release_date(parsed_info: MmCIFDict) -> str:
  """Returns the smallest (i.e. oldest) revision date string.

  Dates are compared as strings, so they are expected to be in a format that
  sorts chronologically (e.g. YYYY-MM-DD).
  """
  return min(parsed_info['_pdbx_audit_revision_history.revision_date'])
291
+
292
+
293
def _get_header(parsed_info: MmCIFDict) -> PdbHeader:
  """Builds a basic header with method, release date and resolution.

  Args:
    parsed_info: Parsed mmCIF data whose values are lists of strings.

  Returns:
    A dict with 'structure_method', 'resolution' (0.00 when unavailable) and,
    if revision dates are present, 'release_date'.
  """
  methods = [row['_exptl.method'].lower()
             for row in mmcif_loop_to_list('_exptl.', parsed_info)]
  header = {'structure_method': ','.join(methods)}

  # Note: The release_date here corresponds to the oldest revision. We prefer to
  # use this for dataset filtering over the deposition_date.
  if '_pdbx_audit_revision_history.revision_date' not in parsed_info:
    logging.warning('Could not determine release_date: %s',
                    parsed_info['_entry.id'])
  else:
    header['release_date'] = get_release_date(parsed_info)

  header['resolution'] = 0.00
  resolution_keys = ('_refine.ls_d_res_high',
                     '_em_3d_reconstruction.resolution',
                     '_reflns.d_resolution_high')
  # All keys are visited, so the last parseable one determines the resolution.
  for res_key in resolution_keys:
    if res_key not in parsed_info:
      continue
    try:
      header['resolution'] = float(parsed_info[res_key][0])
    except ValueError:
      logging.warning('Invalid resolution format: %s', parsed_info[res_key])

  return header
320
+
321
+
322
def _get_atom_site_list(parsed_info: MmCIFDict) -> Sequence[AtomSite]:
  """Returns one AtomSite per `_atom_site` row; data absent from the structure."""
  # Column order must match the field order of AtomSite, which is filled
  # positionally.
  column_names = (
      '_atom_site.label_comp_id',
      '_atom_site.auth_asym_id',
      '_atom_site.label_asym_id',
      '_atom_site.auth_seq_id',
      '_atom_site.label_seq_id',
      '_atom_site.pdbx_PDB_ins_code',
      '_atom_site.group_PDB',
      '_atom_site.pdbx_PDB_model_num',
  )
  columns = [parsed_info[name] for name in column_names]
  return [AtomSite(*row) for row in zip(*columns)]
334
+
335
+
336
def _get_protein_chains(
    *, parsed_info: Mapping[str, Any]) -> Mapping[ChainId, Sequence[Monomer]]:
  """Collects the polymer sequences of chains that contain peptide components.

  Args:
    parsed_info: _mmcif_dict produced by the Biopython parser.

  Returns:
    A dict mapping mmcif chain id to a list of Monomers. Chains of the same
    entity share the same list object.
  """
  # Group the polymer sequence entries by entity.
  polymers = collections.defaultdict(list)
  for row in mmcif_loop_to_list('_entity_poly_seq.', parsed_info):
    entity_monomers = polymers[row['_entity_poly_seq.entity_id']]
    entity_monomers.append(
        Monomer(id=row['_entity_poly_seq.mon_id'],
                num=int(row['_entity_poly_seq.num'])))

  # Chemical components, used to tell which polymers are proteins.
  chem_comps = mmcif_loop_to_dict('_chem_comp.', '_chem_comp.id', parsed_info)

  # Chains of each entity, so that the result can be keyed on chain id rather
  # than entity id.
  entity_to_mmcif_chains = collections.defaultdict(list)
  for row in mmcif_loop_to_list('_struct_asym.', parsed_info):
    chain_id = row['_struct_asym.id']
    entity_to_mmcif_chains[row['_struct_asym.entity_id']].append(chain_id)

  valid_chains = {}
  for entity_id, seq_info in polymers.items():
    chain_ids = entity_to_mmcif_chains[entity_id]

    # Reject polymers without any peptide-like components, such as DNA/RNA.
    # A list (not a generator) is used so that every monomer is looked up.
    peptide_flags = ['peptide' in chem_comps[monomer.id]['_chem_comp.type']
                     for monomer in seq_info]
    if not any(peptide_flags):
      continue
    valid_chains.update(dict.fromkeys(chain_ids, seq_info))
  return valid_chains
380
+
381
+
382
+ def _is_set(data: str) -> bool:
383
+ """Returns False if data is a special mmCIF character indicating 'unset'."""
384
+ return data not in ('.', '?')
af_backprop/alphafold/data/parsers.py ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Functions for parsing various file formats."""
16
+ import collections
17
+ import dataclasses
18
+ import re
19
+ import string
20
+ from typing import Dict, Iterable, List, Optional, Sequence, Tuple
21
+
22
# Type alias: one row of integer deletion counts per aligned sequence.
DeletionMatrix = Sequence[Sequence[int]]
23
+
24
+
25
@dataclasses.dataclass(frozen=True)
class TemplateHit:
  """Class representing a template hit."""
  index: int  # Hit number, from the 'No <n>' line of the hit paragraph.
  name: str  # Hit name line with its first character removed.
  aligned_cols: int  # Aligned_cols value from the summary line.
  sum_probs: float  # Sum_probs value from the summary line.
  query: str  # Concatenated aligned query sequence (may contain '-').
  hit_sequence: str  # Concatenated aligned hit sequence (may contain '-').
  indices_query: List[int]  # Zero-based query residue index per column; -1 for gaps.
  indices_hit: List[int]  # Zero-based hit residue index per column; -1 for gaps.
36
+
37
+
38
def parse_fasta(fasta_string: str) -> Tuple[Sequence[str], Sequence[str]]:
  """Splits a FASTA string into its sequences and their descriptions.

  Arguments:
    fasta_string: The string contents of a FASTA file.

  Returns:
    A tuple of two lists:
    * A list of sequences.
    * A list of sequence descriptions taken from the comment lines. In the
      same order as the sequences.

  Raises:
    IndexError: If sequence data appears before the first '>' line.
  """
  sequences = []
  descriptions = []
  for raw_line in fasta_string.splitlines():
    line = raw_line.strip()
    if not line:
      continue  # Skip blank lines.
    if line.startswith('>'):
      # Start a new record; drop the leading '>'.
      descriptions.append(line[1:])
      sequences.append('')
    else:
      # Sequence data always belongs to the most recently opened record.
      sequences[-1] += line

  return sequences, descriptions
65
+
66
+
67
def parse_stockholm(
    stockholm_string: str
) -> Tuple[Sequence[str], DeletionMatrix, Sequence[str]]:
  """Extracts the MSA and deletion matrix from a stockholm format alignment.

  Args:
    stockholm_string: The string contents of a stockholm file. The first
      sequence in the file should be the query sequence.

  Returns:
    A tuple of:
      * A list of sequences that have been aligned to the query. These
        might contain duplicates.
      * The deletion matrix for the alignment as a list of lists. The element
        at `deletion_matrix[i][j]` is the number of residues deleted from
        the aligned sequence i at residue position j.
      * The names of the targets matched, including the jackhmmer subsequence
        suffix.
  """
  # Join the (possibly interleaved) alignment rows per name, in file order.
  name_to_sequence = collections.OrderedDict()
  for raw_line in stockholm_string.splitlines():
    line = raw_line.strip()
    if line and not line.startswith(('#', '//')):
      name, chunk = line.split()
      name_to_sequence[name] = name_to_sequence.get(name, '') + chunk

  full_sequences = list(name_to_sequence.values())
  # The first sequence is the query; only its non-gap columns are kept.
  query = full_sequences[0] if full_sequences else ''
  keep_columns = [col for col, res in enumerate(query) if res != '-']

  msa = []
  deletion_matrix = []
  for sequence in full_sequences:
    msa.append(''.join([sequence[col] for col in keep_columns]))

    # Count residues that sit in query-gap columns before each kept column.
    deletion_vec = []
    pending_deletions = 0
    for seq_res, query_res in zip(sequence, query):
      if query_res != '-':
        deletion_vec.append(pending_deletions)
        pending_deletions = 0
      elif seq_res != '-':
        pending_deletions += 1
    deletion_matrix.append(deletion_vec)

  return msa, deletion_matrix, list(name_to_sequence.keys())
125
+
126
+
127
def parse_a3m(a3m_string: str) -> Tuple[Sequence[str], DeletionMatrix]:
  """Extracts the MSA and deletion matrix from an a3m format alignment.

  Args:
    a3m_string: The string contents of a a3m file. The first sequence in the
      file should be the query sequence.

  Returns:
    A tuple of:
      * A list of sequences that have been aligned to the query. These
        might contain duplicates.
      * The deletion matrix for the alignment as a list of lists. The element
        at `deletion_matrix[i][j]` is the number of residues deleted from
        the aligned sequence i at residue position j.
  """
  sequences, _ = parse_fasta(a3m_string)
  # Lowercase letters mark insertions relative to the query; removing them
  # yields the aligned sequence.
  strip_lowercase = str.maketrans('', '', string.ascii_lowercase)

  aligned_sequences = []
  deletion_matrix = []
  for msa_sequence in sequences:
    counts = []
    run_length = 0
    for residue in msa_sequence:
      if not residue.islower():
        # An aligned column: record the insertions seen since the last one.
        counts.append(run_length)
        run_length = 0
      else:
        run_length += 1
    deletion_matrix.append(counts)
    aligned_sequences.append(msa_sequence.translate(strip_lowercase))

  return aligned_sequences, deletion_matrix
159
+
160
+
161
+ def _convert_sto_seq_to_a3m(
162
+ query_non_gaps: Sequence[bool], sto_seq: str) -> Iterable[str]:
163
+ for is_query_res_non_gap, sequence_res in zip(query_non_gaps, sto_seq):
164
+ if is_query_res_non_gap:
165
+ yield sequence_res
166
+ elif sequence_res != '-':
167
+ yield sequence_res.lower()
168
+
169
+
170
+ def convert_stockholm_to_a3m(stockholm_format: str,
171
+ max_sequences: Optional[int] = None) -> str:
172
+ """Converts MSA in Stockholm format to the A3M format."""
173
+ descriptions = {}
174
+ sequences = {}
175
+ reached_max_sequences = False
176
+
177
+ for line in stockholm_format.splitlines():
178
+ reached_max_sequences = max_sequences and len(sequences) >= max_sequences
179
+ if line.strip() and not line.startswith(('#', '//')):
180
+ # Ignore blank lines, markup and end symbols - remainder are alignment
181
+ # sequence parts.
182
+ seqname, aligned_seq = line.split(maxsplit=1)
183
+ if seqname not in sequences:
184
+ if reached_max_sequences:
185
+ continue
186
+ sequences[seqname] = ''
187
+ sequences[seqname] += aligned_seq
188
+
189
+ for line in stockholm_format.splitlines():
190
+ if line[:4] == '#=GS':
191
+ # Description row - example format is:
192
+ # #=GS UniRef90_Q9H5Z4/4-78 DE [subseq from] cDNA: FLJ22755 ...
193
+ columns = line.split(maxsplit=3)
194
+ seqname, feature = columns[1:3]
195
+ value = columns[3] if len(columns) == 4 else ''
196
+ if feature != 'DE':
197
+ continue
198
+ if reached_max_sequences and seqname not in sequences:
199
+ continue
200
+ descriptions[seqname] = value
201
+ if len(descriptions) == len(sequences):
202
+ break
203
+
204
+ # Convert sto format to a3m line by line
205
+ a3m_sequences = {}
206
+ # query_sequence is assumed to be the first sequence
207
+ query_sequence = next(iter(sequences.values()))
208
+ query_non_gaps = [res != '-' for res in query_sequence]
209
+ for seqname, sto_sequence in sequences.items():
210
+ a3m_sequences[seqname] = ''.join(
211
+ _convert_sto_seq_to_a3m(query_non_gaps, sto_sequence))
212
+
213
+ fasta_chunks = (f">{k} {descriptions.get(k, '')}\n{a3m_sequences[k]}"
214
+ for k in a3m_sequences)
215
+ return '\n'.join(fasta_chunks) + '\n' # Include terminating newline.
216
+
217
+
218
+ def _get_hhr_line_regex_groups(
219
+ regex_pattern: str, line: str) -> Sequence[Optional[str]]:
220
+ match = re.match(regex_pattern, line)
221
+ if match is None:
222
+ raise RuntimeError(f'Could not parse query line {line}')
223
+ return match.groups()
224
+
225
+
226
+ def _update_hhr_residue_indices_list(
227
+ sequence: str, start_index: int, indices_list: List[int]):
228
+ """Computes the relative indices for each residue with respect to the original sequence."""
229
+ counter = start_index
230
+ for symbol in sequence:
231
+ if symbol == '-':
232
+ indices_list.append(-1)
233
+ else:
234
+ indices_list.append(counter)
235
+ counter += 1
236
+
237
+
238
def _parse_hhr_hit(detailed_lines: Sequence[str]) -> TemplateHit:
  """Parses the detailed HMM HMM comparison section for a single Hit.

  This works on .hhr files generated from both HHBlits and HHSearch.

  Args:
    detailed_lines: A list of lines from a single comparison section between 2
      sequences (which each have their own HMM's)

  Returns:
    A TemplateHit with the information from that detailed comparison section.

  Raises:
    RuntimeError: If a certain line cannot be processed
  """
  # Parse first 2 lines.
  # Line 0 ends with the hit number; line 1 is the hit name preceded by one
  # marker character.
  number_of_hit = int(detailed_lines[0].split()[-1])
  name_hit = detailed_lines[1][1:]

  # Parse the summary line.
  pattern = (
      'Probab=(.*)[\t ]*E-value=(.*)[\t ]*Score=(.*)[\t ]*Aligned_cols=(.*)[\t'
      ' ]*Identities=(.*)%[\t ]*Similarity=(.*)[\t ]*Sum_probs=(.*)[\t '
      ']*Template_Neff=(.*)')
  match = re.match(pattern, detailed_lines[2])
  if match is None:
    raise RuntimeError(
        'Could not parse section: %s. Expected this: \n%s to contain summary.' %
        (detailed_lines, detailed_lines[2]))
  # Every summary field is converted to float; only aligned_cols and sum_probs
  # are used below.
  (prob_true, e_value, _, aligned_cols, _, _, sum_probs,
   neff) = [float(x) for x in match.groups()]

  # The next section reads the detailed comparisons. These are in a 'human
  # readable' format which has a fixed length. The strategy employed is to
  # assume that each block starts with the query sequence line, and to parse
  # that with a regexp in order to deduce the fixed length used for that block.
  query = ''
  hit_sequence = ''
  indices_query = []
  indices_hit = []
  length_block = None

  for line in detailed_lines[3:]:
    # Parse the query sequence line
    if (line.startswith('Q ') and not line.startswith('Q ss_dssp') and
        not line.startswith('Q ss_pred') and
        not line.startswith('Q Consensus')):
      # Thus the first 17 characters must be 'Q <query_name> ', and we can parse
      # everything after that.
      #              start    sequence       end       total_sequence_length
      patt = r'[\t ]*([0-9]*) ([A-Z-]*)[\t ]*([0-9]*) \([0-9]*\)'
      groups = _get_hhr_line_regex_groups(patt, line[17:])

      # Get the length of the parsed block using the start and finish indices,
      # and ensure it is the same as the actual block length.
      start = int(groups[0]) - 1  # Make index zero based.
      delta_query = groups[1]
      end = int(groups[2])
      num_insertions = len([x for x in delta_query if x == '-'])
      length_block = end - start + num_insertions
      assert length_block == len(delta_query)

      # Update the query sequence and indices list.
      query += delta_query
      _update_hhr_residue_indices_list(delta_query, start, indices_query)

    elif line.startswith('T '):
      # Parse the hit sequence.
      if (not line.startswith('T ss_dssp') and
          not line.startswith('T ss_pred') and
          not line.startswith('T Consensus')):
        # Thus the first 17 characters must be 'T <hit_name> ', and we can
        # parse everything after that.
        #              start    sequence       end     total_sequence_length
        patt = r'[\t ]*([0-9]*) ([A-Z-]*)[\t ]*[0-9]* \([0-9]*\)'
        groups = _get_hhr_line_regex_groups(patt, line[17:])
        start = int(groups[0]) - 1  # Make index zero based.
        delta_hit_sequence = groups[1]
        # The hit block must be as long as the query block parsed just before.
        assert length_block == len(delta_hit_sequence)

        # Update the hit sequence and indices list.
        hit_sequence += delta_hit_sequence
        _update_hhr_residue_indices_list(delta_hit_sequence, start, indices_hit)

  return TemplateHit(
      index=number_of_hit,
      name=name_hit,
      aligned_cols=int(aligned_cols),
      sum_probs=sum_probs,
      query=query,
      hit_sequence=hit_sequence,
      indices_query=indices_query,
      indices_hit=indices_hit,
  )
332
+
333
+
334
def parse_hhr(hhr_string: str) -> Sequence[TemplateHit]:
  """Parses every hit paragraph of an HHR file.

  Args:
    hhr_string: The string contents of an .hhr file.

  Returns:
    One TemplateHit per paragraph, in file order; empty if there are none.
  """
  lines = hhr_string.splitlines()

  # Each .hhr file starts with a results table, then has a sequence of hit
  # "paragraphs", each paragraph starting with a line 'No <hit number>'.
  block_starts = [i for i, line in enumerate(lines) if line.startswith('No ')]
  if not block_starts:
    return []

  # The final paragraph runs to the end of the file.
  boundaries = block_starts + [len(lines)]
  return [_parse_hhr_hit(lines[begin:end])
          for begin, end in zip(boundaries, boundaries[1:])]
350
+
351
+
352
def parse_e_values_from_tblout(tblout: str) -> Dict[str, float]:
  """Parses the target-name to e-value mapping from a Jackhmmer tblout string.

  Args:
    tblout: The string contents of a Jackhmmer --tblout file.

  Returns:
    A dict mapping each target name to its full-sequence E-value. It always
    contains the entry 'query' with value 0.

  Raises:
    IndexError: If a data line has fewer than five fields.
    ValueError: If an E-value field is not a valid float.
  """
  e_values = {'query': 0}
  # As per http://eddylab.org/software/hmmer/Userguide.pdf fields are
  # space-delimited. Relevant fields are (1) target name:  and
  # (5) E-value (full sequence) (numbering from 1).
  for line in tblout.splitlines():
    # Skip comments, and blank or whitespace-only lines, which would otherwise
    # fail with an IndexError.
    if not line.strip() or line.startswith('#'):
      continue
    fields = line.split()
    e_values[fields[0]] = float(fields[4])
  return e_values
af_backprop/alphafold/data/pipeline.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Functions for building the input features for the AlphaFold model."""
16
+
17
+ import os
18
+ from typing import Mapping, Optional, Sequence
19
+ from absl import logging
20
+ from alphafold.common import residue_constants
21
+ from alphafold.data import parsers
22
+ from alphafold.data import templates
23
+ from alphafold.data.tools import hhblits
24
+ from alphafold.data.tools import hhsearch
25
+ from alphafold.data.tools import jackhmmer
26
+ import numpy as np
27
+
28
+ # Internal import (7716).
29
+
30
# Type alias: feature name -> numpy array holding that feature.
FeatureDict = Mapping[str, np.ndarray]
31
+
32
+
33
def make_sequence_features(
    sequence: str, description: str, num_res: int) -> FeatureDict:
  """Builds the per-sequence features of the feature dict.

  Args:
    sequence: The amino-acid sequence.
    description: Description of the sequence; stored UTF-8 encoded.
    num_res: Number of residues; sets the length of the per-residue arrays.

  Returns:
    A dict with the keys 'aatype', 'between_segment_residues', 'domain_name',
    'residue_index', 'seq_length' and 'sequence'.
  """
  return {
      'aatype': residue_constants.sequence_to_onehot(
          sequence=sequence,
          mapping=residue_constants.restype_order_with_x,
          map_unknown_to_x=True),
      'between_segment_residues': np.zeros((num_res,), dtype=np.int32),
      'domain_name': np.array([description.encode('utf-8')],
                              dtype=np.object_),
      'residue_index': np.array(range(num_res), dtype=np.int32),
      # The sequence length is repeated once per residue.
      'seq_length': np.array([num_res] * num_res, dtype=np.int32),
      'sequence': np.array([sequence.encode('utf-8')], dtype=np.object_),
  }
48
+
49
+
50
def make_msa_features(
    msas: Sequence[Sequence[str]],
    deletion_matrices: Sequence[parsers.DeletionMatrix]) -> FeatureDict:
  """Builds the MSA features of the feature dict from one or more MSAs.

  Sequences are de-duplicated across all MSAs; only the first occurrence of a
  sequence (and its deletion row) is kept.

  Args:
    msas: The MSAs, each a sequence of aligned sequences.
    deletion_matrices: One deletion matrix per MSA, indexed like `msas`.

  Returns:
    A dict with the keys 'deletion_matrix_int', 'msa' and 'num_alignments'.

  Raises:
    ValueError: If `msas` is empty or any MSA contains no sequence.
  """
  if not msas:
    raise ValueError('At least one MSA must be provided.')

  encoded_rows = []
  deletion_rows = []
  already_seen = set()
  for msa_index, msa in enumerate(msas):
    if not msa:
      raise ValueError(f'MSA {msa_index} must contain at least one sequence.')
    for sequence_index, sequence in enumerate(msa):
      if sequence not in already_seen:
        already_seen.add(sequence)
        encoded_rows.append(
            [residue_constants.HHBLITS_AA_TO_ID[res] for res in sequence])
        deletion_rows.append(deletion_matrices[msa_index][sequence_index])

  # The residue count is taken from the first sequence of the first MSA.
  num_res = len(msas[0][0])
  return {
      'deletion_matrix_int': np.array(deletion_rows, dtype=np.int32),
      'msa': np.array(encoded_rows, dtype=np.int32),
      # The alignment count is repeated once per residue.
      'num_alignments': np.array(
          [len(encoded_rows)] * num_res, dtype=np.int32),
  }
79
+
80
+
81
class DataPipeline:
  """Runs the alignment tools and assembles the input features."""

  def __init__(self,
               jackhmmer_binary_path: str,
               hhblits_binary_path: str,
               hhsearch_binary_path: str,
               uniref90_database_path: str,
               mgnify_database_path: str,
               bfd_database_path: Optional[str],
               uniclust30_database_path: Optional[str],
               small_bfd_database_path: Optional[str],
               pdb70_database_path: str,
               template_featurizer: templates.TemplateHitFeaturizer,
               use_small_bfd: bool,
               mgnify_max_hits: int = 501,
               uniref_max_hits: int = 10000):
    """Creates the tool runners used by `process`.

    Args:
      jackhmmer_binary_path: Path of the jackhmmer binary.
      hhblits_binary_path: Path of the hhblits binary.
      hhsearch_binary_path: Path of the hhsearch binary.
      uniref90_database_path: Database searched with jackhmmer.
      mgnify_database_path: Database searched with jackhmmer.
      bfd_database_path: Database searched with hhblits; only used when
        `use_small_bfd` is False.
      uniclust30_database_path: Database searched with hhblits; only used when
        `use_small_bfd` is False.
      small_bfd_database_path: Database searched with jackhmmer; only used when
        `use_small_bfd` is True.
      pdb70_database_path: Database searched with hhsearch.
      template_featurizer: Object that turns hhsearch hits into template
        features.
      use_small_bfd: Selects jackhmmer on small BFD instead of hhblits on
        BFD + Uniclust30.
      mgnify_max_hits: Maximum number of MGnify MSA rows kept.
      uniref_max_hits: Maximum number of UniRef90 sequences passed to hhsearch.
    """
    self._use_small_bfd = use_small_bfd
    self.jackhmmer_uniref90_runner = jackhmmer.Jackhmmer(
        binary_path=jackhmmer_binary_path,
        database_path=uniref90_database_path)
    # Only one of the two BFD runners is created, depending on use_small_bfd.
    if use_small_bfd:
      self.jackhmmer_small_bfd_runner = jackhmmer.Jackhmmer(
          binary_path=jackhmmer_binary_path,
          database_path=small_bfd_database_path)
    else:
      self.hhblits_bfd_uniclust_runner = hhblits.HHBlits(
          binary_path=hhblits_binary_path,
          databases=[bfd_database_path, uniclust30_database_path])
    self.jackhmmer_mgnify_runner = jackhmmer.Jackhmmer(
        binary_path=jackhmmer_binary_path,
        database_path=mgnify_database_path)
    self.hhsearch_pdb70_runner = hhsearch.HHSearch(
        binary_path=hhsearch_binary_path,
        databases=[pdb70_database_path])
    self.template_featurizer = template_featurizer
    self.mgnify_max_hits = mgnify_max_hits
    self.uniref_max_hits = uniref_max_hits

  def process(self, input_fasta_path: str, msa_output_dir: str) -> FeatureDict:
    """Runs alignment tools on the input sequence and creates features.

    Args:
      input_fasta_path: Path of a FASTA file holding exactly one sequence.
      msa_output_dir: Existing directory that receives the raw tool outputs.

    Returns:
      The merged sequence, MSA and template features.

    Raises:
      ValueError: If the FASTA file does not contain exactly one sequence.
    """
    with open(input_fasta_path) as f:
      input_fasta_str = f.read()
    input_seqs, input_descs = parsers.parse_fasta(input_fasta_str)
    # NOTE(review): this also triggers for an empty FASTA file, although the
    # message only mentions "more than one".
    if len(input_seqs) != 1:
      raise ValueError(
          f'More than one input sequence found in {input_fasta_path}.')
    input_sequence = input_seqs[0]
    input_description = input_descs[0]
    num_res = len(input_sequence)

    # Only the first element of each jackhmmer query result is used.
    jackhmmer_uniref90_result = self.jackhmmer_uniref90_runner.query(
        input_fasta_path)[0]
    jackhmmer_mgnify_result = self.jackhmmer_mgnify_runner.query(
        input_fasta_path)[0]

    # The template search is seeded with the (truncated) UniRef90 alignment.
    uniref90_msa_as_a3m = parsers.convert_stockholm_to_a3m(
        jackhmmer_uniref90_result['sto'], max_sequences=self.uniref_max_hits)
    hhsearch_result = self.hhsearch_pdb70_runner.query(uniref90_msa_as_a3m)

    # Persist the raw tool outputs.
    uniref90_out_path = os.path.join(msa_output_dir, 'uniref90_hits.sto')
    with open(uniref90_out_path, 'w') as f:
      f.write(jackhmmer_uniref90_result['sto'])

    mgnify_out_path = os.path.join(msa_output_dir, 'mgnify_hits.sto')
    with open(mgnify_out_path, 'w') as f:
      f.write(jackhmmer_mgnify_result['sto'])

    pdb70_out_path = os.path.join(msa_output_dir, 'pdb70_hits.hhr')
    with open(pdb70_out_path, 'w') as f:
      f.write(hhsearch_result)

    uniref90_msa, uniref90_deletion_matrix, _ = parsers.parse_stockholm(
        jackhmmer_uniref90_result['sto'])
    mgnify_msa, mgnify_deletion_matrix, _ = parsers.parse_stockholm(
        jackhmmer_mgnify_result['sto'])
    hhsearch_hits = parsers.parse_hhr(hhsearch_result)
    # The MGnify MSA and its deletion matrix are truncated together.
    mgnify_msa = mgnify_msa[:self.mgnify_max_hits]
    mgnify_deletion_matrix = mgnify_deletion_matrix[:self.mgnify_max_hits]

    if self._use_small_bfd:
      jackhmmer_small_bfd_result = self.jackhmmer_small_bfd_runner.query(
          input_fasta_path)[0]

      # NOTE(review): the file is named '.a3m' but holds the Stockholm ('sto')
      # output - confirm whether downstream readers depend on this name.
      bfd_out_path = os.path.join(msa_output_dir, 'small_bfd_hits.a3m')
      with open(bfd_out_path, 'w') as f:
        f.write(jackhmmer_small_bfd_result['sto'])

      bfd_msa, bfd_deletion_matrix, _ = parsers.parse_stockholm(
          jackhmmer_small_bfd_result['sto'])
    else:
      hhblits_bfd_uniclust_result = self.hhblits_bfd_uniclust_runner.query(
          input_fasta_path)

      bfd_out_path = os.path.join(msa_output_dir, 'bfd_uniclust_hits.a3m')
      with open(bfd_out_path, 'w') as f:
        f.write(hhblits_bfd_uniclust_result['a3m'])

      bfd_msa, bfd_deletion_matrix = parsers.parse_a3m(
          hhblits_bfd_uniclust_result['a3m'])

    templates_result = self.template_featurizer.get_templates(
        query_sequence=input_sequence,
        query_pdb_code=None,
        query_release_date=None,
        hits=hhsearch_hits)

    sequence_features = make_sequence_features(
        sequence=input_sequence,
        description=input_description,
        num_res=num_res)

    # The MSA order decides which duplicate is kept during de-duplication.
    msa_features = make_msa_features(
        msas=(uniref90_msa, bfd_msa, mgnify_msa),
        deletion_matrices=(uniref90_deletion_matrix,
                           bfd_deletion_matrix,
                           mgnify_deletion_matrix))

    logging.info('Uniref90 MSA size: %d sequences.', len(uniref90_msa))
    logging.info('BFD MSA size: %d sequences.', len(bfd_msa))
    logging.info('MGnify MSA size: %d sequences.', len(mgnify_msa))
    logging.info('Final (deduplicated) MSA size: %d sequences.',
                 msa_features['num_alignments'][0])
    logging.info('Total number of templates (NB: this can include bad '
                 'templates and is later filtered to top 4): %d.',
                 templates_result.features['template_domain_names'].shape[0])

    return {**sequence_features, **msa_features, **templates_result.features}
af_backprop/alphafold/data/prep_inputs.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from alphafold.common import residue_constants
3
+
4
def make_atom14_positions(prot):
  """Adds dense (atom14) ground-truth features to a protein feature dict.

  The atom37 layout reserves one slot per atom type, while the atom14 layout
  uses 14 slots per residue whose meaning depends on the residue type. This
  function builds the index maps between both layouts, gathers the ground-truth
  positions and masks into the atom14 layout, and derives the alternative
  ground truth in which the atoms listed in
  `residue_constants.residue_atom_renaming_swaps` are exchanged.

  Args:
    prot: Mutable mapping providing "aatype", "all_atom_mask" and
      "all_atom_positions". It is updated in place.

  Returns:
    The same `prot` object, extended with "atom14_atom_exists",
    "atom14_gt_exists", "atom14_gt_positions", "residx_atom14_to_atom37",
    "residx_atom37_to_atom14", "atom37_atom_exists", "atom14_alt_gt_positions",
    "atom14_alt_gt_exists" and "atom14_atom_is_ambiguous".
  """
  aatype = prot["aatype"]

  # Per-restype lookup tables; row i belongs to residue_constants.restypes[i].
  atom14_to_atom37_rows = []  # (restype, atom14) --> atom37
  atom37_to_atom14_rows = []  # (restype, atom37) --> atom14
  atom14_mask_rows = []
  for letter in residue_constants.restypes:
    names14 = residue_constants.restype_name_to_atom14_names[
        residue_constants.restype_1to3[letter]]
    slot_of_name = {name: slot for slot, name in enumerate(names14)}

    # Empty names mark unused atom14 slots: they point at atom37 index 0 and
    # are switched off through the mask row.
    atom14_to_atom37_rows.append(
        [residue_constants.atom_order[name] if name else 0
         for name in names14])
    atom37_to_atom14_rows.append(
        [slot_of_name.get(name, 0) for name in residue_constants.atom_types])
    atom14_mask_rows.append([1. if name else 0. for name in names14])

  # Dummy rows for restype 'UNK'.
  atom14_to_atom37_rows.append([0] * 14)
  atom37_to_atom14_rows.append([0] * 37)
  atom14_mask_rows.append([0.] * 14)

  restype_atom14_to_atom37 = np.array(atom14_to_atom37_rows, dtype=np.int32)
  restype_atom37_to_atom14 = np.array(atom37_to_atom14_rows, dtype=np.int32)
  restype_atom14_mask = np.array(atom14_mask_rows, dtype=np.float32)

  # Specialise the tables to this sequence: shape (num_res, 14).
  residx_atom14_to_atom37 = restype_atom14_to_atom37[aatype]
  atom14_exists = restype_atom14_mask[aatype]

  # An atom14 slot has ground truth only if the slot is used by the residue
  # type AND the corresponding atom37 entry is marked as present.
  gathered_mask = np.take_along_axis(
      prot["all_atom_mask"], residx_atom14_to_atom37, axis=1)
  atom14_gt_exists = atom14_exists * gathered_mask.astype(np.float32)

  # Gather the coordinates and zero out slots without ground truth.
  gathered_positions = np.take_along_axis(
      prot["all_atom_positions"], residx_atom14_to_atom37[..., None], axis=1)
  atom14_gt_positions = atom14_gt_exists[:, :, None] * gathered_positions

  prot["atom14_atom_exists"] = atom14_exists
  prot["atom14_gt_exists"] = atom14_gt_exists
  prot["atom14_gt_positions"] = atom14_gt_positions

  prot["residx_atom14_to_atom37"] = residx_atom14_to_atom37

  # Gather indices for mapping back from atom14 to atom37.
  prot["residx_atom37_to_atom14"] = restype_atom37_to_atom14[aatype]

  # Mask of the atom37 slots that exist for each residue type (row 20, the
  # unknown type, stays all zero).
  restype_atom37_mask = np.zeros([21, 37], dtype=np.float32)
  for row, letter in enumerate(residue_constants.restypes):
    resname = residue_constants.restype_1to3[letter]
    for atom_name in residue_constants.residue_atoms[resname]:
      restype_atom37_mask[row, residue_constants.atom_order[atom_name]] = 1
  prot["atom37_atom_exists"] = restype_atom37_mask[aatype]

  # Some residue types have atoms whose naming is ambiguous; for those an
  # alternative ground truth with the names swapped is provided.
  resnames = [
      residue_constants.restype_1to3[letter]
      for letter in residue_constants.restypes
  ] + ["UNK"]

  # One 14x14 permutation matrix per residue type (identity unless the type
  # has renaming swaps). Row i of identity[permutation] has its single 1 in
  # column permutation[i].
  identity = np.eye(14, dtype=np.float32)
  swap_matrices = {resname: identity for resname in resnames}
  for resname, swap in residue_constants.residue_atom_renaming_swaps.items():
    names14 = residue_constants.restype_name_to_atom14_names[resname]
    permutation = np.arange(14)
    for atom_a, atom_b in swap.items():
      slot_a = names14.index(atom_a)
      slot_b = names14.index(atom_b)
      permutation[slot_a] = slot_b
      permutation[slot_b] = slot_a
    swap_matrices[resname] = identity[permutation]
  renaming_matrices = np.stack([swap_matrices[name] for name in resnames])

  # Per-residue transformation matrices, shape (num_res, 14, 14).
  renaming_transform = renaming_matrices[aatype]

  # Swapped ground-truth positions, shape (num_res, 14, 3).
  prot["atom14_alt_gt_positions"] = np.einsum(
      "rac,rab->rbc", atom14_gt_positions, renaming_transform)

  # The swapped mask differs from the regular one when only one atom of an
  # ambiguous pair has a ground-truth position.
  prot["atom14_alt_gt_exists"] = np.einsum(
      "ra,rab->rb", atom14_gt_exists, renaming_transform)

  # Flag the atom14 slots that take part in a renaming swap. shape: (21, 14).
  restype_atom14_is_ambiguous = np.zeros((21, 14), dtype=np.float32)
  for resname, swap in residue_constants.residue_atom_renaming_swaps.items():
    for atom_pair in swap.items():
      row = residue_constants.restype_order[
          residue_constants.restype_3to1[resname]]
      names14 = residue_constants.restype_name_to_atom14_names[resname]
      for atom_name in atom_pair:
        restype_atom14_is_ambiguous[row, names14.index(atom_name)] = 1

  # Ambiguity mask for the given sequence.
  prot["atom14_atom_is_ambiguous"] = restype_atom14_is_ambiguous[aatype]

  return prot
af_backprop/alphafold/data/templates.py ADDED
@@ -0,0 +1,910 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Functions for getting templates and calculating template features."""
16
+ import dataclasses
17
+ import datetime
18
+ import glob
19
+ import os
20
+ import re
21
+ from typing import Any, Dict, Mapping, Optional, Sequence, Tuple
22
+
23
+ from absl import logging
24
+ from alphafold.common import residue_constants
25
+ from alphafold.data import mmcif_parsing
26
+ from alphafold.data import parsers
27
+ from alphafold.data.tools import kalign
28
+ import numpy as np
29
+
30
+ # Internal import (7716).
31
+
32
+
33
class Error(Exception):
  """Root of the exceptions raised while extracting template features."""


class NoChainsError(Error):
  """Raised when the template mmCIF has no chains at all."""


class SequenceNotInTemplateError(Error):
  """Raised when the hit sequence cannot be located in the template mmCIF."""


class NoAtomDataInTemplateError(Error):
  """Raised when no atom positions could be read from the template mmCIF."""


class TemplateAtomMaskAllZerosError(Error):
  """Raised when every atom position of the template mmCIF is masked out."""


class QueryToTemplateAlignError(Error):
  """Raised when the query cannot be aligned to the template."""


class CaDistanceError(Error):
  """Raised when a CA atom distance is larger than the allowed threshold."""


class MultipleChainsError(Error):
  """Raised when a chain ID resolves to several chains."""
63
+
64
+
65
+ # Prefilter exceptions.
66
class PrefilterError(Exception):
  """Root of the exceptions raised when a hit is rejected by the prefilter."""


class DateError(PrefilterError):
  """Raised when the hit was released after the maximum allowed date."""


class PdbIdError(PrefilterError):
  """Raised when the hit has the same PDB ID as the query."""


class AlignRatioError(PrefilterError):
  """Raised when too small a fraction of the query is aligned to the hit."""


class DuplicateError(PrefilterError):
  """Raised when the hit is an exact subsequence of the query."""


class LengthError(PrefilterError):
  """Raised when the hit sequence is too short."""
88
+
89
+
90
# Dtype of every template feature, keyed by feature name.
#
# The string-valued features use the builtin `object`. The `np.object` alias
# that used to be written here was deprecated in NumPy 1.20 and removed in
# NumPy 1.24, where merely evaluating it raises AttributeError and makes this
# module impossible to import. `object` is the same dtype on every version.
TEMPLATE_FEATURES = {
    'template_aatype': np.float32,
    'template_all_atom_masks': np.float32,
    'template_all_atom_positions': np.float32,
    'template_domain_names': object,
    'template_sequence': object,
    'template_sum_probs': np.float32,
}
98
+
99
+
100
def _get_pdb_id_and_chain(hit: parsers.TemplateHit) -> Tuple[str, str]:
  """Splits the name of an HHSearch hit into PDB ID and chain ID.

  Args:
    hit: The template hit; only `hit.name` is read. The name has to start with
      a 4-character alphanumeric PDB ID, an underscore and a chain ID made of
      one or more alphanumeric characters or "." (unknown chain).

  Returns:
    A tuple (pdb_id, chain_id) with the PDB ID lower-cased and the chain ID
    exactly as written in the hit name.

  Raises:
    ValueError: If the hit name does not start with `PDBID_chain`.
  """
  prefix = re.match(r'[a-zA-Z\d]{4}_[a-zA-Z0-9.]+', hit.name)
  if prefix is None:
    raise ValueError(f'hit.name did not start with PDBID_chain: {hit.name}')
  # The matched prefix contains exactly one underscore, the separator.
  pdb_id, _, chain_id = prefix.group(0).partition('_')
  return pdb_id.lower(), chain_id
108
+
109
+
110
+ def _is_after_cutoff(
111
+ pdb_id: str,
112
+ release_dates: Mapping[str, datetime.datetime],
113
+ release_date_cutoff: Optional[datetime.datetime]) -> bool:
114
+ """Checks if the template date is after the release date cutoff.
115
+
116
+ Args:
117
+ pdb_id: 4 letter pdb code.
118
+ release_dates: Dictionary mapping PDB ids to their structure release dates.
119
+ release_date_cutoff: Max release date that is valid for this query.
120
+
121
+ Returns:
122
+ True if the template release date is after the cutoff, False otherwise.
123
+ """
124
+ if release_date_cutoff is None:
125
+ raise ValueError('The release_date_cutoff must not be None.')
126
+ if pdb_id in release_dates:
127
+ return release_dates[pdb_id] > release_date_cutoff
128
+ else:
129
+ # Since this is just a quick prefilter to reduce the number of mmCIF files
130
+ # we need to parse, we don't have to worry about returning True here.
131
+ logging.warning('Template structure not in release dates dict: %s', pdb_id)
132
+ return False
133
+
134
+
135
+ def _parse_obsolete(obsolete_file_path: str) -> Mapping[str, str]:
136
+ """Parses the data file from PDB that lists which PDB ids are obsolete."""
137
+ with open(obsolete_file_path) as f:
138
+ result = {}
139
+ for line in f:
140
+ line = line.strip()
141
+ # We skip obsolete entries that don't contain a mapping to a new entry.
142
+ if line.startswith('OBSLTE') and len(line) > 30:
143
+ # Format: Date From To
144
+ # 'OBSLTE 31-JUL-94 116L 216L'
145
+ from_id = line[20:24].lower()
146
+ to_id = line[29:33].lower()
147
+ result[from_id] = to_id
148
+ return result
149
+
150
+
151
+ def _parse_release_dates(path: str) -> Mapping[str, datetime.datetime]:
152
+ """Parses release dates file, returns a mapping from PDBs to release dates."""
153
+ if path.endswith('txt'):
154
+ release_dates = {}
155
+ with open(path, 'r') as f:
156
+ for line in f:
157
+ pdb_id, date = line.split(':')
158
+ date = date.strip()
159
+ # Python 3.6 doesn't have datetime.date.fromisoformat() which is about
160
+ # 90x faster than strptime. However, splitting the string manually is
161
+ # about 10x faster than strptime.
162
+ release_dates[pdb_id.strip()] = datetime.datetime(
163
+ year=int(date[:4]), month=int(date[5:7]), day=int(date[8:10]))
164
+ return release_dates
165
+ else:
166
+ raise ValueError('Invalid format of the release date file %s.' % path)
167
+
168
+
169
def _assess_hhsearch_hit(
    hit: parsers.TemplateHit,
    hit_pdb_code: str,
    query_sequence: str,
    query_pdb_code: Optional[str],
    release_dates: Mapping[str, datetime.datetime],
    release_date_cutoff: datetime.datetime,
    max_subsequence_ratio: float = 0.95,
    min_align_ratio: float = 0.1) -> bool:
  """Prefilters a template hit without parsing the template mmCIF file.

  The checks run in a fixed order (date, PDB id, align ratio, duplicate,
  length) and the first one that fails raises.

  Args:
    hit: HhrHit for the template.
    hit_pdb_code: The 4 letter pdb code of the template hit. This might be
      different from the value in the actual hit since the original pdb might
      have become obsolete.
    query_sequence: Amino acid sequence of the query.
    query_pdb_code: 4 letter pdb code of the query, or None to skip the PDB id
      check.
    release_dates: Dictionary mapping pdb codes to their structure release
      dates.
    release_date_cutoff: Max release date that is valid for this query.
    max_subsequence_ratio: Exclude any exact matches with this much overlap.
    min_align_ratio: Minimum overlap between the template and query.

  Returns:
    True if the hit passed the prefilter. Raises an exception otherwise.

  Raises:
    DateError: If the hit date was after the max allowed date.
    PdbIdError: If the hit PDB ID was identical to the query.
    AlignRatioError: If the hit align ratio to the query was too small.
    DuplicateError: If the hit was an exact subsequence of the query.
    LengthError: If the hit was too short.
  """
  # Fraction of the query covered by aligned columns.
  align_ratio = hit.aligned_cols / len(query_sequence)

  # Gap-free template sequence and its length relative to the query.
  template_sequence = hit.hit_sequence.replace('-', '')
  length_ratio = len(template_sequence) / len(query_sequence)

  # A template that is literally contained in the query and nearly as long is
  # treated as a duplicate. This can happen due to duplicate entries in the PDB
  # database.
  is_duplicate = (length_ratio > max_subsequence_ratio and
                  template_sequence in query_sequence)

  if _is_after_cutoff(hit_pdb_code, release_dates, release_date_cutoff):
    raise DateError(f'Date ({release_dates[hit_pdb_code]}) > max template date '
                    f'({release_date_cutoff}).')

  if (query_pdb_code is not None and
      query_pdb_code.lower() == hit_pdb_code.lower()):
    raise PdbIdError('PDB code identical to Query PDB code.')

  if align_ratio <= min_align_ratio:
    raise AlignRatioError('Proportion of residues aligned to query too small. '
                          f'Align ratio: {align_ratio}.')

  if is_duplicate:
    raise DuplicateError('Template is an exact subsequence of query with large '
                         f'coverage. Length ratio: {length_ratio}.')

  if len(template_sequence) < 10:
    raise LengthError(f'Template too short. Length: {len(template_sequence)}.')

  return True
234
+
235
+
236
def _find_template_in_pdb(
    template_chain_id: str,
    template_sequence: str,
    mmcif_object: mmcif_parsing.MmcifObject) -> Tuple[str, str, int]:
  """Locates the template sequence among the chains of an mmCIF object.

  Three strategies are tried, the first success wins:
    1. The chain with ID `template_chain_id` contains the template sequence.
    2. Any chain contains the template sequence.
    3. Any chain fuzzy-matches the template sequence, where 'X' on either side
       acts as a wildcard.

  Args:
    template_chain_id: The template chain ID.
    template_sequence: The template chain sequence.
    mmcif_object: The PDB object to search for the template in.

  Returns:
    A tuple with:
    * The chain sequence that was found to match the template in the PDB object.
    * The ID of the chain that is being returned.
    * The offset where the template sequence starts in the chain sequence.

  Raises:
    SequenceNotInTemplateError: If none of the three strategies finds a match.
  """
  pdb_id = mmcif_object.file_id
  chain_to_seqres = mmcif_object.chain_to_seqres

  # 1. Exact match in both the chain ID and the (sub)sequence.
  named_chain = chain_to_seqres.get(template_chain_id)
  if named_chain and template_sequence in named_chain:
    logging.info(
        'Found an exact template match %s_%s.', pdb_id, template_chain_id)
    return named_chain, template_chain_id, named_chain.find(template_sequence)

  # 2. Exact match in the (sub)sequence only.
  for chain_id, chain_sequence in chain_to_seqres.items():
    if not chain_sequence or template_sequence not in chain_sequence:
      continue
    logging.info('Found a sequence-only match %s_%s.', pdb_id, chain_id)
    return chain_sequence, chain_id, chain_sequence.find(template_sequence)

  # 3. Fuzzy match: an 'X' in the template matches anything and every other
  # residue also matches an 'X' in the chain. Non-capturing groups (?:...) are
  # used to stay clear of the regex group-count limit.
  pattern = re.compile(''.join(
      '.' if aa == 'X' else '(?:%s|X)' % aa for aa in template_sequence))
  for chain_id, chain_sequence in chain_to_seqres.items():
    fuzzy_match = pattern.search(chain_sequence)
    if fuzzy_match is None:
      continue
    logging.info('Found a fuzzy sequence-only match %s_%s.', pdb_id, chain_id)
    return chain_sequence, chain_id, fuzzy_match.start()

  # No hits, raise an error.
  raise SequenceNotInTemplateError(
      'Could not find the template sequence in %s_%s. Template sequence: %s, '
      'chain_to_seqres: %s' % (pdb_id, template_chain_id, template_sequence,
                               mmcif_object.chain_to_seqres))
298
+
299
+
300
def _realign_pdb_template_to_query(
    old_template_sequence: str,
    template_chain_id: str,
    mmcif_object: mmcif_parsing.MmcifObject,
    old_mapping: Mapping[int, int],
    kalign_binary_path: str) -> Tuple[str, Mapping[int, int]]:
  """Aligns template from the mmcif_object to the query.

  In case PDB70 contains a different version of the template sequence, we need
  to perform a realignment to the actual sequence that is in the mmCIF file.
  This method performs such realignment, but returns the new sequence and
  mapping only if the sequence in the mmCIF file is 90% identical to the old
  sequence.

  Note that the old_template_sequence comes from the hit, and contains only that
  part of the chain that matches with the query while the new_template_sequence
  is the full chain.

  Args:
    old_template_sequence: The template sequence that was returned by the PDB
      template search (typically done using HHSearch).
    template_chain_id: The template chain id was returned by the PDB template
      search (typically done using HHSearch). This is used to find the right
      chain in the mmcif_object chain_to_seqres mapping.
    mmcif_object: A mmcif_object which holds the actual template data.
    old_mapping: A mapping from the query sequence to the template sequence.
      This mapping will be used to compute the new mapping from the query
      sequence to the actual mmcif_object template sequence by aligning the
      old_template_sequence and the actual template sequence.
    kalign_binary_path: The path to a kalign executable.

  Returns:
    A tuple (new_template_sequence, new_query_to_template_mapping) where:
    * new_template_sequence is the actual template sequence that was found in
      the mmcif_object.
    * new_query_to_template_mapping is the new mapping from the query to the
      actual template found in the mmcif_object. Query positions whose old
      template residue has no counterpart in the new sequence map to -1.

  Raises:
    QueryToTemplateAlignError:
    * If the chain cannot be found and the mmCIF has more than one chain.
    * If there was an error thrown by the alignment tool (the original error
      is attached as the cause).
    * Or if the actual template sequence differs by more than 10% from the
      old_template_sequence.
  """
  aligner = kalign.Kalign(binary_path=kalign_binary_path)
  new_template_sequence = mmcif_object.chain_to_seqres.get(
      template_chain_id, '')

  # Sometimes the template chain id is unknown. But if there is only a single
  # sequence within the mmcif_object, it is safe to assume it is that one.
  if not new_template_sequence:
    if len(mmcif_object.chain_to_seqres) == 1:
      logging.info('Could not find %s in %s, but there is only 1 sequence, so '
                   'using that one.',
                   template_chain_id,
                   mmcif_object.file_id)
      new_template_sequence = list(mmcif_object.chain_to_seqres.values())[0]
    else:
      raise QueryToTemplateAlignError(
          f'Could not find chain {template_chain_id} in {mmcif_object.file_id}. '
          'If there are no mmCIF parsing errors, it is possible it was not a '
          'protein chain.')

  try:
    (old_aligned_template, new_aligned_template), _ = parsers.parse_a3m(
        aligner.align([old_template_sequence, new_template_sequence]))
  except Exception as e:
    # Chain explicitly so the traceback presents the aligner failure as the
    # cause instead of as an error that happened while handling another one.
    raise QueryToTemplateAlignError(
        'Could not align old template %s to template %s (%s_%s). Error: %s' %
        (old_template_sequence, new_template_sequence, mmcif_object.file_id,
         template_chain_id, str(e))) from e

  logging.info('Old aligned template: %s\nNew aligned template: %s',
               old_aligned_template, new_aligned_template)

  # Walk both aligned sequences column by column, tracking the residue index
  # in each ungapped sequence, and record which old index pairs with which new
  # index as well as how many paired residues are identical.
  old_to_new_template_mapping = {}
  old_template_index = -1
  new_template_index = -1
  num_same = 0
  for old_template_aa, new_template_aa in zip(
      old_aligned_template, new_aligned_template):
    if old_template_aa != '-':
      old_template_index += 1
    if new_template_aa != '-':
      new_template_index += 1
    if old_template_aa != '-' and new_template_aa != '-':
      old_to_new_template_mapping[old_template_index] = new_template_index
      if old_template_aa == new_template_aa:
        num_same += 1

  # Require at least 90 % sequence identity wrt to the shorter of the sequences.
  if float(num_same) / min(
      len(old_template_sequence), len(new_template_sequence)) < 0.9:
    raise QueryToTemplateAlignError(
        'Insufficient similarity of the sequence in the database: %s to the '
        'actual sequence in the mmCIF file %s_%s: %s. We require at least '
        '90 %% similarity wrt to the shorter of the sequences. This is not a '
        'problem unless you think this is a template that should be included.' %
        (old_template_sequence, mmcif_object.file_id, template_chain_id,
         new_template_sequence))

  # Compose query -> old template -> new template; -1 marks query positions
  # whose old template residue is not paired with a new template residue.
  new_query_to_template_mapping = {}
  for query_index, old_template_index in old_mapping.items():
    new_query_to_template_mapping[query_index] = (
        old_to_new_template_mapping.get(old_template_index, -1))

  new_template_sequence = new_template_sequence.replace('-', '')

  return new_template_sequence, new_query_to_template_mapping
409
+
410
+
411
def _check_residue_distances(all_positions: np.ndarray,
                             all_positions_mask: np.ndarray,
                             max_ca_ca_distance: float):
  """Verifies that consecutive unmasked residues are not too far apart.

  Only directly neighbouring residues whose CA atoms are both unmasked are
  compared; a masked residue breaks the chain of comparisons.

  Args:
    all_positions: Per-residue atom coordinates, indexed as
      [residue][atom type].
    all_positions_mask: Per-residue atom mask, indexed as [residue][atom type].
    max_ca_ca_distance: Largest allowed distance between neighbouring CA atoms.

  Raises:
    CaDistanceError: If two neighbouring CA atoms are further apart than
      `max_ca_ca_distance`.
  """
  ca_slot = residue_constants.atom_order['CA']
  # CA coordinates of the directly preceding residue, or None if that residue
  # has no unmasked CA (or there is no preceding residue).
  previous_ca = None
  for i, (coords, mask) in enumerate(zip(all_positions, all_positions_mask)):
    if not mask[ca_slot]:
      previous_ca = None
      continue
    current_ca = coords[ca_slot]
    if previous_ca is not None:
      distance = np.linalg.norm(current_ca - previous_ca)
      if distance > max_ca_ca_distance:
        raise CaDistanceError(
            'The distance between residues %d and %d is %f > limit %f.' % (
                i, i + 1, distance, max_ca_ca_distance))
    previous_ca = current_ca
430
+
431
+
432
def _get_atom_positions(
    mmcif_object: mmcif_parsing.MmcifObject,
    auth_chain_id: str,
    max_ca_ca_distance: float) -> Tuple[np.ndarray, np.ndarray]:
  """Collects atom positions and masks for one chain of an mmCIF object.

  Args:
    mmcif_object: Parsed mmCIF providing `chain_to_seqres`,
      `seqres_to_structure` and `structure`.
    auth_chain_id: ID of the chain to extract.
    max_ca_ca_distance: Largest allowed distance between the CA atoms of
      neighbouring residues.

  Returns:
    A tuple (all_positions, all_positions_mask) of shapes
    [num_res, atom_type_num, 3] and [num_res, atom_type_num] (int64). Residues
    flagged as missing stay all zero.

  Raises:
    MultipleChainsError: If the structure does not contain exactly one chain
      with id `auth_chain_id`.
    CaDistanceError: If neighbouring CA atoms are too far apart.
  """
  atom_order = residue_constants.atom_order
  num_atom_types = residue_constants.atom_type_num
  num_res = len(mmcif_object.chain_to_seqres[auth_chain_id])

  matching_chains = [chain for chain in mmcif_object.structure.get_chains()
                     if chain.id == auth_chain_id]
  if len(matching_chains) != 1:
    raise MultipleChainsError(
        f'Expected exactly one chain in structure with id {auth_chain_id}.')
  chain = matching_chains[0]

  all_positions = np.zeros([num_res, num_atom_types, 3])
  all_positions_mask = np.zeros([num_res, num_atom_types], dtype=np.int64)
  for res_index in range(num_res):
    # Filled per residue in float32 and copied into the output arrays below.
    pos = np.zeros([num_atom_types, 3], dtype=np.float32)
    mask = np.zeros([num_atom_types], dtype=np.float32)
    res_at_position = mmcif_object.seqres_to_structure[auth_chain_id][res_index]
    if not res_at_position.is_missing:
      residue_key = (res_at_position.hetflag,
                     res_at_position.position.residue_number,
                     res_at_position.position.insertion_code)
      res = chain[residue_key]
      for atom in res.get_atoms():
        atom_name = atom.get_name()
        x, y, z = atom.get_coord()
        if atom_name in atom_order:
          slot = atom_order[atom_name]
        elif atom_name.upper() == 'SE' and res.get_resname() == 'MSE':
          # Put the coordinates of the selenium atom in the sulphur column.
          slot = atom_order['SD']
        else:
          # Atom types without an atom37 slot are ignored.
          continue
        pos[slot] = [x, y, z]
        mask[slot] = 1.0

    all_positions[res_index] = pos
    all_positions_mask[res_index] = mask

  _check_residue_distances(
      all_positions, all_positions_mask, max_ca_ca_distance)
  return all_positions, all_positions_mask
473
+
474
+
475
def _extract_template_features(
    mmcif_object: mmcif_parsing.MmcifObject,
    pdb_id: str,
    mapping: Mapping[int, int],
    template_sequence: str,
    query_sequence: str,
    template_chain_id: str,
    kalign_binary_path: str) -> Tuple[Dict[str, Any], Optional[str]]:
  """Parses atom positions in the target structure and aligns with the query.

  Atoms for each residue in the template structure are indexed to coincide
  with their corresponding residue in the query sequence, according to the
  alignment mapping provided. Query residues without a template counterpart
  keep zero positions, a zero mask and the gap character '-'.

  Args:
    mmcif_object: mmcif_parsing.MmcifObject representing the template.
    pdb_id: PDB code for the template.
    mapping: Dictionary mapping indices in the query sequence to indices in
      the template sequence.
    template_sequence: String describing the amino acid sequence for the
      template protein.
    query_sequence: String describing the amino acid sequence for the query
      protein.
    template_chain_id: String ID describing which chain in the structure proto
      should be used.
    kalign_binary_path: The path to a kalign executable used for template
      realignment.

  Returns:
    A tuple with:
    * A dictionary containing the extra features derived from the template
      protein structure.
    * A warning message if the hit was realigned to the actual mmCIF sequence.
      Otherwise None.

  Raises:
    NoChainsError: If the mmcif object doesn't contain any chains.
    SequenceNotInTemplateError: If the given chain id / sequence can't
      be found in the mmcif object.
    QueryToTemplateAlignError: If the actual template in the mmCIF file
      can't be aligned to the query.
    NoAtomDataInTemplateError: If the mmcif object doesn't contain
      atom positions.
    TemplateAtomMaskAllZerosError: If the mmcif object doesn't have any
      unmasked residues.
  """
  if mmcif_object is None or not mmcif_object.chain_to_seqres:
    raise NoChainsError('No chains in PDB: %s_%s' % (pdb_id, template_chain_id))

  warning = None
  try:
    seqres, chain_id, mapping_offset = _find_template_in_pdb(
        template_chain_id=template_chain_id,
        template_sequence=template_sequence,
        mmcif_object=mmcif_object)
  except SequenceNotInTemplateError:
    # If PDB70 contains a different version of the template, we use the sequence
    # from the mmcif_object.
    chain_id = template_chain_id
    warning = (
        f'The exact sequence {template_sequence} was not found in '
        f'{pdb_id}_{chain_id}. Realigning the template to the actual sequence.')
    logging.warning(warning)
    # This throws an exception if it fails to realign the hit.
    seqres, mapping = _realign_pdb_template_to_query(
        old_template_sequence=template_sequence,
        template_chain_id=template_chain_id,
        mmcif_object=mmcif_object,
        old_mapping=mapping,
        kalign_binary_path=kalign_binary_path)
    logging.info('Sequence in %s_%s: %s successfully realigned to %s',
                 pdb_id, chain_id, template_sequence, seqres)
    # The template sequence changed.
    template_sequence = seqres
    # No mapping offset, the query is aligned to the actual sequence.
    mapping_offset = 0

  try:
    # Essentially set to infinity - we don't want to reject templates unless
    # they're really really bad.
    all_atom_positions, all_atom_mask = _get_atom_positions(
        mmcif_object, chain_id, max_ca_ca_distance=150.0)
  except (CaDistanceError, KeyError) as ex:
    raise NoAtomDataInTemplateError(
        'Could not get atom data (%s_%s): %s' % (pdb_id, chain_id, str(ex))
    ) from ex

  # Start from "not in the template" for every query residue.
  templates_all_atom_positions = [
      np.zeros((residue_constants.atom_type_num, 3)) for _ in query_sequence]
  templates_all_atom_masks = [
      np.zeros(residue_constants.atom_type_num) for _ in query_sequence]
  output_templates_sequence = ['-'] * len(query_sequence)

  aligned_template_indices = []
  for k, v in mapping.items():
    if v < 0:
      # _realign_pdb_template_to_query marks query residues without a
      # counterpart in the realigned template with -1. Using that as an index
      # would silently copy the LAST template residue, so leave the gap.
      continue
    template_index = v + mapping_offset
    templates_all_atom_positions[k] = all_atom_positions[template_index]
    templates_all_atom_masks[k] = all_atom_mask[template_index]
    output_templates_sequence[k] = template_sequence[v]
    aligned_template_indices.append(template_index)

  # Alanine (AA with the lowest number of atoms) has 5 atoms (C, CA, CB, N, O).
  if np.sum(templates_all_atom_masks) < 5:
    # min()/max() of an empty sequence would raise ValueError and hide the
    # intended error, so report -1 when nothing was aligned.
    range_start = min(aligned_template_indices, default=-1)
    range_end = max(aligned_template_indices, default=-1)
    raise TemplateAtomMaskAllZerosError(
        'Template all atom mask was all zeros: %s_%s. Residue range: %d-%d' %
        (pdb_id, chain_id, range_start, range_end))

  output_templates_sequence = ''.join(output_templates_sequence)

  templates_aatype = residue_constants.sequence_to_onehot(
      output_templates_sequence, residue_constants.HHBLITS_AA_TO_ID)

  return (
      {
          'template_all_atom_positions': np.array(templates_all_atom_positions),
          'template_all_atom_masks': np.array(templates_all_atom_masks),
          'template_sequence': output_templates_sequence.encode(),
          'template_aatype': np.array(templates_aatype),
          'template_domain_names': f'{pdb_id.lower()}_{chain_id}'.encode(),
      },
      warning)
603
+
604
+
605
+ def _build_query_to_hit_index_mapping(
606
+ hit_query_sequence: str,
607
+ hit_sequence: str,
608
+ indices_hit: Sequence[int],
609
+ indices_query: Sequence[int],
610
+ original_query_sequence: str) -> Mapping[int, int]:
611
+ """Gets mapping from indices in original query sequence to indices in the hit.
612
+
613
+ hit_query_sequence and hit_sequence are two aligned sequences containing gap
614
+ characters. hit_query_sequence contains only the part of the original query
615
+ sequence that matched the hit. When interpreting the indices from the .hhr, we
616
+ need to correct for this to recover a mapping from original query sequence to
617
+ the hit sequence.
618
+
619
+ Args:
620
+ hit_query_sequence: The portion of the query sequence that is in the .hhr
621
+ hit
622
+ hit_sequence: The portion of the hit sequence that is in the .hhr
623
+ indices_hit: The indices for each aminoacid relative to the hit sequence
624
+ indices_query: The indices for each aminoacid relative to the original query
625
+ sequence
626
+ original_query_sequence: String describing the original query sequence.
627
+
628
+ Returns:
629
+ Dictionary with indices in the original query sequence as keys and indices
630
+ in the hit sequence as values.
631
+ """
632
+ # If the hit is empty (no aligned residues), return empty mapping
633
+ if not hit_query_sequence:
634
+ return {}
635
+
636
+ # Remove gaps and find the offset of hit.query relative to original query.
637
+ hhsearch_query_sequence = hit_query_sequence.replace('-', '')
638
+ hit_sequence = hit_sequence.replace('-', '')
639
+ hhsearch_query_offset = original_query_sequence.find(hhsearch_query_sequence)
640
+
641
+ # Index of -1 used for gap characters. Subtract the min index ignoring gaps.
642
+ min_idx = min(x for x in indices_hit if x > -1)
643
+ fixed_indices_hit = [
644
+ x - min_idx if x > -1 else -1 for x in indices_hit
645
+ ]
646
+
647
+ min_idx = min(x for x in indices_query if x > -1)
648
+ fixed_indices_query = [x - min_idx if x > -1 else -1 for x in indices_query]
649
+
650
+ # Zip the corrected indices, ignore case where both seqs have gap characters.
651
+ mapping = {}
652
+ for q_i, q_t in zip(fixed_indices_query, fixed_indices_hit):
653
+ if q_t != -1 and q_i != -1:
654
+ if (q_t >= len(hit_sequence) or
655
+ q_i + hhsearch_query_offset >= len(original_query_sequence)):
656
+ continue
657
+ mapping[q_i + hhsearch_query_offset] = q_t
658
+
659
+ return mapping
660
+
661
+
662
@dataclasses.dataclass(frozen=True)
class SingleHitResult:
  """Immutable outcome of trying to featurize one template hit."""
  # Template features for the hit, or None when the hit produced no features.
  features: Optional[Mapping[str, Any]]
  # Error message for the hit, or None when there is no error to report.
  error: Optional[str]
  # Warning message for the hit, or None when there is no warning to report.
  warning: Optional[str]
667
+
668
+
669
def _process_single_hit(
    query_sequence: str,
    query_pdb_code: Optional[str],
    hit: parsers.TemplateHit,
    mmcif_dir: str,
    max_template_date: datetime.datetime,
    release_dates: Mapping[str, datetime.datetime],
    obsolete_pdbs: Mapping[str, str],
    kalign_binary_path: str,
    strict_error_check: bool = False) -> SingleHitResult:
  """Tries to extract template features from a single HHSearch hit.

  Args:
    query_sequence: The query sequence the hit was found for.
    query_pdb_code: PDB code of the query, if known; used for prefiltering
      and logging.
    hit: The HHSearch hit to process.
    mmcif_dir: Directory holding `<pdb_code>.cif` files.
    max_template_date: Templates released after this date are rejected.
    release_dates: Mapping from PDB code to release date.
    obsolete_pdbs: Mapping from obsolete PDB codes to their replacements.
    kalign_binary_path: Path to the kalign executable used for realignment.
    strict_error_check: If True, selected prefilter failures, late release
      dates and missing-data problems are reported as errors rather than
      being skipped or downgraded to warnings.

  Returns:
    A SingleHitResult. `features` is None whenever the hit was rejected or
    feature extraction failed.
  """

  def _rejected(error=None, warning=None):
    # Result for a hit that yielded no features.
    return SingleHitResult(features=None, error=error, warning=warning)

  # Fail hard if we can't get the PDB ID and chain name from the hit.
  hit_pdb_code, hit_chain_id = _get_pdb_id_and_chain(hit)

  # An unknown release date may mean the entry was superseded; follow the
  # obsolete mapping in that case.
  if hit_pdb_code not in release_dates and hit_pdb_code in obsolete_pdbs:
    hit_pdb_code = obsolete_pdbs[hit_pdb_code]

  # Pass hit_pdb_code since it might have changed due to the pdb being obsolete.
  try:
    _assess_hhsearch_hit(
        hit=hit,
        hit_pdb_code=hit_pdb_code,
        query_sequence=query_sequence,
        query_pdb_code=query_pdb_code,
        release_dates=release_dates,
        release_date_cutoff=max_template_date)
  except PrefilterError as e:
    msg = f'hit {hit_pdb_code}_{hit_chain_id} did not pass prefilter: {str(e)}'
    logging.info('%s: %s', query_pdb_code, msg)
    # In strict mode we treat some prefilter cases as errors.
    is_strict_failure = strict_error_check and isinstance(
        e, (DateError, PdbIdError, DuplicateError))
    return _rejected(error=msg if is_strict_failure else None)

  mapping = _build_query_to_hit_index_mapping(
      hit.query, hit.hit_sequence, hit.indices_hit, hit.indices_query,
      query_sequence)

  # The mapping is from the query to the actual hit sequence, so we need to
  # remove gaps (which regardless have a missing confidence score).
  template_sequence = hit.hit_sequence.replace('-', '')

  cif_path = os.path.join(mmcif_dir, hit_pdb_code + '.cif')
  logging.info('Reading PDB entry from %s. Query: %s, template: %s',
               cif_path, query_sequence, template_sequence)
  # Fail if we can't find the mmCIF file.
  with open(cif_path, 'r') as cif_file:
    cif_string = cif_file.read()

  parsing_result = mmcif_parsing.parse(
      file_id=hit_pdb_code, mmcif_string=cif_string)
  mmcif_object = parsing_result.mmcif_object

  # Re-check the release date against the date recorded in the file itself.
  if mmcif_object is not None:
    hit_release_date = datetime.datetime.strptime(
        mmcif_object.header['release_date'], '%Y-%m-%d')
    if hit_release_date > max_template_date:
      error = ('Template %s date (%s) > max template date (%s).' %
               (hit_pdb_code, hit_release_date, max_template_date))
      if strict_error_check:
        return _rejected(error=error)
      logging.warning(error)
      return _rejected()

  def _describe_failure(exc):
    # Shared message layout for both kinds of feature-extraction failure.
    return ('%s_%s (sum_probs: %.2f, rank: %d): feature extracting errors: '
            '%s, mmCIF parsing errors: %s'
            % (hit_pdb_code, hit_chain_id, hit.sum_probs, hit.index,
               str(exc), parsing_result.errors))

  try:
    features, realign_warning = _extract_template_features(
        mmcif_object=mmcif_object,
        pdb_id=hit_pdb_code,
        mapping=mapping,
        template_sequence=template_sequence,
        query_sequence=query_sequence,
        template_chain_id=hit_chain_id,
        kalign_binary_path=kalign_binary_path)
    features['template_sum_probs'] = [hit.sum_probs]

    # It is possible there were some errors when parsing the other chains in the
    # mmCIF file, but the template features for the chain we want were still
    # computed. In such case the mmCIF parsing errors are not relevant.
    return SingleHitResult(
        features=features, error=None, warning=realign_warning)
  except (NoChainsError, NoAtomDataInTemplateError,
          TemplateAtomMaskAllZerosError) as e:
    # These 3 errors indicate missing mmCIF experimental data rather than a
    # problem with the template search, so turn them into warnings.
    warning = _describe_failure(e)
    if strict_error_check:
      return _rejected(error=warning)
    return _rejected(warning=warning)
  except Error as e:
    return _rejected(error=_describe_failure(e))
770
+
771
+
772
@dataclasses.dataclass(frozen=True)
class TemplateSearchResult:
  """Immutable aggregate result of featurizing a list of template hits."""
  # Template features keyed by feature name.
  features: Mapping[str, Any]
  # Error messages collected from the individual hits.
  errors: Sequence[str]
  # Warning messages collected from the individual hits.
  warnings: Sequence[str]
777
+
778
+
779
class TemplateHitFeaturizer:
  """A class for turning hhr hits to template features."""

  def __init__(
      self,
      mmcif_dir: str,
      max_template_date: str,
      max_hits: int,
      kalign_binary_path: str,
      release_dates_path: Optional[str],
      obsolete_pdbs_path: Optional[str],
      strict_error_check: bool = False):
    """Initializes the Template Search.

    Args:
      mmcif_dir: Path to a directory with mmCIF structures. Once a template ID
        is found by HHSearch, this directory is used to retrieve the template
        data.
      max_template_date: The maximum date permitted for template structures. No
        template with date higher than this date will be returned. In ISO8601
        date format, YYYY-MM-DD.
      max_hits: The maximum number of templates that will be returned.
      kalign_binary_path: The path to a kalign executable used for template
        realignment.
      release_dates_path: An optional path to a file with a mapping from PDB IDs
        to their release dates. Thanks to this we don't have to redundantly
        parse mmCIF files to get that information.
      obsolete_pdbs_path: An optional path to a file containing a mapping from
        obsolete PDB IDs to the PDB IDs of their replacements.
      strict_error_check: If True, then the following will be treated as errors:
        * If any template date is after the max_template_date.
        * If any template has identical PDB ID to the query.
        * If any template is a duplicate of the query.
        * Any feature computation errors.

    Raises:
      ValueError: If mmcif_dir contains no `*.cif` files, or if
        max_template_date is unset or not in YYYY-MM-DD format.
    """
    self._mmcif_dir = mmcif_dir
    if not glob.glob(os.path.join(self._mmcif_dir, '*.cif')):
      logging.error('Could not find CIFs in %s', self._mmcif_dir)
      raise ValueError(f'Could not find CIFs in {self._mmcif_dir}')

    try:
      self._max_template_date = datetime.datetime.strptime(
          max_template_date, '%Y-%m-%d')
    except (TypeError, ValueError) as err:
      # TypeError covers an unset (None) date; chain the cause so the
      # original parsing failure stays visible in the traceback.
      raise ValueError(
          'max_template_date must be set and have format YYYY-MM-DD.') from err
    self._max_hits = max_hits
    self._kalign_binary_path = kalign_binary_path
    self._strict_error_check = strict_error_check

    if release_dates_path:
      logging.info('Using precomputed release dates %s.', release_dates_path)
      self._release_dates = _parse_release_dates(release_dates_path)
    else:
      self._release_dates = {}

    if obsolete_pdbs_path:
      logging.info('Using precomputed obsolete pdbs %s.', obsolete_pdbs_path)
      self._obsolete_pdbs = _parse_obsolete(obsolete_pdbs_path)
    else:
      self._obsolete_pdbs = {}

  def get_templates(
      self,
      query_sequence: str,
      query_pdb_code: Optional[str],
      query_release_date: Optional[datetime.datetime],
      hits: Sequence[parsers.TemplateHit]) -> TemplateSearchResult:
    """Computes the templates for given query sequence.

    Hits are processed in order of decreasing `sum_probs` until `max_hits`
    of them have produced features.

    Args:
      query_sequence: The query sequence to find templates for.
      query_pdb_code: PDB code of the query, if known.
      query_release_date: Release date of the query, if known. When given,
        the template cutoff is the earlier of max_template_date and this
        date minus 60 days.
      hits: The template hits to turn into features.

    Returns:
      A TemplateSearchResult with one entry per name in TEMPLATE_FEATURES,
      plus the errors and warnings collected while processing the hits.
    """
    logging.info('Searching for template for: %s', query_pdb_code)

    template_features = {name: [] for name in TEMPLATE_FEATURES}

    # Always use a max_template_date. Set to query_release_date minus 60 days
    # if that's earlier.
    template_cutoff_date = self._max_template_date
    if query_release_date:
      delta = datetime.timedelta(days=60)
      if query_release_date - delta < template_cutoff_date:
        template_cutoff_date = query_release_date - delta
      assert template_cutoff_date < query_release_date
    assert template_cutoff_date <= self._max_template_date

    num_hits = 0
    errors = []
    warnings = []

    for hit in sorted(hits, key=lambda x: x.sum_probs, reverse=True):
      # We got all the templates we wanted, stop processing hits.
      if num_hits >= self._max_hits:
        break

      result = _process_single_hit(
          query_sequence=query_sequence,
          query_pdb_code=query_pdb_code,
          hit=hit,
          mmcif_dir=self._mmcif_dir,
          max_template_date=template_cutoff_date,
          release_dates=self._release_dates,
          obsolete_pdbs=self._obsolete_pdbs,
          strict_error_check=self._strict_error_check,
          kalign_binary_path=self._kalign_binary_path)

      if result.error:
        errors.append(result.error)

      # There could be an error even if there are some results, e.g. thrown by
      # other unparsable chains in the same mmCIF file.
      if result.warning:
        warnings.append(result.warning)

      if result.features is None:
        logging.info('Skipped invalid hit %s, error: %s, warning: %s',
                     hit.name, result.error, result.warning)
      else:
        # Increment the hit counter, since we got features out of this hit.
        num_hits += 1
        for k in template_features:
          template_features[k].append(result.features[k])

    for name in template_features:
      if num_hits > 0:
        template_features[name] = np.stack(
            template_features[name], axis=0).astype(TEMPLATE_FEATURES[name])
      else:
        # Make sure the feature has correct dtype even if empty.
        template_features[name] = np.array([], dtype=TEMPLATE_FEATURES[name])

    return TemplateSearchResult(
        features=template_features, errors=errors, warnings=warnings)
af_backprop/alphafold/data/tools/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Python wrappers for third party tools."""
af_backprop/alphafold/data/tools/hhblits.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Library to run HHblits from Python."""
16
+
17
+ import glob
18
+ import os
19
+ import subprocess
20
+ from typing import Any, Mapping, Optional, Sequence
21
+
22
+ from absl import logging
23
+ from alphafold.data.tools import utils
24
+ # Internal import (7716).
25
+
26
+
27
# HHblits' own defaults; the matching flags are only passed when overridden.
_HHBLITS_DEFAULT_P = 20
_HHBLITS_DEFAULT_Z = 500


class HHBlits:
  """Python wrapper of the HHblits binary."""

  def __init__(self,
               *,
               binary_path: str,
               databases: Sequence[str],
               n_cpu: int = 4,
               n_iter: int = 3,
               e_value: float = 0.001,
               maxseq: int = 1_000_000,
               realign_max: int = 100_000,
               maxfilt: int = 100_000,
               min_prefilter_hits: int = 1000,
               all_seqs: bool = False,
               alt: Optional[int] = None,
               p: int = _HHBLITS_DEFAULT_P,
               z: int = _HHBLITS_DEFAULT_Z):
    """Initializes the Python HHblits wrapper.

    Args:
      binary_path: The path to the HHblits executable.
      databases: A sequence of HHblits database paths. This should be the
        common prefix for the database files (i.e. up to but not including
        _hhm.ffindex etc.)
      n_cpu: The number of CPUs to give HHblits.
      n_iter: The number of HHblits iterations.
      e_value: The E-value, see HHblits docs for more details.
      maxseq: The maximum number of rows in an input alignment. Note that this
        parameter is only supported in HHBlits version 3.1 and higher.
      realign_max: Max number of HMM-HMM hits to realign. HHblits default: 500.
      maxfilt: Max number of hits allowed to pass the 2nd prefilter.
        HHblits default: 20000.
      min_prefilter_hits: Min number of hits to pass prefilter.
        HHblits default: 100.
      all_seqs: Return all sequences in the MSA / Do not filter the result MSA.
        HHblits default: False.
      alt: Show up to this many alternative alignments.
      p: Minimum Prob for a hit to be included in the output hhr file.
        HHblits default: 20.
      z: Hard cap on number of hits reported in the hhr file.
        HHblits default: 500. NB: The relevant HHblits flag is -Z not -z.

    Raises:
      ValueError: If no files matching `<database_path>_*` exist for one of
        the given databases.
    """
    self.binary_path = binary_path
    self.databases = databases

    for database_path in self.databases:
      if not glob.glob(database_path + '_*'):
        logging.error('Could not find HHBlits database %s', database_path)
        raise ValueError(f'Could not find HHBlits database {database_path}')

    self.n_cpu = n_cpu
    self.n_iter = n_iter
    self.e_value = e_value
    self.maxseq = maxseq
    self.realign_max = realign_max
    self.maxfilt = maxfilt
    self.min_prefilter_hits = min_prefilter_hits
    self.all_seqs = all_seqs
    self.alt = alt
    self.p = p
    self.z = z

  def query(self, input_fasta_path: str) -> Mapping[str, Any]:
    """Queries the database using HHblits.

    Args:
      input_fasta_path: Path to the FASTA file passed to HHblits via `-i`.

    Returns:
      A dict with keys `a3m` (contents of the produced a3m file), `output`
      (raw stdout bytes), `stderr` (raw stderr bytes), `n_iter` and `e_value`.

    Raises:
      RuntimeError: If HHblits exits with a non-zero return code.
    """
    with utils.tmpdir_manager(base_dir='/tmp') as query_tmp_dir:
      a3m_path = os.path.join(query_tmp_dir, 'output.a3m')

      db_cmd = []
      for db_path in self.databases:
        db_cmd.extend(('-d', db_path))
      cmd = [
          self.binary_path,
          '-i', input_fasta_path,
          '-cpu', str(self.n_cpu),
          '-oa3m', a3m_path,
          '-o', '/dev/null',
          '-n', str(self.n_iter),
          '-e', str(self.e_value),
          '-maxseq', str(self.maxseq),
          '-realign_max', str(self.realign_max),
          '-maxfilt', str(self.maxfilt),
          '-min_prefilter_hits', str(self.min_prefilter_hits)]
      if self.all_seqs:
        cmd += ['-all']
      if self.alt:
        cmd += ['-alt', str(self.alt)]
      if self.p != _HHBLITS_DEFAULT_P:
        cmd += ['-p', str(self.p)]
      if self.z != _HHBLITS_DEFAULT_Z:
        cmd += ['-Z', str(self.z)]
      cmd += db_cmd

      logging.info('Launching subprocess "%s"', ' '.join(cmd))
      process = subprocess.Popen(
          cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

      with utils.timing('HHblits query'):
        stdout, stderr = process.communicate()
        retcode = process.wait()

      if retcode:
        # Decode leniently: the byte-level truncation below can cut a
        # multi-byte character in half, and a strict decode would then raise
        # UnicodeDecodeError instead of the RuntimeError describing the failure.
        # Logs have a 15k character limit, so log HHblits error line by line.
        logging.error('HHblits failed. HHblits stderr begin:')
        for error_line in stderr.decode('utf-8', errors='replace').splitlines():
          if error_line.strip():
            logging.error(error_line.strip())
        logging.error('HHblits stderr end')
        raise RuntimeError('HHblits failed\nstdout:\n%s\n\nstderr:\n%s\n' % (
            stdout.decode('utf-8', errors='replace'),
            stderr[:500_000].decode('utf-8', errors='replace')))

      with open(a3m_path) as f:
        a3m = f.read()

    raw_output = dict(
        a3m=a3m,
        output=stdout,
        stderr=stderr,
        n_iter=self.n_iter,
        e_value=self.e_value)
    return raw_output
af_backprop/alphafold/data/tools/hhsearch.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Library to run HHsearch from Python."""
16
+
17
+ import glob
18
+ import os
19
+ import subprocess
20
+ from typing import Sequence
21
+
22
+ from absl import logging
23
+
24
+ from alphafold.data.tools import utils
25
+ # Internal import (7716).
26
+
27
+
28
class HHSearch:
  """Python wrapper of the HHsearch binary."""

  def __init__(self,
               *,
               binary_path: str,
               databases: Sequence[str],
               maxseq: int = 1_000_000):
    """Initializes the Python HHsearch wrapper.

    Args:
      binary_path: The path to the HHsearch executable.
      databases: A sequence of HHsearch database paths. This should be the
        common prefix for the database files (i.e. up to but not including
        _hhm.ffindex etc.)
      maxseq: The maximum number of rows in an input alignment. Note that this
        parameter is only supported in HHBlits version 3.1 and higher.

    Raises:
      ValueError: If no files matching `<database_path>_*` exist for one of
        the given databases.
    """
    self.binary_path = binary_path
    self.databases = databases
    self.maxseq = maxseq

    for database_path in self.databases:
      if not glob.glob(database_path + '_*'):
        logging.error('Could not find HHsearch database %s', database_path)
        raise ValueError(f'Could not find HHsearch database {database_path}')

  def query(self, a3m: str) -> str:
    """Queries the database using HHsearch using a given a3m.

    Args:
      a3m: The query alignment, written to a temporary file for HHsearch.

    Returns:
      The contents of the hhr file produced by HHsearch.

    Raises:
      RuntimeError: If HHsearch exits with a non-zero return code.
    """
    with utils.tmpdir_manager(base_dir='/tmp') as query_tmp_dir:
      input_path = os.path.join(query_tmp_dir, 'query.a3m')
      hhr_path = os.path.join(query_tmp_dir, 'output.hhr')
      with open(input_path, 'w') as f:
        f.write(a3m)

      db_cmd = []
      for db_path in self.databases:
        db_cmd.extend(('-d', db_path))
      cmd = [self.binary_path,
             '-i', input_path,
             '-o', hhr_path,
             '-maxseq', str(self.maxseq)
             ] + db_cmd

      logging.info('Launching subprocess "%s"', ' '.join(cmd))
      process = subprocess.Popen(
          cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
      with utils.timing('HHsearch query'):
        stdout, stderr = process.communicate()
        retcode = process.wait()

      if retcode:
        # Stderr is truncated to prevent proto size errors in Beam. Decode
        # leniently: the byte-level cut can split a multi-byte character, and
        # a strict decode would raise UnicodeDecodeError instead of the
        # RuntimeError describing the failure.
        raise RuntimeError(
            'HHSearch failed:\nstdout:\n%s\n\nstderr:\n%s\n' % (
                stdout.decode('utf-8', errors='replace'),
                stderr[:100_000].decode('utf-8', errors='replace')))

      with open(hhr_path) as f:
        hhr = f.read()
    return hhr
af_backprop/alphafold/data/tools/hmmbuild.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """A Python wrapper for hmmbuild - construct HMM profiles from MSA."""
16
+
17
+ import os
18
+ import re
19
+ import subprocess
20
+
21
+ from absl import logging
22
+ from alphafold.data.tools import utils
23
+ # Internal import (7716).
24
+
25
+
26
class Hmmbuild:
  """Python wrapper of the hmmbuild binary."""

  def __init__(self,
               *,
               binary_path: str,
               singlemx: bool = False):
    """Initializes the Python hmmbuild wrapper.

    Args:
      binary_path: The path to the hmmbuild executable.
      singlemx: Whether to use --singlemx flag. If True, it forces HMMBuild to
        just use a common substitution score matrix.
    """
    self.binary_path = binary_path
    self.singlemx = singlemx

  def build_profile_from_sto(self, sto: str, model_construction='fast') -> str:
    """Builds a HMM for the aligned sequences given as a Stockholm string.

    Args:
      sto: A string with the aligned sequences in the Stockholm format.
      model_construction: Whether to use reference annotation in the msa to
        determine consensus columns ('hand') or default ('fast').

    Returns:
      A string with the profile in the HMM format.

    Raises:
      RuntimeError: If hmmbuild fails.
      ValueError: If model_construction is neither 'hand' nor 'fast'.
    """
    return self._build_profile(sto, model_construction=model_construction)

  def build_profile_from_a3m(self, a3m: str) -> str:
    """Builds a HMM for the aligned sequences given as an A3M string.

    Args:
      a3m: A string with the aligned sequences in the A3M format.

    Returns:
      A string with the profile in the HMM format.

    Raises:
      RuntimeError: If hmmbuild fails.
    """
    lines = []
    for line in a3m.splitlines():
      if not line.startswith('>'):
        line = re.sub('[a-z]+', '', line)  # Remove inserted residues.
      lines.append(line + '\n')
    msa = ''.join(lines)
    return self._build_profile(msa, model_construction='fast')

  def _build_profile(self, msa: str, model_construction: str = 'fast') -> str:
    """Builds a HMM for the aligned sequences given as an MSA string.

    Args:
      msa: A string with the aligned sequences, in A3M or STO format.
      model_construction: Whether to use reference annotation in the msa to
        determine consensus columns ('hand') or default ('fast').

    Returns:
      A string with the profile in the HMM format.

    Raises:
      RuntimeError: If hmmbuild fails.
      ValueError: If model_construction is neither 'hand' nor 'fast'.
    """
    if model_construction not in {'hand', 'fast'}:
      raise ValueError(f'Invalid model_construction {model_construction} - only '
                       'hand and fast supported.')

    with utils.tmpdir_manager(base_dir='/tmp') as query_tmp_dir:
      input_query = os.path.join(query_tmp_dir, 'query.msa')
      output_hmm_path = os.path.join(query_tmp_dir, 'output.hmm')

      with open(input_query, 'w') as f:
        f.write(msa)

      cmd = [self.binary_path]
      # If adding flags, we have to do so before the output and input:

      if model_construction == 'hand':
        cmd.append(f'--{model_construction}')
      if self.singlemx:
        cmd.append('--singlemx')
      cmd.extend([
          '--amino',
          output_hmm_path,
          input_query,
      ])

      logging.info('Launching subprocess %s', cmd)
      process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)

      with utils.timing('hmmbuild query'):
        stdout, stderr = process.communicate()
        retcode = process.wait()
        # Decode leniently so undecodable tool output cannot raise here and
        # hide the real outcome of the run.
        stdout_text = stdout.decode('utf-8', errors='replace')
        stderr_text = stderr.decode('utf-8', errors='replace')
        logging.info('hmmbuild stdout:\n%s\n\nstderr:\n%s\n',
                     stdout_text, stderr_text)

      if retcode:
        raise RuntimeError('hmmbuild failed\nstdout:\n%s\n\nstderr:\n%s\n'
                           % (stdout_text, stderr_text))

      with open(output_hmm_path, encoding='utf-8') as f:
        hmm = f.read()

    return hmm
af_backprop/alphafold/data/tools/hmmsearch.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """A Python wrapper for hmmsearch - search profile against a sequence db."""
16
+
17
+ import os
18
+ import subprocess
19
+ from typing import Optional, Sequence
20
+
21
+ from absl import logging
22
+ from alphafold.data.tools import utils
23
+ # Internal import (7716).
24
+
25
+
26
class Hmmsearch:
  """Python wrapper of the hmmsearch binary."""

  def __init__(self,
               *,
               binary_path: str,
               database_path: str,
               flags: Optional[Sequence[str]] = None):
    """Initializes the Python hmmsearch wrapper.

    Args:
      binary_path: The path to the hmmsearch executable.
      database_path: The path to the hmmsearch database (FASTA format).
      flags: List of flags to be used by hmmsearch.

    Raises:
      ValueError: If database_path does not exist.
    """
    self.binary_path = binary_path
    self.database_path = database_path
    self.flags = flags

    if not os.path.exists(self.database_path):
      logging.error('Could not find hmmsearch database %s', database_path)
      raise ValueError(f'Could not find hmmsearch database {database_path}')

  def query(self, hmm: str) -> str:
    """Queries the database using hmmsearch using a given hmm.

    Args:
      hmm: The query profile, written to a temporary file for hmmsearch.

    Returns:
      The contents of the alignment file hmmsearch writes via `-A`.

    Raises:
      RuntimeError: If hmmsearch exits with a non-zero return code.
    """
    with utils.tmpdir_manager(base_dir='/tmp') as query_tmp_dir:
      hmm_input_path = os.path.join(query_tmp_dir, 'query.hmm')
      a3m_out_path = os.path.join(query_tmp_dir, 'output.a3m')
      with open(hmm_input_path, 'w') as f:
        f.write(hmm)

      cmd = [
          self.binary_path,
          '--noali',  # Don't include the alignment in stdout.
          '--cpu', '8'
      ]
      # If adding flags, we have to do so before the output and input:
      if self.flags:
        cmd.extend(self.flags)
      cmd.extend([
          '-A', a3m_out_path,
          hmm_input_path,
          self.database_path,
      ])

      logging.info('Launching sub-process %s', cmd)
      process = subprocess.Popen(
          cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
      with utils.timing(
          f'hmmsearch ({os.path.basename(self.database_path)}) query'):
        stdout, stderr = process.communicate()
        retcode = process.wait()

      if retcode:
        # Decode leniently so undecodable tool output cannot replace the
        # RuntimeError with a UnicodeDecodeError.
        raise RuntimeError(
            'hmmsearch failed:\nstdout:\n%s\n\nstderr:\n%s\n' % (
                stdout.decode('utf-8', errors='replace'),
                stderr.decode('utf-8', errors='replace')))

      with open(a3m_out_path) as f:
        a3m_out = f.read()

    return a3m_out
af_backprop/alphafold/data/tools/jackhmmer.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Library to run Jackhmmer from Python."""
16
+
17
+ from concurrent import futures
18
+ import glob
19
+ import os
20
+ import subprocess
21
+ from typing import Any, Callable, Mapping, Optional, Sequence
22
+ from urllib import request
23
+
24
+ from absl import logging
25
+
26
+ from alphafold.data.tools import utils
27
+ # Internal import (7716).
28
+
29
+
30
class Jackhmmer:
  """Python wrapper of the Jackhmmer binary."""

  def __init__(self,
               *,
               binary_path: str,
               database_path: str,
               n_cpu: int = 8,
               n_iter: int = 1,
               e_value: float = 0.0001,
               z_value: Optional[int] = None,
               get_tblout: bool = False,
               filter_f1: float = 0.0005,
               filter_f2: float = 0.00005,
               filter_f3: float = 0.0000005,
               incdom_e: Optional[float] = None,
               dom_e: Optional[float] = None,
               num_streamed_chunks: Optional[int] = None,
               streaming_callback: Optional[Callable[[int], None]] = None):
    """Initializes the Python Jackhmmer wrapper.

    Args:
      binary_path: The path to the jackhmmer executable.
      database_path: The path to the jackhmmer database (FASTA format).
      n_cpu: The number of CPUs to give Jackhmmer.
      n_iter: The number of Jackhmmer iterations.
      e_value: The E-value, see Jackhmmer docs for more details.
      z_value: The Z-value, see Jackhmmer docs for more details.
      get_tblout: Whether to save tblout string.
      filter_f1: MSV and biased composition pre-filter, set to >1.0 to turn off.
      filter_f2: Viterbi pre-filter, set to >1.0 to turn off.
      filter_f3: Forward pre-filter, set to >1.0 to turn off.
      incdom_e: Domain e-value criteria for inclusion of domains in MSA/next
        round.
      dom_e: Domain e-value criteria for inclusion in tblout.
      num_streamed_chunks: Number of database chunks to stream over.
      streaming_callback: Callback function run after each chunk iteration with
        the iteration number as argument.

    Raises:
      ValueError: If the database does not exist locally and chunk streaming
        is not requested.
    """
    self.binary_path = binary_path
    self.database_path = database_path
    self.num_streamed_chunks = num_streamed_chunks

    # In streaming mode the database path is a remote prefix, so the local
    # existence check only applies to the non-streamed case.
    if not os.path.exists(self.database_path) and num_streamed_chunks is None:
      logging.error('Could not find Jackhmmer database %s', database_path)
      raise ValueError(f'Could not find Jackhmmer database {database_path}')

    self.n_cpu = n_cpu
    self.n_iter = n_iter
    self.e_value = e_value
    self.z_value = z_value
    self.filter_f1 = filter_f1
    self.filter_f2 = filter_f2
    self.filter_f3 = filter_f3
    self.incdom_e = incdom_e
    self.dom_e = dom_e
    self.get_tblout = get_tblout
    self.streaming_callback = streaming_callback

  def _query_chunk(self, input_fasta_path: str, database_path: str
                  ) -> Mapping[str, Any]:
    """Queries the database chunk using Jackhmmer.

    Args:
      input_fasta_path: Path to the query FASTA file.
      database_path: Path to the database (or database chunk) to search.

    Returns:
      A mapping with the Stockholm output (`sto`), the tblout text (`tbl`,
      empty unless `get_tblout` is set), the raw `stderr` bytes, and the
      `n_iter` and `e_value` settings used.

    Raises:
      RuntimeError: If Jackhmmer exits with a non-zero return code.
    """
    with utils.tmpdir_manager(base_dir='/tmp') as query_tmp_dir:
      sto_path = os.path.join(query_tmp_dir, 'output.sto')

      # The F1/F2/F3 are the expected proportion to pass each of the filtering
      # stages (which get progressively more expensive), reducing these
      # speeds up the pipeline at the expense of sensitivity.  They are
      # currently set very low to make querying Mgnify run in a reasonable
      # amount of time.
      cmd_flags = [
          # Don't pollute stdout with Jackhmmer output.
          '-o', '/dev/null',
          '-A', sto_path,
          '--noali',
          '--F1', str(self.filter_f1),
          '--F2', str(self.filter_f2),
          '--F3', str(self.filter_f3),
          '--incE', str(self.e_value),
          # Report only sequences with E-values <= x in per-sequence output.
          '-E', str(self.e_value),
          '--cpu', str(self.n_cpu),
          '-N', str(self.n_iter)
      ]
      if self.get_tblout:
        tblout_path = os.path.join(query_tmp_dir, 'tblout.txt')
        cmd_flags.extend(['--tblout', tblout_path])

      if self.z_value:
        cmd_flags.extend(['-Z', str(self.z_value)])

      if self.dom_e is not None:
        cmd_flags.extend(['--domE', str(self.dom_e)])

      if self.incdom_e is not None:
        cmd_flags.extend(['--incdomE', str(self.incdom_e)])

      cmd = [self.binary_path] + cmd_flags + [input_fasta_path,
                                              database_path]

      logging.info('Launching subprocess "%s"', ' '.join(cmd))
      process = subprocess.Popen(
          cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
      with utils.timing(
          f'Jackhmmer ({os.path.basename(database_path)}) query'):
        _, stderr = process.communicate()
        retcode = process.wait()

      if retcode:
        raise RuntimeError(
            'Jackhmmer failed\nstderr:\n%s\n' % stderr.decode('utf-8'))

      # Get e-values for each target name
      tbl = ''
      if self.get_tblout:
        with open(tblout_path) as f:
          tbl = f.read()

      with open(sto_path) as f:
        sto = f.read()

    raw_output = dict(
        sto=sto,
        tbl=tbl,
        stderr=stderr,
        n_iter=self.n_iter,
        e_value=self.e_value)

    return raw_output

  def query(self, input_fasta_path: str) -> Sequence[Mapping[str, Any]]:
    """Queries the database using Jackhmmer.

    Without streaming, the whole database is searched in one call. With
    `num_streamed_chunks` set, chunks named `<database_path>.<i>` (i starting
    at 1) are fetched one at a time into `/tmp/ramdisk`, searched, and deleted,
    while the next chunk is downloaded in the background.

    Args:
      input_fasta_path: Path to the query FASTA file.

    Returns:
      A list with one `_query_chunk` result per searched database (chunk).
    """
    if self.num_streamed_chunks is None:
      return [self._query_chunk(input_fasta_path, self.database_path)]

    db_basename = os.path.basename(self.database_path)

    def db_remote_chunk(db_idx):
      return f'{self.database_path}.{db_idx}'

    def db_local_chunk(db_idx):
      return f'/tmp/ramdisk/{db_basename}.{db_idx}'

    # Remove existing files to prevent OOM
    for stale_chunk in glob.glob(db_local_chunk('[0-9]*')):
      try:
        os.remove(stale_chunk)
      except OSError:
        # Best effort: a leftover chunk is not fatal, but make it visible.
        logging.warning('OSError while deleting %s', stale_chunk)

    # Download the (i+1)-th chunk while Jackhmmer is running on the i-th chunk
    with futures.ThreadPoolExecutor(max_workers=2) as executor:
      chunked_output = []
      for i in range(1, self.num_streamed_chunks + 1):
        # Copy the chunk locally
        if i == 1:
          future = executor.submit(
              request.urlretrieve, db_remote_chunk(i), db_local_chunk(i))
        if i < self.num_streamed_chunks:
          next_future = executor.submit(
              request.urlretrieve, db_remote_chunk(i+1), db_local_chunk(i+1))

        # Run Jackhmmer with the chunk
        future.result()
        chunked_output.append(
            self._query_chunk(input_fasta_path, db_local_chunk(i)))

        # Remove the local copy of the chunk
        os.remove(db_local_chunk(i))
        # There is no next download after the last chunk; guarding here keeps
        # single-chunk databases from hitting an unassigned `next_future`.
        if i < self.num_streamed_chunks:
          future = next_future
        if self.streaming_callback:
          self.streaming_callback(i)
    return chunked_output
af_backprop/alphafold/data/tools/kalign.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """A Python wrapper for Kalign."""
16
+ import os
17
+ import subprocess
18
+ from typing import Sequence
19
+
20
+ from absl import logging
21
+
22
+ from alphafold.data.tools import utils
23
+ # Internal import (7716).
24
+
25
+
26
+ def _to_a3m(sequences: Sequence[str]) -> str:
27
+ """Converts sequences to an a3m file."""
28
+ names = ['sequence %d' % i for i in range(1, len(sequences) + 1)]
29
+ a3m = []
30
+ for sequence, name in zip(sequences, names):
31
+ a3m.append(u'>' + name + u'\n')
32
+ a3m.append(sequence + u'\n')
33
+ return ''.join(a3m)
34
+
35
+
36
class Kalign:
  """Python wrapper of the Kalign binary."""

  def __init__(self, *, binary_path: str):
    """Initializes the Python Kalign wrapper.

    The path is stored as given; it is not checked for existence here.

    Args:
      binary_path: The path to the Kalign binary.
    """
    self.binary_path = binary_path

  def align(self, sequences: Sequence[str]) -> str:
    """Aligns the sequences and returns the alignment in A3M string.

    Args:
      sequences: A list of query sequence strings. The sequences have to be at
        least 6 residues long (Kalign requires this). Note that the order in
        which you give the sequences might alter the output slightly as
        different alignment tree might get constructed.

    Returns:
      A string with the alignment in a3m format.

    Raises:
      RuntimeError: If Kalign fails.
      ValueError: If any of the sequences is less than 6 residues long.
    """
    logging.info('Aligning %d sequences', len(sequences))

    for s in sequences:
      if len(s) < 6:
        raise ValueError('Kalign requires all sequences to be at least 6 '
                         'residues long. Got %s (%d residues).' % (s, len(s)))

    with utils.tmpdir_manager(base_dir='/tmp') as query_tmp_dir:
      input_fasta_path = os.path.join(query_tmp_dir, 'input.fasta')
      output_a3m_path = os.path.join(query_tmp_dir, 'output.a3m')

      with open(input_fasta_path, 'w') as f:
        f.write(_to_a3m(sequences))

      cmd = [
          self.binary_path,
          '-i', input_fasta_path,
          '-o', output_a3m_path,
          '-format', 'fasta',
      ]

      logging.info('Launching subprocess "%s"', ' '.join(cmd))
      process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)

      with utils.timing('Kalign query'):
        stdout, stderr = process.communicate()
        # communicate() has already waited for the process to exit.
        retcode = process.returncode
        # Decode once; the text is used for both the log and the error.
        stdout_text = stdout.decode('utf-8')
        stderr_text = stderr.decode('utf-8')
        logging.info('Kalign stdout:\n%s\n\nstderr:\n%s\n',
                     stdout_text, stderr_text)

      if retcode:
        raise RuntimeError('Kalign failed\nstdout:\n%s\n\nstderr:\n%s\n'
                           % (stdout_text, stderr_text))

      with open(output_a3m_path) as f:
        a3m = f.read()

    return a3m
af_backprop/alphafold/data/tools/utils.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Common utilities for data pipeline tools."""
15
+ import contextlib
16
+ import shutil
17
+ import tempfile
18
+ import time
19
+ from typing import Optional
20
+
21
+ from absl import logging
22
+
23
+
24
@contextlib.contextmanager
def tmpdir_manager(base_dir: Optional[str] = None):
  """Yields a fresh temporary directory and removes it when the block exits.

  Args:
    base_dir: Directory in which to create the temporary directory; passed to
      `tempfile.mkdtemp` as `dir`.

  Yields:
    The path of the newly created directory. The directory tree is removed on
    exit, including when the body raises; removal errors are ignored.
  """
  scratch_dir = tempfile.mkdtemp(dir=base_dir)
  try:
    yield scratch_dir
  finally:
    shutil.rmtree(scratch_dir, ignore_errors=True)
32
+
33
+
34
@contextlib.contextmanager
def timing(msg: str):
  """Logs the start of a block and, on success, how long it took.

  The elapsed time is measured with a monotonic high-resolution clock so it
  is not distorted by system clock adjustments. The "Finished" line is only
  logged when the body completes without raising.

  Args:
    msg: Description of the timed operation, used in both log lines.
  """
  logging.info('Started %s', msg)
  tic = time.perf_counter()
  yield
  toc = time.perf_counter()
  logging.info('Finished %s in %.3f seconds', msg, toc - tic)
af_backprop/alphafold/model/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Alphafold model."""
af_backprop/alphafold/model/all_atom.py ADDED
@@ -0,0 +1,1155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Ops for all atom representations.
16
+
17
+ Generally we employ two different representations for all atom coordinates,
18
+ one is atom37 where each heavy atom corresponds to a given position in a 37
19
+ dimensional array. This mapping is not amino-acid specific, but each slot
20
+ corresponds to an atom of a given name, for example slot 12 always corresponds
21
+ to 'C delta 1', positions that are not present for a given amino acid are
22
+ zeroed out and denoted by a mask.
23
+ The other representation we employ is called atom14, this is a more dense way
24
+ of representing atoms with 14 slots. Here a given slot will correspond to a
25
+ different kind of atom depending on amino acid type, for example slot 5
26
+ corresponds to 'N delta 2' for Asparagine, but to 'C delta 1' for Isoleucine.
27
+ 14 is chosen because it is the maximum number of heavy atoms for any standard
28
+ amino acid.
29
+ The order of slots can be found in 'residue_constants.residue_atoms'.
30
+ Internally the model uses the atom14 representation because it is
31
+ computationally more efficient.
32
+ The internal atom14 representation is turned into the atom37 at the output of
33
+ the network to facilitate easier conversion to existing protein datastructures.
34
+ """
35
+
36
+ from typing import Dict, Optional
37
+ from alphafold.common import residue_constants
38
+
39
+ from alphafold.model import r3
40
+ from alphafold.model import utils
41
+ import jax
42
+ import jax.numpy as jnp
43
+ import numpy as np
44
+
45
+
46
def squared_difference(x, y):
  """Returns the element-wise square of `x - y`."""
  delta = x - y
  return jnp.square(delta)
48
+
49
+
50
def get_chi_atom_indices():
  """Returns atom indices needed to compute chi angles for all residue types.

  Returns:
    A tensor of shape [residue_types=21, chis=4, atoms=4]. The residue types are
    in the order specified in residue_constants.restypes + unknown residue type
    at the end. For chi angles which are not defined on the residue, the
    positions indices are by default set to 0.
  """
  per_restype = []
  for one_letter in residue_constants.restypes:
    three_letter = residue_constants.restype_1to3[one_letter]
    chi_rows = [
        [residue_constants.atom_order[atom_name] for atom_name in chi_atoms]
        for chi_atoms in residue_constants.chi_angles_atoms[three_letter]]
    # Pad with zero rows for chi angles not defined on this amino acid.
    num_missing = 4 - len(chi_rows)
    chi_rows += [[0, 0, 0, 0] for _ in range(num_missing)]
    per_restype.append(chi_rows)

  # Final entry: the UNKNOWN residue type has no chi angles.
  per_restype.append([[0, 0, 0, 0]] * 4)

  return jnp.asarray(per_restype)
74
+
75
+
76
def atom14_to_atom37(atom14_data: jnp.ndarray,  # (N, 14, ...)
                     batch: Dict[str, jnp.ndarray]
                    ) -> jnp.ndarray:  # (N, 37, ...)
  """Convert atom14 to atom37 representation.

  Args:
    atom14_data: Rank-2 or rank-3 array in atom14 layout.
    batch: Must contain 'residx_atom37_to_atom14' and 'atom37_atom_exists'.
      An integer-typed 'residx_atom37_to_atom14' is used as gather indices;
      any other dtype is contracted against `atom14_data` with an einsum
      (presumably a one-hot or soft assignment -- confirm against callers).

  Returns:
    The data in atom37 layout, multiplied by 'atom37_atom_exists' and kept in
    the dtype produced by the gather/einsum.
  """
  assert len(atom14_data.shape) in [2, 3]
  assert 'residx_atom37_to_atom14' in batch
  assert 'atom37_atom_exists' in batch

  index_map = batch['residx_atom37_to_atom14']
  if jnp.issubdtype(index_map.dtype, jnp.integer):
    atom37_data = utils.batched_gather(atom14_data, index_map, batch_dims=1)
  else:
    atom37_data = jnp.einsum("na...,nba->nb...", atom14_data, index_map)

  # Cast the mask in both the rank-2 and rank-3 case so that masking never
  # promotes the data dtype (matches atom37_to_atom14).
  exists_mask = batch['atom37_atom_exists'].astype(atom37_data.dtype)
  if len(atom14_data.shape) == 3:
    exists_mask = exists_mask[:, :, None]
  return atom37_data * exists_mask
94
+
95
def atom37_to_atom14(
    atom37_data: jnp.ndarray,  # (N, 37, ...)
    batch: Dict[str, jnp.ndarray]) -> jnp.ndarray:  # (N, 14, ...)
  """Convert atom37 to atom14 representation.

  Args:
    atom37_data: Rank-2 or rank-3 array in atom37 layout.
    batch: Must contain 'residx_atom14_to_atom37' and 'atom14_atom_exists'.
      An integer-typed 'residx_atom14_to_atom37' is used as gather indices;
      any other dtype is contracted against `atom37_data` with an einsum
      (presumably a one-hot or soft assignment -- confirm against callers).

  Returns:
    The data in atom14 layout, multiplied by 'atom14_atom_exists' cast to the
    data dtype.
  """
  assert len(atom37_data.shape) in [2, 3]
  assert 'residx_atom14_to_atom37' in batch
  assert 'atom14_atom_exists' in batch

  index_map = batch['residx_atom14_to_atom37']
  if jnp.issubdtype(index_map.dtype, jnp.integer):
    atom14_data = utils.batched_gather(atom37_data, index_map, batch_dims=1)
  else:
    atom14_data = jnp.einsum("na...,nba->nb...", atom37_data, index_map)

  exists_mask = batch['atom14_atom_exists'].astype(atom14_data.dtype)
  if len(atom37_data.shape) == 3:
    # Broadcast the per-atom mask over the trailing axis.
    exists_mask = exists_mask[:, :, None]
  return atom14_data * exists_mask
113
+
114
+
115
def atom37_to_frames(
    aatype: jnp.ndarray,  # (...)
    all_atom_positions: jnp.ndarray,  # (..., 37, 3)
    all_atom_mask: jnp.ndarray,  # (..., 37)
) -> Dict[str, jnp.ndarray]:
  """Computes the frames for the up to 8 rigid groups for each residue.

  The rigid groups are defined by the possible torsions in a given amino acid.
  We group the atoms according to their dependence on the torsion angles into
  "rigid groups".  E.g., the position of atoms in the chi2-group depend on
  chi1 and chi2, but do not depend on chi3 or chi4.
  Jumper et al. (2021) Suppl. Table 2 and corresponding text.

  Args:
    aatype: Amino acid type, given as array with integers.
    all_atom_positions: atom37 representation of all atom coordinates.
    all_atom_mask: atom37 representation of mask on all atom coordinates.
  Returns:
    Dictionary containing:
      * 'rigidgroups_gt_frames': 8 Frames corresponding to 'all_atom_positions'
           represented as flat 12 dimensional array.
      * 'rigidgroups_gt_exists': Mask denoting whether the atom positions for
          the given frame are available in the ground truth, e.g. if they were
          resolved in the experiment.
      * 'rigidgroups_group_exists': Mask denoting whether given group is in
          principle present for given amino acid type.
      * 'rigidgroups_group_is_ambiguous': Mask denoting whether frame is
          affected by naming ambiguity.
      * 'rigidgroups_alt_gt_frames': 8 Frames with alternative atom renaming
          corresponding to 'all_atom_positions' represented as flat
          12 dimensional array.
  """
  # 0: 'backbone group',
  # 1: 'pre-omega-group', (empty)
  # 2: 'phi-group', (currently empty, because it defines only hydrogens)
  # 3: 'psi-group',
  # 4,5,6,7: 'chi1,2,3,4-group'
  aatype_in_shape = aatype.shape

  # If there is a batch axis, just flatten it away, and reshape everything
  # back at the end of the function.
  aatype = jnp.reshape(aatype, [-1])
  all_atom_positions = jnp.reshape(all_atom_positions, [-1, 37, 3])
  all_atom_mask = jnp.reshape(all_atom_mask, [-1, 37])

  # Create an array with the atom names.
  # shape (num_restypes, num_rigidgroups, 3_atoms): (21, 8, 3)
  restype_rigidgroup_base_atom_names = np.full([21, 8, 3], '', dtype=object)

  # 0: backbone frame
  restype_rigidgroup_base_atom_names[:, 0, :] = ['C', 'CA', 'N']

  # 3: 'psi-group'
  restype_rigidgroup_base_atom_names[:, 3, :] = ['CA', 'C', 'O']

  # 4,5,6,7: 'chi1,2,3,4-group'
  for restype, restype_letter in enumerate(residue_constants.restypes):
    resname = residue_constants.restype_1to3[restype_letter]
    for chi_idx in range(4):
      if residue_constants.chi_angles_mask[restype][chi_idx]:
        atom_names = residue_constants.chi_angles_atoms[resname][chi_idx]
        # The last three of the four chi atoms define the group's frame.
        restype_rigidgroup_base_atom_names[
            restype, chi_idx + 4, :] = atom_names[1:]

  # Create mask for existing rigid groups.
  restype_rigidgroup_mask = np.zeros([21, 8], dtype=np.float32)
  restype_rigidgroup_mask[:, 0] = 1
  restype_rigidgroup_mask[:, 3] = 1
  restype_rigidgroup_mask[:20, 4:] = residue_constants.chi_angles_mask

  # Translate atom names into atom37 indices.
  # The empty name (group not defined) is mapped to index 0 as a placeholder.
  lookuptable = residue_constants.atom_order.copy()
  lookuptable[''] = 0
  restype_rigidgroup_base_atom37_idx = np.vectorize(lambda x: lookuptable[x])(
      restype_rigidgroup_base_atom_names)

  # Compute the gather indices for all residues in the chain.
  # shape (N, 8, 3)
  residx_rigidgroup_base_atom37_idx = utils.batched_gather(
      restype_rigidgroup_base_atom37_idx, aatype)

  # Gather the base atom positions for each rigid group.
  base_atom_pos = utils.batched_gather(
      all_atom_positions,
      residx_rigidgroup_base_atom37_idx,
      batch_dims=1)

  # Compute the Rigids.
  gt_frames = r3.rigids_from_3_points(
      point_on_neg_x_axis=r3.vecs_from_tensor(base_atom_pos[:, :, 0, :]),
      origin=r3.vecs_from_tensor(base_atom_pos[:, :, 1, :]),
      point_on_xy_plane=r3.vecs_from_tensor(base_atom_pos[:, :, 2, :])
  )

  # Compute a mask whether the group exists.
  # (N, 8)
  group_exists = utils.batched_gather(restype_rigidgroup_mask, aatype)

  # Compute a mask whether ground truth exists for the group
  gt_atoms_exist = utils.batched_gather(  # shape (N, 8, 3)
      all_atom_mask.astype(jnp.float32),
      residx_rigidgroup_base_atom37_idx,
      batch_dims=1)
  gt_exists = jnp.min(gt_atoms_exist, axis=-1) * group_exists  # (N, 8)

  # Adapt backbone frame to old convention (mirror x-axis and z-axis).
  rots = np.tile(np.eye(3, dtype=np.float32), [8, 1, 1])
  rots[0, 0, 0] = -1
  rots[0, 2, 2] = -1
  gt_frames = r3.rigids_mul_rots(gt_frames, r3.rots_from_tensor3x3(rots))

  # The frames for ambiguous rigid groups are just rotated by 180 degree around
  # the x-axis. The ambiguous group is always the last chi-group.
  restype_rigidgroup_is_ambiguous = np.zeros([21, 8], dtype=np.float32)
  restype_rigidgroup_rots = np.tile(np.eye(3, dtype=np.float32), [21, 8, 1, 1])

  # Only the residue names (keys) are needed here, not the swap tables.
  for resname in residue_constants.residue_atom_renaming_swaps:
    restype = residue_constants.restype_order[
        residue_constants.restype_3to1[resname]]
    chi_idx = int(sum(residue_constants.chi_angles_mask[restype]) - 1)
    restype_rigidgroup_is_ambiguous[restype, chi_idx + 4] = 1
    restype_rigidgroup_rots[restype, chi_idx + 4, 1, 1] = -1
    restype_rigidgroup_rots[restype, chi_idx + 4, 2, 2] = -1

  # Gather the ambiguity information for each residue.
  residx_rigidgroup_is_ambiguous = utils.batched_gather(
      restype_rigidgroup_is_ambiguous, aatype)
  residx_rigidgroup_ambiguity_rot = utils.batched_gather(
      restype_rigidgroup_rots, aatype)

  # Create the alternative ground truth frames.
  alt_gt_frames = r3.rigids_mul_rots(
      gt_frames, r3.rots_from_tensor3x3(residx_rigidgroup_ambiguity_rot))

  gt_frames_flat12 = r3.rigids_to_tensor_flat12(gt_frames)
  alt_gt_frames_flat12 = r3.rigids_to_tensor_flat12(alt_gt_frames)

  # reshape back to original residue layout
  gt_frames_flat12 = jnp.reshape(gt_frames_flat12, aatype_in_shape + (8, 12))
  gt_exists = jnp.reshape(gt_exists, aatype_in_shape + (8,))
  group_exists = jnp.reshape(group_exists, aatype_in_shape + (8,))
  residx_rigidgroup_is_ambiguous = jnp.reshape(residx_rigidgroup_is_ambiguous,
                                               aatype_in_shape + (8,))
  alt_gt_frames_flat12 = jnp.reshape(alt_gt_frames_flat12,
                                     aatype_in_shape + (8, 12,))

  return {
      'rigidgroups_gt_frames': gt_frames_flat12,  # (..., 8, 12)
      'rigidgroups_gt_exists': gt_exists,  # (..., 8)
      'rigidgroups_group_exists': group_exists,  # (..., 8)
      'rigidgroups_group_is_ambiguous':
          residx_rigidgroup_is_ambiguous,  # (..., 8)
      'rigidgroups_alt_gt_frames': alt_gt_frames_flat12,  # (..., 8, 12)
  }
270
+
271
+
272
+ def atom37_to_torsion_angles(
273
+ aatype: jnp.ndarray, # (B, N)
274
+ all_atom_pos: jnp.ndarray, # (B, N, 37, 3)
275
+ all_atom_mask: jnp.ndarray, # (B, N, 37)
276
+ placeholder_for_undefined=False,
277
+ ) -> Dict[str, jnp.ndarray]:
278
+ """Computes the 7 torsion angles (in sin, cos encoding) for each residue.
279
+
280
+ The 7 torsion angles are in the order
281
+ '[pre_omega, phi, psi, chi_1, chi_2, chi_3, chi_4]',
282
+ here pre_omega denotes the omega torsion angle between the given amino acid
283
+ and the previous amino acid.
284
+
285
+ Args:
286
+ aatype: Amino acid type, given as array with integers.
287
+ all_atom_pos: atom37 representation of all atom coordinates.
288
+ all_atom_mask: atom37 representation of mask on all atom coordinates.
289
+ placeholder_for_undefined: flag denoting whether to set masked torsion
290
+ angles to zero.
291
+ Returns:
292
+ Dict containing:
293
+ * 'torsion_angles_sin_cos': Array with shape (B, N, 7, 2) where the final
294
+ 2 dimensions denote sin and cos respectively
295
+ * 'alt_torsion_angles_sin_cos': same as 'torsion_angles_sin_cos', but
296
+ with the angle shifted by pi for all chi angles affected by the naming
297
+ ambiguities.
298
+ * 'torsion_angles_mask': Mask for which chi angles are present.
299
+ """
300
+
301
+ # Map aatype > 20 to 'Unknown' (20).
302
+ aatype = jnp.minimum(aatype, 20)
303
+
304
+ # Compute the backbone angles.
305
+ num_batch, num_res = aatype.shape
306
+
307
+ pad = jnp.zeros([num_batch, 1, 37, 3], jnp.float32)
308
+ prev_all_atom_pos = jnp.concatenate([pad, all_atom_pos[:, :-1, :, :]], axis=1)
309
+
310
+ pad = jnp.zeros([num_batch, 1, 37], jnp.float32)
311
+ prev_all_atom_mask = jnp.concatenate([pad, all_atom_mask[:, :-1, :]], axis=1)
312
+
313
+ # For each torsion angle collect the 4 atom positions that define this angle.
314
+ # shape (B, N, atoms=4, xyz=3)
315
+ pre_omega_atom_pos = jnp.concatenate(
316
+ [prev_all_atom_pos[:, :, 1:3, :], # prev CA, C
317
+ all_atom_pos[:, :, 0:2, :] # this N, CA
318
+ ], axis=-2)
319
+ phi_atom_pos = jnp.concatenate(
320
+ [prev_all_atom_pos[:, :, 2:3, :], # prev C
321
+ all_atom_pos[:, :, 0:3, :] # this N, CA, C
322
+ ], axis=-2)
323
+ psi_atom_pos = jnp.concatenate(
324
+ [all_atom_pos[:, :, 0:3, :], # this N, CA, C
325
+ all_atom_pos[:, :, 4:5, :] # this O
326
+ ], axis=-2)
327
+
328
+ # Collect the masks from these atoms.
329
+ # Shape [batch, num_res]
330
+ pre_omega_mask = (
331
+ jnp.prod(prev_all_atom_mask[:, :, 1:3], axis=-1) # prev CA, C
332
+ * jnp.prod(all_atom_mask[:, :, 0:2], axis=-1)) # this N, CA
333
+ phi_mask = (
334
+ prev_all_atom_mask[:, :, 2] # prev C
335
+ * jnp.prod(all_atom_mask[:, :, 0:3], axis=-1)) # this N, CA, C
336
+ psi_mask = (
337
+ jnp.prod(all_atom_mask[:, :, 0:3], axis=-1) * # this N, CA, C
338
+ all_atom_mask[:, :, 4]) # this O
339
+
340
+ # Collect the atoms for the chi-angles.
341
+ # Compute the table of chi angle indices. Shape: [restypes, chis=4, atoms=4].
342
+ chi_atom_indices = get_chi_atom_indices()
343
+ # Select atoms to compute chis. Shape: [batch, num_res, chis=4, atoms=4].
344
+ atom_indices = utils.batched_gather(
345
+ params=chi_atom_indices, indices=aatype, axis=0, batch_dims=0)
346
+ # Gather atom positions. Shape: [batch, num_res, chis=4, atoms=4, xyz=3].
347
+ chis_atom_pos = utils.batched_gather(
348
+ params=all_atom_pos, indices=atom_indices, axis=-2,
349
+ batch_dims=2)
350
+
351
+ # Copy the chi angle mask, add the UNKNOWN residue. Shape: [restypes, 4].
352
+ chi_angles_mask = list(residue_constants.chi_angles_mask)
353
+ chi_angles_mask.append([0.0, 0.0, 0.0, 0.0])
354
+ chi_angles_mask = jnp.asarray(chi_angles_mask)
355
+
356
+ # Compute the chi angle mask. I.e. which chis angles exist according to the
357
+ # aatype. Shape [batch, num_res, chis=4].
358
+ chis_mask = utils.batched_gather(params=chi_angles_mask, indices=aatype,
359
+ axis=0, batch_dims=0)
360
+
361
+ # Constrain the chis_mask to those chis, where the ground truth coordinates of
362
+ # all defining four atoms are available.
363
+ # Gather the chi angle atoms mask. Shape: [batch, num_res, chis=4, atoms=4].
364
+ chi_angle_atoms_mask = utils.batched_gather(
365
+ params=all_atom_mask, indices=atom_indices, axis=-1,
366
+ batch_dims=2)
367
+ # Check if all 4 chi angle atoms were set. Shape: [batch, num_res, chis=4].
368
+ chi_angle_atoms_mask = jnp.prod(chi_angle_atoms_mask, axis=[-1])
369
+ chis_mask = chis_mask * (chi_angle_atoms_mask).astype(jnp.float32)
370
+
371
+ # Stack all torsion angle atom positions.
372
+ # Shape (B, N, torsions=7, atoms=4, xyz=3)
373
+ torsions_atom_pos = jnp.concatenate(
374
+ [pre_omega_atom_pos[:, :, None, :, :],
375
+ phi_atom_pos[:, :, None, :, :],
376
+ psi_atom_pos[:, :, None, :, :],
377
+ chis_atom_pos
378
+ ], axis=2)
379
+
380
+ # Stack up masks for all torsion angles.
381
+ # shape (B, N, torsions=7)
382
+ torsion_angles_mask = jnp.concatenate(
383
+ [pre_omega_mask[:, :, None],
384
+ phi_mask[:, :, None],
385
+ psi_mask[:, :, None],
386
+ chis_mask
387
+ ], axis=2)
388
+
389
+ # Create a frame from the first three atoms:
390
+ # First atom: point on x-y-plane
391
+ # Second atom: point on negative x-axis
392
+ # Third atom: origin
393
+ # r3.Rigids (B, N, torsions=7)
394
+ torsion_frames = r3.rigids_from_3_points(
395
+ point_on_neg_x_axis=r3.vecs_from_tensor(torsions_atom_pos[:, :, :, 1, :]),
396
+ origin=r3.vecs_from_tensor(torsions_atom_pos[:, :, :, 2, :]),
397
+ point_on_xy_plane=r3.vecs_from_tensor(torsions_atom_pos[:, :, :, 0, :]))
398
+
399
+ # Compute the position of the forth atom in this frame (y and z coordinate
400
+ # define the chi angle)
401
+ # r3.Vecs (B, N, torsions=7)
402
+ forth_atom_rel_pos = r3.rigids_mul_vecs(
403
+ r3.invert_rigids(torsion_frames),
404
+ r3.vecs_from_tensor(torsions_atom_pos[:, :, :, 3, :]))
405
+
406
+ # Normalize to have the sin and cos of the torsion angle.
407
+ # jnp.ndarray (B, N, torsions=7, sincos=2)
408
+ torsion_angles_sin_cos = jnp.stack(
409
+ [forth_atom_rel_pos.z, forth_atom_rel_pos.y], axis=-1)
410
+ torsion_angles_sin_cos /= jnp.sqrt(
411
+ jnp.sum(jnp.square(torsion_angles_sin_cos), axis=-1, keepdims=True)
412
+ + 1e-8)
413
+
414
+ # Mirror psi, because we computed it from the Oxygen-atom.
415
+ torsion_angles_sin_cos *= jnp.asarray(
416
+ [1., 1., -1., 1., 1., 1., 1.])[None, None, :, None]
417
+
418
+ # Create alternative angles for ambiguous atom names.
419
+ chi_is_ambiguous = utils.batched_gather(
420
+ jnp.asarray(residue_constants.chi_pi_periodic), aatype)
421
+ mirror_torsion_angles = jnp.concatenate(
422
+ [jnp.ones([num_batch, num_res, 3]),
423
+ 1.0 - 2.0 * chi_is_ambiguous], axis=-1)
424
+ alt_torsion_angles_sin_cos = (
425
+ torsion_angles_sin_cos * mirror_torsion_angles[:, :, :, None])
426
+
427
+ if placeholder_for_undefined:
428
+ # Add placeholder torsions in place of undefined torsion angles
429
+ # (e.g. N-terminus pre-omega)
430
+ placeholder_torsions = jnp.stack([
431
+ jnp.ones(torsion_angles_sin_cos.shape[:-1]),
432
+ jnp.zeros(torsion_angles_sin_cos.shape[:-1])
433
+ ], axis=-1)
434
+ torsion_angles_sin_cos = torsion_angles_sin_cos * torsion_angles_mask[
435
+ ..., None] + placeholder_torsions * (1 - torsion_angles_mask[..., None])
436
+ alt_torsion_angles_sin_cos = alt_torsion_angles_sin_cos * torsion_angles_mask[
437
+ ..., None] + placeholder_torsions * (1 - torsion_angles_mask[..., None])
438
+
439
+ return {
440
+ 'torsion_angles_sin_cos': torsion_angles_sin_cos, # (B, N, 7, 2)
441
+ 'alt_torsion_angles_sin_cos': alt_torsion_angles_sin_cos, # (B, N, 7, 2)
442
+ 'torsion_angles_mask': torsion_angles_mask # (B, N, 7)
443
+ }
444
+
445
+
446
def torsion_angles_to_frames(
    aatype: jnp.ndarray,  # (N)
    backb_to_global: r3.Rigids,  # (N)
    torsion_angles_sin_cos: jnp.ndarray  # (N, 7, 2)
) -> r3.Rigids:  # (N, 8)
  """Build the eight rigid-group frames of each residue from its torsions.

  Jumper et al. (2021) Suppl. Alg. 24 "computeAllAtomCoordinates" lines 2-10
  Jumper et al. (2021) Suppl. Alg. 25 "makeRotX"

  Args:
    aatype: Residue types. Either a rank-1 integer array of residue indices,
      or a rank-2 non-integer array whose last axis is contracted against the
      residue-type axis of the default-frame table.
    backb_to_global: Rigid transformations mapping each backbone frame to the
      global frame.
    torsion_angles_sin_cos: (sin, cos) pairs of the 7 torsion angles.

  Returns:
    Frames of all rigid groups, expressed in the global frame.
  """
  aatype_is_index = jnp.issubdtype(aatype.dtype, jnp.integer)

  assert len(aatype.shape) == (1 if aatype_is_index else 2)
  assert len(backb_to_global.rot.xx.shape) == 1
  assert len(torsion_angles_sin_cos.shape) == 3
  assert torsion_angles_sin_cos.shape[1] == 7
  assert torsion_angles_sin_cos.shape[2] == 2

  # Look up the default 4x4 frame of every rigid group; hard residue indices
  # are gathered, soft residue weights are contracted with the table.
  frame_table = residue_constants.restype_rigid_group_default_frame
  if aatype_is_index:
    default_4x4 = utils.batched_gather(frame_table, aatype)
  else:
    default_4x4 = jnp.einsum("...a,abcd->...bcd", aatype, frame_table)

  # r3.Rigids with shape (N, 8)
  default_frames = r3.rigids_from_tensor4x4(default_4x4)

  # Prepend the identity rotation (sin=0, cos=1) for the backbone group so
  # that the angle arrays line up with the 8 rigid groups.
  num_residues = aatype.shape[0]
  sin_angles = jnp.concatenate(
      [jnp.zeros([num_residues, 1]), torsion_angles_sin_cos[..., 0]], axis=-1)
  cos_angles = jnp.concatenate(
      [jnp.ones([num_residues, 1]), torsion_angles_sin_cos[..., 1]], axis=-1)
  zeros = jnp.zeros_like(sin_angles)
  ones = jnp.ones_like(sin_angles)

  # Rotations about the x-axis, r3.Rots with shape (N, 8).
  all_rots = r3.Rots(ones, zeros, zeros,
                     zeros, cos_angles, -sin_angles,
                     zeros, sin_angles, cos_angles)

  # Rotate every default frame by its torsion angle.
  all_frames = r3.rigids_mul_rots(default_frames, all_rots)

  def _group(index):
    """Selects the frames of one rigid group, shape (N)."""
    return jax.tree_util.tree_map(lambda x: x[:, index], all_frames)

  # The chi2, chi3 and chi4 frames are relative to the preceding chi frame
  # rather than to the backbone, so compose them successively onto chi1.
  to_backb = _group(4)  # chi1 -> backbone
  chained = []
  for group_index in (5, 6, 7):
    to_backb = r3.rigids_mul_rigids(to_backb, _group(group_index))
    chained.append(to_backb)

  # Reassemble an r3.Rigids with shape (N, 8): groups 0-4 unchanged, followed
  # by the chained chi2, chi3 and chi4 frames.
  all_frames_to_backb = jax.tree_util.tree_map(
      lambda full, chi2, chi3, chi4: jnp.concatenate(
          [full[:, 0:5], chi2[:, None], chi3[:, None], chi4[:, None]],
          axis=-1),
      all_frames, *chained)

  # Move everything into the global frame, shape (N, 8).
  return r3.rigids_mul_rigids(
      jax.tree_util.tree_map(lambda x: x[:, None], backb_to_global),
      all_frames_to_backb)
539
+
540
+
541
def frames_and_literature_positions_to_atom14_pos(
    aatype: jnp.ndarray,  # (N)
    all_frames_to_global: r3.Rigids  # (N, 8)
) -> r3.Vecs:  # (N, 14)
  """Place the literature atom positions (atom14 encoding) in global space.

  Jumper et al. (2021) Suppl. Alg. 24 "computeAllAtomCoordinates" line 11

  Args:
    aatype: Residue types. Integer arrays are used as indices into the
      residue-constant tables; non-integer arrays are contracted against the
      residue-type axis of those tables.
    all_frames_to_global: All per-residue rigid-group frames.

  Returns:
    Positions of all atoms in the global frame.
  """
  group_table = residue_constants.restype_atom14_to_rigid_group
  position_table = residue_constants.restype_atom14_rigid_group_positions
  exists_table = residue_constants.restype_atom14_mask

  if jnp.issubdtype(aatype.dtype, jnp.integer):
    # Hard residue types: gather the rows of each table.
    residx_to_group_idx = utils.batched_gather(group_table, aatype)
    group_mask = jax.nn.one_hot(residx_to_group_idx, num_classes=8)
    group_pos = utils.batched_gather(position_table, aatype)
    mask = utils.batched_gather(exists_table, aatype)
  else:
    # Soft residue types: blend the table rows with the given weights.
    group_mask = jnp.einsum(
        "...a,abc->...bc", aatype, jax.nn.one_hot(group_table, 8))
    group_pos = jnp.einsum("...a,abc->...bc", aatype, position_table)
    mask = jnp.einsum("...a,ab->...b", aatype, exists_table)

  # Pick, for every atom, the frame of the rigid group it belongs to.
  # group_mask has shape (N, 14, 8); the result is r3.Rigids of shape (N, 14).
  map_atoms_to_global = jax.tree_util.tree_map(
      lambda x: jnp.sum(x[:, None, :] * group_mask, axis=-1),
      all_frames_to_global)

  # Literature positions in the local frame of each atom, r3.Vecs (N, 14).
  lit_positions = r3.vecs_from_tensor(group_pos)

  # Transform from the local frames to the global frame, r3.Vecs (N, 14).
  pred_positions = r3.rigids_mul_vecs(map_atoms_to_global, lit_positions)

  # Zero out the atoms that do not exist for the residue type.
  return jax.tree_util.tree_map(lambda x: x * mask, pred_positions)
587
+
588
+
589
def extreme_ca_ca_distance_violations(
    pred_atom_positions: jnp.ndarray,  # (N, 37(14), 3)
    pred_atom_mask: jnp.ndarray,  # (N, 37(14))
    residue_index: jnp.ndarray,  # (N)
    max_angstrom_tolerance=1.5
) -> jnp.ndarray:
  """Fraction of consecutive residues whose CA atoms are too far apart.

  A pair of sequence-adjacent residues counts as a violation when its CA-CA
  distance exceeds `residue_constants.ca_ca` by more than
  'max_angstrom_tolerance'.

  Args:
    pred_atom_positions: Atom positions in atom37/14 representation
    pred_atom_mask: Atom mask in atom37/14 representation
    residue_index: Residue index for given amino acid, this is assumed to be
      monotonically increasing.
    max_angstrom_tolerance: Maximum distance allowed to not count as violation.

  Returns:
    Fraction of consecutive CA-CA pairs with violation.
  """
  # CA is atom index 1 in both encodings.
  ca_pos = pred_atom_positions[:, 1, :]  # (N, 3)
  ca_mask = pred_atom_mask[:, 1]  # (N)

  # Pair residue i with residue i + 1; every array below has length N - 1.
  ca_ca_distance = jnp.sqrt(
      1e-6 + jnp.sum(squared_difference(ca_pos[:-1], ca_pos[1:]), axis=-1))
  is_violation = (
      ca_ca_distance - residue_constants.ca_ca) > max_angstrom_tolerance

  # Only score pairs that are adjacent in sequence and have both CA atoms.
  index_step = residue_index[1:] - residue_index[:-1]
  has_no_gap_mask = (index_step == 1.0).astype(jnp.float32)
  pair_mask = ca_mask[:-1] * ca_mask[1:] * has_no_gap_mask

  return utils.mask_mean(mask=pair_mask, value=is_violation)
621
+
622
+
623
def between_residue_bond_loss(
    pred_atom_positions: jnp.ndarray,  # (N, 37(14), 3)
    pred_atom_mask: jnp.ndarray,  # (N, 37(14))
    residue_index: jnp.ndarray,  # (N)
    aatype: jnp.ndarray,  # (N)
    tolerance_factor_soft=12.0,
    tolerance_factor_hard=12.0
) -> Dict[str, jnp.ndarray]:
  """Flat-bottom loss to penalize structural violations between residues.

  This is a loss penalizing any violation of the geometry around the peptide
  bond between consecutive amino acids. This loss corresponds to
  Jumper et al. (2021) Suppl. Sec. 1.9.11, eq 44, 45.

  Args:
    pred_atom_positions: Atom positions in atom37/14 representation
    pred_atom_mask: Atom mask in atom37/14 representation
    residue_index: Residue index for given amino acid, this is assumed to be
      monotonically increasing.
    aatype: Amino acid type of given residue
    tolerance_factor_soft: soft tolerance factor measured in standard deviations
      of pdb distributions
    tolerance_factor_hard: hard tolerance factor measured in standard deviations
      of pdb distributions

  Returns:
    Dict containing:
      * 'c_n_loss_mean': Loss for peptide bond length violations
      * 'ca_c_n_loss_mean': Loss for violations of bond angle around C spanned
          by CA, C, N
      * 'c_n_ca_loss_mean': Loss for violations of bond angle around N spanned
          by C, N, CA
      * 'per_residue_loss_sum': sum of all losses for each residue
      * 'per_residue_violation_mask': mask denoting all residues with violation
          present.
  """
  assert len(pred_atom_positions.shape) == 3
  assert len(pred_atom_mask.shape) == 2
  assert len(residue_index.shape) == 1
  assert len(aatype.shape) == 1

  # Get the positions of the relevant backbone atoms. "this" is residue i and
  # "next" is residue i + 1, so every array below has length N - 1.
  this_ca_pos = pred_atom_positions[:-1, 1, :]  # (N - 1, 3)
  this_ca_mask = pred_atom_mask[:-1, 1]         # (N - 1)
  this_c_pos = pred_atom_positions[:-1, 2, :]   # (N - 1, 3)
  this_c_mask = pred_atom_mask[:-1, 2]          # (N - 1)
  next_n_pos = pred_atom_positions[1:, 0, :]    # (N - 1, 3)
  next_n_mask = pred_atom_mask[1:, 0]           # (N - 1)
  next_ca_pos = pred_atom_positions[1:, 1, :]   # (N - 1, 3)
  next_ca_mask = pred_atom_mask[1:, 1]          # (N - 1)
  has_no_gap_mask = ((residue_index[1:] - residue_index[:-1]) == 1.0).astype(
      jnp.float32)

  # Compute loss for the C--N bond.
  c_n_bond_length = jnp.sqrt(
      1e-6 + jnp.sum(squared_difference(this_c_pos, next_n_pos), axis=-1))

  # The C-N bond to proline has slightly different length because of the ring.
  next_is_proline = (
      aatype[1:] == residue_constants.resname_to_idx['PRO']).astype(jnp.float32)
  gt_length = (
      (1. - next_is_proline) * residue_constants.between_res_bond_length_c_n[0]
      + next_is_proline * residue_constants.between_res_bond_length_c_n[1])
  gt_stddev = (
      (1. - next_is_proline) *
      residue_constants.between_res_bond_length_stddev_c_n[0] +
      next_is_proline * residue_constants.between_res_bond_length_stddev_c_n[1])
  # sqrt(eps + x^2) is a smooth stand-in for |x|.
  c_n_bond_length_error = jnp.sqrt(1e-6 +
                                   jnp.square(c_n_bond_length - gt_length))
  c_n_loss_per_residue = jax.nn.relu(
      c_n_bond_length_error - tolerance_factor_soft * gt_stddev)
  mask = this_c_mask * next_n_mask * has_no_gap_mask
  c_n_loss = jnp.sum(mask * c_n_loss_per_residue) / (jnp.sum(mask) + 1e-6)
  c_n_violation_mask = mask * (
      c_n_bond_length_error > (tolerance_factor_hard * gt_stddev))

  # Compute loss for the angles.
  ca_c_bond_length = jnp.sqrt(1e-6 + jnp.sum(
      squared_difference(this_ca_pos, this_c_pos), axis=-1))
  n_ca_bond_length = jnp.sqrt(1e-6 + jnp.sum(
      squared_difference(next_n_pos, next_ca_pos), axis=-1))

  c_ca_unit_vec = (this_ca_pos - this_c_pos) / ca_c_bond_length[:, None]
  c_n_unit_vec = (next_n_pos - this_c_pos) / c_n_bond_length[:, None]
  n_ca_unit_vec = (next_ca_pos - next_n_pos) / n_ca_bond_length[:, None]

  # Angle around C spanned by CA, C, N (compared in cosine space).
  ca_c_n_cos_angle = jnp.sum(c_ca_unit_vec * c_n_unit_vec, axis=-1)
  gt_angle = residue_constants.between_res_cos_angles_ca_c_n[0]
  # Use the spread of this angle's own cosine, mirroring the C-N-CA term
  # below. The previous code took the C-N bond-length stddev here, which is a
  # different quantity.
  gt_stddev = residue_constants.between_res_cos_angles_ca_c_n[1]
  ca_c_n_cos_angle_error = jnp.sqrt(
      1e-6 + jnp.square(ca_c_n_cos_angle - gt_angle))
  ca_c_n_loss_per_residue = jax.nn.relu(
      ca_c_n_cos_angle_error - tolerance_factor_soft * gt_stddev)
  mask = this_ca_mask * this_c_mask * next_n_mask * has_no_gap_mask
  ca_c_n_loss = jnp.sum(mask * ca_c_n_loss_per_residue) / (jnp.sum(mask) + 1e-6)
  ca_c_n_violation_mask = mask * (ca_c_n_cos_angle_error >
                                  (tolerance_factor_hard * gt_stddev))

  # Angle around N spanned by C, N, CA (compared in cosine space).
  c_n_ca_cos_angle = jnp.sum((-c_n_unit_vec) * n_ca_unit_vec, axis=-1)
  gt_angle = residue_constants.between_res_cos_angles_c_n_ca[0]
  gt_stddev = residue_constants.between_res_cos_angles_c_n_ca[1]
  c_n_ca_cos_angle_error = jnp.sqrt(
      1e-6 + jnp.square(c_n_ca_cos_angle - gt_angle))
  c_n_ca_loss_per_residue = jax.nn.relu(
      c_n_ca_cos_angle_error - tolerance_factor_soft * gt_stddev)
  mask = this_c_mask * next_n_mask * next_ca_mask * has_no_gap_mask
  c_n_ca_loss = jnp.sum(mask * c_n_ca_loss_per_residue) / (jnp.sum(mask) + 1e-6)
  c_n_ca_violation_mask = mask * (
      c_n_ca_cos_angle_error > (tolerance_factor_hard * gt_stddev))

  # Compute a per residue loss (equally distribute the loss to both
  # neighbouring residues). Padding on either side turns the N - 1 pair values
  # into N per-residue values.
  per_residue_loss_sum = (c_n_loss_per_residue +
                          ca_c_n_loss_per_residue +
                          c_n_ca_loss_per_residue)
  per_residue_loss_sum = 0.5 * (jnp.pad(per_residue_loss_sum, [[0, 1]]) +
                                jnp.pad(per_residue_loss_sum, [[1, 0]]))

  # Compute hard violations: a residue is flagged if either of the two pairs
  # it takes part in violates any of the three terms.
  violation_mask = jnp.max(
      jnp.stack([c_n_violation_mask,
                 ca_c_n_violation_mask,
                 c_n_ca_violation_mask]), axis=0)
  violation_mask = jnp.maximum(
      jnp.pad(violation_mask, [[0, 1]]),
      jnp.pad(violation_mask, [[1, 0]]))

  return {'c_n_loss_mean': c_n_loss,  # shape ()
          'ca_c_n_loss_mean': ca_c_n_loss,  # shape ()
          'c_n_ca_loss_mean': c_n_ca_loss,  # shape ()
          'per_residue_loss_sum': per_residue_loss_sum,  # shape (N)
          'per_residue_violation_mask': violation_mask  # shape (N)
         }
756
+
757
+
758
def between_residue_clash_loss(
    atom14_pred_positions: jnp.ndarray,  # (N, 14, 3)
    atom14_atom_exists: jnp.ndarray,  # (N, 14)
    atom14_atom_radius: jnp.ndarray,  # (N, 14)
    residue_index: jnp.ndarray,  # (N)
    overlap_tolerance_soft=1.5,
    overlap_tolerance_hard=1.5
) -> Dict[str, jnp.ndarray]:
  """Penalty for steric clashes between atoms of different residues.

  Non-bonded atoms from different residues are penalized when they come
  closer than the sum of their radii minus a tolerance. This corresponds to
  the different-residue part of
  Jumper et al. (2021) Suppl. Sec. 1.9.11, eq 46.

  Args:
    atom14_pred_positions: Predicted positions of atoms in
      global prediction frame
    atom14_atom_exists: Mask denoting whether atom at positions exists for given
      amino acid type
    atom14_atom_radius: Van der Waals radius for each atom.
    residue_index: Residue index for given amino acid.
    overlap_tolerance_soft: Soft tolerance factor.
    overlap_tolerance_hard: Hard tolerance factor.

  Returns:
    Dict containing:
      * 'mean_loss': average clash loss
      * 'per_atom_loss_sum': sum of all clash losses per atom, shape (N, 14)
      * 'per_atom_clash_mask': mask whether atom clashes with any other atom
          shape (N, 14)
  """
  assert len(atom14_pred_positions.shape) == 3
  assert len(atom14_atom_exists.shape) == 2
  assert len(atom14_atom_radius.shape) == 2
  assert len(residue_index.shape) == 1

  # Residue indices broadcast along the "row" and "column" residue axes.
  row_index = residue_index[:, None, None, None]
  col_index = residue_index[None, :, None, None]

  # All-against-all atom distances, shape (N, N, 14, 14).
  dists = jnp.sqrt(1e-10 + jnp.sum(
      squared_difference(
          atom14_pred_positions[:, None, :, None, :],
          atom14_pred_positions[None, :, None, :, :]),
      axis=-1))

  # Both atoms of a pair have to exist, shape (N, N, 14, 14).
  pair_mask = (atom14_atom_exists[:, None, :, None] *
               atom14_atom_exists[None, :, None, :])

  # Keep every residue pair once (strictly upper triangle). This also drops
  # the diagonal, i.e. atom pairs within one residue, which are handled
  # separately.
  pair_mask = pair_mask * (row_index < col_index)

  # The backbone C--N bond between subsequent residues is not a clash.
  c_one_hot = jax.nn.one_hot(2, num_classes=14)
  n_one_hot = jax.nn.one_hot(0, num_classes=14)
  neighbour_mask = (row_index + 1) == col_index
  c_n_bonds = (neighbour_mask *
               c_one_hot[None, None, :, None] *
               n_one_hot[None, None, None, :])
  pair_mask = pair_mask * (1. - c_n_bonds)

  # A disulfide bridge between two cysteines is not a clash: drop every pair
  # in which both atoms sit in the atom14 slot of the cysteine SG.
  cys_sg_idx = residue_constants.restype_name_to_atom14_names['CYS'].index('SG')
  cys_sg_one_hot = jax.nn.one_hot(cys_sg_idx, num_classes=14)
  disulfide_bonds = (cys_sg_one_hot[None, None, :, None] *
                     cys_sg_one_hot[None, None, None, :])
  pair_mask = pair_mask * (1. - disulfide_bonds)

  # Smallest allowed distance per pair, shape (N, N, 14, 14).
  dists_lower_bound = pair_mask * (atom14_atom_radius[:, None, :, None] +
                                   atom14_atom_radius[None, :, None, :])

  # How far each pair undershoots the softened bound, shape (N, N, 14, 14).
  too_close_error = pair_mask * jax.nn.relu(
      dists_lower_bound - overlap_tolerance_soft - dists)

  # Average over all scored pairs, shape ().
  mean_loss = jnp.sum(too_close_error) / (1e-6 + jnp.sum(pair_mask))

  # Only the upper triangle is populated, so add the reduction over the "row"
  # axes to the one over the "column" axes to credit both atoms of each pair.
  # shape (N, 14)
  per_atom_loss_sum = (jnp.sum(too_close_error, axis=(0, 2)) +
                       jnp.sum(too_close_error, axis=(1, 3)))

  # Hard clashes, shape (N, N, 14, 14).
  clash_mask = pair_mask * (
      dists < (dists_lower_bound - overlap_tolerance_hard))

  # An atom clashes if it takes part in any clashing pair, shape (N, 14).
  per_atom_clash_mask = jnp.maximum(
      jnp.max(clash_mask, axis=(0, 2)),
      jnp.max(clash_mask, axis=(1, 3)))

  return {'mean_loss': mean_loss,  # shape ()
          'per_atom_loss_sum': per_atom_loss_sum,  # shape (N, 14)
          'per_atom_clash_mask': per_atom_clash_mask  # shape (N, 14)
         }
865
+
866
+
867
def within_residue_violations(
    atom14_pred_positions: jnp.ndarray,  # (N, 14, 3)
    atom14_atom_exists: jnp.ndarray,  # (N, 14)
    atom14_dists_lower_bound: jnp.ndarray,  # (N, 14, 14)
    atom14_dists_upper_bound: jnp.ndarray,  # (N, 14, 14)
    tighten_bounds_for_loss=0.0,
) -> Dict[str, jnp.ndarray]:
  """Penalty for distance-bound violations between atoms of one residue.

  Every pair of distinct atoms inside a residue is compared against its lower
  and upper distance bound. This corresponds to the same-residue part of
  Jumper et al. (2021) Suppl. Sec. 1.9.11, eq 46.

  Args:
    atom14_pred_positions: Predicted positions of atoms in
      global prediction frame
    atom14_atom_exists: Mask denoting whether atom at positions exists for given
      amino acid type
    atom14_dists_lower_bound: Lower bound on allowed distances.
    atom14_dists_upper_bound: Upper bound on allowed distances
    tighten_bounds_for_loss: Amount by which both bounds are moved inwards
      for the loss term only; the violation mask uses the bounds as given.

  Returns:
    Dict containing:
      * 'per_atom_loss_sum': sum of all violation losses per atom, shape (N, 14)
      * 'per_atom_violations': mask whether atom violates a bound with any
          other atom of its residue, shape (N, 14)
  """
  assert len(atom14_pred_positions.shape) == 3
  assert len(atom14_atom_exists.shape) == 2
  assert len(atom14_dists_lower_bound.shape) == 3
  assert len(atom14_dists_upper_bound.shape) == 3

  # Score pairs of distinct atoms that both exist, shape (N, 14, 14).
  off_diagonal = 1. - jnp.eye(14, 14)[None]
  pair_mask = off_diagonal * (atom14_atom_exists[:, :, None] *
                              atom14_atom_exists[:, None, :])

  # Intra-residue distance matrix, shape (N, 14, 14).
  dists = jnp.sqrt(1e-10 + jnp.sum(
      squared_difference(
          atom14_pred_positions[:, :, None, :],
          atom14_pred_positions[:, None, :, :]),
      axis=-1))

  # Flat-bottom loss against the tightened bounds, shape (N, 14, 14).
  below_lower = jax.nn.relu(
      atom14_dists_lower_bound + tighten_bounds_for_loss - dists)
  above_upper = jax.nn.relu(
      dists - (atom14_dists_upper_bound - tighten_bounds_for_loss))
  loss = pair_mask * (below_lower + above_upper)

  # Sum over both pair axes, shape (N, 14).
  per_atom_loss_sum = jnp.sum(loss, axis=1) + jnp.sum(loss, axis=2)

  # Violations of the untightened bounds, shape (N, 14, 14).
  out_of_bounds = ((dists < atom14_dists_lower_bound) |
                   (dists > atom14_dists_upper_bound))
  violations = pair_mask * out_of_bounds

  # An atom is flagged if any pair it takes part in violates, shape (N, 14).
  per_atom_violations = jnp.maximum(
      jnp.max(violations, axis=1), jnp.max(violations, axis=2))

  return {'per_atom_loss_sum': per_atom_loss_sum,  # shape (N, 14)
          'per_atom_violations': per_atom_violations  # shape (N, 14)
         }
941
+
942
+
943
def find_optimal_renaming(
    atom14_gt_positions: jnp.ndarray,  # (N, 14, 3)
    atom14_alt_gt_positions: jnp.ndarray,  # (N, 14, 3)
    atom14_atom_is_ambiguous: jnp.ndarray,  # (N, 14)
    atom14_gt_exists: jnp.ndarray,  # (N, 14)
    atom14_pred_positions: jnp.ndarray,  # (N, 14, 3)
    atom14_atom_exists: jnp.ndarray,  # (N, 14)
) -> jnp.ndarray:  # (N):
  """Choose, per residue, the ground-truth atom naming closest to the prediction.

  Jumper et al. (2021) Suppl. Alg. 26
  "renameSymmetricGroundTruthAtoms" lines 1-5

  Args:
    atom14_gt_positions: Ground truth positions in global frame of ground truth.
    atom14_alt_gt_positions: Alternate ground truth positions in global frame of
      ground truth with coordinates of ambiguous atoms swapped relative to
      'atom14_gt_positions'.
    atom14_atom_is_ambiguous: Mask denoting whether atom is among ambiguous
      atoms, see Jumper et al. (2021) Suppl. Table 3
    atom14_gt_exists: Mask denoting whether atom at positions exists in ground
      truth.
    atom14_pred_positions: Predicted positions of atoms in
      global prediction frame
    atom14_atom_exists: Mask denoting whether atom at positions exists for given
      amino acid type. Only its rank is checked here.

  Returns:
    Float array of shape [N] with 1. where atom14_alt_gt_positions is closer to
    prediction and 0. otherwise
  """
  assert len(atom14_gt_positions.shape) == 3
  assert len(atom14_alt_gt_positions.shape) == 3
  assert len(atom14_atom_is_ambiguous.shape) == 2
  assert len(atom14_gt_exists.shape) == 2
  assert len(atom14_pred_positions.shape) == 3
  assert len(atom14_atom_exists.shape) == 2

  def _all_pairs_dists(positions):
    """All-against-all atom distances, (N, 14, 3) -> (N, N, 14, 14)."""
    return jnp.sqrt(1e-10 + jnp.sum(
        squared_difference(
            positions[:, None, :, None, :],
            positions[None, :, None, :, :]),
        axis=-1))

  pred_dists = _all_pairs_dists(atom14_pred_positions)
  gt_dists = _all_pairs_dists(atom14_gt_positions)
  alt_gt_dists = _all_pairs_dists(atom14_alt_gt_positions)

  # Smoothed absolute distance errors of the prediction against each naming
  # (lower is better), shape (N, N, 14, 14).
  lddt = jnp.sqrt(1e-10 + squared_difference(pred_dists, gt_dists))
  alt_lddt = jnp.sqrt(1e-10 + squared_difference(pred_dists, alt_gt_dists))

  # Score only pairs of an ambiguous atom (rows) with a non-ambiguous atom
  # (cols), both present in the ground truth. Shape (N, N, 14, 14).
  mask = (atom14_gt_exists[:, None, :, None] *  # rows
          atom14_atom_is_ambiguous[:, None, :, None] *  # rows
          atom14_gt_exists[None, :, None, :] *  # cols
          (1. - atom14_atom_is_ambiguous[None, :, None, :]))  # cols

  # Total error of each residue's ambiguous atoms against all non-ambiguous
  # atoms, shape (N).
  per_res_lddt = jnp.sum(mask * lddt, axis=(1, 2, 3))
  alt_per_res_lddt = jnp.sum(mask * alt_lddt, axis=(1, 2, 3))

  # The alternative naming wins where it gives the strictly smaller error.
  # shape (N)
  return (alt_per_res_lddt < per_res_lddt).astype(jnp.float32)
1025
+
1026
+
1027
def frame_aligned_point_error(
    pred_frames: r3.Rigids,  # shape (num_frames)
    target_frames: r3.Rigids,  # shape (num_frames)
    frames_mask: jnp.ndarray,  # shape (num_frames)
    pred_positions: r3.Vecs,  # shape (num_positions)
    target_positions: r3.Vecs,  # shape (num_positions)
    positions_mask: jnp.ndarray,  # shape (num_positions)
    length_scale: float,
    l1_clamp_distance: Optional[float] = None,
    epsilon=1e-4) -> jnp.ndarray:  # shape ()
  """Masked mean point error measured in every local frame.

  Jumper et al. (2021) Suppl. Alg. 28 "computeFAPE"

  Every position is expressed in each frame of its own structure; the error is
  the distance between the predicted and the target local coordinates.

  Args:
    pred_frames: num_frames reference frames for 'pred_positions'.
    target_frames: num_frames reference frames for 'target_positions'.
    frames_mask: Mask for frame pairs to use.
    pred_positions: num_positions predicted positions of the structure.
    target_positions: num_positions target positions of the structure.
    positions_mask: Mask on which positions to score.
    length_scale: length scale to divide loss by.
    l1_clamp_distance: Distance cutoff on error beyond which gradients will
      be zero. Falsy values (None or 0) disable clamping.
    epsilon: small value added under the square root and to the denominator
      of the masked average.

  Returns:
    Masked Frame Aligned Point Error.
  """
  assert pred_frames.rot.xx.ndim == 1
  assert target_frames.rot.xx.ndim == 1
  assert frames_mask.ndim == 1, frames_mask.ndim
  assert pred_positions.x.ndim == 1
  assert target_positions.x.ndim == 1
  assert positions_mask.ndim == 1

  def _in_local_frames(frames, positions):
    """Expresses all positions in all frames, r3.Vecs (num_frames, num_positions)."""
    global_to_local = jax.tree_util.tree_map(
        lambda r: r[:, None], r3.invert_rigids(frames))
    broadcast_positions = jax.tree_util.tree_map(
        lambda x: x[None, :], positions)
    return r3.rigids_mul_vecs(global_to_local, broadcast_positions)

  local_pred_pos = _in_local_frames(pred_frames, pred_positions)
  local_target_pos = _in_local_frames(target_frames, target_positions)

  # Distance between the two local views, (num_frames, num_positions).
  error_dist = jnp.sqrt(
      r3.vecs_squared_distance(local_pred_pos, local_target_pos)
      + epsilon)

  if l1_clamp_distance:
    error_dist = jnp.clip(error_dist, 0, l1_clamp_distance)

  # Scale, then blank out masked frames (rows) and masked positions (cols).
  normed_error = error_dist / length_scale
  normed_error = normed_error * frames_mask[:, None]
  normed_error = normed_error * positions_mask[None, :]

  # Number of scored (frame, position) pairs.
  normalization_factor = (
      jnp.sum(frames_mask, axis=-1) *
      jnp.sum(positions_mask, axis=-1))
  total_error = jnp.sum(normed_error, axis=(-2, -1))
  return total_error / (epsilon + normalization_factor)
1094
+
1095
+
1096
def _make_renaming_matrices():
  """Builds one 14x14 atom-renaming matrix per residue type.

  Residue types without ambiguous atom names get the identity matrix. For the
  others the matrix is the permutation that exchanges the atoms listed in
  `residue_constants.residue_atom_renaming_swaps`.

  Returns:
    float32 array with one (14, 14) matrix per entry of
    `residue_constants.restypes`, followed by one for 'UNK'.
  """
  restype_3 = [
      residue_constants.restype_1to3[res] for res in residue_constants.restypes
  ] + ['UNK']

  identity = np.eye(14, dtype=np.float32)
  # Start from "no renaming" everywhere.
  all_matrices = {res: np.eye(14, dtype=np.float32) for res in restype_3}

  for resname, swap in residue_constants.residue_atom_renaming_swaps.items():
    atom_names = residue_constants.restype_name_to_atom14_names[resname]
    # permutation[i] is the atom14 slot that slot i is renamed to.
    permutation = np.arange(14)
    for source_atom, target_atom in swap.items():
      source_index = atom_names.index(source_atom)
      target_index = atom_names.index(target_atom)
      permutation[source_index] = target_index
      permutation[target_index] = source_index
    # Row i of the result has its single 1 in column permutation[i].
    all_matrices[resname] = identity[permutation]

  return np.stack([all_matrices[restype] for restype in restype_3])


# Renaming matrices for all residue types, built once at import time.
RENAMING_MATRICES = _make_renaming_matrices()
1124
+
1125
+
1126
def get_alt_atom14(aatype, positions, mask):
  """Get alternative atom14 positions.

  Constructs renamed atom positions for ambiguous residues.

  Jumper et al. (2021) Suppl. Table 3 "Ambiguous atom names due to 180 degree-
  rotation-symmetry"

  Args:
    aatype: Amino acid at given position
    positions: Atom positions as r3.Vecs in atom14 representation, (N, 14)
    mask: Atom masks in atom14 representation, (N, 14)

  Returns:
    renamed atom positions, renamed atom mask
  """
  # Pick the renaming matrix of every residue in the sequence.
  # shape (num_res, 14, 14)
  renaming_transform = utils.batched_gather(
      jnp.asarray(RENAMING_MATRICES), aatype)

  def _rename(x):
    """Applies the per-residue renaming to one (N, 14) array."""
    return jnp.sum(x[:, :, None] * renaming_transform, axis=1)

  # Apply the renaming to each coordinate leaf separately. Multiplying the
  # whole `positions` container by an array (as was done before) only works if
  # the container itself defines `*`; doing the product inside the tree map
  # needs nothing beyond array leaves.
  alternative_positions = jax.tree_util.tree_map(_rename, positions)

  # Create the mask for the alternative ground truth (differs from the
  # ground truth mask, if only one of the atoms in an ambiguous pair has a
  # ground truth position)
  alternative_mask = jnp.sum(mask[..., None] * renaming_transform, axis=1)

  return alternative_positions, alternative_mask
af_backprop/alphafold/model/common_modules.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """A collection of common Haiku modules for use in protein folding."""
16
+ import haiku as hk
17
+ import jax.numpy as jnp
18
+
19
+
20
class Linear(hk.Module):
  """Protein folding specific Linear Module.

  This differs from the standard Haiku Linear in a few ways:
    * It supports inputs of arbitrary rank
    * Initializers are specified by strings
  """

  def __init__(self,
               num_output: int,
               initializer: str = 'linear',
               use_bias: bool = True,
               bias_init: float = 0.,
               name: str = 'linear'):
    """Constructs Linear Module.

    Args:
      num_output: number of output channels.
      initializer: What initializer to use, should be one of {'linear', 'relu',
        'zeros'}
      use_bias: Whether to include trainable bias
      bias_init: Value used to initialize bias.
      name: name of module, used for name scopes.
    """

    super().__init__(name=name)
    self.num_output = num_output
    self.initializer = initializer
    self.use_bias = use_bias
    self.bias_init = bias_init

  def __call__(self, inputs: jnp.ndarray) -> jnp.ndarray:
    """Connects Module.

    Args:
      inputs: Tensor of shape [..., num_channel]. The implementation swaps the
        last two axes, so the input needs at least two dimensions.

    Returns:
      output of shape [..., num_output]

    Raises:
      ValueError: if `initializer` is not one of 'linear', 'relu' or 'zeros'.
    """
    n_channels = int(inputs.shape[-1])

    weight_shape = [n_channels, self.num_output]
    if self.initializer == 'linear':
      weight_init = hk.initializers.VarianceScaling(mode='fan_in', scale=1.)
    elif self.initializer == 'relu':
      weight_init = hk.initializers.VarianceScaling(mode='fan_in', scale=2.)
    elif self.initializer == 'zeros':
      weight_init = hk.initializers.Constant(0.0)
    else:
      # Without this branch an unknown name left `weight_init` unbound and
      # surfaced as an UnboundLocalError at the get_parameter call below.
      raise ValueError(
          f'Unknown initializer {self.initializer!r}; expected one of '
          "'linear', 'relu' or 'zeros'.")

    weights = hk.get_parameter('weights', weight_shape, inputs.dtype,
                               weight_init)

    # this is equivalent to einsum('...c,cd->...d', inputs, weights)
    # but turns out to be slightly faster
    inputs = jnp.swapaxes(inputs, -1, -2)
    output = jnp.einsum('...cb,cd->...db', inputs, weights)
    output = jnp.swapaxes(output, -1, -2)

    if self.use_bias:
      bias = hk.get_parameter('bias', [self.num_output], inputs.dtype,
                              hk.initializers.Constant(self.bias_init))
      output += bias

    return output
af_backprop/alphafold/model/config.py ADDED
@@ -0,0 +1,412 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Model config."""
15
+
16
+ import copy
17
+ from alphafold.model.tf import shape_placeholders
18
+ import ml_collections
19
+
20
+
21
# Module-level aliases for the shape placeholders that the `feat` entries of
# CONFIG use to name dimensions.
NUM_RES = shape_placeholders.NUM_RES
NUM_MSA_SEQ = shape_placeholders.NUM_MSA_SEQ
NUM_EXTRA_SEQ = shape_placeholders.NUM_EXTRA_SEQ
NUM_TEMPLATES = shape_placeholders.NUM_TEMPLATES
25
+
26
+
27
def model_config(name: str) -> ml_collections.ConfigDict:
  """Return the ConfigDict for the CASP14 model called `name`.

  A deep copy of CONFIG is taken and the flattened overrides registered under
  `name` in CONFIG_DIFFS are applied to it, so CONFIG itself is never modified.

  Args:
    name: Key into CONFIG_DIFFS, e.g. 'model_1' or 'model_1_ptm'.

  Returns:
    The base config with the overrides for `name` applied.

  Raises:
    ValueError: if `name` is not a key of CONFIG_DIFFS.
  """
  if name in CONFIG_DIFFS:
    model_cfg = copy.deepcopy(CONFIG)
    model_cfg.update_from_flattened_dict(CONFIG_DIFFS[name])
    return model_cfg

  raise ValueError(f'Invalid model name {name}.')
35
+
36
+
37
# Per-model overrides keyed by flattened (dotted) config path; model_config()
# applies them on top of a copy of CONFIG.
CONFIG_DIFFS = {
    'model_1': {
        # Jumper et al. (2021) Suppl. Table 5, Model 1.1.1
        'data.common.max_extra_msa': 5120,
        'data.common.reduce_msa_clusters_by_max_templates': True,
        'data.common.use_templates': True,
        'model.embeddings_and_evoformer.template.embed_torsion_angles': True,
        'model.embeddings_and_evoformer.template.enabled': True
    },
    'model_2': {
        # Jumper et al. (2021) Suppl. Table 5, Model 1.1.2
        'data.common.reduce_msa_clusters_by_max_templates': True,
        'data.common.use_templates': True,
        'model.embeddings_and_evoformer.template.embed_torsion_angles': True,
        'model.embeddings_and_evoformer.template.enabled': True
    },
    'model_3': {
        # Jumper et al. (2021) Suppl. Table 5, Model 1.2.1
        'data.common.max_extra_msa': 5120,
    },
    'model_4': {
        # Jumper et al. (2021) Suppl. Table 5, Model 1.2.2
        'data.common.max_extra_msa': 5120,
    },
    'model_5': {
        # Jumper et al. (2021) Suppl. Table 5, Model 1.2.3
    },
}

# The following models are fine-tuned from the corresponding models above
# with an additional predicted_aligned_error head that can produce
# predicted TM-score (pTM) and predicted aligned errors. Each `<name>_ptm`
# entry is the base entry plus a non-zero loss weight for that head. The
# comprehension is fully evaluated before `update` mutates the dict.
CONFIG_DIFFS.update({
    f'{base_name}_ptm': {
        **base_diff,
        'model.heads.predicted_aligned_error.weight': 0.1,
    }
    for base_name, base_diff in CONFIG_DIFFS.items()
})
95
+
96
# Base configuration. model_config() deep-copies this object and applies the
# per-model overrides from CONFIG_DIFFS, so the values below are the defaults
# for any path that a model does not override.
CONFIG = ml_collections.ConfigDict({
    'data': {
        'common': {
            'masked_msa': {
                'profile_prob': 0.1,
                'same_prob': 0.1,
                'uniform_prob': 0.1
            },
            'max_extra_msa': 1024,
            'msa_cluster_features': True,
            'num_recycle': 3,
            'reduce_msa_clusters_by_max_templates': False,
            'resample_msa_in_recycling': True,
            'template_features': [
                'template_all_atom_positions', 'template_sum_probs',
                'template_aatype', 'template_all_atom_masks',
                'template_domain_names'
            ],
            'unsupervised_features': [
                'aatype', 'residue_index', 'sequence', 'msa', 'domain_name',
                'num_alignments', 'seq_length', 'between_segment_residues',
                'deletion_matrix'
            ],
            'use_templates': False,
        },
        'eval': {
            # Feature name -> list with one entry per dimension: a NUM_*
            # placeholder, or None where no placeholder is given.
            'feat': {
                'aatype': [NUM_RES],
                'all_atom_mask': [NUM_RES, None],
                'all_atom_positions': [NUM_RES, None, None],
                'alt_chi_angles': [NUM_RES, None],
                'atom14_alt_gt_exists': [NUM_RES, None],
                'atom14_alt_gt_positions': [NUM_RES, None, None],
                'atom14_atom_exists': [NUM_RES, None],
                'atom14_atom_is_ambiguous': [NUM_RES, None],
                'atom14_gt_exists': [NUM_RES, None],
                'atom14_gt_positions': [NUM_RES, None, None],
                'atom37_atom_exists': [NUM_RES, None],
                'backbone_affine_mask': [NUM_RES],
                'backbone_affine_tensor': [NUM_RES, None],
                'bert_mask': [NUM_MSA_SEQ, NUM_RES],
                'chi_angles': [NUM_RES, None],
                'chi_mask': [NUM_RES, None],
                'extra_deletion_value': [NUM_EXTRA_SEQ, NUM_RES],
                'extra_has_deletion': [NUM_EXTRA_SEQ, NUM_RES],
                'extra_msa': [NUM_EXTRA_SEQ, NUM_RES],
                'extra_msa_mask': [NUM_EXTRA_SEQ, NUM_RES],
                'extra_msa_row_mask': [NUM_EXTRA_SEQ],
                'is_distillation': [],
                'msa_feat': [NUM_MSA_SEQ, NUM_RES, None],
                'msa_mask': [NUM_MSA_SEQ, NUM_RES],
                'msa_row_mask': [NUM_MSA_SEQ],
                'pseudo_beta': [NUM_RES, None],
                'pseudo_beta_mask': [NUM_RES],
                'random_crop_to_size_seed': [None],
                'residue_index': [NUM_RES],
                'residx_atom14_to_atom37': [NUM_RES, None],
                'residx_atom37_to_atom14': [NUM_RES, None],
                'resolution': [],
                'rigidgroups_alt_gt_frames': [NUM_RES, None, None],
                'rigidgroups_group_exists': [NUM_RES, None],
                'rigidgroups_group_is_ambiguous': [NUM_RES, None],
                'rigidgroups_gt_exists': [NUM_RES, None],
                'rigidgroups_gt_frames': [NUM_RES, None, None],
                'seq_length': [],
                'seq_mask': [NUM_RES],
                'target_feat': [NUM_RES, None],
                'template_aatype': [NUM_TEMPLATES, NUM_RES],
                'template_all_atom_masks': [NUM_TEMPLATES, NUM_RES, None],
                'template_all_atom_positions': [
                    NUM_TEMPLATES, NUM_RES, None, None],
                'template_backbone_affine_mask': [NUM_TEMPLATES, NUM_RES],
                'template_backbone_affine_tensor': [
                    NUM_TEMPLATES, NUM_RES, None],
                'template_mask': [NUM_TEMPLATES],
                'template_pseudo_beta': [NUM_TEMPLATES, NUM_RES, None],
                'template_pseudo_beta_mask': [NUM_TEMPLATES, NUM_RES],
                'template_sum_probs': [NUM_TEMPLATES, None],
                'true_msa': [NUM_MSA_SEQ, NUM_RES]
            },
            'fixed_size': True,
            'subsample_templates': False,  # We want top templates.
            'masked_msa_replace_fraction': 0.15,
            'max_msa_clusters': 512,
            'max_templates': 4,
            'num_ensemble': 1,
        },
    },
    'model': {
        'embeddings_and_evoformer': {
            'evoformer_num_block': 48,
            # One sub-dict per evoformer sub-module.
            'evoformer': {
                'msa_row_attention_with_pair_bias': {
                    'dropout_rate': 0.15,
                    'gating': True,
                    'num_head': 8,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'msa_column_attention': {
                    'dropout_rate': 0.0,
                    'gating': True,
                    'num_head': 8,
                    'orientation': 'per_column',
                    'shared_dropout': True
                },
                'msa_transition': {
                    'dropout_rate': 0.0,
                    'num_intermediate_factor': 4,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'outer_product_mean': {
                    'chunk_size': 128,
                    'dropout_rate': 0.0,
                    'num_outer_channel': 32,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'triangle_attention_starting_node': {
                    'dropout_rate': 0.25,
                    'gating': True,
                    'num_head': 4,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'triangle_attention_ending_node': {
                    'dropout_rate': 0.25,
                    'gating': True,
                    'num_head': 4,
                    'orientation': 'per_column',
                    'shared_dropout': True
                },
                'triangle_multiplication_outgoing': {
                    'dropout_rate': 0.25,
                    'equation': 'ikc,jkc->ijc',
                    'num_intermediate_channel': 128,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'triangle_multiplication_incoming': {
                    'dropout_rate': 0.25,
                    'equation': 'kjc,kic->ijc',
                    'num_intermediate_channel': 128,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'pair_transition': {
                    'dropout_rate': 0.0,
                    'num_intermediate_factor': 4,
                    'orientation': 'per_row',
                    'shared_dropout': True
                }
            },
            'extra_msa_channel': 64,
            'extra_msa_stack_num_block': 4,
            'max_relative_feature': 32,
            'custom_relative_features': False,
            'msa_channel': 256,
            'pair_channel': 128,
            'prev_pos': {
                'min_bin': 3.25,
                'max_bin': 20.75,
                'num_bins': 15
            },
            'recycle_features': True,
            'recycle_pos': True,
            'recycle_dgram': False,
            'backprop_dgram': False,
            'backprop_dgram_temp': 1.0,
            'seq_channel': 384,
            # Template settings; `enabled` and `embed_torsion_angles` are
            # switched on for some models through CONFIG_DIFFS.
            'template': {
                'attention': {
                    'gating': False,
                    'key_dim': 64,
                    'num_head': 4,
                    'value_dim': 64
                },
                'dgram_features': {
                    'min_bin': 3.25,
                    'max_bin': 50.75,
                    'num_bins': 39
                },
                'backprop_dgram': False,
                'backprop_dgram_temp': 1.0,
                'embed_torsion_angles': False,
                'enabled': False,
                'template_pair_stack': {
                    'num_block': 2,
                    'triangle_attention_starting_node': {
                        'dropout_rate': 0.25,
                        'gating': True,
                        'key_dim': 64,
                        'num_head': 4,
                        'orientation': 'per_row',
                        'shared_dropout': True,
                        'value_dim': 64
                    },
                    'triangle_attention_ending_node': {
                        'dropout_rate': 0.25,
                        'gating': True,
                        'key_dim': 64,
                        'num_head': 4,
                        'orientation': 'per_column',
                        'shared_dropout': True,
                        'value_dim': 64
                    },
                    'triangle_multiplication_outgoing': {
                        'dropout_rate': 0.25,
                        'equation': 'ikc,jkc->ijc',
                        'num_intermediate_channel': 64,
                        'orientation': 'per_row',
                        'shared_dropout': True
                    },
                    'triangle_multiplication_incoming': {
                        'dropout_rate': 0.25,
                        'equation': 'kjc,kic->ijc',
                        'num_intermediate_channel': 64,
                        'orientation': 'per_row',
                        'shared_dropout': True
                    },
                    'pair_transition': {
                        'dropout_rate': 0.0,
                        'num_intermediate_factor': 2,
                        'orientation': 'per_row',
                        'shared_dropout': True
                    }
                },
                'max_templates': 4,
                'subbatch_size': 128,
                'use_template_unit_vector': False,
            }
        },
        'global_config': {
            'mixed_precision': False,
            'deterministic': False,
            'subbatch_size': 4,
            'use_remat': False,
            'zero_init': True
        },
        # One sub-dict per head. `predicted_aligned_error.weight` is 0.0 here
        # and set to 0.1 for the `*_ptm` models in CONFIG_DIFFS.
        'heads': {
            'distogram': {
                'first_break': 2.3125,
                'last_break': 21.6875,
                'num_bins': 64,
                'weight': 0.3
            },
            'predicted_aligned_error': {
                # `num_bins - 1` bins uniformly space the
                # [0, max_error_bin A] range.
                # The final bin covers [max_error_bin A, +infty]
                # 31A gives bins with 0.5A width.
                'max_error_bin': 31.,
                'num_bins': 64,
                'num_channels': 128,
                'filter_by_resolution': True,
                'min_resolution': 0.1,
                'max_resolution': 3.0,
                'weight': 0.0,
            },
            'experimentally_resolved': {
                'filter_by_resolution': True,
                'max_resolution': 3.0,
                'min_resolution': 0.1,
                'weight': 0.01
            },
            'structure_module': {
                'num_layer': 8,
                'fape': {
                    'clamp_distance': 10.0,
                    'clamp_type': 'relu',
                    'loss_unit_distance': 10.0
                },
                'angle_norm_weight': 0.01,
                'chi_weight': 0.5,
                'clash_overlap_tolerance': 1.5,
                'compute_in_graph_metrics': True,
                'dropout': 0.1,
                'num_channel': 384,
                'num_head': 12,
                'num_layer_in_transition': 3,
                'num_point_qk': 4,
                'num_point_v': 8,
                'num_scalar_qk': 16,
                'num_scalar_v': 16,
                'position_scale': 10.0,
                'sidechain': {
                    'atom_clamp_distance': 10.0,
                    'num_channel': 128,
                    'num_residual_block': 2,
                    'weight_frac': 0.5,
                    'length_scale': 10.,
                },
                'structural_violation_loss_weight': 1.0,
                'violation_tolerance_factor': 12.0,
                'weight': 1.0
            },
            'predicted_lddt': {
                'filter_by_resolution': True,
                'max_resolution': 3.0,
                'min_resolution': 0.1,
                'num_bins': 50,
                'num_channels': 128,
                'weight': 0.01
            },
            'masked_msa': {
                'num_output': 23,
                'weight': 2.0
            },
        },
        'num_recycle': 3,
        'backprop_recycle': False,
        'resample_msa_in_recycling': True,
        'add_prev': False,
        'use_struct': True,
    },
})
af_backprop/alphafold/model/data.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Convenience functions for reading data."""
16
+
17
+ import io
18
+ import os
19
+ from typing import List
20
+ from alphafold.model import utils
21
+ import haiku as hk
22
+ import numpy as np
23
+ # Internal import (7716).
24
+
25
+
26
def casp_model_names(data_dir: str) -> List[str]:
  """List the entries of `<data_dir>/params` with their extension removed.

  Args:
    data_dir: Directory that contains a `params` sub-directory.

  Returns:
    One name per directory entry, in the order `os.listdir` reports them.
  """
  params_dir = os.path.join(data_dir, 'params')
  names = []
  for entry in os.listdir(params_dir):
    stem, _ = os.path.splitext(entry)
    names.append(stem)
  return names
29
+
30
+
31
def get_model_haiku_params(model_name: str, data_dir: str) -> hk.Params:
  """Load the parameters stored for `model_name` and convert them for Haiku.

  Args:
    model_name: Model name; the file read is
      `<data_dir>/params/params_<model_name>.npz`.
    data_dir: Directory that contains the `params` sub-directory.

  Returns:
    The result of `utils.flat_params_to_haiku` on the loaded archive.
  """
  npz_path = os.path.join(data_dir, 'params', f'params_{model_name}.npz')

  # Read the whole file into memory so NumPy parses an in-memory buffer.
  with open(npz_path, 'rb') as npz_file:
    raw_bytes = npz_file.read()

  flat_params = np.load(io.BytesIO(raw_bytes), allow_pickle=False)
  return utils.flat_params_to_haiku(flat_params)
af_backprop/alphafold/model/features.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Code to generate processed features."""
16
+ import copy
17
+ from typing import List, Mapping, Tuple
18
+ from alphafold.model.tf import input_pipeline
19
+ from alphafold.model.tf import proteins_dataset
20
+ import ml_collections
21
+ import numpy as np
22
+ import tensorflow.compat.v1 as tf
23
+
24
# Type alias: mapping from feature name to NumPy array.
FeatureDict = Mapping[str, np.ndarray]
25
+
26
+
27
def make_data_config(
    config: ml_collections.ConfigDict,
    num_res: int,
    ) -> Tuple[ml_collections.ConfigDict, List[str]]:
  """Derive the input-pipeline config and the list of features to load.

  Args:
    config: Full model config; only a deep copy of `config.data` is used, so
      the caller's object is left untouched.
    num_res: Number of residues, stored as `eval.crop_size` in the copy.

  Returns:
    A tuple (data config copy, feature names). The names are the unsupervised
    features, followed by the template features when `common.use_templates`
    is set.
  """
  data_cfg = copy.deepcopy(config.data)
  common_cfg = data_cfg.common

  # NOTE(review): `+=` is kept on purpose; if these are plain lists it also
  # extends the list stored in the copied config, as the original did.
  feature_names = common_cfg.unsupervised_features
  if common_cfg.use_templates:
    feature_names += common_cfg.template_features

  # The assignment is wrapped in `unlocked()`, as in the original code.
  with data_cfg.unlocked():
    data_cfg.eval.crop_size = num_res

  return data_cfg, feature_names
42
+
43
+
44
def tf_example_to_features(tf_example: tf.train.Example,
                           config: ml_collections.ConfigDict,
                           random_seed: int = 0) -> FeatureDict:
  """Run the TF input pipeline on a tf.train.Example and return NumPy features.

  If the example carries `deletion_matrix_int`, it is replaced in place by a
  float `deletion_matrix` feature, so `tf_example` itself is modified.

  Args:
    tf_example: Example holding the raw features; its first `seq_length`
      value is taken as the number of residues.
    config: Model config, passed to make_data_config().
    random_seed: Seed set on the TF graph built for this call.

  Returns:
    The processed features, without entries whose dtype equals 'O'.
  """
  num_res = int(tf_example.features.feature['seq_length'].int64_list.value[0])
  cfg, feature_names = make_data_config(config, num_res=num_res)

  has_int_deletions = 'deletion_matrix_int' in set(tf_example.features.feature)
  if has_int_deletions:
    int_values = (
        tf_example.features.feature['deletion_matrix_int'].int64_list.value)
    float_list = tf.train.FloatList(value=map(float, int_values))
    float_feature = tf.train.Feature(float_list=float_list)
    tf_example.features.feature['deletion_matrix'].CopyFrom(float_feature)
    del tf_example.features.feature['deletion_matrix_int']

  # Build the pipeline in a fresh graph placed on the CPU.
  graph = tf.Graph()
  with graph.as_default(), tf.device('/device:CPU:0'):
    tf.compat.v1.set_random_seed(random_seed)
    tensor_dict = proteins_dataset.create_tensor_dict(
        raw_data=tf_example.SerializeToString(),
        features=feature_names)
    processed_batch = input_pipeline.process_tensors_from_config(
        tensor_dict, cfg)

  graph.finalize()

  with tf.Session(graph=graph) as sess:
    features = sess.run(processed_batch)

  # Drop object-dtype entries.
  numeric_features = {}
  for key, value in features.items():
    if value.dtype != 'O':
      numeric_features[key] = value
  return numeric_features
74
+
75
+
76
def np_example_to_features(np_example: FeatureDict,
                           config: ml_collections.ConfigDict,
                           random_seed: int = 0) -> FeatureDict:
  """Run the TF input pipeline on a NumPy feature dict.

  The input mapping is shallow-copied first, so renaming
  `deletion_matrix_int` to a float32 `deletion_matrix` does not change the
  caller's mapping.

  Args:
    np_example: Raw features; `seq_length[0]` is taken as the number of
      residues.
    config: Model config, passed to make_data_config().
    random_seed: Seed set on the TF graph built for this call.

  Returns:
    The processed features, without entries whose dtype equals 'O'.
  """
  example = dict(np_example)
  num_res = int(example['seq_length'][0])
  cfg, feature_names = make_data_config(config, num_res=num_res)

  if 'deletion_matrix_int' in example:
    int_deletions = example.pop('deletion_matrix_int')
    example['deletion_matrix'] = int_deletions.astype(np.float32)

  # Build the pipeline in a fresh graph placed on the CPU.
  graph = tf.Graph()
  with graph.as_default(), tf.device('/device:CPU:0'):
    tf.compat.v1.set_random_seed(random_seed)
    tensor_dict = proteins_dataset.np_to_tensor_dict(
        np_example=example, features=feature_names)

    processed_batch = input_pipeline.process_tensors_from_config(
        tensor_dict, cfg)

  graph.finalize()

  with tf.Session(graph=graph) as sess:
    features = sess.run(processed_batch)

  # Drop object-dtype entries.
  numeric_features = {}
  for key, value in features.items():
    if value.dtype != 'O':
      numeric_features[key] = value
  return numeric_features
af_backprop/alphafold/model/folding.py ADDED
@@ -0,0 +1,1016 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Modules and utilities for the structure module."""
16
+
17
+ import functools
18
+ from typing import Dict
19
+ from alphafold.common import residue_constants
20
+ from alphafold.model import all_atom
21
+ from alphafold.model import common_modules
22
+ from alphafold.model import prng
23
+ from alphafold.model import quat_affine
24
+ from alphafold.model import r3
25
+ from alphafold.model import utils
26
+ import haiku as hk
27
+ import jax
28
+ import jax.numpy as jnp
29
+ import ml_collections
30
+ import numpy as np
31
+
32
+
33
def squared_difference(x, y):
  """Return the element-wise square of `x - y`."""
  delta = x - y
  return jnp.square(delta)
35
+
36
+
37
+ class InvariantPointAttention(hk.Module):
38
+ """Invariant Point attention module.
39
+
40
+ The high-level idea is that this attention module works over a set of points
41
+ and associated orientations in 3D space (e.g. protein residues).
42
+
43
+ Each residue outputs a set of queries and keys as points in their local
44
+ reference frame. The attention is then defined as the euclidean distance
45
+ between the queries and keys in the global frame.
46
+
47
+ Jumper et al. (2021) Suppl. Alg. 22 "InvariantPointAttention"
48
+ """
49
+
50
+ def __init__(self,
51
+ config,
52
+ global_config,
53
+ dist_epsilon=1e-8,
54
+ name='invariant_point_attention'):
55
+ """Initialize.
56
+
57
+ Args:
58
+ config: Structure Module Config
59
+ global_config: Global Config of Model.
60
+ dist_epsilon: Small value to avoid NaN in distance calculation.
61
+ name: Haiku Module name.
62
+ """
63
+ super().__init__(name=name)
64
+
65
+ self._dist_epsilon = dist_epsilon
66
+ self._zero_initialize_last = global_config.zero_init
67
+
68
+ self.config = config
69
+
70
+ self.global_config = global_config
71
+
72
+ def __call__(self, inputs_1d, inputs_2d, mask, affine):
73
+ """Compute geometry-aware attention.
74
+
75
+ Given a set of query residues (defined by affines and associated scalar
76
+ features), this function computes geometry-aware attention between the
77
+ query residues and target residues.
78
+
79
+ The residues produce points in their local reference frame, which
80
+ are converted into the global frame in order to compute attention via
81
+ euclidean distance.
82
+
83
+ Equivalently, the target residues produce points in their local frame to be
84
+ used as attention values, which are converted into the query residues'
85
+ local frames.
86
+
87
+ Args:
88
+ inputs_1d: (N, C) 1D input embedding that is the basis for the
89
+ scalar queries.
90
+ inputs_2d: (N, M, C') 2D input embedding, used for biases and values.
91
+ mask: (N, 1) mask to indicate which elements of inputs_1d participate
92
+ in the attention.
93
+ affine: QuatAffine object describing the position and orientation of
94
+ every element in inputs_1d.
95
+
96
+ Returns:
97
+ Transformation of the input embedding.
98
+ """
99
+ num_residues, _ = inputs_1d.shape
100
+
101
+ # Improve readability by removing a large number of 'self's.
102
+ num_head = self.config.num_head
103
+ num_scalar_qk = self.config.num_scalar_qk
104
+ num_point_qk = self.config.num_point_qk
105
+ num_scalar_v = self.config.num_scalar_v
106
+ num_point_v = self.config.num_point_v
107
+ num_output = self.config.num_channel
108
+
109
+ assert num_scalar_qk > 0
110
+ assert num_point_qk > 0
111
+ assert num_point_v > 0
112
+
113
+ # Construct scalar queries of shape:
114
+ # [num_query_residues, num_head, num_points]
115
+ q_scalar = common_modules.Linear(
116
+ num_head * num_scalar_qk, name='q_scalar')(
117
+ inputs_1d)
118
+ q_scalar = jnp.reshape(
119
+ q_scalar, [num_residues, num_head, num_scalar_qk])
120
+
121
+ # Construct scalar keys/values of shape:
122
+ # [num_target_residues, num_head, num_points]
123
+ kv_scalar = common_modules.Linear(
124
+ num_head * (num_scalar_v + num_scalar_qk), name='kv_scalar')(
125
+ inputs_1d)
126
+ kv_scalar = jnp.reshape(kv_scalar,
127
+ [num_residues, num_head,
128
+ num_scalar_v + num_scalar_qk])
129
+ k_scalar, v_scalar = jnp.split(kv_scalar, [num_scalar_qk], axis=-1)
130
+
131
+ # Construct query points of shape:
132
+ # [num_residues, num_head, num_point_qk]
133
+
134
+ # First construct query points in local frame.
135
+ q_point_local = common_modules.Linear(
136
+ num_head * 3 * num_point_qk, name='q_point_local')(
137
+ inputs_1d)
138
+ q_point_local = jnp.split(q_point_local, 3, axis=-1)
139
+ # Project query points into global frame.
140
+ q_point_global = affine.apply_to_point(q_point_local, extra_dims=1)
141
+ # Reshape query point for later use.
142
+ q_point = [
143
+ jnp.reshape(x, [num_residues, num_head, num_point_qk])
144
+ for x in q_point_global]
145
+
146
+ # Construct key and value points.
147
+ # Key points have shape [num_residues, num_head, num_point_qk]
148
+ # Value points have shape [num_residues, num_head, num_point_v]
149
+
150
+ # Construct key and value points in local frame.
151
+ kv_point_local = common_modules.Linear(
152
+ num_head * 3 * (num_point_qk + num_point_v), name='kv_point_local')(
153
+ inputs_1d)
154
+ kv_point_local = jnp.split(kv_point_local, 3, axis=-1)
155
+ # Project key and value points into global frame.
156
+ kv_point_global = affine.apply_to_point(kv_point_local, extra_dims=1)
157
+ kv_point_global = [
158
+ jnp.reshape(x, [num_residues,
159
+ num_head, (num_point_qk + num_point_v)])
160
+ for x in kv_point_global]
161
+ # Split key and value points.
162
+ k_point, v_point = list(
163
+ zip(*[
164
+ jnp.split(x, [num_point_qk,], axis=-1)
165
+ for x in kv_point_global
166
+ ]))
167
+
168
+ # We assume that all queries and keys come iid from N(0, 1) distribution
169
+ # and compute the variances of the attention logits.
170
+ # Each scalar pair (q, k) contributes Var q*k = 1
171
+ scalar_variance = max(num_scalar_qk, 1) * 1.
172
+ # Each point pair (q, k) contributes Var [0.5 ||q||^2 - <q, k>] = 9 / 2
173
+ point_variance = max(num_point_qk, 1) * 9. / 2
174
+
175
+ # Allocate equal variance to scalar, point and attention 2d parts so that
176
+ # the sum is 1.
177
+
178
+ num_logit_terms = 3
179
+
180
+ scalar_weights = np.sqrt(1.0 / (num_logit_terms * scalar_variance))
181
+ point_weights = np.sqrt(1.0 / (num_logit_terms * point_variance))
182
+ attention_2d_weights = np.sqrt(1.0 / (num_logit_terms))
183
+
184
+ # Trainable per-head weights for points.
185
+ trainable_point_weights = jax.nn.softplus(hk.get_parameter(
186
+ 'trainable_point_weights', shape=[num_head],
187
+ # softplus^{-1} (1)
188
+ init=hk.initializers.Constant(np.log(np.exp(1.) - 1.))))
189
+ point_weights *= jnp.expand_dims(trainable_point_weights, axis=1)
190
+
191
+ v_point = [jnp.swapaxes(x, -2, -3) for x in v_point]
192
+
193
+ q_point = [jnp.swapaxes(x, -2, -3) for x in q_point]
194
+ k_point = [jnp.swapaxes(x, -2, -3) for x in k_point]
195
+ dist2 = [
196
+ squared_difference(qx[:, :, None, :], kx[:, None, :, :])
197
+ for qx, kx in zip(q_point, k_point)
198
+ ]
199
+ dist2 = sum(dist2)
200
+ attn_qk_point = -0.5 * jnp.sum(
201
+ point_weights[:, None, None, :] * dist2, axis=-1)
202
+
203
+ v = jnp.swapaxes(v_scalar, -2, -3)
204
+ q = jnp.swapaxes(scalar_weights * q_scalar, -2, -3)
205
+ k = jnp.swapaxes(k_scalar, -2, -3)
206
+ attn_qk_scalar = jnp.matmul(q, jnp.swapaxes(k, -2, -1))
207
+ attn_logits = attn_qk_scalar + attn_qk_point
208
+
209
+ attention_2d = common_modules.Linear(
210
+ num_head, name='attention_2d')(
211
+ inputs_2d)
212
+
213
+ attention_2d = jnp.transpose(attention_2d, [2, 0, 1])
214
+ attention_2d = attention_2d_weights * attention_2d
215
+ attn_logits += attention_2d
216
+
217
+ mask_2d = mask * jnp.swapaxes(mask, -1, -2)
218
+ attn_logits -= 1e5 * (1. - mask_2d)
219
+
220
+ # [num_head, num_query_residues, num_target_residues]
221
+ attn = jax.nn.softmax(attn_logits)
222
+
223
+ # [num_head, num_query_residues, num_head * num_scalar_v]
224
+ result_scalar = jnp.matmul(attn, v)
225
+
226
+ # For point result, implement matmul manually so that it will be a float32
227
+ # on TPU. This is equivalent to
228
+ # result_point_global = [jnp.einsum('bhqk,bhkc->bhqc', attn, vx)
229
+ # for vx in v_point]
230
+ # but on the TPU, doing the multiply and reduce_sum ensures the
231
+ # computation happens in float32 instead of bfloat16.
232
+ result_point_global = [jnp.sum(
233
+ attn[:, :, :, None] * vx[:, None, :, :],
234
+ axis=-2) for vx in v_point]
235
+
236
+ # [num_query_residues, num_head, num_head * num_(scalar|point)_v]
237
+ result_scalar = jnp.swapaxes(result_scalar, -2, -3)
238
+ result_point_global = [
239
+ jnp.swapaxes(x, -2, -3)
240
+ for x in result_point_global]
241
+
242
+ # Features used in the linear output projection. Should have the size
243
+ # [num_query_residues, ?]
244
+ output_features = []
245
+
246
+ result_scalar = jnp.reshape(
247
+ result_scalar, [num_residues, num_head * num_scalar_v])
248
+ output_features.append(result_scalar)
249
+
250
+ result_point_global = [
251
+ jnp.reshape(r, [num_residues, num_head * num_point_v])
252
+ for r in result_point_global]
253
+ result_point_local = affine.invert_point(result_point_global, extra_dims=1)
254
+ output_features.extend(result_point_local)
255
+
256
+ output_features.append(jnp.sqrt(self._dist_epsilon +
257
+ jnp.square(result_point_local[0]) +
258
+ jnp.square(result_point_local[1]) +
259
+ jnp.square(result_point_local[2])))
260
+
261
+ # Dimensions: h = heads, i and j = residues,
262
+ # c = inputs_2d channels
263
+ # Contraction happens over the second residue dimension, similarly to how
264
+ # the usual attention is performed.
265
+ result_attention_over_2d = jnp.einsum('hij, ijc->ihc', attn, inputs_2d)
266
+ num_out = num_head * result_attention_over_2d.shape[-1]
267
+ output_features.append(
268
+ jnp.reshape(result_attention_over_2d,
269
+ [num_residues, num_out]))
270
+
271
+ final_init = 'zeros' if self._zero_initialize_last else 'linear'
272
+
273
+ final_act = jnp.concatenate(output_features, axis=-1)
274
+
275
+ return common_modules.Linear(
276
+ num_output,
277
+ initializer=final_init,
278
+ name='output_projection')(final_act)
279
+
280
+
281
class FoldIteration(hk.Module):
  """One pass of the structure module's iterative update.

  Jumper et al. (2021) Suppl. Alg. 20 "StructureModule" lines 6-21

  A call runs three stages in order: InvariantPointAttention over all
  residues, a residual transition stack on the per-residue activations, and
  (when requested) an update of every residue's affine. Side chains are then
  predicted from the resulting affine and activations.
  """

  def __init__(self, config, global_config, name='fold_iteration'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self,
               activations,
               sequence_mask,
               update_affine,
               is_training,
               initial_act,
               safe_key=None,
               static_feat_2d=None,
               aatype=None,
               scale_rate=1.0):
    """Runs one fold iteration.

    Args:
      activations: Dict with 'act' (per-residue activations) and 'affine'
        (tensor decoded with `QuatAffine.from_tensor`).
      sequence_mask: Mask handed to the attention module.
      update_affine: If true, predict a 6-channel update and compose it onto
        the affine.
      is_training: Forwarded to dropout.
      initial_act: Second representation handed to the side-chain module.
      safe_key: Optional `prng.SafeKey`; drawn from `hk.next_rng_key()` when
        None.
      static_feat_2d: Pair features handed to the attention module.
      aatype: Amino acid types, forwarded to the side-chain module.
      scale_rate: Multiplier applied to the configured dropout rate.

    Returns:
      A `(new_activations, outputs)` pair: `new_activations` has keys 'act'
      and 'affine'; `outputs` has keys 'affine' and 'sc'.
    """
    cfg = self.config

    if safe_key is None:
      safe_key = prng.SafeKey(hk.next_rng_key())

    def dropout(tensor, key):
      return prng.safe_dropout(
          tensor=tensor,
          safe_key=key,
          rate=cfg.dropout * scale_rate,
          is_deterministic=self.global_config.deterministic,
          is_training=is_training)

    def layer_norm(name):
      return hk.LayerNorm(
          axis=[-1], create_scale=True, create_offset=True, name=name)

    affine = quat_affine.QuatAffine.from_tensor(activations['affine'])
    act = activations['act']

    # Stage 1: invariant point attention with a residual connection.
    ipa = InvariantPointAttention(self.config, self.global_config)
    act += ipa(
        inputs_1d=act,
        inputs_2d=static_feat_2d,
        mask=sequence_mask,
        affine=affine)
    # Three keys are split off; the first one is not consumed below.
    _, attention_dropout_key, transition_dropout_key = safe_key.split(3)
    act = dropout(act, attention_dropout_key)
    act = layer_norm('attention_layer_norm')(act)

    final_init = 'zeros' if self.global_config.zero_init else 'linear'

    # Stage 2: transition stack; ReLU follows every layer except the last.
    residual = act
    last_layer = cfg.num_layer_in_transition - 1
    for layer_index in range(cfg.num_layer_in_transition):
      is_last = layer_index == last_layer
      act = common_modules.Linear(
          cfg.num_channel,
          initializer=final_init if is_last else 'relu',
          name='transition')(act)
      if not is_last:
        act = jax.nn.relu(act)
    act += residual
    act = dropout(act, transition_dropout_key)
    act = layer_norm('transition_layer_norm')(act)

    # Stage 3: backbone update.
    if update_affine:
      # This block corresponds to
      # Jumper et al. (2021) Alg. 23 "Backbone update"
      affine_update = common_modules.Linear(
          6, initializer=final_init, name='affine_update')(act)
      affine = affine.pre_compose(affine_update)

    sidechain_module = MultiRigidSidechain(cfg.sidechain, self.global_config)
    sc = sidechain_module(
        affine.scale_translation(cfg.position_scale),
        [act, initial_act],
        aatype)

    affine_tensor = affine.to_tensor()
    outputs = {'affine': affine_tensor, 'sc': sc}

    # No stop-gradient is applied to the rotation before it is carried to the
    # next iteration; the following call is left disabled:
    # affine = affine.apply_rotation_tensor_fn(jax.lax.stop_gradient)

    new_activations = {'act': act, 'affine': affine_tensor}
    return new_activations, outputs
389
+
390
+
391
def generate_affines(representations, batch, config, global_config,
                     is_training, safe_key):
  """Generate predicted affines for a single chain.

  Jumper et al. (2021) Suppl. Alg. 20 "StructureModule"

  This is the core of the structure module: one `FoldIteration` is applied
  `config.num_layer` times through `hk.scan`, starting from an affine built by
  `generate_new_affine`, and the per-step outputs are returned stacked.

  Args:
    representations: Representations dictionary; 'single' and 'pair' are read.
    batch: Batch dictionary; 'seq_mask', 'aatype' and 'scale_rate' are read.
    config: Config for the structure module.
    global_config: Global config.
    is_training: Whether the model is being trained.
    safe_key: A prng.SafeKey object that wraps a PRNG key.

  Returns:
    A dictionary with the scanned fold-iteration outputs ('affine' and 'sc')
    plus 'act', the activations after the last iteration.
  """
  c = config
  sequence_mask = batch['seq_mask'][:, None]

  def layer_norm(name):
    return hk.LayerNorm(
        axis=[-1], create_scale=True, create_offset=True, name=name)

  initial_act = layer_norm('single_layer_norm')(representations['single'])
  projected_act = common_modules.Linear(
      c.num_channel, name='initial_projection')(initial_act)

  affine = generate_new_affine(sequence_mask)

  fold_iteration = FoldIteration(c, global_config, name='fold_iteration')

  assert len(batch['seq_mask'].shape) == 1

  activations = {'act': projected_act, 'affine': affine.to_tensor()}

  act_2d = layer_norm('pair_layer_norm')(representations['pair'])

  def fold_iter(carry, _):
    # The carry holds the running activations and the PRNG key; each step
    # splits off a fresh key for its own dropout.
    next_key, step_key = carry["key"].split()
    next_activations, step_output = fold_iteration(
        carry["act"],
        initial_act=initial_act,
        static_feat_2d=act_2d,
        safe_key=step_key,
        sequence_mask=sequence_mask,
        update_affine=True,
        is_training=is_training,
        aatype=batch['aatype'],
        scale_rate=batch["scale_rate"])
    return {"act": next_activations, "key": next_key}, step_output

  final_carry, output = hk.scan(
      fold_iter, {"act": activations, "key": safe_key}, None, c.num_layer)

  # Include the activations in the output dict for use by the LDDT-Head.
  output['act'] = final_carry["act"]['act']

  return output
465
+
466
+
467
class dummy(hk.Module):
  """Placeholder head with the same call signature as `StructureModule`.

  The constructor arguments are accepted but not stored, and calling the
  module always returns an empty dict.
  """

  def __init__(self, config, global_config, compute_loss=True):
    super().__init__(name="dummy")

  def __call__(self, representations, batch, is_training, safe_key=None):
    """Returns `{}`; a key is still drawn from Haiku when none is given."""
    if safe_key is None:
      # The key is never used, but drawing it advances Haiku's RNG sequence.
      safe_key = prng.SafeKey(hk.next_rng_key())
    return {}
474
+
475
class StructureModule(hk.Module):
  """StructureModule as a network head.

  Jumper et al. (2021) Suppl. Alg. 20 "StructureModule"
  """

  def __init__(self, config, global_config, compute_loss=True,
               name='structure_module'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config
    self.compute_loss = compute_loss

  def __call__(self, representations, batch, is_training,
               safe_key=None):
    """Predicts the structure and packages it for downstream heads.

    Args:
      representations: Representations dictionary passed to
        `generate_affines`.
      batch: Batch dictionary; 'atom14_atom_exists' and 'atom37_atom_exists'
        are read here, the rest is forwarded.
      is_training: Whether the model is being trained.
      safe_key: Optional `prng.SafeKey`; drawn from `hk.next_rng_key()` when
        None.

    Returns:
      Dict with the structure-module representation, the affine trajectory
      (translation channels multiplied by `config.position_scale`), the
      side-chain outputs and the final atom14/atom37 positions and masks.
    """
    if safe_key is None:
      safe_key = prng.SafeKey(hk.next_rng_key())

    output = generate_affines(
        representations=representations,
        batch=batch,
        config=self.config,
        global_config=self.global_config,
        is_training=is_training,
        safe_key=safe_key)

    # Leave the four quaternion channels alone and rescale the translation.
    channel_scale = jnp.array([1.] * 4 + [self.config.position_scale] * 3)
    traj = output['affine'] * channel_scale

    # Positions after the last iteration.
    atom14_pred_positions = r3.vecs_to_tensor(output['sc']['atom_pos'])[-1]
    atom37_mask = batch['atom37_atom_exists']
    atom37_pred_positions = all_atom.atom14_to_atom37(
        atom14_pred_positions, batch)
    atom37_pred_positions = atom37_pred_positions * atom37_mask[:, :, None]

    return {
        'representations': {'structure_module': output['act']},
        'traj': traj,
        'sidechains': output['sc'],
        'final_atom14_positions': atom14_pred_positions,  # (N, 14, 3)
        'final_atom14_mask': batch['atom14_atom_exists'],  # (N, 14)
        'final_atom_positions': atom37_pred_positions,  # (N, 37, 3)
        'final_atom_mask': atom37_mask,  # (N, 37)
        'final_affines': traj[-1],
    }

  def loss(self, value, batch):
    """Computes the structure-module loss terms.

    `value` is updated in place with the renamed ground truth and, when they
    are computed, the structural violations.

    Args:
      value: Output dictionary of `__call__`.
      batch: Batch dictionary with the ground-truth features.

    Returns:
      Dict with the combined 'loss', a 'metrics' dict and the individual
      terms written by the loss helpers.
    """
    cfg = self.config
    ret = {'loss': 0., 'metrics': {}}

    # If requested, compute in-graph metrics.
    if cfg.compute_in_graph_metrics:
      atom14_pred_positions = value['final_atom14_positions']
      # Compute renaming and violations.
      value.update(compute_renamed_ground_truth(batch, atom14_pred_positions))
      value['violations'] = find_structural_violations(
          batch, atom14_pred_positions, cfg)

      # Several violation metrics:
      ret['metrics'].update(compute_violation_metrics(
          batch=batch,
          atom14_pred_positions=atom14_pred_positions,
          violations=value['violations']))

    backbone_loss(ret, batch, value, cfg)

    if 'renamed_atom14_gt_positions' not in value:
      value.update(compute_renamed_ground_truth(
          batch, value['final_atom14_positions']))
    sc_loss = sidechain_loss(batch, value, cfg)

    # Blend backbone and side-chain FAPE according to the configured fraction.
    sidechain_weight = cfg.sidechain.weight_frac
    ret['loss'] = ((1 - sidechain_weight) * ret['loss'] +
                   sidechain_weight * sc_loss['loss'])
    ret['sidechain_fape'] = sc_loss['fape']

    supervised_chi_loss(ret, batch, value, cfg)

    if cfg.structural_violation_loss_weight:
      if 'violations' not in value:
        value['violations'] = find_structural_violations(
            batch, value['final_atom14_positions'], cfg)
      structural_violation_loss(ret, batch, value, cfg)

    return ret
559
+
560
+
561
def compute_renamed_ground_truth(
    batch: Dict[str, jnp.ndarray],
    atom14_pred_positions: jnp.ndarray,
    ) -> Dict[str, jnp.ndarray]:
  """Pick, per residue, the ground-truth atom naming that fits the prediction.

  Jumper et al. (2021) Suppl. Alg. 26 "renameSymmetricGroundTruthAtoms"

  The renamed ground truth is then used for all losses, so that every loss
  moves the atoms in the same direction.

  Args:
    batch: Dictionary containing:
      * atom14_gt_positions: Ground truth positions.
      * atom14_alt_gt_positions: Ground truth positions with renaming swaps.
      * atom14_atom_is_ambiguous: 1.0 for atoms that are affected by
        renaming swaps.
      * atom14_gt_exists: Mask for which atoms exist in ground truth.
      * atom14_alt_gt_exists: Mask for which atoms exist in ground truth
        after renaming.
      * atom14_atom_exists: Mask for whether each atom is part of the given
        amino acid type.
    atom14_pred_positions: Array of atom positions in global frame with shape
      (N, 14, 3).

  Returns:
    Dictionary containing:
      alt_naming_is_better: Array with 1.0 where alternative swap is better.
      renamed_atom14_gt_positions: Array of optimal ground truth positions
        after renaming swaps are performed.
      renamed_atom14_gt_exists: Mask after renaming swap is performed.
  """
  use_alt = all_atom.find_optimal_renaming(
      atom14_gt_positions=batch['atom14_gt_positions'],
      atom14_alt_gt_positions=batch['atom14_alt_gt_positions'],
      atom14_atom_is_ambiguous=batch['atom14_atom_is_ambiguous'],
      atom14_gt_exists=batch['atom14_gt_exists'],
      atom14_pred_positions=atom14_pred_positions,
      atom14_atom_exists=batch['atom14_atom_exists'])
  keep_original = 1. - use_alt

  # Per-residue blend between the original and the swapped naming.
  renamed_positions = (
      keep_original[:, None, None] * batch['atom14_gt_positions']
      + use_alt[:, None, None] * batch['atom14_alt_gt_positions'])
  renamed_mask = (
      keep_original[:, None] * batch['atom14_gt_exists']
      + use_alt[:, None] * batch['atom14_alt_gt_exists'])

  return {
      'alt_naming_is_better': use_alt,  # (N)
      'renamed_atom14_gt_positions': renamed_positions,  # (N, 14, 3)
      'renamed_atom14_gt_exists': renamed_mask,  # (N, 14)
  }
616
+
617
+
618
def backbone_loss(ret, batch, value, config):
  """Backbone FAPE Loss.

  Jumper et al. (2021) Suppl. Alg. 20 "StructureModule" line 17

  The loss is evaluated for every step of the predicted trajectory. The mean
  over steps is added to `ret['loss']` and the value of the final step is
  stored in `ret['fape']`.

  Args:
    ret: Dictionary to write outputs into, needs to contain 'loss'.
    batch: Batch. Needs to contain either 'backbone_affine_tensor' and
      'backbone_affine_mask', or 'all_atom_positions' and 'all_atom_mask',
      from which the ground-truth frames are rebuilt using atoms 0, 1 and 2
      of every residue (read here as N, CA and C). May contain
      'use_clamped_fape' to blend the clamped and unclamped losses.
    value: Dictionary containing structure module output, needs to contain
      'traj', a trajectory of rigids.
    config: Configuration of loss, should contain 'fape.clamp_distance' and
      'fape.loss_unit_distance'.
  """
  affine_trajectory = quat_affine.QuatAffine.from_tensor(value['traj'])
  rigid_trajectory = r3.rigids_from_quataffine(affine_trajectory)

  if 'backbone_affine_tensor' in batch:
    gt_affine = quat_affine.QuatAffine.from_tensor(
        batch['backbone_affine_tensor'])
    backbone_mask = batch['backbone_affine_mask']
  else:
    atom_positions = batch['all_atom_positions']
    atom_mask = batch['all_atom_mask']
    n_xyz = atom_positions[..., 0, :]
    ca_xyz = atom_positions[..., 1, :]
    c_xyz = atom_positions[..., 2, :]
    rot, trans = quat_affine.make_transform_from_reference(
        n_xyz, ca_xyz, c_xyz)
    gt_affine = quat_affine.QuatAffine(quaternion=None,
                                       translation=trans,
                                       rotation=rot,
                                       unstack_inputs=True)
    # The frame is built from all three atoms, so it is only meaningful when
    # all three are present; masking on the first atom alone would let
    # residues with a missing CA or C contribute an arbitrary frame.
    backbone_mask = atom_mask[..., 0] * atom_mask[..., 1] * atom_mask[..., 2]

  gt_rigid = r3.rigids_from_quataffine(gt_affine)

  def trajectory_fape(l1_clamp_distance):
    """FAPE of every trajectory step against the single ground truth."""
    per_step_fape = jax.vmap(
        functools.partial(
            all_atom.frame_aligned_point_error,
            l1_clamp_distance=l1_clamp_distance,
            length_scale=config.fape.loss_unit_distance),
        # Only the predicted frames and positions carry the step axis.
        (0, None, None, 0, None, None))
    return per_step_fape(rigid_trajectory, gt_rigid, backbone_mask,
                         rigid_trajectory.trans, gt_rigid.trans,
                         backbone_mask)

  fape_loss = trajectory_fape(config.fape.clamp_distance)

  if 'use_clamped_fape' in batch:
    # Jumper et al. (2021) Suppl. Sec. 1.11.5 "Loss clamping details"
    use_clamped_fape = jnp.asarray(batch['use_clamped_fape'], jnp.float32)
    fape_loss_unclamped = trajectory_fape(None)
    fape_loss = (fape_loss * use_clamped_fape +
                 fape_loss_unclamped * (1 - use_clamped_fape))

  ret['fape'] = fape_loss[-1]
  ret['loss'] += jnp.mean(fape_loss)
680
+
681
+
682
def sidechain_loss(batch, value, config):
  """All Atom FAPE Loss using renamed rigids.

  Args:
    batch: Batch, needs to contain 'rigidgroups_gt_frames',
      'rigidgroups_alt_gt_frames' and 'rigidgroups_gt_exists'.
    value: Dictionary containing structure module output, needs to contain
      'alt_naming_is_better', 'renamed_atom14_gt_positions',
      'renamed_atom14_gt_exists' and 'sidechains' (with 'frames' and
      'atom_pos').
    config: Configuration of loss, should contain
      'sidechain.atom_clamp_distance' and 'sidechain.length_scale'.

  Returns:
    Dict with 'fape' and 'loss', both holding the same FAPE value computed on
    the final layer of the predicted side chains.
  """
  # Rename Frames
  # Jumper et al. (2021) Suppl. Alg. 26 "renameSymmetricGroundTruthAtoms" line 7
  alt_naming_is_better = value['alt_naming_is_better']
  renamed_gt_frames = (
      (1. - alt_naming_is_better[:, None, None])
      * batch['rigidgroups_gt_frames']
      + alt_naming_is_better[:, None, None]
      * batch['rigidgroups_alt_gt_frames'])

  flat_gt_frames = r3.rigids_from_tensor_flat12(
      jnp.reshape(renamed_gt_frames, [-1, 12]))
  flat_frames_mask = jnp.reshape(batch['rigidgroups_gt_exists'], [-1])

  flat_gt_positions = r3.vecs_from_tensor(
      jnp.reshape(value['renamed_atom14_gt_positions'], [-1, 3]))
  flat_positions_mask = jnp.reshape(value['renamed_atom14_gt_exists'], [-1])

  # Compute frame_aligned_point_error score for the final layer.
  pred_frames = value['sidechains']['frames']
  pred_positions = value['sidechains']['atom_pos']

  def _slice_last_layer_and_flatten(x):
    return jnp.reshape(x[-1], [-1])

  # `jax.tree_map` is deprecated and absent from recent JAX releases;
  # `jax.tree_util.tree_map` is the same function under its stable name.
  flat_pred_frames = jax.tree_util.tree_map(
      _slice_last_layer_and_flatten, pred_frames)
  flat_pred_positions = jax.tree_util.tree_map(
      _slice_last_layer_and_flatten, pred_positions)

  # FAPE Loss on sidechains
  fape = all_atom.frame_aligned_point_error(
      pred_frames=flat_pred_frames,
      target_frames=flat_gt_frames,
      frames_mask=flat_frames_mask,
      pred_positions=flat_pred_positions,
      target_positions=flat_gt_positions,
      positions_mask=flat_positions_mask,
      l1_clamp_distance=config.sidechain.atom_clamp_distance,
      length_scale=config.sidechain.length_scale)

  return {
      'fape': fape,
      'loss': fape}
722
+
723
+
724
def structural_violation_loss(ret, batch, value, config):
  """Adds the weighted structural-violation penalty to `ret['loss']`.

  Args:
    ret: Dictionary to write outputs into, needs to contain 'loss'.
    batch: Batch, needs to contain 'atom14_atom_exists'.
    value: Dictionary that needs to contain 'violations' as produced by
      `find_structural_violations`.
    config: Configuration of loss, should contain
      'structural_violation_loss_weight' and a truthy 'sidechain.weight_frac'.
  """
  assert config.sidechain.weight_frac

  # Put all violation losses together to one large loss.
  between_residues = value['violations']['between_residues']
  within_residues = value['violations']['within_residues']
  num_atoms = jnp.sum(batch['atom14_atom_exists']).astype(jnp.float32)

  # Per-atom terms are summed over all atoms and averaged over the number of
  # existing atoms; the small constant guards against an empty mask.
  per_atom_loss_total = jnp.sum(
      between_residues['clashes_per_atom_loss_sum'] +
      within_residues['per_atom_loss_sum'])
  combined_violation_loss = (
      between_residues['bonds_c_n_loss_mean'] +
      between_residues['angles_ca_c_n_loss_mean'] +
      between_residues['angles_c_n_ca_loss_mean'] +
      per_atom_loss_total / (1e-6 + num_atoms))

  ret['loss'] += (
      config.structural_violation_loss_weight * combined_violation_loss)
739
+
740
+
741
def find_structural_violations(
    batch: Dict[str, jnp.ndarray],
    atom14_pred_positions: jnp.ndarray,  # (N, 14, 3)
    config: ml_collections.ConfigDict
    ):
  """Computes several checks for structural violations.

  Args:
    batch: Batch, needs to contain 'atom14_atom_exists', 'residue_index',
      'aatype' and 'residx_atom14_to_atom37'.
    atom14_pred_positions: Predicted atom positions, (N, 14, 3).
    config: Configuration, should contain 'violation_tolerance_factor' and
      'clash_overlap_tolerance'.

  Returns:
    Nested dict with 'between_residues' and 'within_residues' loss terms and
    masks, plus 'total_per_residue_violations_mask'.
  """

  # Compute between residue backbone violations of bonds and angles.
  connection_violations = all_atom.between_residue_bond_loss(
      pred_atom_positions=atom14_pred_positions,
      pred_atom_mask=batch['atom14_atom_exists'].astype(jnp.float32),
      residue_index=batch['residue_index'].astype(jnp.float32),
      aatype=batch['aatype'],
      tolerance_factor_soft=config.violation_tolerance_factor,
      tolerance_factor_hard=config.violation_tolerance_factor)

  # Compute the Van der Waals radius for every atom
  # (the first letter of the atom name is the element type).
  # Shape: (N, 14).
  # The lookup table is turned into an array up front so the gather does not
  # rely on implicit list-to-array conversion, which current jax.numpy
  # indexing functions refuse.
  atomtype_radius = jnp.asarray([
      residue_constants.van_der_waals_radius[name[0]]
      for name in residue_constants.atom_types
  ])
  atom14_atom_radius = batch['atom14_atom_exists'] * utils.batched_gather(
      atomtype_radius, batch['residx_atom14_to_atom37'])

  # Compute the between residue clash loss.
  between_residue_clashes = all_atom.between_residue_clash_loss(
      atom14_pred_positions=atom14_pred_positions,
      atom14_atom_exists=batch['atom14_atom_exists'],
      atom14_atom_radius=atom14_atom_radius,
      residue_index=batch['residue_index'],
      overlap_tolerance_soft=config.clash_overlap_tolerance,
      overlap_tolerance_hard=config.clash_overlap_tolerance)

  # Compute all within-residue violations (clashes,
  # bond length and angle violations).
  restype_atom14_bounds = residue_constants.make_atom14_dists_bounds(
      overlap_tolerance=config.clash_overlap_tolerance,
      bond_length_tolerance_factor=config.violation_tolerance_factor)
  atom14_dists_lower_bound = utils.batched_gather(
      restype_atom14_bounds['lower_bound'], batch['aatype'])
  atom14_dists_upper_bound = utils.batched_gather(
      restype_atom14_bounds['upper_bound'], batch['aatype'])
  within_residue_violations = all_atom.within_residue_violations(
      atom14_pred_positions=atom14_pred_positions,
      atom14_atom_exists=batch['atom14_atom_exists'],
      atom14_dists_lower_bound=atom14_dists_lower_bound,
      atom14_dists_upper_bound=atom14_dists_upper_bound,
      tighten_bounds_for_loss=0.0)

  # Combine them to a single per-residue violation mask (used later for LDDT).
  per_residue_violations_mask = jnp.max(jnp.stack([
      connection_violations['per_residue_violation_mask'],
      jnp.max(between_residue_clashes['per_atom_clash_mask'], axis=-1),
      jnp.max(within_residue_violations['per_atom_violations'],
              axis=-1)]), axis=0)

  return {
      'between_residues': {
          'bonds_c_n_loss_mean':
              connection_violations['c_n_loss_mean'],  # ()
          'angles_ca_c_n_loss_mean':
              connection_violations['ca_c_n_loss_mean'],  # ()
          'angles_c_n_ca_loss_mean':
              connection_violations['c_n_ca_loss_mean'],  # ()
          'connections_per_residue_loss_sum':
              connection_violations['per_residue_loss_sum'],  # (N)
          'connections_per_residue_violation_mask':
              connection_violations['per_residue_violation_mask'],  # (N)
          'clashes_mean_loss':
              between_residue_clashes['mean_loss'],  # ()
          'clashes_per_atom_loss_sum':
              between_residue_clashes['per_atom_loss_sum'],  # (N, 14)
          'clashes_per_atom_clash_mask':
              between_residue_clashes['per_atom_clash_mask'],  # (N, 14)
      },
      'within_residues': {
          'per_atom_loss_sum':
              within_residue_violations['per_atom_loss_sum'],  # (N, 14)
          'per_atom_violations':
              within_residue_violations['per_atom_violations'],  # (N, 14),
      },
      'total_per_residue_violations_mask':
          per_residue_violations_mask,  # (N)
  }
827
+
828
+
829
def compute_violation_metrics(
    batch: Dict[str, jnp.ndarray],
    atom14_pred_positions: jnp.ndarray,  # (N, 14, 3)
    violations: Dict[str, jnp.ndarray],
    ) -> Dict[str, jnp.ndarray]:
  """Summarise structural violations as scalar metrics.

  Args:
    batch: Batch, needs to contain 'atom14_atom_exists', 'residue_index' and
      'seq_mask'.
    atom14_pred_positions: Predicted atom positions, (N, 14, 3).
    violations: Output of `find_structural_violations`.

  Returns:
    Dict of metrics: the extreme CA-CA distance violation and the
    `seq_mask`-weighted means of the per-residue violation masks.
  """

  def mean_over_sequence(per_residue_values):
    return utils.mask_mean(mask=batch['seq_mask'], value=per_residue_values)

  between_residues = violations['between_residues']
  within_residues = violations['within_residues']

  extreme_ca_ca_violations = all_atom.extreme_ca_ca_distance_violations(
      pred_atom_positions=atom14_pred_positions,
      pred_atom_mask=batch['atom14_atom_exists'].astype(jnp.float32),
      residue_index=batch['residue_index'].astype(jnp.float32))

  # Per-atom masks are reduced to per-residue with a max over the atom axis.
  return {
      'violations_extreme_ca_ca_distance': extreme_ca_ca_violations,
      'violations_between_residue_bond': mean_over_sequence(
          between_residues['connections_per_residue_violation_mask']),
      'violations_between_residue_clash': mean_over_sequence(
          jnp.max(between_residues['clashes_per_atom_clash_mask'], axis=-1)),
      'violations_within_residue': mean_over_sequence(
          jnp.max(within_residues['per_atom_violations'], axis=-1)),
      'violations_per_residue': mean_over_sequence(
          violations['total_per_residue_violations_mask']),
  }
859
+
860
+
861
def supervised_chi_loss(ret, batch, value, config):
  """Computes loss for direct chi angle supervision.

  Jumper et al. (2021) Suppl. Alg. 27 "torsionAngleLoss"

  Two terms are added to `ret['loss']`: a squared error between predicted and
  true (sin, cos) of the chi angles, and a penalty on the deviation of the
  unnormalized angle vectors from unit norm. They are also stored under
  'chi_loss' and 'angle_norm_loss'.

  Args:
    ret: Dictionary to write outputs into, needs to contain 'loss'.
    batch: Batch, needs to contain 'seq_mask', 'chi_mask', 'chi_angles' and
      'aatype'.
    value: Dictionary containing structure module output, needs to contain
      value['sidechains']['angles_sin_cos'] for angles and
      value['sidechains']['unnormalized_angles_sin_cos'] for unnormalized
      angles.
    config: Configuration of loss, should contain 'chi_weight' and
      'angle_norm_weight', 'angle_norm_weight' scales angle norm term,
      'chi_weight' scales torsion term.
  """
  eps = 1e-6

  sequence_mask = batch['seq_mask']
  num_res = sequence_mask.shape[0]
  angle_shape = [-1, num_res, 7, 2]
  sidechains = value['sidechains']

  # Torsion term. Only the last four of the seven angles are the chi angles.
  pred_chi = jnp.reshape(sidechains['angles_sin_cos'], angle_shape)[:, :, 3:]

  aatype_one_hot = jax.nn.one_hot(
      batch['aatype'], residue_constants.restype_num + 1,
      dtype=jnp.float32)[None]
  chi_pi_periodic = jnp.einsum(
      'ijk, kl->ijl', aatype_one_hot,
      jnp.asarray(residue_constants.chi_pi_periodic))

  true_chi = batch['chi_angles'][None]
  true_sin_cos = jnp.stack([jnp.sin(true_chi), jnp.cos(true_chi)], axis=-1)

  # This is -1 if chi is pi-periodic and +1 if it's 2pi-periodic
  periodicity_sign = (1 - 2 * chi_pi_periodic)[..., None]
  true_sin_cos_shifted = periodicity_sign * true_sin_cos

  # Take whichever of the two equivalent targets is closer.
  sq_chi_error = jnp.minimum(
      jnp.sum(squared_difference(true_sin_cos, pred_chi), -1),
      jnp.sum(squared_difference(true_sin_cos_shifted, pred_chi), -1))

  chi_mask = batch['chi_mask'].astype(jnp.float32)
  sq_chi_loss = utils.mask_mean(mask=chi_mask[None], value=sq_chi_error)
  ret['chi_loss'] = sq_chi_loss
  ret['loss'] += config.chi_weight * sq_chi_loss

  # Norm term, over all seven angles.
  unnormed_angles = jnp.reshape(
      sidechains['unnormalized_angles_sin_cos'], angle_shape)
  angle_norm = jnp.sqrt(jnp.sum(jnp.square(unnormed_angles), axis=-1) + eps)
  angle_norm_loss = utils.mask_mean(
      mask=sequence_mask[None, :, None],
      value=jnp.abs(angle_norm - 1.))

  ret['angle_norm_loss'] = angle_norm_loss
  ret['loss'] += config.angle_norm_weight * angle_norm_loss
919
+
920
+
921
def generate_new_affine(sequence_mask):
  """Builds the starting affine: quaternion (1, 0, 0, 0), zero translation.

  Args:
    sequence_mask: Rank-2 array; only its first dimension (the number of
      residues) is used.

  Returns:
    A `quat_affine.QuatAffine` with one entry per residue.
  """
  num_residues, _ = sequence_mask.shape
  single_quaternion = jnp.reshape(jnp.asarray([1., 0., 0., 0.]), [1, 4])
  return quat_affine.QuatAffine(
      jnp.tile(single_quaternion, [num_residues, 1]),
      jnp.zeros([num_residues, 3]),
      unstack_inputs=True)
929
+
930
+
931
def l2_normalize(x, axis=-1, epsilon=1e-12):
  """Scales `x` to unit L2 norm along `axis`.

  The squared norm is floored at `epsilon` before the square root, so an
  all-zero slice yields zeros rather than NaN.
  """
  squared_norm = jnp.sum(x**2, axis=axis, keepdims=True)
  safe_norm = jnp.sqrt(jnp.maximum(squared_norm, epsilon))
  return x / safe_norm
934
+
935
+
936
class MultiRigidSidechain(hk.Module):
  """Predicts torsion angles and turns them into side-chain atom positions."""

  def __init__(self, config, global_config, name='rigid_sidechain'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, affine, representations_list, aatype):
    """Predict side chains using multi-rigid representations.

    Args:
      affine: The affines for each residue (translations in angstroms).
      representations_list: A list of activations to predict side chains from.
      aatype: Amino acid types.

    Returns:
      Dict containing the normalized and unnormalized torsion angles as well
      as atom positions and frames (in angstroms).
    """
    cfg = self.config
    num_channel = cfg.num_channel

    # Project every representation and add them up (equivalent to concat then
    # Linear).
    projections = []
    for representation in representations_list:
      projection_layer = common_modules.Linear(
          num_channel, name='input_projection')
      projections.append(projection_layer(jax.nn.relu(representation)))
    act = sum(projections)

    final_init = 'zeros' if self.global_config.zero_init else 'linear'

    # Mapping with some residual blocks.
    for _ in range(cfg.num_residual_block):
      block_input = act
      hidden = common_modules.Linear(
          num_channel, initializer='relu', name='resblock1')(
              jax.nn.relu(block_input))
      act = common_modules.Linear(
          num_channel, initializer=final_init, name='resblock2')(
              jax.nn.relu(hidden))
      act += block_input

    # Map activations to torsion angles. Shape: (num_res, 14).
    num_res = act.shape[0]
    flat_angles = common_modules.Linear(
        14, name='unnormalized_angles')(jax.nn.relu(act))
    # Seven angles per residue, each stored as a 2-vector.
    unnormalized_angles = jnp.reshape(flat_angles, [num_res, 7, 2])
    angles = l2_normalize(unnormalized_angles, axis=-1)

    # Map torsion angles to frames.
    backb_to_global = r3.rigids_from_quataffine(affine)

    # Jumper et al. (2021) Suppl. Alg. 24 "computeAllAtomCoordinates"

    # r3.Rigids with shape (N, 8).
    all_frames_to_global = all_atom.torsion_angles_to_frames(
        aatype, backb_to_global, angles)

    # Use frames and literature positions to create the final atom coordinates.
    # r3.Vecs with shape (N, 14).
    pred_positions = all_atom.frames_and_literature_positions_to_atom14_pos(
        aatype, all_frames_to_global)

    return {
        'angles_sin_cos': angles,  # jnp.ndarray (N, 7, 2)
        'unnormalized_angles_sin_cos':
            unnormalized_angles,  # jnp.ndarray (N, 7, 2)
        'atom_pos': pred_positions,  # r3.Vecs (N, 14)
        'frames': all_frames_to_global,  # r3.Rigids (N, 8)
    }
af_backprop/alphafold/model/layer_stack.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Function to stack repeats of a layer function without shared parameters."""
16
+
17
+ import collections
18
+ import contextlib
19
+ import functools
20
+ import inspect
21
+ from typing import Any, Callable, Optional, Tuple, Union
22
+
23
+ import haiku as hk
24
+ import jax
25
+ import jax.numpy as jnp
26
+
27
# Value threaded from one layer to the next by the scan: the activations `x`
# and the (possibly None) random key `rng`.
LayerStackCarry = collections.namedtuple('LayerStackCarry', 'x rng')
# Per-layer inputs consumed by the scan: the layer index `i` and the per-layer
# arguments `args_ys`.
LayerStackScanned = collections.namedtuple('LayerStackScanned', 'i args_ys')

# A WrappedFn is expected to accept arbitrarily nested `jnp.ndarray` structures
# and to return exactly the same structure. `typing` cannot express that
# constraint, so the aliases below only document the intent for the reader; in
# practice they accept anything.
NestedArray = Any
WrappedFn = Callable[..., Union[NestedArray, Tuple[NestedArray]]]
36
+
37
+
38
def _check_no_varargs(f):
  """Rejects functions whose leading parameter is a `*args` catch-all.

  Only the first parameter of `f` is inspected. A callable that declares no
  parameters at all passes the check.

  Args:
    f: The callable whose signature is inspected.

  Raises:
    ValueError: If the first parameter of `f` is variadic positional (`*args`).
  """
  # `next(..., None)` avoids an unrelated IndexError on zero-parameter
  # callables.
  first_param = next(iter(inspect.signature(f).parameters.values()), None)
  if (first_param is not None and
      first_param.kind == inspect.Parameter.VAR_POSITIONAL):
    raise ValueError(
        'The function `f` should not have any `varargs` (that is *args) '
        'argument. Instead, it should only use explicit positional '
        'arguments.')
45
+
46
+
47
@contextlib.contextmanager
def nullcontext():
  """Context manager that does nothing on entry or exit and yields `None`."""
  yield None
50
+
51
+
52
def maybe_with_rng(key):
  """Returns `hk.with_rng(key)`, or a no-op context manager if `key` is None."""
  if key is None:
    return nullcontext()
  return hk.with_rng(key)
57
+
58
+
59
def maybe_fold_in(key, data):
  """Returns `jax.random.fold_in(key, data)`; a None `key` is passed through."""
  return None if key is None else jax.random.fold_in(key, data)
64
+
65
+
66
class _LayerStack(hk.Module):
  """Repeats a parameterized function `count` times; applied via `hk.scan`."""

  def __init__(self,
               count: int,
               unroll: int,
               name: Optional[str] = None):
    """Stores the number of repeats and the `unroll` value passed to the scan."""
    super().__init__(name=name)
    self._count = count
    self._unroll = unroll

  def __call__(self, x, *args_ys):
    """Runs the stack on `x`, with per-layer inputs `args_ys`.

    While Haiku is initializing, the wrapped function is executed once and
    every parameter it creates receives an extra leading dimension of size
    `count`. Otherwise the wrapped function is scanned `count` times, each
    step reading its own slice of every stacked parameter.

    Args:
      x: Value carried from layer to layer.
      *args_ys: Per-layer inputs, indexed along their leading dimension.

    Returns:
      A pair `(x, zs)`: the final carried value and the per-layer outputs.
    """
    num_layers = self._count
    if hk.running_init():
      # Only a single layer is traced at init time. Each parameter is created
      # with an extra leading axis, and every slice along that axis is
      # initialized under its own random key.
      def stacked_creator(next_creator, shape, dtype, init, context):
        del context  # Unused.

        def stacked_init(stacked_shape, stacked_dtype):
          assert stacked_shape[0] == num_layers
          base_key = hk.maybe_next_rng_key()
          per_layer_shape = stacked_shape[1:]

          def init_single_layer(layer_idx):
            with maybe_with_rng(maybe_fold_in(base_key, layer_idx)):
              return init(per_layer_shape, stacked_dtype)

          return jax.vmap(init_single_layer)(jnp.arange(num_layers))

        return next_creator((num_layers,) + tuple(shape), dtype, stacked_init)

      def first_slice_getter(next_getter, value, context):
        # The stacking axis sits just in front of the parameter's own dims.
        stack_axis = value.ndim - (len(context.original_shape) + 1)
        return next_getter(jax.lax.index_in_dim(
            value, index=0, axis=stack_axis, keepdims=False))

      with hk.experimental.custom_creator(stacked_creator):
        with hk.experimental.custom_getter(first_slice_getter):
          if len(args_ys) == 1 and args_ys[0] is None:
            first_layer_args = (None,)
          else:
            first_layer_args = [
                jax.lax.dynamic_index_in_dim(ys, 0, keepdims=False)
                for ys in args_ys
            ]
          x, z = self._call_wrapped(x, *first_layer_args)
          if z is None:
            return x, z

          # Replicate the single-layer output so there is one entry per layer.
          zs = jax.tree_util.tree_map(
              lambda state: jnp.broadcast_to(
                  state, [num_layers,] + list(state.shape)),
              z)
          return x, zs
    else:
      # At apply time the layers are run by a scan; the random key is carried
      # and split at every step so that each layer sees a different key.
      def scan_step(carry: LayerStackCarry, scanned: LayerStackScanned):
        carried_key = carry.rng

        def indexed_getter(next_getter, value, context):
          # Pick this step's slice out of the full stacked parameter.
          stack_axis = value.ndim - (len(context.original_shape) + 1)
          stack_size = value.shape[stack_axis]
          assert stack_size == num_layers, (
              f'Attempting to use a parameter stack of size '
              f'{stack_size} for a LayerStack of '
              f'size {num_layers}.')
          return next_getter(jax.lax.dynamic_index_in_dim(
              value, scanned.i, axis=stack_axis, keepdims=False))

        with hk.experimental.custom_getter(indexed_getter):
          if carried_key is None:
            layer_key = None
          else:
            carried_key, layer_key = jax.random.split(carried_key)
          with maybe_with_rng(layer_key):
            out_x, z = self._call_wrapped(carry.x, *scanned.args_ys)
        return LayerStackCarry(x=out_x, rng=carried_key), z

      initial_carry = LayerStackCarry(x=x, rng=hk.maybe_next_rng_key())
      per_layer_inputs = LayerStackScanned(
          i=jnp.arange(num_layers, dtype=jnp.int32), args_ys=args_ys)

      final_carry, zs = hk.scan(
          scan_step, initial_carry, per_layer_inputs,
          length=num_layers, unroll=self._unroll)
      return final_carry.x, zs

  def _call_wrapped(self,
                    x: jnp.ndarray,
                    *args,
                    ) -> Tuple[jnp.ndarray, Optional[jnp.ndarray]]:
    """Applies one layer; must be provided by subclasses."""
    raise NotImplementedError()
165
+
166
+
167
class _LayerStackNoState(_LayerStack):
  """`_LayerStack` variant whose function receives no per-layer state."""

  def __init__(self,
               f: WrappedFn,
               count: int,
               unroll: int,
               name: Optional[str] = None):
    """Validates `f` (no leading `*args`) and stores it as the layer."""
    super().__init__(count=count, unroll=unroll, name=name)
    _check_no_varargs(f)
    self._f = f

  @hk.transparent
  def _call_wrapped(self, args, y):
    """Calls `f(*args)`; the per-layer input `y` is ignored."""
    del y
    outputs = self._f(*args)
    # A single-argument function is handed a 1-tuple, so its result is wrapped
    # back into a 1-tuple to keep the carried structure unchanged.
    carried = (outputs,) if len(args) == 1 else outputs
    return carried, None
188
+
189
+
190
class _LayerStackWithState(_LayerStack):
  """`_LayerStack` variant whose function also receives per-layer state."""

  def __init__(self,
               f: WrappedFn,
               count: int,
               unroll: int,
               name: Optional[str] = None):
    """Stores `f` as the layer; `count`, `unroll`, `name` go to the base class."""
    super().__init__(count=count, unroll=unroll, name=name)
    self._f = f

  @hk.transparent
  def _call_wrapped(self, x, *args):
    """Forwards the carried value and the per-layer inputs straight to `f`."""
    layer_fn = self._f
    return layer_fn(x, *args)
204
+
205
+
206
def layer_stack(num_layers: int,
                with_state=False,
                unroll: int = 1,
                name: Optional[str] = None):
  """Builds a decorator that applies a Haiku function `num_layers` times.

  The decorated function must take only explicit positional parameters and
  return the same type it receives. Positional parameters may be arbitrarily
  nested structures whose leaves are `jnp.ndarray`. Keyword arguments and
  variadic `*args` parameters are not supported.

  With `with_state=False` the wrapped function behaves like:
  ```
  for i in range(num_layers):
    x = f(x)
  return x
  ```

  With `with_state=True`, and `f` taking two arguments besides `x`:
  ```
  for i in range(num_layers):
    x, zs[i] = f(x, ys_0[i], ys_1[i])
  return x, zs
  ```
  which is written with `layer_stack` as:
  ```
  def f(x, y_0, y_1):
    ...
    return new_x, z
  x, zs = layer_stack.layer_stack(num_layers,
                                  with_state=True)(f)(x, ys_0, ys_1)
  ```

  Parameters created inside `f` are not shared between iterations.

  Args:
    num_layers: How many times the wrapped function is iterated.
    with_state: Whether per-layer state is passed to the wrapped function.
    unroll: The unroll value forwarded to `scan`.
    name: Name of the Haiku context.

  Returns:
    A callable that turns a valid function into its layer-stacked version.
  """
  def iterate(f):
    if not with_state:
      _check_no_varargs(f)

      @functools.wraps(f)
      def wrapped(*args):
        stack = _LayerStackNoState(f, num_layers, unroll=unroll, name=name)
        carried = stack(args, None)[0]
        # A single-argument function must give back a single value rather
        # than a 1-tuple.
        return carried[0] if len(args) == 1 else carried

      return wrapped

    @functools.wraps(f)
    def wrapped(x, *args):
      # Every per-layer input needs one entry per layer.
      for ys in args:
        assert ys.shape[0] == num_layers
      stack = _LayerStackWithState(f, num_layers, unroll=unroll, name=name)
      return stack(x, *args)

    return wrapped
  return iterate
af_backprop/alphafold/model/lddt.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """lDDT protein distance score."""
16
+ import jax.numpy as jnp
17
+
18
+
19
def lddt(predicted_points,
         true_points,
         true_points_mask,
         cutoff=15.,
         per_residue=False):
  """Computes an approximate lDDT score for a batch of coordinates.

  lDDT reference:
  Mariani, V., Biasini, M., Barbato, A. & Schwede, T. lDDT: A local
  superposition-free score for comparing protein structures and models using
  distance difference tests. Bioinformatics 29, 2722–2728 (2013).

  The score compares the predicted pairwise distance matrix against the true
  one. Only pairs that lie closer than `cutoff` *in the true structure* are
  taken into account.

  The value is approximate because the physical-feasibility terms of the
  original paper (e.g. bond length violations) are not included.

  Args:
    predicted_points: (batch, length, 3) array of predicted 3D points
    true_points: (batch, length, 3) array of true 3D points
    true_points_mask: (batch, length, 1) binary-valued float array. This mask
      should be 1 for points that exist in the true points.
    cutoff: Maximum distance for a pair of points to be included
    per_residue: If true, return score for each residue. Note that the overall
      lDDT is not exactly the mean of the per_residue lDDT's because some
      residues have more contacts than others.

  Returns:
    An (approximate, see above) lDDT score in the range 0-1.
  """

  assert len(predicted_points.shape) == 3
  assert predicted_points.shape[-1] == 3
  assert true_points_mask.shape[-1] == 1
  assert len(true_points_mask.shape) == 3

  def pairwise_distances(points):
    # The small constant keeps the square root away from exactly zero.
    deltas = points[:, :, None] - points[:, None, :]
    return jnp.sqrt(1e-10 + jnp.sum(deltas**2, axis=-1))

  dmat_true = pairwise_distances(true_points)
  dmat_predicted = pairwise_distances(predicted_points)

  # A pair is scored when it is within the cutoff in the true structure, both
  # of its points are present, and it is not a point paired with itself.
  dists_to_score = (dmat_true < cutoff).astype(jnp.float32) * true_points_mask
  dists_to_score = dists_to_score * jnp.transpose(true_points_mask, [0, 2, 1])
  dists_to_score = dists_to_score * (1. - jnp.eye(dmat_true.shape[1]))

  # Absolute error of every pairwise distance.
  dist_l1 = jnp.abs(dmat_true - dmat_predicted)

  # A pair earns a quarter point for each tolerance its error stays below.
  # The physical plausibility correction of the true lDDT is ignored.
  tolerances = (0.5, 1.0, 2.0, 4.0)
  score = 0.25 * sum(
      (dist_l1 < tol).astype(jnp.float32) for tol in tolerances)

  # Normalize over the appropriate axes.
  reduce_axes = (-1,) if per_residue else (-2, -1)
  num_scored = jnp.sum(dists_to_score, axis=reduce_axes)
  total_score = jnp.sum(dists_to_score * score, axis=reduce_axes)
  norm = 1. / (1e-10 + num_scored)
  score = norm * (1e-10 + total_score)

  return score
af_backprop/alphafold/model/mapping.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Specialized mapping functions."""
16
+
17
+ import functools
18
+
19
+ from typing import Any, Callable, Optional, Sequence, Union
20
+
21
+ import haiku as hk
22
+ import jax
23
+ import jax.numpy as jnp
24
+
25
+
26
# Documentation-only type aliases for signatures; both accept anything.
PYTREE_JAX_ARRAY = Any
PYTREE = Any

# Sentinel that stands in for `None` entries of an axes specification.
PROXY = object()
partial = functools.partial
31
+
32
+
33
def _maybe_slice(array, i, slice_size, axis):
  """Slices `slice_size` entries of `array` from `i` along `axis`.

  When `axis` is the `PROXY` sentinel, `array` is returned unsliced.
  """
  if axis is not PROXY:
    return jax.lax.dynamic_slice_in_dim(
        array, i, slice_size=slice_size, axis=axis)
  return array
39
+
40
+
41
def _maybe_get_size(array, axis):
  """Returns the size of `array` along `axis`, or -1 for the `PROXY` sentinel.

  Args:
    array: Object exposing a `shape` attribute.
    axis: Axis index, or the `PROXY` sentinel.

  Returns:
    `array.shape[axis]`, or -1 when `axis` is `PROXY`.
  """
  # Sentinels are matched by identity, as `_maybe_slice` does.
  if axis is PROXY:
    return -1
  return array.shape[axis]
46
+
47
+
48
def _expand_axes(axes, values, name='sharded_apply'):
  """Expands an axes specification to the full tree structure of `values`.

  Args:
    axes: Axes specification handed to `jax.api_util.flatten_axes` together
      with the tree structure of `values`.
    values: Pytree whose structure the result takes.
    name: Name forwarded to `jax.api_util.flatten_axes`.

  Returns:
    A pytree with the structure of `values` holding one axis per leaf, in
    which every `None` axis is replaced by the `PROXY` sentinel.
  """
  values_tree_def = jax.tree_util.tree_flatten(values)[1]
  flat_axes = jax.api_util.flatten_axes(name, values_tree_def, axes)
  # Replace None's with PROXY
  flat_axes = [PROXY if x is None else x for x in flat_axes]
  return jax.tree_util.tree_unflatten(values_tree_def, flat_axes)
54
+
55
+
56
def sharded_map(
    fun: Callable[..., PYTREE_JAX_ARRAY],
    shard_size: Union[int, None] = 1,
    in_axes: Union[int, PYTREE] = 0,
    out_axes: Union[int, PYTREE] = 0) -> Callable[..., PYTREE_JAX_ARRAY]:
  """Sharded vmap.

  `fun` is first vectorized with `hk.vmap` and the result is then applied
  shard by shard through `sharded_apply`, `shard_size` entries at a time. This
  allows a smooth trade-off between memory usage (as in a plain map) and
  throughput (as in a vmap).

  Args:
    fun: Function to apply smap transform to.
    shard_size: Integer denoting shard size.
    in_axes: Either integer or pytree describing which axis to map over for each
      input to `fun`, None denotes broadcasting.
    out_axes: integer or pytree denoting to what axis in the output the mapped
      over axis maps.

  Returns:
    function with smap applied.
  """
  return sharded_apply(
      hk.vmap(fun, in_axes, out_axes), shard_size, in_axes, out_axes)
80
+
81
+
82
def sharded_apply(
    fun: Callable[..., PYTREE_JAX_ARRAY],  # pylint: disable=g-bare-generic
    shard_size: Union[int, None] = 1,
    in_axes: Union[int, PYTREE] = 0,
    out_axes: Union[int, PYTREE] = 0,
    new_out_axes: bool = False) -> Callable[..., PYTREE_JAX_ARRAY]:
  """Sharded apply.

  Applies `fun` over shards to axes, in a way similar to vmap,
  but does so in shards of `shard_size`. Shards are stacked after.
  This allows a smooth trade-off between
  memory usage (as in a plain map) vs higher throughput (as in a vmap).

  Full-size shards are processed by an `hk.scan`; when the mapped size is not
  a multiple of `shard_size`, one extra call handles the smaller remainder.
  Every call writes its result into a preallocated zero-initialized output.

  Args:
    fun: Function to apply smap transform to.
    shard_size: Integer denoting shard size. `None` disables sharding, in
      which case `fun` itself is returned.
    in_axes: Either integer or pytree describing which axis to map over for each
      input to `fun`, None denotes broadcasting.
    out_axes: integer or pytree denoting to what axis in the output the mapped
      over axis maps.
    new_out_axes: whether to stack outputs on new axes. This assumes that the
      output sizes for each shard (including the possible remainder shard) are
      the same.

  Returns:
    function with smap applied.

  Raises:
    NotImplementedError: If `new_out_axes` is true.
  """
  docstr = ('Mapped version of {fun}. Takes similar arguments to {fun} '
            'but with additional array axes over which {fun} is mapped.')
  if new_out_axes:
    raise NotImplementedError('New output axes not yet implemented.')

  # shard size None denotes no sharding
  if shard_size is None:
    return fun

  @jax.util.wraps(fun, docstr=docstr)
  def mapped_fn(*args):
    # Expand in axes and Determine Loop range
    in_axes_ = _expand_axes(in_axes, args)

    # Broadcast (PROXY) inputs report -1; all mapped inputs must agree.
    in_sizes = jax.tree_util.tree_map(_maybe_get_size, args, in_axes_)
    flat_sizes = jax.tree_util.tree_flatten(in_sizes)[0]
    in_size = max(flat_sizes)
    assert all(i in {in_size, -1} for i in flat_sizes)

    num_extra_shards = (in_size - 1) // shard_size

    # Fix Up if necessary
    last_shard_size = in_size % shard_size
    last_shard_size = shard_size if last_shard_size == 0 else last_shard_size

    def apply_fun_to_slice(slice_start, slice_size):
      input_slice = jax.tree_util.tree_map(
          lambda array, axis: _maybe_slice(array, slice_start, slice_size, axis
                                          ), args, in_axes_)
      return fun(*input_slice)

    # Output dtypes and shapes come from an abstract evaluation of the last
    # (possibly smaller) shard.
    remainder_shape_dtype = hk.eval_shape(
        partial(apply_fun_to_slice, 0, last_shard_size))
    out_dtypes = jax.tree_util.tree_map(
        lambda x: x.dtype, remainder_shape_dtype)
    out_shapes = jax.tree_util.tree_map(
        lambda x: x.shape, remainder_shape_dtype)
    out_axes_ = _expand_axes(out_axes, remainder_shape_dtype)

    if num_extra_shards > 0:
      regular_shard_shape_dtype = hk.eval_shape(
          partial(apply_fun_to_slice, 0, shard_size))
      shard_shapes = jax.tree_util.tree_map(
          lambda x: x.shape, regular_shard_shape_dtype)

      def make_output_shape(axis, shard_shape, remainder_shape):
        # Along `axis`: all full-size shards plus the last shard.
        return shard_shape[:axis] + (
            shard_shape[axis] * num_extra_shards +
            remainder_shape[axis],) + shard_shape[axis + 1:]

      out_shapes = jax.tree_util.tree_map(
          make_output_shape, out_axes_, shard_shapes, out_shapes)

    # Calls dynamic update slice with a different argument order: `axis` is
    # supplied positionally per leaf by tree_map, while the start index `i`
    # is bound by keyword.
    def dynamic_update_slice_in_dim(full_array, update, axis, i):
      return jax.lax.dynamic_update_slice_in_dim(full_array, update, i, axis)

    def compute_shard(outputs, slice_start, slice_size):
      slice_out = apply_fun_to_slice(slice_start, slice_size)
      update_slice = partial(
          dynamic_update_slice_in_dim, i=slice_start)
      return jax.tree_util.tree_map(update_slice, outputs, slice_out, out_axes_)

    def scan_iteration(outputs, i):
      new_outputs = compute_shard(outputs, i, shard_size)
      return new_outputs, ()

    # Start offsets of every full-size shard.
    slice_starts = jnp.arange(0, in_size - shard_size + 1, shard_size)

    def allocate_buffer(dtype, shape):
      return jnp.zeros(shape, dtype=dtype)

    outputs = jax.tree_util.tree_map(allocate_buffer, out_dtypes, out_shapes)

    if slice_starts.shape[0] > 0:
      outputs, _ = hk.scan(scan_iteration, outputs, slice_starts)

    # The remainder shard, if any, is handled outside the scan.
    if last_shard_size != shard_size:
      remainder_start = in_size - last_shard_size
      outputs = compute_shard(outputs, remainder_start, last_shard_size)

    return outputs

  return mapped_fn
191
+
192
+
193
def inference_subbatch(
    module: Callable[..., PYTREE_JAX_ARRAY],
    subbatch_size: int,
    batched_args: Sequence[PYTREE_JAX_ARRAY],
    nonbatched_args: Sequence[PYTREE_JAX_ARRAY],
    low_memory: bool = True,
    input_subbatch_dim: int = 0,
    output_subbatch_dim: Optional[int] = None) -> PYTREE_JAX_ARRAY:
  """Runs `module` over sub-batches (like batch apply with split and concat).

  Args:
    module: Callable invoked with the batched arguments followed by the
      non-batched ones.
    subbatch_size: Shard size handed to `sharded_apply`.
    batched_args: Arguments that are split along `input_subbatch_dim`; at
      least one is required.
    nonbatched_args: Arguments passed unchanged to every call.
    low_memory: If false, `module` is called once on the full inputs.
    input_subbatch_dim: Axis of the batched arguments that is split.
    output_subbatch_dim: Axis of the outputs holding the split dimension;
      defaults to `input_subbatch_dim`.

  Returns:
    The output of `module`.
  """
  assert len(batched_args) > 0  # pylint: disable=g-explicit-length-test

  if not low_memory:
    return module(*batched_args, *nonbatched_args)

  out_dim = (input_subbatch_dim if output_subbatch_dim is None
             else output_subbatch_dim)

  def run_module(*shard_args):
    return module(*shard_args, *nonbatched_args)

  sharded_module = sharded_apply(run_module,
                                 shard_size=subbatch_size,
                                 in_axes=input_subbatch_dim,
                                 out_axes=out_dim)
  return sharded_module(*batched_args)
af_backprop/alphafold/model/model.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Code for constructing the model."""
16
+ from typing import Any, Mapping, Optional, Union
17
+
18
+ from absl import logging
19
+ from alphafold.common import confidence
20
+ from alphafold.model import features
21
+ from alphafold.model import modules
22
+ import haiku as hk
23
+ import jax
24
+ import ml_collections
25
+ import numpy as np
26
+ import tensorflow.compat.v1 as tf
27
+ import tree
28
+
29
+
30
def get_confidence_metrics(
    prediction_result: Mapping[str, Any]) -> Mapping[str, Any]:
  """Derives confidence metrics from a prediction result.

  Args:
    prediction_result: Model output holding 'predicted_lddt' logits and,
      optionally, 'predicted_aligned_error' logits and breaks.

  Returns:
    A dict with 'plddt' and, when 'predicted_aligned_error' is present, the
    entries from `confidence.compute_predicted_aligned_error` plus 'ptm'.
  """
  plddt_logits = prediction_result['predicted_lddt']['logits']
  confidence_metrics = {'plddt': confidence.compute_plddt(plddt_logits)}

  if 'predicted_aligned_error' in prediction_result:
    aligned_error = prediction_result['predicted_aligned_error']
    confidence_metrics.update(confidence.compute_predicted_aligned_error(
        aligned_error['logits'], aligned_error['breaks']))
    confidence_metrics['ptm'] = confidence.predicted_tm_score(
        aligned_error['logits'], aligned_error['breaks'])

  return confidence_metrics
46
+
47
+
48
class RunModel:
  """Container for JAX model."""

  def __init__(self,
               config: ml_collections.ConfigDict,
               params: Optional[Mapping[str, Mapping[str, np.ndarray]]] = None,
               is_training=True,
               return_representations=True):
    """Builds the jitted `init` and `apply` functions of the AlphaFold model.

    Args:
      config: Model configuration; `config.model` configures `AlphaFold`.
      params: Optional model parameters.
      is_training: Forwarded to the model call.
      return_representations: Forwarded to the model call.
    """
    self.config = config
    self.params = params

    def _forward_fn(batch):
      return modules.AlphaFold(self.config.model)(
          batch,
          is_training=is_training,
          compute_loss=False,
          ensemble_representations=False,
          return_representations=return_representations)

    transformed = hk.transform(_forward_fn)
    self.apply = jax.jit(transformed.apply)
    self.init = jax.jit(transformed.init)

  def init_params(self, feat: features.FeatureDict, random_seed: int = 0):
    """Initializes the model parameters.

    Random initialization only happens when no parameters are set yet.

    Args:
      feat: A dictionary of NumPy feature arrays as output by
        RunModel.process_features.
      random_seed: A random seed to use to initialize the parameters if none
        were set when this class was initialized.
    """
    if self.params:
      return
    # Init params randomly.
    init_key = jax.random.PRNGKey(random_seed)
    self.params = hk.data_structures.to_mutable_dict(
        self.init(init_key, feat))
    logging.warning('Initialized parameters randomly')

  def process_features(
      self,
      raw_features: Union[tf.train.Example, features.FeatureDict],
      random_seed: int) -> features.FeatureDict:
    """Processes features to prepare for feeding them into the model.

    Args:
      raw_features: The output of the data pipeline either as a dict of NumPy
        arrays or as a tf.train.Example.
      random_seed: The random seed to use when processing the features.

    Returns:
      A dict of NumPy feature arrays suitable for feeding into the model.
    """
    if not isinstance(raw_features, dict):
      return features.tf_example_to_features(
          tf_example=raw_features,
          config=self.config,
          random_seed=random_seed)
    return features.np_example_to_features(
        np_example=raw_features,
        config=self.config,
        random_seed=random_seed)

  def eval_shape(self, feat: features.FeatureDict) -> jax.ShapeDtypeStruct:
    """Returns the output shapes of `apply` for `feat` via `jax.eval_shape`."""
    self.init_params(feat)
    logging.info('Running eval_shape with shape(feat) = %s',
                 tree.map_structure(lambda x: x.shape, feat))
    output_shape = jax.eval_shape(
        self.apply, self.params, jax.random.PRNGKey(0), feat)
    logging.info('Output shape was %s', output_shape)
    return output_shape

  def predict(self, feat: features.FeatureDict) -> Mapping[str, Any]:
    """Makes a prediction by inferencing the model on the provided features.

    Args:
      feat: A dictionary of NumPy feature arrays as output by
        RunModel.process_features.

    Returns:
      A dictionary of model outputs.
    """
    self.init_params(feat)
    logging.info('Running predict with shape(feat) = %s',
                 tree.map_structure(lambda x: x.shape, feat))
    result = self.apply(self.params, jax.random.PRNGKey(0), feat)
    # Block on every output so that benchmark timings are accurate. Computing
    # the confidence metrics already blocks on some outputs; this covers all
    # of them.
    jax.tree_map(lambda x: x.block_until_ready(), result)
    if self.config.use_struct:
      metrics = get_confidence_metrics(result)
      result.update(metrics)
    logging.info('Output shape was %s',
                 tree.map_structure(lambda x: x.shape, result))
    return result
af_backprop/alphafold/model/modules.py ADDED
@@ -0,0 +1,2164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Modules and code used in the core part of AlphaFold.
16
+
17
+ The structure generation code is in 'folding.py'.
18
+ """
19
+ import functools
20
+ from alphafold.common import residue_constants
21
+ from alphafold.model import all_atom
22
+ from alphafold.model import common_modules
23
+ from alphafold.model import folding
24
+ from alphafold.model import layer_stack
25
+ from alphafold.model import lddt
26
+ from alphafold.model import mapping
27
+ from alphafold.model import prng
28
+ from alphafold.model import quat_affine
29
+ from alphafold.model import utils
30
+ import haiku as hk
31
+ import jax
32
+ import jax.numpy as jnp
33
+
34
+ from alphafold.model.r3 import Rigids, Rots, Vecs
35
+
36
+
37
def softmax_cross_entropy(logits, labels):
  """Computes softmax cross entropy given logits and one-hot class labels.

  Args:
    logits: Unnormalized class scores; the class axis is the last axis.
    labels: Target weights per class, multiplied elementwise with the
      log-softmax of `logits`.

  Returns:
    An array holding `-sum(labels * log_softmax(logits))` reduced over the
    last axis.
  """
  log_probs = jax.nn.log_softmax(logits)
  loss = -jnp.sum(labels * log_probs, axis=-1)
  return jnp.asarray(loss)
41
+
42
+
43
def sigmoid_cross_entropy(logits, labels):
  """Computes sigmoid cross entropy given logits and multiple class labels.

  Args:
    logits: Unnormalized scores, one independent binary decision per element.
    labels: Targets with the same (or a broadcastable) shape as `logits`.

  Returns:
    The elementwise loss
    `-labels * log(sigmoid(x)) - (1 - labels) * log(1 - sigmoid(x))`.
  """
  log_p = jax.nn.log_sigmoid(logits)
  # log(1 - sigmoid(x)) = log_sigmoid(-x), the latter is more numerically stable
  log_not_p = jax.nn.log_sigmoid(-logits)
  loss = -labels * log_p - (1. - labels) * log_not_p
  return jnp.asarray(loss)
50
+
51
+
52
def apply_dropout(*, tensor, safe_key, rate, is_training, broadcast_dim=None):
  """Applies dropout to a tensor.

  Args:
    tensor: Array to apply dropout to.
    safe_key: Object whose `get()` returns the PRNG key used to sample the
      keep mask. Only accessed when `is_training` is true.
    rate: Drop probability; kept entries are rescaled by `1 / (1 - rate)`.
    is_training: When false the tensor is returned unchanged.
    broadcast_dim: Optional axis along which a single mask is shared
      (the mask has size 1 on that axis and broadcasts over it).

  Returns:
    The tensor with dropout applied, or `tensor` itself when not training.
  """
  if not is_training:
    return tensor

  # NOTE: there is deliberately no `rate != 0.0` shortcut here (it is disabled
  # in this version of the file), so the mask is sampled for every rate.
  mask_shape = list(tensor.shape)
  if broadcast_dim is not None:
    mask_shape[broadcast_dim] = 1
  keep_rate = 1.0 - rate
  keep = jax.random.bernoulli(safe_key.get(), keep_rate, shape=mask_shape)
  return keep * tensor / keep_rate
63
+
64
+
65
def dropout_wrapper(module,
                    input_act,
                    mask,
                    safe_key,
                    global_config,
                    output_act=None,
                    is_training=True,
                    scale_rate=1.0,
                    **kwargs):
  """Applies module + dropout + residual update.

  Args:
    module: Callable invoked as `module(input_act, mask, is_training=...,
      **kwargs)`. Its `config` supplies `dropout_rate`, `shared_dropout` and
      (when dropout is shared) `orientation`.
    input_act: Activations passed to the module.
    mask: Mask passed to the module.
    safe_key: Random key wrapper forwarded to `apply_dropout`.
    global_config: Global config; `deterministic` forces the dropout rate to 0.
    output_act: Activations the residual is added to. Defaults to `input_act`.
    is_training: Whether the module is in training mode.
    scale_rate: Multiplier applied to the configured dropout rate.
    **kwargs: Extra keyword arguments forwarded to the module.

  Returns:
    `output_act` plus the (possibly dropped-out) module output.
  """
  if output_act is None:
    output_act = input_act

  gc = global_config
  residual = module(input_act, mask, is_training=is_training, **kwargs)
  dropout_rate = 0.0 if gc.deterministic else module.config.dropout_rate

  # With shared dropout one mask is broadcast along a single axis; which axis
  # depends on whether the module works per row or per column.
  if module.config.shared_dropout:
    broadcast_dim = 0 if module.config.orientation == 'per_row' else 1
  else:
    broadcast_dim = None

  residual = apply_dropout(tensor=residual,
                           safe_key=safe_key,
                           rate=dropout_rate * scale_rate,
                           is_training=is_training,
                           broadcast_dim=broadcast_dim)

  return output_act + residual
99
+
100
+
101
def create_extra_msa_feature(batch):
  """Expand extra_msa into 1hot and concat with other extra msa features.

  We do this as late as possible as the one_hot extra msa can be very large.

  Arguments:
    batch: a dictionary with the following keys:
     * 'extra_msa': [N_extra_seq, N_res] MSA that wasn't selected as a cluster
       centre. Note, that this is not one-hot encoded.
     * 'extra_has_deletion': [N_extra_seq, N_res] Whether there is a deletion to
       the left of each position in the extra MSA.
     * 'extra_deletion_value': [N_extra_seq, N_res] The number of deletions to
       the left of each position in the extra MSA.

  Returns:
    Concatenated tensor of extra MSA features: the 23-way one-hot encoding
    followed by the two deletion features along the last axis.
  """
  # 23 = 20 amino acids + 'X' for unknown + gap + bert mask
  msa_1hot = jax.nn.one_hot(batch['extra_msa'], 23)
  has_deletion = jnp.expand_dims(batch['extra_has_deletion'], axis=-1)
  deletion_value = jnp.expand_dims(batch['extra_deletion_value'], axis=-1)
  return jnp.concatenate([msa_1hot, has_deletion, deletion_value], axis=-1)
124
+
125
+
126
class AlphaFoldIteration(hk.Module):
  """A single recycling iteration of AlphaFold architecture.

  Computes ensembled (averaged) representations from the provided features.
  These representations are then passed to the various heads
  that have been requested by the configuration file. Each head also returns a
  loss which is combined as a weighted sum to produce the total loss.

  Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 3-22
  """

  def __init__(self, config, global_config, name='alphafold_iteration'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self,
               ensembled_batch,
               non_ensembled_batch,
               is_training,
               compute_loss=False,
               ensemble_representations=False,
               return_representations=False):
    """Runs the evoformer and all configured heads once.

    Arguments:
      ensembled_batch: Dictionary of features with a leading ensemble axis.
      non_ensembled_batch: Dictionary of features shared by all ensemble
        members (merged into every sliced batch).
      is_training: Whether the module is in training mode.
      compute_loss: Whether to also evaluate and sum the head losses.
      ensemble_representations: Whether to average representations over the
        ensemble axis. When false the ensemble axis must have size 1.
      return_representations: Unused in this method.

    Returns:
      `ret` when compute_loss is false, otherwise `(ret, total_loss)`, where
      `ret` maps head names (plus 'representations') to their outputs.
    """

    num_ensemble = jnp.asarray(ensembled_batch['seq_length'].shape[0])

    if not ensemble_representations:
      assert ensembled_batch['seq_length'].shape[0] == 1

    def slice_batch(i):
      b = {k: v[i] for k, v in ensembled_batch.items()}
      b.update(non_ensembled_batch)
      return b

    # Compute representations for each batch element and average.
    evoformer_module = EmbeddingsAndEvoformer(
        self.config.embeddings_and_evoformer, self.global_config)
    batch0 = slice_batch(0)
    representations = evoformer_module(batch0, is_training)

    # MSA representations are not ensembled so
    # we don't pass tensor into the loop.
    msa_representation = representations['msa']
    del representations['msa']

    # Average the representations (except MSA) over the batch dimension.
    if ensemble_representations:
      def body(x):
        """Add one element to the representations ensemble."""
        i, current_representations = x
        feats = slice_batch(i)
        representations_update = evoformer_module(
            feats, is_training)

        new_representations = {}
        for k in current_representations:
          new_representations[k] = (
              current_representations[k] + representations_update[k])
        return i+1, new_representations

      if hk.running_init():
        # When initializing the Haiku module, run one iteration of the
        # while_loop to initialize the Haiku modules used in `body`.
        _, representations = body((1, representations))
      else:
        _, representations = hk.while_loop(
            lambda x: x[0] < num_ensemble,
            body,
            (1, representations))

      for k in representations:
        if k != 'msa':
          representations[k] /= num_ensemble.astype(representations[k].dtype)

    representations['msa'] = msa_representation
    batch = batch0  # We are not ensembled from here on.

    # `num_residues` is not used below; the unpacking still checks the rank of
    # 'aatype' (integer ids are 2-D, any other dtype is expected to be 3-D).
    if jnp.issubdtype(ensembled_batch['aatype'].dtype, jnp.integer):
      _, num_residues = ensembled_batch['aatype'].shape
    else:
      _, num_residues, _ = ensembled_batch['aatype'].shape

    if self.config.use_struct:
      struct_module = folding.StructureModule
    else:
      struct_module = folding.dummy

    heads = {}
    for head_name, head_config in sorted(self.config.heads.items()):
      if not head_config.weight:
        continue  # Do not instantiate zero-weight heads.
      head_factory = {
          'masked_msa': MaskedMsaHead,
          'distogram': DistogramHead,
          'structure_module': functools.partial(struct_module,
                                                compute_loss=compute_loss),
          'predicted_lddt': PredictedLDDTHead,
          'predicted_aligned_error': PredictedAlignedErrorHead,
          'experimentally_resolved': ExperimentallyResolvedHead,
      }[head_name]
      heads[head_name] = (head_config,
                          head_factory(head_config, self.global_config))

    total_loss = 0.
    ret = {}
    ret['representations'] = representations

    def loss(module, head_config, ret, name, filter_ret=True):
      """Stores the head's loss outputs in `ret[name]`; returns weighted loss."""
      if filter_ret:
        value = ret[name]
      else:
        value = ret
      loss_output = module.loss(value, batch)
      ret[name].update(loss_output)
      loss = head_config.weight * ret[name]['loss']
      return loss

    for name, (head_config, module) in heads.items():
      # Skip PredictedLDDTHead and PredictedAlignedErrorHead until
      # StructureModule is executed.
      if name in ('predicted_lddt', 'predicted_aligned_error'):
        continue
      else:
        ret[name] = module(representations, batch, is_training)
        if 'representations' in ret[name]:
          # Extra representations from the head. Used by the structure module
          # to provide activations for the PredictedLDDTHead.
          representations.update(ret[name].pop('representations'))
      if compute_loss:
        total_loss += loss(module, head_config, ret, name)

    # NOTE(review): the original indentation was lost in this copy of the
    # file; both late heads are assumed to be gated on `use_struct` -- confirm
    # against the upstream source.
    if self.config.use_struct:
      if self.config.heads.get('predicted_lddt.weight', 0.0):
        # Add PredictedLDDTHead after StructureModule executes.
        name = 'predicted_lddt'
        # Feed all previous results to give access to structure_module result.
        head_config, module = heads[name]
        ret[name] = module(representations, batch, is_training)
        if compute_loss:
          total_loss += loss(module, head_config, ret, name, filter_ret=False)

      if ('predicted_aligned_error' in self.config.heads
          and self.config.heads.get('predicted_aligned_error.weight', 0.0)):
        # Add PredictedAlignedErrorHead after StructureModule executes.
        name = 'predicted_aligned_error'
        # Feed all previous results to give access to structure_module result.
        head_config, module = heads[name]
        ret[name] = module(representations, batch, is_training)
        if compute_loss:
          total_loss += loss(module, head_config, ret, name, filter_ret=False)

    if compute_loss:
      return ret, total_loss
    else:
      return ret
280
+
281
class AlphaFold(hk.Module):
  """AlphaFold model with recycling.

  Jumper et al. (2021) Suppl. Alg. 2 "Inference"
  """

  def __init__(self, config, name='alphafold'):
    super().__init__(name=name)
    self.config = config
    self.global_config = config.global_config

  def __call__(
      self,
      batch,
      is_training,
      compute_loss=False,
      ensemble_representations=False,
      return_representations=False):
    """Run the AlphaFold model.

    Arguments:
      batch: Dictionary with inputs to the AlphaFold model. A 'scale_rate'
        entry is added in place when it is missing.
      is_training: Whether the system is in training or inference mode.
      compute_loss: Whether to compute losses (requires extra features
        to be present in the batch and knowing the true structure).
      ensemble_representations: Whether to use ensembling of representations.
      return_representations: Whether to also return the intermediate
        representations.

    Returns:
      When compute_loss is True:
        a tuple of the output of AlphaFoldIteration and a one-element list
        holding the total loss.
      When compute_loss is False:
        just output of AlphaFoldIteration.

      The output of AlphaFoldIteration is a nested dictionary containing
      predictions from the various heads.
    """
    if "scale_rate" not in batch:
      batch["scale_rate"] = jnp.ones((1,))
    impl = AlphaFoldIteration(self.config, self.global_config)
    # Integer 'aatype' is [batch, N_res]; any other dtype carries one more
    # trailing axis.
    if jnp.issubdtype(batch['aatype'].dtype, jnp.integer):
      batch_size, num_residues = batch['aatype'].shape
    else:
      batch_size, num_residues, _ = batch['aatype'].shape

    def get_prev(ret):
      """Collects the iteration outputs that are fed into the next recycle."""
      new_prev = {
          'prev_msa_first_row': ret['representations']['msa_first_row'],
          'prev_pair': ret['representations']['pair'],
          'prev_dgram': ret["distogram"]["logits"],
      }
      if self.config.use_struct:
        new_prev.update({
            'prev_pos': ret['structure_module']['final_atom_positions'],
            'prev_plddt': ret["predicted_lddt"]["logits"]})

      if "predicted_aligned_error" in ret:
        new_prev["prev_pae"] = ret["predicted_aligned_error"]["logits"]

      if not self.config.backprop_recycle:
        for k in ["prev_pos", "prev_msa_first_row", "prev_pair"]:
          if k in new_prev:
            new_prev[k] = jax.lax.stop_gradient(new_prev[k])

      return new_prev

    def do_call(prev,
                recycle_idx,
                compute_loss=compute_loss):
      """Runs one AlphaFoldIteration with `prev` as non-ensembled features."""
      if self.config.resample_msa_in_recycling:
        num_ensemble = batch_size // (self.config.num_recycle + 1)
        def slice_recycle_idx(x):
          start = recycle_idx * num_ensemble
          size = num_ensemble
          return jax.lax.dynamic_slice_in_dim(x, start, size, axis=0)
        ensembled_batch = jax.tree_util.tree_map(slice_recycle_idx, batch)
      else:
        ensembled_batch = batch
      non_ensembled_batch = jax.tree_util.tree_map(lambda x: x, prev)

      return impl(ensembled_batch=ensembled_batch,
                  non_ensembled_batch=non_ensembled_batch,
                  is_training=is_training,
                  compute_loss=compute_loss,
                  ensemble_representations=ensemble_representations)

    emb_config = self.config.embeddings_and_evoformer
    prev = {
        'prev_msa_first_row': jnp.zeros(
            [num_residues, emb_config.msa_channel]),
        'prev_pair': jnp.zeros(
            [num_residues, num_residues, emb_config.pair_channel]),
        'prev_dgram': jnp.zeros([num_residues, num_residues, 64]),
    }
    if self.config.use_struct:
      prev.update({
          'prev_pos': jnp.zeros(
              [num_residues, residue_constants.atom_type_num, 3]),
          'prev_plddt': jnp.zeros([num_residues, 50]),
          'prev_pae': jnp.zeros([num_residues, num_residues, 64])})

    # Optional caller-provided starting values override the zero initial state.
    for k in ["pos", "msa_first_row", "pair", "dgram"]:
      if f"init_{k}" in batch:
        prev[f"prev_{k}"] = batch[f"init_{k}"][0]

    if self.config.num_recycle:
      if 'num_iter_recycling' in batch:
        # Training time: num_iter_recycling is in batch.
        # The value for each ensemble batch is the same, so arbitrarily taking
        # 0-th.
        num_iter = batch['num_iter_recycling'][0]

        # Add insurance that we will not run more
        # recyclings than the model is configured to run.
        num_iter = jnp.minimum(num_iter, self.config.num_recycle)
      else:
        # Eval mode or tests: use the maximum number of iterations.
        num_iter = self.config.num_recycle

      def add_prev(p, p_):
        """Accumulates the logits of `p` into `p_` (in place)."""
        p_["prev_dgram"] += p["prev_dgram"]
        if self.config.use_struct:
          p_["prev_plddt"] += p["prev_plddt"]
          p_["prev_pae"] += p["prev_pae"]
        return p_

      def body(p, i):
        p_ = get_prev(do_call(p, recycle_idx=i, compute_loss=False))
        if self.config.add_prev:
          p_ = add_prev(p, p_)
        return p_, None

      if hk.running_init():
        # Run the body once outside the scan while initializing parameters.
        prev, _ = body(prev, 0)
      else:
        prev, _ = hk.scan(body, prev, jnp.arange(num_iter))

    else:
      num_iter = 0

    ret = do_call(prev=prev, recycle_idx=num_iter)
    # With compute_loss the iteration returns (outputs, total_loss). Everything
    # below that reads or edits head outputs must use the outputs dict, not the
    # tuple (indexing the tuple by head name raises TypeError).
    outputs = ret[0] if compute_loss else ret
    if self.config.add_prev:
      prev_ = get_prev(outputs)
    if compute_loss:
      ret = ret[0], [ret[1]]

    if not return_representations:
      del outputs['representations']

    if self.config.add_prev and num_iter > 0:
      # Replace the final logits by their average over all recycling steps.
      prev_ = add_prev(prev, prev_)
      outputs["distogram"]["logits"] = prev_["prev_dgram"]/(num_iter+1)
      if self.config.use_struct:
        outputs["predicted_lddt"]["logits"] = (
            prev_["prev_plddt"]/(num_iter+1))
        if "predicted_aligned_error" in outputs:
          outputs["predicted_aligned_error"]["logits"] = (
              prev_["prev_pae"]/(num_iter+1))

    return ret
437
+
438
class TemplatePairStack(hk.Module):
  """Pair stack for the templates.

  Jumper et al. (2021) Suppl. Alg. 16 "TemplatePairStack"
  """

  def __init__(self, config, global_config, name='template_pair_stack'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, pair_act, pair_mask, is_training, safe_key=None,
               scale_rate=1.0):
    """Builds TemplatePairStack module.

    Arguments:
      pair_act: Pair activations for single template, shape [N_res, N_res, c_t].
      pair_mask: Pair mask, shape [N_res, N_res].
      is_training: Whether the module is in training mode.
      safe_key: Safe key object encapsulating the random number generation key.
        A fresh key is drawn from Haiku when omitted.
      scale_rate: Multiplier for the dropout rate, forwarded to
        `dropout_wrapper`.

    Returns:
      Updated pair_act, shape [N_res, N_res, c_t].
    """

    if safe_key is None:
      safe_key = prng.SafeKey(hk.next_rng_key())

    gc = self.global_config
    c = self.config

    if not c.num_block:
      return pair_act

    def block(x):
      """One block of the template pair stack."""
      pair_act, safe_key = x

      dropout_wrapper_fn = functools.partial(
          dropout_wrapper, is_training=is_training, global_config=gc,
          scale_rate=scale_rate)

      # One key is carried to the next block, five feed the layers below.
      safe_key, *sub_keys = safe_key.split(6)
      sub_keys = iter(sub_keys)

      pair_act = dropout_wrapper_fn(
          TriangleAttention(c.triangle_attention_starting_node, gc,
                            name='triangle_attention_starting_node'),
          pair_act,
          pair_mask,
          next(sub_keys))
      pair_act = dropout_wrapper_fn(
          TriangleAttention(c.triangle_attention_ending_node, gc,
                            name='triangle_attention_ending_node'),
          pair_act,
          pair_mask,
          next(sub_keys))
      pair_act = dropout_wrapper_fn(
          TriangleMultiplication(c.triangle_multiplication_outgoing, gc,
                                 name='triangle_multiplication_outgoing'),
          pair_act,
          pair_mask,
          next(sub_keys))
      pair_act = dropout_wrapper_fn(
          TriangleMultiplication(c.triangle_multiplication_incoming, gc,
                                 name='triangle_multiplication_incoming'),
          pair_act,
          pair_mask,
          next(sub_keys))
      pair_act = dropout_wrapper_fn(
          Transition(c.pair_transition, gc, name='pair_transition'),
          pair_act,
          pair_mask,
          next(sub_keys))

      return pair_act, safe_key

    if gc.use_remat:
      block = hk.remat(block)

    res_stack = layer_stack.layer_stack(c.num_block)(block)
    pair_act, safe_key = res_stack((pair_act, safe_key))
    return pair_act
519
+
520
+
521
class Transition(hk.Module):
  """Transition layer.

  Jumper et al. (2021) Suppl. Alg. 9 "MSATransition"
  Jumper et al. (2021) Suppl. Alg. 15 "PairTransition"
  """

  def __init__(self, config, global_config, name='transition_block'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, act, mask, is_training=True):
    """Builds Transition module.

    Arguments:
      act: A tensor of queries of size [batch_size, N_res, N_channel].
      mask: A tensor denoting the mask of size [batch_size, N_res].
      is_training: Whether the module is in training mode.

    Returns:
      A float32 tensor of size [batch_size, N_res, N_channel].
    """
    _, _, nc = act.shape

    num_intermediate = int(nc * self.config.num_intermediate_factor)
    # The expanded mask is not consumed by the layers below.
    mask = jnp.expand_dims(mask, axis=-1)

    act = hk.LayerNorm(
        axis=[-1],
        create_scale=True,
        create_offset=True,
        name='input_layer_norm')(
            act)

    # Two-layer MLP: widen to `num_intermediate`, ReLU, project back to `nc`.
    transition_module = hk.Sequential([
        common_modules.Linear(
            num_intermediate,
            initializer='relu',
            name='transition1'), jax.nn.relu,
        common_modules.Linear(
            nc,
            initializer=utils.final_init(self.global_config),
            name='transition2')
    ])

    act = mapping.inference_subbatch(
        transition_module,
        self.global_config.subbatch_size,
        batched_args=[act],
        nonbatched_args=[],
        low_memory=not is_training)

    return act
575
+
576
+
577
def glorot_uniform():
  """Returns a Haiku variance-scaling initializer (fan_avg, uniform, scale 1)."""
  return hk.initializers.VarianceScaling(scale=1.0,
                                         mode='fan_avg',
                                         distribution='uniform')
581
+
582
+
583
class Attention(hk.Module):
  """Multihead attention."""

  def __init__(self, config, global_config, output_dim, name='attention'):
    super().__init__(name=name)

    self.config = config
    self.global_config = global_config
    self.output_dim = output_dim

  def __call__(self, q_data, m_data, bias, nonbatched_bias=None):
    """Builds Attention module.

    Arguments:
      q_data: A tensor of queries, shape [batch_size, N_queries, q_channels].
      m_data: A tensor of memories from which the keys and values are
        projected, shape [batch_size, N_keys, m_channels].
      bias: A bias for the attention, shape [batch_size, N_queries, N_keys].
      nonbatched_bias: Shared bias, shape [N_queries, N_keys].

    Returns:
      A float32 tensor of shape [batch_size, N_queries, output_dim].
    """
    # Sensible default for when the config keys are missing
    key_dim = self.config.get('key_dim', int(q_data.shape[-1]))
    value_dim = self.config.get('value_dim', int(m_data.shape[-1]))
    num_head = self.config.num_head
    assert key_dim % num_head == 0
    assert value_dim % num_head == 0
    # From here on these are per-head sizes.
    key_dim = key_dim // num_head
    value_dim = value_dim // num_head

    q_weights = hk.get_parameter(
        'query_w', shape=(q_data.shape[-1], num_head, key_dim),
        init=glorot_uniform())
    k_weights = hk.get_parameter(
        'key_w', shape=(m_data.shape[-1], num_head, key_dim),
        init=glorot_uniform())
    v_weights = hk.get_parameter(
        'value_w', shape=(m_data.shape[-1], num_head, value_dim),
        init=glorot_uniform())

    # Queries are pre-scaled by 1/sqrt(per-head key size).
    q = jnp.einsum('bqa,ahc->bqhc', q_data, q_weights) * key_dim**(-0.5)
    k = jnp.einsum('bka,ahc->bkhc', m_data, k_weights)
    v = jnp.einsum('bka,ahc->bkhc', m_data, v_weights)
    logits = jnp.einsum('bqhc,bkhc->bhqk', q, k) + bias
    if nonbatched_bias is not None:
      logits += jnp.expand_dims(nonbatched_bias, axis=0)
    weights = jax.nn.softmax(logits)
    weighted_avg = jnp.einsum('bhqk,bkhc->bqhc', weights, v)

    if self.global_config.zero_init:
      init = hk.initializers.Constant(0.0)
    else:
      init = glorot_uniform()

    if self.config.gating:
      gating_weights = hk.get_parameter(
          'gating_w',
          shape=(q_data.shape[-1], num_head, value_dim),
          init=hk.initializers.Constant(0.0))
      gating_bias = hk.get_parameter(
          'gating_b',
          shape=(num_head, value_dim),
          init=hk.initializers.Constant(1.0))

      gate_values = jnp.einsum('bqc, chv->bqhv', q_data,
                               gating_weights) + gating_bias

      gate_values = jax.nn.sigmoid(gate_values)

      weighted_avg *= gate_values

    o_weights = hk.get_parameter(
        'output_w', shape=(num_head, value_dim, self.output_dim),
        init=init)
    o_bias = hk.get_parameter('output_b', shape=(self.output_dim,),
                              init=hk.initializers.Constant(0.0))

    output = jnp.einsum('bqhc,hco->bqo', weighted_avg, o_weights) + o_bias

    return output
665
+
666
+
667
class GlobalAttention(hk.Module):
  """Global attention.

  Jumper et al. (2021) Suppl. Alg. 19 "MSAColumnGlobalAttention" lines 2-7
  """

  def __init__(self, config, global_config, output_dim, name='attention'):
    super().__init__(name=name)

    self.config = config
    self.global_config = global_config
    self.output_dim = output_dim

  def __call__(self, q_data, m_data, q_mask, bias):
    """Builds GlobalAttention module.

    Arguments:
      q_data: A tensor of queries with size [batch_size, N_queries,
        q_channels]
      m_data: A tensor of memories from which the keys and values
        projected. Size [batch_size, N_keys, m_channels]
      q_mask: A binary mask for q_data with zeros in the padded sequence
        elements and ones otherwise. Size [batch_size, N_queries, q_channels]
        (or broadcastable to this shape).
      bias: A bias for the attention. The passed value is not used: the bias
        is recomputed from `q_mask` below.

    Returns:
      A float32 tensor of size [batch_size, N_queries, output_dim].
    """
    # Sensible default for when the config keys are missing
    key_dim = self.config.get('key_dim', int(q_data.shape[-1]))
    value_dim = self.config.get('value_dim', int(m_data.shape[-1]))
    num_head = self.config.num_head
    assert key_dim % num_head == 0
    assert value_dim % num_head == 0
    # From here on these are per-head sizes.
    key_dim = key_dim // num_head
    value_dim = value_dim // num_head

    # Only the queries are projected per head; the key and value projections
    # have no head axis.
    q_weights = hk.get_parameter(
        'query_w', shape=(q_data.shape[-1], num_head, key_dim),
        init=glorot_uniform())
    k_weights = hk.get_parameter(
        'key_w', shape=(m_data.shape[-1], key_dim),
        init=glorot_uniform())
    v_weights = hk.get_parameter(
        'value_w', shape=(m_data.shape[-1], value_dim),
        init=glorot_uniform())

    v = jnp.einsum('bka,ac->bkc', m_data, v_weights)

    # A single query per batch element: the masked mean over axis 1.
    q_avg = utils.mask_mean(q_mask, q_data, axis=1)

    q = jnp.einsum('ba,ahc->bhc', q_avg, q_weights) * key_dim**(-0.5)
    k = jnp.einsum('bka,ac->bkc', m_data, k_weights)
    bias = (1e9 * (q_mask[:, None, :, 0] - 1.))
    logits = jnp.einsum('bhc,bkc->bhk', q, k) + bias
    weights = jax.nn.softmax(logits)
    weighted_avg = jnp.einsum('bhk,bkc->bhc', weights, v)

    if self.global_config.zero_init:
      init = hk.initializers.Constant(0.0)
    else:
      init = glorot_uniform()

    o_weights = hk.get_parameter(
        'output_w', shape=(num_head, value_dim, self.output_dim),
        init=init)
    o_bias = hk.get_parameter('output_b', shape=(self.output_dim,),
                              init=hk.initializers.Constant(0.0))

    if self.config.gating:
      gating_weights = hk.get_parameter(
          'gating_w',
          shape=(q_data.shape[-1], num_head, value_dim),
          init=hk.initializers.Constant(0.0))
      gating_bias = hk.get_parameter(
          'gating_b',
          shape=(num_head, value_dim),
          init=hk.initializers.Constant(1.0))

      gate_values = jnp.einsum('bqc, chv->bqhv', q_data, gating_weights)
      gate_values = jax.nn.sigmoid(gate_values + gating_bias)
      # The pooled result is broadcast over queries and gated per query.
      weighted_avg = weighted_avg[:, None] * gate_values
      output = jnp.einsum('bqhc,hco->bqo', weighted_avg, o_weights) + o_bias
    else:
      output = jnp.einsum('bhc,hco->bo', weighted_avg, o_weights) + o_bias
      output = output[:, None]
    return output
755
+
756
+
757
class MSARowAttentionWithPairBias(hk.Module):
  """MSA per-row attention biased by the pair representation.

  Jumper et al. (2021) Suppl. Alg. 7 "MSARowAttentionWithPairBias"
  """

  def __init__(self, config, global_config,
               name='msa_row_attention_with_pair_bias'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self,
               msa_act,
               msa_mask,
               pair_act,
               is_training=False):
    """Builds MSARowAttentionWithPairBias module.

    Arguments:
      msa_act: [N_seq, N_res, c_m] MSA representation.
      msa_mask: [N_seq, N_res] mask of non-padded regions.
      pair_act: [N_res, N_res, c_z] pair representation.
      is_training: Whether the module is in training mode.

    Returns:
      Update to msa_act, shape [N_seq, N_res, c_m].
    """
    c = self.config

    assert len(msa_act.shape) == 3
    assert len(msa_mask.shape) == 2
    assert c.orientation == 'per_row'

    # Masked positions get a large negative bias ahead of the softmax.
    bias = (1e9 * (msa_mask - 1.))[:, None, None, :]
    assert len(bias.shape) == 4

    msa_act = hk.LayerNorm(
        axis=[-1], create_scale=True, create_offset=True, name='query_norm')(
            msa_act)

    pair_act = hk.LayerNorm(
        axis=[-1],
        create_scale=True,
        create_offset=True,
        name='feat_2d_norm')(
            pair_act)

    # Project the pair representation to one bias map per attention head.
    init_factor = 1. / jnp.sqrt(int(pair_act.shape[-1]))
    weights = hk.get_parameter(
        'feat_2d_weights',
        shape=(pair_act.shape[-1], c.num_head),
        init=hk.initializers.RandomNormal(stddev=init_factor))
    nonbatched_bias = jnp.einsum('qkc,ch->hqk', pair_act, weights)

    attn_mod = Attention(
        c, self.global_config, msa_act.shape[-1])
    msa_act = mapping.inference_subbatch(
        attn_mod,
        self.global_config.subbatch_size,
        batched_args=[msa_act, msa_act, bias],
        nonbatched_args=[nonbatched_bias],
        low_memory=not is_training)

    return msa_act
822
+
823
+
824
class MSAColumnAttention(hk.Module):
  """MSA per-column attention.

  Jumper et al. (2021) Suppl. Alg. 8 "MSAColumnAttention"
  """

  def __init__(self, config, global_config, name='msa_column_attention'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self,
               msa_act,
               msa_mask,
               is_training=False):
    """Builds MSAColumnAttention module.

    Arguments:
      msa_act: [N_seq, N_res, c_m] MSA representation.
      msa_mask: [N_seq, N_res] mask of non-padded regions.
      is_training: Whether the module is in training mode.

    Returns:
      Update to msa_act, shape [N_seq, N_res, c_m]
    """
    c = self.config

    assert len(msa_act.shape) == 3
    assert len(msa_mask.shape) == 2
    assert c.orientation == 'per_column'

    # Swap the sequence and residue axes so that attention runs over columns.
    msa_act = jnp.swapaxes(msa_act, -2, -3)
    msa_mask = jnp.swapaxes(msa_mask, -1, -2)

    # Masked positions get a large negative bias ahead of the softmax.
    bias = (1e9 * (msa_mask - 1.))[:, None, None, :]
    assert len(bias.shape) == 4

    msa_act = hk.LayerNorm(
        axis=[-1], create_scale=True, create_offset=True, name='query_norm')(
            msa_act)

    attn_mod = Attention(
        c, self.global_config, msa_act.shape[-1])
    msa_act = mapping.inference_subbatch(
        attn_mod,
        self.global_config.subbatch_size,
        batched_args=[msa_act, msa_act, bias],
        nonbatched_args=[],
        low_memory=not is_training)

    # Restore the original [N_seq, N_res, c_m] layout.
    msa_act = jnp.swapaxes(msa_act, -2, -3)

    return msa_act
877
+
878
+
879
class MSAColumnGlobalAttention(hk.Module):
  """MSA per-column global attention.

  Jumper et al. (2021) Suppl. Alg. 19 "MSAColumnGlobalAttention"
  """

  def __init__(self, config, global_config, name='msa_column_global_attention'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self,
               msa_act,
               msa_mask,
               is_training=False):
    """Builds MSAColumnGlobalAttention module.

    Arguments:
      msa_act: [N_seq, N_res, c_m] MSA representation.
      msa_mask: [N_seq, N_res] mask of non-padded regions.
      is_training: Whether the module is in training mode.

    Returns:
      Update to msa_act, shape [N_seq, N_res, c_m].
    """
    c = self.config

    assert len(msa_act.shape) == 3
    assert len(msa_mask.shape) == 2
    assert c.orientation == 'per_column'

    # Swap the sequence and residue axes so that attention runs over columns.
    msa_act = jnp.swapaxes(msa_act, -2, -3)
    msa_mask = jnp.swapaxes(msa_mask, -1, -2)

    bias = (1e9 * (msa_mask - 1.))[:, None, None, :]
    assert len(bias.shape) == 4

    msa_act = hk.LayerNorm(
        axis=[-1], create_scale=True, create_offset=True, name='query_norm')(
            msa_act)

    attn_mod = GlobalAttention(
        c, self.global_config, msa_act.shape[-1],
        name='attention')
    # Add a trailing singleton axis so the mask broadcasts over channels
    # (msa_mask was transposed above).
    msa_mask = jnp.expand_dims(msa_mask, axis=-1)
    msa_act = mapping.inference_subbatch(
        attn_mod,
        self.global_config.subbatch_size,
        batched_args=[msa_act, msa_act, msa_mask, bias],
        nonbatched_args=[],
        low_memory=not is_training)

    # Restore the original [N_seq, N_res, c_m] layout.
    msa_act = jnp.swapaxes(msa_act, -2, -3)

    return msa_act
935
+
936
+
937
class TriangleAttention(hk.Module):
  """Triangle attention over the pair representation.

  Jumper et al. (2021) Suppl. Alg. 13 "TriangleAttentionStartingNode"
  Jumper et al. (2021) Suppl. Alg. 14 "TriangleAttentionEndingNode"
  """

  def __init__(self, config, global_config, name='triangle_attention'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, pair_act, pair_mask, is_training=False):
    """Applies triangle attention to the pair activations.

    Arguments:
      pair_act: [N_res, N_res, c_z] pair activations tensor
      pair_mask: [N_res, N_res] mask of non-padded regions in the tensor.
      is_training: Whether the module is in training mode. When False the
        attention is evaluated in low-memory (sub-batched) mode.

    Returns:
      Update to pair_act, shape [N_res, N_res, c_z].
    """
    cfg = self.config
    global_cfg = self.global_config

    assert len(pair_act.shape) == 3
    assert len(pair_mask.shape) == 2
    assert cfg.orientation in ['per_row', 'per_column']

    # The 'per_column' variant is the 'per_row' computation on swapped axes.
    swap_axes = cfg.orientation == 'per_column'
    if swap_axes:
      pair_act = jnp.swapaxes(pair_act, -2, -3)
      pair_mask = jnp.swapaxes(pair_mask, -1, -2)

    # 0 where the mask is 1, -1e9 where the mask is 0.
    bias = (1e9 * (pair_mask - 1.))[:, None, None, :]
    assert len(bias.shape) == 4

    query_norm = hk.LayerNorm(
        axis=[-1], create_scale=True, create_offset=True, name='query_norm')
    pair_act = query_norm(pair_act)

    num_channels = pair_act.shape[-1]
    init_factor = 1. / jnp.sqrt(int(num_channels))
    weights = hk.get_parameter(
        'feat_2d_weights',
        shape=(num_channels, cfg.num_head),
        init=hk.initializers.RandomNormal(stddev=init_factor))
    # Per-head bias projected from the pair activations, shared by every
    # sub-batch (passed as a non-batched argument below).
    nonbatched_bias = jnp.einsum('qkc,ch->hqk', pair_act, weights)

    attention = Attention(cfg, global_cfg, num_channels)
    update = mapping.inference_subbatch(
        attention,
        global_cfg.subbatch_size,
        batched_args=[pair_act, pair_act, bias],
        nonbatched_args=[nonbatched_bias],
        low_memory=not is_training)

    if swap_axes:
      update = jnp.swapaxes(update, -2, -3)

    return update
997
+
998
+
999
class MaskedMsaHead(hk.Module):
  """Head to predict MSA at the masked locations.

  Uses a BERT-style objective: the masked MSA is reconstructed from a linear
  projection of the MSA representation.
  Jumper et al. (2021) Suppl. Sec. 1.9.9 "Masked MSA prediction"
  """

  def __init__(self, config, global_config, name='masked_msa_head'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, representations, batch, is_training):
    """Projects the MSA representation to per-position logits.

    Arguments:
      representations: Dictionary of representations, must contain:
        * 'msa': MSA representation, shape [N_seq, N_res, c_m].
      batch: Batch, unused.
      is_training: Whether the module is in training mode.

    Returns:
      Dictionary containing:
        * 'logits': logits of shape [N_seq, N_res, N_aatype] with
            (unnormalized) log probabilities of predicted aatype at position.
    """
    del batch
    projection = common_modules.Linear(
        self.config.num_output,
        initializer=utils.final_init(self.global_config),
        name='logits')
    return dict(logits=projection(representations['msa']))

  def loss(self, value, batch):
    """Masked mean cross-entropy between the logits and batch['true_msa'].

    Positions are weighted by batch['bert_mask']; the sum is taken over the
    last two axes and divided by the mask total (plus 1e-8).
    """
    targets = jax.nn.one_hot(batch['true_msa'], num_classes=23)
    per_position = softmax_cross_entropy(
        labels=targets, logits=value['logits'])

    bert_mask = batch['bert_mask']
    masked_total = jnp.sum(per_position * bert_mask, axis=(-2, -1))
    mask_total = jnp.sum(bert_mask, axis=(-2, -1))
    return {'loss': masked_total / (1e-8 + mask_total)}
1042
+
1043
+
1044
class PredictedLDDTHead(hk.Module):
  """Head to predict the per-residue LDDT to be used as a confidence measure.

  Jumper et al. (2021) Suppl. Sec. 1.9.6 "Model confidence prediction (pLDDT)"
  Jumper et al. (2021) Suppl. Alg. 29 "predictPerResidueLDDT_Ca"
  """

  def __init__(self, config, global_config, name='predicted_lddt_head'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, representations, batch, is_training):
    """Builds PredictedLDDTHead module.

    Arguments:
      representations: Dictionary of representations, must contain:
        * 'structure_module': Single representation from the structure module,
             shape [N_res, c_s].
      batch: Batch, unused.
      is_training: Whether the module is in training mode.

    Returns:
      Dictionary containing :
        * 'logits': logits of shape [N_res, N_bins] with
            (unnormalized) log probabilities of binned predicted lDDT.
    """
    act = representations['structure_module']

    act = hk.LayerNorm(
        axis=[-1],
        create_scale=True,
        create_offset=True,
        name='input_layer_norm')(
            act)

    # Two ReLU-activated hidden layers of width config.num_channels.
    act = common_modules.Linear(
        self.config.num_channels,
        initializer='relu',
        name='act_0')(
            act)
    act = jax.nn.relu(act)

    act = common_modules.Linear(
        self.config.num_channels,
        initializer='relu',
        name='act_1')(
            act)
    act = jax.nn.relu(act)

    logits = common_modules.Linear(
        self.config.num_bins,
        initializer=utils.final_init(self.global_config),
        name='logits')(
            act)
    # Shape (num_res, num_bins)
    return dict(logits=logits)

  def loss(self, value, batch):
    """Cross-entropy between predicted lDDT bins and the binned true lDDT-Ca.

    Arguments:
      value: Dictionary of model outputs; reads
        value['structure_module']['final_atom_positions'] and
        value['predicted_lddt']['logits'].
      batch: Dictionary of features; reads 'all_atom_positions',
        'all_atom_mask' and, when config.filter_by_resolution is set,
        'resolution'.

    Returns:
      Dictionary with a single entry 'loss'.
    """
    # Use the named atom index for CA everywhere in this method instead of
    # a hard-coded position.
    ca_idx = residue_constants.atom_order['CA']

    # Shape (num_res, 37, 3)
    pred_all_atom_pos = value['structure_module']['final_atom_positions']
    # Shape (num_res, 37, 3)
    true_all_atom_pos = batch['all_atom_positions']
    # Shape (num_res, 37)
    all_atom_mask = batch['all_atom_mask']

    # Shape (num_res,)
    lddt_ca = lddt.lddt(
        # Shape (batch_size, num_res, 3)
        predicted_points=pred_all_atom_pos[None, :, ca_idx, :],
        # Shape (batch_size, num_res, 3)
        true_points=true_all_atom_pos[None, :, ca_idx, :],
        # Shape (batch_size, num_res, 1)
        true_points_mask=all_atom_mask[None, :, ca_idx:ca_idx + 1].astype(
            jnp.float32),
        cutoff=15.,
        per_residue=True)[0]
    # The lDDT value is a training target only; no gradient flows through it.
    lddt_ca = jax.lax.stop_gradient(lddt_ca)

    num_bins = self.config.num_bins
    bin_index = jnp.floor(lddt_ca * num_bins).astype(jnp.int32)

    # protect against out of range for lddt_ca == 1
    bin_index = jnp.minimum(bin_index, num_bins - 1)
    lddt_ca_one_hot = jax.nn.one_hot(bin_index, num_classes=num_bins)

    # Shape (num_res, num_bins)
    logits = value['predicted_lddt']['logits']
    errors = softmax_cross_entropy(labels=lddt_ca_one_hot, logits=logits)

    # Shape (num_res,)
    mask_ca = all_atom_mask[:, ca_idx]
    mask_ca = mask_ca.astype(jnp.float32)
    loss = jnp.sum(errors * mask_ca) / (jnp.sum(mask_ca) + 1e-8)

    if self.config.filter_by_resolution:
      # NMR & distillation have resolution = 0
      loss *= ((batch['resolution'] >= self.config.min_resolution)
               & (batch['resolution'] <= self.config.max_resolution)).astype(
                   jnp.float32)

    output = {'loss': loss}
    return output
1146
+
1147
+
1148
class PredictedAlignedErrorHead(hk.Module):
  """Head to predict the distance errors in the backbone alignment frames.

  Can be used to compute predicted TM-Score.
  Jumper et al. (2021) Suppl. Sec. 1.9.7 "TM-score prediction"
  """

  def __init__(self, config, global_config,
               name='predicted_aligned_error_head'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, representations, batch, is_training):
    """Projects the pair representation to aligned-error logits.

    Arguments:
      representations: Dictionary of representations, must contain:
        * 'pair': pair representation, shape [N_res, N_res, c_z].
      batch: Batch, unused.
      is_training: Whether the module is in training mode.

    Returns:
      Dictionary containing:
        * logits: logits for aligned error, shape [N_res, N_res, N_bins].
        * breaks: array containing bin breaks, shape [N_bins - 1].
    """
    cfg = self.config

    # Shape (num_res, num_res, num_bins)
    projection = common_modules.Linear(
        cfg.num_bins,
        initializer=utils.final_init(self.global_config),
        name='logits')
    logits = projection(representations['pair'])

    # Shape (num_bins - 1,)
    breaks = jnp.linspace(0., cfg.max_error_bin, cfg.num_bins - 1)
    return dict(logits=logits, breaks=breaks)

  def loss(self, value, batch):
    """Cross-entropy between predicted and true binned aligned errors.

    Arguments:
      value: Dictionary of model outputs; reads
        value['structure_module']['final_affines'] and the 'breaks' and
        'logits' entries of value['predicted_aligned_error'].
      batch: Dictionary of features; reads 'backbone_affine_tensor',
        'backbone_affine_mask' and, when config.filter_by_resolution is set,
        'resolution'.

    Returns:
      Dictionary with a single entry 'loss'.
    """
    cfg = self.config
    head_output = value['predicted_aligned_error']

    # Shape (num_res, 7)
    predicted_affine = quat_affine.QuatAffine.from_tensor(
        value['structure_module']['final_affines'])
    # Shape (num_res, 7)
    true_affine = quat_affine.QuatAffine.from_tensor(
        batch['backbone_affine_tensor'])

    # Shape (num_res)
    mask = batch['backbone_affine_mask']
    # Shape (num_res, num_res)
    square_mask = mask[:, None] * mask[None, :]

    num_bins = cfg.num_bins
    breaks = head_output['breaks']
    logits = head_output['logits']

    def _local_frame_points(affine):
      # Express every translation in the local frame of every affine.
      points = [jnp.expand_dims(x, axis=-2) for x in affine.translation]
      return affine.invert_point(points, extra_dims=1)

    # Squared error for each alignment, accumulated over the coordinates.
    predicted_local = _local_frame_points(predicted_affine)
    true_local = _local_frame_points(true_affine)
    error_dist2 = sum(
        [jnp.square(p - t) for p, t in zip(predicted_local, true_local)])
    # Shape (num_res, num_res)
    # First num_res are alignment frames, second num_res are the residues.
    error_dist2 = jax.lax.stop_gradient(error_dist2)

    # Bin index = number of squared breaks strictly below the squared error.
    sq_breaks = jnp.square(breaks)
    exceeds_break = (error_dist2[..., None] > sq_breaks).astype(jnp.int32)
    true_bins = jnp.sum(exceeds_break, axis=-1)

    errors = softmax_cross_entropy(
        labels=jax.nn.one_hot(true_bins, num_bins, axis=-1), logits=logits)

    masked_total = jnp.sum(errors * square_mask, axis=(-2, -1))
    mask_total = jnp.sum(square_mask, axis=(-2, -1))
    loss = masked_total / (1e-8 + mask_total)

    if cfg.filter_by_resolution:
      # NMR & distillation have resolution = 0
      resolution = batch['resolution']
      in_range = ((resolution >= cfg.min_resolution)
                  & (resolution <= cfg.max_resolution))
      loss = loss * in_range.astype(jnp.float32)

    return {'loss': loss}
1236
+
1237
+
1238
class ExperimentallyResolvedHead(hk.Module):
  """Predicts if an atom is experimentally resolved in a high-res structure.

  Only trained on high-resolution X-ray crystals & cryo-EM.
  Jumper et al. (2021) Suppl. Sec. 1.9.10 '"Experimentally resolved" prediction'
  """

  def __init__(self, config, global_config,
               name='experimentally_resolved_head'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, representations, batch, is_training):
    """Projects the single representation to per-atom logits.

    Arguments:
      representations: Dictionary of representations, must contain:
        * 'single': Single representation, shape [N_res, c_s].
      batch: Batch, unused.
      is_training: Whether the module is in training mode.

    Returns:
      Dictionary containing:
        * 'logits': logits of shape [N_res, 37],
            log probability that an atom is resolved in atom37 representation,
            can be converted to probability by applying sigmoid.
    """
    projection = common_modules.Linear(
        37,  # atom_exists.shape[-1]
        initializer=utils.final_init(self.global_config),
        name='logits')
    return dict(logits=projection(representations['single']))

  def loss(self, value, batch):
    """Sigmoid cross-entropy against the resolved-atom mask.

    Arguments:
      value: Dictionary containing 'logits' (asserted to be rank 2).
      batch: Dictionary of features; reads 'atom37_atom_exists',
        'all_atom_mask' and, when config.filter_by_resolution is set,
        'resolution'.

    Returns:
      Dictionary with a single entry 'loss'.
    """
    cfg = self.config
    logits = value['logits']
    assert len(logits.shape) == 2

    # Does the atom appear in the amino acid?
    atom_exists = batch['atom37_atom_exists']
    # Is the atom resolved in the experiment? Subset of atom_exists,
    # *except for OXT*
    resolved = batch['all_atom_mask'].astype(jnp.float32)

    xent = sigmoid_cross_entropy(labels=resolved, logits=logits)
    loss = jnp.sum(xent * atom_exists) / (1e-8 + jnp.sum(atom_exists))

    if cfg.filter_by_resolution:
      # NMR & distillation examples have resolution = 0.
      resolution = batch['resolution']
      in_range = ((resolution >= cfg.min_resolution)
                  & (resolution <= cfg.max_resolution))
      loss = loss * in_range.astype(jnp.float32)

    return {'loss': loss}
1293
+
1294
+
1295
class TriangleMultiplication(hk.Module):
  """Triangle multiplication layer ("outgoing" or "incoming").

  Jumper et al. (2021) Suppl. Alg. 11 "TriangleMultiplicationOutgoing"
  Jumper et al. (2021) Suppl. Alg. 12 "TriangleMultiplicationIncoming"
  """

  def __init__(self, config, global_config, name='triangle_multiplication'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, act, mask, is_training=True):
    """Applies the triangle multiplicative update to the pair activations.

    Arguments:
      act: Pair activations, shape [N_res, N_res, c_z]
      mask: Pair mask, shape [N_res, N_res].
      is_training: Whether the module is in training mode (unused).

    Returns:
      Outputs, same shape/type as act.
    """
    del is_training
    cfg = self.config
    global_cfg = self.global_config
    num_intermediate = cfg.num_intermediate_channel

    # Add a trailing axis so the mask broadcasts over channels.
    mask = mask[..., None]

    normed_input = hk.LayerNorm(
        axis=[-1], create_scale=True, create_offset=True,
        name='layer_norm_input')(act)

    # The four sub-modules are created and applied in this fixed order
    # (left projection, right projection, left gate, right gate).
    left_proj_act = mask * common_modules.Linear(
        num_intermediate, name='left_projection')(normed_input)
    right_proj_act = mask * common_modules.Linear(
        num_intermediate, name='right_projection')(normed_input)

    left_gate = common_modules.Linear(
        num_intermediate,
        bias_init=1.,
        initializer=utils.final_init(global_cfg),
        name='left_gate')(normed_input)
    right_gate = common_modules.Linear(
        num_intermediate,
        bias_init=1.,
        initializer=utils.final_init(global_cfg),
        name='right_gate')(normed_input)

    left_proj_act = left_proj_act * jax.nn.sigmoid(left_gate)
    right_proj_act = right_proj_act * jax.nn.sigmoid(right_gate)

    # "Outgoing" edges equation: 'ikc,jkc->ijc'
    # "Incoming" edges equation: 'kjc,kic->ijc'
    # Note on the Suppl. Alg. 11 & 12 notation:
    # For the "outgoing" edges, a = left_proj_act and b = right_proj_act
    # For the "incoming" edges, it's swapped:
    #   b = left_proj_act and a = right_proj_act
    update = jnp.einsum(cfg.equation, left_proj_act, right_proj_act)

    update = hk.LayerNorm(
        axis=[-1], create_scale=True, create_offset=True,
        name='center_layer_norm')(update)

    # Project back to the channel count of the input.
    output_channel = int(normed_input.shape[-1])
    update = common_modules.Linear(
        output_channel,
        initializer=utils.final_init(global_cfg),
        name='output_projection')(update)

    # The output gate is computed from the normalised input.
    output_gate = common_modules.Linear(
        output_channel,
        bias_init=1.,
        initializer=utils.final_init(global_cfg),
        name='gating_linear')(normed_input)

    return update * jax.nn.sigmoid(output_gate)
1383
+
1384
+
1385
class DistogramHead(hk.Module):
  """Head to predict a distogram.

  Jumper et al. (2021) Suppl. Sec. 1.9.8 "Distogram prediction"
  """

  def __init__(self, config, global_config, name='distogram_head'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, representations, batch, is_training):
    """Projects the pair representation to symmetric distogram logits.

    Arguments:
      representations: Dictionary of representations, must contain:
        * 'pair': pair representation, shape [N_res, N_res, c_z].
      batch: Batch, unused.
      is_training: Whether the module is in training mode.

    Returns:
      Dictionary containing:
        * logits: logits for distogram, shape [N_res, N_res, N_bins].
        * bin_edges: array containing bin breaks, shape [N_bins - 1,].
    """
    cfg = self.config

    projection = common_modules.Linear(
        cfg.num_bins,
        initializer=utils.final_init(self.global_config),
        name='half_logits')
    half_logits = projection(representations['pair'])

    # Adding the transpose over the two residue axes symmetrises the logits.
    logits = half_logits + jnp.swapaxes(half_logits, -2, -3)
    bin_edges = jnp.linspace(cfg.first_break, cfg.last_break, cfg.num_bins - 1)

    return dict(logits=logits, bin_edges=bin_edges)

  def loss(self, value, batch):
    """Returns the distogram log loss for the head output `value`."""
    return _distogram_log_loss(
        value['logits'], value['bin_edges'], batch, self.config.num_bins)
1425
+
1426
+
1427
def _distogram_log_loss(logits, bin_edges, batch, num_bins):
  """Log loss of a distogram.

  Arguments:
    logits: Rank-3 distogram logits.
    bin_edges: Bin breaks (in distance units; squared internally).
    batch: Dictionary of features; reads 'pseudo_beta' (last axis of size 3)
      and 'pseudo_beta_mask'.
    num_bins: Number of one-hot classes for the true bins.

  Returns:
    Dictionary with 'loss' (masked mean cross-entropy) and 'true_dist'
    (pairwise distances between the pseudo-beta positions).
  """
  assert len(logits.shape) == 3
  positions = batch['pseudo_beta']
  mask = batch['pseudo_beta_mask']

  assert positions.shape[-1] == 3

  sq_breaks = jnp.square(bin_edges)

  # Pairwise squared distances; the trailing axis of size 1 is kept so the
  # comparison below broadcasts against the breaks.
  pairwise_delta = (jnp.expand_dims(positions, axis=-2) -
                    jnp.expand_dims(positions, axis=-3))
  dist2 = jnp.sum(jnp.square(pairwise_delta), axis=-1, keepdims=True)

  # Bin index = number of squared breaks strictly below the squared distance.
  true_bins = jnp.sum(dist2 > sq_breaks, axis=-1)

  errors = softmax_cross_entropy(
      labels=jax.nn.one_hot(true_bins, num_bins), logits=logits)

  square_mask = jnp.expand_dims(mask, axis=-2) * jnp.expand_dims(mask, axis=-1)

  masked_total = jnp.sum(errors * square_mask, axis=(-2, -1))
  mask_total = jnp.sum(square_mask, axis=(-2, -1))
  avg_error = masked_total / (1e-6 + mask_total)

  true_dist = jnp.sqrt(1e-6 + dist2[..., 0])
  return dict(loss=avg_error, true_dist=true_dist)
1457
+
1458
+
1459
class OuterProductMean(hk.Module):
  """Computes mean outer product.

  Jumper et al. (2021) Suppl. Alg. 10 "OuterProductMean"
  """

  def __init__(self,
               config,
               global_config,
               num_output_channel,
               name='outer_product_mean'):
    super().__init__(name=name)
    self.global_config = global_config
    self.config = config
    self.num_output_channel = num_output_channel

  def __call__(self, act, mask, is_training=True):
    """Turns the MSA representation into an update for the pair representation.

    Arguments:
      act: MSA representation, shape [N_seq, N_res, c_m].
      mask: MSA mask, shape [N_seq, N_res].
      is_training: Whether the module is in training mode.

    Returns:
      Update to pair representation, shape [N_res, N_res, c_z].
    """
    global_cfg = self.global_config
    cfg = self.config
    num_outer = cfg.num_outer_channel

    # Add a trailing axis so the mask broadcasts over channels.
    mask = mask[..., None]
    normed = hk.LayerNorm(
        axis=[-1], create_scale=True, create_offset=True,
        name='layer_norm_input')(act)

    left_act = mask * common_modules.Linear(
        num_outer, initializer='linear', name='left_projection')(normed)
    right_act = mask * common_modules.Linear(
        num_outer, initializer='linear', name='right_projection')(normed)

    if global_cfg.zero_init:
      init_w = hk.initializers.Constant(0.0)
    else:
      init_w = hk.initializers.VarianceScaling(scale=2., mode='fan_in')

    output_w = hk.get_parameter(
        'output_w',
        shape=(num_outer, num_outer, self.num_output_channel),
        init=init_w)
    output_b = hk.get_parameter(
        'output_b',
        shape=(self.num_output_channel,),
        init=hk.initializers.Constant(0.0))

    def compute_chunk(left_chunk):
      # This is equivalent to
      #
      # act = jnp.einsum('abc,ade->dceb', left_act, right_act)
      # act = jnp.einsum('dceb,cef->bdf', act, output_w) + output_b
      #
      # but faster.
      left_chunk = jnp.transpose(left_chunk, [0, 2, 1])
      outer = jnp.einsum('acb,ade->dceb', left_chunk, right_act)
      projected = jnp.einsum('dceb,cef->dbf', outer, output_w) + output_b
      return jnp.transpose(projected, [1, 0, 2])

    # Only the left activations are chunked (along axis 1); the chunks of the
    # result are stitched together along axis 0.
    update = mapping.inference_subbatch(
        compute_chunk,
        cfg.chunk_size,
        batched_args=[left_act],
        nonbatched_args=[],
        low_memory=True,
        input_subbatch_dim=1,
        output_subbatch_dim=0)

    # Divide by the mask product summed over the leading axis to get a mean;
    # epsilon guards against division by zero.
    epsilon = 1e-3
    norm = jnp.einsum('abc,adc->bdc', mask, mask)
    return update / (epsilon + norm)
1544
+
1545
def dgram_from_positions(positions, num_bins, min_bin, max_bin):
  """Compute a one-hot distogram from amino acid positions.

  Arguments:
    positions: [N_res, 3] Position coordinates.
    num_bins: The number of bins in the distogram.
    min_bin: The left edge of the first bin.
    max_bin: The left edge of the final bin. The final bin catches
      everything larger than `max_bin`.

  Returns:
    Distogram with the specified number of bins. Both bin edges are
    exclusive, so a distance below `min_bin` or exactly on a break falls
    into no bin.
  """
  # All comparisons are done on squared distances to avoid a square root.
  sq_lower = jnp.square(jnp.linspace(min_bin, max_bin, num_bins))
  sq_upper = jnp.concatenate(
      [sq_lower[1:], jnp.array([1e8], dtype=jnp.float32)], axis=-1)

  pairwise_delta = (jnp.expand_dims(positions, axis=-2) -
                    jnp.expand_dims(positions, axis=-3))
  sq_dist = jnp.sum(jnp.square(pairwise_delta), axis=-1, keepdims=True)

  above_lower = (sq_dist > sq_lower).astype(jnp.float32)
  below_upper = (sq_dist < sq_upper).astype(jnp.float32)
  return above_lower * below_upper
1569
+
1570
def dgram_from_positions_soft(positions, num_bins, min_bin, max_bin, temp=2.0):
  """Soft (sigmoid-smoothed) positions-to-distogram converter.

  Arguments:
    positions: Position coordinates; the last axis holds the coordinates and
      the second-to-last axis indexes the points.
    num_bins: The number of bins in the returned distogram.
    min_bin: The left edge of the first returned bin.
    max_bin: The left edge of the final returned bin.
    temp: Temperature dividing the sigmoid arguments; smaller values give
      sharper bin assignments.

  Returns:
    Soft distogram whose last axis has `num_bins` entries. An extra bin
    below `min_bin` takes part in the normalisation and is then dropped.
  """
  inner_edges = jnp.linspace(min_bin, max_bin, num_bins)
  lower_edges = jnp.append(-1e8, inner_edges)
  upper_edges = jnp.append(lower_edges[1:], 1e8)

  pairwise_delta = positions[..., :, None, :] - positions[..., None, :, :]
  # The 1e-8 inside the square root keeps it away from exactly zero.
  dist = jnp.sqrt(jnp.square(pairwise_delta).sum(-1, keepdims=True) + 1e-8)

  above_lower = jax.nn.sigmoid((dist - lower_edges) / temp)
  below_upper = jax.nn.sigmoid((upper_edges - dist) / temp)
  soft_bins = above_lower * below_upper
  soft_bins = soft_bins / (soft_bins.sum(-1, keepdims=True) + 1e-8)
  return soft_bins[..., 1:]
1578
+
1579
def pseudo_beta_fn(aatype, all_atom_positions, all_atom_masks):
  """Create pseudo beta features.

  The pseudo-beta position of a residue is its CA position for glycine and
  its CB position otherwise.

  Arguments:
    aatype: Residue types. With an integer dtype it is compared against the
      glycine index; otherwise its last axis is indexed at the glycine index
      and the result is used as a blending weight.
    all_atom_positions: Atom positions, indexed by atom on the second-to-last
      axis.
    all_atom_masks: Atom masks indexed by atom on the last axis, or None.

  Returns:
    `pseudo_beta` if `all_atom_masks` is None, otherwise the tuple
    `(pseudo_beta, pseudo_beta_mask)`.
  """
  ca_idx = residue_constants.atom_order['CA']
  cb_idx = residue_constants.atom_order['CB']
  gly_idx = residue_constants.restype_order['G']

  ca_pos = all_atom_positions[..., ca_idx, :]
  cb_pos = all_atom_positions[..., cb_idx, :]

  if jnp.issubdtype(aatype.dtype, jnp.integer):
    # Hard selection between CA and CB.
    is_gly = jnp.equal(aatype, gly_idx)
    # The trailing axis broadcasts the selector over the coordinates.
    pseudo_beta = jnp.where(is_gly[..., None], ca_pos, cb_pos)
    if all_atom_masks is None:
      return pseudo_beta
    pseudo_beta_mask = jnp.where(
        is_gly, all_atom_masks[..., ca_idx], all_atom_masks[..., cb_idx])
    return pseudo_beta, pseudo_beta_mask.astype(jnp.float32)

  # Soft selection: blend CA and CB using the glycine entry of aatype.
  is_gly = aatype[..., gly_idx]
  gly_weight = is_gly[..., None]
  pseudo_beta = gly_weight * ca_pos + (1 - gly_weight) * cb_pos
  if all_atom_masks is None:
    return pseudo_beta
  ca_mask = all_atom_masks[..., ca_idx]
  cb_mask = all_atom_masks[..., cb_idx]
  pseudo_beta_mask = is_gly * ca_mask + (1 - is_gly) * cb_mask
  return pseudo_beta, pseudo_beta_mask
1608
+
1609
class EvoformerIteration(hk.Module):
  """Single iteration (block) of Evoformer stack.

  Jumper et al. (2021) Suppl. Alg. 6 "EvoformerStack" lines 2-10
  """

  def __init__(self, config, global_config, is_extra_msa,
               name='evoformer_iteration'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config
    self.is_extra_msa = is_extra_msa

  def __call__(self, activations, masks, is_training=True, safe_key=None,
               scale_rate=1.0):
    """Runs one Evoformer block over the MSA and pair activations.

    Arguments:
      activations: Dictionary containing activations:
        * 'msa': MSA activations, shape [N_seq, N_res, c_m].
        * 'pair': pair activations, shape [N_res, N_res, c_z].
      masks: Dictionary of masks:
        * 'msa': MSA mask, shape [N_seq, N_res].
        * 'pair': pair mask, shape [N_res, N_res].
      is_training: Whether the module is in training mode.
      safe_key: prng.SafeKey encapsulating rng key. A fresh key is drawn from
        Haiku when None.
      scale_rate: Forwarded unchanged to `dropout_wrapper` for every
        sub-module.

    Returns:
      Dictionary with the updated 'msa' and 'pair' activations.
    """
    cfg = self.config
    global_cfg = self.global_config

    msa_act = activations['msa']
    pair_act = activations['pair']

    if safe_key is None:
      safe_key = prng.SafeKey(hk.next_rng_key())

    msa_mask = masks['msa']
    pair_mask = masks['pair']

    # One sub-key per wrapped sub-module, consumed in call order.
    safe_key, *sub_keys = safe_key.split(10)
    key_iter = iter(sub_keys)

    def apply(module, act, mask, **extra):
      # Route every sub-module through dropout_wrapper with shared settings.
      return dropout_wrapper(
          module,
          act,
          mask,
          is_training=is_training,
          global_config=global_cfg,
          scale_rate=scale_rate,
          safe_key=next(key_iter),
          **extra)

    # --- MSA stack ---
    msa_act = apply(
        MSARowAttentionWithPairBias(
            cfg.msa_row_attention_with_pair_bias, global_cfg,
            name='msa_row_attention_with_pair_bias'),
        msa_act,
        msa_mask,
        pair_act=pair_act)

    # The extra-MSA stack uses the global variant of column attention.
    if self.is_extra_msa:
      column_attention = MSAColumnGlobalAttention(
          cfg.msa_column_attention, global_cfg,
          name='msa_column_global_attention')
    else:
      column_attention = MSAColumnAttention(
          cfg.msa_column_attention, global_cfg, name='msa_column_attention')
    msa_act = apply(column_attention, msa_act, msa_mask)

    msa_act = apply(
        Transition(cfg.msa_transition, global_cfg, name='msa_transition'),
        msa_act,
        msa_mask)

    # --- Communication from the MSA to the pair representation ---
    pair_act = apply(
        OuterProductMean(
            config=cfg.outer_product_mean,
            global_config=global_cfg,
            num_output_channel=int(pair_act.shape[-1]),
            name='outer_product_mean'),
        msa_act,
        msa_mask,
        output_act=pair_act)

    # --- Pair stack ---
    pair_act = apply(
        TriangleMultiplication(
            cfg.triangle_multiplication_outgoing, global_cfg,
            name='triangle_multiplication_outgoing'),
        pair_act,
        pair_mask)
    pair_act = apply(
        TriangleMultiplication(
            cfg.triangle_multiplication_incoming, global_cfg,
            name='triangle_multiplication_incoming'),
        pair_act,
        pair_mask)

    pair_act = apply(
        TriangleAttention(
            cfg.triangle_attention_starting_node, global_cfg,
            name='triangle_attention_starting_node'),
        pair_act,
        pair_mask)
    pair_act = apply(
        TriangleAttention(
            cfg.triangle_attention_ending_node, global_cfg,
            name='triangle_attention_ending_node'),
        pair_act,
        pair_mask)

    pair_act = apply(
        Transition(cfg.pair_transition, global_cfg, name='pair_transition'),
        pair_act,
        pair_mask)

    return {'msa': msa_act, 'pair': pair_act}
1727
+
1728
+
1729
+ class EmbeddingsAndEvoformer(hk.Module):
1730
+ """Embeds the input data and runs Evoformer.
1731
+
1732
+ Produces the MSA, single and pair representations.
1733
+ Jumper et al. (2021) Suppl. Alg. 2 "Inference" line 5-18
1734
+ """
1735
+
1736
+ def __init__(self, config, global_config, name='evoformer'):
1737
+ super().__init__(name=name)
1738
+ self.config = config
1739
+ self.global_config = global_config
1740
+
1741
+ def __call__(self, batch, is_training, safe_key=None):
1742
+
1743
+ c = self.config
1744
+ gc = self.global_config
1745
+
1746
+ if safe_key is None:
1747
+ safe_key = prng.SafeKey(hk.next_rng_key())
1748
+
1749
+ # Embed clustered MSA.
1750
+ # Jumper et al. (2021) Suppl. Alg. 2 "Inference" line 5
1751
+ # Jumper et al. (2021) Suppl. Alg. 3 "InputEmbedder"
1752
+ preprocess_1d = common_modules.Linear(
1753
+ c.msa_channel, name='preprocess_1d')(
1754
+ batch['target_feat'])
1755
+
1756
+ preprocess_msa = common_modules.Linear(
1757
+ c.msa_channel, name='preprocess_msa')(
1758
+ batch['msa_feat'])
1759
+
1760
+ msa_activations = jnp.expand_dims(preprocess_1d, axis=0) + preprocess_msa
1761
+
1762
+ left_single = common_modules.Linear(
1763
+ c.pair_channel, name='left_single')(
1764
+ batch['target_feat'])
1765
+ right_single = common_modules.Linear(
1766
+ c.pair_channel, name='right_single')(
1767
+ batch['target_feat'])
1768
+ pair_activations = left_single[:, None] + right_single[None]
1769
+ mask_2d = batch['seq_mask'][:, None] * batch['seq_mask'][None, :]
1770
+
1771
+ # Inject previous outputs for recycling.
1772
+ # Jumper et al. (2021) Suppl. Alg. 2 "Inference" line 6
1773
+ # Jumper et al. (2021) Suppl. Alg. 32 "RecyclingEmbedder"
1774
+
1775
+ if "prev_pos" in batch:
1776
+ # use predicted position input
1777
+ prev_pseudo_beta = pseudo_beta_fn(batch['aatype'], batch['prev_pos'], None)
1778
+ if c.backprop_dgram:
1779
+ dgram = dgram_from_positions_soft(prev_pseudo_beta, temp=c.backprop_dgram_temp, **c.prev_pos)
1780
+ else:
1781
+ dgram = dgram_from_positions(prev_pseudo_beta, **c.prev_pos)
1782
+
1783
+ elif 'prev_dgram' in batch:
1784
+ # use predicted distogram input (from Sergey)
1785
+ dgram = jax.nn.softmax(batch["prev_dgram"])
1786
+ dgram_map = jax.nn.one_hot(jnp.repeat(jnp.append(0,jnp.arange(15)),4),15).at[:,0].set(0)
1787
+ dgram = dgram @ dgram_map
1788
+
1789
+ pair_activations += common_modules.Linear(c.pair_channel, name='prev_pos_linear')(dgram)
1790
+
1791
+ if c.recycle_features:
1792
+ if 'prev_msa_first_row' in batch:
1793
+ prev_msa_first_row = hk.LayerNorm([-1],
1794
+ True,
1795
+ True,
1796
+ name='prev_msa_first_row_norm')(
1797
+ batch['prev_msa_first_row'])
1798
+ msa_activations = msa_activations.at[0].add(prev_msa_first_row)
1799
+
1800
+ if 'prev_pair' in batch:
1801
+ pair_activations += hk.LayerNorm([-1],
1802
+ True,
1803
+ True,
1804
+ name='prev_pair_norm')(
1805
+ batch['prev_pair'])
1806
+
1807
+ # Relative position encoding.
1808
+ # Jumper et al. (2021) Suppl. Alg. 4 "relpos"
1809
+ # Jumper et al. (2021) Suppl. Alg. 5 "one_hot"
1810
+ if c.max_relative_feature:
1811
+ # Add one-hot-encoded clipped residue distances to the pair activations.
1812
+ if "rel_pos" in batch:
1813
+ rel_pos = batch['rel_pos']
1814
+ else:
1815
+ if "offset" in batch:
1816
+ offset = batch['offset']
1817
+ else:
1818
+ pos = batch['residue_index']
1819
+ offset = pos[:, None] - pos[None, :]
1820
+ rel_pos = jax.nn.one_hot(
1821
+ jnp.clip(
1822
+ offset + c.max_relative_feature,
1823
+ a_min=0,
1824
+ a_max=2 * c.max_relative_feature),
1825
+ 2 * c.max_relative_feature + 1)
1826
+ pair_activations += common_modules.Linear(c.pair_channel, name='pair_activiations')(rel_pos)
1827
+
1828
+ # Embed templates into the pair activations.
1829
+ # Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 9-13
1830
+
1831
+ if c.template.enabled:
1832
+ template_batch = {k: batch[k] for k in batch if k.startswith('template_')}
1833
+ template_pair_representation = TemplateEmbedding(c.template, gc)(
1834
+ pair_activations,
1835
+ template_batch,
1836
+ mask_2d,
1837
+ is_training=is_training,
1838
+ scale_rate=batch["scale_rate"])
1839
+
1840
+ pair_activations += template_pair_representation
1841
+
1842
+ # Embed extra MSA features.
1843
+ # Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 14-16
1844
+ extra_msa_feat = create_extra_msa_feature(batch)
1845
+ extra_msa_activations = common_modules.Linear(
1846
+ c.extra_msa_channel,
1847
+ name='extra_msa_activations')(
1848
+ extra_msa_feat)
1849
+
1850
+ # Extra MSA Stack.
1851
+ # Jumper et al. (2021) Suppl. Alg. 18 "ExtraMsaStack"
1852
+ extra_msa_stack_input = {
1853
+ 'msa': extra_msa_activations,
1854
+ 'pair': pair_activations,
1855
+ }
1856
+
1857
+ extra_msa_stack_iteration = EvoformerIteration(
1858
+ c.evoformer, gc, is_extra_msa=True, name='extra_msa_stack')
1859
+
1860
+ def extra_msa_stack_fn(x):
1861
+ act, safe_key = x
1862
+ safe_key, safe_subkey = safe_key.split()
1863
+ extra_evoformer_output = extra_msa_stack_iteration(
1864
+ activations=act,
1865
+ masks={
1866
+ 'msa': batch['extra_msa_mask'],
1867
+ 'pair': mask_2d
1868
+ },
1869
+ is_training=is_training,
1870
+ safe_key=safe_subkey, scale_rate=batch["scale_rate"])
1871
+ return (extra_evoformer_output, safe_key)
1872
+
1873
+ if gc.use_remat:
1874
+ extra_msa_stack_fn = hk.remat(extra_msa_stack_fn)
1875
+
1876
+ extra_msa_stack = layer_stack.layer_stack(
1877
+ c.extra_msa_stack_num_block)(
1878
+ extra_msa_stack_fn)
1879
+ extra_msa_output, safe_key = extra_msa_stack(
1880
+ (extra_msa_stack_input, safe_key))
1881
+
1882
+ pair_activations = extra_msa_output['pair']
1883
+
1884
+ evoformer_input = {
1885
+ 'msa': msa_activations,
1886
+ 'pair': pair_activations,
1887
+ }
1888
+
1889
+ evoformer_masks = {'msa': batch['msa_mask'], 'pair': mask_2d}
1890
+
1891
+ ####################################################################
1892
+ ####################################################################
1893
+
1894
+ # Append num_templ rows to msa_activations with template embeddings.
1895
+ # Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 7-8
1896
+ if c.template.enabled and c.template.embed_torsion_angles:
1897
+ if jnp.issubdtype(batch['template_aatype'].dtype, jnp.integer):
1898
+ num_templ, num_res = batch['template_aatype'].shape
1899
+ # Embed the templates aatypes.
1900
+ aatype = batch['template_aatype']
1901
+ aatype_one_hot = jax.nn.one_hot(batch['template_aatype'], 22, axis=-1)
1902
+ else:
1903
+ num_templ, num_res, _ = batch['template_aatype'].shape
1904
+ aatype = batch['template_aatype'].argmax(-1)
1905
+ aatype_one_hot = batch['template_aatype']
1906
+
1907
+ # Embed the templates aatype, torsion angles and masks.
1908
+ # Shape (templates, residues, msa_channels)
1909
+ ret = all_atom.atom37_to_torsion_angles(
1910
+ aatype=aatype,
1911
+ all_atom_pos=batch['template_all_atom_positions'],
1912
+ all_atom_mask=batch['template_all_atom_masks'],
1913
+ # Ensure consistent behaviour during testing:
1914
+ placeholder_for_undefined=not gc.zero_init)
1915
+
1916
+ template_features = jnp.concatenate([
1917
+ aatype_one_hot,
1918
+ jnp.reshape(ret['torsion_angles_sin_cos'], [num_templ, num_res, 14]),
1919
+ jnp.reshape(ret['alt_torsion_angles_sin_cos'], [num_templ, num_res, 14]),
1920
+ ret['torsion_angles_mask']], axis=-1)
1921
+
1922
+ template_activations = common_modules.Linear(
1923
+ c.msa_channel,
1924
+ initializer='relu',
1925
+ name='template_single_embedding')(template_features)
1926
+ template_activations = jax.nn.relu(template_activations)
1927
+ template_activations = common_modules.Linear(
1928
+ c.msa_channel,
1929
+ initializer='relu',
1930
+ name='template_projection')(template_activations)
1931
+
1932
+ # Concatenate the templates to the msa.
1933
+ evoformer_input['msa'] = jnp.concatenate([evoformer_input['msa'], template_activations], axis=0)
1934
+
1935
+ # Concatenate templates masks to the msa masks.
1936
+ # Use mask from the psi angle, as it only depends on the backbone atoms
1937
+ # from a single residue.
1938
+ torsion_angle_mask = ret['torsion_angles_mask'][:, :, 2]
1939
+ torsion_angle_mask = torsion_angle_mask.astype(evoformer_masks['msa'].dtype)
1940
+ evoformer_masks['msa'] = jnp.concatenate([evoformer_masks['msa'], torsion_angle_mask], axis=0)
1941
+
1942
+ ####################################################################
1943
+ ####################################################################
1944
+
1945
+ # Main trunk of the network
1946
+ # Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 17-18
1947
+ evoformer_iteration = EvoformerIteration(
1948
+ c.evoformer, gc, is_extra_msa=False, name='evoformer_iteration')
1949
+
1950
+ def evoformer_fn(x):
1951
+ act, safe_key = x
1952
+ safe_key, safe_subkey = safe_key.split()
1953
+ evoformer_output = evoformer_iteration(
1954
+ activations=act,
1955
+ masks=evoformer_masks,
1956
+ is_training=is_training,
1957
+ safe_key=safe_subkey, scale_rate=batch["scale_rate"])
1958
+ return (evoformer_output, safe_key)
1959
+
1960
+ if gc.use_remat:
1961
+ evoformer_fn = hk.remat(evoformer_fn)
1962
+
1963
+ evoformer_stack = layer_stack.layer_stack(c.evoformer_num_block)(evoformer_fn)
1964
+ evoformer_output, safe_key = evoformer_stack((evoformer_input, safe_key))
1965
+
1966
+ msa_activations = evoformer_output['msa']
1967
+ pair_activations = evoformer_output['pair']
1968
+
1969
+ single_activations = common_modules.Linear(
1970
+ c.seq_channel, name='single_activations')(msa_activations[0])
1971
+
1972
+ num_sequences = batch['msa_feat'].shape[0]
1973
+ output = {
1974
+ 'single': single_activations,
1975
+ 'pair': pair_activations,
1976
+ # Crop away template rows such that they are not used in MaskedMsaHead.
1977
+ 'msa': msa_activations[:num_sequences, :, :],
1978
+ 'msa_first_row': msa_activations[0],
1979
+ }
1980
+
1981
+ return output
1982
+
1983
+ ####################################################################
1984
+ ####################################################################
1985
class SingleTemplateEmbedding(hk.Module):
  """Embeds a single template.

  Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 9+11
  """

  def __init__(self, config, global_config, name='single_template_embedding'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, query_embedding, batch, mask_2d, is_training, scale_rate=1.0):
    """Build the single template embedding.

    Arguments:
      query_embedding: Query pair representation, shape [N_res, N_res, c_z].
        Inside this method it is only consulted for its dtype.
      batch: A batch of template features (note the template dimension has been
        stripped out as this module only runs over a single template).
      mask_2d: Padding mask (Note: this doesn't care if a template exists,
        unlike the template_pseudo_beta_mask).
      is_training: Whether the module is in training mode.
      scale_rate: Forwarded unchanged to TemplatePairStack.
        NOTE(review): presumably a multiplier on dropout rates - confirm
        against TemplatePairStack.

    Returns:
      A template embedding [N_res, N_res, c_z].
    """
    assert mask_2d.dtype == query_embedding.dtype
    dtype = query_embedding.dtype
    num_res = batch['template_aatype'].shape[0]
    num_channels = (self.config.template_pair_stack
                    .triangle_attention_ending_node.value_dim)
    # Pairwise mask built from the pseudo-beta mask (outer product).
    template_mask = batch['template_pseudo_beta_mask']
    template_mask_2d = template_mask[:, None] * template_mask[None, :]
    template_mask_2d = template_mask_2d.astype(dtype)

    # Use a caller-supplied distogram when present; otherwise derive one from
    # the pseudo-beta positions, using the "soft" variant when
    # config.backprop_dgram is set.
    if "template_dgram" in batch:
      template_dgram = batch["template_dgram"]
    else:
      if self.config.backprop_dgram:
        template_dgram = dgram_from_positions_soft(batch['template_pseudo_beta'],
                                                   temp=self.config.backprop_dgram_temp,
                                                   **self.config.dgram_features)
      else:
        template_dgram = dgram_from_positions(batch['template_pseudo_beta'],
                                              **self.config.dgram_features)
    template_dgram = template_dgram.astype(dtype)

    # Feature list, concatenated along the last axis further down. Note that
    # the mask appended here is the pseudo-beta pair mask.
    to_concat = [template_dgram, template_mask_2d[:, :, None]]

    # Integer aatype is one-hot encoded into 22 classes; any other dtype is
    # used as-is.
    # NOTE(review): the non-integer case presumably already carries a 22-wide
    # (soft) one-hot encoding - confirm against the caller.
    if jnp.issubdtype(batch['template_aatype'].dtype, jnp.integer):
      aatype = jax.nn.one_hot(batch['template_aatype'], 22, axis=-1, dtype=dtype)
    else:
      aatype = batch['template_aatype']

    # Broadcast the per-residue aatype features along both pair axes.
    to_concat.append(jnp.tile(aatype[None, :, :], [num_res, 1, 1]))
    to_concat.append(jnp.tile(aatype[:, None, :], [1, num_res, 1]))

    # Backbone affine mask: whether the residue has C, CA, N
    # (the template mask defined above only considers pseudo CB).
    # template_mask / template_mask_2d are REBOUND here; everything below uses
    # the backbone-based mask, not the pseudo-beta one.
    n, ca, c = [residue_constants.atom_order[a] for a in ('N', 'CA', 'C')]
    template_mask = (
        batch['template_all_atom_masks'][..., n] *
        batch['template_all_atom_masks'][..., ca] *
        batch['template_all_atom_masks'][..., c])
    template_mask_2d = template_mask[:, None] * template_mask[None, :]

    # compute unit_vector (not used by default)
    if self.config.use_template_unit_vector:
      rot, trans = quat_affine.make_transform_from_reference(
          n_xyz=batch['template_all_atom_positions'][:, n],
          ca_xyz=batch['template_all_atom_positions'][:, ca],
          c_xyz=batch['template_all_atom_positions'][:, c])
      affines = quat_affine.QuatAffine(
          quaternion=quat_affine.rot_to_quat(rot, unstack_inputs=True),
          translation=trans,
          rotation=rot,
          unstack_inputs=True)
      points = [jnp.expand_dims(x, axis=-2) for x in affines.translation]
      affine_vec = affines.invert_point(points, extra_dims=1)
      # The 1e-6 inside rsqrt keeps the inverse norm finite for zero vectors.
      inv_distance_scalar = jax.lax.rsqrt(1e-6 + sum([jnp.square(x) for x in affine_vec]))
      inv_distance_scalar *= template_mask_2d.astype(inv_distance_scalar.dtype)
      unit_vector = [(x * inv_distance_scalar)[..., None] for x in affine_vec]
    else:
      # Three all-zero channels keep the feature width the same in both cases.
      unit_vector = [jnp.zeros((num_res,num_res,1))] * 3

    unit_vector = [x.astype(dtype) for x in unit_vector]
    to_concat.extend(unit_vector)

    template_mask_2d = template_mask_2d.astype(dtype)
    to_concat.append(template_mask_2d[..., None])

    act = jnp.concatenate(to_concat, axis=-1)

    # Mask out non-template regions so we don't get arbitrary values in the
    # distogram for these regions.
    act *= template_mask_2d[..., None]

    # Jumper et al. (2021) Suppl. Alg. 2 "Inference" line 9
    act = common_modules.Linear(
        num_channels,
        initializer='relu',
        name='embedding2d')(act)

    # Jumper et al. (2021) Suppl. Alg. 2 "Inference" line 11
    act = TemplatePairStack(
        self.config.template_pair_stack, self.global_config)(act, mask_2d, is_training, scale_rate=scale_rate)

    act = hk.LayerNorm([-1], True, True, name='output_layer_norm')(act)
    return act
2090
+
2091
+
2092
class TemplateEmbedding(hk.Module):
  """Embeds a set of templates.

  Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 9-12
  Jumper et al. (2021) Suppl. Alg. 17 "TemplatePointwiseAttention"
  """

  def __init__(self, config, global_config, name='template_embedding'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, query_embedding, template_batch, mask_2d, is_training, scale_rate=1.0):
    """Build TemplateEmbedding module.

    Arguments:
      query_embedding: Query pair representation, shape [N_res, N_res, c_z].
      template_batch: A batch of template features.
      mask_2d: Padding mask (Note: this doesn't care if a template exists,
        unlike the template_pseudo_beta_mask).
      is_training: Whether the module is in training mode.
      scale_rate: Forwarded unchanged to SingleTemplateEmbedding for every
        template. NOTE(review): presumably a multiplier on dropout rates -
        confirm downstream.

    Returns:
      A template embedding [N_res, N_res, c_z].
    """

    num_templates = template_batch['template_mask'].shape[0]
    num_channels = (self.config.template_pair_stack
                    .triangle_attention_ending_node.value_dim)
    num_res = query_embedding.shape[0]

    dtype = query_embedding.dtype
    template_mask = template_batch['template_mask']
    template_mask = template_mask.astype(dtype)

    query_num_channels = query_embedding.shape[-1]

    # Make sure the weights are shared across templates by constructing the
    # embedder here.
    # Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 9-12
    template_embedder = SingleTemplateEmbedding(self.config, self.global_config)

    def map_fn(batch):
      # `batch` is one slice of template_batch along its leading axis.
      return template_embedder(query_embedding, batch, mask_2d, is_training, scale_rate=scale_rate)

    template_pair_representation = mapping.sharded_map(map_fn, in_axes=0)(template_batch)

    # Cross attend from the query to the templates along the residue
    # dimension by flattening everything else into the batch dimension.
    # Jumper et al. (2021) Suppl. Alg. 17 "TemplatePointwiseAttention"
    flat_query = jnp.reshape(query_embedding,[num_res * num_res, 1, query_num_channels])

    # Move the template axis behind the two residue axes, then flatten the
    # residue pair into one leading axis so each pair attends over templates.
    flat_templates = jnp.reshape(
        jnp.transpose(template_pair_representation, [1, 2, 0, 3]),
        [num_res * num_res, num_templates, num_channels])

    # Additive attention bias: 0 where template_mask is 1 and -1e9 where it is
    # 0 (assuming a 0/1 mask).
    bias = (1e9 * (template_mask[None, None, None, :] - 1.))

    template_pointwise_attention_module = Attention(
        self.config.attention, self.global_config, query_num_channels)
    nonbatched_args = [bias]
    batched_args = [flat_query, flat_templates]

    embedding = mapping.inference_subbatch(
        template_pointwise_attention_module,
        self.config.subbatch_size,
        batched_args=batched_args,
        nonbatched_args=nonbatched_args,
        low_memory=not is_training)
    embedding = jnp.reshape(embedding,[num_res, num_res, query_num_channels])

    # No gradients if no templates.
    # The whole embedding is multiplied by 0 when the template mask sums to 0.
    embedding *= (jnp.sum(template_mask) > 0.).astype(embedding.dtype)

    return embedding
2164
+ ####################################################################
af_backprop/alphafold/model/prng.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """A collection of utilities surrounding PRNG usage in protein folding."""
16
+
17
+ import haiku as hk
18
+ import jax
19
+
20
def safe_dropout(*, tensor, safe_key, rate, is_deterministic, is_training):
  """Apply inverted dropout to `tensor`, or return it untouched.

  Dropout is only active when `is_training` is truthy and `is_deterministic`
  is falsy; in every other case the input is returned as-is and `safe_key` is
  not consumed.

  Args:
    tensor: Array to apply dropout to.
    safe_key: Object whose `get()` yields the PRNG key for the mask (consumed
      only when dropout is active).
    rate: Probability of dropping an element.
    is_deterministic: If truthy, dropout is disabled.
    is_training: If falsy, dropout is disabled.

  Returns:
    The masked tensor rescaled by 1 / (1 - rate), or `tensor` itself.
  """
  if not is_training or is_deterministic:
    return tensor

  survive_prob = 1.0 - rate
  mask = jax.random.bernoulli(
      safe_key.get(), survive_prob, shape=tensor.shape)
  # Rescale survivors so the expected value of each element is unchanged.
  return mask * tensor / survive_prob
28
+
29
class SafeKey:
  """Safety wrapper for PRNG keys.

  A SafeKey may be consumed exactly once. `get`, `split` and `duplicate` all
  mark the wrapper as used; calling any of them again on the same instance
  raises RuntimeError, which guards against accidental key reuse.
  """

  def __init__(self, key):
    """Wrap a raw PRNG `key`; the wrapper starts out unused."""
    self._key = key
    self._used = False

  def _assert_not_used(self):
    """Raise RuntimeError if this key has already been consumed."""
    if self._used:
      raise RuntimeError('Random key has been used previously.')

  def get(self):
    """Consume the wrapper and return the raw key."""
    self._assert_not_used()
    self._used = True
    return self._key

  def split(self, num_keys=2):
    """Consume the wrapper and return a tuple of `num_keys` fresh SafeKeys."""
    self._assert_not_used()
    self._used = True
    new_keys = jax.random.split(self._key, num_keys)
    # Use jax.tree_util.tree_map: the top-level `jax.tree_map` alias is
    # deprecated and not available in newer JAX releases.
    return jax.tree_util.tree_map(SafeKey, tuple(new_keys))

  def duplicate(self, num_keys=2):
    """Consume the wrapper and return `num_keys` SafeKeys sharing the same key."""
    self._assert_not_used()
    self._used = True
    return tuple(SafeKey(self._key) for _ in range(num_keys))
55
+
56
+
57
def _safe_key_flatten(safe_key):
  """Flatten a SafeKey into (children, aux_data) for pytree registration.

  The raw key is the single child; the used-flag travels as auxiliary data.
  """
  # Flatten transfers "ownership" to the tree
  children = (safe_key._key,)  # pylint: disable=protected-access
  aux_data = safe_key._used  # pylint: disable=protected-access
  return children, aux_data
60
+
61
+
62
def _safe_key_unflatten(aux_data, children):
  """Rebuild a SafeKey from its flattened form.

  Args:
    aux_data: The used-flag stored at flatten time.
    children: Sequence whose first element is the raw key.

  Returns:
    A SafeKey wrapping `children[0]` with its used-flag restored.
  """
  rebuilt = SafeKey(children[0])
  # Restore the flag captured by the flatten step.
  rebuilt._used = aux_data  # pylint: disable=protected-access
  return rebuilt
66
+
67
+
68
+ jax.tree_util.register_pytree_node(
69
+ SafeKey, _safe_key_flatten, _safe_key_unflatten)
70
+
af_backprop/alphafold/model/quat_affine.py ADDED
@@ -0,0 +1,459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Quaternion geometry modules.
16
+
17
+ This introduces a representation of coordinate frames that is based around a
18
+ ‘QuatAffine’ object. This object describes an array of coordinate frames.
19
+ It consists of vectors corresponding to the
20
+ origin of the frames as well as orientations which are stored in two
21
+ ways, as unit quaternions as well as rotation matrices.
22
+ The rotation matrices are derived from the unit quaternions and the two are kept
23
+ in sync.
24
+ For an explanation of the relation between unit quaternions and rotations see
25
+ https://en.wikipedia.org/wiki/Quaternions_and_spatial_rotation
26
+
27
+ This representation is used in the model for the backbone frames.
28
+
29
+ One important thing to note here, is that while we update both representations
30
+ the jit compiler is going to ensure that only the parts that are
31
+ actually used are executed.
32
+ """
33
+
34
+
35
+ import functools
36
+ from typing import Tuple
37
+
38
+ import jax
39
+ import jax.numpy as jnp
40
+ import numpy as np
41
+
42
# pylint: disable=bad-whitespace
# QUAT_TO_ROT[i, j] holds the 3x3 coefficient matrix that multiplies the
# product quat[i] * quat[j] when a quaternion is expanded into a rotation
# matrix (see quat_to_rot). The trailing letters name the two quaternion
# components involved (r = real part; i, j, k = imaginary parts).
QUAT_TO_ROT = np.zeros((4, 4, 3, 3), dtype=np.float32)

QUAT_TO_ROT[0, 0] = [[ 1, 0, 0], [ 0, 1, 0], [ 0, 0, 1]]  # rr
QUAT_TO_ROT[1, 1] = [[ 1, 0, 0], [ 0,-1, 0], [ 0, 0,-1]]  # ii
QUAT_TO_ROT[2, 2] = [[-1, 0, 0], [ 0, 1, 0], [ 0, 0,-1]]  # jj
QUAT_TO_ROT[3, 3] = [[-1, 0, 0], [ 0,-1, 0], [ 0, 0, 1]]  # kk

QUAT_TO_ROT[1, 2] = [[ 0, 2, 0], [ 2, 0, 0], [ 0, 0, 0]]  # ij
QUAT_TO_ROT[1, 3] = [[ 0, 0, 2], [ 0, 0, 0], [ 2, 0, 0]]  # ik
QUAT_TO_ROT[2, 3] = [[ 0, 0, 0], [ 0, 0, 2], [ 0, 2, 0]]  # jk

QUAT_TO_ROT[0, 1] = [[ 0, 0, 0], [ 0, 0,-2], [ 0, 2, 0]]  # ir
QUAT_TO_ROT[0, 2] = [[ 0, 0, 2], [ 0, 0, 0], [-2, 0, 0]]  # jr
QUAT_TO_ROT[0, 3] = [[ 0,-2, 0], [ 2, 0, 0], [ 0, 0, 0]]  # kr

# QUAT_MULTIPLY[i, j, k] is the coefficient of quat1[i] * quat2[j] in
# component k of the quaternion product (see quat_multiply).
QUAT_MULTIPLY = np.zeros((4, 4, 4), dtype=np.float32)
QUAT_MULTIPLY[:, :, 0] = [[ 1, 0, 0, 0],
                          [ 0,-1, 0, 0],
                          [ 0, 0,-1, 0],
                          [ 0, 0, 0,-1]]

QUAT_MULTIPLY[:, :, 1] = [[ 0, 1, 0, 0],
                          [ 1, 0, 0, 0],
                          [ 0, 0, 0, 1],
                          [ 0, 0,-1, 0]]

QUAT_MULTIPLY[:, :, 2] = [[ 0, 0, 1, 0],
                          [ 0, 0, 0,-1],
                          [ 1, 0, 0, 0],
                          [ 0, 1, 0, 0]]

QUAT_MULTIPLY[:, :, 3] = [[ 0, 0, 0, 1],
                          [ 0, 0, 1, 0],
                          [ 0,-1, 0, 0],
                          [ 1, 0, 0, 0]]

# Same table with index 0 of the second operand (its real part) dropped, for
# multiplying by a pure-vector quaternion (see quat_multiply_by_vec).
QUAT_MULTIPLY_BY_VEC = QUAT_MULTIPLY[:, 1:, :]
# pylint: enable=bad-whitespace
81
+
82
+
83
def rot_to_quat(rot, unstack_inputs=False):
  """Convert rotation matrix to quaternion.

  A symmetric 4x4 matrix is assembled from the rotation entries and the
  eigenvector belonging to its last (largest) eigenvalue, as returned by
  `jnp.linalg.eigh`, is used as the quaternion. The original authors note that
  this eigendecomposition is extremely expensive on the GPU and should run on
  the CPU if at all possible.

  Args:
    rot: rotation matrix (see below for format).
    unstack_inputs: If true, rotation matrix should be shape (..., 3, 3)
      otherwise the rotation matrix should be a list of lists of tensors.

  Returns:
    Quaternion as (..., 4) tensor.
  """
  if unstack_inputs:
    rot = [jnp.moveaxis(row, -1, 0) for row in jnp.moveaxis(rot, -2, 0)]

  (xx, xy, xz), (yx, yy, yz), (zx, zy, zz) = rot

  # Off-diagonal entries of the symmetric matrix, each used twice.
  zy_m_yz = zy - yz
  xz_m_zx = xz - zx
  yx_m_xy = yx - xy
  xy_p_yx = xy + yx
  xz_p_zx = xz + zx
  yz_p_zy = yz + zy

  rows = [
      [xx + yy + zz, zy_m_yz, xz_m_zx, yx_m_xy],
      [zy_m_yz, xx - yy - zz, xy_p_yx, xz_p_zx],
      [xz_m_zx, xy_p_yx, yy - xx - zz, yz_p_zy],
      [yx_m_xy, xz_p_zx, yz_p_zy, zz - xx - yy],
  ]
  stacked = jnp.stack([jnp.stack(r, axis=-1) for r in rows], axis=-2)
  sym = (1./3.) * stacked

  # eigh returns eigenvalues in non-decreasing order together with their
  # eigenvectors; keep the eigenvector of the last one.
  eigvecs = jnp.linalg.eigh(sym)[1]
  return eigvecs[..., -1]
115
+
116
+
117
def rot_list_to_tensor(rot_list):
  """Stack a 3x3 list-of-lists rotation into a single (..., 3, 3) tensor.

  The three entries of each row are stacked on a new last axis, then the three
  rows are stacked on a new second-to-last axis.
  """
  stacked_rows = [jnp.stack(rot_list[i], axis=-1) for i in range(3)]
  return jnp.stack(stacked_rows, axis=-2)
124
+
125
+
126
def vec_list_to_tensor(vec_list):
  """Stack a list of component tensors into one tensor along a new last axis."""
  components = vec_list
  return jnp.stack(components, axis=-1)
129
+
130
+
131
def quat_to_rot(normalized_quat):
  """Convert a normalized quaternion to a rotation matrix.

  Args:
    normalized_quat: Tensor whose last axis holds the 4 quaternion components.

  Returns:
    The rotation as a 3x3 list of lists of tensors.
  """
  coeffs = np.reshape(QUAT_TO_ROT, (4, 4, 9))
  q_left = normalized_quat[..., :, None, None]
  q_right = normalized_quat[..., None, :, None]
  # Sum the coefficient table over both quaternion indices; the 9 remaining
  # entries are the flattened rotation matrix.
  flat = jnp.sum(coeffs * q_left * q_right, axis=(-3, -2))
  entries = jnp.moveaxis(flat, -1, 0)  # Unstack.
  return [[entries[3 * r + c] for c in range(3)] for r in range(3)]
142
+
143
+
144
def quat_multiply_by_vec(quat, vec):
  """Multiply a quaternion by a pure-vector quaternion.

  Args:
    quat: Tensor whose last axis holds the 4 quaternion components.
    vec: Tensor whose last axis holds the 3 vector components.

  Returns:
    The product quaternion, with 4 components on the last axis.
  """
  quat_term = quat[..., :, None, None]
  vec_term = vec[..., None, :, None]
  weighted = QUAT_MULTIPLY_BY_VEC * quat_term * vec_term
  return jnp.sum(weighted, axis=(-3, -2))
151
+
152
+
153
def quat_multiply(quat1, quat2):
  """Multiply a quaternion by another quaternion.

  Args:
    quat1: Left operand, 4 components on the last axis.
    quat2: Right operand, 4 components on the last axis.

  Returns:
    The product quaternion, with 4 components on the last axis.
  """
  left_term = quat1[..., :, None, None]
  right_term = quat2[..., None, :, None]
  weighted = QUAT_MULTIPLY * left_term * right_term
  return jnp.sum(weighted, axis=(-3, -2))
160
+
161
+
162
def apply_rot_to_vec(rot, vec, unstack=False):
  """Multiply rotation matrix by a vector.

  Args:
    rot: 3x3 rotation, indexable as rot[row][col].
    vec: Sequence of three components, or, when `unstack` is true, an array
      whose second axis holds the three components.
    unstack: If true, split `vec` into components via vec[:, i].

  Returns:
    List with the three components of the rotated vector.
  """
  x, y, z = (vec[:, 0], vec[:, 1], vec[:, 2]) if unstack else vec
  return [row[0] * x + row[1] * y + row[2] * z
          for row in (rot[0], rot[1], rot[2])]
171
+
172
+
173
def apply_inverse_rot_to_vec(rot, vec):
  """Multiply the inverse of a rotation matrix by a vector.

  Args:
    rot: 3x3 rotation, indexable as rot[row][col].
    vec: Sequence of three components.

  Returns:
    List with the three components of the inversely rotated vector.
  """
  # Inverse rotation is just transpose, so walk the columns of `rot`.
  v0, v1, v2 = vec[0], vec[1], vec[2]
  return [rot[0][col] * v0 + rot[1][col] * v1 + rot[2][col] * v2
          for col in range(3)]
179
+
180
+
181
class QuatAffine:
  """Affine transformation represented by quaternion and vector.

  The rotation is stored both as a quaternion tensor (`self.quaternion`) and
  as a 3x3 list of lists of tensors (`self.rotation`); the translation is a
  list of three tensors (`self.translation`).
  """

  def __init__(self, quaternion, translation, rotation=None, normalize=True,
               unstack_inputs=False):
    """Initialize from quaternion and translation.

    Args:
      quaternion: Rotation represented by a quaternion, to be applied
        before translation.  Must be a unit quaternion unless normalize==True.
      translation: Translation represented as a vector.
      rotation: Same rotation as the quaternion, represented as a (..., 3, 3)
        tensor.  If None, rotation will be calculated from the quaternion.
      normalize: If True, l2 normalize the quaternion on input.
      unstack_inputs: If True, translation is a vector with last component 3
        and a given rotation is a (..., 3, 3) tensor; both are unstacked into
        lists of component tensors.
    """

    if quaternion is not None:
      assert quaternion.shape[-1] == 4

    if unstack_inputs:
      if rotation is not None:
        rotation = [jnp.moveaxis(x, -1, 0)   # Unstack.
                    for x in jnp.moveaxis(rotation, -2, 0)]  # Unstack.
      translation = jnp.moveaxis(translation, -1, 0)  # Unstack.

    if normalize and quaternion is not None:
      quaternion = quaternion / jnp.linalg.norm(quaternion, axis=-1,
                                                keepdims=True)

    if rotation is None:
      rotation = quat_to_rot(quaternion)

    self.quaternion = quaternion
    self.rotation = [list(row) for row in rotation]
    self.translation = list(translation)

    assert all(len(row) == 3 for row in self.rotation)
    assert len(self.translation) == 3

  def to_tensor(self):
    """Return quaternion and translation concatenated on the last axis."""
    return jnp.concatenate(
        [self.quaternion] +
        [jnp.expand_dims(x, axis=-1) for x in self.translation],
        axis=-1)

  def apply_tensor_fn(self, tensor_fn):
    """Return a new QuatAffine with tensor_fn applied (e.g. stop_gradient)."""
    return QuatAffine(
        tensor_fn(self.quaternion),
        [tensor_fn(x) for x in self.translation],
        rotation=[[tensor_fn(x) for x in row] for row in self.rotation],
        normalize=False)

  def apply_rotation_tensor_fn(self, tensor_fn):
    """Return a new QuatAffine with tensor_fn applied to the rotation part."""
    return QuatAffine(
        tensor_fn(self.quaternion),
        list(self.translation),
        rotation=[[tensor_fn(x) for x in row] for row in self.rotation],
        normalize=False)

  def scale_translation(self, position_scale):
    """Return a new quat affine with a different scale for translation."""

    return QuatAffine(
        self.quaternion,
        [x * position_scale for x in self.translation],
        rotation=[list(row) for row in self.rotation],
        normalize=False)

  @classmethod
  def from_tensor(cls, tensor, normalize=False):
    """Build a QuatAffine from a tensor laid out as produced by to_tensor.

    The last axis is split into the 4 quaternion components followed by the
    three translation components.
    """
    quaternion, tx, ty, tz = jnp.split(tensor, [4, 5, 6], axis=-1)
    return cls(quaternion,
               [tx[..., 0], ty[..., 0], tz[..., 0]],
               normalize=normalize)

  def pre_compose(self, update):
    """Return a new QuatAffine which applies the transformation update first.

    Args:
      update: Length-6 vector. 3-vector of x, y, and z such that the quaternion
        update is (1, x, y, z) and zero for the 3-vector is the identity
        quaternion. 3-vector for translation concatenated.

    Returns:
      New QuatAffine object.
    """
    vector_quaternion_update, x, y, z = jnp.split(update, [3, 4, 5], axis=-1)
    trans_update = [jnp.squeeze(x, axis=-1),
                    jnp.squeeze(y, axis=-1),
                    jnp.squeeze(z, axis=-1)]

    new_quaternion = (self.quaternion +
                      quat_multiply_by_vec(self.quaternion,
                                           vector_quaternion_update))

    # The translation update is expressed in the local frame, so rotate it
    # with the current rotation before adding it.
    trans_update = apply_rot_to_vec(self.rotation, trans_update)
    new_translation = [
        self.translation[0] + trans_update[0],
        self.translation[1] + trans_update[1],
        self.translation[2] + trans_update[2]]

    # Default normalize=True re-normalizes the summed quaternion.
    return QuatAffine(new_quaternion, new_translation)

  def _expand_trailing_dims(self, extra_dims):
    """Return (rotation, translation) with `extra_dims` trailing unit axes."""
    rotation = self.rotation
    translation = self.translation
    expand_fn = functools.partial(jnp.expand_dims, axis=-1)
    for _ in range(extra_dims):
      # jax.tree_util.tree_map is the stable spelling; the top-level
      # `jax.tree_map` alias is deprecated and gone from newer JAX releases.
      rotation = jax.tree_util.tree_map(expand_fn, rotation)
      translation = jax.tree_util.tree_map(expand_fn, translation)
    return rotation, translation

  def apply_to_point(self, point, extra_dims=0):
    """Apply affine to a point.

    Args:
      point: List of 3 tensors to apply affine.
      extra_dims:  Number of dimensions at the end of the transformed_point
        shape that are not present in the rotation and translation.  The most
        common use is rotation N points at once with extra_dims=1 for use in a
        network.

    Returns:
      Transformed point after applying affine.
    """
    rotation, translation = self._expand_trailing_dims(extra_dims)

    rot_point = apply_rot_to_vec(rotation, point)
    return [
        rot_point[0] + translation[0],
        rot_point[1] + translation[1],
        rot_point[2] + translation[2]]

  def invert_point(self, transformed_point, extra_dims=0):
    """Apply inverse of transformation to a point.

    Args:
      transformed_point: List of 3 tensors to apply affine
      extra_dims:  Number of dimensions at the end of the transformed_point
        shape that are not present in the rotation and translation.  The most
        common use is rotation N points at once with extra_dims=1 for use in a
        network.

    Returns:
      Transformed point after applying affine.
    """
    rotation, translation = self._expand_trailing_dims(extra_dims)

    # Undo the translation first, then the rotation.
    rot_point = [
        transformed_point[0] - translation[0],
        transformed_point[1] - translation[1],
        transformed_point[2] - translation[2]]

    return apply_inverse_rot_to_vec(rotation, rot_point)

  def __repr__(self):
    return 'QuatAffine(%r, %r)' % (self.quaternion, self.translation)
342
+
343
+
344
def _multiply(a, b):
  """Return the 3x3 matrix product a @ b, written out element by element.

  Both operands are indexed as m[row][col]; the result stacks the three
  computed rows along a new leading axis.
  """
  def _row(i):
    # Entry (i, j) is the dot product of row i of `a` with column j of `b`.
    return jnp.array([
        a[i][0]*b[0][j] + a[i][1]*b[1][j] + a[i][2]*b[2][j]
        for j in range(3)])

  return jnp.stack([_row(i) for i in range(3)])
357
+
358
+
359
def make_canonical_transform(
    n_xyz: jnp.ndarray,
    ca_xyz: jnp.ndarray,
    c_xyz: jnp.ndarray) -> Tuple[jnp.ndarray, jnp.ndarray]:
  """Returns translation and rotation matrices to canonicalize residue atoms.

  Note that this method does not take care of symmetries. If you provide the
  atom positions in the non-standard way, the N atom will end up not at
  [-0.527250, 1.359329, 0.0] but instead at [-0.527250, -1.359329, 0.0]. You
  need to take care of such cases in your code.

  Args:
    n_xyz: An array of shape [batch, 3] of nitrogen xyz coordinates.
    ca_xyz: An array of shape [batch, 3] of carbon alpha xyz coordinates.
    c_xyz: An array of shape [batch, 3] of carbon xyz coordinates.

  Returns:
    A tuple (translation, rotation) where:
      translation is an array of shape [batch, 3] defining the translation.
      rotation is an array of shape [batch, 3, 3] defining the rotation.
    After applying the translation and rotation to all atoms in a residue:
      * All atoms will be shifted so that CA is at the origin,
      * All atoms will be rotated so that C is at the x-axis,
      * All atoms will be rotated so that N is in the xy plane.
  """
  assert len(n_xyz.shape) == 2, n_xyz.shape
  assert n_xyz.shape[-1] == 3, n_xyz.shape
  assert n_xyz.shape == ca_xyz.shape == c_xyz.shape, (
      n_xyz.shape, ca_xyz.shape, c_xyz.shape)

  def stack_rows(rows):
    # Builds a [3, 3, batch] array from three rows of per-batch entries.
    return jnp.stack([jnp.array(row) for row in rows])

  # Step 1: move CA to the origin.
  translation = -ca_xyz
  n_xyz = n_xyz + translation
  c_xyz = c_xyz + translation

  # Step 2: bring C onto the x-axis with two successive rotations.
  c_x, c_y, c_z = c_xyz[:, 0], c_xyz[:, 1], c_xyz[:, 2]
  # The 1e-20 terms keep the divisions finite for degenerate inputs.
  norm_xy = jnp.sqrt(1e-20 + c_x**2 + c_y**2)
  norm_xyz = jnp.sqrt(1e-20 + c_x**2 + c_y**2 + c_z**2)

  # Rotation by angle c1 in the x-y plane (around the z-axis).
  sin_c1 = -c_y / norm_xy
  cos_c1 = c_x / norm_xy
  zeros = jnp.zeros_like(sin_c1)
  ones = jnp.ones_like(sin_c1)
  c1_rot_matrix = stack_rows([[cos_c1, -sin_c1, zeros],
                              [sin_c1, cos_c1, zeros],
                              [zeros, zeros, ones]])

  # Rotation by angle c2 in the x-z plane (around the y-axis).
  sin_c2 = c_z / norm_xyz
  cos_c2 = jnp.sqrt(c_x**2 + c_y**2) / norm_xyz
  c2_rot_matrix = stack_rows([[cos_c2, zeros, sin_c2],
                              [zeros, ones, zeros],
                              [-sin_c2, zeros, cos_c2]])

  c_rot_matrix = _multiply(c2_rot_matrix, c1_rot_matrix)
  n_xyz = jnp.stack(apply_rot_to_vec(c_rot_matrix, n_xyz, unstack=True)).T

  # Step 3: rotate around the x-axis (in the y-z plane) so that N lies in the
  # x-y plane.
  n_y, n_z = n_xyz[:, 1], n_xyz[:, 2]
  norm_yz = jnp.sqrt(1e-20 + n_y**2 + n_z**2)
  sin_n = -n_z / norm_yz
  cos_n = n_y / norm_yz
  n_rot_matrix = stack_rows([[ones, zeros, zeros],
                             [zeros, cos_n, -sin_n],
                             [zeros, sin_n, cos_n]])

  # The composed matrix is laid out as [3, 3, batch]; move batch to the front.
  full_rotation = _multiply(n_rot_matrix, c_rot_matrix)
  return translation, jnp.transpose(full_rotation, [2, 0, 1])
429
+
430
+
431
def make_transform_from_reference(
    n_xyz: jnp.ndarray,
    ca_xyz: jnp.ndarray,
    c_xyz: jnp.ndarray) -> Tuple[jnp.ndarray, jnp.ndarray]:
  """Returns rotation and translation matrices to convert from reference.

  Note that this method does not take care of symmetries. If you provide the
  atom positions in the non-standard way, the N atom will end up not at
  [-0.527250, 1.359329, 0.0] but instead at [-0.527250, -1.359329, 0.0]. You
  need to take care of such cases in your code.

  Args:
    n_xyz: An array of shape [batch, 3] of nitrogen xyz coordinates.
    ca_xyz: An array of shape [batch, 3] of carbon alpha xyz coordinates.
    c_xyz: An array of shape [batch, 3] of carbon xyz coordinates.

  Returns:
    A tuple (rotation, translation) where:
      rotation is an array of shape [batch, 3, 3] defining the rotation.
      translation is an array of shape [batch, 3] defining the translation.
    After applying the translation and rotation to the reference backbone,
    the coordinates will approximately equal to the input coordinates.

    The order of translation and rotation differs from make_canonical_transform
    because the rotation from this function should be applied before the
    translation, unlike make_canonical_transform.
  """
  canonical_translation, canonical_rotation = make_canonical_transform(
      n_xyz, ca_xyz, c_xyz)
  # Invert the canonicalizing transform: transpose each 3x3 rotation and
  # negate the translation.
  inverse_rotation = np.transpose(canonical_rotation, (0, 2, 1))
  return inverse_rotation, -canonical_translation
af_backprop/alphafold/model/r3.py ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Transformations for 3D coordinates.
16
+
17
+ This Module contains objects for representing Vectors (Vecs), Rotation Matrices
18
+ (Rots) and proper Rigid transformation (Rigids). These are represented as
19
+ named tuples with arrays for each entry, for example a set of
20
+ [N, M] points would be represented as a Vecs object with arrays of shape [N, M]
21
+ for x, y and z.
22
+
23
This is being done to improve readability by making it very clear what objects
are geometric objects rather than relying on comments and array shapes.
Another reason for this is to avoid using matrix multiplication primitives like
matmul or einsum. On modern accelerator hardware these can end up on
specialized cores such as tensor cores on GPU or the MXU on cloud TPUs, which
often involves lower computational precision that can be problematic for
coordinate geometry. These cores are also typically optimized for matrices
larger than 3x3, so this code is written to avoid any unintended use of them
on both GPUs and TPUs.
32
+ """
33
+
34
+ import collections
35
+ from typing import List
36
+ from alphafold.model import quat_affine
37
+ import jax.numpy as jnp
38
+ import tree
39
+
40
# Batch of 3-component vectors; every component lives in its own array.
Vecs = collections.namedtuple('Vecs', ['x', 'y', 'z'])

# Batch of 3x3 rotation matrices; every entry lives in its own array. The
# field name gives (row, column), e.g. 'xy' is the entry in row x, column y.
Rots = collections.namedtuple(
    'Rots', ['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz'])

# Batch of rigid 3D transformations: a rotation part ('rot') and a
# translation part ('trans').
Rigids = collections.namedtuple('Rigids', ['rot', 'trans'])
52
+
53
+
54
def squared_difference(x, y):
  """Returns the elementwise square of `x - y`."""
  delta = x - y
  return jnp.square(delta)
56
+
57
+
58
def invert_rigids(r: Rigids) -> Rigids:
  """Computes group inverse of rigid transformations 'r'."""
  rot_inv = invert_rots(r.rot)
  # The inverse translation is the negated, inverse-rotated translation.
  rotated = rots_mul_vecs(rot_inv, r.trans)
  trans_inv = Vecs(-rotated.x, -rotated.y, -rotated.z)
  return Rigids(rot_inv, trans_inv)
64
+
65
+
66
def invert_rots(m: Rots) -> Rots:
  """Computes inverse of rotations 'm' by transposing the matrix entries."""
  return Rots(xx=m.xx, xy=m.yx, xz=m.zx,
              yx=m.xy, yy=m.yy, yz=m.zy,
              zx=m.xz, zy=m.yz, zz=m.zz)
71
+
72
+
73
def rigids_from_3_points(
    point_on_neg_x_axis: Vecs,  # shape (...)
    origin: Vecs,  # shape (...)
    point_on_xy_plane: Vecs,  # shape (...)
) -> Rigids:  # shape (...)
  """Create Rigids from 3 points.

  Jumper et al. (2021) Suppl. Alg. 21 "rigidFrom3Points"
  The rotation is obtained by Gram-Schmidt orthogonalization (see
  `rots_from_two_vecs`) of the two directions spanned by the points.

  Args:
    point_on_neg_x_axis: Vecs corresponding to points on the negative x axis
    origin: Origin of resulting rigid transformations
    point_on_xy_plane: Vecs corresponding to points in the xy plane
  Returns:
    Rigids whose rotation is built from `origin - point_on_neg_x_axis` and
    `point_on_xy_plane - origin`, and whose translation is `origin`.
  """
  x_direction = vecs_sub(origin, point_on_neg_x_axis)
  xy_direction = vecs_sub(point_on_xy_plane, origin)
  rotation = rots_from_two_vecs(
      e0_unnormalized=x_direction,
      e1_unnormalized=xy_direction)
  return Rigids(rot=rotation, trans=origin)
97
+
98
+
99
def rigids_from_list(l: List[jnp.ndarray]) -> Rigids:
  """Converts flat list of 12 arrays (9 rotation, 3 translation) to Rigids."""
  assert len(l) == 12
  rot_entries, trans_entries = l[:9], l[9:]
  return Rigids(Rots(*rot_entries), Vecs(*trans_entries))
103
+
104
+
105
def rigids_from_quataffine(a: quat_affine.QuatAffine) -> Rigids:
  """Converts QuatAffine object to the corresponding Rigids object."""
  # `a.rotation` is flattened into the nine Rots fields in order.
  rot = Rots(*tree.flatten(a.rotation))
  trans = Vecs(*a.translation)
  return Rigids(rot, trans)
109
+
110
+
111
def rigids_from_tensor4x4(
    m: jnp.ndarray  # shape (..., 4, 4)
) -> Rigids:  # shape (...)
  """Construct Rigids object from an 4x4 array.

  Here the 4x4 is representing the transformation in homogeneous coordinates:
  the upper-left 3x3 block is read as the rotation and the first three
  entries of the last column as the translation.

  Args:
    m: Array representing transformations in homogeneous coordinates.
  Returns:
    Rigids object corresponding to transformations m
  """
  assert m.shape[-1] == 4
  assert m.shape[-2] == 4
  rot = Rots(*(m[..., row, col] for row in range(3) for col in range(3)))
  trans = Vecs(*(m[..., row, 3] for row in range(3)))
  return Rigids(rot, trans)
130
+
131
+
132
def rigids_from_tensor_flat9(
    m: jnp.ndarray  # shape (..., 9)
) -> Rigids:  # shape (...)
  """Flat9 encoding: first two columns of rotation matrix + translation."""
  assert m.shape[-1] == 9
  parts = [m[..., i] for i in range(9)]
  e0 = Vecs(*parts[0:3])
  e1 = Vecs(*parts[3:6])
  trans = Vecs(*parts[6:9])
  # The third column is reconstructed by rots_from_two_vecs.
  rot = rots_from_two_vecs(e0, e1)
  return Rigids(rot=rot, trans=trans)
142
+
143
+
144
def rigids_from_tensor_flat12(
    m: jnp.ndarray  # shape (..., 12)
) -> Rigids:  # shape (...)
  """Flat12 encoding: rotation matrix (9 floats) + translation (3 floats)."""
  assert m.shape[-1] == 12
  # Bring the encoding axis to the front so it can be unpacked by position.
  components = jnp.moveaxis(m, -1, 0)
  rot = Rots(*components[:9])
  trans = Vecs(*components[9:])
  return Rigids(rot, trans)
151
+
152
+
153
def rigids_mul_rigids(a: Rigids, b: Rigids) -> Rigids:
  """Group composition of Rigids 'a' and 'b'."""
  composed_rot = rots_mul_rots(a.rot, b.rot)
  # b's translation is rotated by a before a's translation is added.
  composed_trans = vecs_add(a.trans, rots_mul_vecs(a.rot, b.trans))
  return Rigids(composed_rot, composed_trans)
158
+
159
+
160
def rigids_mul_rots(r: Rigids, m: Rots) -> Rigids:
  """Compose rigid transformations 'r' with rotations 'm'.

  Only the rotation part changes; the translation of 'r' is kept as is.
  """
  new_rot = rots_mul_rots(r.rot, m)
  return Rigids(new_rot, r.trans)
163
+
164
+
165
def rigids_mul_vecs(r: Rigids, v: Vecs) -> Vecs:
  """Apply rigid transforms 'r' to points 'v' (rotate, then translate)."""
  rotated = rots_mul_vecs(r.rot, v)
  return vecs_add(rotated, r.trans)
168
+
169
+
170
def rigids_to_list(r: Rigids) -> List[jnp.ndarray]:
  """Turn Rigids into flat list, inverse of 'rigids_from_list'."""
  # Rotation entries come first, followed by the translation components.
  return [*r.rot, *r.trans]
173
+
174
+
175
def rigids_to_quataffine(r: Rigids) -> quat_affine.QuatAffine:
  """Convert Rigids r into QuatAffine, inverse of 'rigids_from_quataffine'."""
  rot, trans = r.rot, r.trans
  rotation_rows = [[rot.xx, rot.xy, rot.xz],
                   [rot.yx, rot.yy, rot.yz],
                   [rot.zx, rot.zy, rot.zz]]
  translation = [trans.x, trans.y, trans.z]
  # No quaternion is supplied; only rotation and translation are passed.
  return quat_affine.QuatAffine(
      quaternion=None,
      rotation=rotation_rows,
      translation=translation)
183
+
184
+
185
def rigids_to_tensor_flat9(
    r: Rigids  # shape (...)
) -> jnp.ndarray:  # shape (..., 9)
  """Flat9 encoding: first two columns of rotation matrix + translation."""
  rot = r.rot
  first_column = [rot.xx, rot.yx, rot.zx]
  second_column = [rot.xy, rot.yy, rot.zy]
  return jnp.stack(first_column + second_column + list(r.trans), axis=-1)
192
+
193
+
194
def rigids_to_tensor_flat12(
    r: Rigids  # shape (...)
) -> jnp.ndarray:  # shape (..., 12)
  """Flat12 encoding: rotation matrix (9 floats) + translation (3 floats)."""
  flat_components = [*r.rot, *r.trans]
  return jnp.stack(flat_components, axis=-1)
199
+
200
+
201
def rots_from_tensor3x3(
    m: jnp.ndarray,  # shape (..., 3, 3)
) -> Rots:  # shape (...)
  """Convert rotations represented as (3, 3) array to Rots."""
  assert m.shape[-1] == 3
  assert m.shape[-2] == 3
  # Entries are read row by row, matching the field order of Rots.
  return Rots(*(m[..., row, col] for row in range(3) for col in range(3)))
210
+
211
+
212
def rots_from_two_vecs(e0_unnormalized: Vecs, e1_unnormalized: Vecs) -> Rots:
  """Create rotation matrices from unnormalized vectors for the x and y-axes.

  This creates a rotation matrix from two vectors using Gram-Schmidt
  orthogonalization.

  Args:
    e0_unnormalized: vectors lying along x-axis of resulting rotation
    e1_unnormalized: vectors lying in xy-plane of resulting rotation
  Returns:
    Rotations resulting from Gram-Schmidt procedure.
  """
  # First basis vector: the normalized x-axis direction.
  e0 = vecs_robust_normalize(e0_unnormalized)

  # Second basis vector: remove the component of e1 along e0, then normalize.
  projection = vecs_dot_vecs(e1_unnormalized, e0)
  e1_orthogonal = Vecs(e1_unnormalized.x - projection * e0.x,
                       e1_unnormalized.y - projection * e0.y,
                       e1_unnormalized.z - projection * e0.z)
  e1 = vecs_robust_normalize(e1_orthogonal)

  # Third basis vector: cross product of the first two.
  e2 = vecs_cross_vecs(e0, e1)

  # The basis vectors e0, e1, e2 become the columns of the rotation matrix.
  return Rots(xx=e0.x, xy=e1.x, xz=e2.x,
              yx=e0.y, yy=e1.y, yz=e2.y,
              zx=e0.z, zy=e1.z, zz=e2.z)
238
+
239
+
240
def rots_mul_rots(a: Rots, b: Rots) -> Rots:
  """Composition of rotations 'a' and 'b'."""
  # Rotate each column of 'b' by 'a'; the results are the columns of a*b.
  col0 = rots_mul_vecs(a, Vecs(b.xx, b.yx, b.zx))
  col1 = rots_mul_vecs(a, Vecs(b.xy, b.yy, b.zy))
  col2 = rots_mul_vecs(a, Vecs(b.xz, b.yz, b.zz))
  return Rots(xx=col0.x, xy=col1.x, xz=col2.x,
              yx=col0.y, yy=col1.y, yz=col2.y,
              zx=col0.z, zy=col1.z, zz=col2.z)
246
+
247
+
248
def rots_mul_vecs(m: Rots, v: Vecs) -> Vecs:
  """Apply rotations 'm' to vectors 'v' (matrix-vector product, per entry)."""
  x, y, z = v.x, v.y, v.z
  return Vecs(m.xx * x + m.xy * y + m.xz * z,
              m.yx * x + m.yy * y + m.yz * z,
              m.zx * x + m.zy * y + m.zz * z)
253
+
254
+
255
def vecs_add(v1: Vecs, v2: Vecs) -> Vecs:
  """Add two vectors 'v1' and 'v2' component by component."""
  return Vecs(x=v1.x + v2.x, y=v1.y + v2.y, z=v1.z + v2.z)
258
+
259
+
260
def vecs_dot_vecs(v1: Vecs, v2: Vecs) -> jnp.ndarray:
  """Dot product of vectors 'v1' and 'v2'."""
  xx = v1.x * v2.x
  yy = v1.y * v2.y
  zz = v1.z * v2.z
  return xx + yy + zz
263
+
264
+
265
def vecs_cross_vecs(v1: Vecs, v2: Vecs) -> Vecs:
  """Cross product of vectors 'v1' and 'v2'."""
  cross_x = v1.y * v2.z - v1.z * v2.y
  cross_y = v1.z * v2.x - v1.x * v2.z
  cross_z = v1.x * v2.y - v1.y * v2.x
  return Vecs(cross_x, cross_y, cross_z)
270
+
271
+
272
def vecs_from_tensor(x: jnp.ndarray  # shape (..., 3)
                    ) -> Vecs:  # shape (...)
  """Converts a tensor whose last axis has size 3 to Vecs."""
  assert x.shape[-1] == 3
  return Vecs(x=x[..., 0], y=x[..., 1], z=x[..., 2])
278
+
279
+
280
def vecs_robust_normalize(v: Vecs, epsilon: float = 1e-8) -> Vecs:
  """Normalizes vectors 'v'.

  Args:
    v: vectors to be normalized.
    epsilon: small regularizer added to squared norm before taking square root.
  Returns:
    normalized vectors
  """
  length = vecs_robust_norm(v, epsilon)
  return Vecs(x=v.x / length, y=v.y / length, z=v.z / length)
291
+
292
+
293
def vecs_robust_norm(v: Vecs, epsilon: float = 1e-8) -> jnp.ndarray:
  """Computes norm of vectors 'v'.

  Args:
    v: vectors whose norm is computed.
    epsilon: small regularizer added to squared norm before taking square root.
  Returns:
    norm of 'v'
  """
  squared_norm = jnp.square(v.x) + jnp.square(v.y) + jnp.square(v.z)
  return jnp.sqrt(squared_norm + epsilon)
303
+
304
+
305
def vecs_sub(v1: Vecs, v2: Vecs) -> Vecs:
  """Computes v1 - v2 component by component."""
  return Vecs(x=v1.x - v2.x, y=v1.y - v2.y, z=v1.z - v2.z)
308
+
309
+
310
def vecs_squared_distance(v1: Vecs, v2: Vecs) -> jnp.ndarray:
  """Computes squared euclidean distance between 'v1' and 'v2'."""
  dx2 = squared_difference(v1.x, v2.x)
  dy2 = squared_difference(v1.y, v2.y)
  dz2 = squared_difference(v1.z, v2.z)
  return dx2 + dy2 + dz2
315
+
316
+
317
def vecs_to_tensor(v: Vecs  # shape (...)
                  ) -> jnp.ndarray:  # shape(..., 3)
  """Stacks x, y, z on a new last axis; inverse of 'vecs_from_tensor'."""
  components = [v.x, v.y, v.z]
  return jnp.stack(components, axis=-1)
af_backprop/alphafold/model/tf/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Alphafold model TensorFlow code."""
af_backprop/alphafold/model/tf/data_transforms.py ADDED
@@ -0,0 +1,625 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Data for AlphaFold."""
16
+
17
+ from alphafold.common import residue_constants
18
+ from alphafold.model.tf import shape_helpers
19
+ from alphafold.model.tf import shape_placeholders
20
+ from alphafold.model.tf import utils
21
+ import numpy as np
22
+ import tensorflow.compat.v1 as tf
23
+
24
+ # Pylint gets confused by the curry1 decorator because it changes the number
25
+ # of arguments to the function.
26
+ # pylint:disable=no-value-for-parameter
27
+
28
+
29
# Module-level aliases for the shape placeholders used in this file.
NUM_RES = shape_placeholders.NUM_RES
NUM_MSA_SEQ = shape_placeholders.NUM_MSA_SEQ
NUM_EXTRA_SEQ = shape_placeholders.NUM_EXTRA_SEQ
NUM_TEMPLATES = shape_placeholders.NUM_TEMPLATES
33
+
34
+
35
def cast_64bit_ints(protein):
  """Casts every tf.int64 feature in `protein` to tf.int32, in place.

  Args:
    protein: dict of features; every value must expose a `dtype`.

  Returns:
    The same dict, with int64 entries replaced by int32 casts.
  """
  int64_keys = [k for k, v in protein.items() if v.dtype == tf.int64]
  for k in int64_keys:
    protein[k] = tf.cast(protein[k], tf.int32)
  return protein
41
+
42
+
43
# Feature keys that the MSA sampling/cropping helpers in this file gather or
# delete together (also under an 'extra_' prefix).
_MSA_FEATURE_NAMES = [
    'msa',
    'deletion_matrix',
    'msa_mask',
    'msa_row_mask',
    'bert_mask',
    'true_msa',
]
47
+
48
+
49
def make_seq_mask(protein):
  """Adds 'seq_mask': float32 ones with the shape of protein['aatype']."""
  aatype_shape = shape_helpers.shape_list(protein['aatype'])
  protein['seq_mask'] = tf.ones(aatype_shape, dtype=tf.float32)
  return protein
53
+
54
+
55
def make_template_mask(protein):
  """Adds 'template_mask': float32 ones shaped like 'template_domain_names'."""
  names_shape = shape_helpers.shape_list(protein['template_domain_names'])
  protein['template_mask'] = tf.ones(names_shape, dtype=tf.float32)
  return protein
60
+
61
+
62
def curry1(f):
  """Supply all arguments but the first.

  The decorated function is called with every argument except the first and
  returns a one-argument callable that completes the call to `f`.
  """

  def fc(*args, **kwargs):

    def apply_to_first(x):
      return f(x, *args, **kwargs)

    return apply_to_first

  return fc
69
+
70
+
71
@curry1
def add_distillation_flag(protein, distillation):
  """Stores `distillation` as a scalar float32 under 'is_distillation'."""
  flag = tf.constant(float(distillation), shape=[], dtype=tf.float32)
  protein['is_distillation'] = flag
  return protein
77
+
78
+
79
def make_all_atom_aatype(protein):
  """Makes 'all_atom_aatype' refer to the same value as 'aatype'."""
  aatype = protein['aatype']
  protein['all_atom_aatype'] = aatype
  return protein
82
+
83
+
84
def fix_templates_aatype(protein):
  """Fixes aatype encoding of templates."""
  # One-hot over the last axis -> integer indices.
  hhsearch_aatype = tf.argmax(
      protein['template_aatype'], output_type=tf.int32, axis=-1)
  # Translate hhsearch-aatype indices into our aatype ordering via a lookup.
  remap = tf.constant(
      residue_constants.MAP_HHBLITS_AATYPE_TO_OUR_AATYPE, dtype=tf.int32)
  protein['template_aatype'] = tf.gather(
      params=remap, indices=hhsearch_aatype)
  return protein
95
+
96
+
97
def correct_msa_restypes(protein):
  """Correct MSA restype to have the same order as residue_constants."""
  new_order_list = residue_constants.MAP_HHBLITS_AATYPE_TO_OUR_AATYPE
  msa = protein['msa']
  new_order = tf.constant(new_order_list, dtype=msa.dtype)
  protein['msa'] = tf.gather(new_order, msa, axis=0)

  # Permutation matrix with a one at (old index, new index).
  perm_matrix = np.zeros((22, 22), dtype=np.float32)
  perm_matrix[range(len(new_order_list)), new_order_list] = 1.

  for k in protein:
    if 'profile' not in k:  # Only hhblits and psiblast profiles are permuted
      continue
    num_dim = protein[k].shape.as_list()[-1]
    assert num_dim in (20, 21, 22), (
        'num_dim for %s out of expected range: %s' % (k, num_dim))
    protein[k] = tf.tensordot(protein[k], perm_matrix[:num_dim, :num_dim], 1)
  return protein
113
+
114
+
115
def squeeze_features(protein):
  """Remove singleton and repeated dimensions in protein features."""
  protein['aatype'] = tf.argmax(
      protein['aatype'], axis=-1, output_type=tf.int32)

  squeezable_keys = (
      'domain_name', 'msa', 'num_alignments', 'seq_length', 'sequence',
      'superfamily', 'deletion_matrix', 'resolution',
      'between_segment_residues', 'residue_index', 'template_all_atom_masks')
  for k in squeezable_keys:
    if k not in protein:
      continue
    final_dim = shape_helpers.shape_list(protein[k])[-1]
    # Only squeeze when the last dimension is statically known to be 1.
    if isinstance(final_dim, int) and final_dim == 1:
      protein[k] = tf.squeeze(protein[k], axis=-1)

  for k in ('seq_length', 'num_alignments'):
    if k in protein:
      protein[k] = protein[k][0]  # Remove fake sequence dimension
  return protein
132
+
133
+
134
def make_random_crop_to_size_seed(protein):
  """Random seed for cropping residues and templates."""
  seed = utils.make_random_seed()
  protein['random_crop_to_size_seed'] = seed
  return protein
138
+
139
+
140
@curry1
def randomly_replace_msa_with_unknown(protein, replace_proportion):
  """Replace a proportion of the MSA with 'X'."""
  x_idx = 20
  gap_idx = 21

  # MSA: draw a replacement mask, then exclude gap positions from it.
  msa = protein['msa']
  msa_mask = (tf.random.uniform(shape_helpers.shape_list(msa)) <
              replace_proportion)
  msa_mask = tf.logical_and(msa_mask, msa != gap_idx)
  protein['msa'] = tf.where(msa_mask, tf.ones_like(msa) * x_idx, msa)

  # aatype: the same replacement, with an independently drawn mask.
  aatype = protein['aatype']
  aatype_mask = (tf.random.uniform(shape_helpers.shape_list(aatype)) <
                 replace_proportion)
  protein['aatype'] = tf.where(
      aatype_mask, tf.ones_like(aatype) * x_idx, aatype)
  return protein
159
+
160
+
161
@curry1
def sample_msa(protein, max_seq, keep_extra):
  """Sample MSA randomly, remaining sequences are stored as `extra_*`.

  Args:
    protein: batch to sample msa from.
    max_seq: number of sequences to sample.
    keep_extra: When True sequences not sampled are put into fields starting
      with 'extra_*'.

  Returns:
    Protein with sampled msa.
  """
  num_seq = tf.shape(protein['msa'])[0]
  # Row 0 always stays first; only rows 1..num_seq-1 are shuffled.
  shuffled_rest = tf.random_shuffle(tf.range(1, num_seq))
  index_order = tf.concat([[0], shuffled_rest], axis=0)
  num_sel = tf.minimum(max_seq, num_seq)

  sel_seq, not_sel_seq = tf.split(index_order, [num_sel, num_seq - num_sel])

  for k in _MSA_FEATURE_NAMES:
    if k not in protein:
      continue
    # The unselected rows must be gathered before protein[k] is overwritten.
    if keep_extra:
      protein['extra_' + k] = tf.gather(protein[k], not_sel_seq)
    protein[k] = tf.gather(protein[k], sel_seq)

  return protein
188
+
189
+
190
@curry1
def crop_extra_msa(protein, max_extra_msa):
  """MSA features are cropped so only `max_extra_msa` sequences are kept."""
  num_seq = tf.shape(protein['extra_msa'])[0]
  num_sel = tf.minimum(max_extra_msa, num_seq)
  # A random subset of rows: shuffle all indices and keep the first num_sel.
  select_indices = tf.random_shuffle(tf.range(0, num_seq))[:num_sel]

  for k in _MSA_FEATURE_NAMES:
    extra_key = 'extra_' + k
    if extra_key in protein:
      protein[extra_key] = tf.gather(protein[extra_key], select_indices)

  return protein
201
+
202
+
203
def delete_extra_msa(protein):
  """Removes every 'extra_'-prefixed MSA feature present in `protein`."""
  for k in _MSA_FEATURE_NAMES:
    extra_key = 'extra_' + k
    if extra_key in protein:
      del protein[extra_key]
  return protein
208
+
209
+
210
@curry1
def block_delete_msa(protein, config):
  """Sample MSA by deleting contiguous blocks.

  Jumper et al. (2021) Suppl. Alg. 1 "MSABlockDeletion"

  Arguments:
    protein: batch dict containing the msa
    config: ConfigDict with parameters; this function reads
      msa_fraction_per_block, randomize_num_blocks and num_blocks.

  Returns:
    updated protein
  """
  num_seq = shape_helpers.shape_list(protein['msa'])[0]
  # Block length in rows: floor(num_seq * msa_fraction_per_block).
  block_num_seq = tf.cast(
      tf.floor(tf.cast(num_seq, tf.float32) * config.msa_fraction_per_block),
      tf.int32)

  nb = (tf.random.uniform([], 0, config.num_blocks + 1, dtype=tf.int32)
        if config.randomize_num_blocks else config.num_blocks)

  # Every block is a start row plus block_num_seq consecutive offsets,
  # clipped to valid row indices and then de-duplicated.
  del_block_starts = tf.random.uniform([nb], 0, num_seq, dtype=tf.int32)
  del_blocks = del_block_starts[:, None] + tf.range(block_num_seq)
  del_blocks = tf.clip_by_value(del_blocks, 0, num_seq - 1)
  flat_del = tf.reshape(del_blocks, [-1])
  del_indices = tf.unique(tf.sort(flat_del))[0]

  # Make sure we keep the original sequence: row 0 is excluded from the set
  # difference and is prepended unconditionally afterwards.
  candidate_rows = tf.range(1, num_seq)[None]
  sparse_diff = tf.sets.difference(candidate_rows, del_indices[None])
  keep_indices = tf.squeeze(tf.sparse.to_dense(sparse_diff), 0)
  keep_indices = tf.concat([[0], keep_indices], axis=0)

  for k in _MSA_FEATURE_NAMES:
    if k in protein:
      protein[k] = tf.gather(protein[k], keep_indices)

  return protein
249
+
250
+
251
@curry1
def nearest_neighbor_clusters(protein, gap_agreement_weight=0.):
  """Assign each extra MSA sequence to its nearest neighbor in sampled MSA."""

  # Per-class agreement weights over the 23 one-hot classes: 1 for the first
  # 21 classes, `gap_agreement_weight` for the gap class (it could be
  # spurious), and 0 for the last class so that agreeing on the BERT mask
  # never counts. In theory a full blosum matrix could be used here.
  weights = tf.concat([
      tf.ones(21),
      gap_agreement_weight * tf.ones(1),
      np.zeros(1)], 0)

  # Masked one-hot encodings of the sampled and the extra sequences.
  sample_one_hot = (protein['msa_mask'][:, :, None] *
                    tf.one_hot(protein['msa'], 23))
  extra_one_hot = (protein['extra_msa_mask'][:, :, None] *
                   tf.one_hot(protein['extra_msa'], 23))

  num_seq, num_res, _ = shape_helpers.shape_list(sample_one_hot)
  extra_num_seq, _, _ = shape_helpers.shape_list(extra_one_hot)

  # Compute tf.einsum('mrc,nrc,c->mn', sample_one_hot, extra_one_hot, weights)
  # in an optimized fashion to avoid possible memory or computation blowup.
  flat_extra = tf.reshape(extra_one_hot, [extra_num_seq, num_res * 23])
  flat_sample = tf.reshape(sample_one_hot * weights, [num_seq, num_res * 23])
  agreement = tf.matmul(flat_extra, flat_sample, transpose_b=True)

  # Each extra sequence goes to the sampled sequence it agrees with most.
  protein['extra_cluster_assignment'] = tf.argmax(
      agreement, axis=1, output_type=tf.int32)

  return protein
285
+
286
+
287
@curry1
def summarize_clusters(protein):
  """Produce profile and deletion_matrix_mean within each cluster."""
  num_seq = shape_helpers.shape_list(protein['msa'])[0]
  assignment = protein['extra_cluster_assignment']

  def csum(x):
    # Sums the rows of `x` that are assigned to the same cluster.
    return tf.math.unsorted_segment_sum(x, assignment, num_seq)

  mask = protein['extra_msa_mask']
  # The 1e-6 keeps the divisions below finite; the cluster center's own mask
  # is included in the count.
  mask_counts = 1e-6 + protein['msa_mask'] + csum(mask)

  msa_sum = csum(mask[:, :, None] * tf.one_hot(protein['extra_msa'], 23))
  msa_sum += tf.one_hot(protein['msa'], 23)  # Original sequence
  protein['cluster_profile'] = msa_sum / mask_counts[:, :, None]

  del msa_sum

  del_sum = csum(mask * protein['extra_deletion_matrix'])
  del_sum += protein['deletion_matrix']  # Original sequence
  protein['cluster_deletion_mean'] = del_sum / mask_counts
  del del_sum

  return protein
310
+
311
+
312
def make_msa_mask(protein):
  """Mask features are all ones, but will later be zero-padded."""
  msa_shape = shape_helpers.shape_list(protein['msa'])
  protein['msa_mask'] = tf.ones(msa_shape, dtype=tf.float32)
  # One entry per MSA row (first dimension of 'msa').
  num_rows = shape_helpers.shape_list(protein['msa'])[0]
  protein['msa_row_mask'] = tf.ones(num_rows, dtype=tf.float32)
  return protein
319
+
320
+
321
def pseudo_beta_fn(aatype, all_atom_positions, all_atom_masks):
  """Create pseudo beta features.

  The pseudo-beta position is the CA position for glycine and the CB position
  for every other residue type.

  Args:
    aatype: residue type indices.
    all_atom_positions: atom positions, indexed as [..., atom, xyz].
    all_atom_masks: atom masks indexed as [..., atom], or None.

  Returns:
    `pseudo_beta` if `all_atom_masks` is None, otherwise the tuple
    `(pseudo_beta, pseudo_beta_mask)` with the mask cast to float32.
  """
  is_gly = tf.equal(aatype, residue_constants.restype_order['G'])
  ca_idx = residue_constants.atom_order['CA']
  cb_idx = residue_constants.atom_order['CB']

  # Repeat the glycine flag over the three coordinates.
  is_gly_xyz = tf.tile(is_gly[..., None], [1] * len(is_gly.shape) + [3])
  pseudo_beta = tf.where(
      is_gly_xyz,
      all_atom_positions[..., ca_idx, :],
      all_atom_positions[..., cb_idx, :])

  if all_atom_masks is None:
    return pseudo_beta

  pseudo_beta_mask = tf.where(
      is_gly, all_atom_masks[..., ca_idx], all_atom_masks[..., cb_idx])
  return pseudo_beta, tf.cast(pseudo_beta_mask, tf.float32)
338
+
339
+
340
@curry1
def make_pseudo_beta(protein, prefix=''):
  """Create pseudo-beta (alpha for glycine) position and mask."""
  assert prefix in ['', 'template_']
  # Template features use different key names for aatype and atom masks.
  aatype_key = 'template_aatype' if prefix else 'all_atom_aatype'
  mask_key = 'template_all_atom_masks' if prefix else 'all_atom_mask'

  pseudo_beta, pseudo_beta_mask = pseudo_beta_fn(
      protein[aatype_key],
      protein[prefix + 'all_atom_positions'],
      protein[mask_key])

  protein[prefix + 'pseudo_beta'] = pseudo_beta
  protein[prefix + 'pseudo_beta_mask'] = pseudo_beta_mask
  return protein
350
+
351
+
352
@curry1
def add_constant_field(protein, key, value):
  """Stores `value`, converted to a tensor, under `protein[key]`."""
  tensor_value = tf.convert_to_tensor(value)
  protein[key] = tensor_value
  return protein
356
+
357
+
358
def shaped_categorical(probs, epsilon=1e-10):
  """Draw one class index per distribution along the last axis of `probs`.

  Args:
    probs: Tensor whose last axis holds per-class probabilities.
    epsilon: Added to `probs` before the log is taken.

  Returns:
    int32 tensor shaped like `probs` without its last axis.
  """
  full_shape = shape_helpers.shape_list(probs)
  num_classes = full_shape[-1]

  # tf.random.categorical works on a 2-D batch of logits, so flatten all
  # leading axes, draw one sample per row, then restore the leading shape.
  flat_logits = tf.reshape(tf.log(probs + epsilon), [-1, num_classes])
  samples = tf.random.categorical(flat_logits, 1, dtype=tf.int32)
  return tf.reshape(samples, full_shape[:-1])
366
+
367
+
368
def make_hhblits_profile(protein):
  """Compute the HHblits MSA profile if not already present.

  An existing 'hhblits_profile' entry is left untouched.
  """
  if 'hhblits_profile' not in protein:
    # Profile for every residue: the 22-class one-hot MSA averaged over all
    # MSA sequences (axis 0).
    msa_one_hot = tf.one_hot(protein['msa'], 22)
    protein['hhblits_profile'] = tf.reduce_mean(msa_one_hot, axis=0)
  return protein
377
+
378
+
379
@curry1
def make_masked_msa(protein, config, replace_fraction):
  """Create data for BERT on raw MSA.

  Args:
    protein: Feature dict; reads 'msa' and 'hhblits_profile'.
    config: Object providing `uniform_prob`, `profile_prob` and `same_prob`.
    replace_fraction: Threshold compared against uniform random draws to pick
      the MSA positions that get replaced.

  Returns:
    The same dict with 'bert_mask' (float32 indicator of replaced positions),
    'true_msa' (the MSA as it was on entry) and 'msa' (the corrupted MSA).
  """
  # Add a random amino acid uniformly
  # (0.05 for each of the first 20 classes, 0 for the last two).
  random_aa = tf.constant([0.05] * 20 + [0., 0.], dtype=tf.float32)

  # Mixture over the 22 classes: uniform amino acid, MSA profile, and the
  # token that is already there.
  categorical_probs = (
      config.uniform_prob * random_aa +
      config.profile_prob * protein['hhblits_profile'] +
      config.same_prob * tf.one_hot(protein['msa'], 22))

  # Put all remaining probability on [MASK] which is a new column
  # (one extra entry appended at the end of the last axis).
  pad_shapes = [[0, 0] for _ in range(len(categorical_probs.shape))]
  pad_shapes[-1][1] = 1
  mask_prob = 1. - config.profile_prob - config.same_prob - config.uniform_prob
  assert mask_prob >= 0.
  categorical_probs = tf.pad(
      categorical_probs, pad_shapes, constant_values=mask_prob)

  # Positions to corrupt: True where the uniform draw is below
  # replace_fraction.
  sh = shape_helpers.shape_list(protein['msa'])
  mask_position = tf.random.uniform(sh) < replace_fraction

  # A replacement token is sampled for every position, but only used where
  # mask_position is True; elsewhere the original MSA token is kept.
  bert_msa = shaped_categorical(categorical_probs)
  bert_msa = tf.where(mask_position, bert_msa, protein['msa'])

  # Mix real and masked MSA
  protein['bert_mask'] = tf.cast(mask_position, tf.float32)
  protein['true_msa'] = protein['msa']
  protein['msa'] = bert_msa

  return protein
410
+
411
+
412
@curry1
def make_fixed_size(protein, shape_schema, msa_cluster_size, extra_msa_size,
                    num_res, num_templates=0):
  """Guess at the MSA and sequence dimensions to make fixed size.

  Args:
    protein: Feature dict of tensors; padded entries are written back to it.
    shape_schema: Maps each feature name to a per-dimension schema whose
      entries may be the NUM_* placeholders.
    msa_cluster_size: Target size for dimensions tagged NUM_MSA_SEQ.
    extra_msa_size: Target size for dimensions tagged NUM_EXTRA_SEQ.
    num_res: Target size for dimensions tagged NUM_RES.
    num_templates: Target size for dimensions tagged NUM_TEMPLATES.

  Returns:
    The same dict with the features padded at the end of each dimension and
    their static shapes set to the padded sizes.
  """

  pad_size_map = {
      NUM_RES: num_res,
      NUM_MSA_SEQ: msa_cluster_size,
      NUM_EXTRA_SEQ: extra_msa_size,
      NUM_TEMPLATES: num_templates,
  }

  for k, v in protein.items():
    # Don't transfer this to the accelerator.
    if k == 'extra_cluster_assignment':
      continue
    shape = v.shape.as_list()
    schema = shape_schema[k]
    assert len(shape) == len(schema), (
        f'Rank mismatch between shape and shape schema for {k}: '
        f'{shape} vs {schema}')
    # Target size per dimension. A placeholder that maps to a falsy value
    # (None, or 0 as with the default num_templates) falls back to the
    # feature's current static size because of the `or`.
    pad_size = [
        pad_size_map.get(s2, None) or s1 for (s1, s2) in zip(shape, schema)
    ]
    # Pad only at the end of each dimension, by the gap to the target size.
    padding = [(0, p - tf.shape(v)[i]) for i, p in enumerate(pad_size)]
    # `padding` is empty for rank-0 features, which are left as they are.
    if padding:
      protein[k] = tf.pad(
          v, padding, name=f'pad_to_fixed_{k}')
      protein[k].set_shape(pad_size)

  return protein
443
+
444
+
445
@curry1
def make_msa_feat(protein):
  """Create and concatenate MSA features.

  Writes 'msa_feat' and 'target_feat' into `protein`. When
  'extra_deletion_matrix' is present, 'extra_has_deletion' and
  'extra_deletion_value' are written as well.
  """
  # Scale factor applied to every atan(x / 3) deletion transform below.
  two_over_pi = 2. / np.pi

  # Whether there is a domain break. Always zero for chains, but keeping
  # for compatibility with domain datasets.
  between_segments = tf.cast(protein['between_segment_residues'], tf.float32)
  has_break = tf.clip_by_value(between_segments, 0, 1)
  aatype_1hot = tf.one_hot(protein['aatype'], 21, axis=-1)

  # Everyone gets the original sequence.
  target_feat = [tf.expand_dims(has_break, axis=-1), aatype_1hot]

  msa_1hot = tf.one_hot(protein['msa'], 23, axis=-1)
  deletion_matrix = protein['deletion_matrix']
  has_deletion = tf.clip_by_value(deletion_matrix, 0., 1.)
  deletion_value = tf.atan(deletion_matrix / 3.) * two_over_pi

  msa_feat = [
      msa_1hot,
      tf.expand_dims(has_deletion, axis=-1),
      tf.expand_dims(deletion_value, axis=-1),
  ]

  if 'cluster_profile' in protein:
    deletion_mean_value = (
        tf.atan(protein['cluster_deletion_mean'] / 3.) * two_over_pi)
    msa_feat.append(protein['cluster_profile'])
    msa_feat.append(tf.expand_dims(deletion_mean_value, axis=-1))

  if 'extra_deletion_matrix' in protein:
    extra_deletion_matrix = protein['extra_deletion_matrix']
    protein['extra_has_deletion'] = tf.clip_by_value(
        extra_deletion_matrix, 0., 1.)
    protein['extra_deletion_value'] = (
        tf.atan(extra_deletion_matrix / 3.) * two_over_pi)

  protein['msa_feat'] = tf.concat(msa_feat, axis=-1)
  protein['target_feat'] = tf.concat(target_feat, axis=-1)
  return protein
487
+
488
+
489
@curry1
def select_feat(protein, feature_list):
  """Return a new dict holding only the features named in `feature_list`."""
  selected = {}
  for name, value in protein.items():
    if name in feature_list:
      selected[name] = value
  return selected
492
+
493
+
494
@curry1
def crop_templates(protein, max_templates):
  """Keep at most the first `max_templates` entries of `template_*` features.

  Every feature whose name starts with 'template_' is sliced along its first
  axis; all other features are left unchanged.
  """
  template_keys = [key for key in protein if key.startswith('template_')]
  for key in template_keys:
    protein[key] = protein[key][:max_templates]
  return protein
500
+
501
+
502
@curry1
def random_crop_to_size(protein, crop_size, max_templates, shape_schema,
                        subsample_templates=False):
  """Crop randomly to `crop_size`, or keep as is if shorter than that.

  Args:
    protein: Feature dict; reads 'seq_length', 'random_crop_to_size_seed' and,
      if present, 'template_mask'.
    crop_size: Maximum number of residues to keep.
    max_templates: Maximum number of templates to keep.
    shape_schema: Maps feature names to per-dimension schemas; only features
      listed here that are template features or have a NUM_RES dimension are
      cropped.
    subsample_templates: If True, templates are randomly permuted and the
      template crop start is drawn at random; otherwise it starts at 0.

  Returns:
    The same dict with the cropped features and 'seq_length' set to the
    cropped number of residues.
  """
  seq_length = protein['seq_length']
  if 'template_mask' in protein:
    num_templates = tf.cast(
        shape_helpers.shape_list(protein['template_mask'])[0], tf.int32)
  else:
    num_templates = tf.constant(0, dtype=tf.int32)
  num_res_crop_size = tf.math.minimum(seq_length, crop_size)

  # Ensures that the cropping of residues and templates happens in the same way
  # across ensembling iterations.
  # Do not use for randomness that should vary in ensembling.
  seed_maker = utils.SeedMaker(initial_seed=protein['random_crop_to_size_seed'])

  if subsample_templates:
    templates_crop_start = tf.random.stateless_uniform(
        shape=(), minval=0, maxval=num_templates + 1, dtype=tf.int32,
        seed=seed_maker())
  else:
    templates_crop_start = 0

  num_templates_crop_size = tf.math.minimum(
      num_templates - templates_crop_start, max_templates)

  num_res_crop_start = tf.random.stateless_uniform(
      shape=(), minval=0, maxval=seq_length - num_res_crop_size + 1,
      dtype=tf.int32, seed=seed_maker())

  # Random template permutation. It is built unconditionally (which also
  # consumes a seed) but only applied below when subsample_templates is set.
  templates_select_indices = tf.argsort(tf.random.stateless_uniform(
      [num_templates], seed=seed_maker()))

  for k, v in protein.items():
    # Skip features without a schema, and non-template features that have no
    # residue dimension.
    if k not in shape_schema or (
        'template' not in k and NUM_RES not in shape_schema[k]):
      continue

    # randomly permute the templates before cropping them.
    if k.startswith('template') and subsample_templates:
      v = tf.gather(v, templates_select_indices)

    crop_sizes = []
    crop_starts = []
    for i, (dim_size, dim) in enumerate(zip(shape_schema[k],
                                            shape_helpers.shape_list(v))):
      is_num_res = (dim_size == NUM_RES)
      # NOTE(review): the assignments below rebind the `crop_size` parameter.
      # This is harmless because num_res_crop_size was already computed from
      # it above, but a differently named local would read more clearly.
      if i == 0 and k.startswith('template'):
        # Leading axis of a template feature: crop along the templates.
        crop_size = num_templates_crop_size
        crop_start = templates_crop_start
      else:
        # Residue axes get the shared residue crop; other axes are kept whole.
        crop_start = num_res_crop_start if is_num_res else 0
        crop_size = (num_res_crop_size if is_num_res else
                     (-1 if dim is None else dim))
      crop_sizes.append(crop_size)
      crop_starts.append(crop_start)
    protein[k] = tf.slice(v, crop_starts, crop_sizes)

  protein['seq_length'] = num_res_crop_size
  return protein
563
+
564
+
565
def make_atom14_masks(protein):
  """Construct denser atom positions (14 dimensions instead of 37).

  Builds per-residue-type lookup tables from `residue_constants` and gathers
  them with `protein['aatype']`.

  Args:
    protein: Feature dict containing 'aatype'.

  Returns:
    The same dict with 'atom14_atom_exists', 'residx_atom14_to_atom37',
    'residx_atom37_to_atom14' and 'atom37_atom_exists' added.
  """
  restype_atom14_to_atom37 = []  # mapping (restype, atom14) --> atom37
  restype_atom37_to_atom14 = []  # mapping (restype, atom37) --> atom14
  restype_atom14_mask = []

  for rt in residue_constants.restypes:
    atom_names = residue_constants.restype_name_to_atom14_names[
        residue_constants.restype_1to3[rt]]

    # Empty atom names are unused atom14 slots; they map to index 0.
    restype_atom14_to_atom37.append([
        (residue_constants.atom_order[name] if name else 0)
        for name in atom_names
    ])

    # Atom types absent from this residue type also map to index 0.
    atom_name_to_idx14 = {name: i for i, name in enumerate(atom_names)}
    restype_atom37_to_atom14.append([
        (atom_name_to_idx14[name] if name in atom_name_to_idx14 else 0)
        for name in residue_constants.atom_types
    ])

    # 1.0 for named (existing) atom14 slots, 0.0 for the empty ones.
    restype_atom14_mask.append([(1. if name else 0.) for name in atom_names])

  # Add dummy mapping for restype 'UNK'
  restype_atom14_to_atom37.append([0] * 14)
  restype_atom37_to_atom14.append([0] * 37)
  restype_atom14_mask.append([0.] * 14)

  restype_atom14_to_atom37 = np.array(restype_atom14_to_atom37, dtype=np.int32)
  restype_atom37_to_atom14 = np.array(restype_atom37_to_atom14, dtype=np.int32)
  restype_atom14_mask = np.array(restype_atom14_mask, dtype=np.float32)

  # create the mapping for (residx, atom14) --> atom37, i.e. an array
  # with shape (num_res, 14) containing the atom37 indices for this protein
  residx_atom14_to_atom37 = tf.gather(restype_atom14_to_atom37,
                                      protein['aatype'])
  residx_atom14_mask = tf.gather(restype_atom14_mask,
                                 protein['aatype'])

  protein['atom14_atom_exists'] = residx_atom14_mask
  protein['residx_atom14_to_atom37'] = residx_atom14_to_atom37

  # create the gather indices for mapping back
  residx_atom37_to_atom14 = tf.gather(restype_atom37_to_atom14,
                                      protein['aatype'])
  protein['residx_atom37_to_atom14'] = residx_atom37_to_atom14

  # create the corresponding mask
  # (the last of the 21 rows is never written in the loop, so it stays zero)
  restype_atom37_mask = np.zeros([21, 37], dtype=np.float32)
  for restype, restype_letter in enumerate(residue_constants.restypes):
    restype_name = residue_constants.restype_1to3[restype_letter]
    atom_names = residue_constants.residue_atoms[restype_name]
    for atom_name in atom_names:
      atom_type = residue_constants.atom_order[atom_name]
      restype_atom37_mask[restype, atom_type] = 1

  residx_atom37_mask = tf.gather(restype_atom37_mask,
                                 protein['aatype'])
  protein['atom37_atom_exists'] = residx_atom37_mask

  return protein
af_backprop/alphafold/model/tf/input_pipeline.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Feature pre-processing input pipeline for AlphaFold."""
16
+
17
+ from alphafold.model.tf import data_transforms
18
+ from alphafold.model.tf import shape_placeholders
19
+ import tensorflow.compat.v1 as tf
20
+ import tree
21
+
22
+ # Pylint gets confused by the curry1 decorator because it changes the number
23
+ # of arguments to the function.
24
+ # pylint:disable=no-value-for-parameter
25
+
26
+
27
+ NUM_RES = shape_placeholders.NUM_RES
28
+ NUM_MSA_SEQ = shape_placeholders.NUM_MSA_SEQ
29
+ NUM_EXTRA_SEQ = shape_placeholders.NUM_EXTRA_SEQ
30
+ NUM_TEMPLATES = shape_placeholders.NUM_TEMPLATES
31
+
32
+
33
def nonensembled_map_fns(data_config):
  """Return the input pipeline functions which are not ensembled.

  Args:
    data_config: Config object; only `data_config.common.use_templates` is
      read here.

  Returns:
    List of transforms, each mapping a feature dict to a feature dict.
  """
  common_cfg = data_config.common

  map_fns = [
      data_transforms.correct_msa_restypes,
      data_transforms.add_distillation_flag(False),
      data_transforms.cast_64bit_ints,
      data_transforms.squeeze_features,
      # Keep to not disrupt RNG.
      data_transforms.randomly_replace_msa_with_unknown(0.0),
      data_transforms.make_seq_mask,
      data_transforms.make_msa_mask,
      # Compute the HHblits profile if it's not set. This has to be run before
      # sampling the MSA.
      data_transforms.make_hhblits_profile,
      data_transforms.make_random_crop_to_size_seed,
  ]

  if common_cfg.use_templates:
    map_fns += [
        data_transforms.fix_templates_aatype,
        data_transforms.make_template_mask,
        data_transforms.make_pseudo_beta('template_'),
    ]

  map_fns.append(data_transforms.make_atom14_masks)
  return map_fns
62
+
63
+
64
def ensembled_map_fns(data_config):
  """Return the input pipeline functions that can be ensembled and averaged.

  Args:
    data_config: Config object with `common` and `eval` sections.

  Returns:
    List of transforms, each mapping a feature dict to a feature dict.
  """
  common_cfg = data_config.common
  eval_cfg = data_config.eval

  if common_cfg.reduce_msa_clusters_by_max_templates:
    pad_msa_clusters = eval_cfg.max_msa_clusters - eval_cfg.max_templates
  else:
    pad_msa_clusters = eval_cfg.max_msa_clusters
  max_extra_msa = common_cfg.max_extra_msa

  map_fns = [data_transforms.sample_msa(pad_msa_clusters, keep_extra=True)]

  if 'masked_msa' in common_cfg:
    # Masked MSA should come *before* MSA clustering so that
    # the clustering and full MSA profile do not leak information about
    # the masked locations and secret corrupted locations.
    map_fns.append(data_transforms.make_masked_msa(
        common_cfg.masked_msa, eval_cfg.masked_msa_replace_fraction))

  if common_cfg.msa_cluster_features:
    map_fns += [
        data_transforms.nearest_neighbor_clusters(),
        data_transforms.summarize_clusters(),
    ]

  # Crop after creating the cluster profiles.
  if max_extra_msa:
    extra_msa_fn = data_transforms.crop_extra_msa(max_extra_msa)
  else:
    extra_msa_fn = data_transforms.delete_extra_msa
  map_fns.append(extra_msa_fn)

  map_fns.append(data_transforms.make_msa_feat())

  crop_feats = dict(eval_cfg.feat)

  if not eval_cfg.fixed_size:
    map_fns.append(data_transforms.crop_templates(eval_cfg.max_templates))
    return map_fns

  # Fixed-size output: keep only the known features, crop, then pad.
  map_fns += [
      data_transforms.select_feat(list(crop_feats)),
      data_transforms.random_crop_to_size(
          eval_cfg.crop_size,
          eval_cfg.max_templates,
          crop_feats,
          eval_cfg.subsample_templates),
      data_transforms.make_fixed_size(
          crop_feats,
          pad_msa_clusters,
          max_extra_msa,
          eval_cfg.crop_size,
          eval_cfg.max_templates),
  ]
  return map_fns
123
+
124
+
125
def process_tensors_from_config(tensors, data_config):
  """Apply filters and maps to an existing dataset, based on the config.

  Runs the non-ensembled transforms once, then the ensembled transforms once
  per ensemble index, stacking the per-index results along a new leading axis.

  Args:
    tensors: Feature dict of tensors.
    data_config: Config object with `common` and `eval` sections.

  Returns:
    Feature structure in which every tensor has an extra leading ensemble
    axis.
  """

  def wrap_ensemble_fn(data, i):
    """Function to be mapped over the ensemble dimension."""
    # Work on a shallow copy so the shared input dict is not modified.
    d = data.copy()
    fns = ensembled_map_fns(data_config)
    fn = compose(fns)
    d['ensemble_index'] = i
    return fn(d)

  eval_cfg = data_config.eval
  tensors = compose(
      nonensembled_map_fns(
          data_config))(
              tensors)

  # The result for index 0 is always built: it is the output in the
  # single-ensemble case and supplies the output signature for tf.map_fn
  # otherwise.
  tensors_0 = wrap_ensemble_fn(tensors, tf.constant(0))
  num_ensemble = eval_cfg.num_ensemble
  if data_config.common.resample_msa_in_recycling:
    # Separate batch per ensembling & recycling step.
    num_ensemble *= data_config.common.num_recycle + 1

  if isinstance(num_ensemble, tf.Tensor) or num_ensemble > 1:
    fn_output_signature = tree.map_structure(
        tf.TensorSpec.from_tensor, tensors_0)
    tensors = tf.map_fn(
        lambda x: wrap_ensemble_fn(tensors, x),
        tf.range(num_ensemble),
        parallel_iterations=1,
        fn_output_signature=fn_output_signature)
  else:
    # Single ensemble member: just add the leading axis of size 1.
    tensors = tree.map_structure(lambda x: x[None],
                                 tensors_0)
  return tensors
160
+
161
+
162
@data_transforms.curry1
def compose(x, fs):
  """Apply each function in `fs` to `x`, in order, and return the result."""
  result = x
  for transform in fs:
    result = transform(result)
  return result
af_backprop/alphafold/model/tf/protein_features.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Contains descriptions of various protein features."""
16
+ import enum
17
+ from typing import Dict, Optional, Sequence, Tuple, Union
18
+ from alphafold.common import residue_constants
19
+ import tensorflow.compat.v1 as tf
20
+
21
+ # Type aliases.
22
+ FeaturesMetadata = Dict[str, Tuple[tf.dtypes.DType, Sequence[Union[str, int]]]]
23
+
24
+
25
class FeatureType(enum.Enum):
  """Kinds of feature layout; the shape each member denotes is noted inline."""
  ZERO_DIM = 0  # Shape [x]
  ONE_DIM = 1  # Shape [num_res, x]
  TWO_DIM = 2  # Shape [num_res, num_res, x]
  MSA = 3  # Shape [msa_length, num_res, x]
30
+
31
+
32
# Placeholder values that will be replaced with their true value at runtime.
NUM_RES = "num residues placeholder"
NUM_SEQ = "length msa placeholder"
NUM_TEMPLATES = "num templates placeholder"
# Sizes of the protein features, NUM_RES and NUM_SEQ are allowed as placeholders
# to be replaced with the number of residues and the number of sequences in the
# multiple sequence alignment, respectively.


# Maps each feature name to a (dtype, shape) pair. Shape entries are either
# ints or one of the placeholder strings above.
FEATURES = {
    #### Static features of a protein sequence ####
    "aatype": (tf.float32, [NUM_RES, 21]),
    "between_segment_residues": (tf.int64, [NUM_RES, 1]),
    "deletion_matrix": (tf.float32, [NUM_SEQ, NUM_RES, 1]),
    "domain_name": (tf.string, [1]),
    "msa": (tf.int64, [NUM_SEQ, NUM_RES, 1]),
    "num_alignments": (tf.int64, [NUM_RES, 1]),
    "residue_index": (tf.int64, [NUM_RES, 1]),
    "seq_length": (tf.int64, [NUM_RES, 1]),
    "sequence": (tf.string, [1]),
    "all_atom_positions": (tf.float32,
                           [NUM_RES, residue_constants.atom_type_num, 3]),
    "all_atom_mask": (tf.int64, [NUM_RES, residue_constants.atom_type_num]),
    "resolution": (tf.float32, [1]),
    "template_domain_names": (tf.string, [NUM_TEMPLATES]),
    "template_sum_probs": (tf.float32, [NUM_TEMPLATES, 1]),
    "template_aatype": (tf.float32, [NUM_TEMPLATES, NUM_RES, 22]),
    "template_all_atom_positions": (tf.float32, [
        NUM_TEMPLATES, NUM_RES, residue_constants.atom_type_num, 3
    ]),
    "template_all_atom_masks": (tf.float32, [
        NUM_TEMPLATES, NUM_RES, residue_constants.atom_type_num, 1
    ]),
}

# Per-feature views of FEATURES: name -> dtype and name -> shape.
FEATURE_TYPES = {k: v[0] for k, v in FEATURES.items()}
FEATURE_SIZES = {k: v[1] for k, v in FEATURES.items()}
69
+
70
+
71
def register_feature(name: str,
                     type_: tf.dtypes.DType,
                     shape_: Tuple[Union[str, int]]):
  """Register extra features used in custom datasets.

  Adds (or overwrites) the entry for `name` in FEATURES, FEATURE_TYPES and
  FEATURE_SIZES.

  Args:
    name: Feature name.
    type_: TensorFlow dtype of the feature.
    shape_: Feature shape; entries may be ints or placeholder strings.
  """
  entry = (type_, shape_)
  FEATURES[name] = entry
  FEATURE_TYPES[name], FEATURE_SIZES[name] = entry
78
+
79
+
80
def shape(feature_name: str,
          num_residues: int,
          msa_length: int,
          num_templates: Optional[int] = None,
          features: Optional[FeaturesMetadata] = None):
  """Get the shape for the given feature name.

  This is near identical to _get_tf_shape_no_placeholders() but with 2
  differences:
  * This method does not calculate a single placeholder from the total number of
    elements (eg given <NUM_RES, 3> and size := 12, this won't deduce NUM_RES
    must be 4)
  * This method will work with tensors

  Args:
    feature_name: String identifier for the feature. If the feature name ends
      with "_unnormalized", this suffix is stripped off.
    num_residues: The number of residues in the current domain - some elements
      of the shape can be dynamic and will be replaced by this value.
    msa_length: The number of sequences in the multiple sequence alignment, some
      elements of the shape can be dynamic and will be replaced by this value.
      If the number of alignments is unknown / not read, please pass None for
      msa_length.
    num_templates (optional): The number of templates in this tfexample.
    features: A feature_name to (tf_dtype, shape) lookup; defaults to FEATURES.

  Returns:
    List of ints representing the tensor size.

  Raises:
    ValueError: If a feature is requested but no concrete placeholder value is
        given.
  """
  features = features or FEATURES

  suffix = "_unnormalized"
  if feature_name.endswith(suffix):
    feature_name = feature_name[:-len(suffix)]

  unused_dtype, raw_sizes = features[feature_name]

  replacements = {NUM_RES: num_residues, NUM_SEQ: msa_length}
  if num_templates is not None:
    replacements[NUM_TEMPLATES] = num_templates

  sizes = []
  for raw_dimension in raw_sizes:
    dimension = replacements.get(raw_dimension, raw_dimension)
    # Anything that is still a string is a placeholder with no value supplied.
    if isinstance(dimension, str):
      raise ValueError("Could not parse %s (shape: %s) with values: %s" % (
          feature_name, raw_sizes, replacements))
    sizes.append(dimension)
  return sizes
af_backprop/alphafold/model/tf/proteins_dataset.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Datasets consisting of proteins."""
16
+ from typing import Dict, Mapping, Optional, Sequence
17
+ from alphafold.model.tf import protein_features
18
+ import numpy as np
19
+ import tensorflow.compat.v1 as tf
20
+
21
+ TensorDict = Dict[str, tf.Tensor]
22
+
23
+
24
def parse_tfexample(
    raw_data: bytes,
    features: protein_features.FeaturesMetadata,
    key: Optional[str] = None) -> Dict[str, tf.train.Feature]:
  """Read a single TF Example proto and return a subset of its features.

  Args:
    raw_data: A serialized tf.Example proto.
    features: A dictionary of features, mapping string feature names to a tuple
      (dtype, shape). This dictionary should be a subset of
      protein_features.FEATURES (or the dictionary itself for all features).
    key: Optional string with the SSTable key of that tf.Example. This will be
      added into features as a 'key' but only if requested in features.

  Returns:
    A dictionary of features mapping feature names to features. Only the given
    features are returned, all other ones are filtered out.
  """
  # Every feature is parsed as a flat, variable-length sequence of its dtype;
  # the real shapes are applied afterwards by parse_reshape_logic.
  feature_map = {}
  for name, spec in features.items():
    feature_map[name] = tf.io.FixedLenSequenceFeature(
        shape=(), dtype=spec[0], allow_missing=True)

  parsed_features = tf.io.parse_single_example(raw_data, feature_map)
  return parse_reshape_logic(parsed_features, features, key=key)
50
+
51
+
52
def _first(tensor: tf.Tensor) -> tf.Tensor:
  """Returns the 1st element - the input can be a tensor or a scalar."""
  # Flattening first makes the indexing work for scalars too.
  flattened = tf.reshape(tensor, shape=(-1,))
  return flattened[0]
55
+
56
+
57
def parse_reshape_logic(
    parsed_features: TensorDict,
    features: protein_features.FeaturesMetadata,
    key: Optional[str] = None) -> TensorDict:
  """Transforms parsed serial features to the correct shape.

  Args:
    parsed_features: Feature dict to reshape; must contain "seq_length". It is
      modified in place.
    features: Feature name to (dtype, shape) lookup used to resolve the target
      shapes.
    key: Optional key; stored as parsed_features["key"] only when "key" is
      also listed in `features`.

  Returns:
    `parsed_features`, with every entry reshaped to its resolved shape.
  """
  # Find out what is the number of sequences and the number of alignments.
  num_residues = tf.cast(_first(parsed_features["seq_length"]), dtype=tf.int32)

  if "num_alignments" in parsed_features:
    num_msa = tf.cast(_first(parsed_features["num_alignments"]), dtype=tf.int32)
  else:
    num_msa = 0

  # The number of templates is taken from the length of the first axis of
  # "template_domain_names".
  if "template_domain_names" in parsed_features:
    num_templates = tf.cast(
        tf.shape(parsed_features["template_domain_names"])[0], dtype=tf.int32)
  else:
    num_templates = 0

  if key is not None and "key" in features:
    parsed_features["key"] = [key]  # Expand dims from () to (1,).

  # Reshape the tensors according to the sequence length and num alignments.
  for k, v in parsed_features.items():
    new_shape = protein_features.shape(
        feature_name=k,
        num_residues=num_residues,
        msa_length=num_msa,
        num_templates=num_templates,
        features=features)
    # Total number of elements implied by the target shape.
    new_shape_size = tf.constant(1, dtype=tf.int32)
    for dim in new_shape:
      new_shape_size *= tf.cast(dim, tf.int32)

    assert_equal = tf.assert_equal(
        tf.size(v), new_shape_size,
        name="assert_%s_shape_correct" % k,
        message="The size of feature %s (%s) could not be reshaped "
        "into %s" % (k, tf.size(v), new_shape))
    # Only features without "template" in their name are required to be
    # non-empty.
    if "template" not in k:
      # Make sure the feature we are reshaping is not empty.
      assert_non_empty = tf.assert_greater(
          tf.size(v), 0, name="assert_%s_non_empty" % k,
          message="The feature %s is not set in the tf.Example. Either do not "
          "request the feature or use a tf.Example that has the "
          "feature set." % k)
      with tf.control_dependencies([assert_non_empty, assert_equal]):
        parsed_features[k] = tf.reshape(v, new_shape, name="reshape_%s" % k)
    else:
      with tf.control_dependencies([assert_equal]):
        parsed_features[k] = tf.reshape(v, new_shape, name="reshape_%s" % k)

  return parsed_features
110
+
111
+
112
def _make_features_metadata(
    feature_names: Sequence[str]) -> protein_features.FeaturesMetadata:
  """Makes a feature name to type and shape mapping from a list of names.

  Args:
    feature_names: Names of the requested features. "aatype", "sequence" and
      "seq_length" are always added.

  Returns:
    Mapping from feature name to its protein_features.FEATURES entry, with
    keys in sorted order.

  Raises:
    KeyError: If a name is not present in protein_features.FEATURES.
  """
  # Make sure these features are always read.
  required_features = ["aatype", "sequence", "seq_length"]
  # Sort the union: iteration order of a set of strings can differ between
  # interpreter runs (string hash randomisation), which would make the order
  # of the returned mapping, and of anything built by iterating over it,
  # non-reproducible.
  feature_names = sorted(set(feature_names) | set(required_features))

  return {name: protein_features.FEATURES[name] for name in feature_names}
122
+
123
+
124
def create_tensor_dict(
    raw_data: bytes,
    features: Sequence[str],
    key: Optional[str] = None,
    ) -> TensorDict:
  """Creates a dictionary of tensor features.

  Args:
    raw_data: A serialized tf.Example proto.
    features: A list of strings of feature names to be returned in the dataset.
    key: Optional string with the SSTable key of that tf.Example. This will be
      added into features as a 'key' but only if requested in features.

  Returns:
    A dictionary of features mapping feature names to features. Only the given
    features are returned, all other ones are filtered out.
  """
  return parse_tfexample(
      raw_data=raw_data,
      features=_make_features_metadata(features),
      key=key)
143
+
144
+
145
def np_to_tensor_dict(
    np_example: Mapping[str, np.ndarray],
    features: Sequence[str],
    ) -> TensorDict:
  """Creates dict of tensors from a dict of NumPy arrays.

  Args:
    np_example: A dict of NumPy feature arrays.
    features: A list of strings of feature names to be returned in the dataset.

  Returns:
    A dictionary of features mapping feature names to features. Only the given
    features are returned, all other ones are filtered out.
  """
  features_metadata = _make_features_metadata(features)

  tensor_dict = {}
  for name, array in np_example.items():
    if name in features_metadata:
      tensor_dict[name] = tf.constant(array)

  # Ensures shapes are as expected. Needed for setting size of empty features
  # e.g. when no template hits were found.
  return parse_reshape_logic(tensor_dict, features_metadata)
af_backprop/alphafold/model/tf/shape_helpers.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Utilities for dealing with shapes of TensorFlow tensors."""
16
+ import tensorflow.compat.v1 as tf
17
+
18
+
19
def shape_list(x):
  """Return list of dimensions of a tensor, statically where possible.

  Like `x.shape.as_list()` but with tensors instead of `None`s.

  Args:
    x: A tensor.
  Returns:
    A list with length equal to the rank of the tensor. The n-th element of the
    list is an integer when that dimension is statically known otherwise it is
    the n-th element of `tf.shape(x)`. If the rank itself is unknown,
    `tf.shape(x)` is returned as is.
  """
  x = tf.convert_to_tensor(x)
  static_shape = x.get_shape()

  # If unknown rank, return dynamic shape
  if static_shape.dims is None:
    return tf.shape(x)

  static_dims = static_shape.as_list()
  dynamic_shape = tf.shape(x)
  return [
      dynamic_shape[axis] if dim is None else dim
      for axis, dim in enumerate(static_dims)
  ]
47
+
af_backprop/alphafold/model/tf/shape_placeholders.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Placeholder values for run-time varying dimension sizes."""
16
+
17
# Marker strings used in shape schemas in place of a concrete dimension size.
# NOTE(review): meanings below are read off the names and string values;
# confirm against the code that consumes the schemas.
NUM_RES = 'num residues placeholder'  # number of residues
NUM_MSA_SEQ = 'msa placeholder'  # number of MSA sequences
NUM_EXTRA_SEQ = 'extra msa placeholder'  # number of extra MSA sequences
NUM_TEMPLATES = 'num templates placeholder'  # number of templates
af_backprop/alphafold/model/tf/utils.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Shared utilities for various components."""
16
+ import tensorflow.compat.v1 as tf
17
+
18
+
19
def tf_combine_mask(*masks):
  """Intersect float-valued masks by multiplying them together.

  Args:
    *masks: Any number of masks; they are multiplied in the order given.

  Returns:
    The running product of all masks, or the integer 1 when none are given.
  """
  combined = 1
  for mask in masks:
    combined *= mask
  return combined
25
+
26
+
27
class SeedMaker:
  """Callable counter that hands out a fresh seed on every call."""

  def __init__(self, initial_seed=0):
    # The value that the next call will return.
    self.next_seed = initial_seed

  def __call__(self):
    """Return the current seed and advance the counter by one."""
    seed = self.next_seed
    self.next_seed += 1
    return seed


# Shared module-level instance, so every caller draws from the same sequence.
seed_maker = SeedMaker()
39
+
40
+
41
def make_random_seed():
  """Draw a length-2 int32 tensor of random values.

  Each call takes the next value from the module-level `seed_maker` and passes
  it as the op-level seed of `tf.random.uniform`.

  Returns:
    The result of `tf.random.uniform` for shape [2] and dtype int32, with
    bounds `tf.int32.min` and `tf.int32.max`.
  """
  op_seed = seed_maker()
  return tf.random.uniform(
      shape=[2],
      minval=tf.int32.min,
      maxval=tf.int32.max,
      dtype=tf.int32,
      seed=op_seed)
47
+
af_backprop/alphafold/model/utils.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """A collection of JAX utility functions for use in protein folding."""
16
+
17
+ import collections
18
+ import numbers
19
+ from typing import Mapping
20
+
21
+ import haiku as hk
22
+ import jax
23
+ import jax.numpy as jnp
24
+ import numpy as np
25
+
26
+
27
def final_init(config):
  """Pick the initializer name from `config.zero_init`.

  Args:
    config: Object exposing a `zero_init` attribute.

  Returns:
    'zeros' when `config.zero_init` is truthy, otherwise 'linear'.
  """
  return 'zeros' if config.zero_init else 'linear'
32
+
33
+
34
def batched_gather(params, indices, axis=0, batch_dims=0):
  """JAX counterpart of `tf.gather` supporting `axis` and `batch_dims`.

  Args:
    params: Array to gather from.
    indices: Indices to take along `axis`.
    axis: Axis passed to `jnp.take`; it is applied inside the vmapped
      function, i.e. after the batch dimensions have been mapped over.
    batch_dims: Number of times the gather is wrapped in `jax.vmap`.

  Returns:
    The gathered array.
  """
  def gather(p, i):
    return jnp.take(p, i, axis=axis)

  # One vmap layer per batch dimension.
  for _ in range(batch_dims):
    gather = jax.vmap(gather)

  return gather(params, indices)
40
+
41
+
42
def mask_mean(mask, value, axis=None, drop_mask_channel=False, eps=1e-10):
  """Masked mean of `value` over `axis`, weighted by `mask`.

  Args:
    mask: Array of weights. After the optional channel drop it must have the
      same rank as `value`; along every reduced axis its size must either
      equal that of `value` or be 1 (broadcast).
    value: Array to average.
    axis: An integer, an iterable of integers, or None to reduce over all axes.
    drop_mask_channel: If True, `mask[..., 0]` is used in place of `mask`.
    eps: Added to the denominator to guard against division by zero.

  Returns:
    `sum(mask * value) / (sum(mask) * broadcast_factor + eps)`, with both sums
    taken over `axis`. `broadcast_factor` is the product of the sizes of
    `value` along the reduced axes where `mask` has size 1.

  Raises:
    AssertionError: If the ranks differ, `axis` is not an integer, iterable or
      None, or the sizes along a reduced axis are incompatible.
  """
  # `collections.Iterable` was removed in Python 3.10; the ABC now lives only
  # in `collections.abc`. Imported locally so the block stays self-contained.
  from collections.abc import Iterable

  if drop_mask_channel:
    mask = mask[..., 0]

  mask_shape = mask.shape
  value_shape = value.shape

  assert len(mask_shape) == len(value_shape)

  if isinstance(axis, numbers.Integral):
    axis = (axis,)
  elif axis is None:
    axis = tuple(range(len(mask_shape)))
  assert isinstance(axis, Iterable), (
      'axis needs to be either an iterable, integer or "None"')
  # Materialize once: `axis` is walked below and then used by both reductions,
  # so a one-shot iterable must not be consumed by the first pass.
  axis = tuple(axis)

  # Where the mask is broadcast along a reduced axis, sum(mask) counts each
  # entry once although it weights `value_size` values; compensate for that.
  broadcast_factor = 1.
  for axis_ in axis:
    value_size = value_shape[axis_]
    mask_size = mask_shape[axis_]
    if mask_size == 1:
      broadcast_factor *= value_size
    else:
      assert mask_size == value_size

  return (jnp.sum(mask * value, axis=axis) /
          (jnp.sum(mask, axis=axis) * broadcast_factor + eps))
70
+
71
+
72
def flat_params_to_haiku(params: Mapping[str, np.ndarray]) -> hk.Params:
  """Turn a flat mapping of arrays into a nested Haiku-style parameter dict.

  Args:
    params: Mapping whose keys have the form 'scope//name'. A key that does
      not split into exactly two parts on '//' raises ValueError.

  Returns:
    A two-level dict `{scope: {name: jnp.array(array)}}`.
  """
  nested = {}
  for flat_key, value in params.items():
    scope, name = flat_key.split('//')
    nested.setdefault(scope, {})[name] = jnp.array(value)
  return nested
af_backprop/examples/AlphaFold_single.ipynb ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "name": "AlphaFold_single.ipynb",
7
+ "provenance": [],
8
+ "include_colab_link": true
9
+ },
10
+ "kernelspec": {
11
+ "name": "python3",
12
+ "display_name": "Python 3"
13
+ },
14
+ "language_info": {
15
+ "name": "python"
16
+ },
17
+ "accelerator": "GPU"
18
+ },
19
+ "cells": [
20
+ {
21
+ "cell_type": "markdown",
22
+ "metadata": {
23
+ "id": "view-in-github",
24
+ "colab_type": "text"
25
+ },
26
+ "source": [
27
+ "<a href=\"https://colab.research.google.com/github/sokrypton/af_backprop/blob/beta/examples/AlphaFold_single.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "markdown",
32
+ "source": [
33
+ "# AlphaFold - single sequence input\n",
34
+ "- WARNING - For DEMO and educational purposes only. \n",
35
+ "- For natural proteins you often need more than a single sequence to accurately predict the structure. See [ColabFold](https://colab.research.google.com/github/sokrypton/ColabFold/blob/main/AlphaFold2.ipynb) notebook if you want to predict the protein structure from a multiple-sequence-alignment. That being said, this notebook could potentially be useful for evaluating *de novo* designed proteins.\n"
36
+ ],
37
+ "metadata": {
38
+ "id": "VpfCw7IzVHXv"
39
+ }
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "source": [
44
+ "#@title Setup\n",
45
+ "from IPython.utils import io\n",
46
+ "import os,sys,re\n",
47
+ "import tensorflow as tf\n",
48
+ "import jax\n",
49
+ "import jax.numpy as jnp\n",
50
+ "import numpy as np\n",
51
+ "\n",
52
+ "with io.capture_output() as captured:\n",
53
+ " if not os.path.isdir(\"af_backprop\"):\n",
54
+ " %shell git clone -b beta https://github.com/sokrypton/af_backprop.git\n",
55
+ " %shell pip -q install biopython dm-haiku ml-collections py3Dmol\n",
56
+ " %shell wget -qnc https://raw.githubusercontent.com/sokrypton/ColabFold/main/beta/colabfold.py\n",
57
+ " if not os.path.isdir(\"params\"):\n",
58
+ " %shell mkdir params\n",
59
+ " %shell curl -fsSL https://storage.googleapis.com/alphafold/alphafold_params_2021-07-14.tar | tar x -C params\n",
60
+ "\n",
61
+ "try:\n",
62
+ " # check if TPU is available\n",
63
+ " import jax.tools.colab_tpu\n",
64
+ " jax.tools.colab_tpu.setup_tpu()\n",
65
+ " print('Running on TPU')\n",
66
+ " DEVICE = \"tpu\"\n",
67
+ "except:\n",
68
+ " if jax.local_devices()[0].platform == 'cpu':\n",
69
+ " print(\"WARNING: no GPU detected, will be using CPU\")\n",
70
+ " DEVICE = \"cpu\"\n",
71
+ " else:\n",
72
+ " print('Running on GPU')\n",
73
+ " DEVICE = \"gpu\"\n",
74
+ " # disable GPU on tensorflow\n",
75
+ " tf.config.set_visible_devices([], 'GPU')\n",
76
+ "\n",
77
+ "sys.path.append('/content/af_backprop')\n",
78
+ "# import libraries\n",
79
+ "from utils import update_seq, update_aatype, get_plddt, get_pae\n",
80
+ "import colabfold as cf\n",
81
+ "from alphafold.common import protein\n",
82
+ "from alphafold.data import pipeline\n",
83
+ "from alphafold.model import data, config, model\n",
84
+ "from alphafold.common import residue_constants\n",
85
+ "\n",
86
+ "def clear_mem():\n",
87
+ " backend = jax.lib.xla_bridge.get_backend()\n",
88
+ " for buf in backend.live_buffers(): buf.delete()\n",
89
+ "\n",
90
+ "def setup_model(max_len, model_name=\"model_2_ptm\"):\n",
91
+ "\n",
92
+ " clear_mem()\n",
93
+ "\n",
94
+ " # setup model\n",
95
+ " cfg = config.model_config(\"model_5_ptm\")\n",
96
+ " cfg.model.num_recycle = 0\n",
97
+ " cfg.data.common.num_recycle = 0\n",
98
+ " cfg.data.eval.max_msa_clusters = 1\n",
99
+ " cfg.data.common.max_extra_msa = 1\n",
100
+ " cfg.data.eval.masked_msa_replace_fraction = 0\n",
101
+ " cfg.model.global_config.subbatch_size = None\n",
102
+ " model_params = data.get_model_haiku_params(model_name=model_name, data_dir=\".\")\n",
103
+ " model_runner = model.RunModel(cfg, model_params, is_training=False)\n",
104
+ "\n",
105
+ " seq = \"A\" * max_len\n",
106
+ " length = len(seq)\n",
107
+ " feature_dict = {\n",
108
+ " **pipeline.make_sequence_features(sequence=seq, description=\"none\", num_res=length),\n",
109
+ " **pipeline.make_msa_features(msas=[[seq]], deletion_matrices=[[[0]*length]])\n",
110
+ " }\n",
111
+ " inputs = model_runner.process_features(feature_dict,random_seed=0)\n",
112
+ "\n",
113
+ " def runner(seq, opt):\n",
114
+ " # update sequence\n",
115
+ " inputs = opt[\"inputs\"]\n",
116
+ " inputs.update(opt[\"prev\"])\n",
117
+ " update_seq(seq, inputs)\n",
118
+ " update_aatype(inputs[\"target_feat\"][...,1:], inputs)\n",
119
+ "\n",
120
+ " # mask prediction\n",
121
+ " mask = seq.sum(-1)\n",
122
+ " inputs[\"seq_mask\"] = inputs[\"seq_mask\"].at[:].set(mask)\n",
123
+ " inputs[\"msa_mask\"] = inputs[\"msa_mask\"].at[:].set(mask)\n",
124
+ " inputs[\"residue_index\"] = jnp.where(mask==1,inputs[\"residue_index\"],0)\n",
125
+ "\n",
126
+ " # get prediction\n",
127
+ " key = jax.random.PRNGKey(0)\n",
128
+ " outputs = model_runner.apply(opt[\"params\"], key, inputs)\n",
129
+ "\n",
130
+ " prev = {\"init_msa_first_row\":outputs['representations']['msa_first_row'][None],\n",
131
+ " \"init_pair\":outputs['representations']['pair'][None],\n",
132
+ " \"init_pos\":outputs['structure_module']['final_atom_positions'][None]}\n",
133
+ " \n",
134
+ " aux = {\"final_atom_positions\":outputs[\"structure_module\"][\"final_atom_positions\"],\n",
135
+ " \"final_atom_mask\":outputs[\"structure_module\"][\"final_atom_mask\"],\n",
136
+ " \"plddt\":get_plddt(outputs),\"pae\":get_pae(outputs),\n",
137
+ " \"inputs\":inputs, \"prev\":prev}\n",
138
+ " return aux\n",
139
+ "\n",
140
+ " return jax.jit(runner), {\"inputs\":inputs,\"params\":model_params}\n",
141
+ "\n",
142
+ "MAX_LEN = 50\n",
143
+ "RUNNER, OPT = setup_model(MAX_LEN)"
144
+ ],
145
+ "metadata": {
146
+ "cellView": "form",
147
+ "id": "24ybo88aBiSU"
148
+ },
149
+ "execution_count": null,
150
+ "outputs": []
151
+ },
152
+ {
153
+ "cell_type": "code",
154
+ "source": [
155
+ "%%time\n",
156
+ "#@title Enter the amino acid sequence to fold ⬇️\n",
157
+ "\n",
158
+ "sequence = 'GGGGGGGGGGGGGGGGGGGG' #@param {type:\"string\"}\n",
159
+ "recycles = 0 #@param [\"0\", \"1\", \"2\", \"3\", \"6\", \"12\", \"24\"] {type:\"raw\"}\n",
160
+ "SEQ = re.sub(\"[^A-Z]\", \"\", sequence.upper())\n",
161
+ "LEN = len(SEQ)\n",
162
+ "if LEN > MAX_LEN:\n",
163
+ " print(\"recompiling...\")\n",
164
+ " MAX_LEN = LEN\n",
165
+ " RUNNER, OPT = setup_model(MAX_LEN)\n",
166
+ "\n",
167
+ "x = np.array([residue_constants.restype_order.get(aa,0) for aa in SEQ])\n",
168
+ "x = np.pad(x,[0,MAX_LEN-LEN],constant_values=-1)\n",
169
+ "x = jax.nn.one_hot(x,20)\n",
170
+ "\n",
171
+ "OPT[\"prev\"] = {'init_msa_first_row': np.zeros([1, MAX_LEN, 256]),\n",
172
+ " 'init_pair': np.zeros([1, MAX_LEN, MAX_LEN, 128]),\n",
173
+ " 'init_pos': np.zeros([1, MAX_LEN, 37, 3])}\n",
174
+ "\n",
175
+ "positions = []\n",
176
+ "plddts = []\n",
177
+ "for r in range(recycles+1):\n",
178
+ " outs = RUNNER(x, OPT)\n",
179
+ " outs = jax.tree_map(lambda x:np.asarray(x), outs)\n",
180
+ " positions.append(outs[\"prev\"][\"init_pos\"][0,:LEN])\n",
181
+ " plddts.append(outs[\"plddt\"][:LEN])\n",
182
+ " OPT[\"prev\"] = outs[\"prev\"]\n",
183
+ " if recycles > 0:\n",
184
+ " print(r, plddts[-1].mean())"
185
+ ],
186
+ "metadata": {
187
+ "cellView": "form",
188
+ "id": "cAoC4ar8G7ZH"
189
+ },
190
+ "execution_count": null,
191
+ "outputs": []
192
+ },
193
+ {
194
+ "cell_type": "code",
195
+ "source": [
196
+ "#@title Display 3D structure {run: \"auto\"}\n",
197
+ "color = \"lDDT\" #@param [\"chain\", \"lDDT\", \"rainbow\"]\n",
198
+ "show_sidechains = True #@param {type:\"boolean\"}\n",
199
+ "show_mainchains = False #@param {type:\"boolean\"}\n",
200
+ "#@markdown - TIP - hover the mouse over an amino acid to see its name and position number\n",
201
+ "\n",
202
+ "def save_pdb(outs, filename):\n",
203
+ " '''save pdb coordinates'''\n",
204
+ " p = {\"residue_index\":outs[\"inputs\"][\"residue_index\"][0][:LEN] + 1,\n",
205
+ " \"aatype\":outs[\"inputs\"][\"aatype\"].argmax(-1)[0][:LEN],\n",
206
+ " \"atom_positions\":outs[\"final_atom_positions\"][:LEN],\n",
207
+ " \"atom_mask\":outs[\"final_atom_mask\"][:LEN]}\n",
208
+ " b_factors = 100.0 * outs[\"plddt\"][:LEN,None] * p[\"atom_mask\"]\n",
209
+ " p = protein.Protein(**p,b_factors=b_factors)\n",
210
+ " pdb_lines = protein.to_pdb(p)\n",
211
+ " with open(filename, 'w') as f:\n",
212
+ " f.write(pdb_lines)\n",
213
+ "\n",
214
+ "save_pdb(outs,\"out.pdb\")\n",
215
+ "num_res = int(outs[\"inputs\"][\"aatype\"][0].sum())\n",
216
+ "\n",
217
+ "v = cf.show_pdb(\"out.pdb\", show_sidechains, show_mainchains, color,\n",
218
+ " color_HP=True, size=(800,480)) \n",
219
+ "v.setHoverable({},\n",
220
+ " True,\n",
221
+ " '''function(atom,viewer,event,container){if(!atom.label){atom.label=viewer.addLabel(\" \"+atom.resn+\":\"+atom.resi,{position:atom,backgroundColor:'mintcream',fontColor:'black'});}}''',\n",
222
+ " '''function(atom,viewer){if(atom.label){viewer.removeLabel(atom.label);delete atom.label;}}''')\n",
223
+ "v.show() \n",
224
+ "\n",
225
+ "if color == \"lDDT\":\n",
226
+ " cf.plot_plddt_legend().show() \n",
227
+ "if \"pae\" in outs:\n",
228
+ " cf.plot_confidence(outs[\"plddt\"][:LEN]*100, outs[\"pae\"][:LEN,:LEN]).show()\n",
229
+ "else:\n",
230
+ " cf.plot_confidence(outs[\"plddt\"][:LEN]*100).show()"
231
+ ],
232
+ "metadata": {
233
+ "cellView": "form",
234
+ "id": "-KbUGG4ZOp0J"
235
+ },
236
+ "execution_count": null,
237
+ "outputs": []
238
+ },
239
+ {
240
+ "cell_type": "code",
241
+ "source": [
242
+ "#@title Animate\n",
243
+ "#@markdown - Animate trajectory if more than 0 recycle(s)\n",
244
+ "import matplotlib\n",
245
+ "from matplotlib import animation\n",
246
+ "import matplotlib.pyplot as plt\n",
247
+ "from IPython.display import HTML\n",
248
+ "\n",
249
+ "def make_animation(positions, plddts=None, line_w=2.0):\n",
250
+ "\n",
251
+ " def ca_align_to_last(positions):\n",
252
+ " def align(P, Q):\n",
253
+ " p = P - P.mean(0,keepdims=True)\n",
254
+ " q = Q - Q.mean(0,keepdims=True)\n",
255
+ " return p @ cf.kabsch(p,q)\n",
256
+ " \n",
257
+ " pos = positions[-1,:,1,:] - positions[-1,:,1,:].mean(0,keepdims=True)\n",
258
+ " best_2D_view = pos @ cf.kabsch(pos,pos,return_v=True)\n",
259
+ "\n",
260
+ " new_positions = []\n",
261
+ " for i in range(len(positions)):\n",
262
+ " new_positions.append(align(positions[i,:,1,:],best_2D_view))\n",
263
+ " return np.asarray(new_positions)\n",
264
+ "\n",
265
+ " # align all to last recycle\n",
266
+ " pos = ca_align_to_last(positions)\n",
267
+ "\n",
268
+ " fig, (ax1, ax2, ax3) = plt.subplots(1,3)\n",
269
+ " fig.subplots_adjust(top = 0.90, bottom = 0.10, right = 1, left = 0, hspace = 0, wspace = 0)\n",
270
+ " fig.set_figwidth(13)\n",
271
+ " fig.set_figheight(5)\n",
272
+ " fig.set_dpi(100)\n",
273
+ "\n",
274
+ " xy_min = pos[...,:2].min() - 1\n",
275
+ " xy_max = pos[...,:2].max() + 1\n",
276
+ "\n",
277
+ " for ax in [ax1,ax3]:\n",
278
+ " ax.set_xlim(xy_min, xy_max)\n",
279
+ " ax.set_ylim(xy_min, xy_max)\n",
280
+ " ax.axis(False)\n",
281
+ "\n",
282
+ " ims=[]\n",
283
+ " for k,(xyz,plddt) in enumerate(zip(pos,plddts)):\n",
284
+ " ims.append([])\n",
285
+ " im2 = ax2.plot(plddt, animated=True, color=\"black\")\n",
286
+ " tt1 = cf.add_text(\"colored by N->C\", ax1)\n",
287
+ " tt2 = cf.add_text(f\"recycle={k}\", ax2)\n",
288
+ " tt3 = cf.add_text(f\"pLDDT={plddt.mean():.3f}\", ax3)\n",
289
+ " ax2.set_xlabel(\"positions\")\n",
290
+ " ax2.set_ylabel(\"pLDDT\")\n",
291
+ " ax2.set_ylim(0,100)\n",
292
+ " ims[-1] += [cf.plot_pseudo_3D(xyz, ax=ax1, line_w=line_w)]\n",
293
+ " ims[-1] += [im2[0],tt1,tt2,tt3]\n",
294
+ " ims[-1] += [cf.plot_pseudo_3D(xyz, c=plddt, cmin=50, cmax=90, ax=ax3, line_w=line_w)]\n",
295
+ " \n",
296
+ " ani = animation.ArtistAnimation(fig, ims, blit=True, interval=120)\n",
297
+ " plt.close()\n",
298
+ " return ani.to_html5_video()\n",
299
+ "\n",
300
+ "HTML(make_animation(np.asarray(positions),\n",
301
+ " np.asarray(plddts) * 100.0))"
302
+ ],
303
+ "metadata": {
304
+ "cellView": "form",
305
+ "id": "tdjdC0KFPjWw"
306
+ },
307
+ "execution_count": null,
308
+ "outputs": []
309
+ }
310
+ ]
311
+ }
af_backprop/examples/af_design.ipynb ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "accelerator": "GPU",
6
+ "colab": {
7
+ "name": "af_design.ipynb",
8
+ "provenance": [],
9
+ "include_colab_link": true
10
+ },
11
+ "kernelspec": {
12
+ "display_name": "Python 3",
13
+ "name": "python3"
14
+ },
15
+ "language_info": {
16
+ "name": "python"
17
+ }
18
+ },
19
+ "cells": [
20
+ {
21
+ "cell_type": "markdown",
22
+ "metadata": {
23
+ "id": "view-in-github",
24
+ "colab_type": "text"
25
+ },
26
+ "source": [
27
+ "<a href=\"https://colab.research.google.com/github/sokrypton/af_backprop/blob/main/examples/af_design.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "markdown",
32
+ "metadata": {
33
+ "id": "OA2k3sAYuiXe"
34
+ },
35
+ "source": [
36
+ "# AF Design\n",
37
+ "NOTE, updated version of this notebook has moved to: [ColabDesign](https://github.com/sokrypton/ColabDesign/tree/main/af)"
38
+ ]
39
+ }
40
+ ]
41
+ }
af_backprop/examples/fixbb_design.ipynb ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "accelerator": "GPU",
6
+ "colab": {
7
+ "name": "fixbb_design.ipynb",
8
+ "provenance": []
9
+ },
10
+ "kernelspec": {
11
+ "display_name": "Python 3",
12
+ "name": "python3"
13
+ },
14
+ "language_info": {
15
+ "name": "python"
16
+ }
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "markdown",
21
+ "metadata": {
22
+ "id": "uLHIgB5QydoL"
23
+ },
24
+ "source": [
25
+ "This notebook has moved here: https://colab.research.google.com/github/sokrypton/af_backprop/blob/main/examples/af_design.ipynb"
26
+ ]
27
+ }
28
+ ]
29
+ }
af_backprop/examples/sc_hall/1QJG.pdb ADDED
@@ -0,0 +1,1156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ATOM 1 N MET A 1 10.694 86.076 -17.884 1.00 84.30 A N
2
+ ATOM 2 CA MET A 1 9.356 86.709 -18.062 1.00 81.23 A C
3
+ ATOM 3 C MET A 1 8.548 85.831 -19.012 1.00 74.08 A C
4
+ ATOM 4 O MET A 1 8.958 85.627 -20.159 1.00 74.53 A O
5
+ ATOM 5 CB MET A 1 9.510 88.112 -18.660 1.00 88.90 A C
6
+ ATOM 6 CG MET A 1 8.245 88.955 -18.591 1.00 98.51 A C
7
+ ATOM 7 SD MET A 1 7.719 89.207 -16.879 1.00111.10 A S
8
+ ATOM 8 CE MET A 1 6.310 88.074 -16.761 1.00106.45 A C
9
+ ATOM 9 H1 MET A 1 11.109 85.938 -18.826 1.00 0.00 A H
10
+ ATOM 10 H2 MET A 1 11.317 86.619 -17.265 1.00 0.00 A H
11
+ ATOM 11 H3 MET A 1 10.557 85.138 -17.448 1.00 0.00 A H
12
+ ATOM 12 N ASN A 2 7.410 85.316 -18.548 1.00 62.40 A N
13
+ ATOM 13 CA ASN A 2 6.596 84.449 -19.389 1.00 47.87 A C
14
+ ATOM 14 C ASN A 2 5.205 84.954 -19.711 1.00 41.68 A C
15
+ ATOM 15 O ASN A 2 4.231 84.652 -19.021 1.00 41.06 A O
16
+ ATOM 16 CB ASN A 2 6.506 83.052 -18.794 1.00 41.73 A C
17
+ ATOM 17 CG ASN A 2 7.857 82.460 -18.510 1.00 34.33 A C
18
+ ATOM 18 ND2 ASN A 2 7.874 81.403 -17.729 1.00 31.37 A N
19
+ ATOM 19 OD1 ASN A 2 8.883 82.969 -18.959 1.00 37.21 A O
20
+ ATOM 20 H ASN A 2 7.083 85.483 -17.636 1.00 0.00 A H
21
+ ATOM 21 HD21 ASN A 2 7.016 81.088 -17.384 1.00 0.00 A H
22
+ ATOM 22 HD22 ASN A 2 8.733 80.991 -17.536 1.00 0.00 A H
23
+ ATOM 23 N THR A 3 5.127 85.753 -20.761 1.00 36.66 A N
24
+ ATOM 24 CA THR A 3 3.865 86.272 -21.226 1.00 34.05 A C
25
+ ATOM 25 C THR A 3 3.507 85.320 -22.359 1.00 34.07 A C
26
+ ATOM 26 O THR A 3 4.363 84.575 -22.833 1.00 36.58 A O
27
+ ATOM 27 CB THR A 3 4.029 87.713 -21.764 1.00 33.23 A C
28
+ ATOM 28 CG2 THR A 3 4.446 88.643 -20.655 1.00 34.82 A C
29
+ ATOM 29 OG1 THR A 3 5.029 87.746 -22.789 1.00 32.02 A O
30
+ ATOM 30 H THR A 3 5.922 86.005 -21.270 1.00 0.00 A H
31
+ ATOM 31 HG1 THR A 3 4.879 88.607 -23.212 1.00 0.00 A H
32
+ ATOM 32 N PRO A 4 2.236 85.282 -22.773 1.00 33.54 A N
33
+ ATOM 33 CA PRO A 4 1.860 84.384 -23.865 1.00 30.39 A C
34
+ ATOM 34 C PRO A 4 2.707 84.727 -25.085 1.00 29.97 A C
35
+ ATOM 35 O PRO A 4 3.253 83.850 -25.760 1.00 31.13 A O
36
+ ATOM 36 CB PRO A 4 0.404 84.759 -24.119 1.00 30.46 A C
37
+ ATOM 37 CG PRO A 4 -0.066 85.173 -22.778 1.00 33.24 A C
38
+ ATOM 38 CD PRO A 4 1.061 86.017 -22.276 1.00 34.34 A C
39
+ ATOM 39 N GLU A 5 2.848 86.023 -25.319 1.00 27.73 A N
40
+ ATOM 40 CA GLU A 5 3.607 86.541 -26.437 1.00 27.01 A C
41
+ ATOM 41 C GLU A 5 5.012 85.965 -26.481 1.00 24.21 A C
42
+ ATOM 42 O GLU A 5 5.488 85.561 -27.532 1.00 27.56 A O
43
+ ATOM 43 CB GLU A 5 3.690 88.070 -26.353 1.00 31.01 A C
44
+ ATOM 44 CG GLU A 5 2.342 88.828 -26.453 1.00 43.67 A C
45
+ ATOM 45 CD GLU A 5 1.448 88.719 -25.210 1.00 49.66 A C
46
+ ATOM 46 OE1 GLU A 5 1.955 88.464 -24.095 1.00 57.37 A O
47
+ ATOM 47 OE2 GLU A 5 0.222 88.898 -25.353 1.00 51.34 A O1-
48
+ ATOM 48 H GLU A 5 2.432 86.661 -24.705 1.00 0.00 A H
49
+ ATOM 49 N HIS A 6 5.662 85.907 -25.328 1.00 20.87 A N
50
+ ATOM 50 CA HIS A 6 7.030 85.407 -25.240 1.00 18.21 A C
51
+ ATOM 51 C HIS A 6 7.128 83.913 -25.504 1.00 16.76 A C
52
+ ATOM 52 O HIS A 6 8.016 83.462 -26.216 1.00 20.94 A O
53
+ ATOM 53 CB HIS A 6 7.644 85.758 -23.879 1.00 17.74 A C
54
+ ATOM 54 CG HIS A 6 8.966 85.097 -23.617 1.00 23.16 A C
55
+ ATOM 55 CD2 HIS A 6 9.288 84.014 -22.867 1.00 23.51 A C
56
+ ATOM 56 ND1 HIS A 6 10.148 85.543 -24.171 1.00 26.10 A N
57
+ ATOM 57 CE1 HIS A 6 11.138 84.764 -23.775 1.00 25.94 A C
58
+ ATOM 58 NE2 HIS A 6 10.643 83.829 -22.985 1.00 24.12 A N
59
+ ATOM 59 H HIS A 6 5.203 86.224 -24.518 1.00 0.00 A H
60
+ ATOM 60 HD1 HIS A 6 10.261 86.309 -24.778 1.00 0.00 A H
61
+ ATOM 61 HE2 HIS A 6 11.146 83.092 -22.596 1.00 0.00 A H
62
+ ATOM 62 N MET A 7 6.222 83.145 -24.924 1.00 14.80 A N
63
+ ATOM 63 CA MET A 7 6.251 81.718 -25.128 1.00 13.22 A C
64
+ ATOM 64 C MET A 7 5.959 81.421 -26.587 1.00 11.59 A C
65
+ ATOM 65 O MET A 7 6.500 80.493 -27.161 1.00 12.50 A O
66
+ ATOM 66 CB MET A 7 5.245 81.037 -24.208 1.00 13.28 A C
67
+ ATOM 67 CG MET A 7 5.517 81.314 -22.727 1.00 9.93 A C
68
+ ATOM 68 SD MET A 7 4.532 80.310 -21.631 1.00 15.44 A S
69
+ ATOM 69 CE MET A 7 3.047 81.335 -21.585 1.00 7.92 A C
70
+ ATOM 70 H MET A 7 5.517 83.549 -24.369 1.00 0.00 A H
71
+ ATOM 71 N THR A 8 5.132 82.245 -27.204 1.00 13.23 A N
72
+ ATOM 72 CA THR A 8 4.793 82.053 -28.600 1.00 14.43 A C
73
+ ATOM 73 C THR A 8 5.973 82.395 -29.514 1.00 18.05 A C
74
+ ATOM 74 O THR A 8 6.181 81.732 -30.534 1.00 19.37 A O
75
+ ATOM 75 CB THR A 8 3.577 82.897 -28.978 1.00 19.26 A C
76
+ ATOM 76 CG2 THR A 8 3.136 82.608 -30.403 1.00 20.22 A C
77
+ ATOM 77 OG1 THR A 8 2.500 82.579 -28.086 1.00 14.83 A O
78
+ ATOM 78 H THR A 8 4.729 82.990 -26.718 1.00 0.00 A H
79
+ ATOM 79 HG1 THR A 8 2.746 82.797 -27.187 1.00 0.00 A H
80
+ ATOM 80 N ALA A 9 6.751 83.416 -29.152 1.00 14.07 A N
81
+ ATOM 81 CA ALA A 9 7.911 83.806 -29.947 1.00 13.33 A C
82
+ ATOM 82 C ALA A 9 9.000 82.739 -29.838 1.00 12.50 A C
83
+ ATOM 83 O ALA A 9 9.697 82.462 -30.807 1.00 15.18 A O
84
+ ATOM 84 CB ALA A 9 8.423 85.131 -29.500 1.00 13.58 A C
85
+ ATOM 85 H ALA A 9 6.526 83.927 -28.344 1.00 0.00 A H
86
+ ATOM 86 N VAL A 10 9.118 82.116 -28.671 1.00 11.73 A N
87
+ ATOM 87 CA VAL A 10 10.090 81.055 -28.464 1.00 12.11 A C
88
+ ATOM 88 C VAL A 10 9.731 79.823 -29.295 1.00 13.25 A C
89
+ ATOM 89 O VAL A 10 10.614 79.217 -29.890 1.00 19.98 A O
90
+ ATOM 90 CB VAL A 10 10.191 80.674 -26.995 1.00 9.17 A C
91
+ ATOM 91 CG1 VAL A 10 10.931 79.370 -26.838 1.00 6.34 A C
92
+ ATOM 92 CG2 VAL A 10 10.917 81.772 -26.238 1.00 8.83 A C
93
+ ATOM 93 H VAL A 10 8.524 82.378 -27.934 1.00 0.00 A H
94
+ ATOM 94 N VAL A 11 8.451 79.453 -29.354 1.00 14.16 A N
95
+ ATOM 95 CA VAL A 11 8.036 78.296 -30.153 1.00 8.99 A C
96
+ ATOM 96 C VAL A 11 8.424 78.566 -31.597 1.00 13.66 A C
97
+ ATOM 97 O VAL A 11 8.974 77.707 -32.272 1.00 20.39 A O
98
+ ATOM 98 CB VAL A 11 6.512 78.064 -30.071 1.00 8.71 A C
99
+ ATOM 99 CG1 VAL A 11 6.051 77.051 -31.104 1.00 6.03 A C
100
+ ATOM 100 CG2 VAL A 11 6.137 77.589 -28.690 1.00 10.59 A C
101
+ ATOM 101 H VAL A 11 7.783 79.946 -28.829 1.00 0.00 A H
102
+ ATOM 102 N GLN A 12 8.171 79.789 -32.048 1.00 17.26 A N
103
+ ATOM 103 CA GLN A 12 8.478 80.205 -33.405 1.00 16.35 A C
104
+ ATOM 104 C GLN A 12 9.972 80.219 -33.722 1.00 14.80 A C
105
+ ATOM 105 O GLN A 12 10.369 79.891 -34.824 1.00 13.57 A O
106
+ ATOM 106 CB GLN A 12 7.856 81.566 -33.678 1.00 18.90 A C
107
+ ATOM 107 CG GLN A 12 6.344 81.525 -33.685 1.00 23.55 A C
108
+ ATOM 108 CD GLN A 12 5.732 82.912 -33.828 1.00 34.39 A C
109
+ ATOM 109 NE2 GLN A 12 6.071 83.814 -32.905 1.00 35.29 A N
110
+ ATOM 110 OE1 GLN A 12 4.959 83.169 -34.748 1.00 41.94 A O
111
+ ATOM 111 H GLN A 12 7.747 80.428 -31.439 1.00 0.00 A H
112
+ ATOM 112 HE21 GLN A 12 6.672 83.536 -32.191 1.00 0.00 A H
113
+ ATOM 113 HE22 GLN A 12 5.691 84.710 -33.003 1.00 0.00 A H
114
+ ATOM 114 N ARG A 13 10.804 80.604 -32.764 1.00 16.01 A N
115
+ ATOM 115 CA ARG A 13 12.240 80.609 -32.997 1.00 15.24 A C
116
+ ATOM 116 C ARG A 13 12.729 79.171 -33.079 1.00 15.94 A C
117
+ ATOM 117 O ARG A 13 13.676 78.860 -33.802 1.00 18.78 A O
118
+ ATOM 118 CB ARG A 13 12.970 81.328 -31.868 1.00 14.71 A C
119
+ ATOM 119 CG ARG A 13 12.781 82.814 -31.870 1.00 24.21 A C
120
+ ATOM 120 CD ARG A 13 13.525 83.449 -30.719 1.00 30.45 A C
121
+ ATOM 121 NE ARG A 13 13.429 84.899 -30.810 1.00 42.18 A N
122
+ ATOM 122 CZ ARG A 13 12.766 85.668 -29.951 1.00 46.62 A C
123
+ ATOM 123 NH1 ARG A 13 12.138 85.130 -28.912 1.00 52.17 A N1+
124
+ ATOM 124 NH2 ARG A 13 12.700 86.979 -30.163 1.00 53.25 A N
125
+ ATOM 125 H ARG A 13 10.448 80.892 -31.898 1.00 0.00 A H
126
+ ATOM 126 HE ARG A 13 13.905 85.284 -31.580 1.00 0.00 A H
127
+ ATOM 127 HH11 ARG A 13 12.163 84.134 -28.780 1.00 0.00 A H
128
+ ATOM 128 HH12 ARG A 13 11.626 85.671 -28.239 1.00 0.00 A H
129
+ ATOM 129 HH21 ARG A 13 13.140 87.388 -30.967 1.00 0.00 A H
130
+ ATOM 130 HH22 ARG A 13 12.212 87.593 -29.540 1.00 0.00 A H
131
+ ATOM 131 N TYR A 14 12.069 78.299 -32.327 1.00 13.30 A N
132
+ ATOM 132 CA TYR A 14 12.402 76.890 -32.286 1.00 10.90 A C
133
+ ATOM 133 C TYR A 14 12.147 76.240 -33.647 1.00 13.50 A C
134
+ ATOM 134 O TYR A 14 12.987 75.533 -34.188 1.00 16.92 A O
135
+ ATOM 135 CB TYR A 14 11.538 76.227 -31.229 1.00 6.03 A C
136
+ ATOM 136 CG TYR A 14 11.674 74.726 -31.157 1.00 9.19 A C
137
+ ATOM 137 CD1 TYR A 14 12.809 74.151 -30.607 1.00 11.32 A C
138
+ ATOM 138 CD2 TYR A 14 10.662 73.885 -31.594 1.00 6.00 A C
139
+ ATOM 139 CE1 TYR A 14 12.937 72.787 -30.486 1.00 13.36 A C
140
+ ATOM 140 CE2 TYR A 14 10.787 72.508 -31.473 1.00 7.87 A C
141
+ ATOM 141 CZ TYR A 14 11.936 71.975 -30.912 1.00 7.44 A C
142
+ ATOM 142 OH TYR A 14 12.106 70.618 -30.754 1.00 8.52 A O
143
+ ATOM 143 H TYR A 14 11.336 78.619 -31.760 1.00 0.00 A H
144
+ ATOM 144 HH TYR A 14 11.279 70.202 -31.029 1.00 0.00 A H
145
+ ATOM 145 N VAL A 15 10.951 76.458 -34.176 1.00 17.30 A N
146
+ ATOM 146 CA VAL A 15 10.573 75.905 -35.460 1.00 16.50 A C
147
+ ATOM 147 C VAL A 15 11.522 76.437 -36.513 1.00 17.14 A C
148
+ ATOM 148 O VAL A 15 11.978 75.699 -37.366 1.00 20.10 A O
149
+ ATOM 149 CB VAL A 15 9.135 76.312 -35.818 1.00 14.14 A C
150
+ ATOM 150 CG1 VAL A 15 8.871 76.099 -37.278 1.00 14.20 A C
151
+ ATOM 151 CG2 VAL A 15 8.169 75.506 -35.018 1.00 11.49 A C
152
+ ATOM 152 H VAL A 15 10.306 76.993 -33.666 1.00 0.00 A H
153
+ ATOM 153 N ALA A 16 11.835 77.723 -36.422 1.00 15.98 A N
154
+ ATOM 154 CA ALA A 16 12.718 78.372 -37.381 1.00 16.15 A C
155
+ ATOM 155 C ALA A 16 14.137 77.859 -37.330 1.00 18.88 A C
156
+ ATOM 156 O ALA A 16 14.744 77.656 -38.380 1.00 26.26 A O
157
+ ATOM 157 CB ALA A 16 12.708 79.867 -37.179 1.00 17.59 A C
158
+ ATOM 158 H ALA A 16 11.451 78.264 -35.698 1.00 0.00 A H
159
+ ATOM 159 N ALA A 17 14.670 77.679 -36.119 1.00 12.81 A N
160
+ ATOM 160 CA ALA A 17 16.036 77.180 -35.919 1.00 12.41 A C
161
+ ATOM 161 C ALA A 17 16.144 75.741 -36.425 1.00 15.44 A C
162
+ ATOM 162 O ALA A 17 17.136 75.357 -37.021 1.00 19.19 A O
163
+ ATOM 163 CB ALA A 17 16.415 77.268 -34.457 1.00 9.24 A C
164
+ ATOM 164 H ALA A 17 14.132 77.888 -35.332 1.00 0.00 A H
165
+ ATOM 165 N LEU A 18 15.088 74.967 -36.223 1.00 17.31 A N
166
+ ATOM 166 CA LEU A 18 15.009 73.586 -36.680 1.00 15.47 A C
167
+ ATOM 167 C LEU A 18 15.056 73.590 -38.218 1.00 19.85 A C
168
+ ATOM 168 O LEU A 18 15.794 72.814 -38.837 1.00 20.17 A O
169
+ ATOM 169 CB LEU A 18 13.694 72.985 -36.174 1.00 15.33 A C
170
+ ATOM 170 CG LEU A 18 13.622 71.635 -35.445 1.00 15.11 A C
171
+ ATOM 171 CD1 LEU A 18 14.784 71.406 -34.532 1.00 9.98 A C
172
+ ATOM 172 CD2 LEU A 18 12.329 71.585 -34.666 1.00 11.37 A C
173
+ ATOM 173 H LEU A 18 14.330 75.333 -35.719 1.00 0.00 A H
174
+ ATOM 174 N ASN A 19 14.319 74.519 -38.822 1.00 19.25 A N
175
+ ATOM 175 CA ASN A 19 14.279 74.678 -40.282 1.00 18.67 A C
176
+ ATOM 176 C ASN A 19 15.583 75.183 -40.890 1.00 19.48 A C
177
+ ATOM 177 O ASN A 19 15.759 75.149 -42.112 1.00 26.00 A O
178
+ ATOM 178 CB ASN A 19 13.227 75.700 -40.684 1.00 16.40 A C
179
+ ATOM 179 CG ASN A 19 11.885 75.095 -40.902 1.00 19.81 A C
180
+ ATOM 180 ND2 ASN A 19 10.846 75.801 -40.448 1.00 19.07 A N
181
+ ATOM 181 OD1 ASN A 19 11.761 73.998 -41.471 1.00 21.76 A O
182
+ ATOM 182 H ASN A 19 13.762 75.104 -38.267 1.00 0.00 A H
183
+ ATOM 183 HD21 ASN A 19 11.004 76.649 -39.995 1.00 0.00 A H
184
+ ATOM 184 HD22 ASN A 19 9.959 75.414 -40.603 1.00 0.00 A H
185
+ ATOM 185 N ALA A 20 16.467 75.724 -40.070 1.00 15.92 A N
186
+ ATOM 186 CA ALA A 20 17.708 76.257 -40.599 1.00 13.83 A C
187
+ ATOM 187 C ALA A 20 18.952 75.465 -40.200 1.00 15.30 A C
188
+ ATOM 188 O ALA A 20 20.075 75.893 -40.484 1.00 20.93 A O
189
+ ATOM 189 CB ALA A 20 17.850 77.723 -40.180 1.00 9.03 A C
190
+ ATOM 190 H ALA A 20 16.279 75.791 -39.113 1.00 0.00 A H
191
+ ATOM 191 N GLY A 21 18.765 74.314 -39.559 1.00 14.60 A N
192
+ ATOM 192 CA GLY A 21 19.901 73.530 -39.112 1.00 6.97 A C
193
+ ATOM 193 C GLY A 21 20.786 74.355 -38.196 1.00 11.91 A C
194
+ ATOM 194 O GLY A 21 21.993 74.153 -38.146 1.00 19.27 A O
195
+ ATOM 195 H GLY A 21 17.855 73.991 -39.412 1.00 0.00 A H
196
+ ATOM 196 N ASP A 22 20.176 75.272 -37.455 1.00 10.80 A N
197
+ ATOM 197 CA ASP A 22 20.876 76.156 -36.540 1.00 13.90 A C
198
+ ATOM 198 C ASP A 22 20.938 75.487 -35.162 1.00 18.34 A C
199
+ ATOM 199 O ASP A 22 20.055 75.645 -34.319 1.00 21.73 A O
200
+ ATOM 200 CB ASP A 22 20.107 77.467 -36.460 1.00 15.62 A C
201
+ ATOM 201 CG ASP A 22 20.716 78.448 -35.481 1.00 21.13 A C
202
+ ATOM 202 OD1 ASP A 22 21.758 78.127 -34.874 1.00 24.96 A O
203
+ ATOM 203 OD2 ASP A 22 20.155 79.559 -35.333 1.00 22.13 A O1-
204
+ ATOM 204 H ASP A 22 19.204 75.373 -37.497 1.00 0.00 A H
205
+ ATOM 205 N LEU A 23 22.029 74.791 -34.912 1.00 16.13 A N
206
+ ATOM 206 CA LEU A 23 22.200 74.048 -33.687 1.00 12.88 A C
207
+ ATOM 207 C LEU A 23 22.297 74.892 -32.455 1.00 13.55 A C
208
+ ATOM 208 O LEU A 23 21.770 74.531 -31.414 1.00 16.43 A O
209
+ ATOM 209 CB LEU A 23 23.431 73.159 -33.812 1.00 22.08 A C
210
+ ATOM 210 CG LEU A 23 23.645 72.107 -32.746 1.00 22.16 A C
211
+ ATOM 211 CD1 LEU A 23 22.340 71.415 -32.440 1.00 23.03 A C
212
+ ATOM 212 CD2 LEU A 23 24.679 71.134 -33.248 1.00 22.78 A C
213
+ ATOM 213 H LEU A 23 22.746 74.826 -35.582 1.00 0.00 A H
214
+ ATOM 214 N ASP A 24 22.994 76.002 -32.553 1.00 14.90 A N
215
+ ATOM 215 CA ASP A 24 23.149 76.860 -31.407 1.00 19.66 A C
216
+ ATOM 216 C ASP A 24 21.920 77.692 -31.129 1.00 21.39 A C
217
+ ATOM 217 O ASP A 24 21.687 78.078 -29.993 1.00 24.19 A O
218
+ ATOM 218 CB ASP A 24 24.421 77.697 -31.539 1.00 21.55 A C
219
+ ATOM 219 CG ASP A 24 25.681 76.846 -31.380 1.00 26.43 A C
220
+ ATOM 220 OD1 ASP A 24 25.590 75.603 -31.526 1.00 33.14 A O
221
+ ATOM 221 OD2 ASP A 24 26.765 77.402 -31.111 1.00 34.14 A O1-
222
+ ATOM 222 H ASP A 24 23.414 76.281 -33.395 1.00 0.00 A H
223
+ ATOM 223 N GLY A 25 21.113 77.936 -32.157 1.00 24.38 A N
224
+ ATOM 224 CA GLY A 25 19.888 78.690 -31.980 1.00 19.16 A C
225
+ ATOM 225 C GLY A 25 18.894 77.848 -31.203 1.00 17.24 A C
226
+ ATOM 226 O GLY A 25 18.222 78.340 -30.311 1.00 19.60 A O
227
+ ATOM 227 H GLY A 25 21.353 77.634 -33.059 1.00 0.00 A H
228
+ ATOM 228 N ILE A 26 18.821 76.566 -31.518 1.00 10.29 A N
229
+ ATOM 229 CA ILE A 26 17.930 75.669 -30.819 1.00 12.55 A C
230
+ ATOM 230 C ILE A 26 18.297 75.585 -29.339 1.00 15.29 A C
231
+ ATOM 231 O ILE A 26 17.474 75.828 -28.461 1.00 17.57 A O
232
+ ATOM 232 CB ILE A 26 18.004 74.262 -31.406 1.00 9.32 A C
233
+ ATOM 233 CG1 ILE A 26 17.508 74.273 -32.836 1.00 8.47 A C
234
+ ATOM 234 CG2 ILE A 26 17.156 73.321 -30.597 1.00 13.37 A C
235
+ ATOM 235 CD1 ILE A 26 17.655 72.942 -33.496 1.00 12.60 A C
236
+ ATOM 236 H ILE A 26 19.377 76.251 -32.269 1.00 0.00 A H
237
+ ATOM 237 N VAL A 27 19.550 75.250 -29.072 1.00 17.20 A N
238
+ ATOM 238 CA VAL A 27 20.046 75.090 -27.710 1.00 14.33 A C
239
+ ATOM 239 C VAL A 27 20.013 76.328 -26.818 1.00 13.10 A C
240
+ ATOM 240 O VAL A 27 19.883 76.202 -25.606 1.00 18.39 A O
241
+ ATOM 241 CB VAL A 27 21.461 74.464 -27.718 1.00 12.93 A C
242
+ ATOM 242 CG1 VAL A 27 21.924 74.194 -26.304 1.00 17.80 A C
243
+ ATOM 243 CG2 VAL A 27 21.436 73.131 -28.520 1.00 14.81 A C
244
+ ATOM 244 H VAL A 27 20.169 75.108 -29.823 1.00 0.00 A H
245
+ ATOM 245 N ALA A 28 20.075 77.515 -27.406 1.00 13.76 A N
246
+ ATOM 246 CA ALA A 28 20.052 78.764 -26.637 1.00 12.16 A C
247
+ ATOM 247 C ALA A 28 18.720 78.974 -25.931 1.00 16.53 A C
248
+ ATOM 248 O ALA A 28 18.628 79.725 -24.963 1.00 18.18 A O
249
+ ATOM 249 CB ALA A 28 20.344 79.958 -27.556 1.00 10.61 A C
250
+ ATOM 250 H ALA A 28 20.146 77.563 -28.383 1.00 0.00 A H
251
+ ATOM 251 N LEU A 29 17.693 78.306 -26.443 1.00 16.50 A N
252
+ ATOM 252 CA LEU A 29 16.332 78.376 -25.940 1.00 12.57 A C
253
+ ATOM 253 C LEU A 29 16.119 77.660 -24.611 1.00 13.08 A C
254
+ ATOM 254 O LEU A 29 15.228 78.028 -23.840 1.00 18.83 A O
255
+ ATOM 255 CB LEU A 29 15.392 77.758 -26.981 1.00 10.87 A C
256
+ ATOM 256 CG LEU A 29 14.615 78.653 -27.953 1.00 16.58 A C
257
+ ATOM 257 CD1 LEU A 29 15.336 79.965 -28.233 1.00 17.44 A C
258
+ ATOM 258 CD2 LEU A 29 14.320 77.880 -29.244 1.00 12.46 A C
259
+ ATOM 259 H LEU A 29 17.855 77.733 -27.220 1.00 0.00 A H
260
+ ATOM 260 N PHE A 30 16.905 76.618 -24.363 1.00 15.03 A N
261
+ ATOM 261 CA PHE A 30 16.769 75.816 -23.153 1.00 13.36 A C
262
+ ATOM 262 C PHE A 30 17.547 76.305 -21.927 1.00 18.06 A C
263
+ ATOM 263 O PHE A 30 18.538 77.027 -22.044 1.00 20.40 A O
264
+ ATOM 264 CB PHE A 30 17.128 74.354 -23.463 1.00 13.54 A C
265
+ ATOM 265 CG PHE A 30 16.189 73.679 -24.468 1.00 13.97 A C
266
+ ATOM 266 CD1 PHE A 30 16.391 73.811 -25.833 1.00 12.41 A C
267
+ ATOM 267 CD2 PHE A 30 15.121 72.890 -24.032 1.00 11.09 A C
268
+ ATOM 268 CE1 PHE A 30 15.563 73.176 -26.733 1.00 13.85 A C
269
+ ATOM 269 CE2 PHE A 30 14.289 72.251 -24.930 1.00 8.95 A C
270
+ ATOM 270 CZ PHE A 30 14.509 72.392 -26.278 1.00 13.73 A C
271
+ ATOM 271 H PHE A 30 17.631 76.398 -24.981 1.00 0.00 A H
272
+ ATOM 272 N ALA A 31 17.020 75.999 -20.745 1.00 18.68 A N
273
+ ATOM 273 CA ALA A 31 17.674 76.353 -19.487 1.00 20.42 A C
274
+ ATOM 274 C ALA A 31 18.795 75.322 -19.367 1.00 20.67 A C
275
+ ATOM 275 O ALA A 31 18.672 74.215 -19.864 1.00 20.86 A O
276
+ ATOM 276 CB ALA A 31 16.705 76.222 -18.325 1.00 13.63 A C
277
+ ATOM 277 H ALA A 31 16.197 75.481 -20.726 1.00 0.00 A H
278
+ ATOM 278 N ASP A 32 19.868 75.649 -18.680 1.00 25.24 A N
279
+ ATOM 279 CA ASP A 32 20.975 74.718 -18.584 1.00 29.66 A C
280
+ ATOM 280 C ASP A 32 20.645 73.374 -17.954 1.00 26.34 A C
281
+ ATOM 281 O ASP A 32 21.213 72.337 -18.317 1.00 27.92 A O
282
+ ATOM 282 CB ASP A 32 22.141 75.424 -17.903 1.00 41.07 A C
283
+ ATOM 283 CG ASP A 32 22.543 76.697 -18.649 1.00 56.13 A C
284
+ ATOM 284 OD1 ASP A 32 21.696 77.629 -18.750 1.00 65.00 A O
285
+ ATOM 285 OD2 ASP A 32 23.672 76.741 -19.194 1.00 63.02 A O1-
286
+ ATOM 286 H ASP A 32 19.988 76.501 -18.211 1.00 0.00 A H
287
+ ATOM 287 N ASP A 33 19.654 73.378 -17.082 1.00 24.28 A N
288
+ ATOM 288 CA ASP A 33 19.230 72.174 -16.393 1.00 25.07 A C
289
+ ATOM 289 C ASP A 33 17.986 71.549 -17.017 1.00 25.39 A C
290
+ ATOM 290 O ASP A 33 17.418 70.592 -16.464 1.00 27.75 A O
291
+ ATOM 291 CB ASP A 33 18.975 72.494 -14.915 1.00 34.12 A C
292
+ ATOM 292 CG ASP A 33 18.076 73.713 -14.717 1.00 38.66 A C
293
+ ATOM 293 OD1 ASP A 33 18.398 74.816 -15.213 1.00 41.05 A O
294
+ ATOM 294 OD2 ASP A 33 17.043 73.574 -14.037 1.00 46.96 A O1-
295
+ ATOM 295 H ASP A 33 19.193 74.214 -16.864 1.00 0.00 A H
296
+ ATOM 296 N ALA A 34 17.600 72.044 -18.190 1.00 16.51 A N
297
+ ATOM 297 CA ALA A 34 16.417 71.557 -18.898 1.00 16.20 A C
298
+ ATOM 298 C ALA A 34 16.446 70.087 -19.302 1.00 18.11 A C
299
+ ATOM 299 O ALA A 34 17.509 69.450 -19.332 1.00 22.20 A O
300
+ ATOM 300 CB ALA A 34 16.163 72.412 -20.117 1.00 13.10 A C
301
+ ATOM 301 H ALA A 34 18.140 72.752 -18.594 1.00 0.00 A H
302
+ ATOM 302 N THR A 35 15.267 69.534 -19.566 1.00 17.22 A N
303
+ ATOM 303 CA THR A 35 15.164 68.143 -19.997 1.00 19.54 A C
304
+ ATOM 304 C THR A 35 14.404 68.107 -21.314 1.00 20.23 A C
305
+ ATOM 305 O THR A 35 13.650 69.036 -21.617 1.00 19.48 A O
306
+ ATOM 306 CB THR A 35 14.393 67.270 -18.995 1.00 19.13 A C
307
+ ATOM 307 CG2 THR A 35 15.303 66.727 -17.930 1.00 22.28 A C
308
+ ATOM 308 OG1 THR A 35 13.364 68.047 -18.385 1.00 33.43 A O
309
+ ATOM 309 H THR A 35 14.440 70.059 -19.505 1.00 0.00 A H
310
+ ATOM 310 HG1 THR A 35 13.777 68.790 -17.927 1.00 0.00 A H
311
+ ATOM 311 N VAL A 36 14.589 67.023 -22.066 1.00 17.96 A N
312
+ ATOM 312 CA VAL A 36 13.938 66.785 -23.362 1.00 13.42 A C
313
+ ATOM 313 C VAL A 36 13.458 65.335 -23.329 1.00 17.93 A C
314
+ ATOM 314 O VAL A 36 14.226 64.446 -22.964 1.00 21.00 A O
315
+ ATOM 315 CB VAL A 36 14.960 66.940 -24.551 1.00 15.38 A C
316
+ ATOM 316 CG1 VAL A 36 14.490 66.201 -25.779 1.00 9.13 A C
317
+ ATOM 317 CG2 VAL A 36 15.164 68.416 -24.891 1.00 6.36 A C
318
+ ATOM 318 H VAL A 36 15.199 66.329 -21.737 1.00 0.00 A H
319
+ ATOM 319 N GLU A 37 12.180 65.098 -23.604 1.00 17.48 A N
320
+ ATOM 320 CA GLU A 37 11.667 63.739 -23.638 1.00 16.27 A C
321
+ ATOM 321 C GLU A 37 11.080 63.616 -25.024 1.00 14.95 A C
322
+ ATOM 322 O GLU A 37 9.975 64.057 -25.277 1.00 14.81 A O
323
+ ATOM 323 CB GLU A 37 10.601 63.529 -22.583 1.00 21.55 A C
324
+ ATOM 324 CG GLU A 37 10.328 62.078 -22.314 1.00 25.91 A C
325
+ ATOM 325 CD GLU A 37 9.276 61.879 -21.261 1.00 27.26 A C
326
+ ATOM 326 OE1 GLU A 37 9.249 62.659 -20.294 1.00 36.29 A O
327
+ ATOM 327 OE2 GLU A 37 8.458 60.950 -21.407 1.00 32.31 A O1-
328
+ ATOM 328 H GLU A 37 11.555 65.842 -23.761 1.00 0.00 A H
329
+ ATOM 329 N ASN A 38 11.870 63.089 -25.948 1.00 19.33 A N
330
+ ATOM 330 CA ASN A 38 11.453 62.956 -27.337 1.00 18.26 A C
331
+ ATOM 331 C ASN A 38 11.866 61.577 -27.816 1.00 18.79 A C
332
+ ATOM 332 O ASN A 38 13.049 61.275 -27.911 1.00 23.11 A O
333
+ ATOM 333 CB ASN A 38 12.159 64.031 -28.170 1.00 15.03 A C
334
+ ATOM 334 CG ASN A 38 11.539 64.233 -29.541 1.00 13.27 A C
335
+ ATOM 335 ND2 ASN A 38 11.759 65.409 -30.106 1.00 20.00 A N
336
+ ATOM 336 OD1 ASN A 38 10.881 63.345 -30.090 1.00 14.68 A O
337
+ ATOM 337 H ASN A 38 12.755 62.739 -25.703 1.00 0.00 A H
338
+ ATOM 338 HD21 ASN A 38 12.289 66.102 -29.664 1.00 0.00 A H
339
+ ATOM 339 HD22 ASN A 38 11.345 65.573 -30.975 1.00 0.00 A H
340
+ ATOM 340 N PRO A 39 10.893 60.686 -28.029 1.00 19.61 A N
341
+ ATOM 341 CA PRO A 39 9.463 60.934 -27.834 1.00 20.71 A C
342
+ ATOM 342 C PRO A 39 9.023 60.632 -26.403 1.00 17.56 A C
343
+ ATOM 343 O PRO A 39 9.786 60.056 -25.612 1.00 15.92 A O
344
+ ATOM 344 CB PRO A 39 8.835 59.932 -28.790 1.00 24.55 A C
345
+ ATOM 345 CG PRO A 39 9.747 58.741 -28.617 1.00 19.39 A C
346
+ ATOM 346 CD PRO A 39 11.114 59.384 -28.689 1.00 20.94 A C
347
+ ATOM 347 N VAL A 40 7.792 61.022 -26.084 1.00 17.69 A N
348
+ ATOM 348 CA VAL A 40 7.211 60.751 -24.770 1.00 20.37 A C
349
+ ATOM 349 C VAL A 40 7.255 59.234 -24.574 1.00 21.71 A C
350
+ ATOM 350 O VAL A 40 6.793 58.456 -25.421 1.00 24.84 A O
351
+ ATOM 351 CB VAL A 40 5.745 61.274 -24.666 1.00 13.69 A C
352
+ ATOM 352 CG1 VAL A 40 4.933 60.444 -23.697 1.00 21.73 A C
353
+ ATOM 353 CG2 VAL A 40 5.755 62.678 -24.193 1.00 13.64 A C
354
+ ATOM 354 H VAL A 40 7.263 61.507 -26.755 1.00 0.00 A H
355
+ ATOM 355 N GLY A 41 7.833 58.831 -23.452 1.00 25.12 A N
356
+ ATOM 356 CA GLY A 41 7.974 57.423 -23.140 1.00 25.44 A C
357
+ ATOM 357 C GLY A 41 9.425 56.959 -23.121 1.00 23.88 A C
358
+ ATOM 358 O GLY A 41 9.692 55.840 -22.692 1.00 27.74 A O
359
+ ATOM 359 H GLY A 41 8.186 59.492 -22.810 1.00 0.00 A H
360
+ ATOM 360 N SER A 42 10.348 57.782 -23.626 1.00 26.93 A N
361
+ ATOM 361 CA SER A 42 11.775 57.450 -23.641 1.00 27.71 A C
362
+ ATOM 362 C SER A 42 12.519 58.104 -22.456 1.00 28.62 A C
363
+ ATOM 363 O SER A 42 11.940 58.880 -21.683 1.00 28.06 A O
364
+ ATOM 364 CB SER A 42 12.403 57.858 -24.979 1.00 28.05 A C
365
+ ATOM 365 OG SER A 42 12.277 59.247 -25.211 1.00 26.25 A O
366
+ ATOM 366 H SER A 42 10.065 58.650 -23.975 1.00 0.00 A H
367
+ ATOM 367 HG SER A 42 11.405 59.561 -24.962 1.00 0.00 A H
368
+ ATOM 368 N GLU A 43 13.798 57.785 -22.304 1.00 28.59 A N
369
+ ATOM 369 CA GLU A 43 14.583 58.337 -21.210 1.00 30.92 A C
370
+ ATOM 370 C GLU A 43 14.857 59.797 -21.502 1.00 28.51 A C
371
+ ATOM 371 O GLU A 43 15.324 60.152 -22.588 1.00 30.42 A O
372
+ ATOM 372 CB GLU A 43 15.922 57.600 -21.045 1.00 44.55 A C
373
+ ATOM 373 CG GLU A 43 16.019 56.223 -21.722 1.00 62.47 A C
374
+ ATOM 374 CD GLU A 43 16.565 56.284 -23.155 1.00 70.64 A C
375
+ ATOM 375 OE1 GLU A 43 15.895 56.851 -24.052 1.00 75.18 A O
376
+ ATOM 376 OE2 GLU A 43 17.675 55.758 -23.385 1.00 74.27 A O1-
377
+ ATOM 377 H GLU A 43 14.229 57.192 -22.953 1.00 0.00 A H
378
+ ATOM 378 N PRO A 44 14.533 60.675 -20.552 1.00 25.20 A N
379
+ ATOM 379 CA PRO A 44 14.754 62.113 -20.709 1.00 25.10 A C
380
+ ATOM 380 C PRO A 44 16.243 62.473 -20.872 1.00 26.61 A C
381
+ ATOM 381 O PRO A 44 17.105 61.850 -20.267 1.00 29.02 A O
382
+ ATOM 382 CB PRO A 44 14.195 62.679 -19.395 1.00 19.62 A C
383
+ ATOM 383 CG PRO A 44 13.110 61.749 -19.058 1.00 21.92 A C
384
+ ATOM 384 CD PRO A 44 13.750 60.402 -19.335 1.00 18.83 A C
385
+ ATOM 385 N ARG A 45 16.528 63.439 -21.742 1.00 27.43 A N
386
+ ATOM 386 CA ARG A 45 17.875 63.933 -21.964 1.00 25.28 A C
387
+ ATOM 387 C ARG A 45 17.876 65.090 -20.981 1.00 22.66 A C
388
+ ATOM 388 O ARG A 45 16.906 65.850 -20.926 1.00 25.12 A O
389
+ ATOM 389 CB ARG A 45 18.028 64.488 -23.386 1.00 32.63 A C
390
+ ATOM 390 CG ARG A 45 17.525 63.562 -24.488 1.00 42.81 A C
391
+ ATOM 391 CD ARG A 45 18.355 62.295 -24.560 1.00 50.48 A C
392
+ ATOM 392 NE ARG A 45 17.537 61.091 -24.696 1.00 57.00 A N
393
+ ATOM 393 CZ ARG A 45 17.594 60.248 -25.728 1.00 60.21 A C
394
+ ATOM 394 NH1 ARG A 45 18.433 60.483 -26.743 1.00 56.34 A N1+
395
+ ATOM 395 NH2 ARG A 45 16.865 59.127 -25.706 1.00 64.07 A N
396
+ ATOM 396 H ARG A 45 15.796 63.843 -22.236 1.00 0.00 A H
397
+ ATOM 397 HE ARG A 45 16.892 60.897 -23.964 1.00 0.00 A H
398
+ ATOM 398 HH11 ARG A 45 19.032 61.291 -26.729 1.00 0.00 A H
399
+ ATOM 399 HH12 ARG A 45 18.503 59.869 -27.529 1.00 0.00 A H
400
+ ATOM 400 HH21 ARG A 45 16.294 58.876 -24.902 1.00 0.00 A H
401
+ ATOM 401 HH22 ARG A 45 16.850 58.425 -26.417 1.00 0.00 A H
402
+ ATOM 402 N SER A 46 18.932 65.219 -20.187 1.00 21.54 A N
403
+ ATOM 403 CA SER A 46 19.001 66.291 -19.192 1.00 19.49 A C
404
+ ATOM 404 C SER A 46 20.289 67.084 -19.323 1.00 15.15 A C
405
+ ATOM 405 O SER A 46 21.369 66.505 -19.426 1.00 21.08 A O
406
+ ATOM 406 CB SER A 46 18.885 65.686 -17.785 1.00 22.78 A C
407
+ ATOM 407 OG SER A 46 18.817 66.685 -16.770 1.00 30.92 A O
408
+ ATOM 408 H SER A 46 19.691 64.606 -20.248 1.00 0.00 A H
409
+ ATOM 409 HG SER A 46 19.658 67.169 -16.829 1.00 0.00 A H
410
+ ATOM 410 N GLY A 47 20.175 68.403 -19.383 1.00 9.93 A N
411
+ ATOM 411 CA GLY A 47 21.360 69.224 -19.488 1.00 15.35 A C
412
+ ATOM 412 C GLY A 47 21.740 69.608 -20.899 1.00 20.79 A C
413
+ ATOM 413 O GLY A 47 21.493 68.864 -21.845 1.00 22.57 A O
414
+ ATOM 414 H GLY A 47 19.297 68.836 -19.380 1.00 0.00 A H
415
+ ATOM 415 N THR A 48 22.368 70.774 -21.013 1.00 22.30 A N
416
+ ATOM 416 CA THR A 48 22.815 71.356 -22.272 1.00 27.44 A C
417
+ ATOM 417 C THR A 48 23.584 70.425 -23.210 1.00 26.51 A C
418
+ ATOM 418 O THR A 48 23.317 70.391 -24.411 1.00 28.00 A O
419
+ ATOM 419 CB THR A 48 23.674 72.587 -21.977 1.00 29.56 A C
420
+ ATOM 420 CG2 THR A 48 23.921 73.382 -23.235 1.00 36.98 A C
421
+ ATOM 421 OG1 THR A 48 22.983 73.417 -21.035 1.00 38.91 A O
422
+ ATOM 422 H THR A 48 22.518 71.316 -20.206 1.00 0.00 A H
423
+ ATOM 423 HG1 THR A 48 23.493 74.188 -20.725 1.00 0.00 A H
424
+ ATOM 424 N ALA A 49 24.546 69.686 -22.670 1.00 26.42 A N
425
+ ATOM 425 CA ALA A 49 25.350 68.768 -23.472 1.00 25.21 A C
426
+ ATOM 426 C ALA A 49 24.525 67.668 -24.103 1.00 22.40 A C
427
+ ATOM 427 O ALA A 49 24.779 67.304 -25.245 1.00 24.67 A O
428
+ ATOM 428 CB ALA A 49 26.454 68.160 -22.642 1.00 24.80 A C
429
+ ATOM 429 H ALA A 49 24.722 69.753 -21.705 1.00 0.00 A H
430
+ ATOM 430 N ALA A 50 23.552 67.136 -23.361 1.00 18.24 A N
431
+ ATOM 431 CA ALA A 50 22.692 66.059 -23.862 1.00 16.13 A C
432
+ ATOM 432 C ALA A 50 21.644 66.566 -24.832 1.00 15.85 A C
433
+ ATOM 433 O ALA A 50 21.205 65.829 -25.709 1.00 21.53 A O
434
+ ATOM 434 CB ALA A 50 22.018 65.338 -22.721 1.00 18.85 A C
435
+ ATOM 435 H ALA A 50 23.408 67.471 -22.450 1.00 0.00 A H
436
+ ATOM 436 N ILE A 51 21.209 67.807 -24.641 1.00 14.98 A N
437
+ ATOM 437 CA ILE A 51 20.213 68.425 -25.517 1.00 16.38 A C
438
+ ATOM 438 C ILE A 51 20.848 68.716 -26.892 1.00 17.29 A C
439
+ ATOM 439 O ILE A 51 20.271 68.383 -27.932 1.00 17.52 A O
440
+ ATOM 440 CB ILE A 51 19.589 69.701 -24.851 1.00 17.04 A C
441
+ ATOM 441 CG1 ILE A 51 18.731 69.286 -23.637 1.00 18.33 A C
442
+ ATOM 442 CG2 ILE A 51 18.750 70.483 -25.859 1.00 17.15 A C
443
+ ATOM 443 CD1 ILE A 51 18.279 70.417 -22.747 1.00 14.09 A C
444
+ ATOM 444 H ILE A 51 21.558 68.311 -23.875 1.00 0.00 A H
445
+ ATOM 445 N ARG A 52 22.065 69.256 -26.885 1.00 18.63 A N
446
+ ATOM 446 CA ARG A 52 22.802 69.549 -28.119 1.00 19.29 A C
447
+ ATOM 447 C ARG A 52 23.059 68.258 -28.919 1.00 19.10 A C
448
+ ATOM 448 O ARG A 52 22.758 68.180 -30.114 1.00 24.51 A O
449
+ ATOM 449 CB ARG A 52 24.129 70.235 -27.788 1.00 21.72 A C
450
+ ATOM 450 CG ARG A 52 24.843 70.737 -29.003 1.00 24.00 A C
451
+ ATOM 451 CD ARG A 52 26.227 71.218 -28.716 1.00 21.97 A C
452
+ ATOM 452 NE ARG A 52 26.898 71.544 -29.971 1.00 29.26 A N
453
+ ATOM 453 CZ ARG A 52 26.958 72.767 -30.486 1.00 30.33 A C
454
+ ATOM 454 NH1 ARG A 52 26.384 73.771 -29.843 1.00 28.11 A N1+
455
+ ATOM 455 NH2 ARG A 52 27.608 72.995 -31.626 1.00 22.98 A N
456
+ ATOM 456 H ARG A 52 22.480 69.475 -26.023 1.00 0.00 A H
457
+ ATOM 457 HE ARG A 52 27.290 70.783 -30.464 1.00 0.00 A H
458
+ ATOM 458 HH11 ARG A 52 25.877 73.672 -28.996 1.00 0.00 A H
459
+ ATOM 459 HH12 ARG A 52 26.387 74.709 -30.252 1.00 0.00 A H
460
+ ATOM 460 HH21 ARG A 52 28.079 72.275 -32.140 1.00 0.00 A H
461
+ ATOM 461 HH22 ARG A 52 27.637 73.938 -31.999 1.00 0.00 A H
462
+ ATOM 462 N GLU A 53 23.544 67.223 -28.242 1.00 19.57 A N
463
+ ATOM 463 CA GLU A 53 23.818 65.938 -28.880 1.00 22.65 A C
464
+ ATOM 464 C GLU A 53 22.549 65.369 -29.537 1.00 21.36 A C
465
+ ATOM 465 O GLU A 53 22.580 64.901 -30.692 1.00 20.72 A O
466
+ ATOM 466 CB GLU A 53 24.465 64.968 -27.854 1.00 29.62 A C
467
+ ATOM 467 CG GLU A 53 23.751 63.618 -27.551 1.00 43.16 A C
468
+ ATOM 468 CD GLU A 53 24.079 62.493 -28.534 1.00 48.92 A C
469
+ ATOM 469 OE1 GLU A 53 25.097 62.583 -29.251 1.00 49.44 A O
470
+ ATOM 470 OE2 GLU A 53 23.311 61.508 -28.602 1.00 48.51 A O1-
471
+ ATOM 471 H GLU A 53 23.760 67.315 -27.289 1.00 0.00 A H
472
+ ATOM 472 N PHE A 54 21.426 65.473 -28.831 1.00 15.74 A N
473
+ ATOM 473 CA PHE A 54 20.163 64.981 -29.346 1.00 14.67 A C
474
+ ATOM 474 C PHE A 54 19.739 65.757 -30.596 1.00 14.61 A C
475
+ ATOM 475 O PHE A 54 19.359 65.181 -31.611 1.00 16.88 A O
476
+ ATOM 476 CB PHE A 54 19.076 65.093 -28.284 1.00 16.51 A C
477
+ ATOM 477 CG PHE A 54 17.700 64.832 -28.818 1.00 15.40 A C
478
+ ATOM 478 CD1 PHE A 54 17.260 63.526 -29.028 1.00 19.05 A C
479
+ ATOM 479 CD2 PHE A 54 16.869 65.889 -29.176 1.00 14.87 A C
480
+ ATOM 480 CE1 PHE A 54 16.018 63.272 -29.591 1.00 20.66 A C
481
+ ATOM 481 CE2 PHE A 54 15.625 65.655 -29.741 1.00 22.45 A C
482
+ ATOM 482 CZ PHE A 54 15.196 64.334 -29.952 1.00 21.58 A C
483
+ ATOM 483 H PHE A 54 21.466 65.860 -27.931 1.00 0.00 A H
484
+ ATOM 484 N TYR A 55 19.812 67.073 -30.538 1.00 15.57 A N
485
+ ATOM 485 CA TYR A 55 19.419 67.835 -31.699 1.00 15.97 A C
486
+ ATOM 486 C TYR A 55 20.386 67.726 -32.851 1.00 15.54 A C
487
+ ATOM 487 O TYR A 55 19.967 67.836 -34.002 1.00 17.97 A O
488
+ ATOM 488 CB TYR A 55 19.126 69.271 -31.335 1.00 16.17 A C
489
+ ATOM 489 CG TYR A 55 17.743 69.413 -30.737 1.00 19.24 A C
490
+ ATOM 490 CD1 TYR A 55 16.595 69.253 -31.523 1.00 16.59 A C
491
+ ATOM 491 CD2 TYR A 55 17.580 69.717 -29.384 1.00 21.43 A C
492
+ ATOM 492 CE1 TYR A 55 15.323 69.397 -30.969 1.00 17.69 A C
493
+ ATOM 493 CE2 TYR A 55 16.319 69.862 -28.830 1.00 24.02 A C
494
+ ATOM 494 CZ TYR A 55 15.200 69.702 -29.623 1.00 18.58 A C
495
+ ATOM 495 OH TYR A 55 13.971 69.854 -29.031 1.00 16.62 A O
496
+ ATOM 496 H TYR A 55 20.122 67.516 -29.720 1.00 0.00 A H
497
+ ATOM 497 HH TYR A 55 13.258 69.756 -29.685 1.00 0.00 A H
498
+ ATOM 498 N ALA A 56 21.664 67.470 -32.552 1.00 13.11 A N
499
+ ATOM 499 CA ALA A 56 22.676 67.316 -33.597 1.00 13.94 A C
500
+ ATOM 500 C ALA A 56 22.334 66.072 -34.401 1.00 14.20 A C
501
+ ATOM 501 O ALA A 56 22.427 66.070 -35.624 1.00 19.71 A O
502
+ ATOM 502 CB ALA A 56 24.075 67.195 -32.993 1.00 17.34 A C
503
+ ATOM 503 H ALA A 56 21.936 67.397 -31.617 1.00 0.00 A H
504
+ ATOM 504 N ASN A 57 21.911 65.022 -33.706 1.00 18.33 A N
505
+ ATOM 505 CA ASN A 57 21.521 63.772 -34.353 1.00 18.10 A C
506
+ ATOM 506 C ASN A 57 20.281 63.960 -35.190 1.00 22.69 A C
507
+ ATOM 507 O ASN A 57 20.120 63.306 -36.214 1.00 25.44 A O
508
+ ATOM 508 CB ASN A 57 21.177 62.712 -33.319 1.00 21.21 A C
509
+ ATOM 509 CG ASN A 57 22.358 61.920 -32.885 1.00 29.38 A C
510
+ ATOM 510 ND2 ASN A 57 22.662 61.977 -31.588 1.00 38.74 A N
511
+ ATOM 511 OD1 ASN A 57 23.002 61.241 -33.697 1.00 37.38 A O
512
+ ATOM 512 H ASN A 57 21.873 65.098 -32.727 1.00 0.00 A H
513
+ ATOM 513 HD21 ASN A 57 22.139 62.582 -31.013 1.00 0.00 A H
514
+ ATOM 514 HD22 ASN A 57 23.344 61.421 -31.155 1.00 0.00 A H
515
+ ATOM 515 N SER A 58 19.359 64.777 -34.691 1.00 20.80 A N
516
+ ATOM 516 CA SER A 58 18.097 65.041 -35.371 1.00 17.38 A C
517
+ ATOM 517 C SER A 58 18.311 65.791 -36.668 1.00 19.47 A C
518
+ ATOM 518 O SER A 58 17.671 65.508 -37.674 1.00 21.88 A O
519
+ ATOM 519 CB SER A 58 17.151 65.827 -34.449 1.00 17.00 A C
520
+ ATOM 520 OG SER A 58 16.738 65.021 -33.356 1.00 20.95 A O
521
+ ATOM 521 H SER A 58 19.507 65.199 -33.818 1.00 0.00 A H
522
+ ATOM 522 HG SER A 58 16.358 64.202 -33.683 1.00 0.00 A H
523
+ ATOM 523 N LEU A 59 19.245 66.726 -36.652 1.00 18.71 A N
524
+ ATOM 524 CA LEU A 59 19.536 67.509 -37.835 1.00 16.24 A C
525
+ ATOM 525 C LEU A 59 20.444 66.809 -38.853 1.00 13.82 A C
526
+ ATOM 526 O LEU A 59 20.937 67.438 -39.788 1.00 14.80 A O
527
+ ATOM 527 CB LEU A 59 20.108 68.852 -37.425 1.00 14.05 A C
528
+ ATOM 528 CG LEU A 59 19.145 69.698 -36.596 1.00 12.88 A C
529
+ ATOM 529 CD1 LEU A 59 19.927 70.882 -36.106 1.00 17.02 A C
530
+ ATOM 530 CD2 LEU A 59 17.959 70.167 -37.409 1.00 11.78 A C
531
+ ATOM 531 H LEU A 59 19.753 66.901 -35.830 1.00 0.00 A H
532
+ ATOM 532 N LYS A 60 20.658 65.514 -38.657 1.00 12.12 A N
533
+ ATOM 533 CA LYS A 60 21.456 64.686 -39.551 1.00 13.05 A C
534
+ ATOM 534 C LYS A 60 20.789 64.780 -40.927 1.00 16.30 A C
535
+ ATOM 535 O LYS A 60 21.464 64.851 -41.956 1.00 17.91 A O
536
+ ATOM 536 CB LYS A 60 21.391 63.235 -39.078 1.00 17.73 A C
537
+ ATOM 537 CG LYS A 60 22.692 62.486 -39.059 1.00 23.84 A C
538
+ ATOM 538 CD LYS A 60 23.522 62.797 -37.839 1.00 27.57 A C
539
+ ATOM 539 CE LYS A 60 23.596 61.594 -36.905 1.00 21.21 A C
540
+ ATOM 540 NZ LYS A 60 24.341 60.433 -37.453 1.00 23.59 A N1+
541
+ ATOM 541 H LYS A 60 20.266 65.084 -37.873 1.00 0.00 A H
542
+ ATOM 542 HZ1 LYS A 60 23.931 60.114 -38.358 1.00 0.00 A H
543
+ ATOM 543 HZ2 LYS A 60 25.314 60.730 -37.672 1.00 0.00 A H
544
+ ATOM 544 HZ3 LYS A 60 24.354 59.650 -36.780 1.00 0.00 A H
545
+ ATOM 545 N LEU A 61 19.455 64.758 -40.925 1.00 19.54 A N
546
+ ATOM 546 CA LEU A 61 18.634 64.842 -42.131 1.00 19.62 A C
547
+ ATOM 547 C LEU A 61 18.053 66.240 -42.191 1.00 22.00 A C
548
+ ATOM 548 O LEU A 61 17.749 66.824 -41.156 1.00 23.73 A O
549
+ ATOM 549 CB LEU A 61 17.455 63.864 -42.056 1.00 21.85 A C
550
+ ATOM 550 CG LEU A 61 17.726 62.376 -41.884 1.00 26.24 A C
551
+ ATOM 551 CD1 LEU A 61 16.446 61.604 -42.034 1.00 22.46 A C
552
+ ATOM 552 CD2 LEU A 61 18.709 61.932 -42.935 1.00 32.42 A C
553
+ ATOM 553 H LEU A 61 18.984 64.713 -40.066 1.00 0.00 A H
554
+ ATOM 554 N PRO A 62 17.943 66.824 -43.396 1.00 22.42 A N
555
+ ATOM 555 CA PRO A 62 17.375 68.170 -43.485 1.00 22.72 A C
556
+ ATOM 556 C PRO A 62 15.894 68.092 -43.131 1.00 23.49 A C
557
+ ATOM 557 O PRO A 62 15.174 67.226 -43.638 1.00 22.02 A O
558
+ ATOM 558 CB PRO A 62 17.614 68.543 -44.945 1.00 26.38 A C
559
+ ATOM 559 CG PRO A 62 17.666 67.216 -45.647 1.00 24.89 A C
560
+ ATOM 560 CD PRO A 62 18.425 66.358 -44.707 1.00 21.44 A C
561
+ ATOM 561 N LEU A 63 15.442 68.975 -42.250 1.00 22.90 A N
562
+ ATOM 562 CA LEU A 63 14.058 68.935 -41.817 1.00 16.78 A C
563
+ ATOM 563 C LEU A 63 13.300 70.164 -42.231 1.00 17.38 A C
564
+ ATOM 564 O LEU A 63 13.857 71.258 -42.293 1.00 18.87 A O
565
+ ATOM 565 CB LEU A 63 13.998 68.836 -40.297 1.00 11.97 A C
566
+ ATOM 566 CG LEU A 63 14.823 67.776 -39.574 1.00 10.10 A C
567
+ ATOM 567 CD1 LEU A 63 14.699 68.081 -38.117 1.00 9.09 A C
568
+ ATOM 568 CD2 LEU A 63 14.360 66.334 -39.876 1.00 9.67 A C
569
+ ATOM 569 H LEU A 63 16.025 69.675 -41.895 1.00 0.00 A H
570
+ ATOM 570 N ALA A 64 12.027 69.964 -42.544 1.00 19.85 A N
571
+ ATOM 571 CA ALA A 64 11.115 71.045 -42.911 1.00 18.67 A C
572
+ ATOM 572 C ALA A 64 10.016 70.917 -41.862 1.00 14.97 A C
573
+ ATOM 573 O ALA A 64 9.274 69.926 -41.837 1.00 15.53 A O
574
+ ATOM 574 CB ALA A 64 10.548 70.830 -44.307 1.00 18.29 A C
575
+ ATOM 575 H ALA A 64 11.673 69.048 -42.532 1.00 0.00 A H
576
+ ATOM 576 N VAL A 65 9.992 71.857 -40.928 1.00 18.71 A N
577
+ ATOM 577 CA VAL A 65 9.012 71.827 -39.863 1.00 18.12 A C
578
+ ATOM 578 C VAL A 65 8.152 73.064 -39.940 1.00 14.97 A C
579
+ ATOM 579 O VAL A 65 8.641 74.129 -40.261 1.00 18.06 A O
580
+ ATOM 580 CB VAL A 65 9.699 71.646 -38.470 1.00 17.54 A C
581
+ ATOM 581 CG1 VAL A 65 11.135 72.002 -38.542 1.00 18.32 A C
582
+ ATOM 582 CG2 VAL A 65 9.028 72.462 -37.429 1.00 17.88 A C
583
+ ATOM 583 H VAL A 65 10.612 72.612 -40.980 1.00 0.00 A H
584
+ ATOM 584 N GLU A 66 6.855 72.918 -39.696 1.00 16.34 A N
585
+ ATOM 585 CA GLU A 66 5.952 74.061 -39.752 1.00 19.15 A C
586
+ ATOM 586 C GLU A 66 4.853 73.956 -38.722 1.00 15.51 A C
587
+ ATOM 587 O GLU A 66 4.336 72.874 -38.480 1.00 17.34 A O
588
+ ATOM 588 CB GLU A 66 5.283 74.137 -41.128 1.00 26.88 A C
589
+ ATOM 589 CG GLU A 66 4.292 73.007 -41.373 1.00 38.82 A C
590
+ ATOM 590 CD GLU A 66 3.409 73.196 -42.604 1.00 49.05 A C
591
+ ATOM 591 OE1 GLU A 66 3.507 74.262 -43.261 1.00 55.93 A O
592
+ ATOM 592 OE2 GLU A 66 2.612 72.266 -42.909 1.00 52.01 A O1-
593
+ ATOM 593 H GLU A 66 6.490 72.032 -39.486 1.00 0.00 A H
594
+ ATOM 594 N LEU A 67 4.489 75.078 -38.118 1.00 14.88 A N
595
+ ATOM 595 CA LEU A 67 3.388 75.087 -37.152 1.00 16.04 A C
596
+ ATOM 596 C LEU A 67 2.134 74.870 -37.982 1.00 13.87 A C
597
+ ATOM 597 O LEU A 67 2.047 75.397 -39.083 1.00 23.45 A O
598
+ ATOM 598 CB LEU A 67 3.296 76.443 -36.451 1.00 13.88 A C
599
+ ATOM 599 CG LEU A 67 4.275 76.657 -35.301 1.00 20.67 A C
600
+ ATOM 600 CD1 LEU A 67 4.330 78.143 -34.900 1.00 25.36 A C
601
+ ATOM 601 CD2 LEU A 67 3.868 75.767 -34.142 1.00 23.30 A C
602
+ ATOM 602 H LEU A 67 4.930 75.912 -38.366 1.00 0.00 A H
603
+ ATOM 603 N THR A 68 1.176 74.089 -37.495 1.00 15.39 A N
604
+ ATOM 604 CA THR A 68 -0.032 73.866 -38.280 1.00 13.51 A C
605
+ ATOM 605 C THR A 68 -1.275 74.480 -37.689 1.00 13.48 A C
606
+ ATOM 606 O THR A 68 -2.317 74.459 -38.310 1.00 14.79 A O
607
+ ATOM 607 CB THR A 68 -0.297 72.392 -38.512 1.00 9.45 A C
608
+ ATOM 608 CG2 THR A 68 0.755 71.809 -39.466 1.00 10.27 A C
609
+ ATOM 609 OG1 THR A 68 -0.247 71.711 -37.257 1.00 15.66 A O
610
+ ATOM 610 H THR A 68 1.276 73.696 -36.603 1.00 0.00 A H
611
+ ATOM 611 HG1 THR A 68 0.629 71.871 -36.880 1.00 0.00 A H
612
+ ATOM 612 N GLN A 69 -1.158 75.032 -36.489 1.00 12.31 A N
613
+ ATOM 613 CA GLN A 69 -2.273 75.656 -35.783 1.00 11.58 A C
614
+ ATOM 614 C GLN A 69 -1.700 76.685 -34.837 1.00 15.38 A C
615
+ ATOM 615 O GLN A 69 -0.498 76.728 -34.605 1.00 15.03 A O
616
+ ATOM 616 CB GLN A 69 -3.032 74.625 -34.946 1.00 9.10 A C
617
+ ATOM 617 CG GLN A 69 -4.205 73.982 -35.640 1.00 8.88 A C
618
+ ATOM 618 CD GLN A 69 -4.936 73.018 -34.760 1.00 14.81 A C
619
+ ATOM 619 NE2 GLN A 69 -5.731 72.163 -35.371 1.00 14.48 A N
620
+ ATOM 620 OE1 GLN A 69 -4.797 73.041 -33.527 1.00 24.74 A O
621
+ ATOM 621 H GLN A 69 -0.290 75.078 -36.036 1.00 0.00 A H
622
+ ATOM 622 HE21 GLN A 69 -5.800 72.199 -36.347 1.00 0.00 A H
623
+ ATOM 623 HE22 GLN A 69 -6.234 71.521 -34.825 1.00 0.00 A H
624
+ ATOM 624 N GLU A 70 -2.575 77.494 -34.267 1.00 17.67 A N
625
+ ATOM 625 CA GLU A 70 -2.175 78.510 -33.316 1.00 15.37 A C
626
+ ATOM 626 C GLU A 70 -1.456 77.860 -32.126 1.00 12.95 A C
627
+ ATOM 627 O GLU A 70 -1.678 76.684 -31.812 1.00 15.83 A O
628
+ ATOM 628 CB GLU A 70 -3.433 79.251 -32.846 1.00 15.22 A C
629
+ ATOM 629 CG GLU A 70 -4.542 78.332 -32.266 1.00 13.80 A C
630
+ ATOM 630 CD GLU A 70 -5.831 79.077 -31.979 1.00 11.85 A C
631
+ ATOM 631 OE1 GLU A 70 -5.932 79.770 -30.956 1.00 19.78 A O
632
+ ATOM 632 OE2 GLU A 70 -6.766 78.964 -32.783 1.00 15.86 A O1-
633
+ ATOM 633 H GLU A 70 -3.520 77.395 -34.476 1.00 0.00 A H
634
+ ATOM 634 N VAL A 71 -0.545 78.599 -31.514 1.00 13.61 A N
635
+ ATOM 635 CA VAL A 71 0.179 78.113 -30.348 1.00 12.54 A C
636
+ ATOM 636 C VAL A 71 -0.741 78.284 -29.115 1.00 13.85 A C
637
+ ATOM 637 O VAL A 71 -1.479 79.275 -29.003 1.00 11.75 A O
638
+ ATOM 638 CB VAL A 71 1.510 78.921 -30.124 1.00 10.04 A C
639
+ ATOM 639 CG1 VAL A 71 2.231 78.466 -28.840 1.00 9.76 A C
640
+ ATOM 640 CG2 VAL A 71 2.448 78.743 -31.301 1.00 6.51 A C
641
+ ATOM 641 H VAL A 71 -0.376 79.504 -31.849 1.00 0.00 A H
642
+ ATOM 642 N ARG A 72 -0.777 77.282 -28.245 1.00 11.83 A N
643
+ ATOM 643 CA ARG A 72 -1.580 77.367 -27.037 1.00 11.86 A C
644
+ ATOM 644 C ARG A 72 -0.613 77.779 -25.955 1.00 13.28 A C
645
+ ATOM 645 O ARG A 72 0.319 77.054 -25.665 1.00 20.35 A O
646
+ ATOM 646 CB ARG A 72 -2.187 76.014 -26.706 1.00 6.01 A C
647
+ ATOM 647 CG ARG A 72 -3.535 75.777 -27.333 1.00 9.72 A C
648
+ ATOM 648 CD ARG A 72 -3.509 75.793 -28.857 1.00 6.03 A C
649
+ ATOM 649 NE ARG A 72 -4.710 75.143 -29.389 1.00 7.77 A N
650
+ ATOM 650 CZ ARG A 72 -4.841 74.651 -30.617 1.00 6.68 A C
651
+ ATOM 651 NH1 ARG A 72 -3.855 74.736 -31.503 1.00 12.81 A N1+
652
+ ATOM 652 NH2 ARG A 72 -5.928 73.971 -30.929 1.00 6.00 A N
653
+ ATOM 653 H ARG A 72 -0.231 76.481 -28.394 1.00 0.00 A H
654
+ ATOM 654 HE ARG A 72 -5.477 75.136 -28.778 1.00 0.00 A H
655
+ ATOM 655 HH11 ARG A 72 -2.995 75.220 -31.296 1.00 0.00 A H
656
+ ATOM 656 HH12 ARG A 72 -3.972 74.350 -32.426 1.00 0.00 A H
657
+ ATOM 657 HH21 ARG A 72 -6.650 73.797 -30.256 1.00 0.00 A H
658
+ ATOM 658 HH22 ARG A 72 -6.066 73.585 -31.852 1.00 0.00 A H
659
+ ATOM 659 N ALA A 73 -0.803 78.947 -25.375 1.00 11.78 A N
660
+ ATOM 660 CA ALA A 73 0.105 79.402 -24.342 1.00 14.51 A C
661
+ ATOM 661 C ALA A 73 -0.669 79.861 -23.090 1.00 18.31 A C
662
+ ATOM 662 O ALA A 73 -1.619 80.658 -23.182 1.00 23.63 A O
663
+ ATOM 663 CB ALA A 73 0.982 80.523 -24.896 1.00 8.71 A C
664
+ ATOM 664 H ALA A 73 -1.566 79.514 -25.615 1.00 0.00 A H
665
+ ATOM 665 N VAL A 74 -0.263 79.356 -21.924 1.00 18.62 A N
666
+ ATOM 666 CA VAL A 74 -0.920 79.698 -20.666 1.00 18.32 A C
667
+ ATOM 667 C VAL A 74 -0.087 79.243 -19.471 1.00 20.08 A C
668
+ ATOM 668 O VAL A 74 0.538 78.184 -19.518 1.00 22.78 A O
669
+ ATOM 669 CB VAL A 74 -2.351 79.043 -20.571 1.00 12.55 A C
670
+ ATOM 670 CG1 VAL A 74 -2.258 77.535 -20.280 1.00 8.98 A C
671
+ ATOM 671 CG2 VAL A 74 -3.185 79.729 -19.505 1.00 10.59 A C
672
+ ATOM 672 H VAL A 74 0.495 78.730 -21.882 1.00 0.00 A H
673
+ ATOM 673 N ALA A 75 -0.092 80.054 -18.411 1.00 22.66 A N
674
+ ATOM 674 CA ALA A 75 0.615 79.784 -17.147 1.00 22.52 A C
675
+ ATOM 675 C ALA A 75 1.994 79.147 -17.259 1.00 22.63 A C
676
+ ATOM 676 O ALA A 75 2.236 78.075 -16.699 1.00 33.25 A O
677
+ ATOM 677 CB ALA A 75 -0.264 78.951 -16.220 1.00 21.28 A C
678
+ ATOM 678 H ALA A 75 -0.602 80.881 -18.484 1.00 0.00 A H
679
+ ATOM 679 N ASN A 76 2.903 79.823 -17.952 1.00 20.58 A N
680
+ ATOM 680 CA ASN A 76 4.268 79.325 -18.133 1.00 22.59 A C
681
+ ATOM 681 C ASN A 76 4.377 78.008 -18.900 1.00 20.82 A C
682
+ ATOM 682 O ASN A 76 5.376 77.305 -18.773 1.00 21.38 A O
683
+ ATOM 683 CB ASN A 76 4.978 79.168 -16.784 1.00 22.74 A C
684
+ ATOM 684 CG ASN A 76 4.982 80.450 -15.981 1.00 27.62 A C
685
+ ATOM 685 ND2 ASN A 76 4.488 80.373 -14.765 1.00 27.46 A N
686
+ ATOM 686 OD1 ASN A 76 5.386 81.504 -16.462 1.00 32.04 A O
687
+ ATOM 687 H ASN A 76 2.639 80.681 -18.349 1.00 0.00 A H
688
+ ATOM 688 HD21 ASN A 76 4.146 79.496 -14.486 1.00 0.00 A H
689
+ ATOM 689 HD22 ASN A 76 4.489 81.171 -14.201 1.00 0.00 A H
690
+ ATOM 690 N GLU A 77 3.355 77.661 -19.671 1.00 15.14 A N
691
+ ATOM 691 CA GLU A 77 3.375 76.442 -20.455 1.00 11.27 A C
692
+ ATOM 692 C GLU A 77 2.881 76.771 -21.852 1.00 12.76 A C
693
+ ATOM 693 O GLU A 77 2.173 77.770 -22.038 1.00 11.79 A O
694
+ ATOM 694 CB GLU A 77 2.519 75.364 -19.797 1.00 6.01 A C
695
+ ATOM 695 CG GLU A 77 3.034 74.986 -18.430 1.00 11.87 A C
696
+ ATOM 696 CD GLU A 77 2.296 73.836 -17.819 1.00 12.81 A C
697
+ ATOM 697 OE1 GLU A 77 2.053 72.814 -18.511 1.00 17.80 A O
698
+ ATOM 698 OE2 GLU A 77 1.971 73.918 -16.625 1.00 23.63 A O1-
699
+ ATOM 699 H GLU A 77 2.539 78.194 -19.717 1.00 0.00 A H
700
+ ATOM 700 N ALA A 78 3.348 76.002 -22.838 1.00 7.70 A N
701
+ ATOM 701 CA ALA A 78 2.955 76.190 -24.235 1.00 6.02 A C
702
+ ATOM 702 C ALA A 78 2.866 74.833 -24.881 1.00 7.74 A C
703
+ ATOM 703 O ALA A 78 3.611 73.927 -24.526 1.00 10.37 A O
704
+ ATOM 704 CB ALA A 78 3.958 77.040 -24.964 1.00 6.00 A C
705
+ ATOM 705 H ALA A 78 3.971 75.276 -22.627 1.00 0.00 A H
706
+ ATOM 706 N ALA A 79 1.893 74.651 -25.762 1.00 11.83 A N
707
+ ATOM 707 CA ALA A 79 1.727 73.386 -26.475 1.00 10.41 A C
708
+ ATOM 708 C ALA A 79 1.440 73.791 -27.912 1.00 14.15 A C
709
+ ATOM 709 O ALA A 79 0.759 74.788 -28.157 1.00 13.66 A O
710
+ ATOM 710 CB ALA A 79 0.569 72.601 -25.901 1.00 6.06 A C
711
+ ATOM 711 H ALA A 79 1.275 75.385 -25.969 1.00 0.00 A H
712
+ ATOM 712 N PHE A 80 1.972 73.048 -28.872 1.00 11.10 A N
713
+ ATOM 713 CA PHE A 80 1.749 73.380 -30.271 1.00 9.14 A C
714
+ ATOM 714 C PHE A 80 1.715 72.121 -31.120 1.00 9.38 A C
715
+ ATOM 715 O PHE A 80 2.266 71.090 -30.733 1.00 10.43 A O
716
+ ATOM 716 CB PHE A 80 2.826 74.360 -30.796 1.00 8.83 A C
717
+ ATOM 717 CG PHE A 80 4.252 73.914 -30.536 1.00 11.32 A C
718
+ ATOM 718 CD1 PHE A 80 4.785 73.926 -29.238 1.00 8.20 A C
719
+ ATOM 719 CD2 PHE A 80 5.069 73.502 -31.575 1.00 13.44 A C
720
+ ATOM 720 CE1 PHE A 80 6.102 73.534 -28.993 1.00 9.03 A C
721
+ ATOM 721 CE2 PHE A 80 6.389 73.114 -31.334 1.00 12.24 A C
722
+ ATOM 722 CZ PHE A 80 6.905 73.129 -30.041 1.00 11.05 A C
723
+ ATOM 723 H PHE A 80 2.509 72.255 -28.638 1.00 0.00 A H
724
+ ATOM 724 N ALA A 81 1.016 72.214 -32.251 1.00 10.99 A N
725
+ ATOM 725 CA ALA A 81 0.884 71.124 -33.206 1.00 8.80 A C
726
+ ATOM 726 C ALA A 81 1.687 71.571 -34.428 1.00 7.95 A C
727
+ ATOM 727 O ALA A 81 1.658 72.740 -34.816 1.00 7.94 A O
728
+ ATOM 728 CB ALA A 81 -0.578 70.908 -33.551 1.00 10.70 A C
729
+ ATOM 729 H ALA A 81 0.592 73.066 -32.479 1.00 0.00 A H
730
+ ATOM 730 N PHE A 82 2.454 70.658 -34.999 1.00 10.87 A N
731
+ ATOM 731 CA PHE A 82 3.289 70.998 -36.139 1.00 10.92 A C
732
+ ATOM 732 C PHE A 82 3.673 69.736 -36.902 1.00 9.53 A C
733
+ ATOM 733 O PHE A 82 3.261 68.641 -36.537 1.00 15.36 A O
734
+ ATOM 734 CB PHE A 82 4.553 71.730 -35.642 1.00 9.06 A C
735
+ ATOM 735 CG PHE A 82 5.463 70.889 -34.772 1.00 16.63 A C
736
+ ATOM 736 CD1 PHE A 82 5.068 70.480 -33.495 1.00 13.23 A C
737
+ ATOM 737 CD2 PHE A 82 6.724 70.506 -35.225 1.00 13.14 A C
738
+ ATOM 738 CE1 PHE A 82 5.917 69.703 -32.700 1.00 9.87 A C
739
+ ATOM 739 CE2 PHE A 82 7.587 69.724 -34.427 1.00 10.48 A C
740
+ ATOM 740 CZ PHE A 82 7.184 69.326 -33.175 1.00 9.88 A C
741
+ ATOM 741 H PHE A 82 2.475 69.730 -34.669 1.00 0.00 A H
742
+ ATOM 742 N ILE A 83 4.364 69.879 -38.017 1.00 10.92 A N
743
+ ATOM 743 CA ILE A 83 4.817 68.690 -38.730 1.00 10.71 A C
744
+ ATOM 744 C ILE A 83 6.314 68.819 -38.947 1.00 13.47 A C
745
+ ATOM 745 O ILE A 83 6.840 69.938 -38.968 1.00 13.07 A O
746
+ ATOM 746 CB ILE A 83 4.110 68.484 -40.105 1.00 12.41 A C
747
+ ATOM 747 CG1 ILE A 83 4.151 69.749 -40.958 1.00 15.47 A C
748
+ ATOM 748 CG2 ILE A 83 2.702 67.968 -39.913 1.00 19.29 A C
749
+ ATOM 749 CD1 ILE A 83 5.425 69.913 -41.777 1.00 32.58 A C
750
+ ATOM 750 H ILE A 83 4.593 70.768 -38.367 1.00 0.00 A H
751
+ ATOM 751 N VAL A 84 6.995 67.678 -39.010 1.00 16.93 A N
752
+ ATOM 752 CA VAL A 84 8.429 67.609 -39.284 1.00 15.26 A C
753
+ ATOM 753 C VAL A 84 8.482 66.681 -40.487 1.00 12.30 A C
754
+ ATOM 754 O VAL A 84 8.094 65.516 -40.389 1.00 10.20 A O
755
+ ATOM 755 CB VAL A 84 9.225 66.946 -38.156 1.00 20.95 A C
756
+ ATOM 756 CG1 VAL A 84 10.699 66.907 -38.516 1.00 22.12 A C
757
+ ATOM 757 CG2 VAL A 84 9.028 67.695 -36.859 1.00 22.87 A C
758
+ ATOM 758 H VAL A 84 6.512 66.832 -38.875 1.00 0.00 A H
759
+ ATOM 759 N SER A 85 8.877 67.216 -41.637 1.00 17.81 A N
760
+ ATOM 760 CA SER A 85 8.956 66.428 -42.865 1.00 18.91 A C
761
+ ATOM 761 C SER A 85 10.412 66.233 -43.220 1.00 21.30 A C
762
+ ATOM 762 O SER A 85 11.228 67.122 -43.003 1.00 21.25 A O
763
+ ATOM 763 CB SER A 85 8.298 67.176 -44.023 1.00 17.72 A C
764
+ ATOM 764 OG SER A 85 7.306 68.083 -43.570 1.00 36.30 A O
765
+ ATOM 765 H SER A 85 9.138 68.145 -41.668 1.00 0.00 A H
766
+ ATOM 766 HG SER A 85 7.749 68.805 -43.105 1.00 0.00 A H
767
+ ATOM 767 N PHE A 86 10.722 65.086 -43.807 1.00 24.08 A N
768
+ ATOM 768 CA PHE A 86 12.080 64.760 -44.234 1.00 25.09 A C
769
+ ATOM 769 C PHE A 86 11.997 63.587 -45.209 1.00 29.80 A C
770
+ ATOM 770 O PHE A 86 10.937 62.971 -45.368 1.00 31.28 A O
771
+ ATOM 771 CB PHE A 86 12.968 64.396 -43.028 1.00 25.83 A C
772
+ ATOM 772 CG PHE A 86 12.464 63.227 -42.234 1.00 23.95 A C
773
+ ATOM 773 CD1 PHE A 86 11.511 63.411 -41.228 1.00 25.22 A C
774
+ ATOM 774 CD2 PHE A 86 12.887 61.940 -42.528 1.00 25.79 A C
775
+ ATOM 775 CE1 PHE A 86 10.975 62.322 -40.527 1.00 28.59 A C
776
+ ATOM 776 CE2 PHE A 86 12.362 60.842 -41.837 1.00 25.89 A C
777
+ ATOM 777 CZ PHE A 86 11.397 61.036 -40.832 1.00 26.91 A C
778
+ ATOM 778 H PHE A 86 10.021 64.413 -43.964 1.00 0.00 A H
779
+ ATOM 779 N GLU A 87 13.100 63.296 -45.885 1.00 37.39 A N
780
+ ATOM 780 CA GLU A 87 13.135 62.186 -46.819 1.00 43.62 A C
781
+ ATOM 781 C GLU A 87 14.269 61.285 -46.409 1.00 45.60 A C
782
+ ATOM 782 O GLU A 87 15.385 61.763 -46.198 1.00 45.45 A O
783
+ ATOM 783 CB GLU A 87 13.404 62.673 -48.239 1.00 49.85 A C
784
+ ATOM 784 CG GLU A 87 12.398 63.669 -48.769 1.00 60.90 A C
785
+ ATOM 785 CD GLU A 87 12.611 63.979 -50.240 1.00 64.35 A C
786
+ ATOM 786 OE1 GLU A 87 12.669 63.021 -51.050 1.00 68.83 A O
787
+ ATOM 787 OE2 GLU A 87 12.721 65.180 -50.583 1.00 65.74 A O1-
788
+ ATOM 788 H GLU A 87 13.916 63.827 -45.774 1.00 0.00 A H
789
+ ATOM 789 N TYR A 88 13.985 60.008 -46.191 1.00 51.49 A N
790
+ ATOM 790 CA TYR A 88 15.065 59.097 -45.859 1.00 60.92 A C
791
+ ATOM 791 C TYR A 88 15.253 58.240 -47.088 1.00 66.96 A C
792
+ ATOM 792 O TYR A 88 14.530 57.261 -47.297 1.00 68.37 A O
793
+ ATOM 793 CB TYR A 88 14.781 58.224 -44.639 1.00 62.44 A C
794
+ ATOM 794 CG TYR A 88 15.987 57.378 -44.266 1.00 68.24 A C
795
+ ATOM 795 CD1 TYR A 88 17.243 57.967 -44.093 1.00 67.81 A C
796
+ ATOM 796 CD2 TYR A 88 15.891 55.990 -44.133 1.00 71.27 A C
797
+ ATOM 797 CE1 TYR A 88 18.372 57.205 -43.803 1.00 70.22 A C
798
+ ATOM 798 CE2 TYR A 88 17.023 55.212 -43.841 1.00 71.36 A C
799
+ ATOM 799 CZ TYR A 88 18.259 55.832 -43.678 1.00 71.55 A C
800
+ ATOM 800 OH TYR A 88 19.386 55.092 -43.397 1.00 73.19 A O
801
+ ATOM 801 H TYR A 88 13.069 59.692 -46.317 1.00 0.00 A H
802
+ ATOM 802 HH TYR A 88 20.138 55.683 -43.299 1.00 0.00 A H
803
+ ATOM 803 N GLN A 89 16.189 58.671 -47.926 1.00 71.77 A N
804
+ ATOM 804 CA GLN A 89 16.519 57.997 -49.175 1.00 75.65 A C
805
+ ATOM 805 C GLN A 89 15.393 58.122 -50.206 1.00 75.76 A C
806
+ ATOM 806 O GLN A 89 14.710 57.141 -50.523 1.00 77.08 A O
807
+ ATOM 807 CB GLN A 89 16.888 56.524 -48.921 1.00 78.19 A C
808
+ ATOM 808 CG GLN A 89 17.898 56.313 -47.781 1.00 84.01 A C
809
+ ATOM 809 CD GLN A 89 18.912 57.452 -47.658 1.00 88.89 A C
810
+ ATOM 810 NE2 GLN A 89 20.064 57.298 -48.302 1.00 91.91 A N
811
+ ATOM 811 OE1 GLN A 89 18.651 58.462 -46.995 1.00 92.08 A O
812
+ ATOM 812 H GLN A 89 16.710 59.458 -47.651 1.00 0.00 A H
813
+ ATOM 813 HE21 GLN A 89 20.213 56.483 -48.823 1.00 0.00 A H
814
+ ATOM 814 HE22 GLN A 89 20.726 58.016 -48.216 1.00 0.00 A H
815
+ ATOM 815 N GLY A 90 15.184 59.355 -50.678 1.00 74.54 A N
816
+ ATOM 816 CA GLY A 90 14.160 59.650 -51.678 1.00 70.86 A C
817
+ ATOM 817 C GLY A 90 12.725 59.353 -51.272 1.00 68.55 A C
818
+ ATOM 818 O GLY A 90 11.787 59.539 -52.058 1.00 67.92 A O
819
+ ATOM 819 H GLY A 90 15.715 60.101 -50.336 1.00 0.00 A H
820
+ ATOM 820 N ARG A 91 12.564 58.894 -50.033 1.00 67.07 A N
821
+ ATOM 821 CA ARG A 91 11.269 58.543 -49.470 1.00 64.55 A C
822
+ ATOM 822 C ARG A 91 10.844 59.601 -48.444 1.00 56.77 A C
823
+ ATOM 823 O ARG A 91 11.498 59.765 -47.412 1.00 53.37 A O
824
+ ATOM 824 CB ARG A 91 11.381 57.157 -48.821 1.00 70.87 A C
825
+ ATOM 825 CG ARG A 91 10.072 56.536 -48.353 1.00 82.61 A C
826
+ ATOM 826 CD ARG A 91 9.843 56.764 -46.866 1.00 91.33 A C
827
+ ATOM 827 NE ARG A 91 10.904 56.179 -46.047 1.00 99.54 A N
828
+ ATOM 828 CZ ARG A 91 11.706 56.874 -45.244 1.00104.71 A C
829
+ ATOM 829 NH1 ARG A 91 11.584 58.194 -45.143 1.00106.46 A N1+
830
+ ATOM 830 NH2 ARG A 91 12.619 56.239 -44.520 1.00108.62 A N
831
+ ATOM 831 H ARG A 91 13.335 58.777 -49.455 1.00 0.00 A H
832
+ ATOM 832 HE ARG A 91 11.009 55.209 -46.140 1.00 0.00 A H
833
+ ATOM 833 HH11 ARG A 91 10.861 58.681 -45.653 1.00 0.00 A H
834
+ ATOM 834 HH12 ARG A 91 12.138 58.762 -44.541 1.00 0.00 A H
835
+ ATOM 835 HH21 ARG A 91 12.708 55.244 -44.590 1.00 0.00 A H
836
+ ATOM 836 HH22 ARG A 91 13.232 56.706 -43.887 1.00 0.00 A H
837
+ ATOM 837 N LYS A 92 9.773 60.332 -48.745 1.00 49.39 A N
838
+ ATOM 838 CA LYS A 92 9.280 61.362 -47.838 1.00 44.20 A C
839
+ ATOM 839 C LYS A 92 8.488 60.782 -46.665 1.00 37.95 A C
840
+ ATOM 840 O LYS A 92 7.862 59.730 -46.768 1.00 36.08 A O
841
+ ATOM 841 CB LYS A 92 8.438 62.404 -48.580 1.00 47.71 A C
842
+ ATOM 842 CG LYS A 92 7.989 63.562 -47.681 1.00 54.34 A C
843
+ ATOM 843 CD LYS A 92 7.121 64.568 -48.414 1.00 61.58 A C
844
+ ATOM 844 CE LYS A 92 6.096 65.191 -47.477 1.00 66.71 A C
845
+ ATOM 845 NZ LYS A 92 5.102 64.179 -46.991 1.00 72.99 A N1+
846
+ ATOM 846 H LYS A 92 9.328 60.189 -49.603 1.00 0.00 A H
847
+ ATOM 847 HZ1 LYS A 92 4.604 63.670 -47.752 1.00 0.00 A H
848
+ ATOM 848 HZ2 LYS A 92 5.431 63.405 -46.381 1.00 0.00 A H
849
+ ATOM 849 HZ3 LYS A 92 4.294 64.558 -46.437 1.00 0.00 A H
850
+ ATOM 850 N THR A 93 8.506 61.483 -45.548 1.00 31.62 A N
851
+ ATOM 851 CA THR A 93 7.819 61.026 -44.368 1.00 26.75 A C
852
+ ATOM 852 C THR A 93 7.462 62.279 -43.596 1.00 24.78 A C
853
+ ATOM 853 O THR A 93 8.219 63.251 -43.612 1.00 23.01 A O
854
+ ATOM 854 CB THR A 93 8.769 60.117 -43.588 1.00 30.50 A C
855
+ ATOM 855 CG2 THR A 93 8.131 59.574 -42.325 1.00 28.73 A C
856
+ ATOM 856 OG1 THR A 93 9.162 59.034 -44.440 1.00 27.75 A O
857
+ ATOM 857 H THR A 93 9.028 62.313 -45.474 1.00 0.00 A H
858
+ ATOM 858 HG1 THR A 93 8.374 58.813 -44.959 1.00 0.00 A H
859
+ ATOM 859 N VAL A 94 6.275 62.288 -43.000 1.00 21.14 A N
860
+ ATOM 860 CA VAL A 94 5.775 63.426 -42.232 1.00 21.95 A C
861
+ ATOM 861 C VAL A 94 5.327 62.965 -40.853 1.00 23.50 A C
862
+ ATOM 862 O VAL A 94 4.408 62.148 -40.745 1.00 25.33 A O
863
+ ATOM 863 CB VAL A 94 4.537 64.069 -42.904 1.00 19.68 A C
864
+ ATOM 864 CG1 VAL A 94 3.883 65.037 -41.942 1.00 16.72 A C
865
+ ATOM 865 CG2 VAL A 94 4.927 64.795 -44.163 1.00 15.77 A C
866
+ ATOM 866 H VAL A 94 5.699 61.526 -43.010 1.00 0.00 A H
867
+ ATOM 867 N VAL A 95 5.963 63.478 -39.801 1.00 20.80 A N
868
+ ATOM 868 CA VAL A 95 5.589 63.108 -38.432 1.00 14.02 A C
869
+ ATOM 869 C VAL A 95 4.834 64.298 -37.872 1.00 14.94 A C
870
+ ATOM 870 O VAL A 95 5.287 65.442 -38.003 1.00 16.50 A O
871
+ ATOM 871 CB VAL A 95 6.818 62.832 -37.558 1.00 12.89 A C
872
+ ATOM 872 CG1 VAL A 95 6.403 62.428 -36.191 1.00 13.40 A C
873
+ ATOM 873 CG2 VAL A 95 7.661 61.729 -38.181 1.00 18.43 A C
874
+ ATOM 874 H VAL A 95 6.682 64.134 -39.938 1.00 0.00 A H
875
+ ATOM 875 N ALA A 96 3.673 64.034 -37.281 1.00 13.39 A N
876
+ ATOM 876 CA ALA A 96 2.839 65.089 -36.704 1.00 13.84 A C
877
+ ATOM 877 C ALA A 96 2.736 64.955 -35.178 1.00 13.73 A C
878
+ ATOM 878 O ALA A 96 1.908 64.210 -34.647 1.00 14.82 A O
879
+ ATOM 879 CB ALA A 96 1.446 65.064 -37.339 1.00 10.54 A C
880
+ ATOM 880 H ALA A 96 3.360 63.116 -37.200 1.00 0.00 A H
881
+ ATOM 881 N PRO A 97 3.606 65.653 -34.449 1.00 13.05 A N
882
+ ATOM 882 CA PRO A 97 3.549 65.558 -32.998 1.00 10.55 A C
883
+ ATOM 883 C PRO A 97 2.922 66.810 -32.410 1.00 13.35 A C
884
+ ATOM 884 O PRO A 97 2.604 67.760 -33.140 1.00 11.58 A O
885
+ ATOM 885 CB PRO A 97 5.033 65.520 -32.609 1.00 6.00 A C
886
+ ATOM 886 CG PRO A 97 5.820 65.918 -33.910 1.00 7.23 A C
887
+ ATOM 887 CD PRO A 97 4.788 66.416 -34.870 1.00 10.87 A C
888
+ ATOM 888 N ILE A 98 2.685 66.767 -31.107 1.00 11.92 A N
889
+ ATOM 889 CA ILE A 98 2.211 67.929 -30.374 1.00 12.48 A C
890
+ ATOM 890 C ILE A 98 3.323 68.053 -29.320 1.00 10.15 A C
891
+ ATOM 891 O ILE A 98 3.655 67.071 -28.655 1.00 11.57 A O
892
+ ATOM 892 CB ILE A 98 0.796 67.718 -29.684 1.00 15.20 A C
893
+ ATOM 893 CG1 ILE A 98 -0.334 67.826 -30.724 1.00 8.52 A C
894
+ ATOM 894 CG2 ILE A 98 0.540 68.823 -28.631 1.00 9.28 A C
895
+ ATOM 895 CD1 ILE A 98 -1.721 67.689 -30.134 1.00 6.04 A C
896
+ ATOM 896 H ILE A 98 2.859 65.943 -30.595 1.00 0.00 A H
897
+ ATOM 897 N ASP A 99 3.986 69.198 -29.264 1.00 6.01 A N
898
+ ATOM 898 CA ASP A 99 5.028 69.386 -28.276 1.00 7.15 A C
899
+ ATOM 899 C ASP A 99 4.491 70.205 -27.127 1.00 10.60 A C
900
+ ATOM 900 O ASP A 99 3.639 71.081 -27.326 1.00 10.01 A O
901
+ ATOM 901 CB ASP A 99 6.205 70.150 -28.841 1.00 10.22 A C
902
+ ATOM 902 CG ASP A 99 7.175 69.277 -29.617 1.00 6.01 A C
903
+ ATOM 903 OD1 ASP A 99 6.924 68.080 -29.890 1.00 11.95 A O
904
+ ATOM 904 OD2 ASP A 99 8.219 69.846 -29.961 1.00 8.57 A O1-
905
+ ATOM 905 H ASP A 99 3.765 69.922 -29.882 1.00 0.00 A H
906
+ ATOM 906 HD2 ASP A 99 8.781 69.224 -30.451 1.00 0.00 A H
907
+ ATOM 907 N HIS A 100 5.043 69.955 -25.940 1.00 11.43 A N
908
+ ATOM 908 CA HIS A 100 4.678 70.666 -24.725 1.00 8.74 A C
909
+ ATOM 909 C HIS A 100 5.947 71.261 -24.104 1.00 12.40 A C
910
+ ATOM 910 O HIS A 100 6.892 70.547 -23.794 1.00 10.22 A O
911
+ ATOM 911 CB HIS A 100 3.998 69.713 -23.733 1.00 12.71 A C
912
+ ATOM 912 CG HIS A 100 3.659 70.350 -22.414 1.00 20.40 A C
913
+ ATOM 913 CD2 HIS A 100 3.150 71.568 -22.119 1.00 13.89 A C
914
+ ATOM 914 ND1 HIS A 100 3.837 69.711 -21.205 1.00 18.95 A N
915
+ ATOM 915 CE1 HIS A 100 3.447 70.507 -20.226 1.00 22.32 A C
916
+ ATOM 916 NE2 HIS A 100 3.028 71.641 -20.755 1.00 20.66 A N
917
+ ATOM 917 H HIS A 100 5.734 69.262 -25.919 1.00 0.00 A H
918
+ ATOM 918 HD1 HIS A 100 4.288 68.856 -21.040 1.00 0.00 A H
919
+ ATOM 919 HE2 HIS A 100 2.731 72.447 -20.266 1.00 0.00 A H
920
+ ATOM 920 N PHE A 101 5.970 72.579 -23.994 1.00 10.81 A N
921
+ ATOM 921 CA PHE A 101 7.086 73.315 -23.422 1.00 10.19 A C
922
+ ATOM 922 C PHE A 101 6.664 73.821 -22.050 1.00 12.64 A C
923
+ ATOM 923 O PHE A 101 5.489 74.119 -21.824 1.00 11.76 A O
924
+ ATOM 924 CB PHE A 101 7.395 74.560 -24.265 1.00 11.58 A C
925
+ ATOM 925 CG PHE A 101 8.232 74.303 -25.503 1.00 9.82 A C
926
+ ATOM 926 CD1 PHE A 101 8.433 73.019 -25.996 1.00 15.10 A C
927
+ ATOM 927 CD2 PHE A 101 8.847 75.374 -26.159 1.00 13.96 A C
928
+ ATOM 928 CE1 PHE A 101 9.241 72.813 -27.122 1.00 18.99 A C
929
+ ATOM 929 CE2 PHE A 101 9.655 75.174 -27.285 1.00 13.55 A C
930
+ ATOM 930 CZ PHE A 101 9.853 73.895 -27.763 1.00 8.83 A C
931
+ ATOM 931 H PHE A 101 5.212 73.095 -24.316 1.00 0.00 A H
932
+ ATOM 932 N ARG A 102 7.628 73.976 -21.154 1.00 13.07 A N
933
+ ATOM 933 CA ARG A 102 7.343 74.491 -19.835 1.00 13.20 A C
934
+ ATOM 934 C ARG A 102 8.472 75.491 -19.618 1.00 11.79 A C
935
+ ATOM 935 O ARG A 102 9.641 75.171 -19.825 1.00 13.69 A O
936
+ ATOM 936 CB ARG A 102 7.353 73.355 -18.820 1.00 20.65 A C
937
+ ATOM 937 CG ARG A 102 6.599 73.668 -17.560 1.00 31.29 A C
938
+ ATOM 938 CD ARG A 102 6.617 72.499 -16.595 1.00 43.38 A C
939
+ ATOM 939 NE ARG A 102 5.788 71.387 -17.057 1.00 46.48 A N
940
+ ATOM 940 CZ ARG A 102 4.617 71.049 -16.514 1.00 50.74 A C
941
+ ATOM 941 NH1 ARG A 102 4.133 71.744 -15.484 1.00 50.96 A N1+
942
+ ATOM 942 NH2 ARG A 102 3.940 70.000 -16.978 1.00 53.28 A N
943
+ ATOM 943 H ARG A 102 8.549 73.733 -21.383 1.00 0.00 A H
944
+ ATOM 944 HE ARG A 102 6.158 70.882 -17.819 1.00 0.00 A H
945
+ ATOM 945 HH11 ARG A 102 4.637 72.549 -15.155 1.00 0.00 A H
946
+ ATOM 946 HH12 ARG A 102 3.251 71.575 -15.046 1.00 0.00 A H
947
+ ATOM 947 HH21 ARG A 102 4.295 69.403 -17.709 1.00 0.00 A H
948
+ ATOM 948 HH22 ARG A 102 3.074 69.714 -16.579 1.00 0.00 A H
949
+ ATOM 949 N PHE A 103 8.116 76.726 -19.302 1.00 9.98 A N
950
+ ATOM 950 CA PHE A 103 9.104 77.772 -19.129 1.00 15.39 A C
951
+ ATOM 951 C PHE A 103 9.374 78.094 -17.667 1.00 20.19 A C
952
+ ATOM 952 O PHE A 103 8.618 77.699 -16.771 1.00 20.52 A O
953
+ ATOM 953 CB PHE A 103 8.662 79.056 -19.847 1.00 12.56 A C
954
+ ATOM 954 CG PHE A 103 8.441 78.896 -21.334 1.00 11.89 A C
955
+ ATOM 955 CD1 PHE A 103 7.328 78.209 -21.827 1.00 16.99 A C
956
+ ATOM 956 CD2 PHE A 103 9.297 79.485 -22.239 1.00 14.06 A C
957
+ ATOM 957 CE1 PHE A 103 7.068 78.118 -23.204 1.00 12.90 A C
958
+ ATOM 958 CE2 PHE A 103 9.051 79.401 -23.605 1.00 16.35 A C
959
+ ATOM 959 CZ PHE A 103 7.923 78.711 -24.086 1.00 16.18 A C
960
+ ATOM 960 H PHE A 103 7.174 76.928 -19.137 1.00 0.00 A H
961
+ ATOM 961 N ASN A 104 10.470 78.806 -17.432 1.00 18.03 A N
962
+ ATOM 962 CA ASN A 104 10.837 79.196 -16.083 1.00 18.86 A C
963
+ ATOM 963 C ASN A 104 10.821 80.696 -15.968 1.00 21.20 A C
964
+ ATOM 964 O ASN A 104 10.576 81.383 -16.966 1.00 26.75 A O
965
+ ATOM 965 CB ASN A 104 12.205 78.639 -15.693 1.00 18.97 A C
966
+ ATOM 966 CG ASN A 104 13.339 79.145 -16.565 1.00 15.78 A C
967
+ ATOM 967 ND2 ASN A 104 13.099 80.160 -17.373 1.00 21.10 A N
968
+ ATOM 968 OD1 ASN A 104 14.432 78.598 -16.511 1.00 23.62 A O
969
+ ATOM 969 H ASN A 104 11.032 79.032 -18.204 1.00 0.00 A H
970
+ ATOM 970 HD21 ASN A 104 12.267 80.624 -17.550 1.00 0.00 A H
971
+ ATOM 971 HD22 ASN A 104 13.944 80.392 -17.810 1.00 0.00 A H
972
+ ATOM 972 N GLY A 105 11.125 81.204 -14.775 1.00 25.04 A N
973
+ ATOM 973 CA GLY A 105 11.128 82.641 -14.549 1.00 26.04 A C
974
+ ATOM 974 C GLY A 105 11.921 83.455 -15.559 1.00 26.75 A C
975
+ ATOM 975 O GLY A 105 11.496 84.537 -15.977 1.00 32.50 A O
976
+ ATOM 976 H GLY A 105 11.324 80.606 -14.024 1.00 0.00 A H
977
+ ATOM 977 N ALA A 106 13.060 82.922 -15.982 1.00 26.82 A N
978
+ ATOM 978 CA ALA A 106 13.918 83.599 -16.947 1.00 25.44 A C
979
+ ATOM 979 C ALA A 106 13.375 83.589 -18.381 1.00 26.75 A C
980
+ ATOM 980 O ALA A 106 13.932 84.242 -19.274 1.00 31.90 A O
981
+ ATOM 981 CB ALA A 106 15.297 82.985 -16.912 1.00 27.30 A C
982
+ ATOM 982 H ALA A 106 13.345 82.059 -15.621 1.00 0.00 A H
983
+ ATOM 983 N GLY A 107 12.278 82.876 -18.600 1.00 21.30 A N
984
+ ATOM 984 CA GLY A 107 11.710 82.811 -19.928 1.00 19.15 A C
985
+ ATOM 985 C GLY A 107 12.295 81.715 -20.793 1.00 16.57 A C
986
+ ATOM 986 O GLY A 107 11.977 81.637 -21.979 1.00 19.15 A O
987
+ ATOM 987 H GLY A 107 11.817 82.417 -17.881 1.00 0.00 A H
988
+ ATOM 988 N LYS A 108 13.149 80.880 -20.221 1.00 15.74 A N
989
+ ATOM 989 CA LYS A 108 13.755 79.785 -20.953 1.00 15.79 A C
990
+ ATOM 990 C LYS A 108 12.959 78.509 -20.722 1.00 16.00 A C
991
+ ATOM 991 O LYS A 108 12.232 78.380 -19.728 1.00 19.98 A O
992
+ ATOM 992 CB LYS A 108 15.221 79.576 -20.536 1.00 21.57 A C
993
+ ATOM 993 CG LYS A 108 16.163 80.753 -20.884 1.00 30.19 A C
994
+ ATOM 994 CD LYS A 108 17.600 80.313 -21.253 1.00 41.27 A C
995
+ ATOM 995 CE LYS A 108 18.449 79.827 -20.044 1.00 48.96 A C
996
+ ATOM 996 NZ LYS A 108 19.775 79.201 -20.448 1.00 62.20 A N1+
997
+ ATOM 997 H LYS A 108 13.372 80.997 -19.288 1.00 0.00 A H
998
+ ATOM 998 HZ1 LYS A 108 19.585 78.387 -21.083 1.00 0.00 A H
999
+ ATOM 999 HZ2 LYS A 108 20.373 79.857 -20.978 1.00 0.00 A H
1000
+ ATOM 1000 HZ3 LYS A 108 20.367 78.823 -19.657 1.00 0.00 A H
1001
+ ATOM 1001 N VAL A 109 13.094 77.581 -21.661 1.00 12.86 A N
1002
+ ATOM 1002 CA VAL A 109 12.422 76.295 -21.622 1.00 10.69 A C
1003
+ ATOM 1003 C VAL A 109 13.176 75.384 -20.647 1.00 16.12 A C
1004
+ ATOM 1004 O VAL A 109 14.391 75.199 -20.754 1.00 18.17 A O
1005
+ ATOM 1005 CB VAL A 109 12.387 75.663 -23.067 1.00 6.00 A C
1006
+ ATOM 1006 CG1 VAL A 109 11.695 74.319 -23.068 1.00 6.01 A C
1007
+ ATOM 1007 CG2 VAL A 109 11.663 76.584 -24.017 1.00 7.37 A C
1008
+ ATOM 1008 H VAL A 109 13.697 77.765 -22.416 1.00 0.00 A H
1009
+ ATOM 1009 N VAL A 110 12.474 74.846 -19.661 1.00 14.02 A N
1010
+ ATOM 1010 CA VAL A 110 13.125 73.959 -18.716 1.00 11.14 A C
1011
+ ATOM 1011 C VAL A 110 12.697 72.532 -18.976 1.00 15.43 A C
1012
+ ATOM 1012 O VAL A 110 13.341 71.588 -18.504 1.00 19.88 A O
1013
+ ATOM 1013 CB VAL A 110 12.800 74.338 -17.246 1.00 14.86 A C
1014
+ ATOM 1014 CG1 VAL A 110 13.421 75.672 -16.892 1.00 17.88 A C
1015
+ ATOM 1015 CG2 VAL A 110 11.296 74.409 -17.032 1.00 18.60 A C
1016
+ ATOM 1016 H VAL A 110 11.530 75.078 -19.560 1.00 0.00 A H
1017
+ ATOM 1017 N SER A 111 11.650 72.372 -19.785 1.00 21.33 A N
1018
+ ATOM 1018 CA SER A 111 11.111 71.050 -20.101 1.00 18.49 A C
1019
+ ATOM 1019 C SER A 111 10.358 70.982 -21.431 1.00 19.05 A C
1020
+ ATOM 1020 O SER A 111 9.522 71.837 -21.713 1.00 17.55 A O
1021
+ ATOM 1021 CB SER A 111 10.160 70.630 -18.972 1.00 15.96 A C
1022
+ ATOM 1022 OG SER A 111 9.363 69.523 -19.337 1.00 23.59 A O
1023
+ ATOM 1023 H SER A 111 11.210 73.148 -20.188 1.00 0.00 A H
1024
+ ATOM 1024 HG SER A 111 9.937 68.749 -19.487 1.00 0.00 A H
1025
+ ATOM 1025 N MET A 112 10.665 69.991 -22.264 1.00 17.04 A N
1026
+ ATOM 1026 CA MET A 112 9.929 69.842 -23.510 1.00 12.13 A C
1027
+ ATOM 1027 C MET A 112 9.613 68.375 -23.688 1.00 9.89 A C
1028
+ ATOM 1028 O MET A 112 10.415 67.518 -23.360 1.00 9.32 A O
1029
+ ATOM 1029 CB MET A 112 10.668 70.447 -24.722 1.00 16.06 A C
1030
+ ATOM 1030 CG MET A 112 11.581 69.539 -25.551 1.00 16.45 A C
1031
+ ATOM 1031 SD MET A 112 10.783 68.225 -26.550 1.00 16.21 A S
1032
+ ATOM 1032 CE MET A 112 11.040 68.840 -28.146 1.00 26.96 A C
1033
+ ATOM 1033 H MET A 112 11.372 69.342 -22.047 1.00 0.00 A H
1034
+ ATOM 1034 N ARG A 113 8.396 68.084 -24.121 1.00 8.79 A N
1035
+ ATOM 1035 CA ARG A 113 7.982 66.701 -24.340 1.00 7.97 A C
1036
+ ATOM 1036 C ARG A 113 7.285 66.645 -25.684 1.00 6.41 A C
1037
+ ATOM 1037 O ARG A 113 6.378 67.434 -25.935 1.00 13.99 A O
1038
+ ATOM 1038 CB ARG A 113 7.030 66.251 -23.233 1.00 9.02 A C
1039
+ ATOM 1039 CG ARG A 113 7.625 66.370 -21.867 1.00 15.14 A C
1040
+ ATOM 1040 CD ARG A 113 6.696 65.766 -20.873 1.00 16.20 A C
1041
+ ATOM 1041 NE ARG A 113 6.778 64.314 -20.880 1.00 14.51 A N
1042
+ ATOM 1042 CZ ARG A 113 5.727 63.509 -20.833 1.00 10.39 A C
1043
+ ATOM 1043 NH1 ARG A 113 4.507 64.006 -20.784 1.00 13.94 A N1+
1044
+ ATOM 1044 NH2 ARG A 113 5.907 62.204 -20.788 1.00 13.69 A N
1045
+ ATOM 1045 H ARG A 113 7.768 68.807 -24.323 1.00 0.00 A H
1046
+ ATOM 1046 HE ARG A 113 7.691 63.913 -20.902 1.00 0.00 A H
1047
+ ATOM 1047 HH11 ARG A 113 4.320 64.981 -20.767 1.00 0.00 A H
1048
+ ATOM 1048 HH12 ARG A 113 3.746 63.347 -20.805 1.00 0.00 A H
1049
+ ATOM 1049 HH21 ARG A 113 6.856 61.827 -20.804 1.00 0.00 A H
1050
+ ATOM 1050 HH22 ARG A 113 5.170 61.521 -20.786 1.00 0.00 A H
1051
+ ATOM 1051 N ALA A 114 7.729 65.724 -26.538 1.00 12.66 A N
1052
+ ATOM 1052 CA ALA A 114 7.198 65.566 -27.880 1.00 6.59 A C
1053
+ ATOM 1053 C ALA A 114 6.288 64.349 -27.911 1.00 6.02 A C
1054
+ ATOM 1054 O ALA A 114 6.739 63.219 -27.744 1.00 11.50 A O
1055
+ ATOM 1055 CB ALA A 114 8.349 65.421 -28.876 1.00 7.36 A C
1056
+ ATOM 1056 H ALA A 114 8.438 65.113 -26.244 1.00 0.00 A H
1057
+ ATOM 1057 N LEU A 115 5.000 64.590 -28.126 1.00 7.62 A N
1058
+ ATOM 1058 CA LEU A 115 3.997 63.535 -28.149 1.00 10.84 A C
1059
+ ATOM 1059 C LEU A 115 3.587 63.101 -29.541 1.00 13.62 A C
1060
+ ATOM 1060 O LEU A 115 3.045 63.908 -30.288 1.00 17.93 A O
1061
+ ATOM 1061 CB LEU A 115 2.762 64.036 -27.412 1.00 11.12 A C
1062
+ ATOM 1062 CG LEU A 115 2.448 63.462 -26.042 1.00 13.33 A C
1063
+ ATOM 1063 CD1 LEU A 115 1.773 64.522 -25.185 1.00 21.56 A C
1064
+ ATOM 1064 CD2 LEU A 115 1.549 62.271 -26.233 1.00 19.09 A C
1065
+ ATOM 1065 H LEU A 115 4.719 65.518 -28.285 1.00 0.00 A H
1066
+ ATOM 1066 N PHE A 116 3.887 61.854 -29.901 1.00 17.39 A N
1067
+ ATOM 1067 CA PHE A 116 3.512 61.279 -31.205 1.00 15.62 A C
1068
+ ATOM 1068 C PHE A 116 3.773 59.782 -31.214 1.00 14.98 A C
1069
+ ATOM 1069 O PHE A 116 4.750 59.330 -30.638 1.00 21.35 A O
1070
+ ATOM 1070 CB PHE A 116 4.255 61.931 -32.388 1.00 12.89 A C
1071
+ ATOM 1071 CG PHE A 116 5.742 61.674 -32.402 1.00 15.41 A C
1072
+ ATOM 1072 CD1 PHE A 116 6.260 60.540 -33.006 1.00 18.05 A C
1073
+ ATOM 1073 CD2 PHE A 116 6.627 62.574 -31.794 1.00 16.24 A C
1074
+ ATOM 1074 CE1 PHE A 116 7.644 60.294 -33.007 1.00 19.50 A C
1075
+ ATOM 1075 CE2 PHE A 116 8.000 62.335 -31.790 1.00 17.91 A C
1076
+ ATOM 1076 CZ PHE A 116 8.509 61.189 -32.400 1.00 17.62 A C
1077
+ ATOM 1077 H PHE A 116 4.395 61.271 -29.298 1.00 0.00 A H
1078
+ ATOM 1078 N GLY A 117 2.874 59.021 -31.831 1.00 17.65 A N
1079
+ ATOM 1079 CA GLY A 117 3.034 57.579 -31.914 1.00 20.71 A C
1080
+ ATOM 1080 C GLY A 117 3.137 57.144 -33.370 1.00 23.09 A C
1081
+ ATOM 1081 O GLY A 117 3.117 57.988 -34.263 1.00 26.53 A O
1082
+ ATOM 1082 H GLY A 117 2.104 59.433 -32.250 1.00 0.00 A H
1083
+ ATOM 1083 N GLU A 118 3.195 55.833 -33.617 1.00 29.70 A N
1084
+ ATOM 1084 CA GLU A 118 3.312 55.274 -34.973 1.00 34.79 A C
1085
+ ATOM 1085 C GLU A 118 2.274 55.834 -35.940 1.00 33.08 A C
1086
+ ATOM 1086 O GLU A 118 2.573 56.087 -37.101 1.00 32.88 A O
1087
+ ATOM 1087 CB GLU A 118 3.207 53.745 -34.939 1.00 46.74 A C
1088
+ ATOM 1088 CG GLU A 118 1.967 53.236 -34.174 1.00 64.19 A C
1089
+ ATOM 1089 CD GLU A 118 1.551 51.794 -34.510 1.00 73.18 A C
1090
+ ATOM 1090 OE1 GLU A 118 2.285 51.093 -35.248 1.00 75.99 A O
1091
+ ATOM 1091 OE2 GLU A 118 0.460 51.381 -34.038 1.00 78.00 A O1-
1092
+ ATOM 1092 H GLU A 118 3.159 55.215 -32.856 1.00 0.00 A H
1093
+ ATOM 1093 N LYS A 119 1.059 56.056 -35.451 1.00 31.94 A N
1094
+ ATOM 1094 CA LYS A 119 -0.012 56.595 -36.285 1.00 28.74 A C
1095
+ ATOM 1095 C LYS A 119 0.134 58.092 -36.630 1.00 21.50 A C
1096
+ ATOM 1096 O LYS A 119 -0.639 58.627 -37.418 1.00 22.81 A O
1097
+ ATOM 1097 CB LYS A 119 -1.382 56.269 -35.658 1.00 36.51 A C
1098
+ ATOM 1098 CG LYS A 119 -1.695 54.759 -35.695 1.00 47.40 A C
1099
+ ATOM 1099 CD LYS A 119 -3.014 54.351 -35.029 1.00 55.43 A C
1100
+ ATOM 1100 CE LYS A 119 -3.265 52.816 -35.130 1.00 60.03 A C
1101
+ ATOM 1101 NZ LYS A 119 -2.308 51.931 -34.351 1.00 67.90 A N1+
1102
+ ATOM 1102 H LYS A 119 0.889 55.839 -34.514 1.00 0.00 A H
1103
+ ATOM 1103 HZ1 LYS A 119 -1.283 51.987 -34.576 1.00 0.00 A H
1104
+ ATOM 1104 HZ2 LYS A 119 -2.367 52.066 -33.316 1.00 0.00 A H
1105
+ ATOM 1105 HZ3 LYS A 119 -2.545 50.920 -34.392 1.00 0.00 A H
1106
+ ATOM 1106 N ASN A 120 1.146 58.748 -36.069 1.00 18.19 A N
1107
+ ATOM 1107 CA ASN A 120 1.410 60.162 -36.331 1.00 14.95 A C
1108
+ ATOM 1108 C ASN A 120 2.621 60.253 -37.240 1.00 14.05 A C
1109
+ ATOM 1109 O ASN A 120 3.229 61.300 -37.399 1.00 16.23 A O
1110
+ ATOM 1110 CB ASN A 120 1.649 60.927 -35.022 1.00 16.83 A C
1111
+ ATOM 1111 CG ASN A 120 0.500 60.768 -34.058 1.00 11.86 A C
1112
+ ATOM 1112 ND2 ASN A 120 -0.704 61.061 -34.534 1.00 13.11 A N
1113
+ ATOM 1113 OD1 ASN A 120 0.670 60.299 -32.930 1.00 15.82 A O
1114
+ ATOM 1114 H ASN A 120 1.752 58.275 -35.475 1.00 0.00 A H
1115
+ ATOM 1115 HD21 ASN A 120 -0.770 61.365 -35.461 1.00 0.00 A H
1116
+ ATOM 1116 HD22 ASN A 120 -1.492 60.971 -33.959 1.00 0.00 A H
1117
+ ATOM 1117 N ILE A 121 2.983 59.130 -37.829 1.00 15.46 A N
1118
+ ATOM 1118 CA ILE A 121 4.110 59.084 -38.731 1.00 19.96 A C
1119
+ ATOM 1119 C ILE A 121 3.507 58.777 -40.088 1.00 24.14 A C
1120
+ ATOM 1120 O ILE A 121 2.828 57.778 -40.251 1.00 23.40 A O
1121
+ ATOM 1121 CB ILE A 121 5.075 57.989 -38.307 1.00 17.39 A C
1122
+ ATOM 1122 CG1 ILE A 121 5.643 58.351 -36.940 1.00 12.27 A C
1123
+ ATOM 1123 CG2 ILE A 121 6.173 57.820 -39.330 1.00 17.35 A C
1124
+ ATOM 1124 CD1 ILE A 121 6.604 57.357 -36.398 1.00 23.38 A C
1125
+ ATOM 1125 H ILE A 121 2.500 58.289 -37.685 1.00 0.00 A H
1126
+ ATOM 1126 N HIS A 122 3.705 59.662 -41.048 1.00 23.88 A N
1127
+ ATOM 1127 CA HIS A 122 3.128 59.445 -42.357 1.00 36.19 A C
1128
+ ATOM 1128 C HIS A 122 4.148 59.369 -43.482 1.00 46.30 A C
1129
+ ATOM 1129 O HIS A 122 4.765 60.358 -43.840 1.00 47.68 A O
1130
+ ATOM 1130 CB HIS A 122 2.085 60.519 -42.630 1.00 29.55 A C
1131
+ ATOM 1131 CG HIS A 122 1.052 60.620 -41.556 1.00 29.07 A C
1132
+ ATOM 1132 CD2 HIS A 122 0.190 59.698 -41.060 1.00 25.53 A C
1133
+ ATOM 1133 ND1 HIS A 122 0.885 61.751 -40.788 1.00 26.56 A N
1134
+ ATOM 1134 CE1 HIS A 122 -0.029 61.522 -39.862 1.00 25.43 A C
1135
+ ATOM 1135 NE2 HIS A 122 -0.464 60.282 -40.005 1.00 23.16 A N
1136
+ ATOM 1136 H HIS A 122 4.299 60.404 -40.884 1.00 0.00 A H
1137
+ ATOM 1137 HD1 HIS A 122 1.376 62.596 -40.895 1.00 0.00 A H
1138
+ ATOM 1138 HE2 HIS A 122 -1.100 59.859 -39.377 1.00 0.00 A H
1139
+ ATOM 1139 N ALA A 123 4.395 58.165 -43.976 1.00 59.66 A N
1140
+ ATOM 1140 CA ALA A 123 5.331 58.000 -45.074 1.00 73.75 A C
1141
+ ATOM 1141 C ALA A 123 4.609 58.586 -46.276 1.00 82.21 A C
1142
+ ATOM 1142 O ALA A 123 3.377 58.581 -46.320 1.00 85.95 A O
1143
+ ATOM 1143 CB ALA A 123 5.643 56.528 -45.297 1.00 73.39 A C
1144
+ ATOM 1144 H ALA A 123 3.918 57.391 -43.620 1.00 0.00 A H
1145
+ ATOM 1145 N GLY A 124 5.368 59.086 -47.242 1.00 90.51 A N
1146
+ ATOM 1146 CA GLY A 124 4.773 59.683 -48.421 1.00101.54 A C
1147
+ ATOM 1147 C GLY A 124 4.023 60.951 -48.070 1.00108.75 A C
1148
+ ATOM 1148 O GLY A 124 4.498 62.055 -48.351 1.00109.98 A O
1149
+ ATOM 1149 H GLY A 124 6.323 59.002 -47.173 1.00 0.00 A H
1150
+ ATOM 1150 N ALA A 125 2.855 60.788 -47.454 1.00114.55 A N
1151
+ ATOM 1151 CA ALA A 125 2.012 61.902 -47.043 1.00119.60 A C
1152
+ ATOM 1152 C ALA A 125 2.779 62.840 -46.123 1.00121.61 A C
1153
+ ATOM 1153 O ALA A 125 2.439 64.040 -46.086 1.00122.70 A O
1154
+ ATOM 1154 CB ALA A 125 0.768 61.380 -46.343 1.00121.44 A C
1155
+ ATOM 1155 OXT ALA A 125 3.759 62.383 -45.497 1.00123.42 A O1-
1156
+ ATOM 1156 H ALA A 125 2.534 59.892 -47.260 1.00 0.00 A H
af_backprop/examples/sc_hall/1QJS_starting.pdb ADDED
@@ -0,0 +1,880 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MODEL 1
2
+ ATOM 1 N HIS A 1 -11.161 5.339 22.224 1.00 0.00 N
3
+ ATOM 2 CA HIS A 1 -9.750 5.488 21.883 1.00 0.00 C
4
+ ATOM 3 C HIS A 1 -9.362 4.571 20.728 1.00 0.00 C
5
+ ATOM 4 CB HIS A 1 -8.871 5.198 23.102 1.00 0.00 C
6
+ ATOM 5 O HIS A 1 -9.646 3.372 20.760 1.00 0.00 O
7
+ ATOM 6 CG HIS A 1 -9.124 6.115 24.256 1.00 0.00 C
8
+ ATOM 7 CD2 HIS A 1 -9.874 5.954 25.372 1.00 0.00 C
9
+ ATOM 8 ND1 HIS A 1 -8.571 7.375 24.340 1.00 0.00 N
10
+ ATOM 9 CE1 HIS A 1 -8.971 7.950 25.462 1.00 0.00 C
11
+ ATOM 10 NE2 HIS A 1 -9.762 7.109 26.106 1.00 0.00 N
12
+ ATOM 11 N CYS A 2 -8.947 5.187 19.589 1.00 0.00 N
13
+ ATOM 12 CA CYS A 2 -8.553 4.393 18.430 1.00 0.00 C
14
+ ATOM 13 C CYS A 2 -7.058 4.097 18.455 1.00 0.00 C
15
+ ATOM 14 CB CYS A 2 -8.916 5.119 17.135 1.00 0.00 C
16
+ ATOM 15 O CYS A 2 -6.262 4.934 18.882 1.00 0.00 O
17
+ ATOM 16 SG CYS A 2 -10.678 5.483 16.975 1.00 0.00 S
18
+ ATOM 17 N TYR A 3 -6.659 2.830 18.088 1.00 0.00 N
19
+ ATOM 18 CA TYR A 3 -5.257 2.439 18.175 1.00 0.00 C
20
+ ATOM 19 C TYR A 3 -4.640 2.308 16.787 1.00 0.00 C
21
+ ATOM 20 CB TYR A 3 -5.114 1.117 18.936 1.00 0.00 C
22
+ ATOM 21 O TYR A 3 -5.356 2.162 15.794 1.00 0.00 O
23
+ ATOM 22 CG TYR A 3 -6.148 0.084 18.559 1.00 0.00 C
24
+ ATOM 23 CD1 TYR A 3 -7.368 0.015 19.228 1.00 0.00 C
25
+ ATOM 24 CD2 TYR A 3 -5.907 -0.825 17.534 1.00 0.00 C
26
+ ATOM 25 CE1 TYR A 3 -8.323 -0.936 18.885 1.00 0.00 C
27
+ ATOM 26 CE2 TYR A 3 -6.855 -1.780 17.183 1.00 0.00 C
28
+ ATOM 27 OH TYR A 3 -9.000 -2.772 17.519 1.00 0.00 O
29
+ ATOM 28 CZ TYR A 3 -8.058 -1.828 17.863 1.00 0.00 C
30
+ ATOM 29 N ASN A 4 -3.262 2.608 16.714 1.00 0.00 N
31
+ ATOM 30 CA ASN A 4 -2.461 2.337 15.525 1.00 0.00 C
32
+ ATOM 31 C ASN A 4 -2.395 0.842 15.223 1.00 0.00 C
33
+ ATOM 32 CB ASN A 4 -1.051 2.909 15.684 1.00 0.00 C
34
+ ATOM 33 O ASN A 4 -2.342 0.021 16.140 1.00 0.00 O
35
+ ATOM 34 CG ASN A 4 -1.049 4.414 15.872 1.00 0.00 C
36
+ ATOM 35 ND2 ASN A 4 -0.034 4.925 16.557 1.00 0.00 N
37
+ ATOM 36 OD1 ASN A 4 -1.954 5.111 15.405 1.00 0.00 O
38
+ ATOM 37 N THR A 5 -2.544 0.524 13.919 1.00 0.00 N
39
+ ATOM 38 CA THR A 5 -2.446 -0.879 13.530 1.00 0.00 C
40
+ ATOM 39 C THR A 5 -1.345 -1.076 12.492 1.00 0.00 C
41
+ ATOM 40 CB THR A 5 -3.783 -1.399 12.970 1.00 0.00 C
42
+ ATOM 41 O THR A 5 -0.994 -0.143 11.767 1.00 0.00 O
43
+ ATOM 42 CG2 THR A 5 -4.888 -1.312 14.018 1.00 0.00 C
44
+ ATOM 43 OG1 THR A 5 -4.156 -0.612 11.832 1.00 0.00 O
45
+ ATOM 44 N HIS A 6 -0.770 -2.218 12.577 1.00 0.00 N
46
+ ATOM 45 CA HIS A 6 0.214 -2.702 11.616 1.00 0.00 C
47
+ ATOM 46 C HIS A 6 -0.116 -4.117 11.155 1.00 0.00 C
48
+ ATOM 47 CB HIS A 6 1.619 -2.661 12.220 1.00 0.00 C
49
+ ATOM 48 O HIS A 6 -0.128 -5.050 11.961 1.00 0.00 O
50
+ ATOM 49 CG HIS A 6 2.697 -3.055 11.261 1.00 0.00 C
51
+ ATOM 50 CD2 HIS A 6 3.413 -4.199 11.150 1.00 0.00 C
52
+ ATOM 51 ND1 HIS A 6 3.149 -2.218 10.264 1.00 0.00 N
53
+ ATOM 52 CE1 HIS A 6 4.099 -2.832 9.579 1.00 0.00 C
54
+ ATOM 53 NE2 HIS A 6 4.278 -4.036 10.097 1.00 0.00 N
55
+ ATOM 54 N GLU A 7 -0.382 -4.286 9.867 1.00 0.00 N
56
+ ATOM 55 CA GLU A 7 -0.811 -5.584 9.356 1.00 0.00 C
57
+ ATOM 56 C GLU A 7 0.045 -6.021 8.171 1.00 0.00 C
58
+ ATOM 57 CB GLU A 7 -2.287 -5.541 8.952 1.00 0.00 C
59
+ ATOM 58 O GLU A 7 0.361 -5.213 7.296 1.00 0.00 O
60
+ ATOM 59 CG GLU A 7 -3.240 -5.342 10.121 1.00 0.00 C
61
+ ATOM 60 CD GLU A 7 -4.701 -5.297 9.704 1.00 0.00 C
62
+ ATOM 61 OE1 GLU A 7 -5.587 -5.386 10.584 1.00 0.00 O
63
+ ATOM 62 OE2 GLU A 7 -4.963 -5.174 8.486 1.00 0.00 O
64
+ ATOM 63 N HIS A 8 0.419 -7.223 8.285 1.00 0.00 N
65
+ ATOM 64 CA HIS A 8 1.167 -7.903 7.234 1.00 0.00 C
66
+ ATOM 65 C HIS A 8 0.236 -8.691 6.317 1.00 0.00 C
67
+ ATOM 66 CB HIS A 8 2.218 -8.835 7.840 1.00 0.00 C
68
+ ATOM 67 O HIS A 8 -0.668 -9.384 6.790 1.00 0.00 O
69
+ ATOM 68 CG HIS A 8 3.038 -9.558 6.819 1.00 0.00 C
70
+ ATOM 69 CD2 HIS A 8 3.959 -9.106 5.936 1.00 0.00 C
71
+ ATOM 70 ND1 HIS A 8 2.951 -10.920 6.625 1.00 0.00 N
72
+ ATOM 71 CE1 HIS A 8 3.787 -11.275 5.663 1.00 0.00 C
73
+ ATOM 72 NE2 HIS A 8 4.410 -10.193 5.228 1.00 0.00 N
74
+ ATOM 73 N PHE A 9 0.399 -8.517 4.937 1.00 0.00 N
75
+ ATOM 74 CA PHE A 9 -0.393 -9.342 4.032 1.00 0.00 C
76
+ ATOM 75 C PHE A 9 0.429 -9.757 2.818 1.00 0.00 C
77
+ ATOM 76 CB PHE A 9 -1.652 -8.593 3.583 1.00 0.00 C
78
+ ATOM 77 O PHE A 9 1.455 -9.142 2.517 1.00 0.00 O
79
+ ATOM 78 CG PHE A 9 -1.366 -7.296 2.875 1.00 0.00 C
80
+ ATOM 79 CD1 PHE A 9 -1.153 -6.127 3.596 1.00 0.00 C
81
+ ATOM 80 CD2 PHE A 9 -1.311 -7.246 1.488 1.00 0.00 C
82
+ ATOM 81 CE1 PHE A 9 -0.889 -4.925 2.943 1.00 0.00 C
83
+ ATOM 82 CE2 PHE A 9 -1.047 -6.049 0.829 1.00 0.00 C
84
+ ATOM 83 CZ PHE A 9 -0.837 -4.889 1.558 1.00 0.00 C
85
+ ATOM 84 N ARG A 10 0.178 -10.978 2.316 1.00 0.00 N
86
+ ATOM 85 CA ARG A 10 0.973 -11.510 1.213 1.00 0.00 C
87
+ ATOM 86 C ARG A 10 0.149 -11.587 -0.068 1.00 0.00 C
88
+ ATOM 87 CB ARG A 10 1.525 -12.892 1.566 1.00 0.00 C
89
+ ATOM 88 O ARG A 10 -1.047 -11.885 -0.026 1.00 0.00 O
90
+ ATOM 89 CG ARG A 10 2.623 -12.868 2.618 1.00 0.00 C
91
+ ATOM 90 CD ARG A 10 3.229 -14.247 2.833 1.00 0.00 C
92
+ ATOM 91 NE ARG A 10 4.333 -14.501 1.913 1.00 0.00 N
93
+ ATOM 92 NH1 ARG A 10 5.449 -15.950 3.324 1.00 0.00 N
94
+ ATOM 93 NH2 ARG A 10 6.314 -15.462 1.257 1.00 0.00 N
95
+ ATOM 94 CZ ARG A 10 5.363 -15.304 2.167 1.00 0.00 C
96
+ ATOM 95 N LEU A 11 0.804 -11.094 -1.103 1.00 0.00 N
97
+ ATOM 96 CA LEU A 11 0.320 -11.410 -2.443 1.00 0.00 C
98
+ ATOM 97 C LEU A 11 0.991 -12.670 -2.980 1.00 0.00 C
99
+ ATOM 98 CB LEU A 11 0.572 -10.238 -3.395 1.00 0.00 C
100
+ ATOM 99 O LEU A 11 1.837 -13.263 -2.307 1.00 0.00 O
101
+ ATOM 100 CG LEU A 11 -0.077 -8.906 -3.016 1.00 0.00 C
102
+ ATOM 101 CD1 LEU A 11 0.400 -7.799 -3.952 1.00 0.00 C
103
+ ATOM 102 CD2 LEU A 11 -1.597 -9.023 -3.049 1.00 0.00 C
104
+ ATOM 103 N ASP A 12 0.341 -13.343 -4.077 1.00 0.00 N
105
+ ATOM 104 CA ASP A 12 0.855 -14.552 -4.714 1.00 0.00 C
106
+ ATOM 105 C ASP A 12 2.306 -14.367 -5.151 1.00 0.00 C
107
+ ATOM 106 CB ASP A 12 -0.011 -14.936 -5.915 1.00 0.00 C
108
+ ATOM 107 O ASP A 12 2.690 -14.792 -6.243 1.00 0.00 O
109
+ ATOM 108 CG ASP A 12 -1.397 -15.415 -5.519 1.00 0.00 C
110
+ ATOM 109 OD1 ASP A 12 -1.546 -16.017 -4.434 1.00 0.00 O
111
+ ATOM 110 OD2 ASP A 12 -2.347 -15.191 -6.300 1.00 0.00 O
112
+ ATOM 111 N ASP A 13 3.110 -13.435 -4.576 1.00 0.00 N
113
+ ATOM 112 CA ASP A 13 4.545 -13.254 -4.773 1.00 0.00 C
114
+ ATOM 113 C ASP A 13 5.321 -13.603 -3.505 1.00 0.00 C
115
+ ATOM 114 CB ASP A 13 4.851 -11.817 -5.199 1.00 0.00 C
116
+ ATOM 115 O ASP A 13 5.243 -12.885 -2.506 1.00 0.00 O
117
+ ATOM 116 CG ASP A 13 6.323 -11.586 -5.491 1.00 0.00 C
118
+ ATOM 117 OD1 ASP A 13 7.094 -12.568 -5.553 1.00 0.00 O
119
+ ATOM 118 OD2 ASP A 13 6.716 -10.412 -5.658 1.00 0.00 O
120
+ ATOM 119 N PRO A 14 6.029 -14.766 -3.543 1.00 0.00 N
121
+ ATOM 120 CA PRO A 14 6.658 -15.283 -2.325 1.00 0.00 C
122
+ ATOM 121 C PRO A 14 7.790 -14.392 -1.821 1.00 0.00 C
123
+ ATOM 122 CB PRO A 14 7.191 -16.652 -2.757 1.00 0.00 C
124
+ ATOM 123 O PRO A 14 8.147 -14.448 -0.641 1.00 0.00 O
125
+ ATOM 124 CG PRO A 14 7.284 -16.572 -4.246 1.00 0.00 C
126
+ ATOM 125 CD PRO A 14 6.262 -15.588 -4.736 1.00 0.00 C
127
+ ATOM 126 N TRP A 15 8.193 -13.454 -2.636 1.00 0.00 N
128
+ ATOM 127 CA TRP A 15 9.394 -12.710 -2.274 1.00 0.00 C
129
+ ATOM 128 C TRP A 15 9.039 -11.318 -1.760 1.00 0.00 C
130
+ ATOM 129 CB TRP A 15 10.341 -12.598 -3.472 1.00 0.00 C
131
+ ATOM 130 O TRP A 15 9.853 -10.666 -1.102 1.00 0.00 O
132
+ ATOM 131 CG TRP A 15 10.788 -13.920 -4.020 1.00 0.00 C
133
+ ATOM 132 CD1 TRP A 15 11.486 -14.892 -3.358 1.00 0.00 C
134
+ ATOM 133 CD2 TRP A 15 10.573 -14.413 -5.346 1.00 0.00 C
135
+ ATOM 134 CE2 TRP A 15 11.167 -15.692 -5.418 1.00 0.00 C
136
+ ATOM 135 CE3 TRP A 15 9.934 -13.896 -6.481 1.00 0.00 C
137
+ ATOM 136 NE1 TRP A 15 11.716 -15.961 -4.193 1.00 0.00 N
138
+ ATOM 137 CH2 TRP A 15 10.509 -15.934 -7.678 1.00 0.00 C
139
+ ATOM 138 CZ2 TRP A 15 11.141 -16.463 -6.583 1.00 0.00 C
140
+ ATOM 139 CZ3 TRP A 15 9.909 -14.665 -7.639 1.00 0.00 C
141
+ ATOM 140 N THR A 16 7.868 -10.913 -2.013 1.00 0.00 N
142
+ ATOM 141 CA THR A 16 7.495 -9.538 -1.702 1.00 0.00 C
143
+ ATOM 142 C THR A 16 6.582 -9.488 -0.480 1.00 0.00 C
144
+ ATOM 143 CB THR A 16 6.794 -8.866 -2.898 1.00 0.00 C
145
+ ATOM 144 O THR A 16 5.600 -10.229 -0.402 1.00 0.00 O
146
+ ATOM 145 CG2 THR A 16 6.483 -7.402 -2.602 1.00 0.00 C
147
+ ATOM 146 OG1 THR A 16 7.646 -8.939 -4.047 1.00 0.00 O
148
+ ATOM 147 N GLU A 17 6.987 -8.731 0.402 1.00 0.00 N
149
+ ATOM 148 CA GLU A 17 6.175 -8.520 1.596 1.00 0.00 C
150
+ ATOM 149 C GLU A 17 5.456 -7.175 1.545 1.00 0.00 C
151
+ ATOM 150 CB GLU A 17 7.039 -8.607 2.857 1.00 0.00 C
152
+ ATOM 151 O GLU A 17 6.060 -6.155 1.205 1.00 0.00 O
153
+ ATOM 152 CG GLU A 17 7.672 -9.973 3.076 1.00 0.00 C
154
+ ATOM 153 CD GLU A 17 8.498 -10.055 4.349 1.00 0.00 C
155
+ ATOM 154 OE1 GLU A 17 8.478 -11.113 5.019 1.00 0.00 O
156
+ ATOM 155 OE2 GLU A 17 9.170 -9.053 4.681 1.00 0.00 O
157
+ ATOM 156 N PHE A 18 4.157 -7.211 1.917 1.00 0.00 N
158
+ ATOM 157 CA PHE A 18 3.352 -5.996 1.948 1.00 0.00 C
159
+ ATOM 158 C PHE A 18 2.861 -5.707 3.362 1.00 0.00 C
160
+ ATOM 159 CB PHE A 18 2.160 -6.115 0.993 1.00 0.00 C
161
+ ATOM 160 O PHE A 18 2.413 -6.614 4.066 1.00 0.00 O
162
+ ATOM 161 CG PHE A 18 2.553 -6.268 -0.452 1.00 0.00 C
163
+ ATOM 162 CD1 PHE A 18 2.624 -5.160 -1.288 1.00 0.00 C
164
+ ATOM 163 CD2 PHE A 18 2.851 -7.520 -0.974 1.00 0.00 C
165
+ ATOM 164 CE1 PHE A 18 2.988 -5.299 -2.626 1.00 0.00 C
166
+ ATOM 165 CE2 PHE A 18 3.216 -7.666 -2.310 1.00 0.00 C
167
+ ATOM 166 CZ PHE A 18 3.282 -6.554 -3.134 1.00 0.00 C
168
+ ATOM 167 N TYR A 19 3.014 -4.483 3.756 1.00 0.00 N
169
+ ATOM 168 CA TYR A 19 2.556 -4.040 5.069 1.00 0.00 C
170
+ ATOM 169 C TYR A 19 1.598 -2.861 4.943 1.00 0.00 C
171
+ ATOM 170 CB TYR A 19 3.746 -3.653 5.952 1.00 0.00 C
172
+ ATOM 171 O TYR A 19 1.757 -2.016 4.059 1.00 0.00 O
173
+ ATOM 172 CG TYR A 19 4.807 -4.723 6.042 1.00 0.00 C
174
+ ATOM 173 CD1 TYR A 19 4.770 -5.685 7.049 1.00 0.00 C
175
+ ATOM 174 CD2 TYR A 19 5.849 -4.773 5.122 1.00 0.00 C
176
+ ATOM 175 CE1 TYR A 19 5.747 -6.671 7.138 1.00 0.00 C
177
+ ATOM 176 CE2 TYR A 19 6.831 -5.755 5.201 1.00 0.00 C
178
+ ATOM 177 OH TYR A 19 7.742 -7.672 6.294 1.00 0.00 O
179
+ ATOM 178 CZ TYR A 19 6.772 -6.698 6.211 1.00 0.00 C
180
+ ATOM 179 N ARG A 20 0.661 -2.887 5.726 1.00 0.00 N
181
+ ATOM 180 CA ARG A 20 -0.194 -1.722 5.928 1.00 0.00 C
182
+ ATOM 181 C ARG A 20 -0.080 -1.200 7.357 1.00 0.00 C
183
+ ATOM 182 CB ARG A 20 -1.652 -2.062 5.610 1.00 0.00 C
184
+ ATOM 183 O ARG A 20 -0.319 -1.939 8.314 1.00 0.00 O
185
+ ATOM 184 CG ARG A 20 -2.611 -0.897 5.793 1.00 0.00 C
186
+ ATOM 185 CD ARG A 20 -3.943 -1.349 6.375 1.00 0.00 C
187
+ ATOM 186 NE ARG A 20 -3.783 -1.914 7.712 1.00 0.00 N
188
+ ATOM 187 NH1 ARG A 20 -6.044 -1.973 8.182 1.00 0.00 N
189
+ ATOM 188 NH2 ARG A 20 -4.527 -2.709 9.734 1.00 0.00 N
190
+ ATOM 189 CZ ARG A 20 -4.785 -2.198 8.540 1.00 0.00 C
191
+ ATOM 190 N THR A 21 0.265 0.071 7.504 1.00 0.00 N
192
+ ATOM 191 CA THR A 21 0.341 0.733 8.802 1.00 0.00 C
193
+ ATOM 192 C THR A 21 -0.671 1.872 8.889 1.00 0.00 C
194
+ ATOM 193 CB THR A 21 1.756 1.278 9.069 1.00 0.00 C
195
+ ATOM 194 O THR A 21 -0.741 2.716 7.993 1.00 0.00 O
196
+ ATOM 195 CG2 THR A 21 1.849 1.911 10.454 1.00 0.00 C
197
+ ATOM 196 OG1 THR A 21 2.700 0.204 8.982 1.00 0.00 O
198
+ ATOM 197 N LEU A 22 -1.459 1.839 9.957 1.00 0.00 N
199
+ ATOM 198 CA LEU A 22 -2.458 2.875 10.198 1.00 0.00 C
200
+ ATOM 199 C LEU A 22 -2.082 3.722 11.409 1.00 0.00 C
201
+ ATOM 200 CB LEU A 22 -3.840 2.250 10.406 1.00 0.00 C
202
+ ATOM 201 O LEU A 22 -1.766 3.184 12.473 1.00 0.00 O
203
+ ATOM 202 CG LEU A 22 -5.013 3.224 10.530 1.00 0.00 C
204
+ ATOM 203 CD1 LEU A 22 -6.236 2.675 9.804 1.00 0.00 C
205
+ ATOM 204 CD2 LEU A 22 -5.331 3.493 11.997 1.00 0.00 C
206
+ ATOM 205 N ASN A 23 -2.010 5.058 11.207 1.00 0.00 N
207
+ ATOM 206 CA ASN A 23 -1.916 6.025 12.296 1.00 0.00 C
208
+ ATOM 207 C ASN A 23 -3.262 6.686 12.578 1.00 0.00 C
209
+ ATOM 208 CB ASN A 23 -0.859 7.086 11.982 1.00 0.00 C
210
+ ATOM 209 O ASN A 23 -3.700 7.558 11.826 1.00 0.00 O
211
+ ATOM 210 CG ASN A 23 -0.544 7.968 13.174 1.00 0.00 C
212
+ ATOM 211 ND2 ASN A 23 0.738 8.240 13.389 1.00 0.00 N
213
+ ATOM 212 OD1 ASN A 23 -1.445 8.400 13.897 1.00 0.00 O
214
+ ATOM 213 N ALA A 24 -3.964 6.163 13.657 1.00 0.00 N
215
+ ATOM 214 CA ALA A 24 -5.343 6.534 13.965 1.00 0.00 C
216
+ ATOM 215 C ALA A 24 -5.445 8.012 14.331 1.00 0.00 C
217
+ ATOM 216 CB ALA A 24 -5.885 5.669 15.101 1.00 0.00 C
218
+ ATOM 217 O ALA A 24 -6.448 8.664 14.033 1.00 0.00 O
219
+ ATOM 218 N ARG A 25 -4.362 8.538 14.892 1.00 0.00 N
220
+ ATOM 219 CA ARG A 25 -4.384 9.932 15.323 1.00 0.00 C
221
+ ATOM 220 C ARG A 25 -4.331 10.876 14.126 1.00 0.00 C
222
+ ATOM 221 CB ARG A 25 -3.219 10.220 16.271 1.00 0.00 C
223
+ ATOM 222 O ARG A 25 -5.124 11.815 14.034 1.00 0.00 O
224
+ ATOM 223 CG ARG A 25 -3.211 11.637 16.824 1.00 0.00 C
225
+ ATOM 224 CD ARG A 25 -2.139 11.820 17.889 1.00 0.00 C
226
+ ATOM 225 NE ARG A 25 -2.145 13.176 18.432 1.00 0.00 N
227
+ ATOM 226 NH1 ARG A 25 -0.364 12.832 19.863 1.00 0.00 N
228
+ ATOM 227 NH2 ARG A 25 -1.397 14.877 19.781 1.00 0.00 N
229
+ ATOM 228 CZ ARG A 25 -1.302 13.625 19.358 1.00 0.00 C
230
+ ATOM 229 N SER A 26 -3.376 10.637 13.145 1.00 0.00 N
231
+ ATOM 230 CA SER A 26 -3.171 11.513 11.996 1.00 0.00 C
232
+ ATOM 231 C SER A 26 -4.052 11.100 10.822 1.00 0.00 C
233
+ ATOM 232 CB SER A 26 -1.702 11.504 11.569 1.00 0.00 C
234
+ ATOM 233 O SER A 26 -4.116 11.801 9.810 1.00 0.00 O
235
+ ATOM 234 OG SER A 26 -1.312 10.211 11.138 1.00 0.00 O
236
+ ATOM 235 N LYS A 27 -4.836 10.033 11.095 1.00 0.00 N
237
+ ATOM 236 CA LYS A 27 -5.656 9.457 10.033 1.00 0.00 C
238
+ ATOM 237 C LYS A 27 -4.835 9.227 8.768 1.00 0.00 C
239
+ ATOM 238 CB LYS A 27 -6.850 10.362 9.726 1.00 0.00 C
240
+ ATOM 239 O LYS A 27 -5.263 9.587 7.669 1.00 0.00 O
241
+ ATOM 240 CG LYS A 27 -7.799 10.556 10.899 1.00 0.00 C
242
+ ATOM 241 CD LYS A 27 -8.515 9.260 11.258 1.00 0.00 C
243
+ ATOM 242 CE LYS A 27 -9.598 9.490 12.304 1.00 0.00 C
244
+ ATOM 243 NZ LYS A 27 -10.257 8.213 12.709 1.00 0.00 N
245
+ ATOM 244 N THR A 28 -3.684 8.620 8.927 1.00 0.00 N
246
+ ATOM 245 CA THR A 28 -2.783 8.326 7.817 1.00 0.00 C
247
+ ATOM 246 C THR A 28 -2.604 6.820 7.650 1.00 0.00 C
248
+ ATOM 247 CB THR A 28 -1.409 8.990 8.023 1.00 0.00 C
249
+ ATOM 248 O THR A 28 -2.497 6.089 8.637 1.00 0.00 O
250
+ ATOM 249 CG2 THR A 28 -0.480 8.706 6.848 1.00 0.00 C
251
+ ATOM 250 OG1 THR A 28 -1.584 10.407 8.148 1.00 0.00 O
252
+ ATOM 251 N CYS A 29 -2.718 6.348 6.470 1.00 0.00 N
253
+ ATOM 252 CA CYS A 29 -2.452 4.960 6.109 1.00 0.00 C
254
+ ATOM 253 C CYS A 29 -1.190 4.850 5.262 1.00 0.00 C
255
+ ATOM 254 CB CYS A 29 -3.639 4.366 5.352 1.00 0.00 C
256
+ ATOM 255 O CYS A 29 -1.018 5.597 4.297 1.00 0.00 O
257
+ ATOM 256 SG CYS A 29 -3.561 2.571 5.170 1.00 0.00 S
258
+ ATOM 257 N ILE A 30 -0.309 3.925 5.603 1.00 0.00 N
259
+ ATOM 258 CA ILE A 30 0.942 3.715 4.882 1.00 0.00 C
260
+ ATOM 259 C ILE A 30 0.989 2.290 4.335 1.00 0.00 C
261
+ ATOM 260 CB ILE A 30 2.166 3.984 5.786 1.00 0.00 C
262
+ ATOM 261 O ILE A 30 0.818 1.325 5.084 1.00 0.00 O
263
+ ATOM 262 CG1 ILE A 30 2.083 5.388 6.394 1.00 0.00 C
264
+ ATOM 263 CG2 ILE A 30 3.468 3.803 4.999 1.00 0.00 C
265
+ ATOM 264 CD1 ILE A 30 3.072 5.632 7.526 1.00 0.00 C
266
+ ATOM 265 N VAL A 31 1.098 2.123 3.077 1.00 0.00 N
267
+ ATOM 266 CA VAL A 31 1.317 0.825 2.449 1.00 0.00 C
268
+ ATOM 267 C VAL A 31 2.785 0.684 2.053 1.00 0.00 C
269
+ ATOM 268 CB VAL A 31 0.411 0.632 1.212 1.00 0.00 C
270
+ ATOM 269 O VAL A 31 3.310 1.498 1.289 1.00 0.00 O
271
+ ATOM 270 CG1 VAL A 31 0.662 -0.729 0.566 1.00 0.00 C
272
+ ATOM 271 CG2 VAL A 31 -1.059 0.779 1.601 1.00 0.00 C
273
+ ATOM 272 N THR A 32 3.455 -0.320 2.592 1.00 0.00 N
274
+ ATOM 273 CA THR A 32 4.880 -0.565 2.391 1.00 0.00 C
275
+ ATOM 274 C THR A 32 5.102 -1.860 1.616 1.00 0.00 C
276
+ ATOM 275 CB THR A 32 5.630 -0.631 3.734 1.00 0.00 C
277
+ ATOM 276 O THR A 32 4.464 -2.876 1.899 1.00 0.00 O
278
+ ATOM 277 CG2 THR A 32 7.129 -0.812 3.519 1.00 0.00 C
279
+ ATOM 278 OG1 THR A 32 5.405 0.583 4.461 1.00 0.00 O
280
+ ATOM 279 N VAL A 33 5.927 -1.787 0.613 1.00 0.00 N
281
+ ATOM 280 CA VAL A 33 6.436 -2.966 -0.081 1.00 0.00 C
282
+ ATOM 281 C VAL A 33 7.879 -3.231 0.342 1.00 0.00 C
283
+ ATOM 282 CB VAL A 33 6.351 -2.802 -1.615 1.00 0.00 C
284
+ ATOM 283 O VAL A 33 8.737 -2.352 0.230 1.00 0.00 O
285
+ ATOM 284 CG1 VAL A 33 6.835 -4.067 -2.321 1.00 0.00 C
286
+ ATOM 285 CG2 VAL A 33 4.922 -2.464 -2.036 1.00 0.00 C
287
+ ATOM 286 N ASP A 34 8.169 -4.392 0.805 1.00 0.00 N
288
+ ATOM 287 CA ASP A 34 9.479 -4.825 1.284 1.00 0.00 C
289
+ ATOM 288 C ASP A 34 10.014 -5.983 0.446 1.00 0.00 C
290
+ ATOM 289 CB ASP A 34 9.404 -5.231 2.757 1.00 0.00 C
291
+ ATOM 290 O ASP A 34 9.513 -7.106 0.537 1.00 0.00 O
292
+ ATOM 291 CG ASP A 34 10.762 -5.570 3.348 1.00 0.00 C
293
+ ATOM 292 OD1 ASP A 34 11.727 -5.774 2.581 1.00 0.00 O
294
+ ATOM 293 OD2 ASP A 34 10.866 -5.636 4.592 1.00 0.00 O
295
+ ATOM 294 N GLN A 35 11.073 -5.726 -0.286 1.00 0.00 N
296
+ ATOM 295 CA GLN A 35 11.675 -6.742 -1.144 1.00 0.00 C
297
+ ATOM 296 C GLN A 35 13.024 -7.197 -0.595 1.00 0.00 C
298
+ ATOM 297 CB GLN A 35 11.840 -6.212 -2.569 1.00 0.00 C
299
+ ATOM 298 O GLN A 35 13.897 -7.623 -1.353 1.00 0.00 O
300
+ ATOM 299 CG GLN A 35 10.522 -5.921 -3.274 1.00 0.00 C
301
+ ATOM 300 CD GLN A 35 9.870 -7.169 -3.838 1.00 0.00 C
302
+ ATOM 301 NE2 GLN A 35 8.547 -7.240 -3.747 1.00 0.00 N
303
+ ATOM 302 OE1 GLN A 35 10.551 -8.062 -4.353 1.00 0.00 O
304
+ ATOM 303 N THR A 36 13.298 -6.855 0.668 1.00 0.00 N
305
+ ATOM 304 CA THR A 36 14.573 -7.196 1.289 1.00 0.00 C
306
+ ATOM 305 C THR A 36 14.903 -8.670 1.071 1.00 0.00 C
307
+ ATOM 306 CB THR A 36 14.560 -6.888 2.797 1.00 0.00 C
308
+ ATOM 307 O THR A 36 16.070 -9.034 0.913 1.00 0.00 O
309
+ ATOM 308 CG2 THR A 36 15.919 -7.172 3.428 1.00 0.00 C
310
+ ATOM 309 OG1 THR A 36 14.233 -5.506 2.994 1.00 0.00 O
311
+ ATOM 310 N ASN A 37 13.791 -9.502 0.940 1.00 0.00 N
312
+ ATOM 311 CA ASN A 37 14.006 -10.939 0.811 1.00 0.00 C
313
+ ATOM 312 C ASN A 37 13.990 -11.380 -0.650 1.00 0.00 C
314
+ ATOM 313 CB ASN A 37 12.955 -11.713 1.610 1.00 0.00 C
315
+ ATOM 314 O ASN A 37 13.982 -12.577 -0.942 1.00 0.00 O
316
+ ATOM 315 CG ASN A 37 13.047 -11.453 3.101 1.00 0.00 C
317
+ ATOM 316 ND2 ASN A 37 11.898 -11.333 3.754 1.00 0.00 N
318
+ ATOM 317 OD1 ASN A 37 14.143 -11.360 3.661 1.00 0.00 O
319
+ ATOM 318 N ASN A 38 13.915 -10.430 -1.517 1.00 0.00 N
320
+ ATOM 319 CA ASN A 38 13.930 -10.725 -2.946 1.00 0.00 C
321
+ ATOM 320 C ASN A 38 15.353 -10.908 -3.466 1.00 0.00 C
322
+ ATOM 321 CB ASN A 38 13.216 -9.621 -3.730 1.00 0.00 C
323
+ ATOM 322 O ASN A 38 16.154 -9.972 -3.437 1.00 0.00 O
324
+ ATOM 323 CG ASN A 38 12.940 -10.011 -5.168 1.00 0.00 C
325
+ ATOM 324 ND2 ASN A 38 12.018 -9.303 -5.808 1.00 0.00 N
326
+ ATOM 325 OD1 ASN A 38 13.551 -10.942 -5.700 1.00 0.00 O
327
+ ATOM 326 N PRO A 39 15.718 -12.109 -3.794 1.00 0.00 N
328
+ ATOM 327 CA PRO A 39 17.079 -12.430 -4.229 1.00 0.00 C
329
+ ATOM 328 C PRO A 39 17.430 -11.806 -5.578 1.00 0.00 C
330
+ ATOM 329 CB PRO A 39 17.068 -13.958 -4.322 1.00 0.00 C
331
+ ATOM 330 O PRO A 39 18.598 -11.804 -5.975 1.00 0.00 O
332
+ ATOM 331 CG PRO A 39 15.632 -14.318 -4.529 1.00 0.00 C
333
+ ATOM 332 CD PRO A 39 14.778 -13.272 -3.872 1.00 0.00 C
334
+ ATOM 333 N GLN A 40 16.394 -11.251 -6.150 1.00 0.00 N
335
+ ATOM 334 CA GLN A 40 16.646 -10.746 -7.496 1.00 0.00 C
336
+ ATOM 335 C GLN A 40 17.358 -9.397 -7.452 1.00 0.00 C
337
+ ATOM 336 CB GLN A 40 15.338 -10.625 -8.278 1.00 0.00 C
338
+ ATOM 337 O GLN A 40 16.883 -8.459 -6.810 1.00 0.00 O
339
+ ATOM 338 CG GLN A 40 14.617 -11.951 -8.481 1.00 0.00 C
340
+ ATOM 339 CD GLN A 40 13.302 -11.796 -9.221 1.00 0.00 C
341
+ ATOM 340 NE2 GLN A 40 12.511 -12.864 -9.254 1.00 0.00 N
342
+ ATOM 341 OE1 GLN A 40 12.999 -10.727 -9.759 1.00 0.00 O
343
+ ATOM 342 N GLU A 41 18.658 -9.506 -7.750 1.00 0.00 N
344
+ ATOM 343 CA GLU A 41 19.475 -8.300 -7.847 1.00 0.00 C
345
+ ATOM 344 C GLU A 41 19.146 -7.510 -9.110 1.00 0.00 C
346
+ ATOM 345 CB GLU A 41 20.964 -8.656 -7.822 1.00 0.00 C
347
+ ATOM 346 O GLU A 41 18.572 -8.053 -10.056 1.00 0.00 O
348
+ ATOM 347 CG GLU A 41 21.428 -9.272 -6.510 1.00 0.00 C
349
+ ATOM 348 CD GLU A 41 22.928 -9.511 -6.457 1.00 0.00 C
350
+ ATOM 349 OE1 GLU A 41 23.442 -9.906 -5.385 1.00 0.00 O
351
+ ATOM 350 OE2 GLU A 41 23.595 -9.301 -7.494 1.00 0.00 O
352
+ ATOM 351 N ASN A 42 18.825 -6.245 -8.943 1.00 0.00 N
353
+ ATOM 352 CA ASN A 42 18.748 -5.279 -10.034 1.00 0.00 C
354
+ ATOM 353 C ASN A 42 17.322 -5.141 -10.561 1.00 0.00 C
355
+ ATOM 354 CB ASN A 42 19.697 -5.672 -11.168 1.00 0.00 C
356
+ ATOM 355 O ASN A 42 17.113 -4.986 -11.765 1.00 0.00 O
357
+ ATOM 356 CG ASN A 42 21.153 -5.643 -10.747 1.00 0.00 C
358
+ ATOM 357 ND2 ASN A 42 21.930 -6.604 -11.232 1.00 0.00 N
359
+ ATOM 358 OD1 ASN A 42 21.576 -4.764 -9.991 1.00 0.00 O
360
+ ATOM 359 N MET A 43 16.406 -5.433 -9.710 1.00 0.00 N
361
+ ATOM 360 CA MET A 43 15.006 -5.237 -10.074 1.00 0.00 C
362
+ ATOM 361 C MET A 43 14.398 -4.075 -9.295 1.00 0.00 C
363
+ ATOM 362 CB MET A 43 14.201 -6.513 -9.824 1.00 0.00 C
364
+ ATOM 363 O MET A 43 14.758 -3.842 -8.140 1.00 0.00 O
365
+ ATOM 364 CG MET A 43 14.599 -7.674 -10.720 1.00 0.00 C
366
+ ATOM 365 SD MET A 43 13.455 -9.102 -10.570 1.00 0.00 S
367
+ ATOM 366 CE MET A 43 13.902 -9.697 -8.916 1.00 0.00 C
368
+ ATOM 367 N GLY A 44 13.752 -3.201 -10.052 1.00 0.00 N
369
+ ATOM 368 CA GLY A 44 12.963 -2.172 -9.393 1.00 0.00 C
370
+ ATOM 369 C GLY A 44 11.483 -2.499 -9.336 1.00 0.00 C
371
+ ATOM 370 O GLY A 44 11.043 -3.506 -9.895 1.00 0.00 O
372
+ ATOM 371 N PHE A 45 10.800 -1.846 -8.459 1.00 0.00 N
373
+ ATOM 372 CA PHE A 45 9.356 -2.022 -8.356 1.00 0.00 C
374
+ ATOM 373 C PHE A 45 8.664 -0.687 -8.106 1.00 0.00 C
375
+ ATOM 374 CB PHE A 45 9.013 -3.009 -7.235 1.00 0.00 C
376
+ ATOM 375 O PHE A 45 9.316 0.302 -7.767 1.00 0.00 O
377
+ ATOM 376 CG PHE A 45 9.419 -2.537 -5.865 1.00 0.00 C
378
+ ATOM 377 CD1 PHE A 45 10.703 -2.769 -5.387 1.00 0.00 C
379
+ ATOM 378 CD2 PHE A 45 8.516 -1.860 -5.055 1.00 0.00 C
380
+ ATOM 379 CE1 PHE A 45 11.082 -2.334 -4.119 1.00 0.00 C
381
+ ATOM 380 CE2 PHE A 45 8.888 -1.422 -3.787 1.00 0.00 C
382
+ ATOM 381 CZ PHE A 45 10.170 -1.661 -3.321 1.00 0.00 C
383
+ ATOM 382 N ALA A 46 7.363 -0.706 -8.425 1.00 0.00 N
384
+ ATOM 383 CA ALA A 46 6.515 0.457 -8.177 1.00 0.00 C
385
+ ATOM 384 C ALA A 46 5.186 0.044 -7.550 1.00 0.00 C
386
+ ATOM 385 CB ALA A 46 6.271 1.225 -9.474 1.00 0.00 C
387
+ ATOM 386 O ALA A 46 4.642 -1.014 -7.875 1.00 0.00 O
388
+ ATOM 387 N ILE A 47 4.724 0.753 -6.545 1.00 0.00 N
389
+ ATOM 388 CA ILE A 47 3.375 0.616 -6.007 1.00 0.00 C
390
+ ATOM 389 C ILE A 47 2.622 1.935 -6.157 1.00 0.00 C
391
+ ATOM 390 CB ILE A 47 3.400 0.179 -4.525 1.00 0.00 C
392
+ ATOM 391 O ILE A 47 3.171 3.005 -5.882 1.00 0.00 O
393
+ ATOM 392 CG1 ILE A 47 4.194 1.185 -3.684 1.00 0.00 C
394
+ ATOM 393 CG2 ILE A 47 3.983 -1.231 -4.387 1.00 0.00 C
395
+ ATOM 394 CD1 ILE A 47 4.036 0.998 -2.182 1.00 0.00 C
396
+ ATOM 395 N MET A 48 1.411 1.770 -6.657 1.00 0.00 N
397
+ ATOM 396 CA MET A 48 0.618 2.953 -6.979 1.00 0.00 C
398
+ ATOM 397 C MET A 48 -0.783 2.847 -6.387 1.00 0.00 C
399
+ ATOM 398 CB MET A 48 0.533 3.148 -8.494 1.00 0.00 C
400
+ ATOM 399 O MET A 48 -1.425 1.800 -6.483 1.00 0.00 O
401
+ ATOM 400 CG MET A 48 -0.272 4.368 -8.911 1.00 0.00 C
402
+ ATOM 401 SD MET A 48 -0.347 4.571 -10.733 1.00 0.00 S
403
+ ATOM 402 CE MET A 48 1.088 3.577 -11.226 1.00 0.00 C
404
+ ATOM 403 N LEU A 49 -1.241 3.937 -5.762 1.00 0.00 N
405
+ ATOM 404 CA LEU A 49 -2.646 4.072 -5.392 1.00 0.00 C
406
+ ATOM 405 C LEU A 49 -3.494 4.445 -6.605 1.00 0.00 C
407
+ ATOM 406 CB LEU A 49 -2.813 5.127 -4.295 1.00 0.00 C
408
+ ATOM 407 O LEU A 49 -3.363 5.546 -7.144 1.00 0.00 O
409
+ ATOM 408 CG LEU A 49 -4.234 5.341 -3.771 1.00 0.00 C
410
+ ATOM 409 CD1 LEU A 49 -4.792 4.039 -3.206 1.00 0.00 C
411
+ ATOM 410 CD2 LEU A 49 -4.255 6.441 -2.715 1.00 0.00 C
412
+ ATOM 411 N ILE A 50 -4.427 3.567 -7.073 1.00 0.00 N
413
+ ATOM 412 CA ILE A 50 -5.183 3.724 -8.311 1.00 0.00 C
414
+ ATOM 413 C ILE A 50 -6.056 4.974 -8.227 1.00 0.00 C
415
+ ATOM 414 CB ILE A 50 -6.053 2.481 -8.603 1.00 0.00 C
416
+ ATOM 415 O ILE A 50 -6.663 5.247 -7.188 1.00 0.00 O
417
+ ATOM 416 CG1 ILE A 50 -5.166 1.261 -8.875 1.00 0.00 C
418
+ ATOM 417 CG2 ILE A 50 -6.995 2.746 -9.780 1.00 0.00 C
419
+ ATOM 418 CD1 ILE A 50 -5.936 -0.045 -9.017 1.00 0.00 C
420
+ ATOM 419 N ASP A 51 -6.069 5.759 -9.226 1.00 0.00 N
421
+ ATOM 420 CA ASP A 51 -6.921 6.925 -9.438 1.00 0.00 C
422
+ ATOM 421 C ASP A 51 -6.398 8.135 -8.667 1.00 0.00 C
423
+ ATOM 422 CB ASP A 51 -8.362 6.620 -9.025 1.00 0.00 C
424
+ ATOM 423 O ASP A 51 -7.171 9.018 -8.290 1.00 0.00 O
425
+ ATOM 424 CG ASP A 51 -9.006 5.537 -9.873 1.00 0.00 C
426
+ ATOM 425 OD1 ASP A 51 -8.719 5.462 -11.087 1.00 0.00 O
427
+ ATOM 426 OD2 ASP A 51 -9.809 4.753 -9.323 1.00 0.00 O
428
+ ATOM 427 N THR A 52 -5.116 8.064 -8.253 1.00 0.00 N
429
+ ATOM 428 CA THR A 52 -4.446 9.180 -7.595 1.00 0.00 C
430
+ ATOM 429 C THR A 52 -3.068 9.419 -8.204 1.00 0.00 C
431
+ ATOM 430 CB THR A 52 -4.305 8.933 -6.081 1.00 0.00 C
432
+ ATOM 431 O THR A 52 -2.650 8.695 -9.111 1.00 0.00 O
433
+ ATOM 432 CG2 THR A 52 -5.582 8.334 -5.500 1.00 0.00 C
434
+ ATOM 433 OG1 THR A 52 -3.218 8.029 -5.850 1.00 0.00 O
435
+ ATOM 434 N ASP A 53 -2.398 10.489 -7.806 1.00 0.00 N
436
+ ATOM 435 CA ASP A 53 -1.026 10.761 -8.225 1.00 0.00 C
437
+ ATOM 436 C ASP A 53 -0.028 10.312 -7.160 1.00 0.00 C
438
+ ATOM 437 CB ASP A 53 -0.842 12.251 -8.524 1.00 0.00 C
439
+ ATOM 438 O ASP A 53 1.125 10.750 -7.159 1.00 0.00 O
440
+ ATOM 439 CG ASP A 53 -1.641 12.717 -9.728 1.00 0.00 C
441
+ ATOM 440 OD1 ASP A 53 -1.728 11.972 -10.728 1.00 0.00 O
442
+ ATOM 441 OD2 ASP A 53 -2.186 13.841 -9.678 1.00 0.00 O
443
+ ATOM 442 N ILE A 54 -0.442 9.451 -6.327 1.00 0.00 N
444
+ ATOM 443 CA ILE A 54 0.406 8.993 -5.232 1.00 0.00 C
445
+ ATOM 444 C ILE A 54 1.056 7.662 -5.603 1.00 0.00 C
446
+ ATOM 445 CB ILE A 54 -0.395 8.850 -3.918 1.00 0.00 C
447
+ ATOM 446 O ILE A 54 0.362 6.682 -5.881 1.00 0.00 O
448
+ ATOM 447 CG1 ILE A 54 -1.083 10.174 -3.566 1.00 0.00 C
449
+ ATOM 448 CG2 ILE A 54 0.516 8.386 -2.778 1.00 0.00 C
450
+ ATOM 449 CD1 ILE A 54 -2.074 10.069 -2.414 1.00 0.00 C
451
+ ATOM 450 N TRP A 55 2.279 7.611 -5.677 1.00 0.00 N
452
+ ATOM 451 CA TRP A 55 2.996 6.392 -6.034 1.00 0.00 C
453
+ ATOM 452 C TRP A 55 4.426 6.427 -5.505 1.00 0.00 C
454
+ ATOM 453 CB TRP A 55 3.004 6.197 -7.552 1.00 0.00 C
455
+ ATOM 454 O TRP A 55 4.907 7.474 -5.065 1.00 0.00 O
456
+ ATOM 455 CG TRP A 55 3.649 7.321 -8.307 1.00 0.00 C
457
+ ATOM 456 CD1 TRP A 55 3.095 8.533 -8.612 1.00 0.00 C
458
+ ATOM 457 CD2 TRP A 55 4.970 7.333 -8.857 1.00 0.00 C
459
+ ATOM 458 CE2 TRP A 55 5.150 8.587 -9.482 1.00 0.00 C
460
+ ATOM 459 CE3 TRP A 55 6.020 6.405 -8.881 1.00 0.00 C
461
+ ATOM 460 NE1 TRP A 55 3.993 9.299 -9.318 1.00 0.00 N
462
+ ATOM 461 CH2 TRP A 55 7.351 8.010 -10.134 1.00 0.00 C
463
+ ATOM 462 CZ2 TRP A 55 6.341 8.936 -10.125 1.00 0.00 C
464
+ ATOM 463 CZ3 TRP A 55 7.203 6.755 -9.522 1.00 0.00 C
465
+ ATOM 464 N CYS A 56 5.000 5.260 -5.391 1.00 0.00 N
466
+ ATOM 465 CA CYS A 56 6.389 5.078 -4.984 1.00 0.00 C
467
+ ATOM 466 C CYS A 56 7.106 4.104 -5.911 1.00 0.00 C
468
+ ATOM 467 CB CYS A 56 6.464 4.574 -3.543 1.00 0.00 C
469
+ ATOM 468 O CYS A 56 6.576 3.038 -6.228 1.00 0.00 O
470
+ ATOM 469 SG CYS A 56 8.149 4.302 -2.954 1.00 0.00 S
471
+ ATOM 470 N MET A 57 8.257 4.518 -6.421 1.00 0.00 N
472
+ ATOM 471 CA MET A 57 9.125 3.657 -7.220 1.00 0.00 C
473
+ ATOM 472 C MET A 57 10.511 3.550 -6.593 1.00 0.00 C
474
+ ATOM 473 CB MET A 57 9.237 4.186 -8.651 1.00 0.00 C
475
+ ATOM 474 O MET A 57 11.049 4.539 -6.094 1.00 0.00 O
476
+ ATOM 475 CG MET A 57 7.924 4.169 -9.416 1.00 0.00 C
477
+ ATOM 476 SD MET A 57 8.115 4.724 -11.154 1.00 0.00 S
478
+ ATOM 477 CE MET A 57 9.877 4.372 -11.410 1.00 0.00 C
479
+ ATOM 478 N SER A 58 10.989 2.288 -6.576 1.00 0.00 N
480
+ ATOM 479 CA SER A 58 12.264 2.156 -5.879 1.00 0.00 C
481
+ ATOM 480 C SER A 58 13.082 0.996 -6.438 1.00 0.00 C
482
+ ATOM 481 CB SER A 58 12.038 1.955 -4.380 1.00 0.00 C
483
+ ATOM 482 O SER A 58 12.521 -0.003 -6.894 1.00 0.00 O
484
+ ATOM 483 OG SER A 58 13.273 1.808 -3.701 1.00 0.00 O
485
+ ATOM 484 N PHE A 59 14.446 1.264 -6.479 1.00 0.00 N
486
+ ATOM 485 CA PHE A 59 15.391 0.171 -6.677 1.00 0.00 C
487
+ ATOM 486 C PHE A 59 15.963 -0.298 -5.345 1.00 0.00 C
488
+ ATOM 487 CB PHE A 59 16.524 0.601 -7.614 1.00 0.00 C
489
+ ATOM 488 O PHE A 59 16.693 -1.291 -5.292 1.00 0.00 O
490
+ ATOM 489 CG PHE A 59 16.135 0.625 -9.067 1.00 0.00 C
491
+ ATOM 490 CD1 PHE A 59 16.399 -0.463 -9.890 1.00 0.00 C
492
+ ATOM 491 CD2 PHE A 59 15.504 1.736 -9.611 1.00 0.00 C
493
+ ATOM 492 CE1 PHE A 59 16.040 -0.444 -11.235 1.00 0.00 C
494
+ ATOM 493 CE2 PHE A 59 15.143 1.763 -10.955 1.00 0.00 C
495
+ ATOM 494 CZ PHE A 59 15.412 0.672 -11.766 1.00 0.00 C
496
+ ATOM 495 N ALA A 60 15.648 0.435 -4.357 1.00 0.00 N
497
+ ATOM 496 CA ALA A 60 15.954 0.014 -2.992 1.00 0.00 C
498
+ ATOM 497 C ALA A 60 14.951 -1.025 -2.500 1.00 0.00 C
499
+ ATOM 498 CB ALA A 60 15.969 1.219 -2.054 1.00 0.00 C
500
+ ATOM 499 O ALA A 60 13.911 -1.239 -3.128 1.00 0.00 O
501
+ ATOM 500 N PRO A 61 15.294 -1.798 -1.450 1.00 0.00 N
502
+ ATOM 501 CA PRO A 61 14.452 -2.918 -1.021 1.00 0.00 C
503
+ ATOM 502 C PRO A 61 13.119 -2.462 -0.431 1.00 0.00 C
504
+ ATOM 503 CB PRO A 61 15.308 -3.616 0.038 1.00 0.00 C
505
+ ATOM 504 O PRO A 61 12.211 -3.276 -0.247 1.00 0.00 O
506
+ ATOM 505 CG PRO A 61 16.273 -2.571 0.500 1.00 0.00 C
507
+ ATOM 506 CD PRO A 61 16.512 -1.612 -0.630 1.00 0.00 C
508
+ ATOM 507 N LEU A 62 12.946 -1.118 -0.268 1.00 0.00 N
509
+ ATOM 508 CA LEU A 62 11.732 -0.669 0.405 1.00 0.00 C
510
+ ATOM 509 C LEU A 62 11.131 0.538 -0.307 1.00 0.00 C
511
+ ATOM 510 CB LEU A 62 12.027 -0.321 1.866 1.00 0.00 C
512
+ ATOM 511 O LEU A 62 11.859 1.434 -0.741 1.00 0.00 O
513
+ ATOM 512 CG LEU A 62 12.417 -1.486 2.776 1.00 0.00 C
514
+ ATOM 513 CD1 LEU A 62 12.964 -0.965 4.101 1.00 0.00 C
515
+ ATOM 514 CD2 LEU A 62 11.223 -2.406 3.010 1.00 0.00 C
516
+ ATOM 515 N CYS A 63 9.795 0.579 -0.388 1.00 0.00 N
517
+ ATOM 516 CA CYS A 63 9.049 1.716 -0.916 1.00 0.00 C
518
+ ATOM 517 C CYS A 63 7.730 1.894 -0.174 1.00 0.00 C
519
+ ATOM 518 CB CYS A 63 8.783 1.534 -2.411 1.00 0.00 C
520
+ ATOM 519 O CYS A 63 7.066 0.914 0.166 1.00 0.00 O
521
+ ATOM 520 SG CYS A 63 8.129 3.012 -3.218 1.00 0.00 S
522
+ ATOM 521 N GLU A 64 7.273 3.137 0.060 1.00 0.00 N
523
+ ATOM 522 CA GLU A 64 6.045 3.385 0.811 1.00 0.00 C
524
+ ATOM 523 C GLU A 64 5.144 4.378 0.083 1.00 0.00 C
525
+ ATOM 524 CB GLU A 64 6.369 3.900 2.216 1.00 0.00 C
526
+ ATOM 525 O GLU A 64 5.631 5.312 -0.557 1.00 0.00 O
527
+ ATOM 526 CG GLU A 64 7.158 2.914 3.066 1.00 0.00 C
528
+ ATOM 527 CD GLU A 64 7.576 3.483 4.412 1.00 0.00 C
529
+ ATOM 528 OE1 GLU A 64 8.127 2.728 5.245 1.00 0.00 O
530
+ ATOM 529 OE2 GLU A 64 7.351 4.693 4.636 1.00 0.00 O
531
+ ATOM 530 N VAL A 65 3.891 4.163 0.164 1.00 0.00 N
532
+ ATOM 531 CA VAL A 65 2.869 5.126 -0.231 1.00 0.00 C
533
+ ATOM 532 C VAL A 65 2.061 5.553 0.992 1.00 0.00 C
534
+ ATOM 533 CB VAL A 65 1.931 4.547 -1.314 1.00 0.00 C
535
+ ATOM 534 O VAL A 65 1.446 4.719 1.660 1.00 0.00 O
536
+ ATOM 535 CG1 VAL A 65 0.787 5.515 -1.612 1.00 0.00 C
537
+ ATOM 536 CG2 VAL A 65 2.716 4.234 -2.587 1.00 0.00 C
538
+ ATOM 537 N LYS A 66 2.125 6.823 1.301 1.00 0.00 N
539
+ ATOM 538 CA LYS A 66 1.412 7.397 2.439 1.00 0.00 C
540
+ ATOM 539 C LYS A 66 0.230 8.244 1.976 1.00 0.00 C
541
+ ATOM 540 CB LYS A 66 2.357 8.240 3.295 1.00 0.00 C
542
+ ATOM 541 O LYS A 66 0.372 9.083 1.084 1.00 0.00 O
543
+ ATOM 542 CG LYS A 66 1.740 8.734 4.595 1.00 0.00 C
544
+ ATOM 543 CD LYS A 66 2.762 9.464 5.458 1.00 0.00 C
545
+ ATOM 544 CE LYS A 66 2.137 9.991 6.742 1.00 0.00 C
546
+ ATOM 545 NZ LYS A 66 3.131 10.723 7.582 1.00 0.00 N
547
+ ATOM 546 N PHE A 67 -0.965 8.044 2.601 1.00 0.00 N
548
+ ATOM 547 CA PHE A 67 -2.137 8.813 2.199 1.00 0.00 C
549
+ ATOM 548 C PHE A 67 -3.119 8.946 3.357 1.00 0.00 C
550
+ ATOM 549 CB PHE A 67 -2.826 8.158 0.999 1.00 0.00 C
551
+ ATOM 550 O PHE A 67 -3.043 8.196 4.332 1.00 0.00 O
552
+ ATOM 551 CG PHE A 67 -3.210 6.721 1.229 1.00 0.00 C
553
+ ATOM 552 CD1 PHE A 67 -2.288 5.700 1.029 1.00 0.00 C
554
+ ATOM 553 CD2 PHE A 67 -4.493 6.391 1.645 1.00 0.00 C
555
+ ATOM 554 CE1 PHE A 67 -2.641 4.369 1.241 1.00 0.00 C
556
+ ATOM 555 CE2 PHE A 67 -4.852 5.063 1.859 1.00 0.00 C
557
+ ATOM 556 CZ PHE A 67 -3.925 4.054 1.655 1.00 0.00 C
558
+ ATOM 557 N SER A 68 -3.986 9.931 3.229 1.00 0.00 N
559
+ ATOM 558 CA SER A 68 -5.005 10.179 4.243 1.00 0.00 C
560
+ ATOM 559 C SER A 68 -6.269 9.372 3.966 1.00 0.00 C
561
+ ATOM 560 CB SER A 68 -5.346 11.668 4.308 1.00 0.00 C
562
+ ATOM 561 O SER A 68 -6.585 9.080 2.811 1.00 0.00 O
563
+ ATOM 562 OG SER A 68 -4.210 12.428 4.685 1.00 0.00 O
564
+ ATOM 563 N TYR A 69 -6.917 8.883 5.050 1.00 0.00 N
565
+ ATOM 564 CA TYR A 69 -8.184 8.178 4.893 1.00 0.00 C
566
+ ATOM 565 C TYR A 69 -9.270 8.807 5.758 1.00 0.00 C
567
+ ATOM 566 CB TYR A 69 -8.024 6.697 5.251 1.00 0.00 C
568
+ ATOM 567 O TYR A 69 -8.972 9.565 6.684 1.00 0.00 O
569
+ ATOM 568 CG TYR A 69 -7.710 6.456 6.707 1.00 0.00 C
570
+ ATOM 569 CD1 TYR A 69 -6.406 6.554 7.185 1.00 0.00 C
571
+ ATOM 570 CD2 TYR A 69 -8.718 6.128 7.608 1.00 0.00 C
572
+ ATOM 571 CE1 TYR A 69 -6.112 6.331 8.526 1.00 0.00 C
573
+ ATOM 572 CE2 TYR A 69 -8.436 5.902 8.952 1.00 0.00 C
574
+ ATOM 573 OH TYR A 69 -6.847 5.784 10.729 1.00 0.00 O
575
+ ATOM 574 CZ TYR A 69 -7.132 6.006 9.401 1.00 0.00 C
576
+ ATOM 575 N ARG A 70 -10.583 8.696 5.262 1.00 0.00 N
577
+ ATOM 576 CA ARG A 70 -11.721 9.219 6.010 1.00 0.00 C
578
+ ATOM 577 C ARG A 70 -12.611 8.089 6.514 1.00 0.00 C
579
+ ATOM 578 CB ARG A 70 -12.536 10.183 5.146 1.00 0.00 C
580
+ ATOM 579 O ARG A 70 -12.681 7.024 5.897 1.00 0.00 O
581
+ ATOM 580 CG ARG A 70 -11.777 11.434 4.734 1.00 0.00 C
582
+ ATOM 581 CD ARG A 70 -12.666 12.410 3.977 1.00 0.00 C
583
+ ATOM 582 NE ARG A 70 -11.945 13.627 3.614 1.00 0.00 N
584
+ ATOM 583 NH1 ARG A 70 -13.751 14.639 2.588 1.00 0.00 N
585
+ ATOM 584 NH2 ARG A 70 -11.730 15.716 2.684 1.00 0.00 N
586
+ ATOM 585 CZ ARG A 70 -12.477 14.658 2.963 1.00 0.00 C
587
+ ATOM 586 N GLY A 71 -13.241 8.307 7.725 1.00 0.00 N
588
+ ATOM 587 CA GLY A 71 -14.135 7.313 8.296 1.00 0.00 C
589
+ ATOM 588 C GLY A 71 -13.408 6.233 9.075 1.00 0.00 C
590
+ ATOM 589 O GLY A 71 -12.192 6.309 9.263 1.00 0.00 O
591
+ ATOM 590 N MET A 72 -14.158 5.323 9.659 1.00 0.00 N
592
+ ATOM 591 CA MET A 72 -13.613 4.253 10.491 1.00 0.00 C
593
+ ATOM 592 C MET A 72 -13.139 3.085 9.632 1.00 0.00 C
594
+ ATOM 593 CB MET A 72 -14.656 3.771 11.500 1.00 0.00 C
595
+ ATOM 594 O MET A 72 -12.324 2.274 10.076 1.00 0.00 O
596
+ ATOM 595 CG MET A 72 -15.002 4.801 12.563 1.00 0.00 C
597
+ ATOM 596 SD MET A 72 -16.056 4.109 13.896 1.00 0.00 S
598
+ ATOM 597 CE MET A 72 -15.027 2.711 14.424 1.00 0.00 C
599
+ ATOM 598 N LYS A 73 -13.499 3.031 8.394 1.00 0.00 N
600
+ ATOM 599 CA LYS A 73 -13.158 1.998 7.420 1.00 0.00 C
601
+ ATOM 600 C LYS A 73 -13.157 2.558 6.001 1.00 0.00 C
602
+ ATOM 601 CB LYS A 73 -14.134 0.824 7.519 1.00 0.00 C
603
+ ATOM 602 O LYS A 73 -14.116 3.212 5.585 1.00 0.00 O
604
+ ATOM 603 CG LYS A 73 -13.805 -0.335 6.589 1.00 0.00 C
605
+ ATOM 604 CD LYS A 73 -14.814 -1.467 6.729 1.00 0.00 C
606
+ ATOM 605 CE LYS A 73 -14.536 -2.590 5.738 1.00 0.00 C
607
+ ATOM 606 NZ LYS A 73 -15.551 -3.681 5.839 1.00 0.00 N
608
+ ATOM 607 N ALA A 74 -12.007 2.391 5.220 1.00 0.00 N
609
+ ATOM 608 CA ALA A 74 -11.877 2.842 3.837 1.00 0.00 C
610
+ ATOM 609 C ALA A 74 -11.034 1.868 3.019 1.00 0.00 C
611
+ ATOM 610 CB ALA A 74 -11.266 4.241 3.788 1.00 0.00 C
612
+ ATOM 611 O ALA A 74 -10.053 1.315 3.520 1.00 0.00 O
613
+ ATOM 612 N MET A 75 -11.483 1.658 1.812 1.00 0.00 N
614
+ ATOM 613 CA MET A 75 -10.803 0.722 0.923 1.00 0.00 C
615
+ ATOM 614 C MET A 75 -10.086 1.463 -0.202 1.00 0.00 C
616
+ ATOM 615 CB MET A 75 -11.796 -0.283 0.337 1.00 0.00 C
617
+ ATOM 616 O MET A 75 -10.672 2.331 -0.851 1.00 0.00 O
618
+ ATOM 617 CG MET A 75 -12.428 -1.196 1.375 1.00 0.00 C
619
+ ATOM 618 SD MET A 75 -13.576 -2.419 0.630 1.00 0.00 S
620
+ ATOM 619 CE MET A 75 -15.015 -1.358 0.318 1.00 0.00 C
621
+ ATOM 620 N PHE A 76 -8.828 0.988 -0.456 1.00 0.00 N
622
+ ATOM 621 CA PHE A 76 -8.002 1.608 -1.486 1.00 0.00 C
623
+ ATOM 622 C PHE A 76 -7.388 0.550 -2.396 1.00 0.00 C
624
+ ATOM 623 CB PHE A 76 -6.897 2.459 -0.851 1.00 0.00 C
625
+ ATOM 624 O PHE A 76 -6.937 -0.495 -1.925 1.00 0.00 O
626
+ ATOM 625 CG PHE A 76 -7.413 3.553 0.044 1.00 0.00 C
627
+ ATOM 626 CD1 PHE A 76 -7.706 4.810 -0.471 1.00 0.00 C
628
+ ATOM 627 CD2 PHE A 76 -7.604 3.325 1.400 1.00 0.00 C
629
+ ATOM 628 CE1 PHE A 76 -8.184 5.825 0.355 1.00 0.00 C
630
+ ATOM 629 CE2 PHE A 76 -8.081 4.334 2.232 1.00 0.00 C
631
+ ATOM 630 CZ PHE A 76 -8.369 5.584 1.708 1.00 0.00 C
632
+ ATOM 631 N SER A 77 -7.380 0.793 -3.671 1.00 0.00 N
633
+ ATOM 632 CA SER A 77 -6.849 -0.138 -4.661 1.00 0.00 C
634
+ ATOM 633 C SER A 77 -5.443 0.262 -5.098 1.00 0.00 C
635
+ ATOM 634 CB SER A 77 -7.770 -0.206 -5.880 1.00 0.00 C
636
+ ATOM 635 O SER A 77 -5.182 1.436 -5.368 1.00 0.00 O
637
+ ATOM 636 OG SER A 77 -9.051 -0.690 -5.516 1.00 0.00 O
638
+ ATOM 637 N PHE A 78 -4.547 -0.700 -5.147 1.00 0.00 N
639
+ ATOM 638 CA PHE A 78 -3.150 -0.484 -5.504 1.00 0.00 C
640
+ ATOM 639 C PHE A 78 -2.766 -1.319 -6.720 1.00 0.00 C
641
+ ATOM 640 CB PHE A 78 -2.234 -0.822 -4.324 1.00 0.00 C
642
+ ATOM 641 O PHE A 78 -3.356 -2.373 -6.967 1.00 0.00 O
643
+ ATOM 642 CG PHE A 78 -2.257 0.204 -3.224 1.00 0.00 C
644
+ ATOM 643 CD1 PHE A 78 -1.255 1.162 -3.127 1.00 0.00 C
645
+ ATOM 644 CD2 PHE A 78 -3.281 0.210 -2.286 1.00 0.00 C
646
+ ATOM 645 CE1 PHE A 78 -1.274 2.113 -2.109 1.00 0.00 C
647
+ ATOM 646 CE2 PHE A 78 -3.307 1.158 -1.266 1.00 0.00 C
648
+ ATOM 647 CZ PHE A 78 -2.302 2.108 -1.179 1.00 0.00 C
649
+ ATOM 648 N ARG A 79 -1.852 -0.697 -7.408 1.00 0.00 N
650
+ ATOM 649 CA ARG A 79 -1.158 -1.385 -8.491 1.00 0.00 C
651
+ ATOM 650 C ARG A 79 0.318 -1.580 -8.159 1.00 0.00 C
652
+ ATOM 651 CB ARG A 79 -1.302 -0.609 -9.802 1.00 0.00 C
653
+ ATOM 652 O ARG A 79 1.017 -0.619 -7.830 1.00 0.00 O
654
+ ATOM 653 CG ARG A 79 -0.640 -1.283 -10.993 1.00 0.00 C
655
+ ATOM 654 CD ARG A 79 -0.783 -0.453 -12.262 1.00 0.00 C
656
+ ATOM 655 NE ARG A 79 -0.132 -1.094 -13.401 1.00 0.00 N
657
+ ATOM 656 NH1 ARG A 79 -0.537 0.641 -14.872 1.00 0.00 N
658
+ ATOM 657 NH2 ARG A 79 0.583 -1.230 -15.579 1.00 0.00 N
659
+ ATOM 658 CZ ARG A 79 -0.030 -0.560 -14.615 1.00 0.00 C
660
+ ATOM 659 N TYR A 80 0.823 -2.845 -8.184 1.00 0.00 N
661
+ ATOM 660 CA TYR A 80 2.220 -3.211 -7.979 1.00 0.00 C
662
+ ATOM 661 C TYR A 80 2.861 -3.665 -9.284 1.00 0.00 C
663
+ ATOM 662 CB TYR A 80 2.337 -4.317 -6.927 1.00 0.00 C
664
+ ATOM 663 O TYR A 80 2.344 -4.559 -9.959 1.00 0.00 O
665
+ ATOM 664 CG TYR A 80 3.724 -4.901 -6.812 1.00 0.00 C
666
+ ATOM 665 CD1 TYR A 80 4.021 -6.153 -7.347 1.00 0.00 C
667
+ ATOM 666 CD2 TYR A 80 4.741 -4.204 -6.168 1.00 0.00 C
668
+ ATOM 667 CE1 TYR A 80 5.297 -6.696 -7.242 1.00 0.00 C
669
+ ATOM 668 CE2 TYR A 80 6.020 -4.737 -6.057 1.00 0.00 C
670
+ ATOM 669 OH TYR A 80 7.554 -6.514 -6.490 1.00 0.00 O
671
+ ATOM 670 CZ TYR A 80 6.289 -5.982 -6.597 1.00 0.00 C
672
+ ATOM 671 N ILE A 81 3.977 -3.045 -9.726 1.00 0.00 N
673
+ ATOM 672 CA ILE A 81 4.663 -3.362 -10.974 1.00 0.00 C
674
+ ATOM 673 C ILE A 81 6.141 -3.626 -10.697 1.00 0.00 C
675
+ ATOM 674 CB ILE A 81 4.506 -2.226 -12.010 1.00 0.00 C
676
+ ATOM 675 O ILE A 81 6.795 -2.855 -9.991 1.00 0.00 O
677
+ ATOM 676 CG1 ILE A 81 3.023 -1.919 -12.245 1.00 0.00 C
678
+ ATOM 677 CG2 ILE A 81 5.205 -2.593 -13.323 1.00 0.00 C
679
+ ATOM 678 CD1 ILE A 81 2.772 -0.622 -13.001 1.00 0.00 C
680
+ ATOM 679 N MET A 82 6.624 -4.650 -11.272 1.00 0.00 N
681
+ ATOM 680 CA MET A 82 8.056 -4.932 -11.247 1.00 0.00 C
682
+ ATOM 681 C MET A 82 8.708 -4.553 -12.572 1.00 0.00 C
683
+ ATOM 682 CB MET A 82 8.309 -6.410 -10.943 1.00 0.00 C
684
+ ATOM 683 O MET A 82 8.136 -4.782 -13.639 1.00 0.00 O
685
+ ATOM 684 CG MET A 82 7.919 -6.821 -9.533 1.00 0.00 C
686
+ ATOM 685 SD MET A 82 8.465 -8.524 -9.121 1.00 0.00 S
687
+ ATOM 686 CE MET A 82 7.498 -9.472 -10.328 1.00 0.00 C
688
+ ATOM 687 N TYR A 83 9.885 -3.839 -12.450 1.00 0.00 N
689
+ ATOM 688 CA TYR A 83 10.575 -3.496 -13.688 1.00 0.00 C
690
+ ATOM 689 C TYR A 83 12.059 -3.831 -13.597 1.00 0.00 C
691
+ ATOM 690 CB TYR A 83 10.394 -2.010 -14.011 1.00 0.00 C
692
+ ATOM 691 O TYR A 83 12.607 -3.957 -12.499 1.00 0.00 O
693
+ ATOM 692 CG TYR A 83 10.659 -1.097 -12.838 1.00 0.00 C
694
+ ATOM 693 CD1 TYR A 83 9.644 -0.769 -11.943 1.00 0.00 C
695
+ ATOM 694 CD2 TYR A 83 11.924 -0.561 -12.624 1.00 0.00 C
696
+ ATOM 695 CE1 TYR A 83 9.883 0.073 -10.861 1.00 0.00 C
697
+ ATOM 696 CE2 TYR A 83 12.175 0.282 -11.546 1.00 0.00 C
698
+ ATOM 697 OH TYR A 83 11.392 1.426 -9.603 1.00 0.00 O
699
+ ATOM 698 CZ TYR A 83 11.150 0.592 -10.671 1.00 0.00 C
700
+ ATOM 699 N ASP A 84 12.579 -4.184 -14.769 1.00 0.00 N
701
+ ATOM 700 CA ASP A 84 14.004 -4.497 -14.807 1.00 0.00 C
702
+ ATOM 701 C ASP A 84 14.846 -3.224 -14.847 1.00 0.00 C
703
+ ATOM 702 CB ASP A 84 14.328 -5.379 -16.015 1.00 0.00 C
704
+ ATOM 703 O ASP A 84 14.307 -2.116 -14.805 1.00 0.00 O
705
+ ATOM 704 CG ASP A 84 14.207 -4.641 -17.337 1.00 0.00 C
706
+ ATOM 705 OD1 ASP A 84 14.151 -3.393 -17.336 1.00 0.00 O
707
+ ATOM 706 OD2 ASP A 84 14.166 -5.315 -18.390 1.00 0.00 O
708
+ ATOM 707 N GLN A 85 16.104 -3.107 -14.825 1.00 0.00 N
709
+ ATOM 708 CA GLN A 85 17.029 -1.983 -14.722 1.00 0.00 C
710
+ ATOM 709 C GLN A 85 16.939 -1.083 -15.951 1.00 0.00 C
711
+ ATOM 710 CB GLN A 85 18.463 -2.481 -14.540 1.00 0.00 C
712
+ ATOM 711 O GLN A 85 17.373 0.070 -15.916 1.00 0.00 O
713
+ ATOM 712 CG GLN A 85 18.972 -3.327 -15.700 1.00 0.00 C
714
+ ATOM 713 CD GLN A 85 20.353 -3.901 -15.446 1.00 0.00 C
715
+ ATOM 714 NE2 GLN A 85 20.790 -4.803 -16.318 1.00 0.00 N
716
+ ATOM 715 OE1 GLN A 85 21.023 -3.536 -14.474 1.00 0.00 O
717
+ ATOM 716 N ASN A 86 16.347 -1.719 -16.985 1.00 0.00 N
718
+ ATOM 717 CA ASN A 86 16.216 -0.932 -18.206 1.00 0.00 C
719
+ ATOM 718 C ASN A 86 14.876 -0.203 -18.262 1.00 0.00 C
720
+ ATOM 719 CB ASN A 86 16.389 -1.821 -19.439 1.00 0.00 C
721
+ ATOM 720 O ASN A 86 14.590 0.504 -19.229 1.00 0.00 O
722
+ ATOM 721 CG ASN A 86 17.770 -2.440 -19.524 1.00 0.00 C
723
+ ATOM 722 ND2 ASN A 86 17.829 -3.711 -19.903 1.00 0.00 N
724
+ ATOM 723 OD1 ASN A 86 18.777 -1.781 -19.250 1.00 0.00 O
725
+ ATOM 724 N GLY A 87 14.047 -0.451 -17.157 1.00 0.00 N
726
+ ATOM 725 CA GLY A 87 12.774 0.245 -17.074 1.00 0.00 C
727
+ ATOM 726 C GLY A 87 11.645 -0.491 -17.770 1.00 0.00 C
728
+ ATOM 727 O GLY A 87 10.551 0.053 -17.935 1.00 0.00 O
729
+ ATOM 728 N HIS A 88 11.979 -1.693 -18.203 1.00 0.00 N
730
+ ATOM 729 CA HIS A 88 10.969 -2.521 -18.853 1.00 0.00 C
731
+ ATOM 730 C HIS A 88 10.107 -3.246 -17.825 1.00 0.00 C
732
+ ATOM 731 CB HIS A 88 11.629 -3.533 -19.792 1.00 0.00 C
733
+ ATOM 732 O HIS A 88 10.623 -3.781 -16.841 1.00 0.00 O
734
+ ATOM 733 CG HIS A 88 12.426 -2.902 -20.889 1.00 0.00 C
735
+ ATOM 734 CD2 HIS A 88 13.745 -2.608 -20.974 1.00 0.00 C
736
+ ATOM 735 ND1 HIS A 88 11.862 -2.492 -22.078 1.00 0.00 N
737
+ ATOM 736 CE1 HIS A 88 12.803 -1.973 -22.849 1.00 0.00 C
738
+ ATOM 737 NE2 HIS A 88 13.954 -2.032 -22.202 1.00 0.00 N
739
+ ATOM 738 N ASP A 89 8.762 -3.058 -18.051 1.00 0.00 N
740
+ ATOM 739 CA ASP A 89 7.804 -3.799 -17.236 1.00 0.00 C
741
+ ATOM 740 C ASP A 89 8.001 -5.305 -17.390 1.00 0.00 C
742
+ ATOM 741 CB ASP A 89 6.371 -3.415 -17.610 1.00 0.00 C
743
+ ATOM 742 O ASP A 89 8.056 -5.818 -18.509 1.00 0.00 O
744
+ ATOM 743 CG ASP A 89 5.330 -4.062 -16.713 1.00 0.00 C
745
+ ATOM 744 OD1 ASP A 89 5.698 -4.883 -15.846 1.00 0.00 O
746
+ ATOM 745 OD2 ASP A 89 4.131 -3.750 -16.877 1.00 0.00 O
747
+ ATOM 746 N LEU A 90 8.334 -6.022 -16.320 1.00 0.00 N
748
+ ATOM 747 CA LEU A 90 8.513 -7.469 -16.350 1.00 0.00 C
749
+ ATOM 748 C LEU A 90 7.166 -8.183 -16.380 1.00 0.00 C
750
+ ATOM 749 CB LEU A 90 9.323 -7.935 -15.137 1.00 0.00 C
751
+ ATOM 750 O LEU A 90 7.109 -9.414 -16.328 1.00 0.00 O
752
+ ATOM 751 CG LEU A 90 10.771 -7.447 -15.061 1.00 0.00 C
753
+ ATOM 752 CD1 LEU A 90 11.421 -7.917 -13.764 1.00 0.00 C
754
+ ATOM 753 CD2 LEU A 90 11.563 -7.933 -16.269 1.00 0.00 C
755
+ ATOM 754 N CYS A 91 6.145 -7.647 -16.960 1.00 0.00 N
756
+ ATOM 755 CA CYS A 91 4.838 -8.217 -17.265 1.00 0.00 C
757
+ ATOM 756 C CYS A 91 4.203 -8.824 -16.019 1.00 0.00 C
758
+ ATOM 757 CB CYS A 91 4.958 -9.281 -18.356 1.00 0.00 C
759
+ ATOM 758 O CYS A 91 3.278 -9.632 -16.119 1.00 0.00 O
760
+ ATOM 759 SG CYS A 91 5.305 -8.606 -19.995 1.00 0.00 S
761
+ ATOM 760 N SER A 92 4.554 -8.421 -14.785 1.00 0.00 N
762
+ ATOM 761 CA SER A 92 3.890 -8.942 -13.594 1.00 0.00 C
763
+ ATOM 762 C SER A 92 3.226 -7.824 -12.798 1.00 0.00 C
764
+ ATOM 763 CB SER A 92 4.887 -9.688 -12.707 1.00 0.00 C
765
+ ATOM 764 O SER A 92 3.909 -6.970 -12.228 1.00 0.00 O
766
+ ATOM 765 OG SER A 92 5.438 -10.799 -13.394 1.00 0.00 O
767
+ ATOM 766 N GLN A 93 2.048 -7.488 -13.242 1.00 0.00 N
768
+ ATOM 767 CA GLN A 93 1.265 -6.515 -12.488 1.00 0.00 C
769
+ ATOM 768 C GLN A 93 0.295 -7.209 -11.535 1.00 0.00 C
770
+ ATOM 769 CB GLN A 93 0.499 -5.591 -13.435 1.00 0.00 C
771
+ ATOM 770 O GLN A 93 -0.357 -8.187 -11.908 1.00 0.00 O
772
+ ATOM 771 CG GLN A 93 1.388 -4.851 -14.425 1.00 0.00 C
773
+ ATOM 772 CD GLN A 93 0.606 -3.933 -15.345 1.00 0.00 C
774
+ ATOM 773 NE2 GLN A 93 0.928 -3.968 -16.633 1.00 0.00 N
775
+ ATOM 774 OE1 GLN A 93 -0.282 -3.198 -14.901 1.00 0.00 O
776
+ ATOM 775 N ILE A 94 0.338 -6.724 -10.308 1.00 0.00 N
777
+ ATOM 776 CA ILE A 94 -0.576 -7.263 -9.307 1.00 0.00 C
778
+ ATOM 777 C ILE A 94 -1.499 -6.156 -8.801 1.00 0.00 C
779
+ ATOM 778 CB ILE A 94 0.191 -7.903 -8.128 1.00 0.00 C
780
+ ATOM 779 O ILE A 94 -1.043 -5.052 -8.494 1.00 0.00 O
781
+ ATOM 780 CG1 ILE A 94 1.112 -9.019 -8.633 1.00 0.00 C
782
+ ATOM 781 CG2 ILE A 94 -0.785 -8.433 -7.074 1.00 0.00 C
783
+ ATOM 782 CD1 ILE A 94 2.092 -9.533 -7.587 1.00 0.00 C
784
+ ATOM 783 N PHE A 95 -2.760 -6.352 -8.842 1.00 0.00 N
785
+ ATOM 784 CA PHE A 95 -3.734 -5.442 -8.251 1.00 0.00 C
786
+ ATOM 785 C PHE A 95 -4.216 -5.966 -6.904 1.00 0.00 C
787
+ ATOM 786 CB PHE A 95 -4.925 -5.241 -9.194 1.00 0.00 C
788
+ ATOM 787 O PHE A 95 -4.517 -7.153 -6.765 1.00 0.00 O
789
+ ATOM 788 CG PHE A 95 -4.559 -4.610 -10.509 1.00 0.00 C
790
+ ATOM 789 CD1 PHE A 95 -4.613 -3.231 -10.674 1.00 0.00 C
791
+ ATOM 790 CD2 PHE A 95 -4.159 -5.395 -11.583 1.00 0.00 C
792
+ ATOM 791 CE1 PHE A 95 -4.274 -2.644 -11.891 1.00 0.00 C
793
+ ATOM 792 CE2 PHE A 95 -3.819 -4.816 -12.802 1.00 0.00 C
794
+ ATOM 793 CZ PHE A 95 -3.878 -3.440 -12.954 1.00 0.00 C
795
+ ATOM 794 N PHE A 96 -4.198 -5.106 -5.926 1.00 0.00 N
796
+ ATOM 795 CA PHE A 96 -4.673 -5.552 -4.622 1.00 0.00 C
797
+ ATOM 796 C PHE A 96 -5.353 -4.411 -3.874 1.00 0.00 C
798
+ ATOM 797 CB PHE A 96 -3.515 -6.110 -3.788 1.00 0.00 C
799
+ ATOM 798 O PHE A 96 -5.223 -3.246 -4.257 1.00 0.00 O
800
+ ATOM 799 CG PHE A 96 -2.419 -5.112 -3.528 1.00 0.00 C
801
+ ATOM 800 CD1 PHE A 96 -1.422 -4.893 -4.470 1.00 0.00 C
802
+ ATOM 801 CD2 PHE A 96 -2.386 -4.393 -2.340 1.00 0.00 C
803
+ ATOM 802 CE1 PHE A 96 -0.406 -3.970 -4.231 1.00 0.00 C
804
+ ATOM 803 CE2 PHE A 96 -1.375 -3.470 -2.094 1.00 0.00 C
805
+ ATOM 804 CZ PHE A 96 -0.385 -3.260 -3.041 1.00 0.00 C
806
+ ATOM 805 N THR A 97 -6.180 -4.778 -2.859 1.00 0.00 N
807
+ ATOM 806 CA THR A 97 -6.980 -3.835 -2.084 1.00 0.00 C
808
+ ATOM 807 C THR A 97 -6.479 -3.756 -0.645 1.00 0.00 C
809
+ ATOM 808 CB THR A 97 -8.469 -4.227 -2.092 1.00 0.00 C
810
+ ATOM 809 O THR A 97 -6.224 -4.784 -0.014 1.00 0.00 O
811
+ ATOM 810 CG2 THR A 97 -9.298 -3.253 -1.261 1.00 0.00 C
812
+ ATOM 811 OG1 THR A 97 -8.952 -4.218 -3.441 1.00 0.00 O
813
+ ATOM 812 N VAL A 98 -6.234 -2.578 -0.176 1.00 0.00 N
814
+ ATOM 813 CA VAL A 98 -5.879 -2.335 1.218 1.00 0.00 C
815
+ ATOM 814 C VAL A 98 -7.047 -1.667 1.940 1.00 0.00 C
816
+ ATOM 815 CB VAL A 98 -4.609 -1.463 1.337 1.00 0.00 C
817
+ ATOM 816 O VAL A 98 -7.631 -0.706 1.432 1.00 0.00 O
818
+ ATOM 817 CG1 VAL A 98 -4.398 -1.011 2.781 1.00 0.00 C
819
+ ATOM 818 CG2 VAL A 98 -3.388 -2.228 0.830 1.00 0.00 C
820
+ ATOM 819 N ILE A 99 -7.430 -2.236 3.089 1.00 0.00 N
821
+ ATOM 820 CA ILE A 99 -8.491 -1.681 3.923 1.00 0.00 C
822
+ ATOM 821 C ILE A 99 -7.881 -0.965 5.126 1.00 0.00 C
823
+ ATOM 822 CB ILE A 99 -9.472 -2.778 4.394 1.00 0.00 C
824
+ ATOM 823 O ILE A 99 -7.148 -1.571 5.911 1.00 0.00 O
825
+ ATOM 824 CG1 ILE A 99 -10.082 -3.502 3.188 1.00 0.00 C
826
+ ATOM 825 CG2 ILE A 99 -10.565 -2.179 5.283 1.00 0.00 C
827
+ ATOM 826 CD1 ILE A 99 -10.874 -4.750 3.551 1.00 0.00 C
828
+ ATOM 827 N CYS A 100 -8.094 0.384 5.239 1.00 0.00 N
829
+ ATOM 828 CA CYS A 100 -7.717 1.159 6.416 1.00 0.00 C
830
+ ATOM 829 C CYS A 100 -8.858 1.208 7.425 1.00 0.00 C
831
+ ATOM 830 CB CYS A 100 -7.315 2.579 6.018 1.00 0.00 C
832
+ ATOM 831 O CYS A 100 -9.811 1.971 7.255 1.00 0.00 O
833
+ ATOM 832 SG CYS A 100 -5.913 2.646 4.881 1.00 0.00 S
834
+ ATOM 833 N ARG A 101 -8.734 0.291 8.419 1.00 0.00 N
835
+ ATOM 834 CA ARG A 101 -9.778 0.171 9.433 1.00 0.00 C
836
+ ATOM 835 C ARG A 101 -9.235 0.503 10.819 1.00 0.00 C
837
+ ATOM 836 CB ARG A 101 -10.374 -1.239 9.427 1.00 0.00 C
838
+ ATOM 837 O ARG A 101 -8.234 -0.070 11.253 1.00 0.00 O
839
+ ATOM 838 CG ARG A 101 -11.479 -1.444 10.451 1.00 0.00 C
840
+ ATOM 839 CD ARG A 101 -12.048 -2.855 10.390 1.00 0.00 C
841
+ ATOM 840 NE ARG A 101 -13.253 -2.985 11.205 1.00 0.00 N
842
+ ATOM 841 NH1 ARG A 101 -13.729 -5.141 10.524 1.00 0.00 N
843
+ ATOM 842 NH2 ARG A 101 -15.098 -4.079 12.026 1.00 0.00 N
844
+ ATOM 843 CZ ARG A 101 -14.024 -4.068 11.250 1.00 0.00 C
845
+ ATOM 844 N GLU A 102 -9.902 1.446 11.474 1.00 0.00 N
846
+ ATOM 845 CA GLU A 102 -9.563 1.790 12.851 1.00 0.00 C
847
+ ATOM 846 C GLU A 102 -10.253 0.854 13.840 1.00 0.00 C
848
+ ATOM 847 CB GLU A 102 -9.940 3.242 13.152 1.00 0.00 C
849
+ ATOM 848 O GLU A 102 -11.402 0.458 13.630 1.00 0.00 O
850
+ ATOM 849 CG GLU A 102 -9.221 4.258 12.277 1.00 0.00 C
851
+ ATOM 850 CD GLU A 102 -9.635 5.694 12.559 1.00 0.00 C
852
+ ATOM 851 OE1 GLU A 102 -9.298 6.592 11.755 1.00 0.00 O
853
+ ATOM 852 OE2 GLU A 102 -10.302 5.922 13.593 1.00 0.00 O
854
+ ATOM 853 N TYR A 103 -9.489 0.409 14.757 1.00 0.00 N
855
+ ATOM 854 CA TYR A 103 -10.036 -0.359 15.869 1.00 0.00 C
856
+ ATOM 855 C TYR A 103 -10.129 0.495 17.128 1.00 0.00 C
857
+ ATOM 856 CB TYR A 103 -9.178 -1.598 16.141 1.00 0.00 C
858
+ ATOM 857 O TYR A 103 -9.113 0.975 17.636 1.00 0.00 O
859
+ ATOM 858 CG TYR A 103 -9.123 -2.564 14.982 1.00 0.00 C
860
+ ATOM 859 CD1 TYR A 103 -10.142 -3.491 14.774 1.00 0.00 C
861
+ ATOM 860 CD2 TYR A 103 -8.052 -2.553 14.095 1.00 0.00 C
862
+ ATOM 861 CE1 TYR A 103 -10.094 -4.384 13.709 1.00 0.00 C
863
+ ATOM 862 CE2 TYR A 103 -7.994 -3.442 13.027 1.00 0.00 C
864
+ ATOM 863 OH TYR A 103 -8.966 -5.234 11.786 1.00 0.00 O
865
+ ATOM 864 CZ TYR A 103 -9.018 -4.352 12.842 1.00 0.00 C
866
+ ATOM 865 N CYS A 104 -11.495 0.756 17.579 1.00 0.00 N
867
+ ATOM 866 CA CYS A 104 -11.696 1.681 18.689 1.00 0.00 C
868
+ ATOM 867 C CYS A 104 -12.302 0.967 19.891 1.00 0.00 C
869
+ ATOM 868 CB CYS A 104 -12.597 2.840 18.264 1.00 0.00 C
870
+ ATOM 869 O CYS A 104 -13.088 0.031 19.731 1.00 0.00 O
871
+ ATOM 870 SG CYS A 104 -12.045 3.679 16.763 1.00 0.00 S
872
+ ATOM 871 N CYS A 105 -11.819 1.196 21.115 1.00 0.00 N
873
+ ATOM 872 CA CYS A 105 -12.422 0.718 22.354 1.00 0.00 C
874
+ ATOM 873 C CYS A 105 -13.019 1.872 23.151 1.00 0.00 C
875
+ ATOM 874 CB CYS A 105 -11.386 -0.018 23.204 1.00 0.00 C
876
+ ATOM 875 O CYS A 105 -12.550 3.008 23.054 1.00 0.00 O
877
+ ATOM 876 SG CYS A 105 -10.010 1.023 23.738 1.00 0.00 S
878
+ TER 877 CYS A 105
879
+ ENDMDL
880
+ END