maojiashun committed on
Commit
f25ddd1
1 Parent(s): 529df0a

Upload 350 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +3 -0
  2. TransAntivirus/best_in_dataset.py +130 -0
  3. TransAntivirus/data_utils.py +206 -0
  4. TransAntivirus/download_pubchem/SARS0729_canon_desc.csv +185 -0
  5. TransAntivirus/download_pubchem/download.sh +34 -0
  6. TransAntivirus/download_pubchem/extract_info.py +82 -0
  7. TransAntivirus/download_pubchem/finetunev1_new.csv +71 -0
  8. TransAntivirus/download_pubchem/opsin-master.zip +3 -0
  9. TransAntivirus/download_pubchem/opsin-master/.github/workflows/maven.yml +29 -0
  10. TransAntivirus/download_pubchem/opsin-master/.gitignore +5 -0
  11. TransAntivirus/download_pubchem/opsin-master/LICENSE.txt +7 -0
  12. TransAntivirus/download_pubchem/opsin-master/README.md +186 -0
  13. TransAntivirus/download_pubchem/opsin-master/ReleaseNotes.txt +332 -0
  14. TransAntivirus/download_pubchem/opsin-master/fullAssembly.xml +49 -0
  15. TransAntivirus/download_pubchem/opsin-master/opsin-cli/pom.xml +58 -0
  16. TransAntivirus/download_pubchem/opsin-master/opsin-cli/src/main/java/uk/ac/cam/ch/wwmm/opsin/Cli.java +268 -0
  17. TransAntivirus/download_pubchem/opsin-master/opsin-cli/src/main/resources/log4j2.xml +13 -0
  18. TransAntivirus/download_pubchem/opsin-master/opsin-core/pom.xml +67 -0
  19. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AmbiguityChecker.java +214 -0
  20. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AnnotatorState.java +72 -0
  21. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Atom.java +647 -0
  22. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AtomParity.java +64 -0
  23. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AtomProperties.java +160 -0
  24. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Attribute.java +74 -0
  25. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AutomatonInitialiser.java +105 -0
  26. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Bond.java +184 -0
  27. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BondStereo.java +58 -0
  28. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BuildResults.java +150 -0
  29. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BuildState.java +46 -0
  30. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CASTools.java +248 -0
  31. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CMLWriter.java +217 -0
  32. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ChemEl.java +138 -0
  33. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipOrderingException.java +29 -0
  34. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipSequenceRules.java +470 -0
  35. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerationException.java +28 -0
  36. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerator.java +0 -0
  37. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentProcessor.java +0 -0
  38. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CycleDetector.java +128 -0
  39. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CyclicAtomList.java +140 -0
  40. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Element.java +229 -0
  41. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Fragment.java +633 -0
  42. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentManager.java +767 -0
  43. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentTools.java +1242 -0
  44. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FunctionalAtom.java +20 -0
  45. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FunctionalReplacement.java +1176 -0
  46. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingBuilder.java +1030 -0
  47. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumberer.java +1849 -0
  48. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/GroupingEl.java +121 -0
  49. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IDManager.java +30 -0
  50. TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IndentingXMLStreamWriter.java +50 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemical_reversed_SerialisedAutomaton.aut filter=lfs diff=lfs merge=lfs -text
37
+ TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemicalSerialisedAutomaton.aut filter=lfs diff=lfs merge=lfs -text
38
+ TransAntivirus/download_pubchem/pubchem_30m_new.csv filter=lfs diff=lfs merge=lfs -text
TransAntivirus/best_in_dataset.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import HfArgumentParser
2
+
3
+ import torch
4
+ import torch.multiprocessing
5
+ torch.multiprocessing.set_sharing_strategy("file_system")
6
+ from torch.utils.data import DataLoader, ConcatDataset
7
+
8
+ from t5 import T5IUPACTokenizer, T5Collator
9
+ from iupac_dataset import IUPACDataset
10
+ from physprop_exp import levenshtein_distance
11
+
12
+ from dataclasses import dataclass, field
13
+ from typing import Dict, Optional
14
+
15
+ import sys
16
+ import os
17
+ import itertools
18
+ from itertools import dropwhile
19
+ from multiprocessing import Pool
20
+
21
+ import numpy as np
22
+ from scipy import ndimage
23
+
24
@dataclass
class IUPACArguments:
    """Command-line options for this script.

    main() hands this dataclass to HfArgumentParser, which turns every field
    into a command-line argument; each field's ``help`` metadata is its
    description. ``dataset_dir`` and ``vocab_fn`` have no default and are
    therefore required.
    """

    # Required options (no default value).
    dataset_dir: str = field(metadata=dict(help="Directory where dataset is locaed"))
    vocab_fn: str = field(metadata=dict(help="File containing sentencepiece model"))

    # Optional options.
    dataset_filename: str = field(
        metadata=dict(help="Filename within dataset_dir containing the data"),
        default="iupacs_logp.txt",
    )
    name_col: Optional[str] = field(
        metadata=dict(help="Name of column with IUPAC names"),
        default="Preferred",  # for logp
    )
40
+
41
+
42
def main():
    """Compare every name in the IUPAC dataset against the names given on stdin.

    Steps:
      1. Parse IUPACArguments from the command line and build the tokenizer,
         which is stored in the module-level global ``tokenizer`` because
         check_if_reachable reads it from there.
      2. Read one name per line from stdin, tokenize each, and keep only those
         with at least 10 tokens (after dropping the trailing token) that do
         not contain the ``<unk>`` id.
      3. Load the train and validation IUPACDataset splits (mask probability 0)
         and collect the token ids of every item.
      4. Run check_if_reachable on every (dataset item, stdin name) pair in a
         process pool; that function prints the matching pairs itself.

    Returns None. If no stdin name survives the filter there is nothing to
    compare, so the function returns before the dataset is loaded.
    """
    parser = HfArgumentParser(IUPACArguments)
    iupac_args, = parser.parse_args_into_dataclasses()

    # check_if_reachable looks the tokenizer up through this global.
    global tokenizer
    tokenizer = T5IUPACTokenizer(vocab_file=iupac_args.vocab_fn)

    unk = tokenizer._convert_token_to_id("<unk>")

    # The names we search around, one per stdin line.
    input_iupacs = [n.strip() for n in sys.stdin.readlines()]
    # [:-1] to get rid of </s>
    base_tokenizeds = [tokenizer(b)["input_ids"][:-1] for b in input_iupacs]
    base_tokenizeds = [torch.tensor(t)
                       for t in base_tokenizeds if len(t) >= 10 and unk not in t]
    if not base_tokenizeds:
        # No usable query: the pair list would be empty, so skip the
        # (expensive) pass over the whole dataset.
        return

    dataset_kwargs = {
        "dataset_dir": iupac_args.dataset_dir,
        "tokenizer": tokenizer,
        "max_length": 128,
        "prepend_target": False,
        "mean_span_length": 3,
        "mask_probability": 0,
        # "dataset_size": 200000,
    }

    pubchem_train = IUPACDataset(train=True, **dataset_kwargs)
    pubchem_val = IUPACDataset(train=False, **dataset_kwargs)
    pubchem = ConcatDataset([pubchem_train, pubchem_val])

    batch_size = 2048

    def collate(batch):
        # Pack a batch of variable-length sequences into ONE flat 1-D tensor:
        #   [len(batch), length_0 .. length_{n-1}, tokens_0 ..., tokens_{n-1} ...]
        # [:-1] to remove </s>
        input_ids = [d["input_ids"][:-1] for d in batch]
        lengths = torch.tensor([d.numel() for d in input_ids])
        return torch.hstack([torch.tensor([len(batch)]), lengths] + input_ids)

    loader = DataLoader(pubchem,
                        batch_size=batch_size,
                        num_workers=72,
                        collate_fn=collate)

    # Undo the packing done by collate() and gather every dataset item.
    potentially_reachables = []
    for batch in loader:
        bs = batch[0]
        lengths = batch[1:bs + 1]
        tokenizeds = torch.split(batch[bs + 1:], lengths.tolist())
        potentially_reachables += tokenizeds

    pairs = itertools.product(potentially_reachables, base_tokenizeds)
    pool = Pool(144)
    try:
        # Results are reported by check_if_reachable via print(); the returned
        # booleans are not needed here.
        pool.starmap(check_if_reachable, pairs)
    finally:
        # Always release the workers, even if a task raised. close()+join()
        # (rather than terminate()) lets the workers exit normally.
        pool.close()
        pool.join()
103
+
104
def check_if_reachable(tokenized, base_tokenized):
    """Decide whether ``tokenized`` differs from ``base_tokenized`` by one short edit.

    Both arguments are 1-D tensors of token ids. Two cheap filters reject
    pairs that are clearly too different; the remaining pairs are aligned with
    levenshtein_distance and accepted when the hole-filled source mask covers
    between 1 and 5 positions. An accepted pair is printed as a quoted,
    comma-separated line of the two decoded names.

    Returns True for an accepted pair and False otherwise. The module-level
    ``tokenizer`` is used for decoding.
    """
    global tokenizer

    candidate_ids = set(tokenized.tolist())
    base_ids = set(base_tokenized.tolist())

    # Filter 1: too many token ids that occur in only one of the two sequences.
    if len(candidate_ids.symmetric_difference(base_ids)) >= 15:
        return False

    # Filter 2: sequence lengths too far apart.
    length_gap = abs(len(tokenized) - len(base_tokenized))
    if length_gap > 15:
        return False

    # NOTE(review): src_mask presumably flags the edited positions of the base
    # sequence -- confirm against physprop_exp.levenshtein_distance.
    _, src_mask, _ = levenshtein_distance(base_tokenized, tokenized)
    filled_mask = ndimage.binary_fill_holes(src_mask).astype(int)

    # we used span lengths 1-5 in gen_t5.py
    covered = filled_mask.sum()
    if covered < 1 or covered > 5:
        return False

    # this is a match
    base_iupac = tokenizer.decode(base_tokenized)
    decoded = tokenizer.decode(tokenized)
    print('"{}","{}"'.format(base_iupac, decoded))
    return True
128
+
129
# Script entry point: run main() only when this file is executed directly.
if __name__ == "__main__":
    main()
TransAntivirus/data_utils.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
def mask_spans(tokenizer, input_ids, mask_probability, mean_span_length):
    """Replace random spans of ``input_ids`` with sentinel tokens (span corruption).

    Args:
        tokenizer: object exposing ``sentinels(indices)``, which maps a tensor
            of sentinel indices to sentinel token ids.
        input_ids: 1-D tensor of token ids.
        mask_probability: fraction of tokens to mask; ``round(length *
            mask_probability)`` tokens are masked, capped at ``length - 1``.
        mean_span_length: target average length of a masked span; determines
            the number of masked spans.

    Returns:
        ``(masked_input, target_ids)``. In ``masked_input`` every masked span
        is collapsed to one sentinel; in ``target_ids`` every *unmasked* span
        is collapsed to one sentinel, so the masked tokens remain. The
        sequence always starts with an unmasked span. When fewer than two
        tokens are given, or no token would be masked, ``input_ids`` is
        returned unchanged together with ``tokenizer.sentinels(tensor([0]))``.
    """
    length = input_ids.numel()
    if length < 2:
        return input_ids, tokenizer.sentinels(torch.tensor([0]))

    num_noise_tokens = round(length * mask_probability)
    num_noise_tokens = min(max(num_noise_tokens, 0), length - 1)
    if num_noise_tokens == 0:
        return input_ids, tokenizer.sentinels(torch.tensor([0]))
    num_nonnoise_tokens = length - num_noise_tokens

    num_noise_spans = max(round(num_noise_tokens / mean_span_length), 1)
    # Every span needs at least one token, and noise / non-noise spans
    # alternate, so the span count cannot exceed either token count. Without
    # this clamp a high mask_probability yields span-length tensors of
    # different sizes and torch.stack below fails.
    num_noise_spans = min(num_noise_spans, num_noise_tokens, num_nonnoise_tokens)

    # With probability mask_probability the sequence ends on a noise span
    # (equal span counts); otherwise it ends on a non-noise span.
    if torch.rand(1).item() < mask_probability:
        num_nonnoise_spans = num_noise_spans
    else:
        num_nonnoise_spans = min(num_noise_spans + 1, num_nonnoise_tokens)

    def _random_segmentation(num_items, num_segments):
        # Randomly split num_items into num_segments non-empty consecutive
        # segments and return the segment lengths.
        ones = (torch.arange(num_items - 1) < num_segments - 1).int()
        first_in_segment = torch.cat([torch.tensor([0]).int(),
                                      ones[torch.randperm(num_items - 1)]])
        segment_id = torch.cumsum(first_in_segment, dim=0)
        _, lengths = segment_id.unique_consecutive(return_counts=True)
        return lengths

    noise_span_lengths = _random_segmentation(num_noise_tokens,
                                              num_noise_spans)
    nonnoise_span_lengths = _random_segmentation(num_nonnoise_tokens,
                                                 num_nonnoise_spans)

    # Interleave as nonnoise, noise, nonnoise, noise, ... When there is one
    # extra non-noise span, pad the noise lengths with a dummy 0 so the two
    # tensors can be stacked, then drop the dummy again.
    ends_on_nonnoise = num_nonnoise_spans > num_noise_spans
    if ends_on_nonnoise:
        noise_span_lengths = torch.cat([noise_span_lengths,
                                        torch.tensor([0])])
    interleaved_span_lengths = torch.stack([
        nonnoise_span_lengths, noise_span_lengths
    ], dim=1).view(-1)
    if ends_on_nonnoise:
        interleaved_span_lengths = interleaved_span_lengths[:-1]

    # Mark the first token of every span except the first; the running count
    # of those marks is the span number, and odd-numbered spans are noise.
    span_starts = torch.cumsum(interleaved_span_lengths, dim=0)[:-1]
    span_start_indicator = torch.zeros(length).bool()
    span_start_indicator[span_starts] = 1
    span_num = torch.cumsum(span_start_indicator, dim=0)
    is_noise = span_num % 2 == 1

    def sentinelify(tokens, noise_mask):
        # Replace the first token of each masked run with the next sentinel
        # and delete the remaining tokens of the run.
        prev_token_is_noise = torch.cat([torch.tensor([0]).bool(),
                                         noise_mask[:-1]])
        first_noise_tokens = noise_mask & ~prev_token_is_noise
        subsequent_noise_tokens = noise_mask & prev_token_is_noise
        sentinels = tokenizer.sentinels(
            torch.cumsum(first_noise_tokens, dim=0) - 1
        )
        tokens = torch.where(first_noise_tokens, sentinels, tokens)
        return tokens[~subsequent_noise_tokens]

    masked_input = sentinelify(input_ids, is_noise)
    target_ids = sentinelify(input_ids, ~is_noise)

    return masked_input, target_ids
80
+
81
+
82
def collapse_sentinels(tokenizer, input_ids, target_ids):
    """Merge a sentinel-masked input and its target back into one sequence.

    Each sentinel in ``input_ids`` is replaced by the run of non-sentinel
    tokens that follows the corresponding position in ``target_ids``.

    Args:
        tokenizer: object exposing ``eos_token_id``, ``_extra_ids`` (number of
            sentinels), ``sentinels(indices)`` and ``sentinel_mask(ids)``.
        input_ids: 1-D tensor; non-sentinel spans alternating with sentinels.
        target_ids: 1-D tensor with the complementary layout (a sentinel where
            the input has a non-sentinel span, and vice versa).

    Returns:
        1-D tensor of the merged token ids. ``</s>`` and everything after it
        are removed from both arguments first, so the result has no ``</s>``.

    Raises:
        ValueError: if either sequence uses more sentinels than the tokenizer
            provides, uses them out of order, contains two adjacent sentinels,
            or if the two sequences do not interleave.
    """
    def remove_extraneous(ids):
        # Drop the first </s> and everything after it.
        eos = tokenizer.eos_token_id
        from_eos_on = (ids == eos).cumsum(dim=0).clamp(0, 1).bool()
        return ids[:ids.numel() - from_eos_on.sum()]

    input_ids = remove_extraneous(input_ids)
    target_ids = remove_extraneous(target_ids)

    num_sentinels = tokenizer._extra_ids
    all_sentinel_ids = tokenizer.sentinels(
        torch.arange(num_sentinels).to(input_ids.device)
    )

    def validate(ids, name="ids"):
        # Sentinels must appear as the 0th, 1st, 2nd, ... sentinel in order.
        mask = tokenizer.sentinel_mask(ids)
        sentinels = ids[mask]
        expected = all_sentinel_ids[:sentinels.numel()]
        if sentinels.numel() != expected.numel():
            # Checked explicitly: comparing tensors of different sizes would
            # raise a broadcasting RuntimeError instead of a ValueError.
            raise ValueError(
                "{} contains more sentinels than the tokenizer provides"
                .format(name)
            )
        if not torch.all(sentinels == expected):
            raise ValueError("sentinels in {} are in the wrong order".format(name))
        return mask

    input_sentinel_mask = validate(input_ids, "input_ids")
    target_sentinel_mask = validate(target_ids, "target_ids")

    # Run-length encode both masks: span type (True = sentinel) and length.
    input_span_types, input_span_lengths = \
        input_sentinel_mask.unique_consecutive(return_counts=True)
    target_span_types, target_span_lengths = \
        target_sentinel_mask.unique_consecutive(return_counts=True)

    # A sentinel span longer than one token means two sentinels are adjacent.
    input_sentinel_span_lengths = input_span_lengths[input_span_types]
    target_sentinel_span_lengths = target_span_lengths[target_span_types]
    if input_sentinel_span_lengths.sum() != input_span_types.sum():
        raise ValueError("consecutive sentinel tokens in input_ids")
    if target_sentinel_span_lengths.sum() != target_span_types.sum():
        raise ValueError("consecutive sentinel tokens in target_ids")

    # Span by span, exactly one of the two sequences must hold the sentinel.
    msg = "invalid interleaving of sentinels between inputs and target"
    if input_span_types.numel() != target_span_types.numel():
        raise ValueError(msg)
    xor = torch.logical_xor(input_span_types, target_span_types)
    if xor.sum() != input_span_types.numel():
        raise ValueError(msg)

    # Stretch every sentinel to the length of the span it stands for in the
    # other sequence, so both sequences line up position by position.
    input_repeat = input_sentinel_mask.long()
    input_repeat[input_sentinel_mask] = target_span_lengths[~target_span_types]
    input_repeat[input_repeat == 0] = 1

    target_repeat = target_sentinel_mask.long()
    target_repeat[target_sentinel_mask] = input_span_lengths[~input_span_types]
    target_repeat[target_repeat == 0] = 1

    input_repeated = input_ids.repeat_interleave(input_repeat)
    target_repeated = target_ids.repeat_interleave(target_repeat)

    # Wherever the stretched input holds a sentinel, take the target's token.
    use_target = tokenizer.sentinel_mask(input_repeated)
    collapsed = torch.where(use_target, target_repeated, input_repeated)

    return collapsed
147
+
148
+
149
+
150
def recoverd(x, y, eos_token_id=1, sentinel_threshold=1400):
    """Rebuild a full sequence by filling the sentinels of ``x`` from ``y``.

    Args:
        x: 1-D CPU tensor holding a masked input; must contain
            ``eos_token_id``.
        y: 1-D CPU tensor holding the sentinel-delimited fill-ins; must
            contain ``eos_token_id`` and at least one sentinel.
        eos_token_id: id that ends a sequence (default 1, the value that was
            previously hard-coded).
        sentinel_threshold: ids strictly greater than this are treated as
            sentinels (default 1400, the value that was previously
            hard-coded).

    Returns:
        1-D tensor: ``x`` up to and including its first EOS, with each
        sentinel replaced by the tokens that follow the same sentinel in
        ``y``.

    Raises:
        ValueError: if ``y`` contains no sentinel before its EOS.
        IndexError: if ``x`` or ``y`` contains no EOS, or a sentinel of ``y``
            that is expected in ``x`` is missing there.
    """
    # Imported here because this module only imports torch at file level.
    import numpy as np

    x = x.numpy()
    y = y.numpy()

    # Keep everything up to and including the first EOS.
    x = x[:np.where(x == eos_token_id)[0][0] + 1]
    y = y[:np.where(y == eos_token_id)[0][0] + 1]

    # Distinct sentinels of y, largest id first.
    sentinels = sorted({tok for tok in y if tok > sentinel_threshold},
                       reverse=True)
    if not sentinels:
        raise ValueError("y contains no sentinel token")

    if sentinels[-1] not in x:
        # The smallest sentinel does not occur in x, so it is treated as a
        # closing marker: drop it together with the EOS that follows it.
        y = y[:-2]
        sentinels = sentinels[:-1]
    else:
        # Drop only the EOS.
        y = y[:-1]

    if len(sentinels) == 1:
        # Single gap: y is taken to be "<sentinel> fill-in ...".
        gap = np.where(x == sentinels[0])[0][0]
        pieces = [x[:gap], y[1:], x[gap + 1:]]
    else:
        pieces = []
        x_consumed = 0
        for i, sentinel in enumerate(sentinels):
            x_pos = np.where(x == sentinel)[0][0]
            y_start = np.where(y == sentinel)[0][0] + 1
            if i == len(sentinels) - 1:
                y_stop = len(y)
            else:
                y_stop = np.where(y == sentinels[i + 1])[0][0]
            # Tokens of x before this sentinel, then the fill-in from y.
            pieces.append(x[x_consumed:x_pos])
            pieces.append(y[y_start:y_stop])
            x_consumed = x_pos + 1
        # Whatever follows the last sentinel in x (including its EOS).
        pieces.append(x[x_consumed:])

    return torch.from_numpy(np.concatenate(pieces))
TransAntivirus/download_pubchem/SARS0729_canon_desc.csv ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PubChem CID|name |Preferred|Canonical<|NumAtoms|MolWt|Log P|TPSA|HBA|HBD|Fsp3|ROTB|NumRings|AROM|NumHeavyAtom|NumStereo|QED|QEPPI|SAscore|NPscore|Lipinski|Ro4|PAINS
2
+ 290486.0|Triciribine|2-(5-amino-7-methyl-2,6,7,9,11-pentazatricyclo[6.3.1.04,12]dodeca-1(12),3,5,8,10-pentaen-2-yl)-5-(hydroxymethyl)oxolane-3,4-diol|CN1N=C(N)c2cn(C3OC(CO)C(O)C3O)c3ncnc1c23|23|320.3090000000001|-1.8870999999999987|142.25|10|4|0.46153846153846156|2|4|2|23|8|0.5030837169270522|0.2230377496655855|4.206336221285245|0.5141541547619305|1|0|False
3
+ 400633.0|Capecitabine|pentyl N-[1-(3,4-dihydroxy-5-methyloxolan-2-yl)-5-fluoro-2-oxopyrimidin-4-yl]carbamate|CCCCCOC(=O)Nc1nc(=O)n(C2OC(C)C(O)C2O)cc1F|25|359.35400000000004|0.7601999999999995|122.91000000000001|8|3|0.6666666666666666|6|2|1|25|8|0.640839151584694|0.3477485501574143|3.70159793901294|0.379648786773868|1|0|False
4
+ 1546.0|Cladribine|5-(6-amino-2-chloropurin-9-yl)-2-(hydroxymethyl)oxolan-3-ol |Nc1nc(Cl)nc2c1ncn2C1CC(O)C(CO)O1|19|285.69100000000003|-0.29740000000000044|119.31000000000002|8|3|0.5|2|3|2|19|6|0.6481668237451135|0.3813779140915048|3.622779613665122|0.7589084087510317|1|0|False
5
+ 16886.0|Decitabine|4-amino-1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-1,3,5-triazin-2-one|Nc1ncn(C2CC(O)C(CO)O2)c(=O)n1|16|228.20799999999997|-2.1388|123.49|8|3|0.625|2|2|1|16|6|0.5273624425093522|0.2441325945298081|3.643221050753482|1.1104565214827065|1|0|False
6
+ 3385.0|Fluorouracil (5-Fluoracil, 5-FU)|5-fluoro-1H-pyrimidine-2,4-dione|O=c1[nH]cc(F)c(=O)[nH]1|9|130.078|-0.7977000000000001|65.72|2|2|0.0|0|1|1|9|0|0.482564502908463|0.22429291396082054|2.6590876226101727|-0.7992390609873111|1|0|False
7
+ 248862.0|Nelarabine|2-(2-amino-6-methoxypurin-9-yl)-5-(hydroxymethyl)oxolane-3,4-diol|COc1nc(N)nc2c1ncn2C1OC(CO)C(O)C1O|21|297.271|-1.9713999999999994|148.76999999999998|10|4|0.5454545454545454|3|3|2|21|8|0.5056130599451503|0.23058502859583221|3.669441345268779|0.8720015392381619|1|0|False
8
+ 354624.0|Clofarabine|5-(6-amino-2-chloropurin-9-yl)-4-fluoro-2-(hydroxymethyl)oxolan-3-ol|Nc1nc(Cl)nc2c1ncn2C1OC(CO)C(O)C1F|20|303.681|-0.34940000000000027|119.31000000000002|8|3|0.5|2|3|2|20|8|0.648649936215886|0.3883700063918832|3.8712345426538963|0.6251566198496301|1|0|False
9
+ 3368.0|Fludarabine Phosphate|[5-(6-amino-2-fluoropurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl dihydrogen phosphate|Nc1nc(F)nc2c1ncn2C1OC(COP(=O)(O)O)C(O)C1O|24|365.21400000000006|-1.7239000000000007|186.07|10|5|0.5|4|3|2|24|8|0.30617189529250766|0.17310948790732877|3.9840913898753945|1.0864995352244873|0|0|False
10
+ 135605572.0|Entecavir Hydrate|2-amino-9-[4-hydroxy-3-(hydroxymethyl)-2-methylidenecyclopentyl]-1H-purin-6-one|C=C1C(CO)C(O)CC1n1cnc2c(=O)nc(N)[nH]c21|20|277.28399999999993|-0.8278000000000001|130.04999999999998|7|4|0.4166666666666667|2|3|2|20|6|0.5301148369563783|0.3229952345490085|4.052892286880503|0.9583882509483649|1|0|False
11
+ 2754.0|Cilostazol|6-[4-(1-cyclohexyltetrazol-5-yl)butoxy]-3,4-dihydro-1H-quinolin-2-one|O=C1CCc2cc(OCCCCc3nnnn3C3CCCCC3)ccc2N1|27|369.4690000000002|3.4647000000000014|81.93|6|1|0.6|7|4|2|27|0|0.755913406639335|0.7771342880231443|2.508630577222638|-1.5209469226199739|1|0|False
12
+ 3363.0|Floxuridine|5-fluoro-1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidine-2,4-dione|O=c1[nH]c(=O)n(C2CC(O)C(CO)O2)cc1F|17|246.194|-1.6836|104.55000000000001|6|3|0.5555555555555556|2|2|1|17|6|0.5776171321259124|0.3356934148814366|3.473377071505359|0.6963745697774709|1|0|False
13
+ 5386.0|Tegafur (FT-207, NSC 148958)|5-fluoro-1-(oxolan-2-yl)pyrimidine-2,4-dione|O=c1[nH]c(=O)n(C2CCCO2)cc1F|14|200.16899999999998|-0.015300000000000313|64.09|4|1|0.5|1|2|1|14|2|0.6927125296086032|0.3535172217146212|2.9469611714043964|-0.4032937319470785|1|0|False
14
+ 5155.0|Stavudine (d4T)|1-[5-(hydroxymethyl)-2,5-dihydrofuran-2-yl]-5-methylpyrimidine-2,4-dione|Cc1cn(C2C=CC(CO)O2)c(=O)[nH]c1=O|16|224.21599999999998|-0.7090799999999999|84.32|5|2|0.4|2|2|1|16|4|0.6498890462135971|0.406330949617662|3.582011471393738|0.952976324214325|1|0|False
15
+ 3367.0|Fludarabine|2-(6-amino-2-fluoropurin-9-yl)-5-(hydroxymethyl)oxolane-3,4-diol|Nc1nc(F)nc2c1ncn2C1OC(CO)C(O)C1O|20|285.235|-1.8409000000000002|139.54000000000002|9|4|0.5|2|3|2|20|8|0.47183364718578485|0.24118104715477695|3.742329367828722|1.06082713921308|1|0|False
16
+ 5281078.0|Mycophenolate Mofetil|2-morpholin-4-ylethyl (E)-6-(4-hydroxy-6-methoxy-7-methyl-3-oxo-1H-2-benzofuran-5-yl)-4-methylhex-4-enoate|COc1c(C)c2c(c(O)c1CC=C(C)CCC(=O)OCCN1CCOCC1)C(=O)OC2|31|433.5010000000003|2.52402|94.53000000000002|8|1|0.5652173913043478|9|3|1|31|0|0.46901748000616883|0.37568520022091145|2.909809273894176|0.7259426339747483|1|0|False
17
+ 191.0|Adenosine|2-(6-aminopurin-9-yl)-5-(hydroxymethyl)oxolane-3,4-diol|Nc1ncnc2c1ncn2C1OC(CO)C(O)C1O|19|267.245|-1.9800000000000006|139.54000000000002|9|4|0.5|2|3|2|19|8|0.49051986767922323|0.23617270345702296|3.5313095881349117|1.3138998457117157|1|0|False
18
+ 1134.0|Telbivudine|1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidine-2,4-dione|Cc1cn(C2CC(O)C(CO)O2)c(=O)[nH]c1=O|17|242.23099999999997|-1.5142799999999992|104.54999999999998|6|3|0.6|2|2|1|17|6|0.5842742137821719|0.33499679157314843|3.3763667036123843|1.0380300294610116|1|0|False
19
+ 135422442.0|Didanosine|9-[5-(hydroxymethyl)oxolan-2-yl]-1H-purin-6-one|O=c1[nH]cnc2c1ncn2C1CCC(CO)O1|17|236.23099999999997|-0.21050000000000008|93.03|6|2|0.5|2|3|2|17|4|0.7521450742487874|0.5432250422691189|3.611021286177671|0.6773893007177353|1|0|False
20
+ 9837769.0|Emtricitabine|4-amino-5-fluoro-1-[2-(hydroxymethyl)-1,3-oxathiolan-5-yl]pyrimidin-2-one|Nc1nc(=O)n(C2CSC(CO)O2)cc1F|16|247.25099999999998|-0.4550000000000003|90.37|7|2|0.5|2|2|1|16|4|0.7349748054075844|0.3453875663184652|3.8873632196454855|0.6995396761378563|1|0|False
21
+ 3387.0|Lamivudine|4-amino-1-[2-(hydroxymethyl)-1,3-oxathiolan-5-yl]pyrimidin-2-one|Nc1ccn(C2CSC(CO)O2)c(=O)n1|15|229.261|-0.5941000000000001|90.36999999999999|7|2|0.5|2|2|1|15|4|0.7038615838122525|0.3378624805129178|3.8064405703937787|1.4615989713898194|1|0|False
22
+ 3461.0|Gemcitabine|4-amino-1-[3,3-difluoro-4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|Nc1ccn(C2OC(CO)C(O)C2(F)F)c(=O)n1|18|263.2|-1.2886|110.60000000000001|7|3|0.5555555555555556|2|2|1|18|6|0.6120873727300361|0.30156266317008307|3.8632725293690937|1.2986545260508495|1|0|False
23
+ 5718.0|Zalcitabine|4-amino-1-[5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|Nc1ccn(C2CCC(CO)O2)c(=O)n1|15|211.22099999999998|-0.5046000000000002|90.36999999999999|6|2|0.5555555555555556|2|2|1|15|4|0.6875458801290977|0.3738843645665796|3.417990020943229|1.2753722241588197|1|0|False
24
+ 4463.0|Nevirapine|2-cyclopropyl-7-methyl-2,4,9,15-tetrazatricyclo[9.4.0.03,8]pentadeca-1(11),3,5,7,12,14-hexaen-10-one|Cc1ccnc2c1NC(=O)c1cccnc1N2C1CC1|20|266.30400000000003|2.6512200000000004|58.120000000000005|4|1|0.26666666666666666|1|4|2|20|0|0.861716125086419|0.5536172692074276|2.5944323945817214|-0.24728332568993|1|0|False
25
+ 253083.0|Trifluridine|1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-(trifluoromethyl)pyrimidine-2,4-dione|O=c1[nH]c(=O)n(C2CC(O)C(CO)O2)cc1C(F)(F)F|20|296.20099999999996|-0.8039000000000001|104.55000000000001|6|3|0.6|2|2|1|20|6|0.6622919739385443|0.35932251164721696|3.6118403697410404|0.4994653045681|1|0|False
26
+ 1805.0|Azacitidine|4-amino-1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-1,3,5-triazin-2-one|Nc1ncn(C2OC(CO)C(O)C2O)c(=O)n1|17|244.207|-3.1679999999999997|143.72000000000003|9|4|0.625|2|2|1|17|8|0.4262474653048269|0.16004433948003915|3.687407323630228|1.3188208399256056|1|0|False
27
+ 191.0|Vidarabine|2-(6-aminopurin-9-yl)-5-(hydroxymethyl)oxolane-3,4-diol|Nc1ncnc2c1ncn2C1OC(CO)C(O)C1O|19|267.245|-1.9800000000000006|139.54000000000002|9|4|0.5|2|3|2|19|8|0.49051986767922323|0.23617270345702296|3.5313095881349117|1.3138998457117157|1|0|False
28
+ 266934.0|AICAR (Acadesine)|5-amino-1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]imidazole-4-carboxamide|NC(=O)c1ncn(C2OC(CO)C(O)C2O)c1N|18|258.23400000000004|-2.8242999999999996|156.85000000000002|8|5|0.5555555555555556|3|2|1|18|8|0.3908498923708794|0.14438999553237833|3.7095421627648637|0.8286879931867945|0|0|False
29
+ 135398513.0|Aciclovir|2-amino-9-(2-hydroxyethoxymethyl)-1H-purin-6-one|Nc1nc2c(ncn2COCCO)c(=O)[nH]1|16|225.20799999999997|-1.3318000000000003|119.04999999999998|7|3|0.375|4|2|2|16|0|0.554385971786765|0.45613595294095116|2.7707884753483327|-0.10317179830629376|1|0|False
30
+ 135398740.0|Ganciclovir|2-amino-9-(1,3-dihydroxypropan-2-yloxymethyl)-1H-purin-6-one|Nc1nc2c(ncn2COC(CO)CO)c(=O)[nH]1|18|255.23399999999998|-1.970899999999999|139.28|8|4|0.4444444444444444|5|2|2|18|0|0.4854930576221492|0.30914081370444985|2.9975492721457613|0.3788503786253501|1|0|False
31
+ 3687.0|Idoxuridine|1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-iodopyrimidine-2,4-dione|O=c1[nH]c(=O)n(C2CC(O)C(CO)O2)cc1I|17|354.1|-1.2181000000000002|104.55000000000001|6|3|0.5555555555555556|2|2|1|17|6|0.578029884636738|0.3781134158688138|3.6278549979267645|0.910464343233765|1|0|False
32
+ 3657.0|Hydroxyurea|hydroxyurea|NC(=O)NO|5|76.05499999999999|-0.9561000000000002|75.35000000000001|2|3|0.0|0|0|0|5|0|0.25664360627911675|0.1190984427030672|2.5247164941764755|-0.31540413797906003|1|0|False
33
+ 279063.0|Cyclocytidine HCl|4-(hydroxymethyl)-10-imino-3,7-dioxa-1,9-diazatricyclo[6.4.0.02,6]dodeca-8,11-dien-5-ol|N=c1ccn2c(n1)OC1C(O)C(CO)OC12|16|225.204|-1.62583|100.59000000000002|7|3|0.5555555555555556|1|3|1|16|8|0.5326902884382276|0.27411721766208946|4.446202327505897|1.497338959013144|1|0|False
34
+ 1177.0|Uridine|1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidine-2,4-dione|O=c1ccn(C2OC(CO)C(O)C2O)c(=O)[nH]1|17|244.20300000000003|-2.8519|124.78|7|4|0.5555555555555556|2|2|1|17|8|0.4435105912731592|0.21867146597756254|3.4839049824931045|1.4898002285532645|1|0|False
35
+ 3159.0|Doxifluridine|1-(3,4-dihydroxy-5-methyloxolan-2-yl)-5-fluoropyrimidine-2,4-dione|CC1OC(n2cc(F)c(=O)[nH]c2=O)C(O)C1O|17|246.19400000000002|-1.6851999999999996|104.55000000000001|6|3|0.5555555555555556|1|2|1|17|8|0.5484019207554993|0.3126320658718193|3.610332741690428|0.7868512061868354|1|0|False
36
+ 596.0|Cytidine|4-amino-1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|Nc1ccn(C2OC(CO)C(O)C2O)c(=O)n1|17|243.21900000000002|-2.563|130.82999999999998|8|4|0.5555555555555556|2|2|1|17|8|0.4489304892314893|0.18860344125062514|3.548894614600127|1.6478124261033116|1|0|False
37
+ 2691.0|CGS 21680 HCl|3-[4-[2-[[6-amino-9-[5-(ethylcarbamoyl)-3,4-dihydroxyoxolan-2-yl]purin-2-yl]amino]ethyl]phenyl]propanoic acid|CCNC(=O)C1OC(n2cnc3c(N)nc(NCCc4ccc(CCC(=O)O)cc4)nc32)C(O)C1O|36|499.52800000000025|-0.16439999999999827|197.73999999999995|11|6|0.43478260869565216|10|4|3|36|8|0.21654103037466466|0.1239959413009151|3.773892423920943|0.05996866085087501|0|0|False
38
+ 135402034.0|Guanosine|2-amino-9-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-1H-purin-6-one|Nc1nc2c(ncn2C2OC(CO)C(O)C2O)c(=O)[nH]1|20|283.24399999999997|-2.6867000000000005|159.51|9|5|0.5|2|3|2|20|8|0.3981374161148042|0.16955766100498496|3.720175521674877|1.2488959303664646|0|0|False
39
+ 135402037.0|Inosine|9-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-1H-purin-6-one|O=c1[nH]cnc2c1ncn2C1OC(CO)C(O)C1O|19|268.229|-2.2689|133.49|8|4|0.5|2|3|2|19|8|0.4822946675996651|0.27226976952981763|3.7484460177430403|1.1878722340647632|1|0|False
40
+ 5064.0|Ribavirin|1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-1,2,4-triazole-3-carboxamide|NC(=O)c1ncn(C2OC(CO)C(O)C2O)n1|17|244.20700000000002|-3.0114999999999994|143.72000000000003|8|4|0.625|3|2|1|17|8|0.44284203167437103|0.19640354038822913|3.8687708687807296|0.6906384280796763|1|0|False
41
+ 5726.0|Zidovudine|1-[4-azido-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidine-2,4-dione|Cc1cn(C2CC(N=[N+]=[N-])C(CO)O2)c(=O)[nH]c1=O|19|267.245|-0.19628000000000012|133.07999999999998|6|2|0.6|3|2|1|19|6|0.4454004294146897|0.3925697774093064|3.849317511398737|0.8935864361663792|1|0|True
42
+ 91302628.0|Sofosbuvir (PSI-7977, GS-7977)|propan-2-yl 2-[[[5-(2,4-dioxopyrimidin-1-yl)-4-fluoro-3-hydroxy-4-methyloxolan-2-yl]methoxy-phenoxyphosphoryl]amino]propanoate|CC(C)OC(=O)C(C)NP(=O)(OCC1OC(n2ccc(=O)[nH]c2=O)C(C)(F)C1O)Oc1ccccc1|36|529.4580000000002|1.6565000000000003|158.17999999999995|10|3|0.5|10|3|2|36|12|0.30528322976796235|0.29165853488216725|4.375073076955571|0.24310435922934737|0|0|False
43
+ 214347.0|Dapivirine (TMC120)|4-[[4-(2,4,6-trimethylanilino)pyrimidin-2-yl]amino]benzonitrile|Cc1cc(C)c(Nc2ccnc(Nc3ccc(C#N)cc3)n2)c(C)c1|25|329.4070000000001|4.760740000000003|73.63|5|2|0.15|4|3|3|25|0|0.7216163650237382|0.8678943244497731|2.2542436522113114|-1.5749294902855966|1|0|False
44
+ 352992.0|Clevudine|1-[3-fluoro-4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidine-2,4-dione|Cc1cn(C2OC(CO)C(O)C2F)c(=O)[nH]c1=O|18|260.221|-1.5662799999999997|104.54999999999998|6|3|0.6|2|2|1|18|8|0.5958623283349754|0.3413235419835224|3.6783565858417866|0.7867064816243999|1|0|False
45
+ 193962.0|Etravirine (TMC125)|4-[6-amino-5-bromo-2-(4-cyanoanilino)pyrimidin-4-yl]oxy-3,5-dimethylbenzonitrile|Cc1cc(C#N)cc(C)c1Oc1nc(Nc2ccc(C#N)cc2)nc(N)c1Br|28|435.2850000000001|4.717400000000002|120.63999999999999|7|2|0.1|4|3|3|28|0|0.6084537970432328|0.7274550650547328|2.701548964781006|-1.0535754913840394|1|0|False
46
+ 1869.0|N6-methyladenosine (m6A)|2-(hydroxymethyl)-5-[6-(methylamino)purin-9-yl]oxolane-3,4-diol|CNc1ncnc2c1ncn2C1OC(CO)C(O)C1O|20|281.272|-1.5205000000000009|125.55000000000001|9|4|0.5454545454545454|3|3|2|20|8|0.5342275014319904|0.2703216790234994|3.6073528343621897|1.041930725635605|1|0|False
47
+ 248010.0|Cordycepin|2-(6-aminopurin-9-yl)-5-(hydroxymethyl)oxolan-3-ol|Nc1ncnc2c1ncn2C1OC(CO)CC1O|18|251.24599999999998|-0.9508000000000008|119.31000000000002|8|3|0.5|2|3|2|18|6|0.628993302830891|0.36275129625779995|3.6322713415862893|1.3733080654408107|1|0|False
48
+ 4602.0|Osalmid|2-hydroxy-N-(4-hydroxyphenyl)benzamide|O=C(Nc1ccc(O)cc1)c1ccccc1O|17|229.23499999999999|2.3501000000000007|69.56|3|3|0.0|2|2|2|17|0|0.6921374334367943|0.46153746799921075|1.5130819664559425|-0.744133777598706|1|0|False
49
+ 9679.0|4-Amino-5-imidazolecarboxamide|4-amino-1H-imidazole-5-carboxamide|NC(=O)c1nc[nH]c1N|9|126.119|-0.9092000000000005|97.79|3|3|0.0|1|1|1|9|0|0.4539721011557528|0.2552272944309776|2.9954741610717104|-0.5140951039502445|1|0|False
50
+ 135402019.0|2'-Deoxyinosine| 9-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-1H-purin-6-one|O=c1[nH]cnc2c1ncn2C1CC(O)C(CO)O1|18|252.23|-1.2397000000000002|113.26|7|3|0.5|2|3|2|18|6|0.6158237953049276|0.4186149308280351|3.709847812174523|0.9179637393863056|1|0|False
51
+ 72830388.0|Valganciclovir HCl|[2-[(2-amino-6-oxo-4,5-dihydro-1H-purin-9-yl)methoxy]-3-hydroxypropyl] 2-amino-3-methylbutanoate|CC(C)C(N)C(=O)OCC(CO)OCN1C=NC2C(=O)NC(N)=NC21|25|356.38300000000004|-2.668999999999996|164.86|10|4|0.7142857142857143|8|2|0|25|8|0.3418489997940892|0.10362369303515996|4.612915133583076|0.8576886024449053|0|0|False
52
+ 135398748.0|Penciclovir|2-amino-9-[4-hydroxy-3-(hydroxymethyl)butyl]-1H-purin-6-one|Nc1nc(=O)c2ncn(CCC(CO)CO)c2[nH]1|18|253.26199999999997|-1.3073|130.05|7|4|0.5|5|2|2|18|0|0.5232780259906572|0.36417216397814534|2.885575425991915|-0.025008256510316646|1|0|False
53
+ 135403646.0|Azaguanine-8|5-amino-2,6-dihydrotriazolo[4,5-d]pyrimidin-7-one|Nc1nc2[nH]nnc2c(=O)[nH]1|11|152.11700000000002|-1.3766000000000003|113.34|5|3|0.0|0|2|2|11|0|0.4303161239320503|0.4073247679104095|3.506466837537925|-0.6790146471510455|1|0|False
54
+ 22138239.0|Ademetionine disulfate tosylate|2-amino-4-[[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl-methylsulfonio]butanoate|C[S+](CCC(N)C(=O)[O-])CC1OC(n2cnc3c(N)ncnc32)C(O)C1O|27|398.4450000000001|-3.256899999999996|185.45999999999998|11|4|0.6|7|3|2|27|12|0.34496993620130295|0.2235483545193125|4.7565494434378035|0.9079142947079224|0|0|False
55
+ 3203.0|Efavirenz|6-chloro-4-(2-cyclopropylethynyl)-4-(trifluoromethyl)-1H-3,1-benzoxazin-2-one|O=C1Nc2ccc(Cl)cc2C(C#CC2CC2)(C(F)(F)F)O1|21|315.67799999999994|4.073100000000001|38.33|2|1|0.35714285714285715|0|3|1|21|2|0.7328090954055055|0.2532354094918868|3.5657078249547665|0.06666647788737144|1|0|False
56
+ 72661.0|Nicotinamide N-oxide|1-oxidopyridin-1-ium-3-carboxamide|NC(=O)c1ccc[n+]([O-])c1|10|138.12599999999998|-0.5811|70.03|2|1|0.0|1|1|1|10|0|0.4162538873709143|0.23477866525463617|2.587100135906283|-1.3023827034912496|1|0|False
57
+ 1134.0|Thymidine|1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidine-2,4-dione|Cc1cn(C2CC(O)C(CO)O2)c(=O)[nH]c1=O|17|242.23099999999997|-1.5142799999999992|104.54999999999998|6|3|0.6|2|2|1|17|6|0.5842742137821719|0.33499679157314843|3.3763667036123843|1.0380300294610116|1|0|False
58
+ 1177.0|Uracil 1-β-D-arabinofuranoside|1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidine-2,4-dione|O=c1ccn(C2OC(CO)C(O)C2O)c(=O)[nH]1|17|244.20300000000003|-2.8519|124.78|7|4|0.5555555555555556|2|2|1|17|8|0.4435105912731592|0.21867146597756254|3.4839049824931045|1.4898002285532645|1|0|False
59
+ 98961.0|2,2'-Cyclouridine|5-hydroxy-4-(hydroxymethyl)-3,7-dioxa-1,9-diazatricyclo[6.4.0.02,6]dodeca-8,11-dien-10-one|O=c1ccn2c(n1)OC1C(O)C(CO)OC12|16|226.18800000000002|-1.7451000000000005|93.81000000000002|7|2|0.5555555555555556|1|3|1|16|8|0.589765502233582|0.30625756971028406|4.082753096736665|1.3490016836473129|1|0|False
60
+ 5353599.0|Brivudine|5-[(E)-2-bromoethenyl]-1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidine-2,4-dione|O=c1[nH]c(=O)n(C2CC(O)C(CO)O2)cc1C=CBr|19|333.13800000000003|-0.45710000000000006|104.55000000000001|6|3|0.45454545454545453|3|2|1|19|6|0.6945612204028229|0.40567778230739426|3.850417215540748|1.1374943613292103|1|0|False
61
+ 23700083.0|Ganciclovir sodium|2-amino-9-(1,3-dihydroxypropan-2-yloxymethyl)purin-6-olate|Nc1nc([O-])c2ncn(COC(CO)CO)c2n1|18|254.22599999999997|-2.1905999999999994|142.37|9|3|0.4444444444444444|5|2|2|18|0|0.545640459838781|0.34612172025925764|3.4886751182996063|0.03842276244240006|1|0|False
62
+ 5136.0|Ademetionine|2-amino-4-[[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl-methylsulfonio]butanoate|C[S+](CCC(N)C(=O)[O-])CC1OC(n2cnc3c(N)ncnc32)C(O)C1O|27|398.4450000000001|-3.256899999999996|185.45999999999998|11|4|0.6|7|3|2|27|12|0.34496993620130295|0.2235483545193125|4.7565494434378035|0.9079142947079224|0|0|False
63
+ 1971.0|Abacavir|[4-[2-amino-6-(cyclopropylamino)purin-9-yl]cyclopent-2-en-1-yl]methanol|Nc1nc(NC2CC2)c2ncn(C3C=CC(CO)C3)c2n1|21|286.33900000000006|1.0922999999999998|101.88|7|3|0.5|4|4|2|21|4|0.7272302824304794|0.532844209200051|3.688999973120933|0.026597937866581003|1|0|False
64
+ 135605572.0|Entecavir|2-amino-9-[4-hydroxy-3-(hydroxymethyl)-2-methylidenecyclopentyl]-1H-purin-6-one|C=C1C(CO)C(O)CC1n1cnc2c(=O)nc(N)[nH]c21|20|277.28399999999993|-0.8278000000000001|130.04999999999998|7|4|0.4166666666666667|2|3|2|20|6|0.5301148369563783|0.3229952345490085|4.052892286880503|0.9583882509483649|1|0|False
65
+ 224.0|Adenosine 5'-monophosphate monohydrate|[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl dihydrogen phosphate|Nc1ncnc2c1ncn2C1OC(COP(=O)(O)O)C(O)C1O|23|347.224|-1.8630000000000009|186.07|10|5|0.5|4|3|2|23|8|0.39017854457244017|0.17004544392240475|3.8048894493072236|1.2966757882455953|0|0|False
66
+ 191.0|Vidarabine monohydrate|2-(6-aminopurin-9-yl)-5-(hydroxymethyl)oxolane-3,4-diol|Nc1ncnc2c1ncn2C1OC(CO)C(O)C1O|19|267.245|-1.9800000000000006|139.54000000000002|9|4|0.5|2|3|2|19|8|0.49051986767922323|0.23617270345702296|3.5313095881349117|1.3138998457117157|1|0|False
67
+ 44399265.0|PSI-6206 (RO-2433, GS-331007)|1-[3-fluoro-4-hydroxy-5-(hydroxymethyl)-3-methyloxolan-2-yl]pyrimidine-2,4-dione|CC1(F)C(O)C(CO)OC1n1ccc(=O)[nH]c1=O|18|260.221|-1.4846000000000001|104.54999999999998|6|3|0.6|2|2|1|18|8|0.6009771935170747|0.3417002787027479|3.9142777396879405|1.009482945923539|1|0|False
68
+ 22451303.0|Regadenoson|1-[6-amino-9-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]purin-2-yl]-N-methylpyrazole-4-carboxamide|CNC(=O)c1cnn(-c2nc(N)c3ncn(C4OC(CO)C(O)C4O)c3n2)c1|28|390.36000000000007|-2.4346999999999985|186.45999999999998|12|5|0.4|4|4|3|28|8|0.32033113268604546|0.16974155969442686|3.8655802133882755|-0.1474028333819392|0|0|False
69
+ 56640146.0|Dasabuvir(ABT-333)|N-[6-[3-tert-butyl-5-(2,4-dioxopyrimidin-1-yl)-2-methoxyphenyl]naphthalen-2-yl]methanesulfonamide|COc1c(-c2ccc3cc(NS(C)(=O)=O)ccc3c2)cc(-n2ccc(=O)[nH]c2=O)cc1C(C)(C)C|35|493.58500000000026|4.023600000000002|110.25999999999999|6|2|0.23076923076923078|5|4|4|35|0|0.4363022246167922|0.771847321512829|2.7135256012508666|-0.6786278979178144|1|1|False
70
+ 4483256.0|Cimicifugoside|[2-hydroxy-1,4',6',12',17',17'-hexamethyl-18'-(3,4,5-trihydroxyoxan-2-yl)oxyspiro[3,6-dioxabicyclo[3.1.0]hexane-4,8'-9-oxahexacyclo[11.9.0.01,21.04,12.05,10.016,21]docos-13-ene]-3'-yl] acetate|CC(=O)OC1CC23CC24CCC(OC2OCC(O)C(O)C2O)C(C)(C)C4CC=C3C2(C)CC3OC4(CC(C)C3C12C)OC(O)C1(C)OC41|48|674.8280000000003|2.9487000000000005|156.67000000000002|11|4|0.918918918918919|3|9|0|48|36|0.1983763747315937|0.09644092817770795|7.190522840793108|3.4998626023441517|0|0|False
71
+ 640.0|2’-deoxyuridine|1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidine-2,4-dione|O=c1ccn(C2CC(O)C(CO)O2)c(=O)[nH]1|16|228.20399999999998|-1.8226999999999993|104.55000000000001|6|3|0.5555555555555556|2|2|1|16|6|0.552382719533219|0.32881089343575925|3.4575535682360012|1.247112165436718|1|0|False
72
+ 596.0|Cytarabine hydrochloride|4-amino-1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|Nc1ccn(C2OC(CO)C(O)C2O)c(=O)n1|17|243.21900000000002|-2.563|130.82999999999998|8|4|0.5555555555555556|2|2|1|17|8|0.4489304892314893|0.18860344125062514|3.548894614600127|1.6478124261033116|1|0|False
73
+ 5375662.0|trans-Zeatin-riboside|2-(hydroxymethyl)-5-[6-[[(E)-4-hydroxy-3-methylbut-2-enyl]amino]purin-9-yl]oxolane-3,4-diol|CC(=CCNc1ncnc2c1ncn2C1OC(CO)C(O)C1O)CO|25|351.36300000000006|-1.2117|145.78|10|5|0.5333333333333333|6|3|2|25|8|0.40449115551681625|0.20327430040647024|3.9559301432792626|1.339358707119148|1|0|False
74
+ 1835.0|5-Methyl-2'-deoxycytidine|4-amino-1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidin-2-one|Cc1cn(C2CC(O)C(CO)O2)c(=O)nc1N|17|241.24699999999996|-1.2253799999999995|110.6|7|3|0.6|2|2|1|17|6|0.5979214123745736|0.29479101867497837|3.566450315652517|1.1019204230282296|1|0|False
75
+ 53398647.0|Cytidine 5'-triphosphate (disodium salt)|[[[5-(4-amino-2-oxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl]oxy-oxidophosphoryl] hydrogen phosphate |Nc1ccn(C2OC(COP(=O)(O)OP(=O)([O-])OP(=O)([O-])O)C(O)C2O)c(=O)n1|29|481.14000000000016|-3.4759999999999986|276.0799999999999|15|5|0.5555555555555556|8|2|1|29|14|0.22541264416469642|0.1419968753465064|5.071692352902783|1.1942632211265276|0|0|False
76
+ 262543.0|5-Methylcytidine|4-amino-1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidin-2-one|Cc1cn(C2OC(CO)C(O)C2O)c(=O)nc1N|18|257.246|-2.2545799999999994|130.83|8|4|0.6|2|2|1|18|8|0.4701900203991378|0.19188032083627155|3.6185412012264013|1.2991831736939943|1|0|False
77
+ 249989.0|5-Methyluridine|1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidine-2,4-dione|Cc1cn(C2OC(CO)C(O)C2O)c(=O)[nH]c1=O|18|258.23|-2.5434799999999993|124.78|7|4|0.6|2|2|1|18|8|0.4628801121855684|0.22236210404580592|3.4482133166110174|1.2290593987107328|1|0|False
78
+ 13401.0|3-Hydroxypicolinic acid|3-hydroxypyridine-2-carboxylic acid|O=C(O)c1ncccc1O|10|139.10999999999999|0.4853999999999999|70.42|3|2|0.0|1|1|1|10|0|0.5933874162122502|0.298934256110774|2.01811039231654|-0.41014214009500005|1|0|False
79
+ 4349538.0|NADH, disodium salt hydrate|[[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-oxidophosphoryl] [5-(3-carbamoyl-4H-pyridin-1-yl)-3,4-dihydroxyoxolan-2-yl]methyl phosphate|NC(=O)C1=CN(C2OC(COP(=O)([O-])OP(=O)([O-])OCC3OC(n4cnc5c(N)ncnc54)C(O)C3O)C(O)C2O)C=CC1|44|663.4300000000004|-3.950399999999993|323.2800000000001|20|6|0.5238095238095238|11|5|2|44|20|0.12490354999455602|0.11711205769249591|5.678699754875055|0.6523253136204249|0|0|False
80
+ 5625.0|Delavirdine (mesylate)|N-[2-[4-[3-(propan-2-ylamino)pyridin-2-yl]piperazine-1-carbonyl]-1H-indol-5-yl]methanesulfonamide|CC(C)Nc1cccnc1N1CCN(C(=O)c2cc3cc(NS(C)(=O)=O)ccc3[nH]2)CC1|32|456.5720000000002|2.7171000000000003|110.43|6|3|0.36363636363636365|6|4|3|32|0|0.5260863337385334|0.7849383937725847|2.5284878962420905|-1.7469754852284307|1|0|False
81
+ 254731.0|uridine triacetate|[3,4-diacetyloxy-5-(2,4-dioxopyrimidin-1-yl)oxolan-2-yl]methyl acetate|CC(=O)OCC1OC(n2ccc(=O)[nH]c2=O)C(OC(C)=O)C1OC(C)=O|26|370.31400000000014|-1.1394999999999988|142.99|10|1|0.5333333333333333|5|2|1|26|8|0.5022752412372868|0.27310239595191593|3.644050594368478|1.1717654190198035|0|0|False
82
+ 58460047.0|Doravirine (MK-1439)|3-chloro-5-[1-[(4-methyl-5-oxo-1H-1,2,4-triazol-3-yl)methyl]-2-oxo-4-(trifluoromethyl)pyridin-3-yl]oxybenzonitrile|Cn1c(Cn2ccc(C(F)(F)F)c(Oc3cc(Cl)cc(C#N)c3)c2=O)n[nH]c1=O|29|425.7540000000001|2.654580000000001|105.69999999999999|7|1|0.17647058823529413|4|3|3|29|0|0.6914051268589833|0.7336281656644693|3.0219252915309625|-1.6455219788390623|1|0|False
83
+ 76450047.0|SGC 0946|1-[3-[[5-(4-amino-5-bromopyrrolo[2,3-d]pyrimidin-7-yl)-3,4-dihydroxyoxolan-2-yl]methyl-propan-2-ylamino]propyl]-3-(4-tert-butylphenyl)urea|CC(C)N(CCCNC(=O)Nc1ccc(C(C)(C)C)cc1)CC1OC(n2cc(Br)c3c(N)ncnc32)C(O)C1O|40|618.5770000000002|3.614900000000002|150.79|9|5|0.5357142857142857|9|4|3|40|8|0.22841744599826647|0.22982258021111773|4.078247297329462|-0.7007523337719948|0|0|False
84
+ 328839.0|Zebularine|1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|O=c1ncccn1C1OC(CO)C(O)C1O|16|228.204|-2.1451999999999996|104.81000000000002|7|3|0.5555555555555556|2|2|1|16|8|0.5330176479739586|0.29115142413250616|3.5563030967366647|0.9772741509174374|1|0|False
85
+ 6451164.0|Rilpivirine|4-[[4-[4-[(E)-2-cyanoethenyl]-2,6-dimethylanilino]pyrimidin-2-yl]amino]benzonitrile|Cc1cc(C=CC#N)cc(C)c1Nc1ccnc(Nc2ccc(C#N)cc2)n1|28|366.4280000000001|4.989100000000003|97.42|6|2|0.09090909090909091|5|3|3|28|0|0.6174794624332792|0.8431090497228149|2.682510236970355|-1.1293515643212821|1|0|False
86
+ 76450046.0|EPZ004777|1-[3-[[5-(4-aminopyrrolo[2,3-d]pyrimidin-7-yl)-3,4-dihydroxyoxolan-2-yl]methyl-propan-2-ylamino]propyl]-3-(4-tert-butylphenyl)urea|CC(C)N(CCCNC(=O)Nc1ccc(C(C)(C)C)cc1)CC1OC(n2ccc3c(N)ncnc32)C(O)C1O|39|539.6810000000004|2.8524000000000007|150.79|9|5|0.5357142857142857|9|4|3|39|8|0.26053822184509995|0.24195225681633897|3.956960961496261|-0.6697887882081127|0|0|False
87
+ 4984.0|Puromycin 2HCl|2-amino-N-[5-[6-(dimethylamino)purin-9-yl]-4-hydroxy-2-(hydroxymethyl)oxolan-3-yl]-3-(4-methoxyphenyl)propanamide|COc1ccc(CC(N)C(=O)NC2C(CO)OC(n3cnc4c(N(C)C)ncnc43)C2O)cc1|34|471.5180000000002|-0.7936999999999972|160.88|11|4|0.45454545454545453|8|4|3|34|10|0.32750374532544374|0.2740730496068429|3.938746143561504|0.4059082870695618|0|0|False
88
+ 5468049.0|Triapine|[(Z)-(3-aminopyridin-2-yl)methylideneamino]thiourea|NC(=S)NN=Cc1ncccc1N|13|195.251|-0.16910000000000014|89.32|4|3|0.0|2|1|1|13|0|0.3461858313292753|0.3468357875734713|2.880743596162672|-1.803547330412908|1|0|False
89
+ 74405855.0|VER155008|4-[[5-[6-amino-8-[(3,4-dichlorophenyl)methylamino]purin-9-yl]-3,4-dihydroxyoxolan-2-yl]methoxymethyl]benzonitrile|N#Cc1ccc(COCC2OC(n3c(NCc4ccc(Cl)c(Cl)c4)nc4c(N)ncnc43)C(O)C2O)cc1|38|556.4100000000003|3.0350800000000007|164.35999999999999|11|4|0.28|8|5|4|38|8|0.2535931056491399|0.25407821914119455|3.881323921608492|-0.48547704742053677|0|0|False
90
+ 3032861.0|6-Thio-dG|2-amino-9-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-3H-purine-6-thione|Nc1nc(=S)c2ncn(C3CC(O)C(CO)O3)c2[nH]1|19|283.313|-0.28821000000000024|122.21000000000001|8|4|0.5|2|3|2|19|6|0.5585666589379309|0.29376434618526803|3.8831343306462536|0.7695013335480106|1|0|False
91
+ 23712387.0|8-Bromo-cAMP|6-(6-amino-8-bromopurin-9-yl)-2-oxido-2-oxo-4a,6,7,7a-tetrahydro-4H-furo[3,2-d][1,3,2]dioxaphosphinin-7-ol|Nc1ncnc2c1nc(Br)n2C1OC2COP(=O)([O-])OC2C1O|23|407.0970000000001|-0.6869000000000003|157.67|11|2|0.5|1|4|2|23|10|0.4580747351009518|0.28611028834400404|4.886716744658878|0.5502166703091175|0|0|False
92
+ 16760396.0|Dibutyryl-cAMP (Bucladesine)|[6-[6-(butanoylamino)purin-9-yl]-2-oxido-2-oxo-4a,6,7,7a-tetrahydro-4H-furo[3,2-d][1,3,2]dioxaphosphinin-7-yl] butanoate|CCCC(=O)Nc1ncnc2c1ncn2C1OC2COP(=O)([O-])OC2C1OC(=O)CCC|32|468.3830000000003|1.057999999999999|166.82|12|1|0.6111111111111112|7|4|2|32|10|0.45563025867212836|0.35203094284843184|4.558877319850212|0.39303810441434994|0|0|False
93
+ 236184.0|Bromodeoxyuridine (BrdU)|5-bromo-1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidine-2,4-dione|O=c1[nH]c(=O)n(C2CC(O)C(CO)O2)cc1Br|17|307.1|-1.0602|104.55000000000001|6|3|0.5555555555555556|2|2|1|17|6|0.6458903815058055|0.3614417237796252|3.5611586768565298|0.865449848697765|1|0|False
94
+ 409805.0|NSC 23766|6-N-[2-[5-(diethylamino)pentan-2-ylamino]-6-methylpyrimidin-4-yl]-2-methylquinoline-4,6-diamine|CCN(CC)CCCC(C)Nc1nc(C)cc(Nc2ccc3nc(C)cc(N)c3c2)n1|31|421.5930000000002|4.889840000000003|91.99|7|3|0.4583333333333333|10|3|3|31|2|0.42909503459369336|0.5500110053919122|3.111060885927593|-1.299117956324184|1|0|False
95
+ 5604.0|Tubercidin|2-(4-aminopyrrolo[2,3-d]pyrimidin-7-yl)-5-(hydroxymethyl)oxolane-3,4-diol|Nc1ncnc2c1ccn2C1OC(CO)C(O)C1O|19|266.25699999999995|-1.3750000000000007|126.65|8|4|0.45454545454545453|2|3|2|19|8|0.5275658981283201|0.2793960532890429|3.649636728918657|1.0957341600283055|1|0|False
96
+ 5270.0|SQ22536|9-(oxolan-2-yl)purin-6-amine|Nc1ncnc2c1ncn2C1CCCO1|15|205.22099999999995|0.7174999999999998|78.85000000000001|6|1|0.4444444444444444|1|3|2|15|2|0.7415418091420238|0.5011659803359577|2.957435140772226|0.09555946645130006|1|0|False
97
+ 1830.0|5-Iodotubercidin|2-(4-amino-5-iodopyrrolo[2,3-d]pyrimidin-7-yl)-5-(hydroxymethyl)oxolane-3,4-diol|Nc1ncnc2c1c(I)cn2C1OC(CO)C(O)C1O|20|392.1530000000001|-0.7703999999999998|126.65|8|4|0.45454545454545453|2|3|2|20|8|0.4946949398053289|0.31898773411320597|3.8417019552413096|0.9361303704689201|1|0|False
98
+ 137795344.0|LLY-284|2-(4-aminopyrrolo[2,3-d]pyrimidin-7-yl)-5-[hydroxy(phenyl)methyl]oxolane-3,4-diol|Nc1ncnc2c1ccn2C1OC(C(O)c2ccccc2)C(O)C1O|25|342.355|0.3663000000000001|126.65|8|4|0.29411764705882354|3|4|3|25|10|0.5413254255627207|0.37218328519283805|3.804013205830268|0.7875002540522721|1|0|False
99
+ 22608122.0|A-317491|5-[(3-phenoxyphenyl)methyl-(1,2,3,4-tetrahydronaphthalen-1-yl)carbamoyl]benzene-1,2,4-tricarboxylic acid|O=C(O)c1cc(C(=O)O)c(C(=O)N(Cc2cccc(Oc3ccccc3)c2)C2CCCc3ccccc32)cc1C(=O)O|42|565.5780000000003|6.293500000000005|141.43999999999997|5|3|0.15151515151515152|9|5|4|42|2|0.21488624386430227|0.5815074358568025|3.049057713232612|-0.6471647529123546|0|1|False
100
+ 135402018.0|2'-Deoxyguanosine monohydrate|2-amino-9-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-1H-purin-6-one|Nc1nc2c(ncn2C2CC(O)C(CO)O2)c(=O)[nH]1|19|267.24499999999995|-1.6575000000000002|139.28|8|4|0.5|2|3|2|19|6|0.5103728382686842|0.269347900063449|3.674451892329852|0.9964049194238052|1|0|False
101
+ 3479482.0|Khasianine|2-[4,5-dihydroxy-2-(hydroxymethyl)-6-(5',7,9,13-tetramethylspiro[5-oxapentacyclo[10.8.0.02,9.04,8.013,18]icos-18-ene-6,2'-piperidine]-16-yl)oxyoxan-3-yl]oxy-6-methyloxane-3,4,5-triol|CC1CCC2(NC1)OC1CC3C4CC=C5CC(OC6OC(CO)C(OC7OC(C)C(O)C(O)C7O)C(O)C6O)CCC5(C)C4CCC3(C)C1C2C|51|721.9289999999999|1.9629000000000019|179.56|12|7|0.9487179487179487|5|8|0|51|42|0.20575693831306613|0.05320714136035518|6.299089879544575|2.778465708811112|0|0|False
102
+ 223996.0|Isoguanosine|6-amino-9-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-1H-purin-2-one|Nc1nc(=O)[nH]c2c1ncn2C1OC(CO)C(O)C1O|20|283.244|-2.6866999999999996|159.51|9|5|0.5|2|3|2|20|8|0.39813741611480424|0.16955766100498496|3.9266040531434063|1.0924191136221846|0|0|False
103
+ 636.0|2'-Deoxyadenosine monohydrate|5-(6-aminopurin-9-yl)-2-(hydroxymethyl)oxolan-3-ol|Nc1ncnc2c1ncn2C1CC(O)C(CO)O1|18|251.24599999999998|-0.9508000000000003|119.31000000000002|8|3|0.5|2|3|2|18|6|0.628993302830891|0.36275129625779995|3.4841962284641177|1.0509928850136445|1|0|False
104
+ 224.0|5'-Adenylic acid|[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl dihydrogen phosphate|Nc1ncnc2c1ncn2C1OC(COP(=O)(O)O)C(O)C1O|23|347.224|-1.8630000000000009|186.07|10|5|0.5|4|3|2|23|8|0.39017854457244017|0.17004544392240475|3.8048894493072236|1.2966757882455953|0|0|False
105
+ 197.0|ADP|[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl phosphono hydrogen phosphate|Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OP(=O)(O)O)C(O)C1O|27|427.20300000000003|-1.7460000000000009|232.59999999999997|12|6|0.5|6|3|2|27|10|0.28079075856620234|0.1854502601232538|4.125297661526227|1.3883535027781002|0|0|False
106
+ 1189.0|Xanthosine Dihydrate|9-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-3H-purine-2,6-dione|O=c1[nH]c(=O)c2ncn(C3OC(CO)C(O)C3O)c2[nH]1|20|284.228|-2.9755999999999996|153.46|8|5|0.5|2|3|2|20|8|0.39414005568585614|0.19608362822222536|3.7807159412552958|1.01042710944925|1|0|False
107
+ 1172.0|Uridine 5'-monophosphate| [5-(2,4-dioxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methyl dihydrogen phosphate|O=c1ccn(C2OC(COP(=O)(O)O)C(O)C2O)c(=O)[nH]1|21|324.182|-2.7349|171.31|8|5|0.5555555555555556|4|2|1|21|8|0.3684552666679203|0.15597877417689962|3.7734762637985257|1.4404129497273093|0|0|False
108
+ 5270766.0|Besifovir| [1-[(2-aminopurin-9-yl)methyl]cyclopropyl]oxymethylphosphonic acid|Nc1ncc2ncn(CC3(OCP(=O)(O)O)CC3)c2n1|20|299.2270000000001|0.09289999999999987|136.38|7|3|0.5|5|3|2|20|0|0.6607224923488081|0.4978291568100905|3.3061717394473806|-0.07424561820598499|1|0|False
109
+ 135398661.0|3-Methyladenine (3-MA)|3-methyl-7H-purin-6-imine|Cn1cnc(N)c2ncnc1-2|11|149.15699999999998|-0.1029000000000001|69.62|5|1|0.16666666666666666|0|2|0|11|0|0.56733716665046|0.2032784259567452|2.723105050937429|-0.6977762718276544|1|0|False
110
+ 1188.0|2,6-Dihydroxypurine|3,7-dihydropurine-2,6-dione|O=c1[nH]c(=O)c2[nH]cnc2[nH]1|11|152.113|-1.0605000000000004|94.4|3|3|0.0|0|2|2|11|0|0.44866812387763694|0.35339783622062465|2.855621676247601|-0.5009022124224091|1|0|False
111
+ 93556.0|Dihydrothymine|5-methyl-1,3-diazinane-2,4-dione|CC1CNC(=O)NC1=O|9|128.131|-0.5381|58.2|2|2|0.6|0|1|0|9|2|0.46007898954400794|0.1326482996202467|3.3119003089141454|0.44655419103448896|1|0|False
112
+ 667490.0|6-Mercaptopurine (6-MP) Monohydrate|3,7-dihydropurine-6-thione|S=c1nc[nH]c2nc[nH]c12|10|152.182|1.0154899999999998|57.36|3|2|0.0|0|2|2|10|0|0.5538546640663037|0.401710178865939|3.3698736107073435|-0.5813401541192601|1|0|False
113
+ 5359277.0|6-Chloropurine|6-chloro-7H-purine|Clc1ncnc2[nH]cnc12|10|154.56|1.0062999999999998|54.46|3|1|0.0|0|2|2|10|0|0.5764610700745062|0.39268763760375136|2.8791181730415616|-0.8371085619823599|1|0|False
114
+ 3758.0|IBMX|1-methyl-3-(2-methylpropyl)-7H-purine-2,6-dione|CC(C)Cn1c(=O)n(C)c(=O)c2nc[nH]c21|16|222.248|0.07929999999999976|72.68|5|1|0.5|2|2|2|16|0|0.7816409579492779|0.5763174668836328|2.741826113988761|-0.5844837553057187|1|0|False
115
+ 3134.0|6-(Dimethylamino)purine|N,N-dimethyl-7H-purin-6-amine|CN(C)c1ncnc2nc[nH]c12|12|163.18400000000003|0.4189|57.7|4|1|0.2857142857142857|1|2|2|12|0|0.6614854966833106|0.4787635273265147|2.72771441208657|-1.0546352869607836|1|0|False
116
+ 66950.0|Isocytosine|2-amino-1H-pyrimidin-6-one|Nc1nccc(=O)[nH]1|8|111.10399999999998|-0.6479000000000001|71.77|3|2|0.0|0|1|1|8|0|0.4670416161550293|0.27448926713621147|2.683688749895339|-0.2670902200110874|1|0|False
117
+ 940.0|Nicotinamide Riboside Chloride (NIAGEN)|1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyridin-1-ium-3-carboxamide|NC(=O)c1ccc[n+](C2OC(CO)C(O)C2O)c1|18|255.25|-2.3154999999999983|116.89000000000001|5|4|0.45454545454545453|3|2|1|18|8|0.45250669782783515|0.2942152228724691|3.86876227657962|0.937203306924639|1|0|False
118
+ 537159.0|Solasonine|2-[5-hydroxy-6-(hydroxymethyl)-2-(5',7,9,13-tetramethylspiro[5-oxapentacyclo[10.8.0.02,9.04,8.013,18]icos-18-ene-6,2'-piperidine]-16-yl)oxy-4-[3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxyoxan-3-yl]oxy-6-methyloxane-3,4,5-triol|CC1CCC2(NC1)OC1CC3C4CC=C5CC(OC6OC(CO)C(O)C(OC7OC(CO)C(O)C(O)C7O)C6OC6OC(C)C(O)C(O)C6O)CCC5(C)C4CCC3(C)C1C2C|62|884.07|-0.2128999999999921|258.71|17|10|0.9555555555555556|8|9|0|62|52|0.14154743306960962|0.051891635005697746|6.761253268994176|2.31431842126156|0|0|False
119
+ 437080.0|Solamargine|2-[4-hydroxy-2-(hydroxymethyl)-6-(5',7,9,13-tetramethylspiro[5-oxapentacyclo[10.8.0.02,9.04,8.013,18]icos-18-ene-6,2'-piperidine]-16-yl)oxy-5-(3,4,5-trihydroxy-6-methyloxan-2-yl)oxyoxan-3-yl]oxy-6-methyloxane-3,4,5-triol|CC1CCC2(NC1)OC1CC3C4CC=C5CC(OC6OC(CO)C(OC7OC(C)C(O)C(O)C7O)C(O)C6OC6OC(C)C(O)C(O)C6O)CCC5(C)C4CCC3(C)C1C2C|61|868.071|0.8147000000000062|238.47999999999996|16|9|0.9555555555555556|7|9|0|61|52|0.16112638285309447|0.05545407431461643|6.750529522128671|2.3842641223563397|0|0|False
120
+ 240980.0|2'-Deoxy-5-Fluorocytidine|4-amino-5-fluoro-1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|Nc1nc(=O)n(C2CC(O)C(CO)O2)cc1F|17|245.20999999999998|-1.3947000000000003|110.60000000000001|7|3|0.5555555555555556|2|2|1|17|6|0.5910181091914157|0.2952824050893773|3.606938275518738|0.7886448709701591|1|0|False
121
+ 284240.0|3'-Fluoro-3'-deoxythymidine (Alovudine)|1-[4-fluoro-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidine-2,4-dione|Cc1cn(C2CC(F)C(CO)O2)c(=O)[nH]c1=O|17|244.22199999999998|-0.53708|84.32|5|2|0.6|2|2|1|17|6|0.731622172408428|0.41646450691211045|3.586677740401681|0.507872196497247|1|0|False
122
+ 137795344.0|LLY-283|2-(4-aminopyrrolo[2,3-d]pyrimidin-7-yl)-5-[hydroxy(phenyl)methyl]oxolane-3,4-diol|Nc1ncnc2c1ccn2C1OC(C(O)c2ccccc2)C(O)C1O|25|342.355|0.3663000000000001|126.65|8|4|0.29411764705882354|3|4|3|25|10|0.5413254255627207|0.37218328519283805|3.804013205830268|0.7875002540522721|1|0|False
123
+ 155886644.0|EIDD-2801|[3,4-dihydroxy-5-[4-(hydroxyamino)-2-oxopyrimidin-1-yl]oxolan-2-yl]methyl 2-methylpropanoate|CC(C)C(=O)OCC1OC(n2ccc(=NO)[nH]c2=O)C(O)C1O|23|329.309|-1.7152999999999974|146.37|9|4|0.6153846153846154|4|2|1|23|8|0.29191347249939087|0.20002777173354394|4.189860275255468|1.2390387878063869|1|0|False
124
+ 1651.0|3-Deazaadenosine hydrochloride|2-(4-aminoimidazo[4,5-c]pyridin-1-yl)-5-(hydroxymethyl)oxolane-3,4-diol |Nc1nccc2c1ncn2C1OC(CO)C(O)C1O|19|266.257|-1.3750000000000007|126.65|8|4|0.45454545454545453|2|3|2|19|8|0.5275658981283201|0.2793960532890429|3.656019022097176|1.2390070639558106|1|0|False
125
+ |Uridine 5-diphosphoglucose disodium salt|[[(2R,3S,4R,5R)-5-(2,4-dioxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-[(2R,3S,4S,5R)-1,3,4,5-tetrahydroxy-6-oxohexan-2-yl]oxyphosphoryl] phosphate|O=CC(O)C(O)C(O)C(CO)OP(=O)(OCC1OC(n2ccc(=O)[nH]c2=O)C(O)C1O)OP(=O)([O-])[O-]|36|564.2860000000002|-6.215099999999993|310.48999999999995|18|7|0.6666666666666666|13|2|1|36|18|0.08650496257970032|0.08720964070568987|5.369017676264923|1.3690714143354032|0|0|False
126
+ 70639.0|3-Methylxanthine|3-methyl-7H-purine-2,6-dione|Cn1c(=O)[nH]c(=O)c2[nH]cnc21|12|166.14|-1.0501000000000005|83.54|4|2|0.16666666666666666|0|2|2|12|0|0.5233953855323817|0.4649418539506477|2.6826240788903544|-0.9379367321269417|1|0|False
127
+ 72386069.0|Acelarin (NUC-1031)|benzyl 2-[[[5-(4-amino-2-oxopyrimidin-1-yl)-4,4-difluoro-3-hydroxyoxolan-2-yl]methoxy-phenoxyphosphoryl]amino]propanoate|CC(NP(=O)(OCC1OC(n2ccc(N)nc2=O)C(F)(F)C1O)Oc1ccccc1)C(=O)OCc1ccccc1|40|580.4810000000003|2.6443000000000003|164.22999999999996|11|3|0.32|11|4|3|40|10|0.22561375399287723|0.27837300531820436|4.196538827820978|0.34838553412737994|0|0|False
128
+ 135401907.0|Allopurinol|1,5-dihydropyrazolo[3,4-d]pyrimidin-4-one|O=c1ncnc2[nH][nH]cc1-2|10|136.11399999999998|-0.4022000000000001|74.43|3|2|0.0|0|2|0|10|0|0.5169670612450268|0.16887078714484646|3.3223383321927544|0.49540116952077007|1|0|False
129
+ 3366.0|Flucytosine|6-amino-5-fluoro-1H-pyrimidin-2-one|Nc1nc(=O)[nH]cc1F|9|129.09399999999997|-0.5088000000000001|71.77|3|2|0.0|0|1|1|9|0|0.4952172017431901|0.27854681358196376|3.1539953149178643|-0.7210492950621221|1|0|False
130
+ 60871.0|Adefovir Dipivoxil|[2-(6-aminopurin-9-yl)ethoxymethyl-(2,2-dimethylpropanoyloxymethoxy)phosphoryl]oxymethyl 2,2-dimethylpropanoate|CC(C)(C)C(=O)OCOP(=O)(COCCn1cnc2c(N)ncnc21)OCOC(=O)C(C)(C)C|34|501.47700000000026|2.7025000000000006|166.97999999999996|13|1|0.65|11|2|2|34|0|0.2071524581096599|0.26242117414727406|3.2764101894129833|-0.33118745107923225|0|0|False
131
+ 2265.0|Azathioprine| 6-(3-methyl-5-nitroimidazol-4-yl)sulfanyl-7H-purine|Cn1cnc([N+](=O)[O-])c1Sc1ncnc2nc[nH]c12|19|277.269|1.1457999999999997|115.41999999999999|8|1|0.1111111111111111|3|3|3|19|0|0.43328794180573177|0.5104763505055192|3.144692533268106|-1.257220744395016|1|0|False
132
+ 2723601.0|Thioguanine|2-amino-3,7-dihydropurine-6-thione|Nc1nc(=S)c2[nH]cnc2[nH]1|11|167.197|0.5976899999999998|83.38|4|3|0.0|0|2|2|11|0|0.5014913838271434|0.43666862873652124|3.3037189467190657|-0.34551430559848184|1|0|False
133
+ 135483437.0|Valaciclovir HCl|2-[(2-amino-6-oxo-1H-purin-9-yl)methoxy]ethyl 2-amino-3-methylbutanoate|CC(C)C(N)C(=O)OCCOCn1cnc2c(=O)[nH]c(N)nc21|23|324.341|-0.7975999999999985|151.14000000000004|9|3|0.5384615384615384|7|2|2|23|2|0.44214276395859164|0.3730958971915234|3.2609439427942917|-0.16316017429853905|1|0|False
134
+ 657298.0|Propylthiouracil|6-propyl-2-sulfanylidene-1H-pyrimidin-4-one|CCCc1cc(=O)[nH]c(=S)[nH]1|11|170.237|1.38499|48.65|2|2|0.42857142857142855|2|1|1|11|0|0.6587322532597096|0.2559861332659623|2.683909567066461|-0.9512439041419182|1|0|False
135
+ 3830.0|Kinetin|N-(furan-2-ylmethyl)-7H-purin-6-amine|c1coc(CNc2ncnc3nc[nH]c23)c1|16|215.216|1.5579999999999996|79.63|5|2|0.1|3|3|3|16|0|0.6925924860226649|0.711699598509513|2.490935013809871|-1.402265304972444|1|0|False
136
+ 3324.0|Famciclovir|[2-(acetyloxymethyl)-4-(2-aminopurin-9-yl)butyl] acetate|CC(=O)OCC(CCn1cnc2cnc(N)nc21)COC(C)=O|23|321.33700000000016|0.5409999999999998|122.22|9|1|0.5|7|2|2|23|0|0.733115342152729|0.45204840142043734|2.806858282589218|-0.19600642021383782|1|0|False
137
+ 1269845.0|2-Thiouracil|2-sulfanylidene-1H-pyrimidin-4-one |O=c1cc[nH]c(=S)[nH]1|8|128.156|0.43249|48.65|2|2|0.0|0|1|1|8|0|0.49770208173400393|0.21652997475728783|3.059775463182052|-0.5315741715414|1|0|False
138
+ 667493.0|Methylthiouracil|6-methyl-2-sulfanylidene-1H-pyrimidin-4-one|Cc1cc(=O)[nH]c(=S)[nH]1|9|142.18300000000002|0.74091|48.65|2|2|0.2|0|1|1|9|0|0.5247689157489369|0.2203039974925121|3.0118414687640174|-1.153615214888089|1|0|False
139
+ 1174.0|Uracil|1H-pyrimidine-2,4-dione|O=c1cc[nH]c(=O)[nH]1|8|112.088|-0.9368000000000001|65.72|2|2|0.0|0|1|1|8|0|0.4546939811074878|0.22106494195155435|2.763417421224011|0.034934357644975|1|0|False
140
+ 62389.0|6-Benzylaminopurine|N-benzyl-7H-purin-6-amine|c1ccc(CNc2ncnc3nc[nH]c23)cc1|17|225.25500000000002|1.9649999999999996|66.49000000000001|4|2|0.08333333333333333|3|3|3|17|0|0.7147392861192281|0.6930151711707544|2.118574274148248|-0.9996364494840941|1|0|False
141
+ 597.0|Cytosine|6-amino-1H-pyrimidin-2-one|Nc1cc[nH]c(=O)n1|8|111.104|-0.6479000000000001|71.77000000000001|3|2|0.0|0|1|1|8|0|0.4670416161550293|0.27448926713621147|3.295071966678556|0.31089129653276254|1|0|False
142
+ 4564.0|NU2058|6-(cyclohexylmethoxy)-7H-purin-2-amine|Nc1nc(OCC2CCCCC2)c2nc[nH]c2n1|18|247.30199999999996|1.8941999999999999|89.71|5|2|0.5833333333333334|3|3|2|18|0|0.8640548613284358|0.6675384454567892|2.7087684013184763|-0.5693644300994334|1|0|False
143
+ 99920.0|Adenosine Dialdehyde (ADOX)|2-[1-(6-aminopurin-9-yl)-2-oxoethoxy]-3-hydroxypropanal|Nc1ncnc2c1ncn2C(C=O)OC(C=O)CO|19|265.229|-1.3176|133.22|9|2|0.3|6|2|2|19|4|0.6142899241146795|0.40414391704379254|4.0613930320623375|0.27190180955298426|1|0|False
144
+ 1135.0|Thymine|5-methyl-1H-pyrimidine-2,4-dione|Cc1c[nH]c(=O)[nH]c1=O|9|126.115|-0.6283800000000002|65.72|2|2|0.2|0|1|1|9|0|0.4857813089921775|0.22442590613459015|2.4711895456870963|-0.39816338972551113|1|0|False
145
+ 393593.0|Namodenoson (CF-102)|5-[2-chloro-6-[(3-iodophenyl)methylamino]purin-9-yl]-3,4-dihydroxy-N-methyloxolane-2-carboxamide|CNC(=O)C1OC(n2cnc3c(NCc4cccc(I)c4)nc(Cl)nc32)C(O)C1O|30|544.7370000000002|1.061599999999999|134.42|9|4|0.3333333333333333|5|4|3|30|8|0.27679674050364184|0.3780075367245393|3.858289643137513|-0.35311044843938993|0|0|False
146
+ 190.0|Adenine|7H-purin-6-amine|Nc1ncnc2[nH]cnc12|10|135.13|-0.0648999999999999|80.48|4|2|0.0|0|2|2|10|0|0.5296759293780585|0.46383358553871024|2.745044432988511|-0.22216883360209003|1|0|False
147
+ 22041878.0|2'-Deoxyuridine 5'-monophosphate disodium salt|[5-(2,4-dioxopyrimidin-1-yl)-3-hydroxyoxolan-2-yl]methyl phosphate|O=c1ccn(C2CC(O)C(COP(=O)([O-])[O-])O2)c(=O)[nH]1|20|306.167|-2.9696999999999987|156.74|9|2|0.5555555555555556|4|2|1|20|6|0.5581147155997511|0.2665509998411342|4.253945014153231|0.8390224194285251|1|0|False
148
+ 621.0|2'-Deoxyadenosine 5'-monophosphate|[5-(6-aminopurin-9-yl)-3-hydroxyoxolan-2-yl]methyl dihydrogen phosphate|Nc1ncnc2c1ncn2C1CC(O)C(COP(=O)(O)O)O1|22|331.225|-0.8338000000000003|165.83999999999997|9|4|0.5|4|3|2|22|6|0.518964726677587|0.2703973142533638|3.7330177714088277|1.0058182745873498|0|0|False
149
+ 624.0|2'-Deoxycytidine 5'-monophosphate|[5-(4-amino-2-oxopyrimidin-1-yl)-3-hydroxyoxolan-2-yl]methyl dihydrogen phosphate|Nc1ccn(C2CC(O)C(COP(=O)(O)O)O2)c(=O)n1|20|307.199|-1.4168|157.13|8|4|0.5555555555555556|4|2|1|20|6|0.48991422293348935|0.21590188566756152|3.7558655269737438|1.3705532333776147|1|0|False
150
+ 193.0|S-Adenosyl-L-homocysteine (SAH)|2-amino-4-[[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methylsulfanyl]butanoic acid|Nc1ncnc2c1ncn2C1OC(CSCCC(N)C(=O)O)C(O)C1O|26|384.41800000000006|-1.4370999999999983|182.62999999999997|11|5|0.5714285714285714|7|3|2|26|10|0.3524104766503097|0.1706266479700945|3.9878562207640913|0.8322716321994499|0|0|False
151
+ 5513.0|Tomatine|2-[2-[4,5-dihydroxy-2-(hydroxymethyl)-6-(5',7,9,13-tetramethylspiro[5-oxapentacyclo[10.8.0.02,9.04,8.013,18]icosane-6,2'-piperidine]-16-yl)oxyoxan-3-yl]oxy-5-hydroxy-6-(hydroxymethyl)-4-(3,4,5-trihydroxyoxan-2-yl)oxyoxan-3-yl]oxy-6-(hydroxymethyl)oxane-3,4,5-triol|CC1CCC2(NC1)OC1CC3C4CCC5CC(OC6OC(CO)C(OC7OC(CO)C(O)C(OC8OCC(O)C(O)C8O)C7OC7OC(CO)C(O)C(O)C7O)C(O)C6O)CCC5(C)C4CCC3(C)C1C2C|72|1034.2000000000003|-2.6972999999999807|337.86|22|13|1.0|11|10|0|72|62|0.09370727646314425|0.03741186437663462|7.196015861300958|2.0025666159277615|0|0|False
152
+ 4739.0|Pentostatin|3-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-7,8-dihydro-4H-imidazo[4,5-d][1,3]diazepin-8-ol|OCC1OC(n2cnc3c2NC=NCC3O)CC1O|19|268.273|-0.9890000000000005|112.13000000000001|8|4|0.6363636363636364|2|3|1|19|8|0.5477635402256686|0.20674293672099894|4.441045872605159|1.0618509123565736|1|0|False
153
+ 263976.0|2',3'-Dideoxyadenosine|[5-(6-aminopurin-9-yl)oxolan-2-yl]methanol|Nc1ncnc2c1ncn2C1CCC(CO)O1|17|235.24699999999996|0.07839999999999986|99.08000000000001|7|2|0.5|2|3|2|17|4|0.7642641295225605|0.4804378807684162|3.37615943374439|0.818243690205506|1|0|False
154
+ 1599.0|Puromycin aminonucleoside|4-amino-2-[6-(dimethylamino)purin-9-yl]-5-(hydroxymethyl)oxolan-3-ol|CN(C)c1ncnc2c1ncn2C1OC(CO)C(N)C1O|21|294.315|-1.5297999999999983|122.55000000000001|9|3|0.5833333333333334|3|3|2|21|8|0.6229174346076342|0.35532968935631826|3.892502883730317|0.5775648178855525|1|0|False
155
+ 3461.0|Gemcitabine|4-amino-1-[3,3-difluoro-4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|Nc1ccn(C2OC(CO)C(O)C2(F)F)c(=O)n1|18|263.2|-1.2886|110.60000000000001|7|3|0.5555555555555556|2|2|1|18|6|0.6120873727300361|0.30156266317008307|3.8632725293690937|1.2986545260508495|1|0|False
156
+ 4213.0|Mizoribine|1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-hydroxyimidazole-4-carboxamide|NC(=O)c1ncn(C2OC(CO)C(O)C2O)c1O|18|259.218|-2.7009|151.06|8|5|0.5555555555555556|3|2|1|18|8|0.3957886051129175|0.147172771208249|3.773041201226402|0.9518781068023058|0|0|False
157
+ 596.0|Cytarabine|4-amino-1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|Nc1ccn(C2OC(CO)C(O)C2O)c(=O)n1|17|243.21900000000002|-2.563|130.82999999999998|8|4|0.5555555555555556|2|2|1|17|8|0.4489304892314893|0.18860344125062514|3.548894614600127|1.6478124261033116|1|0|False
158
+ 24208296.0|ATP disodium|[[[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl]oxy-oxidophosphoryl] hydrogen phosphate|Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OP(=O)([O-])OP(=O)([O-])O)C(O)C1O|31|505.1660000000001|-2.8929999999999993|284.79|16|5|0.5|8|3|2|31|14|0.22489036963823456|0.2003365594519469|5.011918199029612|1.0168482980546354|0|0|False
159
+ 925.0|NAD+| [[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl] [5-(3-carbamoylpyridin-1-ium-1-yl)-3,4-dihydroxyoxolan-2-yl]methyl phosphate|NC(=O)c1ccc[n+](C2OC(COP(=O)([O-])OP(=O)(O)OCC3OC(n4cnc5c(N)ncnc54)C(O)C3O)C(O)C2O)c1|44|663.4300000000004|-3.6478999999999964|321.0900000000001|18|7|0.47619047619047616|11|5|3|44|20|0.0767773571622719|0.12396315529857062|5.338067205606141|0.7011583949037886|0|0|False
160
+ 9896099.0|Abacavir sulfate|[(1S,4R)-4-[2-amino-6-(cyclopropylamino)purin-9-yl]cyclopent-2-en-1-yl]methanol|Nc1nc(NC2CC2)c2ncn(C3C=CC(CO)C3)c2n1|21|286.33900000000006|1.0922999999999998|101.88|7|3|0.5|4|4|2|21|4|0.7272302824304794|0.532844209200051|3.688999973120933|0.026597937866581003|1|0|False
161
+ 3743107.0|Citicoline sodium|[[5-(4-amino-2-oxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-oxidophosphoryl] 2-(trimethylazaniumyl)ethyl phosphate|C[N+](C)(C)CCOP(=O)([O-])OP(=O)([O-])OCC1OC(n2ccc(N)nc2=O)C(O)C1O|31|487.3190000000002|-2.862499999999996|218.55|14|3|0.7142857142857143|10|2|1|31|12|0.22357629050096392|0.11977524271877953|5.089091608912945|1.1662562054042096|0|0|False
162
+ 156610574.0|Cangrelor Tetrasodium|[dichloro(phosphonato)methyl]-[[3,4-dihydroxy-5-[6-(2-methylsulfanylethylamino)-2-(3,3,3-trifluoropropylsulfanyl)purin-9-yl]oxolan-2-yl]methoxy-oxidophosphoryl]oxyphosphinate|CSCCNc1nc(SCCC(F)(F)F)nc2c1ncn2C1OC(COP(=O)([O-])OP(=O)([O-])C(Cl)(Cl)P(=O)([O-])[O-])C(O)C1O|44|772.3340000000004|0.32249999999999895|267.23|19|3|0.7058823529411765|15|3|2|44|12|0.0723261808950979|0.21464495331026204|5.531489339568375|-0.1466218200172386|0|0|False
163
+ 3986128.0|Flavin mononucleotide|[5-(7,8-dimethyl-2,4-dioxobenzo[g]pteridin-10-yl)-2,3,4-trihydroxypentyl] hydrogen phosphate|Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(CC(O)C(O)C(O)COP(=O)([O-])O)c2cc1C|31|455.3400000000002|-2.2385599999999988|210.92|11|5|0.4117647058823529|7|3|1|31|8|0.18980818802860375|0.10778902916070311|4.425392476983317|0.08008486944120105|0|0|False
164
+ 4661174.0|β-Nicotinamide Mononucleotide|[5-(3-carbamoylpyridin-1-ium-1-yl)-3,4-dihydroxyoxolan-2-yl]methyl hydrogen phosphate|NC(=O)c1ccc[n+](C2OC(COP(=O)([O-])O)C(O)C2O)c1|22|334.22099999999995|-2.830499999999999|166.25|7|4|0.45454545454545453|5|2|1|22|10|0.33008457497205185|0.2491576086775115|4.570243080664501|0.7442067405707499|1|0|False
165
+ 13013858.0|Citicholine|[[5-(4-amino-2-oxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl] 2-(trimethylazaniumyl)ethyl phosphate|C[N+](C)(C)CCOP(=O)([O-])OP(=O)(O)OCC1OC(n2ccc(N)nc2=O)C(O)C1O|31|488.32700000000017|-2.2304999999999957|215.71999999999997|13|4|0.7142857142857143|10|2|1|31|12|0.20278188342870285|0.09819520093202008|4.814314685836021|1.371965225840339|0|0|False
166
+ 314.0|5'-Cytidylic acid|[5-(4-amino-2-oxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methyl dihydrogen phosphate|Nc1ccn(C2OC(COP(=O)(O)O)C(O)C2O)c(=O)n1|21|323.19800000000004|-2.446|177.36|9|5|0.5555555555555556|4|2|1|21|8|0.3735901348140529|0.13458832466329937|3.826860604457865|1.5683275858392525|0|0|False
167
+ 5644.0|UTP, Trisodium Salt|[[[5-(2,4-dioxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-oxidophosphoryl]oxy-oxidophosphoryl] hydrogen phosphate|O=c1ccn(C2OC(COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])O)C(O)C2O)c(=O)[nH]1|29|481.1160000000001|-4.396899999999998|272.86|15|4|0.5555555555555556|8|2|1|29|14|0.2551718313877904|0.18854093107244396|5.328826360999949|0.9171505343682239|0|0|False
168
+ 136005379.0|Inosine 5'-triphosphate trisodium salt|[[[3,4-dihydroxy-5-(6-oxo-1H-purin-9-yl)oxolan-2-yl]methoxy-oxidophosphoryl]oxy-oxidophosphoryl] hydrogen phosphate|O=c1[nH]cnc2c1ncn2C1OC(COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])O)C(O)C1O|31|505.1420000000001|-3.8139|281.57|16|4|0.5|8|3|2|31|14|0.2500591032919711|0.2656860254054147|5.422037939530075|0.769043034145274|0|0|False
169
+ 135441845.0|Guanosine 5'-monophosphate disodium salt|[5-(2-amino-6-oxo-1H-purin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl phosphate|Nc1nc(=O)c2ncn(C3OC(COP(=O)([O-])[O-])C(O)C3O)c2[nH]1|24|361.20700000000005|-3.8336999999999994|211.7|12|4|0.5|4|3|2|24|8|0.38704670523942886|0.17415969497343173|4.404662987508528|0.877560502907929|0|0|False
170
+ 197.0|Adenosine 5'-diphosphate sodium salt|[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl phosphono hydrogen phosphate|Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OP(=O)(O)O)C(O)C1O|27|427.20300000000003|-1.7460000000000009|232.59999999999997|12|6|0.5|6|3|2|27|10|0.28079075856620234|0.1854502601232538|4.125297661526227|1.3883535027781002|0|0|False
171
+ 44134852.0|Triphosphopyridine nucleotide disodium salt|[2-(6-aminopurin-9-yl)-5-[[[[5-(3-carbamoylpyridin-1-ium-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-oxidophosphoryl]oxy-oxidophosphoryl]oxymethyl]-4-hydroxyoxolan-3-yl] hydrogen phosphate|NC(=O)c1ccc[n+](C2OC(COP(=O)([O-])OP(=O)([O-])OCC3OC(n4cnc5c(N)ncnc54)C(OP(=O)([O-])O)C3O)C(O)C2O)c1|48|741.3930000000005|-4.794899999999992|373.2800000000001|21|6|0.47619047619047616|13|5|3|48|22|0.07066446785799431|0.11623903144299709|5.927167227885624|0.5859635499812229|0|0|False
172
+ 137795696.0|Diquafosol Tetrasodium|[[5-(2,4-dioxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-oxidophosphoryl] [[[5-(2,4-dioxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-oxidophosphoryl]oxy-oxidophosphoryl] phosphate|O=c1ccn(C2OC(COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])OP(=O)([O-])OCC3OC(n4ccc(=O)[nH]c4=O)C(O)C3O)C(O)C2O)c(=O)[nH]1|49|786.2750000000004|-6.718199999999994|415.7700000000001|25|6|0.5555555555555556|14|4|2|49|24|0.09690594623547874|0.1005939099621791|6.025108139184198|0.5218458453224182|0|0|False
173
+ 258.0|Blasticidin S|3-[[3-amino-5-[carbamimidoyl(methyl)amino]pentanoyl]amino]-6-(4-amino-2-oxopyrimidin-1-yl)-3,6-dihydro-2H-pyran-2-carboxylic acid |CN(CCC(N)CC(=O)NC1C=CC(n2ccc(N)nc2=O)OC1C(=O)O)C(=N)N|30|422.44600000000025|-2.2187299999999945|215.67|9|6|0.47058823529411764|8|2|1|30|8|0.150577431129479|0.10154891406114437|4.490427372075242|1.0509590801570832|0|0|False
174
+ 135545622.0|Disodium 5'-Inosinate|[3,4-dihydroxy-5-(6-oxo-1H-purin-9-yl)oxolan-2-yl]methyl phosphate|O=c1[nH]cnc2c1ncn2C1OC(COP(=O)([O-])[O-])C(O)C1O|23|346.192|-3.4158999999999984|185.67999999999998|11|3|0.5|4|3|2|23|8|0.4706220858439375|0.2667159039917633|4.448493844911617|0.9150235712723259|0|0|False
175
+ 4014956.0|Disodium uridine-5'-monophosphate|[5-(2,4-dioxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methyl phosphate|O=c1ccn(C2OC(COP(=O)([O-])[O-])C(O)C2O)c(=O)[nH]1|21|322.166|-3.9988999999999972|176.97|10|3|0.5555555555555556|4|2|1|21|8|0.4658768727043418|0.2057858099091487|4.292027087974349|1.133455884639928|0|0|False
176
+ 24208296.0|Adenosine disodium triphosphate|[[[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl]oxy-oxidophosphoryl] hydrogen phosphate|Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OP(=O)([O-])OP(=O)([O-])O)C(O)C1O|31|505.1660000000001|-2.8929999999999993|284.79|16|5|0.5|8|3|2|31|14|0.22489036963823456|0.2003365594519469|5.011918199029612|1.0168482980546354|0|0|False
177
+ 65040.0|5-Methylcytosine|6-amino-5-methyl-1H-pyrimidin-2-one|Cc1cnc(=O)[nH]c1N|9|125.131|-0.33948000000000017|71.77|3|2|0.2|0|1|1|9|0|0.4979194736965626|0.27879359761277905|2.819639545687096|-0.4087731108036112|1|0|False
178
+ 135432442.0|Guanosine 5'-triphosphate trisodium salt|[[[5-(2-amino-6-oxo-1H-purin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl]oxy-oxidophosphoryl] phosphate|Nc1nc2c(ncn2C2OC(COP(=O)(O)OP(=O)([O-])OP(=O)([O-])[O-])C(O)C2O)c(=O)[nH]1|32|520.1570000000002|-4.231699999999997|307.59|17|5|0.5|8|3|2|32|12|0.2058114066028432|0.198961655563169|5.131529164316129|0.9874606492575717|0|0|False
179
+ 4073694.0|Thymidine 5'-monophosphate disodium salt|[3-hydroxy-5-(5-methyl-2,4-dioxopyrimidin-1-yl)oxolan-2-yl]methyl phosphate|Cc1cn(C2CC(O)C(COP(=O)([O-])[O-])O2)c(=O)[nH]c1=O|21|320.194|-2.6612799999999988|156.73999999999998|9|2|0.6|4|2|1|21|6|0.5689477538848637|0.27086147634366586|4.168970366666491|0.6891982972581047|1|0|False
180
+ 135545622.0|Disodium 5'-inosinate monohydrate|[3,4-dihydroxy-5-(6-oxo-1H-purin-9-yl)oxolan-2-yl]methyl phosphate|O=c1[nH]cnc2c1ncn2C1OC(COP(=O)([O-])[O-])C(O)C1O|23|346.192|-3.4158999999999984|185.67999999999998|11|3|0.5|4|3|2|23|8|0.4706220858439375|0.2667159039917633|4.448493844911617|0.9150235712723259|0|0|False
181
+ 44287897.0|Uridine-5'-diphosphate disodium salt|[[5-(2,4-dioxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-oxidophosphoryl] hydrogen phosphate |O=c1ccn(C2OC(COP(=O)([O-])OP(=O)([O-])O)C(O)C2O)c(=O)[nH]1|25|402.14500000000004|-3.881899999999999|223.49999999999997|12|4|0.5555555555555556|6|2|1|25|12|0.33530080341395063|0.10747213319277059|4.854100270601977|1.0638946198671397|0|0|False
182
+ 238.0|ATP|[[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl] phosphono hydrogen phosphate|Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OP(=O)(O)OP(=O)(O)O)C(O)C1O|31|507.1820000000001|-1.629000000000001|279.13|14|7|0.5|8|3|2|31|12|0.19742203378388068|0.16844205143530697|4.41361201850629|1.3438955794867324|0|0|False
183
+ 135398638.0|Hypoxanthine|1,7-dihydropurin-6-one|O=c1[nH]cnc2nc[nH]c12|10|136.114|-0.35380000000000006|74.43|3|2|0.0|0|2|2|10|0|0.5192368565638923|0.39726961313423786|2.9399791809991207|-0.25903651864364996|1|0|False
184
+ 312827.0|Tomatidine|5',7,9,13-tetramethylspiro[5-oxapentacyclo[10.8.0.02,9.04,8.013,18]icosane-6,2'-piperidine]-16-ol|CC1CCC2(NC1)OC1CC3C4CCC5CC(O)CCC5(C)C4CCC3(C)C1C2C|30|415.66200000000026|5.366800000000006|41.489999999999995|3|2|1.0|0|6|0|30|24|0.5589811430146876|0.21002733429425308|5.369563102779233|2.885757885743727|0|0|False
185
+ 9449.0|Adenine sulfate|7H-purin-6-amine|Nc1ncnc2nc[nH]c12|10|135.13|-0.0648999999999999|80.47999999999999|4|2|0.0|0|2|2|10|0|0.5296759293780585|0.46383358553871024|2.6602057061980595|-0.48016737706869|1|0|False
TransAntivirus/download_pubchem/download.sh ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# Fetch the PubChem "CURRENT-Full" compound XML archives one at a time, verify
# each against its published md5, decompress it, run extract_info.py over it to
# append the extracted fields to iupacs_properties.txt, then delete the XML so
# that only one decompressed archive is on disk at any moment.
#
# Run from the directory that contains extract_info.py (it is invoked by a
# relative path). Requires wget, md5sum, pigz and python on PATH.

# Archive indices; each archive is named after a (start, end] pair taken from
# consecutive values of `seq MIN 5 MAX`.
MIN=0
MAX=1555

PREFIX="ftp://ftp.ncbi.nlm.nih.gov/pubchem/Compound/CURRENT-Full/XML/"
# fill this in (an already-exported DOWNLOAD_DIR takes precedence)
#DOWNLOAD_DIR=/user4/c5t5-main/download_pubchem/
DOWNLOAD_DIR="${DOWNLOAD_DIR:-/root/autodl-tmp/c5t5-main/download_pubchem/}"
# File names are built by plain concatenation, so force exactly one trailing slash.
DOWNLOAD_DIR="${DOWNLOAD_DIR%/}/"

# Lower bound of the first archive; derived from MIN so both stay in step.
prev_num=$(printf "%04d" "$MIN")
for i in $(seq "$MIN" 5 "$MAX"); do
    num=$(printf "%04d" "$i")
    if [[ "$num" == "$prev_num" ]]; then
        # The first value only provides the lower bound. Building a name from
        # it would give an archive whose start and end index are identical.
        continue
    fi
    fn="Compound_${prev_num}00001_${num}00000.xml"
    prev_num=$num
    echo "getting" "$fn"
    if ! [[ -f "$DOWNLOAD_DIR$fn" ]]; then
        orig_dir=$(pwd)
        # Stop instead of downloading into whatever the current directory is.
        cd "$DOWNLOAD_DIR" || { echo "cannot enter $DOWNLOAD_DIR" >&2; exit 1; }
        wget "${PREFIX}${fn}.gz"
        wget "${PREFIX}${fn}.gz.md5"
        if md5sum -c "${fn}.gz.md5"; then
            echo md5 passed
            rm "${fn}.gz.md5"
            #gunzip $fn
            pigz -d -p 8 "${fn}.gz"
        else
            echo md5 failed
            # Drop the unverified files so a later run downloads them afresh
            # instead of re-checking stale copies.
            rm -f "${fn}.gz" "${fn}.gz.md5"
        fi
        cd "$orig_dir" || exit 1
    fi
    if ! [[ -f "$DOWNLOAD_DIR$fn" ]]; then
        # Download, verification or decompression failed: nothing to extract.
        echo "skipping $fn: no verified XML available" >&2
        continue
    fi
    python extract_info.py "$DOWNLOAD_DIR$fn" "<PC-Compound>" Preferred 11 34 -26 Traditional 11 34 -26 "Canonical<" 11 34 -26 Mass 12 34 -26 Formula 11 34 -26 "Log P" 11 34 -26 >> "${DOWNLOAD_DIR}iupacs_properties.txt"
    rm "$DOWNLOAD_DIR$fn"
done
TransAntivirus/download_pubchem/extract_info.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import copy
3
+ from multiprocessing import Pool
4
+ import numpy as np
5
+ import itertools
6
+
7
+ # look through fn for all the provided search terms (keys),
8
+ # and extract values as directed by the offset, start & end cols
9
+
10
+ # example usage to get IUPAC:
11
+ # python extract_info.py Compounds.xml "<PC-Compound>" Systematic 11 34 -26
12
+
13
+ # Systematic 11 34 -26
14
+ # Mass 12 34 -26
15
+ # Formula 11 34 -26
16
+ # Log P 11 34 -26
17
+
18
+ # chemicals are separated by the 2nd arg
19
+
20
# Number of consecutive lines handed to each find_relevant() call.
LINES_PER_PROC = 10000

# Check the argument count before indexing sys.argv, so that an invocation with
# no arguments reports the usage problem instead of failing with IndexError.
assert len(sys.argv) > 3, "need to provide search terms, etc."
assert len(sys.argv[3:]) % 4 == 0, "each search term needs offset & cols"

# Path of the file to scan.
fn = sys.argv[1]

# Text whose presence on a line marks the start of a new chemical.
chemical_separator = sys.argv[2]

# Parallel lists with one entry per search term: the term itself, the line
# offset (relative to the line containing the term) of the line holding the
# value, and the start/end columns used to slice the value out of that line.
search_terms = []
line_offsets = []
start_cols = []
end_cols = []

# Remaining arguments come in groups of four: term, offset, start col, end col.
for i in range(3, len(sys.argv), 4):
    search_terms.append(sys.argv[i])
    line_offsets.append(int(sys.argv[i+1]))
    start_cols.append(int(sys.argv[i+2]))
    end_cols.append(int(sys.argv[i+3]))

# Placeholder; rebound to the file's lines once the file has been read.
lines = []
41
+
42
def find_relevant(start_line):
    """Return the indices of the interesting lines in one chunk of ``lines``.

    Scans at most ``LINES_PER_PROC`` entries of the module-level ``lines``
    list, starting at index ``start_line``, and collects the index of every
    line that contains ``chemical_separator`` or any entry of ``search_terms``.

    Each index is reported at most once, even when the line matches several
    terms (or the separator and a term). Reporting it more than once would make
    the caller process the line repeatedly, and for a separator line that means
    printing a spurious empty record.

    Args:
        start_line: index into ``lines`` of the first line of the chunk.

    Returns:
        A list of matching line indices in ascending order; empty when
        ``start_line`` is at or past the end of ``lines``.
    """
    # Clamp the chunk to the end of the file.
    stop_line = min(start_line + LINES_PER_PROC, len(lines))
    relevant_lines = []
    for idx in range(start_line, stop_line):
        line = lines[idx]
        if chemical_separator in line or any(term in line for term in search_terms):
            relevant_lines.append(idx)
    return relevant_lines
55
+
56
# Read the whole file up front; the worker processes created below rely on the
# module-level ``lines`` being populated before the pool exists.
# NOTE(review): this presumably depends on a fork-style start method so that
# workers inherit ``lines`` -- confirm before running on a spawn-only platform.
with open(fn, "r") as xml_file:
    lines = xml_file.readlines()

# The first record printed is the list of search terms itself, which serves as
# a header row for the records that follow.
found_values = copy.deepcopy(search_terms)

# Locate separator / search-term lines in parallel, one chunk of LINES_PER_PROC
# lines per task. map() returns the chunks in submission order, so the
# flattened indices are in ascending file order. The context manager shuts the
# pool down once the results are in.
with Pool(32) as p:
    chunk_hits = p.map(find_relevant,
                       range(0, len(lines), LINES_PER_PROC))
relevant_lines = list(itertools.chain.from_iterable(chunk_hits))

seen_separator = False
for i in relevant_lines:
    line = lines[i]
    if chemical_separator in line:
        # new chemical -- emit what was gathered so far, then reset found_values
        print("|".join(found_values))
        found_values = ["" for _ in search_terms]
        seen_separator = True
        continue

    for j, search_term in enumerate(search_terms):
        if search_term in line:
            # found the jth search term on line i; its value sits
            # line_offsets[j] lines away
            found = i + line_offsets[j]
            # Skip offsets that fall outside the file rather than raising
            # IndexError or silently wrapping around to the other end.
            if 0 <= found < len(lines):
                found_values[j] = lines[found][start_cols[j]:end_cols[j]]

# Records are only emitted when the *next* separator is met, so the last
# chemical in the file has to be flushed explicitly.
if seen_separator:
    print("|".join(found_values))
81
+
82
+
TransAntivirus/download_pubchem/finetunev1_new.csv ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles|aLogP|canonical_smiles|CanonicalSMILES|IUPACName|XLogP
2
+ CCN(CC)CCCC(C)Nc1ccnc2cc(Cl)ccc12|4.810600000000004|CCN(CC)CCCC(C)Nc1ccnc2cc(Cl)ccc12|CCN(CC)CCCC(C)NC1=C2C=CC(=CC2=NC=C1)Cl|4-N-(7-chloroquinolin-4-yl)-1-N,1-N-diethylpentane-1,4-diamine|4.6
3
+ Cc1cccc(C)c1OCC(=O)NC(Cc1ccccc1)C(O)CC(Cc1ccccc1)NC(=O)C(C(C)C)N1CCCNC1=O|4.328140000000003|Cc1cccc(C)c1OCC(=O)NC(Cc1ccccc1)C(O)CC(Cc1ccccc1)NC(=O)C(C(C)C)N1CCCNC1=O|CC1=C(C(=CC=C1)C)OCC(=O)NC(CC2=CC=CC=C2)C(CC(CC3=CC=CC=C3)NC(=O)C(C(C)C)N4CCCNC4=O)O|N-[5-[[2-(2,6-dimethylphenoxy)acetyl]amino]-4-hydroxy-1,6-diphenylhexan-2-yl]-3-methyl-2-(2-oxo-1,3-diazinan-1-yl)butanamide|5.9
4
+ O=C(Nc1ccc([N+](=O)[O-])cc1Cl)c1cc(Cl)ccc1O|3.859500000000001|O=C(Nc1ccc([N+](=O)[O-])cc1Cl)c1cc(Cl)ccc1O|C1=CC(=C(C=C1[N+](=O)[O-])Cl)NC(=O)C2=C(C=CC(=C2)Cl)O|5-chloro-N-(2-chloro-4-nitrophenyl)-2-hydroxybenzamide|4.0
5
+ CN(C)C(=O)C(CCN1CCC(O)(c2ccc(Cl)cc2)CC1)(c1ccccc1)c1ccccc1|5.088000000000005|CN(C)C(=O)C(CCN1CCC(O)(c2ccc(Cl)cc2)CC1)(c1ccccc1)c1ccccc1|CN(C)C(=O)C(CCN1CCC(CC1)(C2=CC=C(C=C2)Cl)O)(C3=CC=CC=C3)C4=CC=CC=C4|4-[4-(4-chlorophenyl)-4-hydroxypiperidin-1-yl]-N,N-dimethyl-2,2-diphenylbutanamide|5.0
6
+ CC1OC(OC2CC(O)C3(CO)C4C(O)CC5(C)C(C6=CC(=O)OC6)CCC5(O)C4CCC3(O)C2)C(O)C(O)C1O|-1.5149999999999972|CC1OC(OC2CC(O)C3(CO)C4C(O)CC5(C)C(C6=CC(=O)OC6)CCC5(O)C4CCC3(O)C2)C(O)C(O)C1O|CC1C(C(C(C(O1)OC2CC(C3(C4C(CCC3(C2)O)C5(CCC(C5(CC4O)C)C6=CC(=O)OC6)O)CO)O)O)O)O|3-[1,5,11,14-tetrahydroxy-10-(hydroxymethyl)-13-methyl-3-(3,4,5-trihydroxy-6-methyloxan-2-yl)oxy-2,3,4,6,7,8,9,11,12,15,16,17-dodecahydro-1H-cyclopenta[a]phenanthren-17-yl]-2H-furan-5-one|-1.7
7
+ CCN(CC)CCOc1ccc(C(O)(Cc2ccc(Cl)cc2)c2ccc(C)cc2)cc1|5.847620000000007|CCN(CC)CCOc1ccc(C(O)(Cc2ccc(Cl)cc2)c2ccc(C)cc2)cc1|CCN(CC)CCOC1=CC=C(C=C1)C(CC2=CC=C(C=C2)Cl)(C3=CC=C(C=C3)C)O|2-(4-chlorophenyl)-1-[4-[2-(diethylamino)ethoxy]phenyl]-1-(4-methylphenyl)ethanol|6.2
8
+ COc1ccc2cc1Oc1ccc(cc1)CC1c3cc(c(OC)cc3CCN1C)Oc1c(OC)c(OC)cc3c1C(C2)N(C)CC3|7.162400000000009|COc1ccc2cc1Oc1ccc(cc1)CC1c3cc(c(OC)cc3CCN1C)Oc1c(OC)c(OC)cc3c1C(C2)N(C)CC3|CN1CCC2=CC(=C3C=C2C1CC4=CC=C(C=C4)OC5=C(C=CC(=C5)CC6C7=C(O3)C(=C(C=C7CCN6C)OC)OC)OC)OC|9,20,21,25-tetramethoxy-15,30-dimethyl-7,23-dioxa-15,30-diazaheptacyclo[22.6.2.23,6.18,12.114,18.027,31.022,33]hexatriaconta-3(36),4,6(35),8,10,12(34),18,20,22(33),24,26,31-dodecaene|6.4
9
+ CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O|5.179200000000004|CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O|CCN(CC)CC1=C(C=CC(=C1)NC2=C3C=CC(=CC3=NC=C2)Cl)O|4-[(7-chloroquinolin-4-yl)amino]-2-(diethylaminomethyl)phenol|2.6
10
+ O=C1NCN(c2ccccc2)C12CCN(CCCC(c1ccc(F)cc1)c1ccc(F)cc1)CC2|5.305400000000005|O=C1NCN(c2ccccc2)C12CCN(CCCC(c1ccc(F)cc1)c1ccc(F)cc1)CC2|C1CN(CCC12C(=O)NCN2C3=CC=CC=C3)CCCC(C4=CC=C(C=C4)F)C5=CC=C(C=C5)F|8-[4,4-bis(4-fluorophenyl)butyl]-1-phenyl-1,3,8-triazaspiro[4.5]decan-4-one|5.6
11
+ Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1|4.590320000000004|Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1|CC1=C(C=C(C=C1)NC(=O)C2=CC=C(C=C2)CN3CCN(CC3)C)NC4=NC=CC(=N4)C5=CN=CC=C5|4-[(4-methylpiperazin-1-yl)methyl]-N-[4-methyl-3-[(4-pyridin-3-ylpyrimidin-2-yl)amino]phenyl]benzamide|3.5
12
+ CCN(CC)CCOc1ccc2c(c1)C(=O)c1cc(OCCN(CC)CC)ccc1-2|4.339200000000004|CCN(CC)CCOc1ccc2c(c1)C(=O)c1cc(OCCN(CC)CC)ccc1-2|CCN(CC)CCOC1=CC2=C(C=C1)C3=C(C2=O)C=C(C=C3)OCCN(CC)CC|2,7-bis[2-(diethylamino)ethoxy]fluoren-9-one|4.7
13
+ COc1cc2c3cc1Oc1c(OC)c(OC)cc4c1C(Cc1ccc(O)c(c1)Oc1ccc(cc1)CC3N(C)CC2)N(C)CC4|6.859400000000009|COc1cc2c3cc1Oc1c(OC)c(OC)cc4c1C(Cc1ccc(O)c(c1)Oc1ccc(cc1)CC3N(C)CC2)N(C)CC4|CN1CCC2=CC(=C3C=C2C1CC4=CC=C(C=C4)OC5=C(C=CC(=C5)CC6C7=C(O3)C(=C(C=C7CCN6C)OC)OC)O)OC|20,21,25-trimethoxy-15,30-dimethyl-7,23-dioxa-15,30-diazaheptacyclo[22.6.2.23,6.18,12.114,18.027,31.022,33]hexatriaconta-3(36),4,6(35),8,10,12(34),18,20,22(33),24,26,31-dodecaen-9-ol|6.1
14
+ OC(c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)C1CCCCN1|4.447900000000003|OC(c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)C1CCCCN1|C1CCNC(C1)C(C2=CC(=NC3=C2C=CC=C3C(F)(F)F)C(F)(F)F)O|[2,8-bis(trifluoromethyl)quinolin-4-yl]-piperidin-2-ylmethanol|3.6
15
+ OCCN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1|4.308100000000003|OCCN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1|C1CN(CCN1CCCN2C3=CC=CC=C3SC4=C2C=C(C=C4)C(F)(F)F)CCO|2-[4-[3-[2-(trifluoromethyl)phenothiazin-10-yl]propyl]piperazin-1-yl]ethanol|4.4
16
+ O=C(Nc1cc(Cl)cc(Cl)c1O)c1c(O)c(Cl)cc(Cl)c1Cl|5.6171000000000015|O=C(Nc1cc(Cl)cc(Cl)c1O)c1c(O)c(Cl)cc(Cl)c1Cl|C1=C(C=C(C(=C1NC(=O)C2=C(C(=CC(=C2Cl)Cl)Cl)O)O)Cl)Cl|2,3,5-trichloro-N-(3,5-dichloro-2-hydroxyphenyl)-6-hydroxybenzamide|5.7
17
+ CCN(CCO)CCCC(C)Nc1ccnc2cc(Cl)ccc12|3.783000000000002|CCN(CCO)CCCC(C)Nc1ccnc2cc(Cl)ccc12|CCN(CCCC(C)NC1=C2C=CC(=CC2=NC=C1)Cl)CCO|2-[4-[(7-chloroquinolin-4-yl)amino]pentyl-ethylamino]ethanol|3.6
18
+ CC(CN1c2ccccc2Sc2ccccc21)N(C)C|4.239400000000003|CC(CN1c2ccccc2Sc2ccccc21)N(C)C|CC(CN1C2=CC=CC=C2SC3=CC=CC=C31)N(C)C|N,N-dimethyl-1-phenothiazin-10-ylpropan-2-amine|4.8
19
+ CCSc1ccc2c(c1)N(CCCN1CCN(C)CC1)c1ccccc1S2|5.0388000000000055|CCSc1ccc2c(c1)N(CCCN1CCN(C)CC1)c1ccccc1S2|CCSC1=CC2=C(C=C1)SC3=CC=CC=C3N2CCCN4CCN(CC4)C|2-ethylsulfanyl-10-[3-(4-methylpiperazin-1-yl)propyl]phenothiazine|5.4
20
+ CC1OC(OC2C(O)CC(OC3C(O)CC(OC4CCC5(C)C(CCC6C5CC(O)C5(C)C(C7=CC(=O)OC7)CCC65O)C4)OC3C)OC2C)CC(O)C1O|2.218100000000003|CC1OC(OC2C(O)CC(OC3C(O)CC(OC4CCC5(C)C(CCC6C5CC(O)C5(C)C(C7=CC(=O)OC7)CCC65O)C4)OC3C)OC2C)CC(O)C1O|CC1C(C(CC(O1)OC2C(OC(CC2O)OC3C(OC(CC3O)OC4CCC5(C(C4)CCC6C5CC(C7(C6(CCC7C8=CC(=O)OC8)O)C)O)C)C)C)O)O|3-[3-[5-[5-(4,5-dihydroxy-6-methyloxan-2-yl)oxy-4-hydroxy-6-methyloxan-2-yl]oxy-4-hydroxy-6-methyloxan-2-yl]oxy-12,14-dihydroxy-10,13-dimethyl-1,2,3,4,5,6,7,8,9,11,12,15,16,17-tetradecahydrocyclopenta[a]phenanthren-17-yl]-2H-furan-5-one|1.3
21
+ CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21|4.528400000000004|CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21|CN(C)CCCN1C2=CC=CC=C2CCC3=C1C=C(C=C3)Cl|3-(2-chloro-5,6-dihydrobenzo[b][1]benzazepin-11-yl)-N,N-dimethylpropan-1-amine|5.2
22
+ CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21|4.894400000000004|CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21|CN(C)CCCN1C2=CC=CC=C2SC3=C1C=C(C=C3)Cl|3-(2-chlorophenothiazin-10-yl)-N,N-dimethylpropan-1-amine|5.2
23
+ CCN(CC)CCOc1ccc(C(=C(Cl)c2ccccc2)c2ccccc2)cc1|6.562600000000006|CCN(CC)CCOc1ccc(C(=C(Cl)c2ccccc2)c2ccccc2)cc1|CCN(CC)CCOC1=CC=C(C=C1)C(=C(C2=CC=CC=C2)Cl)C3=CC=CC=C3|2-[4-(2-chloro-1,2-diphenylethenyl)phenoxy]-N,N-diethylethanamine|7.2
24
+ CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(O)CC(C)CN(C)C(C)C(O)C1(C)O|1.9007000000000054|CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(O)CC(C)CN(C)C(C)C(O)C1(C)O|CCC1C(C(C(N(CC(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)C)O)(C)O|11-[4-(dimethylamino)-3-hydroxy-6-methyloxan-2-yl]oxy-2-ethyl-3,4,10-trihydroxy-13-(5-hydroxy-4-methoxy-4,6-dimethyloxan-2-yl)oxy-3,5,6,8,10,12,14-heptamethyl-1-oxa-6-azacyclopentadecan-15-one|4.0
25
+ COc1ncnc(NS(=O)(=O)c2ccc(N)cc2)c1OC|0.8768|COc1ncnc(NS(=O)(=O)c2ccc(N)cc2)c1OC|COC1=C(N=CN=C1OC)NS(=O)(=O)C2=CC=C(C=C2)N|4-amino-N-(5,6-dimethoxypyrimidin-4-yl)benzenesulfonamide|0.7
26
+ COc1ccc2nc(S(=O)Cc3ncc(C)c(OC)c3C)[nH]c2c1|2.8997400000000004|COc1ccc2nc(S(=O)Cc3ncc(C)c(OC)c3C)[nH]c2c1|CC1=CN=C(C(=C1OC)C)CS(=O)C2=NC3=C(N2)C=C(C=C3)OC|6-methoxy-2-[(4-methoxy-3,5-dimethylpyridin-2-yl)methylsulfinyl]-1H-benzimidazole|2.2
27
+ CN(C)CCOc1ccc(C(=C(CCCl)c2ccccc2)c2ccccc2)cc1|6.215000000000006|CN(C)CCOc1ccc(C(=C(CCCl)c2ccccc2)c2ccccc2)cc1|CN(C)CCOC1=CC=C(C=C1)C(=C(CCCl)C2=CC=CC=C2)C3=CC=CC=C3|2-[4-(4-chloro-1,2-diphenylbut-1-enyl)phenoxy]-N,N-dimethylethanamine|7.2
28
+ CSc1ccc2c(c1)N(CCC1CCCCN1C)c1ccccc1S2|5.885600000000005|CSc1ccc2c(c1)N(CCC1CCCCN1C)c1ccccc1S2|CN1CCCCC1CCN2C3=CC=CC=C3SC4=C2C=C(C=C4)SC|10-[2-(1-methylpiperidin-2-yl)ethyl]-2-methylsulfanylphenothiazine|5.9
29
+ CC=CCC(C)C(O)C1C(=O)NC(CC)C(=O)N(C)CC(=O)N(C)C(CC(C)C)C(=O)NC(C(C)C)C(=O)N(C)C(CC(C)C)C(=O)NC(C)C(=O)NC(C)C(=O)N(C)C(CC(C)C)C(=O)N(C)C(CC(C)C)C(=O)N(C)C(C(C)C)C(=O)N1C|3.2690000000000046|CC=CCC(C)C(O)C1C(=O)NC(CC)C(=O)N(C)CC(=O)N(C)C(CC(C)C)C(=O)NC(C(C)C)C(=O)N(C)C(CC(C)C)C(=O)NC(C)C(=O)NC(C)C(=O)N(C)C(CC(C)C)C(=O)N(C)C(CC(C)C)C(=O)N(C)C(C(C)C)C(=O)N1C|CCC1C(=O)N(CC(=O)N(C(C(=O)NC(C(=O)N(C(C(=O)NC(C(=O)NC(C(=O)N(C(C(=O)N(C(C(=O)N(C(C(=O)N(C(C(=O)N1)C(C(C)CC=CC)O)C)C(C)C)C)CC(C)C)C)CC(C)C)C)C)C)CC(C)C)C)C(C)C)CC(C)C)C)C|30-ethyl-33-(1-hydroxy-2-methylhex-4-enyl)-1,4,7,10,12,15,19,25,28-nonamethyl-6,9,18,24-tetrakis(2-methylpropyl)-3,21-di(propan-2-yl)-1,4,7,10,13,16,19,22,25,28,31-undecazacyclotritriacontane-2,5,8,11,14,17,20,23,26,29,32-undecone|7.5
30
+ CC(C)=CCCC1(C)C=Cc2c(O)c3c(c(CC=C(C)C)c2O1)OC12C(=CC4CC1C(C)(C)OC2(CC=C(C)C(=O)O)C4O)C3=O|7.031100000000008|CC(C)=CCCC1(C)C=Cc2c(O)c3c(c(CC=C(C)C)c2O1)OC12C(=CC4CC1C(C)(C)OC2(CC=C(C)C(=O)O)C4O)C3=O|CC(=CCCC1(C=CC2=C(C3=C(C(=C2O1)CC=C(C)C)OC45C6CC(C=C4C3=O)C(C5(OC6(C)C)CC=C(C)C(=O)O)O)O)C)C|4-[12,18-dihydroxy-8,21,21-trimethyl-5-(3-methylbut-2-enyl)-8-(4-methylpent-3-enyl)-14-oxo-3,7,20-trioxahexacyclo[15.4.1.02,15.02,19.04,13.06,11]docosa-4(13),5,9,11,15-pentaen-19-yl]-2-methylbut-2-enoic acid|7.0
31
+ C=CC1CN2CCC1CC2C(O)c1ccnc2ccc(OC)cc12|3.1732000000000014|C=CC1CN2CCC1CC2C(O)c1ccnc2ccc(OC)cc12|COC1=CC2=C(C=CN=C2C=C1)C(C3CC4CCN3CC4C=C)O|(5-ethenyl-1-azabicyclo[2.2.2]octan-2-yl)-(6-methoxyquinolin-4-yl)methanol|2.9
32
+ CCCCCC(=O)OC1(C(C)=O)CCC2C3CCC4=CC(=O)CCC4(C)C3CCC21C|5.969600000000007|CCCCCC(=O)OC1(C(C)=O)CCC2C3CCC4=CC(=O)CCC4(C)C3CCC21C|CCCCCC(=O)OC1(CCC2C1(CCC3C2CCC4=CC(=O)CCC34C)C)C(=O)C|(17-acetyl-10,13-dimethyl-3-oxo-2,6,7,8,9,11,12,14,15,16-decahydro-1H-cyclopenta[a]phenanthren-17-yl) hexanoate|5.7
33
+ COc1ccc2cc1Oc1ccc(cc1)CC1c3c(cc4c(c3Oc3cc5c(cc3OC)CCN(C)C5C2)OCO4)CCN1C|6.873900000000009|COc1ccc2cc1Oc1ccc(cc1)CC1c3c(cc4c(c3Oc3cc5c(cc3OC)CCN(C)C5C2)OCO4)CCN1C|CN1CCC2=CC3=C(C4=C2C1CC5=CC=C(C=C5)OC6=C(C=CC(=C6)CC7C8=CC(=C(C=C8CCN7C)OC)O4)OC)OCO3|22,33-dimethoxy-13,28-dimethyl-2,5,7,20-tetraoxa-13,28-diazaoctacyclo[25.6.2.216,19.13,10.121,25.04,8.031,35.014,39]nonatriaconta-1(33),3(39),4(8),9,16(38),17,19(37),21,23,25(36),31,34-dodecaene|6.5
34
+ CCC(=C(c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1|5.9961000000000055|CCC(=C(c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1|CCC(=C(C1=CC=CC=C1)C2=CC=C(C=C2)OCCN(C)C)C3=CC=CC=C3|2-[4-(1,2-diphenylbut-1-enyl)phenoxy]-N,N-dimethylethanamine|7.1
35
+ OC1(c2ccc(Cl)c(C(F)(F)F)c2)CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1|7.532700000000006|OC1(c2ccc(Cl)c(C(F)(F)F)c2)CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1|C1CN(CCC1(C2=CC(=C(C=C2)Cl)C(F)(F)F)O)CCCC(C3=CC=C(C=C3)F)C4=CC=C(C=C4)F|1-[4,4-bis(4-fluorophenyl)butyl]-4-[4-chloro-3-(trifluoromethyl)phenyl]piperidin-4-ol|7.3
36
+ Clc1ccc(Cn2c(CN3CCCC3)nc3ccccc32)cc1|4.333800000000003|Clc1ccc(Cn2c(CN3CCCC3)nc3ccccc32)cc1|C1CCN(C1)CC2=NC3=CC=CC=C3N2CC4=CC=C(C=C4)Cl|1-[(4-chlorophenyl)methyl]-2-(pyrrolidin-1-ylmethyl)benzimidazole|4.0
37
+ C1CCC(C(CC2CCCCN2)C2CCCCC2)CC1|5.295400000000005|C1CCC(C(CC2CCCCN2)C2CCCCC2)CC1|C1CCC(CC1)C(CC2CCCCN2)C3CCCCC3|2-(2,2-dicyclohexylethyl)piperidine|6.8
38
+ Oc1c(Cl)cc(Cl)c(Cl)c1Cc1c(O)c(Cl)cc(Cl)c1Cl|6.609000000000001|Oc1c(Cl)cc(Cl)c(Cl)c1Cc1c(O)c(Cl)cc(Cl)c1Cl|C1=C(C(=C(C(=C1Cl)Cl)CC2=C(C(=CC(=C2Cl)Cl)Cl)O)O)Cl|3,4,6-trichloro-2-[(2,3,5-trichloro-6-hydroxyphenyl)methyl]phenol|7.5
39
+ CCC(C(=O)O)C1CCC(C)C(C(C)C(O)C(C)C(=O)C(CC)C2OC3(C=CC(O)C4(CCC(C)(C5CCC(O)(CC)C(C)O5)O4)O3)C(C)CC2C)O1|6.188000000000006|CCC(C(=O)O)C1CCC(C)C(C(C)C(O)C(C)C(=O)C(CC)C2OC3(C=CC(O)C4(CCC(C)(C5CCC(O)(CC)C(C)O5)O4)O3)C(C)CC2C)O1|CCC(C1CCC(C(O1)C(C)C(C(C)C(=O)C(CC)C2C(CC(C3(O2)C=CC(C4(O3)CCC(O4)(C)C5CCC(C(O5)C)(CC)O)O)C)C)O)C)C(=O)O|2-[6-[6-[3-(5-ethyl-5-hydroxy-6-methyloxan-2-yl)-15-hydroxy-3,10,12-trimethyl-4,6,8-trioxadispiro[4.1.57.35]pentadec-13-en-9-yl]-3-hydroxy-4-methyl-5-oxooctan-2-yl]-5-methyloxan-2-yl]butanoic acid|5.7
40
+ Cc1cc(Nc2ncc(Cl)c(Nc3ccccc3S(=O)(=O)C(C)C)n2)c(OC(C)C)cc1C1CCNCC1|6.361920000000006|Cc1cc(Nc2ncc(Cl)c(Nc3ccccc3S(=O)(=O)C(C)C)n2)c(OC(C)C)cc1C1CCNCC1|CC1=CC(=C(C=C1C2CCNCC2)OC(C)C)NC3=NC=C(C(=N3)NC4=CC=CC=C4S(=O)(=O)C(C)C)Cl|5-chloro-2-N-(5-methyl-4-piperidin-4-yl-2-propan-2-yloxyphenyl)-4-N-(2-propan-2-ylsulfonylphenyl)pyrimidine-2,4-diamine|6.4
41
+ CC1(C)C=Cc2c(c3c(c4c(=O)c(-c5ccc(O)cc5)coc24)OC(C)(C)CC3)O1|5.453400000000006|CC1(C)C=Cc2c(c3c(c4c(=O)c(-c5ccc(O)cc5)coc24)OC(C)(C)CC3)O1|CC1(CCC2=C3C(=C4C(=C2O1)C(=O)C(=CO4)C5=CC=C(C=C5)O)C=CC(O3)(C)C)C|5-(4-hydroxyphenyl)-10,10,16,16-tetramethyl-3,9,15-trioxatetracyclo[12.4.0.02,7.08,13]octadeca-1,4,7,13,17-pentaen-6-one|4.7
42
+ C=CC(=O)Nc1cc(Nc2nccc(-c3cn(C)c4ccccc34)n2)c(OC)cc1N(C)CCN(C)C|4.509800000000003|C=CC(=O)Nc1cc(Nc2nccc(-c3cn(C)c4ccccc34)n2)c(OC)cc1N(C)CCN(C)C|CN1C=C(C2=CC=CC=C21)C3=NC(=NC=C3)NC4=C(C=C(C(=C4)NC(=O)C=C)N(C)CCN(C)C)OC|N-[2-[2-(dimethylamino)ethyl-methylamino]-4-methoxy-5-[[4-(1-methylindol-3-yl)pyrimidin-2-yl]amino]phenyl]prop-2-enamide|3.7
43
+ Cc1c(-c2ccc(O)cc2)n(Cc2ccc(OCCN3CCCCCC3)cc2)c2ccc(O)cc12|6.331020000000006|Cc1c(-c2ccc(O)cc2)n(Cc2ccc(OCCN3CCCCCC3)cc2)c2ccc(O)cc12|CC1=C(N(C2=C1C=C(C=C2)O)CC3=CC=C(C=C3)OCCN4CCCCCC4)C5=CC=C(C=C5)O|1-[[4-[2-(azepan-1-yl)ethoxy]phenyl]methyl]-2-(4-hydroxyphenyl)-3-methylindol-5-ol|6.1
44
+ CCCCCCOC(C)c1cccc(-c2csc(NC(=O)c3cc(Cl)c(C=C(C)C(=O)O)c(Cl)c3)n2)c1OC|8.523600000000004|CCCCCCOC(C)c1cccc(-c2csc(NC(=O)c3cc(Cl)c(C=C(C)C(=O)O)c(Cl)c3)n2)c1OC|CCCCCCOC(C)C1=CC=CC(=C1OC)C2=CSC(=N2)NC(=O)C3=CC(=C(C(=C3)Cl)C=C(C)C(=O)O)Cl|3-[2,6-dichloro-4-[[4-[3-(1-hexoxyethyl)-2-methoxyphenyl]-1,3-thiazol-2-yl]carbamoyl]phenyl]-2-methylprop-2-enoic acid|7.7
45
+ CC(C)=CCc1c2c(c3occ(-c4ccc(O)cc4)c(=O)c3c1O)C=CC(C)(C)O2|5.564100000000006|CC(C)=CCc1c2c(c3occ(-c4ccc(O)cc4)c(=O)c3c1O)C=CC(C)(C)O2|CC(=CCC1=C2C(=C3C(=C1O)C(=O)C(=CO3)C4=CC=C(C=C4)O)C=CC(O2)(C)C)C|5-hydroxy-3-(4-hydroxyphenyl)-8,8-dimethyl-6-(3-methylbut-2-enyl)pyrano[2,3-h]chromen-4-one|5.9
46
+ CCCCc1oc2ccc(NS(C)(=O)=O)cc2c1C(=O)c1ccc(OCCCN(CCCC)CCCC)cc1|7.0490000000000075|CCCCc1oc2ccc(NS(C)(=O)=O)cc2c1C(=O)c1ccc(OCCCN(CCCC)CCCC)cc1|CCCCC1=C(C2=C(O1)C=CC(=C2)NS(=O)(=O)C)C(=O)C3=CC=C(C=C3)OCCCN(CCCC)CCCC|N-[2-butyl-3-[4-[3-(dibutylamino)propoxy]benzoyl]-1-benzofuran-5-yl]methanesulfonamide|7.2
47
+ CC(C)C(=O)OCC(=O)C12OC(C3CCCCC3)OC1CC1C3CCC4=CC(=O)C=CC4(C)C3C(O)CC12C|4.703900000000005|CC(C)C(=O)OCC(=O)C12OC(C3CCCCC3)OC1CC1C3CCC4=CC(=O)C=CC4(C)C3C(O)CC12C|CC(C)C(=O)OCC(=O)C12C(CC3C1(CC(C4C3CCC5=CC(=O)C=CC45C)O)C)OC(O2)C6CCCCC6|[2-(6-cyclohexyl-11-hydroxy-9,13-dimethyl-16-oxo-5,7-dioxapentacyclo[10.8.0.02,9.04,8.013,18]icosa-14,17-dien-8-yl)-2-oxoethyl] 2-methylpropanoate|5.3
48
+ CC1(C)C=Cc2c(c3c(c4c(=O)c(-c5ccc(O)c(O)c5)coc24)OC(C)(C)CC3)O1|5.159000000000005|CC1(C)C=Cc2c(c3c(c4c(=O)c(-c5ccc(O)c(O)c5)coc24)OC(C)(C)CC3)O1|CC1(CCC2=C3C(=C4C(=C2O1)C(=O)C(=CO4)C5=CC(=C(C=C5)O)O)C=CC(O3)(C)C)C|5-(3,4-dihydroxyphenyl)-10,10,16,16-tetramethyl-3,9,15-trioxatetracyclo[12.4.0.02,7.08,13]octadeca-1,4,7,13,17-pentaen-6-one|4.4
49
+ CCCCCOc1ccc(-c2ccc(-c3ccc(C(=O)NC4CC(O)C(O)NC(=O)C5C(O)C(C)CN5C(=O)C(C(C)O)NC(=O)C(C(O)C(O)c5ccc(O)cc5)NC(=O)C5CC(O)CN5C(=O)C(C(C)O)NC4=O)cc3)cc2)cc1|-0.927099999999986|CCCCCOc1ccc(-c2ccc(-c3ccc(C(=O)NC4CC(O)C(O)NC(=O)C5C(O)C(C)CN5C(=O)C(C(C)O)NC(=O)C(C(O)C(O)c5ccc(O)cc5)NC(=O)C5CC(O)CN5C(=O)C(C(C)O)NC4=O)cc3)cc2)cc1|CCCCCOC1=CC=C(C=C1)C2=CC=C(C=C2)C3=CC=C(C=C3)C(=O)NC4CC(C(NC(=O)C5C(C(CN5C(=O)C(NC(=O)C(NC(=O)C6CC(CN6C(=O)C(NC4=O)C(C)O)O)C(C(C7=CC=C(C=C7)O)O)O)C(C)O)C)O)O)O|N-[6-[1,2-dihydroxy-2-(4-hydroxyphenyl)ethyl]-11,20,21,25-tetrahydroxy-3,15-bis(1-hydroxyethyl)-26-methyl-2,5,8,14,17,23-hexaoxo-1,4,7,13,16,22-hexazatricyclo[22.3.0.09,13]heptacosan-18-yl]-4-[4-(4-pentoxyphenyl)phenyl]benzamide|2.3
50
+ CC(C)(C)c1cc(C(C)(C)C)c(NC(=O)c2c[nH]c3ccccc3c2=O)cc1O|5.081000000000005|CC(C)(C)c1cc(C(C)(C)C)c(NC(=O)c2c[nH]c3ccccc3c2=O)cc1O|CC(C)(C)C1=CC(=C(C=C1NC(=O)C2=CNC3=CC=CC=C3C2=O)O)C(C)(C)C|N-(2,4-ditert-butyl-5-hydroxyphenyl)-4-oxo-1H-quinoline-3-carboxamide|5.6
51
+ CCC(=C(c1ccc(OCCN(C)C)cc1)c1cccc(O)c1)c1ccccc1|5.701700000000006|CCC(=C(c1ccc(OCCN(C)C)cc1)c1cccc(O)c1)c1ccccc1|CCC(=C(C1=CC=C(C=C1)OCCN(C)C)C2=CC(=CC=C2)O)C3=CC=CC=C3|3-[1-[4-[2-(dimethylamino)ethoxy]phenyl]-2-phenylbut-1-enyl]phenol|6.8
52
+ CCN1CCN(Cc2ccc(Nc3ncc(F)c(-c4cc(F)c5nc(C)n(C(C)C)c5c4)n3)nc2)CC1|4.936920000000004|CCN1CCN(Cc2ccc(Nc3ncc(F)c(-c4cc(F)c5nc(C)n(C(C)C)c5c4)n3)nc2)CC1|CCN1CCN(CC1)CC2=CN=C(C=C2)NC3=NC=C(C(=N3)C4=CC5=C(C(=C4)F)N=C(N5C(C)C)C)F|N-[5-[(4-ethylpiperazin-1-yl)methyl]pyridin-2-yl]-5-fluoro-4-(7-fluoro-2-methyl-3-propan-2-ylbenzimidazol-5-yl)pyrimidin-2-amine|3.8
53
+ CCc1nc(C(N)=O)c(Nc2ccc(N3CCC(N4CCN(C)CC4)CC3)c(OC)c2)nc1NC1CCOCC1|2.6972000000000014|CCc1nc(C(N)=O)c(Nc2ccc(N3CCC(N4CCN(C)CC4)CC3)c(OC)c2)nc1NC1CCOCC1|CCC1=C(N=C(C(=N1)C(=O)N)NC2=CC(=C(C=C2)N3CCC(CC3)N4CCN(CC4)C)OC)NC5CCOCC5|6-ethyl-3-[3-methoxy-4-[4-(4-methylpiperazin-1-yl)piperidin-1-yl]anilino]-5-(oxan-4-ylamino)pyrazine-2-carboxamide|3.5
54
+ CC(C)(C)c1ccc(C(=O)CCCN2CCC(OC(c3ccccc3)c3ccccc3)CC2)cc1|7.217600000000008|CC(C)(C)c1ccc(C(=O)CCCN2CCC(OC(c3ccccc3)c3ccccc3)CC2)cc1|CC(C)(C)C1=CC=C(C=C1)C(=O)CCCN2CCC(CC2)OC(C3=CC=CC=C3)C4=CC=CC=C4|4-(4-benzhydryloxypiperidin-1-yl)-1-(4-tert-butylphenyl)butan-1-one|7.2
55
+ c1ccc2c(c1)Sc1ccccc1N2CC1CN2CCC1CC2|4.631100000000005|c1ccc2c(c1)Sc1ccccc1N2CC1CN2CCC1CC2|C1CN2CCC1C(C2)CN3C4=CC=CC=C4SC5=CC=CC=C53|10-(1-azabicyclo[2.2.2]octan-3-ylmethyl)phenothiazine|4.6
56
+ CC1=NN(c2ccc(C)c(C)c2)C(=O)C1=NNc1cccc(-c2cccc(C(=O)O)c2)c1O|4.564840000000005|CC1=NN(c2ccc(C)c(C)c2)C(=O)C1=NNc1cccc(-c2cccc(C(=O)O)c2)c1O|CC1=C(C=C(C=C1)N2C(=O)C(=C(N2)C)N=NC3=CC=CC(=C3O)C4=CC(=CC=C4)C(=O)O)C|3-[3-[[2-(3,4-dimethylphenyl)-5-methyl-3-oxo-1H-pyrazol-4-yl]diazenyl]-2-hydroxyphenyl]benzoic acid|5.4
57
+ COC(=O)NC(C(=O)NC(Cc1ccccc1)C(O)CN(Cc1ccc(-c2ccccn2)cc1)NC(=O)C(NC(=O)OC)C(C)(C)C)C(C)(C)C|4.2116000000000025|COC(=O)NC(C(=O)NC(Cc1ccccc1)C(O)CN(Cc1ccc(-c2ccccn2)cc1)NC(=O)C(NC(=O)OC)C(C)(C)C)C(C)(C)C|CC(C)(C)C(C(=O)NC(CC1=CC=CC=C1)C(CN(CC2=CC=C(C=C2)C3=CC=CC=N3)NC(=O)C(C(C)(C)C)NC(=O)OC)O)NC(=O)OC|methyl N-[1-[2-[2-hydroxy-3-[[2-(methoxycarbonylamino)-3,3-dimethylbutanoyl]amino]-4-phenylbutyl]-2-[(4-pyridin-2-ylphenyl)methyl]hydrazinyl]-3,3-dimethyl-1-oxobutan-2-yl]carbamate|5.6
58
+ CN1C2CCC1CC(OC(c1ccccc1)c1ccccc1)C2|4.417800000000004|CN1C2CCC1CC(OC(c1ccccc1)c1ccccc1)C2|CN1C2CCC1CC(C2)OC(C3=CC=CC=C3)C4=CC=CC=C4|3-benzhydryloxy-8-methyl-8-azabicyclo[3.2.1]octane|4.5
59
+ CC(C)N1CCN(c2ccc(OCC3COC(Cn4cncn4)(c4ccc(Cl)cc4Cl)O3)cc2)CC1|4.462700000000003|CC(C)N1CCN(c2ccc(OCC3COC(Cn4cncn4)(c4ccc(Cl)cc4Cl)O3)cc2)CC1|CC(C)N1CCN(CC1)C2=CC=C(C=C2)OCC3COC(O3)(CN4C=NC=N4)C5=C(C=C(C=C5)Cl)Cl|1-[4-[[2-(2,4-dichlorophenyl)-2-(1,2,4-triazol-1-ylmethyl)-1,3-dioxolan-4-yl]methoxy]phenyl]-4-propan-2-ylpiperazine|4.8
60
+ C=CCOc1ccccc1OCC(O)CNC(C)C|1.9890999999999999|C=CCOc1ccccc1OCC(O)CNC(C)C|CC(C)NCC(COC1=CC=CC=C1OCC=C)O|1-(propan-2-ylamino)-3-(2-prop-2-enoxyphenoxy)propan-2-ol|2.1
61
+ CCCCCC(O)C=CC1C(O)CC(=O)C1CCCCCCC(=O)O|3.475100000000002|CCCCCC(O)C=CC1C(O)CC(=O)C1CCCCCCC(=O)O|CCCCCC(C=CC1C(CC(=O)C1CCCCCCC(=O)O)O)O|7-[3-hydroxy-2-(3-hydroxyoct-1-enyl)-5-oxocyclopentyl]heptanoic acid|3.2
62
+ CC1CCOC2Cn3cc(C(=O)NCc4ccc(F)cc4F)c(=O)c(O)c3C(=O)N12|1.3528|CC1CCOC2Cn3cc(C(=O)NCc4ccc(F)cc4F)c(=O)c(O)c3C(=O)N12|CC1CCOC2N1C(=O)C3=C(C(=O)C(=CN3C2)C(=O)NCC4=C(C=C(C=C4)F)F)O|N-[(2,4-difluorophenyl)methyl]-11-hydroxy-7-methyl-9,12-dioxo-4-oxa-1,8-diazatricyclo[8.4.0.03,8]tetradeca-10,13-diene-13-carboxamide|2.4
63
+ OCCN1CCN(CCCN2c3ccccc3C=Cc3ccccc32)CC1|3.308500000000002|OCCN1CCN(CCCN2c3ccccc3C=Cc3ccccc32)CC1|C1CN(CCN1CCCN2C3=CC=CC=C3C=CC4=CC=CC=C42)CCO|2-[4-(3-benzo[b][1]benzazepin-11-ylpropyl)piperazin-1-yl]ethanol|3.6
64
+ CCOC(=O)c1c(CSc2ccccc2)n(C)c2cc(Br)c(O)c(CN(C)C)c12|5.177000000000005|CCOC(=O)c1c(CSc2ccccc2)n(C)c2cc(Br)c(O)c(CN(C)C)c12|CCOC(=O)C1=C(N(C2=CC(=C(C(=C21)CN(C)C)O)Br)C)CSC3=CC=CC=C3|ethyl 6-bromo-4-[(dimethylamino)methyl]-5-hydroxy-1-methyl-2-(phenylsulfanylmethyl)indole-3-carboxylate|4.4
65
+ CC(C)c1nc(CN(C)C(=O)NC(C(=O)NC(Cc2ccccc2)CC(O)C(Cc2ccccc2)NC(=O)OCc2cncs2)C(C)C)cs1|5.905200000000005|CC(C)c1nc(CN(C)C(=O)NC(C(=O)NC(Cc2ccccc2)CC(O)C(Cc2ccccc2)NC(=O)OCc2cncs2)C(C)C)cs1|CC(C)C1=NC(=CS1)CN(C)C(=O)NC(C(C)C)C(=O)NC(CC2=CC=CC=C2)CC(C(CC3=CC=CC=C3)NC(=O)OCC4=CN=CS4)O|1,3-thiazol-5-ylmethyl N-[3-hydroxy-5-[[3-methyl-2-[[methyl-[(2-propan-2-yl-1,3-thiazol-4-yl)methyl]carbamoyl]amino]butanoyl]amino]-1,6-diphenylhexan-2-yl]carbamate|6.0
66
+ Cc1c(O)cccc1C(=O)NC(CSc1ccccc1)C(O)CN1CC2CCCCC2CC1C(=O)NC(C)(C)C|4.747620000000004|Cc1c(O)cccc1C(=O)NC(CSc1ccccc1)C(O)CN1CC2CCCCC2CC1C(=O)NC(C)(C)C|CC1=C(C=CC=C1O)C(=O)NC(CSC2=CC=CC=C2)C(CN3CC4CCCCC4CC3C(=O)NC(C)(C)C)O|N-tert-butyl-2-[2-hydroxy-3-[(3-hydroxy-2-methylbenzoyl)amino]-4-phenylsulfanylbutyl]-3,4,4a,5,6,7,8,8a-octahydro-1H-isoquinoline-3-carboxamide|5.7
67
+ CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(O)C(Cc1ccccc1)NC(=O)C(CC(N)=O)NC(=O)c1ccc2ccccc2n1|3.092400000000003|CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(O)C(Cc1ccccc1)NC(=O)C(CC(N)=O)NC(=O)c1ccc2ccccc2n1|CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(C(CC3=CC=CC=C3)NC(=O)C(CC(=O)N)NC(=O)C4=NC5=CC=CC=C5C=C4)O|N-[4-[3-(tert-butylcarbamoyl)-3,4,4a,5,6,7,8,8a-octahydro-1H-isoquinolin-2-yl]-3-hydroxy-1-phenylbutan-2-yl]-2-(quinoline-2-carbonylamino)butanediamide|4.2
68
+ CCCC1(CCc2ccccc2)CC(O)=C(C(CC)c2cccc(NS(=O)(=O)c3ccc(C(F)(F)F)cn3)c2)C(=O)O1|7.325500000000007|CCCC1(CCc2ccccc2)CC(O)=C(C(CC)c2cccc(NS(=O)(=O)c3ccc(C(F)(F)F)cn3)c2)C(=O)O1|CCCC1(CC(=C(C(=O)O1)C(CC)C2=CC(=CC=C2)NS(=O)(=O)C3=NC=C(C=C3)C(F)(F)F)O)CCC4=CC=CC=C4|N-[3-[1-[4-hydroxy-6-oxo-2-(2-phenylethyl)-2-propyl-3H-pyran-5-yl]propyl]phenyl]-5-(trifluoromethyl)pyridine-2-sulfonamide|7.0
69
+ CC(C)CN(CC(O)C(Cc1ccccc1)NC(=O)OC1CCOC1)S(=O)(=O)c1ccc(N)cc1|2.4028|CC(C)CN(CC(O)C(Cc1ccccc1)NC(=O)OC1CCOC1)S(=O)(=O)c1ccc(N)cc1|CC(C)CN(CC(C(CC1=CC=CC=C1)NC(=O)OC2CCOC2)O)S(=O)(=O)C3=CC=C(C=C3)N|oxolan-3-yl N-[4-[(4-aminophenyl)sulfonyl-(2-methylpropyl)amino]-3-hydroxy-1-phenylbutan-2-yl]carbamate|2.9
70
+ CC(C)CN(CC(O)C(Cc1ccccc1)NC(=O)OC1COC2OCCC12)S(=O)(=O)c1ccc(N)cc1|2.3753|CC(C)CN(CC(O)C(Cc1ccccc1)NC(=O)OC1COC2OCCC12)S(=O)(=O)c1ccc(N)cc1|CC(C)CN(CC(C(CC1=CC=CC=C1)NC(=O)OC2COC3C2CCO3)O)S(=O)(=O)C4=CC=C(C=C4)N|2,3,3a,4,5,6a-hexahydrofuro[2,3-b]furan-4-yl N-[4-[(4-aminophenyl)sulfonyl-(2-methylpropyl)amino]-3-hydroxy-1-phenylbutan-2-yl]carbamate|2.9
71
+ CC(C)(C)NC(=O)C1CN(Cc2cccnc2)CCN1CC(O)CC(Cc1ccccc1)C(=O)NC1c2ccccc2CC1O|2.8669000000000016|CC(C)(C)NC(=O)C1CN(Cc2cccnc2)CCN1CC(O)CC(Cc1ccccc1)C(=O)NC1c2ccccc2CC1O|CC(C)(C)NC(=O)C1CN(CCN1CC(CC(CC2=CC=CC=C2)C(=O)NC3C(CC4=CC=CC=C34)O)O)CC5=CN=CC=C5|1-[4-benzyl-2-hydroxy-5-[(2-hydroxy-2,3-dihydro-1H-inden-1-yl)amino]-5-oxopentyl]-N-tert-butyl-4-(pyridin-3-ylmethyl)piperazine-2-carboxamide|2.8
TransAntivirus/download_pubchem/opsin-master.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdb2b66ea8ba08e07da78fc5dff4efa219034281c4776410539d172b9831b198
3
+ size 2056951
TransAntivirus/download_pubchem/opsin-master/.github/workflows/maven.yml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This workflow will build a Java project with Maven
2
+ # For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven
3
+
4
+ name: Java CI with Maven
5
+
6
+ on: [push, pull_request]
7
+
8
+ jobs:
9
+ build:
10
+ runs-on: ubuntu-20.04
11
+ strategy:
12
+ matrix:
13
+ # test against latest update of each major Java version:
14
+ java: [ 8, 11, 17 ]
15
+ name: Java ${{ matrix.java }}
16
+ steps:
17
+ - uses: actions/checkout@v2
18
+ - uses: actions/cache@v1
19
+ with:
20
+ path: ~/.m2/repository
21
+ key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
22
+ restore-keys: |
23
+ ${{ runner.os }}-maven-
24
+ - name: Setup java
25
+ uses: actions/setup-java@v1
26
+ with:
27
+ java-version: ${{ matrix.java }}
28
+ - name: Build with Maven
29
+ run: mvn -B clean test javadoc:javadoc package assembly:assembly
TransAntivirus/download_pubchem/opsin-master/.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ target/
2
+ opsin-cli/src/main/java/dl/
3
+ .classpath
4
+ .project
5
+ .settings
TransAntivirus/download_pubchem/opsin-master/LICENSE.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Copyright 2017 Daniel Lowe
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
TransAntivirus/download_pubchem/opsin-master/README.md ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OPSIN - Open Parser for Systematic IUPAC Nomenclature
2
+ =====================================================
3
+ __Version 2.5.0 (see ReleaseNotes.txt for what's new in this version)__
4
+ __Source code: <https://github.com/dan2097/opsin>__
5
+ __Web interface and informational site: <http://opsin.ch.cam.ac.uk/>__
6
+ __License: [MIT License](https://opensource.org/licenses/MIT)__
7
+
8
+ OPSIN is a Java library for IUPAC name-to-structure conversion offering high recall and precision on organic chemical nomenclature.
9
+
10
+ Java 7 (or higher) is required for OPSIN 2.5.0
11
+
12
+ Supported outputs are SMILES, CML (Chemical Markup Language) and InChI (IUPAC International Chemical Identifier)
13
+
14
+ ### Simple Usage Examples
15
+ #### Convert a chemical name to SMILES
16
+ `java -jar opsin-2.5.0-jar-with-dependencies.jar -osmi input.txt output.txt`
17
+ where input.txt contains chemical name/s, one per line
18
+
19
+ NameToStructure nts = NameToStructure.getInstance();
20
+ String smiles = nts.parseToSmiles("acetonitrile");
21
+
22
+ #### Convert a chemical name to CML
23
+ `java -jar opsin-2.5.0-jar-with-dependencies.jar -ocml input.txt output.txt`
24
+ where input.txt contains chemical name/s, one per line
25
+
26
+ NameToStructure nts = NameToStructure.getInstance();
27
+ String cml = nts.parseToCML("acetonitrile");
28
+
29
+ #### Convert a chemical name to StdInChI/StdInChIKey/InChI with FixedH
30
+ `java -jar opsin-2.5.0-jar-with-dependencies.jar -ostdinchi input.txt output.txt`
31
+ `java -jar opsin-2.5.0-jar-with-dependencies.jar -ostdinchikey input.txt output.txt`
32
+ `java -jar opsin-2.5.0-jar-with-dependencies.jar -oinchi input.txt output.txt`
33
+ where input.txt contains chemical name/s, one per line
34
+
35
+ NameToInchi nti = new NameToInchi();
36
+ String stdinchi = nti.parseToStdInchi("acetonitrile");
37
+ String stdinchikey = nti.parseToStdInchiKey("acetonitrile");
38
+ String inchi = nti.parseToInchi("acetonitrile");
39
+
40
+ NOTE: OPSIN's non-standard InChI includes an additional layer (FixedH) that indicates which tautomer the chemical name described. StdInChI aims to be tautomer independent.
41
+ ### Advanced Usage
42
+ OPSIN 2.5.0 allows enabling of the following options:
43
+
44
+ * allowRadicals: Allows substituents to be interpretable e.g. allows interpretation of "ethyl"
45
+ * wildcardRadicals: If allowRadicals is enabled, this option uses atoms in the output to represent radicals: 'R' in CML and '*' in SMILES e.g. changes the output of ethyl from C[CH2] to CC\*
46
+ * detailedFailureAnalysis: Provides a potentially more accurate reason as to why a chemical name could not be parsed. This is done by parsing the chemical name from right to left. The trade-off for enabling this is slightly increased memory usage.
47
+ * allowAcidsWithoutAcid: Allows interpretation of acids without the word acid e.g. "acetic"
48
+ * allowUninterpretableStereo: Allows stereochemistry uninterpretable by OPSIN to be ignored (When used as a library the OpsinResult has a status of WARNING if stereochemistry was ignored)
49
+ * verbose: Enables debugging output\*
50
+
51
+ \*When used as a library this is done by modifying Log4J's logging level e.g. `Logger.getLogger("uk.ac.cam.ch.wwmm.opsin").setLevel(Level.DEBUG);`
52
+
53
+ The usage of these options on the command line is described in the command line's help dialog accessible via:
54
+ `java -jar opsin-2.5.0-jar-with-dependencies.jar -h`
55
+
56
+ These options may be controlled using the following code:
57
+
58
+ NameToStructure nts = NameToStructure.getInstance();
59
+ NameToStructureConfig ntsconfig = new NameToStructureConfig();
60
+ //a new NameToStructureConfig starts as a copy of OPSIN's default configuration
61
+ ntsconfig.setAllowRadicals(true);
62
+ OpsinResult result = nts.parseChemicalName("acetonitrile", ntsconfig);
63
+ String cml = result.getCml();
64
+ String smiles = result.getSmiles();
65
+ String stdinchi = NameToInchi.convertResultToStdInChI(result);
66
+
67
+ `result.getStatus()` may be checked to see if the conversion was successful.
68
+ If a structure was generated but OPSIN believes there may be a problem a status of WARNING is returned. Currently this may occur if the name appeared to be ambiguous or stereochemistry was ignored.
69
+ By default only optical rotation specification is ignored (this cannot be converted to stereo-configuration algorithmically).
70
+
71
+ Convenience methods like `result.nameAppearsToBeAmbiguous()` may be used to check the cause of the warning.
72
+
73
+ NOTE: (Std)InChI cannot be generated for polymers or radicals generated in combination with the wildcardRadicals option
74
+
75
+ ### Availability
76
+ OPSIN is available as a standalone JAR from GitHub, <https://github.com/dan2097/opsin/releases>
77
+ `opsin-2.5.0-jar-with-dependencies.jar` can be executed as a commandline application or added to the classpath for library usage.
78
+ OPSIN is also available from the Maven Central Repository for users of Apache Maven.
79
+
80
+ If you are using Maven then add the following to your pom.xml:
81
+
82
+ <dependency>
83
+ <groupId>uk.ac.cam.ch.opsin</groupId>
84
+ <artifactId>opsin-core</artifactId>
85
+ <version>2.5.0</version>
86
+ </dependency>
87
+
88
+ If you need just CML or SMILES output support
89
+
90
+ or
91
+
92
+ <dependency>
93
+ <groupId>uk.ac.cam.ch.opsin</groupId>
94
+ <artifactId>opsin-inchi</artifactId>
95
+ <version>2.5.0</version>
96
+ </dependency>
97
+
98
+ if you also need InChI output support.
99
+
100
+ #### Building from source
101
+ To build OPSIN from source, download Maven 3 and download OPSIN's source code.
102
+
103
+ Running `mvn package assembly:assembly` in the root of OPSIN's source will build the jar with dependencies
104
+
105
+ Running `mvn assembly:assembly` in the opsin-core folder will build the "excludingInChI-jar-with-dependencies"
106
+
107
+ ### About OPSIN
108
+
109
+ The workings of OPSIN are more fully described in:
110
+
111
+ Chemical Name to Structure: OPSIN, an Open Source Solution
112
+ Daniel M. Lowe, Peter T. Corbett, Peter Murray-Rust, Robert C. Glen
113
+ Journal of Chemical Information and Modeling 2011 51 (3), 739-753
114
+
115
+ If you use OPSIN in your work, then it would be great if you could cite us.
116
+
117
+ The following list broadly summarises what OPSIN can currently do and what will be worked on in the future.
118
+
119
+ #### Supported nomenclature includes:
120
+ * alkanes/alkenes/alkynes/heteroatom chains e.g. hexane, hex-1-ene, tetrasiloxane and their cyclic analogues e.g. cyclopropane
121
+ * All IUPAC 1993 recommended rings
122
+ * Trivial acids
123
+ * Hantzsch-Widman e.g. 1,3-oxazole
124
+ * Spiro systems
125
+ * All von Baeyer rings e.g. bicyclo[2.2.2]octane
126
+ * Hydro e.g. 2,3-dihydropyridine
127
+ * Indicated hydrogen e.g. 1H-benzoimidazole
128
+ * Heteroatom replacement
129
+ * Specification of charge e.g. ium/ide/ylium/uide
130
+ * Multiplicative nomenclature e.g. ethylenediaminetetraacetic acid
131
+ * Conjunctive nomenclature e.g. cyclohexaneethanol
132
+ * Fused ring systems e.g. imidazo[4,5-d]pyridine
133
+ * Ring assemblies e.g. biphenyl
134
+ * Most prefix and infix functional replacement nomenclature
135
+ * The following functional classes: acetals, acids, alcohols, amides, anhydrides, anilides, azetidides, azides, bromides, chlorides,
136
+ cyanates, cyanides, esters, di/tri/tetra esters, ethers, fluorides, fulminates, glycol ethers, glycols, hemiacetals, hemiketals,
137
+ hydrazides, hydrazones, hydrides, hydroperoxides, hydroxides, imides, iodides, isocyanates, isocyanides, isoselenocyanates, isothiocyanates,
138
+ ketals, ketones, lactams, lactims, lactones, mercaptans, morpholides, oxides, oximes, peroxides, piperazides, piperidides, pyrrolidides,
139
+ selenides, selenocyanates, selenoketones, selenols, selenosemicarbazones, selenones, selenoxides, selones, semicarbazones, sulfides, sulfones,
140
+ sulfoxides, sultams, sultims, sultines, sultones, tellurides, telluroketones, tellurones, tellurosemicarbazones, telluroxides, thiocyanates,
141
+ thioketones, thiols, thiosemicarbazones
142
+ * Greek letters
143
+ * Lambda convention
144
+ * Amino Acids and derivatives
145
+ * Structure-based polymer names e.g. poly(2,2'-diamino-5-hexadecylbiphenyl-3,3'-diyl)
146
+ * Bridge prefixes e.g. methano
147
+ * Specification of oxidation numbers and charge on elements
148
+ * Perhalogeno terms
149
+ * Subtractive prefixes: deoxy, dehydro, anhydro, demethyl, deamino
150
+ * Stoichiometry ratios and mixture indicators
151
+ * Nucleosides, (oligo)nucleotides and their esters
152
+ * Carbohydrate nomenclature
153
+ * Simple CAS names including inverted CAS names
154
+ * Steroids including alpha/beta stereochemistry
155
+ * Isotopic labelling
156
+ * E/Z/R/S stereochemistry
157
+ * cis/trans indicating relative stereochemistry on rings and as a synonym of E/Z
158
+
159
+ #### Currently UNsupported nomenclature includes:
160
+ * Other less common stereochemical terms
161
+ * Most alkaloids/terpenoids
162
+ * Natural product specific nomenclature operations
163
+
164
+ ### Developers and Contributors
165
+ * Rich Apodaca
166
+ * Albina Asadulina
167
+ * Peter Corbett
168
+ * Daniel Lowe (Current maintainer)
169
+ * John Mayfield
170
+ * Peter Murray-Rust
171
+ * Noel O'Boyle
172
+ * Mark Williamson
173
+
174
+ Thanks also to the many users who have contributed through suggestions and bug reporting.
175
+
176
+ ![YourKit Logo](https://www.yourkit.com/images/yklogo.png)
177
+
178
+ OPSIN's developers use YourKit to profile and optimise code.
179
+
180
+ YourKit supports open source projects with its full-featured Java Profiler.
181
+ YourKit, LLC is the creator of [YourKit Java Profiler](https://www.yourkit.com/java/profiler/index.jsp) and [YourKit .NET Profiler](https://www.yourkit.com/.net/profiler/index.jsp), innovative and intelligent tools for profiling Java and .NET applications.
182
+
183
+ Good Luck and let us know if you have problems, comments or suggestions!
184
+ Bugs may be reported on the project's [issue tracker](https://github.com/dan2097/opsin/issues).
185
+
186
+ ![Build Status](https://github.com/dan2097/opsin/workflows/Java%20CI%20with%20Maven/badge.svg)
TransAntivirus/download_pubchem/opsin-master/ReleaseNotes.txt ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Version 2.5.0 (2020-10-04)
2
+ OPSIN now requires Java 7 (or higher)
3
+ Support for traditional oxidation state names e.g. ferric
4
+ Added support for defining the stereochemistry of phosphines/arsines
5
+ Added newly discovered elements
6
+ Improved algorithm for correctly interpreting ester names with a missing space e.g. 3-aminophenyl-4-aminobenzenesulfonate
7
+ Fixed structure of canavanine
8
+ Corrected interpretation of silver oxide
9
+ Vocabulary improvements
10
+ Minor improvements/bug fixes
11
+
12
+ Internal XML Changes:
13
+ tokenList files now all use the same schema (tokenLists.dtd)
14
+
15
+ Version 2.4.0 (2018-12-23)
16
+ OPSIN is now licensed under the MIT License
17
+ Locant labels included in extended SMILES output
18
+ Command-line now has a name flag to include the input name in SMILES/InChI output (tab delimited)
19
+ Added support for carotenoids
20
+ Added support for Vitamin B-6 related compounds
21
+ Added support for more fused ring system bridge prefixes
22
+ Added support for anilide as a functional replacement group
23
+ Allow heteroatom replacement as a detachable prefix e.g. 3,6,9-triaza-2-(4-phenylbutyl)undecanoic acid
24
+ Support Boughton system isotopic suffixes for 13C/14C/15N/17O/18O
25
+ Support salts of acids in CAS inverted names
26
+ Improved support for implicitly positively charged purine nucleosides/nucleotides
27
+ Added various biochemical groups/substituents
28
+ Improved logic for determining intended substitution in names with too few brackets
29
+ Incorrectly capitalized locants can now be used to reference ring fusion atoms
30
+ Some names no longer allow substitution e.g. water, hydrochloride
31
+ Many minor precision/recall improvements
32
+
33
+ Version 2.3.1 (2017-07-23)
34
+ Fixed fused ring numbering algorithm incorrectly numbering some ortho- and peri-fused fused systems involving 7-membered rings
35
+ Support P-thio to indicate thiophosphate linkage
36
+ Count of isotopic replacements no longer required if locants given
37
+ Fixed bug where CIP algorithm could assign priorities to identical substituents
38
+ Fixed "DL" before a substituent not assigning the substituted alpha-carbon as racemic stereo
39
+ L-stereochemistry no longer assumed on semi-systematic glycine derivatives e.g. phenylglycine
40
+ Fixed some cases where substituents like carbonyl should have been part of an implicitly bracketed section
41
+ Fixed interpretation of leucinic acid and 3/4/5-pyrazolone
42
+
43
+ Version 2.3.0 (2017-02-23)
44
+ D/L stereochemistry can now be assigned algorithmically e.g. L-2-aminobutyric acid
45
+ Other minor improvements to amino acid support e.g. homoproline added
46
+ Extended SMILES added to command-line interface
47
+ Names intended to include the triiodide/tribromide anion no longer erroneously have three monohalides
48
+ Ambiguity detected when applying unlocanted subtractive prefixes
49
+ Better support for adjacent multipliers e.g. ditrifluoroacetic acid
50
+ deoxynucleosides are now implicitly 2'-deoxynucleosides
51
+ Added support for <sup>number</sup> as a syntax for a superscripted number
52
+ Added support for amidrazones
53
+ Aluminium hydrides/chlorides/bromides/iodides are now covalently bonded
54
+ Fixed names with isotopes less than 10 not being supported
55
+ Fixed interpretation of some trivial names that clash with systematic names
56
+
57
+ Version 2.2.0 (2016-10-16)
58
+ Added support for IUPAC system for isotope specification e.g. (3-14C,2,2-2H2)butane
59
+ Added support for specifying deuteration using the Boughton system e.g. butane-2,2-d2
60
+ Added support for multiplied bridges e.g. 1,2:3,4-diepoxy
61
+ Front locants after a von baeyer descriptor are now supported e.g. bicyclo[2.2.2]-7-octene
62
+ onosyl substituents now supported e.g. glucuronosyl
63
+ More sugar substituents e.g. glucosaminyl
64
+ Improved support for malformed polycyclic spiro names
65
+ Support for oximino as a suffix
66
+ Added method [NameToStructure.getVersion()] to retrieve OPSIN version number
67
+ Allowed bridges to be used as detachable prefixes
68
+ Allow odd numbers of hydro to be added e.g. trihydro
69
+ Added support for unbracketed R stereochemistry (but not S, for the moment, due to the ambiguity with sulfur locants)
70
+ Various minor bug fixes e.g. stereochemistry was incorrect for isovaline
71
+ Minor vocabulary improvements
72
+
73
+ Version 2.1.0 (2016-03-12)
74
+ Added support for fractional multipliers e.g. hemihydrochloride
75
+ Added support for abbreviated common salts e.g. HCl
76
+ Added support for sandwich compounds e.g. ferrocene
77
+ Improved recognition of names missing the last 'e' (common in German)
78
+ Support for E/Z directly before double bond indication e.g. 2Z-ylidene, 2Z-ene
79
+ Improved support for functional class ethers e.g. "glycerol triglycidyl ether"
80
+ Added general support for names involving an ester formed from an alcohol and an ate group
81
+ Grignard reagents and certain compounds (e.g. uranium hexafluoride) are now treated as covalent rather than ionic
82
+ Added experimental support for outputting extended SMILES. Polymers and attachment points are annotated explicitly
83
+ Polymers when output as SMILES now have atom classes to indicate which end of the repeat unit is which
84
+ Support * as a superscript indicator e.g. *6* to mean superscript 6
85
+ Improved recognition of racemic stereochemistry terms
86
+ Added general support for names like "beta-alanine N,N-diacetic acid"
87
+ Allowed "one" and "ol" suffixes to be used in more cases where another suffix is also present
88
+ "ic acid halide" is not interpreted the same as "ic halide"
89
+ Fixed some cases where ambiguous operations were not considered ambiguous e.g. monosubstituted phenyl
90
+ Improvements/bug fixes to heuristics for detecting when spaces are omitted from ether/ester names
91
+ Improved support for stereochemistry in older CAS index names
92
+ Many precision improvements e.g. cyclotriphosphazene, thiazoline, TBDMS/TBDPS protecting groups, S-substituted-methionine
93
+ Various minor bug fixes e.g. names containing "SULPH" not recognized
94
+ Minor vocabulary improvements
95
+
96
+ Internal XML Changes:
97
+ Synonyms of the same concept are now or-ed rather than being separate entities e.g. <token>tertiary|tert-|t-</token>
98
+
99
+ Version 2.0.0 (2015-07-10)
100
+ MAJOR CHANGES:
101
+ Requires Java 1.6 or higher
102
+ CML (Chemical Markup Language) is now returned as a String rather than a XOM Element
103
+ OPSIN now attempts to identify if a chemical name is ambiguous. Names that appear ambiguous return with a status of WARNING with the structure provided being one interpretation of the name
104
+
105
+ Added support for "alcohol esters" e.g. phenol acetate [meaning phenyl acetate]
106
+ Multiplied unlocanted substitution is now more intelligent e.g. all substituents must connect to same group, and degeneracy of atom environments is taken into account
107
+ The ester interpretation is now preferred in more cases where a name does not contain a space but the parent is methanoate/ethanoate/formate/acetate/carbamate
108
+ Inorganic oxides are now interpreted, yielding structures with [O-2] ions
109
+ Added more trivial names of simple molecules
110
+ Support for nitrolic acids
111
+ Fixed parsing issue where a directly substituted acetal was not interpretable
112
+ Fixed certain groups e.g. phenethyl, not having their suffix attached to a specific location
113
+ Corrected interpretation of xanthyl, and various trivial names that look systematic
114
+ Name to structure is now ~20% faster
115
+ Initialisation time reduced by a third
116
+ InChI generation is now ~20% faster
117
+ XML processing dependency changed from XOM to Woodstox
118
+ Significant internal refactoring
119
+ Utility functions designed for internal use are no longer on the public API
120
+ Various minor bug fixes
121
+
122
+ Internal XML Changes:
123
+ Groups lacking a labels attribute now have no locants (previously had ascending numeric locants)
124
+ Syntax for addGroup/addHeteroAtom/addBond attributes changed to be easier to parse and allow specification of whether the name is ambiguous if a locant is not provided
125
+
126
+ Version 1.6.0 (2014-04-26)
127
+ Added API/command-line options to generate StdInchiKeys
128
+ Added support for the IUPAC recommended nomenclature for carbohydrate lactones
129
+ Added support for boronic acid pinacol esters
130
+ Added basic support for specifying chalcogen acid tautomer form e.g. thioacetic S-acid
131
+ Fused ring bridges are now numbered
132
+ Names with Endo/Exo/Syn/Anti stereochemistry can now be partially interpreted if warnRatherThanFailOnUninterpretableStereochemistry is used
133
+ The warnRatherThanFailOnUninterpretableStereochemistry option will now assign as much stereochemistry as OPSIN understands (All ignored stereochemistry terms are mentioned in the OpsinResult message)
134
+ Many minor nomenclature support improvements e.g. succinic imide; hexaldehyde; phenyldiazonium, organotrifluoroborates etc.
135
+ Added more trivial names that can be confused with systematic names e.g. Imidazolidinyl urea
136
+ Fixed StackOverflowError that could occur when processing molecules with over 5000 atoms
137
+ Many minor bug fixes
138
+ Minor vocabulary improvements
139
+ Minor speed improvements
140
+ NOTE: This is the last release to support Java 1.5
141
+
142
+ Version 1.5.0 (2013-07-21)
143
+ Command line interface now accepts files to read and write to as arguments
144
+ Added option to allow interpretation of acids missing the word acid e.g. "acetic" (off by default)
145
+ Added option to treat uninterpretable stereochemistry as a warning rather than a failure (off by default)
146
+ Added support for nucleotide chains e.g. guanylyl(3'-5')uridine
147
+ Added support for parabens, azetidides, morpholides, piperazides, piperidides and pyrrolidides
148
+ Vocabulary improvements e.g. homo/beta amino acids
149
+ Many minor bug fixes e.g. fulminic acid correctly interpreted
150
+
151
+ Version 1.4.0 (2013-01-27)
152
+ Added support for dialdoses, diketoses, ketoaldoses, alditols, aldonic acids, uronic acids, aldaric acids, glycosides, oligosaccharides, named systematically or from trivial stems, in cyclic or acyclic form
153
+ Added support for ketoses named using dehydro
154
+ Added support for anhydro
155
+ Added more trivial carbohydrate names
156
+ Added support for sn-glycerol
157
+ Improved heuristics for phospho substitution
158
+ Added hydrazido and anilate suffixes
159
+ Allowed more functional class nomenclature to apply to amino acids
160
+ Added support for inverting CAS names with substituted functional terms e.g. Acetaldehyde, O-methyloxime
161
+ Double substitution of a deoxy chiral centre now uses the CIP rules to decide which substituent replaced the hydroxy group
162
+ Unicode right arrows, superscripts and the soft hyphen are now recognised
163
+
164
+ Version 1.3.0 (2012-09-16)
165
+ Added option to output radicals as R groups (* in SMILES)
166
+ Added support for carbolactone/dicarboximide/lactam/lactim/lactone/olide/sultam/sultim/sultine/sultone suffixes
167
+ Resolved some cases of ambiguity in the grammar; the program's capability to handle longer peptide names is improved
168
+ Allowed one (as in ketone) before yl e.g. indol-2-on-3-yl
169
+ Allowed primed locants to be used as unprimed locants in a bracket e.g. 2-(4'-methylphenyl)pyridine
170
+ Vocabulary improvements
171
+ SMILES writer will no longer reuse ring closures on the same atom
172
+ Fixed case where a name formed of many words that could be parsed ambiguously would cause OPSIN to run out of memory
173
+ NameToStructure.getInstance() no longer throws a checked exception
174
+ Many minor bug fixes
175
+
176
+ Version 1.2.0 (2011-12-06)
177
+ OPSIN is now available from Maven Central
178
+ Basic support for cyclised carbohydrates e.g. alpha-D-glucopyranose
179
+ Basic support for systematic carbohydrate stems e.g. D-glycero-D-gluco-Heptose
180
+ Added heuristic for correcting esters with omitted spaces
181
+ Added support for xanthates/xanthic acid
182
+ Minor vocabulary improvements
183
+ Fixed a few minor bugs/limitations in the Cahn-Ingold-Prelog rules implementation and made more memory efficient
184
+ Many minor improvements and bug fixes
185
+
186
+ Version 1.1.0 (2011-06-16)
187
+ Significant improvements to fused ring numbering code, specifically 3/4/5/7/8 member rings are no longer only allowed in chains of rings
188
+ Added support for outputting to StdInChI
189
+ Small improvements to fused ring building code
190
+ Improvements to heuristics for disambiguating what group is being referred to by a locant
191
+ Lower case indicated hydrogen is now recognised
192
+ Improvements to parsing speed
193
+ Many minor improvements and bug fixes
194
+
195
+ Version 1.0.0 (2011-03-09)
196
+ Added native isomeric SMILES output
197
+ Improved command-line interface. The desired format i.e. CML/SMILES/InChI as well as options such as allowing radicals can now all be specified via flags
198
+ Debugging is now performed using log4j rather than by passing a verbose flag
199
+ Added traditional locants to carboxylic acids and alkanes e.g. beta-hydroxybutyric acid
200
+ Added support for cis/trans indicating the relative stereochemistry of two substituents on rings and fused ring systems
201
+ Added support for stoichiometry ratios and mixture indicators
202
+ Added support for alpha/beta stereochemistry on steroids
203
+ Added support for the method for naming spiro systems described in the 1979 recommendations rule A-42
204
+ Added detailedFailureAnalysis option to detect the part of a chemical name that fails to parse
205
+ Added support for deoxy
206
+ Added open-chain saccharides
207
+ Improvements to CAS index name uninversion algorithm
208
+ Added support for isotopes into the program allowing deuterio/tritio
209
+ Added support for R/S stereochemistry indicated by a locant which is also used to indicate the point of substitution for a substituent
210
+ Many minor improvements and bug fixes
211
+
212
+ Version 0.9.0 (2010-11-01)
213
+ Added transition metals/f-block elements and noble gases
214
+ Added support for specifying the charge or oxidation number on elements e.g. aluminium(3+), iron(II)
215
+ Calculations based off a van Arkel diagram are now used to determine whether functional bonds to metals should be treated as ionic or covalent
216
+ Improved support for prefix functional replacement e.g. hydrazono/amido/imido/hydrazido/nitrido/pseudohalides can now be used for functional replacement on appropriate acids
217
+ Ortho/meta/para handling improved - can now only apply to six membered rings
218
+ Added support for methylenedioxy
219
+ Added support for simple bridge prefixes e.g. methano as in 2,3-methanoindene
220
+ Added support for perfluoro/perchloro/perbromo/periodo
221
+ Generalised alkane support to allow alkanes of lengths up to 9999 to be described without enumeration
222
+ Updated dependency on JNI-InChI to 0.7, hence InChI 1.03 is now used.
223
+ Improved algorithm for assigning unlocanted hydro terms
224
+ Improved heuristic for determining the meaning of oxido
225
+ Improved charge balancing e.g. ionic substance of an implicit ratio 2:3 can now be handled rather than being represented as a net charged 1:1 mixture
226
+ Grammar is a bit more lenient of placement of stereochemistry and multipliers
227
+ Vocabulary improvements especially in the area of nucleosides and nucleotides
228
+ Esters of biochemical compounds e.g. triphosphates are now supported
229
+ Many minor improvements and bug fixes
230
+
231
+ Version 0.8.0 (2010-07-16)
232
+ NameToStructureConfig can now be used to configure whether radicals e.g. ethyl are output or not.
233
+ Names like carbon tetrachloride are now supported
234
+ glycol ethers e.g. ethylene glycol ethyl ether are now supported
235
+ Prefix functional replacement support now includes halogens e.g. chlorophosphate
236
+ Added support for epoxy/epithio/episeleno/epitelluro
237
+ Added support for hydrazides/fluorohydrins/chlorohydrins/bromohydrins/iodohydrins/cyanohydrins/acetals/ketals/hemiacetals/hemiketals/diketones/disulfones named using functional class nomenclature
238
+ Improvements to algorithm for assigning and finding atoms corresponding to element symbol locants
239
+ Added experimental right to left parser (ReverseParseRules.java)
240
+ Vocabulary improvements
241
+ Parsing is now even faster
242
+ Various bug fixes and name interpretation fixes
243
+
244
+ Version 0.7.0 (2010-06-09)
245
+ Added full support for conjunctive nomenclature e.g. 1,3,5-benzenetriacetic acid
246
+ Added basic support for CAS names
247
+ Added trivial poly-noncarboxylic acids and more trivial carboxylic acids
248
+ Added support for spirobi/spiroter/dispiroter and the majority of spiro(ring-locant-ring) nomenclature
249
+ Indicators of the direction that a chemical rotates plane polarised light are now detected and ignored
250
+ Fixed many cases of trivial names being interpreted systematically by adding more trivial names and detecting such cases
251
+ Names such as oxalic bromide cyanide where a halide/pseudohalide replaces an oxygen are now supported
252
+ Amino acid esters named from the neutral amino acid are now supported e.g. glycine ethyl ester
253
+ Added more heteroatom replacement terms
254
+ Allowed creation of an OPSIN parser through NameToStructure.getOpsinParser()
255
+ Added support for dehydro - for unsaturating bonds
256
+ Improvements to element symbol locant assignment and retrieving appropriate atoms from locants like N2
257
+ OPSIN's SMILES parser now accept specification of number of hydrogens in cases other than chiral atoms
258
+ Mixtures specified by separating components by semicolonspace are now supported
259
+ Many internal improvements and bug fixes
260
+
261
+ Version 0.6.1 (2010-03-18)
262
+ Counter ions are now duplicated so as to give, if possible, a neutral compound
263
+ In names like nitrous amide the atoms modified by the functional replacement can now be substituted
264
+ Allowed ~number~ for specifying superscripts
265
+ Vocabulary improvements
266
+ Added quinone suffix
267
+ Tetrahedral sulfur stereochemistry is now recognised
268
+ Bug fixes to fix incorrect interpretation of some names e.g. triphosgene is now unparseable rather than 3 x phosgene, phospho has different meanings depending on whether it is used on an amino acid or another group etc.
269
+
270
+ Version 0.6.0 (2010-02-18)
271
+ OPSIN is now a mavenised project consisting of two modules: core and inchi. Core does name -->CML, inchi depends on core and allows conversion to inchi
272
+ Instead of CML an OpsinResult can be returned which can yield information as to why a name was not interpretable
273
+ Added support for unlocanted R/S/E/Z stereochemistry. Removed limit on number of atoms that stereochemistry code can handle
274
+ Added support for polymers e.g. poly(ethylene)
275
+ Improvements in handling of multiplicative nomenclature
276
+ Improvements to fusion nomenclature handling: multiplied components and multi parent systems are now supported
277
+ Improved support for functional class nomenclature; space detection has been improved and support has been added for anhydride,oxide,oxime,hydrazone,semicarbazone,thiosemicarbazone,selenosemicarbazone,tellurosemicarbazone,imide
278
+ Support for the lambda convention
279
+ Locanted esters
280
+ Improvements in dearomatisation code
281
+ CML output changed to being CML-Lite compliant
282
+ Speed improvements
283
+ Support for Greek letters e.g. as alpha or $a or α
284
+ Added more infixes
285
+ Added more suffixes
286
+ Vocabulary improvements
287
+ Systematic handling of amino acid nomenclature
288
+ Added support for perhydro
289
+ Support for ylium/uide
290
+ Support for locants like N-1 (instead of N1)
291
+ Fixed potential infinite loop in fused ring numbering
292
+ Made grammar more lenient in many places e.g. euphonic o, optional square brackets
293
+ Sulph is now treated like sulf as in sulphuric acid
294
+ and many misc fixes and improvements
295
+
296
+ Version 0.5.3 (2009-10-22)
297
+ Added support for amic, aldehydic, anilic, anilide, carboxanilide and amoyl suffixes
298
+ Added support for cyclic imides e.g. succinimide/succinimido
299
+ Added support for amide functional class
300
+ Support for locants such as N5 which means a nitrogen that is attached in some way to position 5. Locants of this type may also be used in ester formation.
301
+ Some improvements to functional replacement using prefixes e.g. thioethanoic acid now works
302
+ Disabled stereochemistry in molecules with over 300 atoms as a temporary fix to the problem in 0.5.2
303
+ Slight improvement in method for deciding which group detachable hydro prefixes apply to.
304
+ Minor vocabulary update
305
+
306
+ Version 0.5.2 (2009-10-04)
307
+ Outputting directly to InChI is now supported using the separately available nameToInchi jar (an OPSIN jar is expected in the same location as the nameToInchi jar)
308
+ Fused rings with any number of rings in a chain or formed entirely of 6 membered rings can now be numbered
309
+ Added support for E/Z/R/S where locants are given. Unlocanted cases will be dealt with in a subsequent release. In very large molecules a lack of memory may be encountered, this will be resolved in a subsequent release
310
+ Some Infixes are now supported e.g. ethanthioic acid
311
+ All spiro systems with Von Baeyer brackets are now supported e.g. dispiro[4.2.4.2]tetradecane
312
+ Vocabulary increase (especially: terpenes, inorganic acids, fused ring components)
313
+ Fixed some problems with components with both acyclic and cyclic sections e.g. trityl
314
+ Improved locant assignments e.g. 2-furyl is now also fur-2-yl
315
+ Speed improvements
316
+ Removed dependence on Nux/Saxon
317
+ Misc minor fixes
318
+
319
+ Version 0.5.1 (2009-07-20)
320
+ Huge reduction in OPSIN initialisation time (typical ~7 seconds -->800ms)
321
+ Allowed thio/seleno/telluro as divalent linkers and for functional replacement when used as prefixes. Peroxy can now be used for functional replacement
322
+ Better support for semi-trivially named hydrocarbon fused rings e.g. tetracene
323
+ Better handling of carbonic acid derivatives
324
+ Improvements to locant assignment
325
+ Support for names like triethyltetramine and triethylene glycol
326
+ Misc other fixes to prevent OPSIN generating the wrong structure for certain types of names
327
+
328
+ Version 0.5 (2009-06-23)
329
+ Too many changes to list
330
+
331
+ Version 0.1 (2006-10-11)
332
+ Initial release
TransAntivirus/download_pubchem/opsin-master/fullAssembly.xml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <assembly>
2
+ <id>jar-with-dependencies</id>
3
+ <formats>
4
+ <format>jar</format>
5
+ </formats>
6
+ <moduleSets>
7
+ <moduleSet>
8
+ <includes>
9
+ <include>uk.ac.cam.ch.opsin:opsin-core</include>
10
+ <include>uk.ac.cam.ch.opsin:opsin-inchi</include>
11
+ <include>uk.ac.cam.ch.opsin:opsin-cli</include>
12
+ </includes>
13
+ <sources>
14
+ <includeModuleDirectory>false</includeModuleDirectory>
15
+ <fileSets>
16
+ <fileSet>
17
+ <directory>src/main/java</directory>
18
+ </fileSet>
19
+ </fileSets>
20
+ </sources>
21
+ <binaries>
22
+ <unpack>true</unpack>
23
+ </binaries>
24
+ </moduleSet>
25
+ </moduleSets>
26
+ <files>
27
+ <file>
28
+ <source>LICENSE.txt</source>
29
+ </file>
30
+ <file>
31
+ <source>README.md</source>
32
+ </file>
33
+ <file>
34
+ <source>ReleaseNotes.txt</source>
35
+ </file>
36
+ </files>
37
+ <includeBaseDirectory>false</includeBaseDirectory>
38
+ <dependencySets>
39
+ <dependencySet>
40
+ <unpack>true</unpack>
41
+ <scope>runtime</scope>
42
+ </dependencySet>
43
+ </dependencySets>
44
+ <fileSets>
45
+ <fileSet>
46
+ <directory>${project.build.outputDirectory}</directory>
47
+ </fileSet>
48
+ </fileSets>
49
+ </assembly>
TransAntivirus/download_pubchem/opsin-master/opsin-cli/pom.xml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
2
+ <modelVersion>4.0.0</modelVersion>
3
+ <parent>
4
+ <artifactId>opsin</artifactId>
5
+ <groupId>uk.ac.cam.ch.opsin</groupId>
6
+ <version>3.0-SNAPSHOT</version>
7
+ </parent>
8
+ <artifactId>opsin-cli</artifactId>
9
+ <name>OPSIN Command Line interface</name>
10
+ <description>Command line interface for using OPSIN to convert names to SMILES/InChI/InChIKey/CML</description>
11
+ <build>
12
+ <plugins>
13
+ <plugin>
14
+ <groupId>org.apache.maven.plugins</groupId>
15
+ <artifactId>maven-shade-plugin</artifactId>
16
+ <version>3.2.4</version>
17
+ <executions>
18
+ <execution>
19
+ <phase>package</phase>
20
+ <goals>
21
+ <goal>shade</goal>
22
+ </goals>
23
+ <configuration>
24
+ <finalName>opsin-${project.version}</finalName>
25
+ <createDependencyReducedPom>false</createDependencyReducedPom>
26
+ <transformers>
27
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
28
+ <manifestEntries>
29
+ <Main-Class>uk.ac.cam.ch.wwmm.opsin.Cli</Main-Class>
30
+ </manifestEntries>
31
+ </transformer>
32
+ </transformers>
33
+ </configuration>
34
+ </execution>
35
+ </executions>
36
+ </plugin>
37
+ </plugins>
38
+ </build>
39
+ <dependencies>
40
+ <dependency>
41
+ <groupId>uk.ac.cam.ch.opsin</groupId>
42
+ <artifactId>opsin-inchi</artifactId>
43
+ </dependency>
44
+ <dependency>
45
+ <groupId>commons-cli</groupId>
46
+ <artifactId>commons-cli</artifactId>
47
+ </dependency>
48
+ <dependency>
49
+ <groupId>org.apache.logging.log4j</groupId>
50
+ <artifactId>log4j-core</artifactId>
51
+ </dependency>
52
+ <dependency>
53
+ <groupId>org.junit.jupiter</groupId>
54
+ <artifactId>junit-jupiter</artifactId>
55
+ <scope>test</scope>
56
+ </dependency>
57
+ </dependencies>
58
+ </project>
TransAntivirus/download_pubchem/opsin-master/opsin-cli/src/main/java/uk/ac/cam/ch/wwmm/opsin/Cli.java ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.io.BufferedReader;
4
+ import java.io.BufferedWriter;
5
+ import java.io.File;
6
+ import java.io.FileInputStream;
7
+ import java.io.FileOutputStream;
8
+ import java.io.IOException;
9
+ import java.io.InputStream;
10
+ import java.io.InputStreamReader;
11
+ import java.io.OutputStream;
12
+ import java.io.OutputStreamWriter;
13
+ import java.lang.reflect.Method;
14
+ import java.nio.charset.StandardCharsets;
15
+
16
+ import javax.xml.stream.XMLOutputFactory;
17
+ import javax.xml.stream.XMLStreamException;
18
+ import javax.xml.stream.XMLStreamWriter;
19
+
20
+ import org.apache.commons.cli.CommandLine;
21
+ import org.apache.commons.cli.CommandLineParser;
22
+ import org.apache.commons.cli.DefaultParser;
23
+ import org.apache.commons.cli.HelpFormatter;
24
+ import org.apache.commons.cli.Option;
25
+ import org.apache.commons.cli.Option.Builder;
26
+ import org.apache.commons.cli.Options;
27
+ import org.apache.commons.cli.UnrecognizedOptionException;
28
+ import org.apache.logging.log4j.Level;
29
+ import org.apache.logging.log4j.core.config.Configurator;
30
+
31
+ import com.ctc.wstx.api.WstxOutputProperties;
32
+ import com.ctc.wstx.stax.WstxOutputFactory;
33
+
34
+ public class Cli {
35
+
36
+ private enum InchiType {
37
+ inchiWithFixedH, stdInchi, stdInchiKey
38
+ }
39
+
40
+ /**
41
+ * Run OPSIN as a command-line application.
42
+ *
43
+ * @param args
44
+ * @throws Exception
45
+ */
46
+ public static void main(String[] args) throws Exception {
47
+ Options options = buildCommandLineOptions();
48
+ CommandLineParser parser = new DefaultParser();
49
+ CommandLine cmd = null;
50
+ try {
51
+ cmd = parser.parse(options, args);
52
+ } catch (UnrecognizedOptionException e) {
53
+ System.err.println(e.getMessage());
54
+ System.exit(1);
55
+ }
56
+ if (cmd.hasOption("h")) {
57
+ displayUsage(options);
58
+ }
59
+ if (cmd.hasOption("v")) {
60
+ Configurator.setLevel("uk.ac.cam.ch.wwmm.opsin", Level.DEBUG);
61
+ }
62
+
63
+ NameToStructureConfig n2sconfig = generateOpsinConfigObjectFromCmd(cmd);
64
+
65
+ InputStream input = System.in;
66
+ OutputStream output = System.out;
67
+ String[] unparsedArgs = cmd.getArgs();
68
+ if (unparsedArgs.length == 0) {
69
+ System.err.println("Run the jar using the -h flag for help. Enter a chemical name to begin:");
70
+ } else if (unparsedArgs.length == 1) {
71
+ input = new FileInputStream(new File(unparsedArgs[0]));
72
+ } else if (unparsedArgs.length == 2) {
73
+ input = new FileInputStream(new File(unparsedArgs[0]));
74
+ output = new FileOutputStream(new File(unparsedArgs[1]));
75
+ } else {
76
+ displayUsage(options);
77
+ }
78
+ try {
79
+ String outputType = cmd.getOptionValue("o", "smi");
80
+ boolean outputName = cmd.hasOption("n");
81
+ if (outputType.equalsIgnoreCase("cml")) {
82
+ interactiveCmlOutput(input, output, n2sconfig);
83
+ } else if (outputType.equalsIgnoreCase("smi") || outputType.equalsIgnoreCase("smiles")) {
84
+ interactiveSmilesOutput(input, output, n2sconfig, false, outputName);
85
+ } else if (outputType.equalsIgnoreCase("inchi")) {
86
+ interactiveInchiOutput(input, output, n2sconfig, InchiType.inchiWithFixedH, outputName);
87
+ } else if (outputType.equalsIgnoreCase("stdinchi")) {
88
+ interactiveInchiOutput(input, output, n2sconfig, InchiType.stdInchi, outputName);
89
+ } else if (outputType.equalsIgnoreCase("stdinchikey")) {
90
+ interactiveInchiOutput(input, output, n2sconfig, InchiType.stdInchiKey, outputName);
91
+ } else if (outputType.equalsIgnoreCase("extendedsmi") || outputType.equalsIgnoreCase("extendedsmiles")
92
+ || outputType.equalsIgnoreCase("cxsmi") || outputType.equalsIgnoreCase("cxsmiles")) {
93
+ interactiveSmilesOutput(input, output, n2sconfig, true, outputName);
94
+ } else {
95
+ System.err.println("Unrecognised output format: " + outputType);
96
+ System.err.println(
97
+ "Expected output types are \"cml\", \"smi\", \"inchi\", \"stdinchi\" and \"stdinchikey\"");
98
+ System.exit(1);
99
+ }
100
+ } finally {
101
+ if (output != System.out) {
102
+ output.close();
103
+ }
104
+ if (input != System.in) {
105
+ input.close();
106
+ }
107
+ }
108
+ }
109
+
110
+ private static void displayUsage(Options options) {
111
+ HelpFormatter formatter = new HelpFormatter();
112
+ String version = NameToStructure.getVersion();
113
+ formatter.printHelp("java -jar opsin-" + (version != null ? version : "[version]")
114
+ + "-jar-with-dependencies.jar [options] [inputfile] [outputfile]" + OpsinTools.NEWLINE
115
+ + "OPSIN converts systematic chemical names to CML, SMILES or InChI/StdInChI/StdInChIKey"
116
+ + OpsinTools.NEWLINE
117
+ + "Names should be new line delimited and may be read from stdin (default) or a file and output to stdout (default) or a file",
118
+ options);
119
+ System.exit(0);
120
+ }
121
+
122
+ private static Options buildCommandLineOptions() {
123
+ Options options = new Options();
124
+ Builder outputBuilder = Option.builder("o");
125
+ outputBuilder.longOpt("output");
126
+ outputBuilder.hasArg();
127
+ outputBuilder.argName("format");
128
+ StringBuilder outputOptionsDesc = new StringBuilder();
129
+ outputOptionsDesc.append("Sets OPSIN's output format (default smi)").append(OpsinTools.NEWLINE);
130
+ outputOptionsDesc.append("Allowed values are:").append(OpsinTools.NEWLINE);
131
+ outputOptionsDesc.append("cml for Chemical Markup Language").append(OpsinTools.NEWLINE);
132
+ outputOptionsDesc.append("smi for SMILES").append(OpsinTools.NEWLINE);
133
+ outputOptionsDesc.append("extendedsmi for Extended SMILES").append(OpsinTools.NEWLINE);
134
+ outputOptionsDesc.append("inchi for InChI (with FixedH)").append(OpsinTools.NEWLINE);
135
+ outputOptionsDesc.append("stdinchi for StdInChI").append(OpsinTools.NEWLINE);
136
+ outputOptionsDesc.append("stdinchikey for StdInChIKey");
137
+ outputBuilder.desc(outputOptionsDesc.toString());
138
+ options.addOption(outputBuilder.build());
139
+ options.addOption("h", "help", false, "Displays the allowed command line flags");
140
+ options.addOption("v", "verbose", false, "Enables debugging");
141
+
142
+ options.addOption("a", "allowAcidsWithoutAcid", false,
143
+ "Allows interpretation of acids without the word acid e.g. \"acetic\"");
144
+ options.addOption("f", "detailedFailureAnalysis", false,
145
+ "Enables reverse parsing to more accurately determine why parsing failed");
146
+ options.addOption("n", "name", false, "Include name in SMILES/InChI output (tab delimited)");
147
+ options.addOption("r", "allowRadicals", false, "Enables interpretation of radicals");
148
+ options.addOption("s", "allowUninterpretableStereo", false,
149
+ "Allows stereochemistry uninterpretable by OPSIN to be ignored");
150
+ options.addOption("w", "wildcardRadicals", false, "Radicals are output as wildcard atoms");
151
+ return options;
152
+ }
153
+
154
+ /**
155
+ * Uses the command line parameters to configure a new NameToStructureConfig
156
+ *
157
+ * @param cmd
158
+ * @return The configured NameToStructureConfig
159
+ */
160
+ private static NameToStructureConfig generateOpsinConfigObjectFromCmd(CommandLine cmd) {
161
+ NameToStructureConfig n2sconfig = new NameToStructureConfig();
162
+ n2sconfig.setInterpretAcidsWithoutTheWordAcid(cmd.hasOption("a"));
163
+ n2sconfig.setDetailedFailureAnalysis(cmd.hasOption("f"));
164
+ n2sconfig.setAllowRadicals(cmd.hasOption("r"));
165
+ n2sconfig.setWarnRatherThanFailOnUninterpretableStereochemistry(cmd.hasOption("s"));
166
+ n2sconfig.setOutputRadicalsAsWildCardAtoms(cmd.hasOption("w"));
167
+ return n2sconfig;
168
+ }
169
+
170
+ private static void interactiveCmlOutput(InputStream input, OutputStream out, NameToStructureConfig n2sconfig) throws IOException, XMLStreamException {
171
+ NameToStructure nts = NameToStructure.getInstance();
172
+ BufferedReader inputReader = new BufferedReader(new InputStreamReader(input, StandardCharsets.UTF_8));
173
+ XMLOutputFactory factory = new WstxOutputFactory();
174
+ factory.setProperty(WstxOutputProperties.P_OUTPUT_ESCAPE_CR, false);
175
+ XMLStreamWriter writer = factory.createXMLStreamWriter(out, "UTF-8");
176
+ writer = new IndentingXMLStreamWriter(writer, 2);
177
+ writer.writeStartDocument();
178
+ CMLWriter cmlWriter = new CMLWriter(writer);
179
+ cmlWriter.writeCmlStart();
180
+ int id = 1;
181
+ String line;
182
+ while ((line = inputReader.readLine()) != null) {
183
+ int splitPoint = line.indexOf('\t');
184
+ String name = splitPoint >= 0 ? line.substring(0, splitPoint) : line;
185
+ OpsinResult result = nts.parseChemicalName(name, n2sconfig);
186
+ Fragment structure = result.getStructure();
187
+ cmlWriter.writeMolecule(structure, name, id++);
188
+ writer.flush();
189
+ if (structure == null) {
190
+ System.err.println(result.getMessage());
191
+ }
192
+ }
193
+ cmlWriter.writeCmlEnd();
194
+ writer.writeEndDocument();
195
+ writer.flush();
196
+ writer.close();
197
+ }
198
+
199
+ private static void interactiveSmilesOutput(InputStream input, OutputStream out, NameToStructureConfig n2sconfig, boolean extendedSmiles, boolean outputName) throws IOException {
200
+ NameToStructure nts = NameToStructure.getInstance();
201
+ BufferedReader inputReader = new BufferedReader(new InputStreamReader(input, StandardCharsets.UTF_8));
202
+ BufferedWriter outputWriter = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8));
203
+ String line;
204
+ while ((line = inputReader.readLine()) != null) {
205
+ int splitPoint = line.indexOf('\t');
206
+ String name = splitPoint >= 0 ? line.substring(0, splitPoint) : line;
207
+ OpsinResult result = nts.parseChemicalName(name, n2sconfig);
208
+ String output = extendedSmiles ? result.getExtendedSmiles() : result.getSmiles();
209
+ if (output == null) {
210
+ System.err.println(result.getMessage());
211
+ } else {
212
+ outputWriter.write(output);
213
+ }
214
+ if (outputName) {
215
+ outputWriter.write('\t');
216
+ outputWriter.write(line);
217
+ }
218
+ outputWriter.newLine();
219
+ outputWriter.flush();
220
+ }
221
+ }
222
+
223
+ private static void interactiveInchiOutput(InputStream input, OutputStream out, NameToStructureConfig n2sconfig, InchiType inchiType, boolean outputName) throws Exception {
224
+ NameToStructure nts = NameToStructure.getInstance();
225
+ BufferedReader inputReader = new BufferedReader(new InputStreamReader(input, StandardCharsets.UTF_8));
226
+ BufferedWriter outputWriter = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8));
227
+ Class<?> c;
228
+ try {
229
+ c = Class.forName("uk.ac.cam.ch.wwmm.opsin.NameToInchi");
230
+ } catch (ClassNotFoundException e) {
231
+ System.err.println("Could not initialise NameToInChI module. Is it on your classpath?");
232
+ throw new RuntimeException(e);
233
+ }
234
+ Method m;
235
+ switch (inchiType) {
236
+ case inchiWithFixedH:
237
+ m = c.getMethod("convertResultToInChI", new Class[] { OpsinResult.class });
238
+ break;
239
+ case stdInchi:
240
+ m = c.getMethod("convertResultToStdInChI", new Class[] { OpsinResult.class });
241
+ break;
242
+ case stdInchiKey:
243
+ m = c.getMethod("convertResultToStdInChIKey", new Class[] { OpsinResult.class });
244
+ break;
245
+ default:
246
+ throw new IllegalArgumentException("Unexepected enum value: " + inchiType);
247
+ }
248
+
249
+ String line;
250
+ while ((line = inputReader.readLine()) != null) {
251
+ int splitPoint = line.indexOf('\t');
252
+ String name = splitPoint >= 0 ? line.substring(0, splitPoint) : line;
253
+ OpsinResult result = nts.parseChemicalName(name, n2sconfig);
254
+ String output = (String) m.invoke(null, result);
255
+ if (output == null) {
256
+ System.err.println(result.getMessage());
257
+ } else {
258
+ outputWriter.write(output);
259
+ }
260
+ if (outputName) {
261
+ outputWriter.write('\t');
262
+ outputWriter.write(line);
263
+ }
264
+ outputWriter.newLine();
265
+ outputWriter.flush();
266
+ }
267
+ }
268
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-cli/src/main/resources/log4j2.xml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <Configuration status="WARN">
3
+ <Appenders>
4
+ <Console name="Console" target="SYSTEM_ERR">
5
+ <PatternLayout pattern="%level - %m%n"/>
6
+ </Console>
7
+ </Appenders>
8
+ <Loggers>
9
+ <Root level="warn">
10
+ <AppenderRef ref="Console"/>
11
+ </Root>
12
+ </Loggers>
13
+ </Configuration>
TransAntivirus/download_pubchem/opsin-master/opsin-core/pom.xml ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
2
+ <modelVersion>4.0.0</modelVersion>
3
+ <parent>
4
+ <artifactId>opsin</artifactId>
5
+ <groupId>uk.ac.cam.ch.opsin</groupId>
6
+ <version>3.0-SNAPSHOT</version>
7
+ </parent>
8
+ <artifactId>opsin-core</artifactId>
9
+ <name>OPSIN Core</name>
10
+ <description>Core files of OPSIN. Allows conversion of chemical names to CML (Chemical Markup Language)</description>
11
+ <build>
12
+ <resources>
13
+ <resource>
14
+ <directory>src/main/resources</directory>
15
+ <filtering>true</filtering>
16
+ <includes>
17
+ <include>**/*.props</include>
18
+ </includes>
19
+ </resource>
20
+ <resource>
21
+ <directory>src/main/resources</directory>
22
+ <filtering>false</filtering>
23
+ <excludes>
24
+ <exclude>**/*.props</exclude>
25
+ </excludes>
26
+ </resource>
27
+ </resources>
28
+ </build>
29
+ <dependencies>
30
+ <dependency>
31
+ <groupId>dk.brics</groupId>
32
+ <artifactId>automaton</artifactId>
33
+ </dependency>
34
+ <dependency>
35
+ <groupId>org.codehaus.woodstox</groupId>
36
+ <artifactId>woodstox-core-asl</artifactId>
37
+ </dependency>
38
+ <dependency>
39
+ <groupId>commons-io</groupId>
40
+ <artifactId>commons-io</artifactId>
41
+ </dependency>
42
+ <dependency>
43
+ <groupId>org.apache.logging.log4j</groupId>
44
+ <artifactId>log4j-api</artifactId>
45
+ </dependency>
46
+ <dependency>
47
+ <groupId>org.junit.jupiter</groupId>
48
+ <artifactId>junit-jupiter</artifactId>
49
+ <scope>test</scope>
50
+ </dependency>
51
+ <dependency>
52
+ <groupId>org.hamcrest</groupId>
53
+ <artifactId>hamcrest-library</artifactId>
54
+ <scope>test</scope>
55
+ </dependency>
56
+ <dependency>
57
+ <groupId>org.mockito</groupId>
58
+ <artifactId>mockito-core</artifactId>
59
+ <scope>test</scope>
60
+ </dependency>
61
+ <dependency>
62
+ <groupId>org.apache.logging.log4j</groupId>
63
+ <artifactId>log4j-core</artifactId>
64
+ <scope>test</scope>
65
+ </dependency>
66
+ </dependencies>
67
+ </project>
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AmbiguityChecker.java ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.util.ArrayDeque;
4
+ import java.util.ArrayList;
5
+ import java.util.Collection;
6
+ import java.util.Deque;
7
+ import java.util.HashMap;
8
+ import java.util.HashSet;
9
+ import java.util.LinkedHashSet;
10
+ import java.util.List;
11
+ import java.util.Map;
12
+ import java.util.Set;
13
+
14
+ class AmbiguityChecker {
15
+
16
+ static boolean isSubstitutionAmbiguous(List<Atom> substitutableAtoms, int numberToBeSubstituted) {
17
+ if (substitutableAtoms.size() == 0) {
18
+ throw new IllegalArgumentException("OPSIN Bug: Must provide at least one substituable atom");
19
+ }
20
+ if (substitutableAtoms.size() < numberToBeSubstituted) {
21
+ throw new IllegalArgumentException("OPSIN Bug: substitutableAtoms must be >= numberToBeSubstituted");
22
+ }
23
+ if (substitutableAtoms.size() == numberToBeSubstituted){
24
+ return false;
25
+ }
26
+ if (allAtomsConnectToDefaultInAtom(substitutableAtoms, numberToBeSubstituted)) {
27
+ return false;
28
+ }
29
+ Set<Atom> uniqueAtoms = new HashSet<>(substitutableAtoms);
30
+ if (uniqueAtoms.size() == 1) {
31
+ return false;
32
+ }
33
+ if (allAtomsEquivalent(uniqueAtoms) && (numberToBeSubstituted == 1 || numberToBeSubstituted == substitutableAtoms.size() - 1)){
34
+ return false;
35
+ }
36
+ return true;
37
+ }
38
+
39
+ static boolean allAtomsEquivalent(Collection<Atom> atoms) {
40
+ StereoAnalyser analyser = analyseRelevantAtomsAndBonds(atoms);
41
+ Set<String> uniqueEnvironments = new HashSet<>();
42
+ for (Atom a : atoms) {
43
+ uniqueEnvironments.add(getAtomEnviron(analyser, a));
44
+ }
45
+ return uniqueEnvironments.size() == 1;
46
+ }
47
+
48
+ static boolean allBondsEquivalent(Collection<Bond> bonds) {
49
+ Set<Atom> relevantAtoms = new HashSet<>();
50
+ for (Bond b : bonds) {
51
+ relevantAtoms.add(b.getFromAtom());
52
+ relevantAtoms.add(b.getToAtom());
53
+ }
54
+ StereoAnalyser analyser = analyseRelevantAtomsAndBonds(relevantAtoms);
55
+ Set<String> uniqueBonds = new HashSet<>();
56
+ for (Bond b : bonds) {
57
+ uniqueBonds.add(bondToCanonicalEnvironString(analyser, b));
58
+ }
59
+ return uniqueBonds.size() == 1;
60
+ }
61
+
62
+ private static String bondToCanonicalEnvironString(StereoAnalyser analyser, Bond b) {
63
+ String s1 = getAtomEnviron(analyser, b.getFromAtom());
64
+ String s2 = getAtomEnviron(analyser, b.getToAtom());
65
+ if (s1.compareTo(s2) > 0){
66
+ return s1 + s2;
67
+ }
68
+ else {
69
+ return s2 + s1;
70
+ }
71
+ }
72
+
73
+ static String getAtomEnviron(StereoAnalyser analyser, Atom a) {
74
+ Integer env = analyser.getAtomEnvironmentNumber(a);
75
+ if (env == null) {
76
+ throw new RuntimeException("OPSIN Bug: Atom was not part of ambiguity analysis");
77
+ }
78
+ //"identical" atoms may be distinguished by bonds yet to be formed, hence split by outvalency
79
+ // e.g. [PH3] vs [PH3]=
80
+ return env + "\t" + a.getOutValency();
81
+ }
82
+
83
+ private static boolean allAtomsConnectToDefaultInAtom(List<Atom> substitutableAtoms, int numberToBeSubstituted) {
84
+ Atom defaultInAtom = substitutableAtoms.get(0).getFrag().getDefaultInAtom();
85
+ if (defaultInAtom != null) {
86
+ for (int i = 0; i < numberToBeSubstituted; i++) {
87
+ if (!substitutableAtoms.get(i).equals(defaultInAtom)) {
88
+ return false;
89
+ }
90
+ }
91
+ return true;
92
+ }
93
+ return false;
94
+ }
95
+
96
+ static StereoAnalyser analyseRelevantAtomsAndBonds(Collection<Atom> startingAtoms) {
97
+ Set<Atom> atoms = new HashSet<>();
98
+ Set<Bond> bonds = new HashSet<>();
99
+ Deque<Atom> stack = new ArrayDeque<>(startingAtoms);
100
+ while (!stack.isEmpty()) {
101
+ Atom a = stack.removeLast();
102
+ if (!atoms.contains(a)) {
103
+ atoms.add(a);
104
+ for (Bond b : a.getBonds()) {
105
+ bonds.add(b);
106
+ stack.add(b.getOtherAtom(a));
107
+ }
108
+ }
109
+ }
110
+
111
+ List<Atom> ghostHydrogens = new ArrayList<>();
112
+ for (Atom atom : atoms) {
113
+ int explicitHydrogensToAdd = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(atom);
114
+ for (int i = 0; i < explicitHydrogensToAdd; i++) {
115
+ Atom ghostHydrogen = new Atom(ChemEl.H);
116
+ Bond b = new Bond(ghostHydrogen, atom, 1);
117
+ atom.addBond(b);
118
+ ghostHydrogen.addBond(b);
119
+ ghostHydrogens.add(ghostHydrogen);
120
+ }
121
+ }
122
+ atoms.addAll(ghostHydrogens);
123
+ StereoAnalyser analyzer = new StereoAnalyser(atoms, bonds);
124
+ for (Atom ghostHydrogen : ghostHydrogens) {
125
+ Bond b = ghostHydrogen.getFirstBond();
126
+ b.getOtherAtom(ghostHydrogen).removeBond(b);
127
+ }
128
+ return analyzer;
129
+ }
130
+
131
+ static List<Atom> useAtomEnvironmentsToGivePlausibleSubstitution(List<Atom> substitutableAtoms, int numberToBeSubstituted) {
132
+ if (substitutableAtoms.size() == 0) {
133
+ throw new IllegalArgumentException("OPSIN Bug: Must provide at least one substituable atom");
134
+ }
135
+ if (substitutableAtoms.size() < numberToBeSubstituted) {
136
+ throw new IllegalArgumentException("OPSIN Bug: substitutableAtoms must be >= numberToBeSubstituted");
137
+ }
138
+ if (substitutableAtoms.size() == numberToBeSubstituted){
139
+ return substitutableAtoms;
140
+ }
141
+
142
+ List<Atom> preferredAtoms = findPlausibleSubstitutionPatternUsingSymmmetry(substitutableAtoms, numberToBeSubstituted);
143
+ if (preferredAtoms != null){
144
+ return preferredAtoms;
145
+ }
146
+ return findPlausibleSubstitutionPatternUsingLocalEnvironment(substitutableAtoms, numberToBeSubstituted);
147
+ }
148
+
149
+ private static List<Atom> findPlausibleSubstitutionPatternUsingSymmmetry(List<Atom> substitutableAtoms, int numberToBeSubstituted) {
150
+ //cf. octaethylporphyrin (8 identical atoms capable of substitution)
151
+ StereoAnalyser analyser = analyseRelevantAtomsAndBonds(new HashSet<>(substitutableAtoms));
152
+ Map<String, List<Atom>> atomsInEachEnvironment = new HashMap<>();
153
+ for (Atom a : substitutableAtoms) {
154
+ String env = getAtomEnviron(analyser, a);
155
+ List<Atom> atomsInEnvironment = atomsInEachEnvironment.get(env);
156
+ if (atomsInEnvironment == null) {
157
+ atomsInEnvironment = new ArrayList<>();
158
+ atomsInEachEnvironment.put(env, atomsInEnvironment);
159
+ }
160
+ atomsInEnvironment.add(a);
161
+ }
162
+ List<Atom> preferredAtoms = null;
163
+ for (List<Atom> atoms : atomsInEachEnvironment.values()) {
164
+ if (atoms.size() == numberToBeSubstituted){
165
+ if (preferredAtoms != null){
166
+ return null;
167
+ }
168
+ preferredAtoms = atoms;
169
+ }
170
+ }
171
+ if (preferredAtoms == null) {
172
+ //check for environments with double the required atoms where this means each atom can support two substitutions c.f. cyclohexane
173
+ for (List<Atom> atoms : atomsInEachEnvironment.values()) {
174
+ if (atoms.size() == (numberToBeSubstituted * 2)){
175
+ Set<Atom> uniquified = new LinkedHashSet<>(atoms);//retain deterministic atom ordering
176
+ if (uniquified.size() == numberToBeSubstituted) {
177
+ if (preferredAtoms != null){
178
+ return null;
179
+ }
180
+ preferredAtoms = new ArrayList<>(uniquified);
181
+ }
182
+ }
183
+ }
184
+ }
185
+ return preferredAtoms;
186
+ }
187
+
188
+ private static List<Atom> findPlausibleSubstitutionPatternUsingLocalEnvironment(List<Atom> substitutableAtoms, int numberToBeSubstituted) {
189
+ //cf. pentachlorotoluene (5 sp2 carbons vs sp3 methyl)
190
+ Map<String, List<Atom>> atomsInEachLocalEnvironment = new HashMap<>();
191
+ for (Atom a : substitutableAtoms) {
192
+ int valency = a.determineValency(true);
193
+ int currentValency = a.getIncomingValency() + a.getOutValency();
194
+ int numOfBonds = (valency - currentValency) + a.getBondCount();//distinguish sp2 and sp3 atoms
195
+ String s = a.getElement().toString() +"\t" + valency + "\t" + numOfBonds + "\t" + a.hasSpareValency();
196
+ List<Atom> atomsInEnvironment = atomsInEachLocalEnvironment.get(s);
197
+ if (atomsInEnvironment == null) {
198
+ atomsInEnvironment = new ArrayList<>();
199
+ atomsInEachLocalEnvironment.put(s, atomsInEnvironment);
200
+ }
201
+ atomsInEnvironment.add(a);
202
+ }
203
+ List<Atom> preferredAtoms = null;
204
+ for (List<Atom> atoms : atomsInEachLocalEnvironment.values()) {
205
+ if (atoms.size() == numberToBeSubstituted){
206
+ if (preferredAtoms != null){
207
+ return null;
208
+ }
209
+ preferredAtoms = atoms;
210
+ }
211
+ }
212
+ return preferredAtoms;
213
+ }
214
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AnnotatorState.java ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+
4
/**
 * Immutable snapshot of the state needed during finite-state parsing.
 * From a chain of these (linked through the previous state) the token strings and their
 * semantics can be generated.
 * @author Daniel
 *
 */
class AnnotatorState {

    /** The current state of the DFA. */
    private final int state;

    /** The annotation so far. */
    private final char annot;

    /** The index of the first char in the chemical name that has yet to be tokenised */
    private final int posInName;

    /** Whether the corresponding token is case sensitive */
    private final boolean isCaseSensitive;

    /** The last annotator state for the previous token, null if there is none */
    private final AnnotatorState previousAs;

    /**
     * Creates an annotator state holding exactly the given values.
     * @param state The current state of the DFA
     * @param annot The annotation
     * @param posInName The index of the first char in the chemical name that has yet to be tokenised
     * @param isCaseSensitive Whether the corresponding token is case sensitive
     * @param previousAs The last annotator state for the previous token (or null if this is the first)
     */
    AnnotatorState(int state, char annot, int posInName, boolean isCaseSensitive, AnnotatorState previousAs) {
        this.previousAs = previousAs;
        this.isCaseSensitive = isCaseSensitive;
        this.posInName = posInName;
        this.annot = annot;
        this.state = state;
    }

    /**
     * @return The current state in the DFA
     */
    int getState() {
        return this.state;
    }

    /**
     * @return The annotation that was consumed to transition to this state
     */
    char getAnnot() {
        return this.annot;
    }

    /**
     * @return The index of the first char in the chemical name that has yet to be tokenised
     * (at the point of creating this AnnotatorState)
     */
    int getPosInName() {
        return this.posInName;
    }

    /**
     * @return Whether the corresponding token is case sensitive
     */
    boolean isCaseSensitive() {
        return this.isCaseSensitive;
    }

    /**
     * @return The last annotator state for the previous token (or null if this is the first)
     */
    AnnotatorState getPreviousAs() {
        return this.previousAs;
    }
}
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Atom.java ADDED
@@ -0,0 +1,647 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.Collections;
5
+ import java.util.HashMap;
6
+ import java.util.List;
7
+ import java.util.Map;
8
+ import java.util.Set;
9
+ import java.util.regex.Matcher;
10
+
11
+ import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*;
12
+
13
+ /**
14
+ * An atom. Carries information about which fragment it is in, and an ID
15
+ * number and a list of bonds that it is involved in. It may also have other information such as
16
+ * whether it has "spare valencies" due to unsaturation, its charge, locant labels, stereochemistry and notes
17
+ *
18
+ * @author ptc24
19
+ * @author dl387
20
+ *
21
+ */
22
+ class Atom {
23
+
24
+ /**The (unique over the molecule) ID of the atom.*/
25
+ private final int id;
26
+
27
+ /**The chemical element of the atom. */
28
+ private ChemEl chemEl;
29
+
30
+ /**The locants that pertain to the atom.*/
31
+ private final List<String> locants = new ArrayList<>(2);
32
+
33
+ /**The formal charge on the atom.*/
34
+ private int charge = 0;
35
+
36
+ /**The isotope of the atom. Null if not defined explicitly.*/
37
+ private Integer isotope = null;
38
+
39
+ /**
40
+ * Holds the atomParity object associated with this object
41
+ * null by default
42
+ */
43
+ private AtomParity atomParity = null;
44
+
45
+ /**The bonds that involve the atom*/
46
+ private final List<Bond> bonds = new ArrayList<>(4);
47
+
48
+ /**A map between PropertyKey s as declared here and useful atom properties, usually relating to some kind of special case. */
49
+ @SuppressWarnings("rawtypes")
50
+ private final Map<PropertyKey, Object> properties = new HashMap<>();
51
+ /** A set of atoms that were equally plausible to perform functional replacement on */
52
+ static final PropertyKey<Set<Atom>> AMBIGUOUS_ELEMENT_ASSIGNMENT = new PropertyKey<>("ambiguousElementAssignment");
53
+ /** The atom class which will be output when serialised to SMILES. Useful for distinguishing attachment points */
54
+ static final PropertyKey<Integer> ATOM_CLASS = new PropertyKey<>("atomClass");
55
+ /** Used on wildcard atoms to indicate their meaning */
56
+ static final PropertyKey<String> HOMOLOGY_GROUP = new PropertyKey<>("homologyGroup");
57
+ /** Used on wildcard atoms to indicate that they are a position variation bond */
58
+ static final PropertyKey<List<Atom>> POSITION_VARIATION_BOND = new PropertyKey<>("positionVariationBond");
59
+ /** The hydrogen count as set in the SMILES*/
60
+ static final PropertyKey<Integer> SMILES_HYDROGEN_COUNT = new PropertyKey<>("smilesHydrogenCount");
61
+ /** The oxidation number as specified by Roman numerals in the name*/
62
+ static final PropertyKey<Integer> OXIDATION_NUMBER = new PropertyKey<>("oxidationNumber");
63
+ /** Is this atom the carbon of an aldehyde? (however NOT formaldehyde)*/
64
+ static final PropertyKey<Boolean> ISALDEHYDE = new PropertyKey<>("isAldehyde");
65
+ /** Indicates that this atom is an anomeric atom in a cyclised carbohydrate*/
66
+ static final PropertyKey<Boolean> ISANOMERIC = new PropertyKey<>("isAnomeric");
67
+ /** Transient integer used to indicate traversal of fragments*/
68
+ static final PropertyKey<Integer> VISITED = new PropertyKey<>("visited");
69
+
70
+ /**The fragment to which the atom belongs.*/
71
+ private Fragment frag;
72
+
73
+ /** Whether an atom is part of a delocalised set of double bonds. A double bond in a kekule structure
74
+ * can be mapped to a single bond with this attribute set to true on both atoms that were in the double bond
75
+ * For example, benzene could be temporarily represented by six singly-bonded atoms, each with a set
76
+ * spare valency attribute , and later converted into a fully-specified valence structure.*/
77
+ private boolean spareValency = false;
78
+
79
+ /**The total bond order of all bonds that are expected to be used for inter fragment bonding
80
+ * e.g. in butan-2-ylidene this would be 2 for the atom at position 2 and 0 for the other 3 */
81
+ private int outValency = 0;
82
+
83
+ /** Null by default or set by the lambda convention.*/
84
+ private Integer lambdaConventionValency;
85
+
86
+ /** Null by default or set by the SMILES builder*/
87
+ private Integer minimumValency;
88
+
89
+ /** Can this atom have implicit hydrogen? True unless explicitly set otherwise*/
90
+ private boolean implicitHydrogenAllowed = true;
91
+
92
+ /** This is modified by ium/ide/ylium/uide and is used to choose the appropriate valency for the atom*/
93
+ private int protonsExplicitlyAddedOrRemoved = 0;
94
+
95
+ /**
96
+ * Takes the same values as type in Fragment. Useful for discriminating suffix atoms from other atoms when a suffix is incorporated into another fragment
97
+ */
98
+ private String type;
99
+
100
+ /**
101
+ * Is this atom in a ring. Default false. Set by the CycleDetector.
102
+ * Double bonds are only converted to spareValency if atom is in a ring
103
+ * Some suffixes have different meanings if an atom is part of a ring or not e.g. cyclohexanal vs ethanal
104
+ */
105
+ private boolean atomIsInACycle = false;
106
+
107
/**
 * Creates an Atom that belongs to the given fragment.
 * GENERALLY EXCEPT FOR TESTING SHOULD NOT BE CALLED EXCEPT FROM THE FRAGMANAGER
 * The atom's type is initialised from the type of the fragment.
 * @param id The ID number, unique to the atom in the molecule being built
 * @param chemlEl The chemical element
 * @param frag the Fragment to contain the Atom
 * @throws IllegalArgumentException if frag or chemlEl is null
 */
Atom(int id, ChemEl chemlEl, Fragment frag) {
    if (frag == null) {
        throw new IllegalArgumentException("Atom is not in a fragment!");
    }
    if (chemlEl == null) {
        throw new IllegalArgumentException("Atom does not have an element!");
    }
    this.id = id;
    this.chemEl = chemlEl;
    this.frag = frag;
    this.type = frag.getType();
}
126
+
127
/**
 * Creates a DUMMY atom.
 * Unlike a proper atom it is given the id 0 and is not assigned a fragment or a type.
 * @param chemlEl The chemical element
 */
Atom(ChemEl chemlEl) {
    this.id = 0;
    this.chemEl = chemlEl;
}
135
+
136
+ /**
137
+ * Uses the lambdaConventionValency or if that is not available
138
+ * the default valency assuming this is >= the current valency
139
+ * If not then allowed the chemically sensible valencies of the atom are checked with the one that is closest and >= to the current valency
140
+ * being returned. If the valency has still not been determined the current valency i.e. assuming the atom to have 0 implicit hydrogen is returned.
141
+ * This is the correct behaviour for inorganics. For p block elements it means that OPSIN does not believe the atom to be in a valid valency (too high)
142
+ *
143
+ * if considerOutValency is true, the valency that will be used to form bonds using the outAtoms is
144
+ * taken into account i.e. if any radicals were used to form bonds
145
+ * @param considerOutValency
146
+ * @return
147
+ */
148
+ int determineValency(boolean considerOutValency) {
149
+ if (lambdaConventionValency != null){
150
+ return lambdaConventionValency + protonsExplicitlyAddedOrRemoved;
151
+ }
152
+ int currentValency = getIncomingValency();
153
+ if (considerOutValency){
154
+ currentValency += outValency;
155
+ }
156
+ Integer calculatedMinValency = minimumValency == null ? null : minimumValency + protonsExplicitlyAddedOrRemoved;
157
+ if (charge ==0 || protonsExplicitlyAddedOrRemoved != 0){
158
+ Integer defaultValency = ValencyChecker.getDefaultValency(chemEl);
159
+ if (defaultValency != null){
160
+ defaultValency += protonsExplicitlyAddedOrRemoved;
161
+ if (currentValency <= defaultValency && (calculatedMinValency == null || defaultValency >= calculatedMinValency)){
162
+ return defaultValency;
163
+ }
164
+ }
165
+ }
166
+ Integer[] possibleValencies = ValencyChecker.getPossibleValencies(chemEl, charge);
167
+ if (possibleValencies != null) {
168
+ if (calculatedMinValency != null && calculatedMinValency >= currentValency){
169
+ return calculatedMinValency;
170
+ }
171
+ for (Integer possibleValency : possibleValencies) {
172
+ if (calculatedMinValency != null && possibleValency < calculatedMinValency){
173
+ continue;
174
+ }
175
+ if (currentValency <= possibleValency){
176
+ return possibleValency;
177
+ }
178
+ }
179
+ }
180
+ if (calculatedMinValency != null && calculatedMinValency >= currentValency){
181
+ return calculatedMinValency;
182
+ }
183
+ else{
184
+ return currentValency;
185
+ }
186
+ }
187
+
188
+ /**Adds a locant to the Atom. Other locants are preserved.
189
+ * Also associates the locant with the atom in the parent fragments hash
190
+ *
191
+ * @param locant The new locant
192
+ */
193
+ void addLocant(String locant) {
194
+ locants.add(locant);
195
+ frag.addMappingToAtomLocantMap(locant, this);
196
+ }
197
+
198
+ /**Replaces all existing locants with a new one.
199
+ *
200
+ * @param locant The new locant
201
+ */
202
+ void replaceLocants(String locant) {
203
+ clearLocants();
204
+ addLocant(locant);
205
+ }
206
+
207
+ void removeLocant(String locantToRemove) {
208
+ int locantArraySize = locants.size();
209
+ for (int i = locantArraySize -1; i >=0 ; i--) {
210
+ if (locants.get(i).equals(locantToRemove)){
211
+ locants.remove(i);
212
+ frag.removeMappingFromAtomLocantMap(locantToRemove);
213
+ }
214
+ }
215
+ }
216
+
217
+ /**Removes all locants from the Atom.
218
+ *
219
+ */
220
+ void clearLocants() {
221
+ for (int i = 0, l = locants.size(); i < l; i++) {
222
+ frag.removeMappingFromAtomLocantMap(locants.get(i));
223
+ }
224
+ locants.clear();
225
+ }
226
+
227
+ /**
228
+ * Removes only elementSymbolLocants: e.g. N, S', Se
229
+ */
230
+ void removeElementSymbolLocants() {
231
+ for (int i = locants.size() - 1; i >= 0; i--) {
232
+ String locant = locants.get(i);
233
+ if (MATCH_ELEMENT_SYMBOL_LOCANT.matcher(locant).matches()){
234
+ frag.removeMappingFromAtomLocantMap(locant);
235
+ locants.remove(i);
236
+ }
237
+ }
238
+ }
239
+
240
+ /**
241
+ * Removes all locants other than elementSymbolLocants (e.g. N, S', Se)
242
+ * Hence removes numeric locants and greek locants
243
+ */
244
+ void removeLocantsOtherThanElementSymbolLocants() {
245
+ for (int i = locants.size() - 1; i >= 0; i--) {
246
+ String locant = locants.get(i);
247
+ if (!MATCH_ELEMENT_SYMBOL_LOCANT.matcher(locant).matches()){
248
+ frag.removeMappingFromAtomLocantMap(locant);
249
+ locants.remove(i);
250
+ }
251
+ }
252
+ }
253
+
254
+ /**Checks if the Atom has a given locant.
255
+ *
256
+ * @param locant The locant to test for
257
+ * @return true if it has, false if not
258
+ */
259
+ boolean hasLocant(String locant) {
260
+ if (locants.contains(locant)) {
261
+ return true;
262
+ }
263
+ Matcher m = MATCH_AMINOACID_STYLE_LOCANT.matcher(locant);
264
+ if (m.matches()){//e.g. N'5
265
+ if (chemEl.toString().equals(m.group(1))){//element symbol
266
+ if (!m.group(2).equals("") && (!hasLocant(m.group(1) +m.group(2)))){//has primes
267
+ return false;//must have exact locant e.g. N'
268
+ }
269
+ if (OpsinTools.depthFirstSearchForNonSuffixAtomWithLocant(this, m.group(3)) != null){
270
+ return true;
271
+ }
272
+ }
273
+ }
274
+ return false;
275
+ }
276
+
277
+ /**Gets the first locant for the Atom. This may be the locant that was initially
278
+ * specified, or the most recent locant specified using replaceLocant, or first
279
+ * locant to be added since the last invocation of clearLocants.
280
+ *
281
+ * @return The locant, or null if there is no locant
282
+ */
283
+ String getFirstLocant() {
284
+ return locants.size() > 0 ? locants.get(0) : null;
285
+ }
286
+
287
+ /**Returns the array of locants containing all locants associated with the atom
288
+ *
289
+ * @return The list of locants (may be empty)
290
+ */
291
+ List<String> getLocants() {
292
+ return Collections.unmodifiableList(locants);
293
+ }
294
+
295
+ /**Returns the subset of the locants which are element symbol locants e.g. N, S', Se
296
+ *
297
+ * @return The list of locants (may be empty)
298
+ */
299
+ List<String> getElementSymbolLocants() {
300
+ List<String> elementSymbolLocants = new ArrayList<>(1);
301
+ for (int i = 0, l = locants.size(); i < l; i++) {
302
+ String locant = locants.get(i);
303
+ if (MATCH_ELEMENT_SYMBOL_LOCANT.matcher(locant).matches()) {
304
+ elementSymbolLocants.add(locant);
305
+ }
306
+ }
307
+ return elementSymbolLocants;
308
+ }
309
+
310
+ void setFrag(Fragment f) {
311
+ frag = f;
312
+ }
313
+
314
+ Fragment getFrag() {
315
+ return frag;
316
+ }
317
+
318
+ /**Gets the ID of the atom.
319
+ *
320
+ * @return The ID of the atom
321
+ */
322
+ int getID() {
323
+ return id;
324
+ }
325
+
326
+ /**Gets the chemical element corresponding to the element of the atom.
327
+ *
328
+ * @return The chemical element corresponding to the element of the atom
329
+ */
330
+ ChemEl getElement() {
331
+ return chemEl;
332
+ }
333
+
334
+ /**Sets the chemical element corresponding to the element of the atom.
335
+ *
336
+ * @param chemEl The chemical element corresponding to the element of the atom
337
+ */
338
+ void setElement(ChemEl chemEl) {
339
+ this.chemEl = chemEl;
340
+ }
341
+
342
+ /**Gets the formal charge on the atom.
343
+ *
344
+ * @return The formal charge on the atom
345
+ */
346
+ int getCharge() {
347
+ return charge;
348
+ }
349
+
350
+ /**Modifies the charge of this atom by the amount given. This can be any integer
351
+ * The number of protons changed is noted so as to calculate the correct valency for the atom. This can be any integer.
352
+ * For example ide is the loss of a proton so is charge=-1, protons =-1
353
+ * @param charge
354
+ * @param protons
355
+ */
356
+ void addChargeAndProtons(int charge, int protons){
357
+ this.charge += charge;
358
+ protonsExplicitlyAddedOrRemoved+=protons;
359
+ }
360
+
361
+ /**Sets the formal charge on the atom.
362
+ * NOTE: make sure to update protonsExplicitlyAddedOrRemoved if necessary
363
+ *
364
+ * @param c The formal charge on the atom
365
+ */
366
+ void setCharge(int c) {
367
+ charge = c;
368
+ }
369
+
370
+ /**
371
+ * Sets the formal charge and number of protonsExplicitlyAddedOrRemoved to 0
372
+ */
373
+ void neutraliseCharge() {
374
+ charge = 0;
375
+ protonsExplicitlyAddedOrRemoved = 0;
376
+ }
377
+
378
+ /**
379
+ * Gets the mass number of the atom or null if not explicitly defined
380
+ * e.g. 3 for tritium
381
+ * @return
382
+ */
383
+ Integer getIsotope() {
384
+ return isotope;
385
+ }
386
+
387
+ /**
388
+ * Sets the mass number of the atom explicitly
389
+ * @param isotope
390
+ */
391
+ void setIsotope(Integer isotope) {
392
+ if (isotope != null && isotope < chemEl.ATOMIC_NUM) {
393
+ throw new RuntimeException("Isotopic mass cannot be less than the element's number of protons: " + chemEl.toString() + " " + isotope + " < " + chemEl.ATOMIC_NUM );
394
+ }
395
+ this.isotope = isotope;
396
+ }
397
+
398
+ /**Adds a bond to the atom
399
+ *
400
+ * @param b The bond to be added
401
+ */
402
+ void addBond(Bond b) {
403
+ if (bonds.contains(b)){
404
+ throw new IllegalArgumentException("Atom already has given bond (This is not allowed as this would give two bonds between the same atoms!)");
405
+ }
406
+ bonds.add(b);
407
+ }
408
+
409
+ /**Removes a bond to the atom
410
+ *
411
+ * @param b The bond to be removed
412
+ * @return whether bond was present
413
+ */
414
+ boolean removeBond(Bond b) {
415
+ return bonds.remove(b);
416
+ }
417
+
418
+ /**Calculates the number of bonds connecting to the atom, excluding bonds to implicit
419
+ * hydrogens. Double bonds count as
420
+ * two bonds, etc. Eg ethene - both C's have an incoming valency of 2.
421
+ *
422
+ * @return Incoming Valency
423
+ */
424
+ int getIncomingValency() {
425
+ int v = 0;
426
+ for (int i = 0, len = bonds.size(); i < len; i++) {
427
+ v += bonds.get(i).getOrder();
428
+ }
429
+ return v;
430
+ }
431
+
432
+ int getProtonsExplicitlyAddedOrRemoved() {
433
+ return protonsExplicitlyAddedOrRemoved;
434
+ }
435
+
436
+ void setProtonsExplicitlyAddedOrRemoved(int protonsExplicitlyAddedOrRemoved) {
437
+ this.protonsExplicitlyAddedOrRemoved = protonsExplicitlyAddedOrRemoved;
438
+ }
439
+
440
+ /**Does the atom have spare valency to form double bonds?
441
+ *
442
+ * @return true if atom has spare valency
443
+ */
444
+ boolean hasSpareValency() {
445
+ return spareValency;
446
+ }
447
+
448
+ /**Set whether an atom has spare valency
449
+ *
450
+ * @param sv The spare valency
451
+ */
452
+ void setSpareValency(boolean sv) {
453
+ spareValency = sv;
454
+ }
455
+
456
+ /**Gets the total bond order of the bonds expected to be created from this atom for inter fragment bonding
457
+ *
458
+ * @return The outValency
459
+ */
460
+ int getOutValency() {
461
+ return outValency;
462
+ }
463
+
464
+ /**Adds to the total bond order of the bonds expected to be created from this atom for inter fragment bonding
465
+ *
466
+ * @param outV The outValency to be added
467
+ */
468
+ void addOutValency(int outV) {
469
+ outValency += outV;
470
+ }
471
+
472
+ List<Bond> getBonds() {
473
+ return Collections.unmodifiableList(bonds);
474
+ }
475
+
476
+ int getBondCount() {
477
+ return bonds.size();
478
+ }
479
+
480
+ /**Gets a list of atoms that connect to the atom
481
+ *
482
+ * @return The list of atoms connected to the atom
483
+ */
484
+ List<Atom> getAtomNeighbours(){
485
+ int bondCount = bonds.size();
486
+ List<Atom> results = new ArrayList<>(bondCount);
487
+ for (int i = 0; i < bondCount; i++) {
488
+ results.add(bonds.get(i).getOtherAtom(this));
489
+ }
490
+ return results;
491
+ }
492
+
493
+ Integer getLambdaConventionValency() {
494
+ return lambdaConventionValency;
495
+ }
496
+
497
+ void setLambdaConventionValency(Integer valency) {
498
+ this.lambdaConventionValency = valency;
499
+ }
500
+
501
+ String getType() {
502
+ return type;
503
+ }
504
+
505
+ void setType(String type) {
506
+ this.type = type;
507
+ }
508
+
509
+ boolean getAtomIsInACycle() {
510
+ return atomIsInACycle;
511
+ }
512
+
513
+ /**
514
+ * Sets whether atom is in a cycle, true if it is
515
+ * @param atomIsInACycle
516
+ */
517
+ void setAtomIsInACycle(boolean atomIsInACycle) {
518
+ this.atomIsInACycle = atomIsInACycle;
519
+ }
520
+
521
+ AtomParity getAtomParity() {
522
+ return atomParity;
523
+ }
524
+
525
+ void setAtomParity(AtomParity atomParity) {
526
+ this.atomParity = atomParity;
527
+ }
528
+
529
+ void setAtomParity(Atom[] atomRefs4, int parity) {
530
+ atomParity = new AtomParity(atomRefs4, parity);
531
+ }
532
+
533
+ Integer getMinimumValency() {
534
+ return minimumValency;
535
+ }
536
+
537
+ void setMinimumValency(Integer minimumValency) {
538
+ this.minimumValency = minimumValency;
539
+ }
540
+
541
+ boolean getImplicitHydrogenAllowed() {
542
+ return implicitHydrogenAllowed;
543
+ }
544
+
545
+ void setImplicitHydrogenAllowed(boolean implicitHydrogenAllowed) {
546
+ this.implicitHydrogenAllowed = implicitHydrogenAllowed;
547
+ }
548
+
549
+ @SuppressWarnings("unchecked")
550
+ <T> T getProperty(PropertyKey<T> propertyKey) {
551
+ return (T) properties.get(propertyKey);
552
+ }
553
+
554
+ <T> void setProperty(PropertyKey<T> propertyKey, T value) {
555
+ properties.put(propertyKey, value);
556
+ }
557
+
558
+ /**
559
+ * Checks if the valency of this atom allows it to have the amount of spare valency that the atom currently has
560
+ * May reduce the spare valency on the atom to be consistent with the valency of the atom
561
+ * Does nothing if the atom has no spare valency
562
+ * @param takeIntoAccountExternalBonds
563
+ * @throws StructureBuildingException
564
+ */
565
+ void ensureSVIsConsistantWithValency(boolean takeIntoAccountExternalBonds) throws StructureBuildingException {
566
+ if (spareValency) {
567
+ Integer maxValency;
568
+ if (lambdaConventionValency != null) {
569
+ maxValency = lambdaConventionValency + protonsExplicitlyAddedOrRemoved;
570
+ }
571
+ else{
572
+ Integer hwValency = ValencyChecker.getHWValency(chemEl);
573
+ if (hwValency == null) {
574
+ throw new StructureBuildingException(chemEl + " is not expected to be aromatic!");
575
+ }
576
+ maxValency = hwValency + protonsExplicitlyAddedOrRemoved;
577
+ }
578
+ int maxSpareValency;
579
+ if (takeIntoAccountExternalBonds) {
580
+ maxSpareValency = maxValency - getIncomingValency() - outValency;
581
+ }
582
+ else{
583
+ maxSpareValency = maxValency - frag.getIntraFragmentIncomingValency(this);
584
+ }
585
+ if (maxSpareValency < 1) {
586
+ setSpareValency(false);
587
+ }
588
+ }
589
+ }
590
+
591
+ /**
592
+ * Returns the the first bond in the atom's bond list or null if it has no bonds
593
+ * @return
594
+ */
595
+ Bond getFirstBond() {
596
+ if (bonds.size() > 0){
597
+ return bonds.get(0);
598
+ }
599
+ return null;
600
+ }
601
+
602
+ /**Gets the bond between this atom and a given atom
603
+ *
604
+ * @param a The atom to find a bond to
605
+ * @return The bond, or null if there is no bond
606
+ */
607
+ Bond getBondToAtom(Atom a) {
608
+ for (int i = 0, l = bonds.size(); i < l; i++) {
609
+ Bond b = bonds.get(i);
610
+ if(b.getOtherAtom(this) == a){
611
+ return b;
612
+ }
613
+ }
614
+ return null;
615
+ }
616
+
617
+ /**Gets the bond between this atom and a given atom, throwing if fails.
618
+ *
619
+ * @param a The atom to find a bond to
620
+ * @return The bond found
621
+ * @throws StructureBuildingException
622
+ */
623
+ Bond getBondToAtomOrThrow(Atom a) throws StructureBuildingException {
624
+ Bond b = getBondToAtom(a);
625
+ if(b == null){
626
+ throw new StructureBuildingException("Couldn't find specified bond");
627
+ }
628
+ return b;
629
+ }
630
+
631
+ /**
632
+ * Set the stereo group, ignored if the atom does not have any parity info.
633
+ * @param stroGrp the stereo group.
634
+ */
635
+ public void setStereoGroup(StereoGroup stroGrp) {
636
+ if (atomParity != null)
637
+ atomParity.setStereoGroup(stroGrp);
638
+ }
639
+
640
+ /**
641
+ * Access the stereo group on the atom parity info.
642
+ * @return the stereo group
643
+ */
644
+ public StereoGroup getStereoGroup() {
645
+ return atomParity != null ? atomParity.getStereoGroup() : StereoGroup.Unk;
646
+ }
647
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AtomParity.java ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ /**
4
+ * Hold information about 4 atoms and their chiral determinant allowing the description of tetrahedral stereochemistry
5
+ * @author dl387
6
+ *
7
+ */
8
+ class AtomParity {
9
+ /**
10
+ * A dummy hydrogen atom. Used to represent an implicit hydrogen that is attached to a tetrahedral stereocentre
11
+ */
12
+ static final Atom hydrogen = new Atom(ChemEl.H);
13
+ /**
14
+ * A dummy hydrogen atom. Used to represent the hydrogen that replaced a hydroxy at a tetrahedral stereocentre
15
+ */
16
+ static final Atom deoxyHydrogen = new Atom(ChemEl.H);
17
+ private Atom[] atomRefs4;
18
+ private int parity;
19
+ private StereoGroup stereoGroup = StereoGroup.Abs;
20
+ private int stereoGroupNum = 1;
21
+
22
+ /**
23
+ * Create an atomParity from an array of 4 atoms and the parity of the chiral determinant
24
+ * @param atomRefs4
25
+ * @param parity
26
+ */
27
+ AtomParity(Atom[] atomRefs4, int parity){
28
+ if (atomRefs4.length !=4){
29
+ throw new IllegalArgumentException("atomRefs4 must contain references to 4 atoms");
30
+ }
31
+ this.atomRefs4 = atomRefs4;
32
+ this.parity = parity;
33
+ }
34
+
35
+ Atom[] getAtomRefs4() {
36
+ return atomRefs4;
37
+ }
38
+ void setAtomRefs4(Atom[] atomRefs4) {
39
+ this.atomRefs4 = atomRefs4;
40
+ }
41
+ int getParity() {
42
+ return parity;
43
+ }
44
+ void setParity(int parity) {
45
+ this.parity = parity;
46
+ }
47
+
48
+ public void setStereoGroup(StereoGroup stroGrp, int num) {
49
+ this.stereoGroup = stroGrp;
50
+ this.stereoGroupNum = num;
51
+ }
52
+
53
+ public void setStereoGroup(StereoGroup stroGrp) {
54
+ setStereoGroup(stroGrp, 1);
55
+ }
56
+
57
+ public StereoGroup getStereoGroup() {
58
+ return this.stereoGroup;
59
+ }
60
+
61
+ public int getStereoGroupNum() {
62
+ return this.stereoGroupNum;
63
+ }
64
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AtomProperties.java ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.util.EnumMap;
4
+ import java.util.Map;
5
+
6
+ /**
7
+ * Holds useful atomic properties
8
+ * @author dl387
9
+ *
10
+ */
11
+ class AtomProperties {
12
+
13
+ private static final Map<ChemEl, Double> elementToPaulingElectronegativity = new EnumMap<>(ChemEl.class);
14
+ private static final Map<ChemEl, Integer> elementToHwPriority = new EnumMap<>(ChemEl.class);
15
+
16
+ static{
17
+ elementToPaulingElectronegativity.put(ChemEl.H, 2.20);
18
+ elementToPaulingElectronegativity.put(ChemEl.Li, 0.98);
19
+ elementToPaulingElectronegativity.put(ChemEl.Be, 1.57);
20
+ elementToPaulingElectronegativity.put(ChemEl.B, 2.04);
21
+ elementToPaulingElectronegativity.put(ChemEl.C, 2.55);
22
+ elementToPaulingElectronegativity.put(ChemEl.N, 3.04);
23
+ elementToPaulingElectronegativity.put(ChemEl.O, 3.44);
24
+ elementToPaulingElectronegativity.put(ChemEl.F, 3.98);
25
+ elementToPaulingElectronegativity.put(ChemEl.Na, 0.93);
26
+ elementToPaulingElectronegativity.put(ChemEl.Mg, 1.31);
27
+ elementToPaulingElectronegativity.put(ChemEl.Al, 1.61);
28
+ elementToPaulingElectronegativity.put(ChemEl.Si, 1.90);
29
+ elementToPaulingElectronegativity.put(ChemEl.P, 2.19);
30
+ elementToPaulingElectronegativity.put(ChemEl.S, 2.58);
31
+ elementToPaulingElectronegativity.put(ChemEl.Cl, 3.16);
32
+ elementToPaulingElectronegativity.put(ChemEl.K, 0.82);
33
+ elementToPaulingElectronegativity.put(ChemEl.Ca, 1.00);
34
+ elementToPaulingElectronegativity.put(ChemEl.Sc, 1.36);
35
+ elementToPaulingElectronegativity.put(ChemEl.Ti, 1.54);
36
+ elementToPaulingElectronegativity.put(ChemEl.V, 1.63);
37
+ elementToPaulingElectronegativity.put(ChemEl.Cr, 1.66);
38
+ elementToPaulingElectronegativity.put(ChemEl.Mn, 1.55);
39
+ elementToPaulingElectronegativity.put(ChemEl.Fe, 1.83);
40
+ elementToPaulingElectronegativity.put(ChemEl.Co, 1.88);
41
+ elementToPaulingElectronegativity.put(ChemEl.Ni, 1.91);
42
+ elementToPaulingElectronegativity.put(ChemEl.Cu, 1.90);
43
+ elementToPaulingElectronegativity.put(ChemEl.Zn, 1.65);
44
+ elementToPaulingElectronegativity.put(ChemEl.Ga, 1.81);
45
+ elementToPaulingElectronegativity.put(ChemEl.Ge, 2.01);
46
+ elementToPaulingElectronegativity.put(ChemEl.As, 2.18);
47
+ elementToPaulingElectronegativity.put(ChemEl.Se, 2.55);
48
+ elementToPaulingElectronegativity.put(ChemEl.Br, 2.96);
49
+ elementToPaulingElectronegativity.put(ChemEl.Kr, 3.00);
50
+ elementToPaulingElectronegativity.put(ChemEl.Rb, 0.82);
51
+ elementToPaulingElectronegativity.put(ChemEl.Sr, 0.95);
52
+ elementToPaulingElectronegativity.put(ChemEl.Y, 1.22);
53
+ elementToPaulingElectronegativity.put(ChemEl.Zr, 1.33);
54
+ elementToPaulingElectronegativity.put(ChemEl.Nb, 1.6);
55
+ elementToPaulingElectronegativity.put(ChemEl.Mo, 2.16);
56
+ elementToPaulingElectronegativity.put(ChemEl.Tc, 1.9);
57
+ elementToPaulingElectronegativity.put(ChemEl.Ru, 2.2);
58
+ elementToPaulingElectronegativity.put(ChemEl.Rh, 2.28);
59
+ elementToPaulingElectronegativity.put(ChemEl.Pd, 2.20);
60
+ elementToPaulingElectronegativity.put(ChemEl.Ag, 1.93);
61
+ elementToPaulingElectronegativity.put(ChemEl.Cd, 1.69);
62
+ elementToPaulingElectronegativity.put(ChemEl.In, 1.78);
63
+ elementToPaulingElectronegativity.put(ChemEl.Sn, 1.96);
64
+ elementToPaulingElectronegativity.put(ChemEl.Sb, 2.05);
65
+ elementToPaulingElectronegativity.put(ChemEl.Te, 2.1);
66
+ elementToPaulingElectronegativity.put(ChemEl.I, 2.66);
67
+ elementToPaulingElectronegativity.put(ChemEl.Xe, 2.60);
68
+ elementToPaulingElectronegativity.put(ChemEl.Cs, 0.79);
69
+ elementToPaulingElectronegativity.put(ChemEl.Ba, 0.89);
70
+ elementToPaulingElectronegativity.put(ChemEl.La, 1.1);
71
+ elementToPaulingElectronegativity.put(ChemEl.Ce, 1.12);
72
+ elementToPaulingElectronegativity.put(ChemEl.Pr, 1.13);
73
+ elementToPaulingElectronegativity.put(ChemEl.Nd, 1.14);
74
+ elementToPaulingElectronegativity.put(ChemEl.Pm, 1.13);
75
+ elementToPaulingElectronegativity.put(ChemEl.Sm, 1.17);
76
+ elementToPaulingElectronegativity.put(ChemEl.Eu, 1.2);
77
+ elementToPaulingElectronegativity.put(ChemEl.Gd, 1.2);
78
+ elementToPaulingElectronegativity.put(ChemEl.Tb, 1.1);
79
+ elementToPaulingElectronegativity.put(ChemEl.Dy, 1.22);
80
+ elementToPaulingElectronegativity.put(ChemEl.Ho, 1.23);
81
+ elementToPaulingElectronegativity.put(ChemEl.Er, 1.24);
82
+ elementToPaulingElectronegativity.put(ChemEl.Tm, 1.25);
83
+ elementToPaulingElectronegativity.put(ChemEl.Yb, 1.1);
84
+ elementToPaulingElectronegativity.put(ChemEl.Lu, 1.27);
85
+ elementToPaulingElectronegativity.put(ChemEl.Hf, 1.3);
86
+ elementToPaulingElectronegativity.put(ChemEl.Ta, 1.5);
87
+ elementToPaulingElectronegativity.put(ChemEl.W, 2.36);
88
+ elementToPaulingElectronegativity.put(ChemEl.Re, 1.9);
89
+ elementToPaulingElectronegativity.put(ChemEl.Os, 2.2);
90
+ elementToPaulingElectronegativity.put(ChemEl.Ir, 2.20);
91
+ elementToPaulingElectronegativity.put(ChemEl.Pt, 2.28);
92
+ elementToPaulingElectronegativity.put(ChemEl.Au, 2.54);
93
+ elementToPaulingElectronegativity.put(ChemEl.Hg, 2.00);
94
+ elementToPaulingElectronegativity.put(ChemEl.Tl, 1.62);
95
+ elementToPaulingElectronegativity.put(ChemEl.Pb, 2.33);
96
+ elementToPaulingElectronegativity.put(ChemEl.Bi, 2.02);
97
+ elementToPaulingElectronegativity.put(ChemEl.Po, 2.0);
98
+ elementToPaulingElectronegativity.put(ChemEl.At, 2.2);
99
+ elementToPaulingElectronegativity.put(ChemEl.Rn, 2.2);
100
+ elementToPaulingElectronegativity.put(ChemEl.Fr, 0.7);
101
+ elementToPaulingElectronegativity.put(ChemEl.Ra, 0.9);
102
+ elementToPaulingElectronegativity.put(ChemEl.Ac, 1.1);
103
+ elementToPaulingElectronegativity.put(ChemEl.Th, 1.3);
104
+ elementToPaulingElectronegativity.put(ChemEl.Pa, 1.5);
105
+ elementToPaulingElectronegativity.put(ChemEl.U, 1.38);
106
+ elementToPaulingElectronegativity.put(ChemEl.Np, 1.36);
107
+ elementToPaulingElectronegativity.put(ChemEl.Pu, 1.28);
108
+ elementToPaulingElectronegativity.put(ChemEl.Am, 1.13);
109
+ elementToPaulingElectronegativity.put(ChemEl.Cm, 1.28);
110
+ elementToPaulingElectronegativity.put(ChemEl.Bk, 1.3);
111
+ elementToPaulingElectronegativity.put(ChemEl.Cf, 1.3);
112
+ elementToPaulingElectronegativity.put(ChemEl.Es, 1.3);
113
+ elementToPaulingElectronegativity.put(ChemEl.Fm, 1.3);
114
+ elementToPaulingElectronegativity.put(ChemEl.Md, 1.3);
115
+ elementToPaulingElectronegativity.put(ChemEl.No, 1.3);
116
+ elementToPaulingElectronegativity.put(ChemEl.Lr, 1.3);
117
+
118
+ elementToHwPriority.put(ChemEl.F, 23);
119
+ elementToHwPriority.put(ChemEl.Cl, 22);
120
+ elementToHwPriority.put(ChemEl.Br, 21);
121
+ elementToHwPriority.put(ChemEl.I, 20);
122
+ elementToHwPriority.put(ChemEl.O, 19);
123
+ elementToHwPriority.put(ChemEl.S, 18);
124
+ elementToHwPriority.put(ChemEl.Se, 17);
125
+ elementToHwPriority.put(ChemEl.Te, 16);
126
+ elementToHwPriority.put(ChemEl.N, 15);
127
+ elementToHwPriority.put(ChemEl.P, 14);
128
+ elementToHwPriority.put(ChemEl.As, 13);
129
+ elementToHwPriority.put(ChemEl.Sb, 12);
130
+ elementToHwPriority.put(ChemEl.Bi, 11);
131
+ elementToHwPriority.put(ChemEl.Si, 10);
132
+ elementToHwPriority.put(ChemEl.Ge, 9);
133
+ elementToHwPriority.put(ChemEl.Sn, 8);
134
+ elementToHwPriority.put(ChemEl.Pb, 7);
135
+ elementToHwPriority.put(ChemEl.B, 6);
136
+ elementToHwPriority.put(ChemEl.Al, 5);
137
+ elementToHwPriority.put(ChemEl.Ga, 4);
138
+ elementToHwPriority.put(ChemEl.In, 3);
139
+ elementToHwPriority.put(ChemEl.Tl, 2);
140
+ elementToHwPriority.put(ChemEl.Hg, 1);
141
+ }
142
+
143
+ /**
144
+ * Useful to give an indication of whether a bond is like to be ionic (diff >1.8), polar or covalent (diff < 1.2)
145
+ * @param chemEl
146
+ * @return
147
+ */
148
+ static Double getPaulingElectronegativity(ChemEl chemEl) {
149
+ return elementToPaulingElectronegativity.get(chemEl);
150
+ }
151
+
152
+ /**
153
+ * Maps chemEl to the priority of that atom in Hantzch-Widman system. A higher value indicates a higher priority.
154
+ * @param chemEl
155
+ * @return
156
+ */
157
+ static Integer getHwpriority(ChemEl chemEl) {
158
+ return elementToHwPriority.get(chemEl);
159
+ }
160
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Attribute.java ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ class Attribute {
4
+
5
+ private final String name;
6
+ private String value;
7
+
8
+ Attribute(String name, String value) {
9
+ this.name = name;
10
+ this.value = value;
11
+ }
12
+
13
+ /**
14
+ * Creates a copy
15
+ * @param attribute
16
+ */
17
+ Attribute(Attribute attribute) {
18
+ this.name = attribute.getName();
19
+ this.value = attribute.getValue();
20
+ }
21
+
22
+ String getValue() {
23
+ return value;
24
+ }
25
+
26
+ String getName() {
27
+ return name;
28
+ }
29
+
30
+ void setValue(String value) {
31
+ this.value = value;
32
+ }
33
+
34
+ String toXML() {
35
+ return getName() + "=\"" + escapeText(value) + "\"";
36
+ }
37
+
38
+ public String toString() {
39
+ return name +"\t" + value;
40
+ }
41
+
42
+ private String escapeText(String s) {
43
+ StringBuilder result = new StringBuilder();
44
+ for (int i = 0, l = s.length(); i < l; i++) {
45
+ char c = s.charAt(i);
46
+ switch (c) {
47
+ case '\t':
48
+ result.append("&#x09;");
49
+ break;
50
+ case '\n':
51
+ result.append("&#x0A;");
52
+ break;
53
+ case '\r':
54
+ result.append("&#x0D;");
55
+ break;
56
+ case '"':
57
+ result.append("&quot;");
58
+ break;
59
+ case '&':
60
+ result.append("&amp;");
61
+ break;
62
+ case '<':
63
+ result.append("&lt;");
64
+ break;
65
+ case '>':
66
+ result.append("&gt;");
67
+ break;
68
+ default:
69
+ result.append(c);
70
+ }
71
+ }
72
+ return result.toString();
73
+ }
74
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AutomatonInitialiser.java ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.io.BufferedInputStream;
4
+ import java.io.IOException;
5
+ import java.io.InputStream;
6
+ import java.io.OutputStream;
7
+ import java.nio.charset.StandardCharsets;
8
+
9
+ import org.apache.logging.log4j.LogManager;
10
+ import org.apache.logging.log4j.Logger;
11
+
12
+ import dk.brics.automaton.Automaton;
13
+ import dk.brics.automaton.RegExp;
14
+ import dk.brics.automaton.RunAutomaton;
15
+ import dk.brics.automaton.SpecialOperations;
16
+
17
+ /**
18
+ * Handles storing and retrieving automata to/from files
19
+ * This is highly useful to do as building these deterministic automata from scratch can take minutes
20
+ * @author dl387
21
+ *
22
+ */
23
+ class AutomatonInitialiser {
24
+
25
+ private static final Logger LOG = LogManager.getLogger(AutomatonInitialiser.class);
26
+ private final ResourceGetter resourceGetter;
27
+
28
+ AutomatonInitialiser(String resourcePath) {
29
+ resourceGetter = new ResourceGetter(resourcePath);
30
+ }
31
+
32
+ /**
33
+ * In preference serialised automata and their hashes will be looked for in the resource folder in your working directory
34
+ * If it cannot be found there then these files will be looked for in the standard resource folder
35
+ * (this is actually the standard behaviour of the resourceGetter but I'm reiterating it here as if the stored hash doesn't match
36
+ * the current hash then the creation of an updated serialised automaton and hash will occur in the working directory resource folder as the standard
37
+ * resource folder will not typically be writable)
38
+ * @param automatonName : A name for the automaton so that it can it can be saved/loaded from disk
39
+ * @param regex : the regex from which to build the RunAutomaton
40
+ * @param reverseAutomaton : should the automaton be reversed
41
+ * @param tableize: if true, a transition table is created which makes the run method faster in return of a higher memory usage (adds ~256kb)
42
+ * @return A RunAutomaton, may have been built from scratch or loaded from a file
43
+ */
44
+ RunAutomaton loadAutomaton(String automatonName, String regex, boolean tableize, boolean reverseAutomaton) {
45
+ if (reverseAutomaton){
46
+ automatonName+="_reversed_";
47
+ }
48
+ try{
49
+ if (isAutomatonCached(automatonName, regex)) {
50
+ return loadCachedAutomaton(automatonName);
51
+ }
52
+ }
53
+ catch (IOException e) {
54
+ LOG.warn("Error loading cached automaton: "+automatonName, e);
55
+ }
56
+ RunAutomaton automaton = createAutomaton(regex, tableize, reverseAutomaton);
57
+ cacheAutomaton(automatonName, automaton, regex);
58
+ return automaton;
59
+ }
60
+
61
+ private boolean isAutomatonCached(String automatonName, String regex) {
62
+ String currentRegexHash = getRegexHash(regex);
63
+ String cachedRegexHash = getCachedRegexHash(automatonName);
64
+ return currentRegexHash.equals(cachedRegexHash);
65
+ }
66
+
67
+ private String getRegexHash(String regex) {
68
+ return Integer.toString(regex.hashCode());
69
+ }
70
+
71
+ private String getCachedRegexHash(String automatonName) {
72
+ /*This file contains the hashcode of the regex which was used to generate the automaton on the disk */
73
+ return resourceGetter.getFileContentsAsString(automatonName + "RegexHash.txt");
74
+ }
75
+
76
+ private RunAutomaton loadCachedAutomaton(String automatonName) throws IOException{
77
+ try (InputStream automatonInput = resourceGetter.getInputstreamFromFileName(automatonName +"SerialisedAutomaton.aut")){
78
+ return RunAutomaton.load(new BufferedInputStream(automatonInput));
79
+ } catch (Exception e) {
80
+ IOException ioe = new IOException("Error loading automaton");
81
+ ioe.initCause(e);
82
+ throw ioe;
83
+ }
84
+ }
85
+
86
+ private static RunAutomaton createAutomaton(String regex, boolean tableize, boolean reverseAutomaton) {
87
+ Automaton a = new RegExp(regex).toAutomaton();
88
+ if (reverseAutomaton){
89
+ SpecialOperations.reverse(a);
90
+ }
91
+ return new RunAutomaton(a, tableize);
92
+ }
93
+
94
+ private void cacheAutomaton(String automatonName, RunAutomaton automaton, String regex) {
95
+ try (OutputStream regexHashOutputStream = resourceGetter.getOutputStream(automatonName + "RegexHash.txt")) {
96
+ regexHashOutputStream.write(getRegexHash(regex).getBytes(StandardCharsets.UTF_8));
97
+ try (OutputStream automatonOutputStream = resourceGetter.getOutputStream(automatonName + "SerialisedAutomaton.aut")) {
98
+ automaton.store(automatonOutputStream);
99
+ }
100
+ } catch (IOException e) {
101
+ LOG.warn("Error serialising automaton: "+automatonName, e);
102
+ }
103
+ }
104
+
105
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Bond.java ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import uk.ac.cam.ch.wwmm.opsin.BondStereo.BondStereoValue;
4
+
5
+ /**A bond, between two atoms.
6
+ *
7
+ * @author ptc24
8
+ * @author dl387
9
+ *
10
+ */
11
+ class Bond {
12
+ /** The Atom the bond comes from */
13
+ private final Atom from;
14
+ /** The Atom the bond goes to */
15
+ private final Atom to;
16
+ /** The bond order */
17
+ private int order;
18
+
19
+ static enum SMILES_BOND_DIRECTION{
20
+ RSLASH,
21
+ LSLASH
22
+ }
23
+ /** If this bond was built from SMILES can be set to either RSLASH or LSLASH. Subsequently read to add a bondStereoElement
24
+ * null by default*/
25
+ private SMILES_BOND_DIRECTION smilesBondDirection = null;
26
+
27
+ /**
28
+ * Holds the bondStereo object associated with this bond
29
+ * null by default
30
+ */
31
+ private BondStereo bondStereo = null;
32
+
33
+ /** DO NOT CALL DIRECTLY EXCEPT FOR TESTING
34
+ * Creates a new Bond.
35
+ *
36
+ * @param from The Atom the bond comes from.
37
+ * @param to The Atom the bond goes to.
38
+ * @param order The bond order.
39
+ */
40
+ Bond(Atom from, Atom to, int order) {
41
+ if (from == to){
42
+ throw new IllegalArgumentException("Bonds must be made between different atoms");
43
+ }
44
+ if (order < 1 || order > 3){
45
+ throw new IllegalArgumentException("Bond order must be 1, 2 or 3");
46
+ }
47
+ if (from == null){
48
+ throw new IllegalArgumentException("From atom was null!");
49
+ }
50
+ if (to == null){
51
+ throw new IllegalArgumentException("To atom was null!");
52
+ }
53
+ this.from = from;
54
+ this.to = to;
55
+ this.order = order;
56
+ }
57
+
58
+ /**
59
+ * Gets from ID
60
+ * @return ID
61
+ */
62
+ int getFrom() {
63
+ return from.getID();
64
+ }
65
+
66
+ /**
67
+ * Gets to ID
68
+ * @return ID
69
+ */
70
+ int getTo() {
71
+ return to.getID();
72
+ }
73
+
74
+ /**Gets order.
75
+ * @return*/
76
+ int getOrder() {
77
+ return order;
78
+ }
79
+
80
+ /**Sets order.
81
+ * @param order*/
82
+ void setOrder(int order) {
83
+ this.order = order;
84
+ }
85
+
86
+ /**
87
+ * Gets from Atom
88
+ * @return Atom
89
+ */
90
+ Atom getFromAtom() {
91
+ return from;
92
+ }
93
+
94
+ /**
95
+ * Gets to Atom
96
+ * @return Atom
97
+ */
98
+ Atom getToAtom() {
99
+ return to;
100
+ }
101
+
102
+ /**Adds to the bond order.
103
+ *
104
+ * @param o The value to be added to the bond order.
105
+ */
106
+ void addOrder(int o) {
107
+ order += o;
108
+ }
109
+
110
+ /**
111
+ * Returns either null or RSLASH or LSLASH
112
+ * @return
113
+ */
114
+ SMILES_BOND_DIRECTION getSmilesStereochemistry() {
115
+ return smilesBondDirection;
116
+ }
117
+
118
+ void setSmilesStereochemistry(SMILES_BOND_DIRECTION bondDirection) {
119
+ this.smilesBondDirection = bondDirection;
120
+ }
121
+
122
+ BondStereo getBondStereo() {
123
+ return bondStereo;
124
+ }
125
+
126
+ void setBondStereo(BondStereo bondStereo) {
127
+ this.bondStereo = bondStereo;
128
+ }
129
+
130
+ void setBondStereoElement(Atom[] atomRefs4, BondStereoValue cOrT) {
131
+ bondStereo = new BondStereo(atomRefs4, cOrT);
132
+ }
133
+
134
+ /**
135
+ * Returns the atom at the other end of the bond to given atom
136
+ * @param atom
137
+ * @return
138
+ */
139
+ Atom getOtherAtom(Atom atom) {
140
+ if (from == atom){
141
+ return to;
142
+ }
143
+ else if (to == atom){
144
+ return from;
145
+ }
146
+ else{
147
+ return null;
148
+ }
149
+ }
150
+
151
+ @Override
152
+ public int hashCode() {
153
+ final int prime = 31;
154
+ int result = 1;
155
+ result = prime * result + from.getID();
156
+ result = prime * result + to.getID();
157
+ return result;
158
+ }
159
+
160
+ @Override
161
+ public boolean equals(Object obj) {
162
+ if (this == obj) {
163
+ return true;
164
+ }
165
+ if (obj == null) {
166
+ return false;
167
+ }
168
+ if (getClass() != obj.getClass()) {
169
+ return false;
170
+ }
171
+ Bond other = (Bond) obj;
172
+
173
+ if (from == other.from &&
174
+ to == other.to){
175
+ return true;
176
+ }
177
+ if (from == other.to &&
178
+ to == other.from){
179
+ return true;
180
+ }
181
+
182
+ return false;
183
+ }
184
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BondStereo.java ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ /**
4
+ * Holds information about the positions of 2 atoms relative to a double bond allowing the specification of cis/trans stereochemistry
5
+ * @author dl387
6
+ *
7
+ */
8
+ class BondStereo {
9
+
10
+ private Atom[] atomRefs4;
11
+ private BondStereoValue bondStereoValue;
12
+
13
+ /**
14
+ * Possible values for a bondStereo element
15
+ * @author dl387
16
+ *
17
+ */
18
+ enum BondStereoValue{
19
+ CIS("C"),
20
+ TRANS("T");
21
+
22
+ private final String value;
23
+ BondStereoValue(String value){
24
+ this.value = value;
25
+ }
26
+ @Override
27
+ public String toString() {
28
+ return value;
29
+ }
30
+ }
31
+
32
+ /**
33
+ * Create a bondStereo from an array of 4 atoms. The 2nd and 3rd atoms of this array are connected via a double bond.
34
+ * The 1st and 4th atoms are at either end of this bond and indication is given as to whether they are cis or trans to each other.
35
+ * @param atomRefs4
36
+ * @param cOrT
37
+ */
38
+ BondStereo(Atom[] atomRefs4, BondStereoValue cOrT) {
39
+ if (atomRefs4.length !=4){
40
+ throw new IllegalArgumentException("atomRefs4 must contain references to 4 atoms");
41
+ }
42
+ this.atomRefs4 = atomRefs4;
43
+ this.bondStereoValue = cOrT;
44
+ }
45
+
46
+ Atom[] getAtomRefs4() {
47
+ return atomRefs4;
48
+ }
49
+ void setAtomRefs4(Atom[] atomRefs4) {
50
+ this.atomRefs4 = atomRefs4;
51
+ }
52
+ BondStereoValue getBondStereoValue() {
53
+ return bondStereoValue;
54
+ }
55
+ void setBondStereoValue(BondStereoValue bondStereoValue) {
56
+ this.bondStereoValue = bondStereoValue;
57
+ }
58
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BuildResults.java ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.Collections;
5
+ import java.util.LinkedHashSet;
6
+ import java.util.List;
7
+ import java.util.Set;
8
+
9
+ /**
10
+ * A "struct" to hold the results of fragment building.
11
+ * @author dl387
12
+ *
13
+ */
14
+ class BuildResults {
15
+ /**Holds the atoms that are currently marked as radicals. An atom may be listed twice for say diyl
16
+ * Typically these will be utilised by a word rule e.g. the ethyl of ethyl ethanoate has one
17
+ * Also holds the order of the bond that will be created when it is used (valency)
18
+ * setExplicitly says whether the outAtom absolutely definitely refers to that atom or not.
19
+ * e.g. propyl is stored as prop-1-yl with this set to false while prop-2-yl has it set to true
20
+ * These OutAtoms are the same objects as are present in the fragments*/
21
+ private final List<OutAtom> outAtoms = new ArrayList<>();
22
+
23
+ /**The atoms that may be used to from things like esters*/
24
+ private final List<FunctionalAtom> functionalAtoms = new ArrayList<>();
25
+
26
+ /**A list of fragments that have been evaluated to form this BuildResults. They are in the order they would be found in the XML*/
27
+ private final Set<Fragment> fragments = new LinkedHashSet<>();
28
+
29
+ /**A BuildResults is constructed from a list of Fragments.
30
+ * This constructor creates this list from the groups present in an XML word/bracket/sub element.
31
+ * @param wordSubOrBracket*/
32
+ BuildResults(Element wordSubOrBracket) {
33
+ List<Element> groups = OpsinTools.getDescendantElementsWithTagName(wordSubOrBracket, XmlDeclarations.GROUP_EL);
34
+ for (Element group : groups) {
35
+ Fragment frag = group.getFrag();
36
+ fragments.add(frag);
37
+ for (int i = 0, l = frag.getOutAtomCount(); i < l; i++) {
38
+ outAtoms.add(frag.getOutAtom(i));
39
+ }
40
+ int functionalAtomCount = frag.getFunctionalAtomCount();
41
+ if (functionalAtomCount > 0){
42
+ Element parent = group.getParent();
43
+ if (parent.getName().equals(XmlDeclarations.ROOT_EL) ||
44
+ OpsinTools.getNextGroup(group) == null) {
45
+ for (int i = 0; i < functionalAtomCount; i++) {
46
+ functionalAtoms.add(frag.getFunctionalAtom(i));
47
+ }
48
+ }
49
+ }
50
+ }
51
+ }
52
+
53
+ /**
54
+ * Construct a blank buildResults
55
+ */
56
+ BuildResults() {}
57
+
58
+ /**
59
+ * Returns a read only view of the fragments in this BuildResults
60
+ * @return
61
+ */
62
+ Set<Fragment> getFragments(){
63
+ return Collections.unmodifiableSet(fragments);
64
+ }
65
+
66
+ int getFragmentCount(){
67
+ return fragments.size();
68
+ }
69
+
70
+ OutAtom getOutAtom(int i) {
71
+ return outAtoms.get(i);
72
+ }
73
+
74
+ int getOutAtomCount() {
75
+ return outAtoms.size();
76
+ }
77
+
78
+ OutAtom removeOutAtom(int i) {
79
+ OutAtom outAtom = outAtoms.get(i);
80
+ outAtom.getAtom().getFrag().removeOutAtom(outAtom);
81
+ return outAtoms.remove(i);
82
+ }
83
+
84
+ void removeAllOutAtoms() {
85
+ for (int i = outAtoms.size() -1; i >=0 ; i--) {
86
+ removeOutAtom(i);
87
+ }
88
+ }
89
+
90
+ /**
91
+ * Returns the atom corresponding to position i in the functionalAtoms list
92
+ * @param i index
93
+ * @return atom
94
+ */
95
+ Atom getFunctionalAtom(int i) {
96
+ return functionalAtoms.get(i).getAtom();
97
+ }
98
+
99
+ FunctionalAtom removeFunctionalAtom(int i) {
100
+ FunctionalAtom functionalAtom = functionalAtoms.get(i);
101
+ functionalAtom.getAtom().getFrag().removeFunctionalAtom(functionalAtom);
102
+ return functionalAtoms.remove(i);
103
+ }
104
+
105
+ int getFunctionalAtomCount(){
106
+ return functionalAtoms.size();
107
+ }
108
+
109
+ /**
110
+ * Returns the first OutAtom
111
+ * @return OutAtom
112
+ */
113
+ OutAtom getFirstOutAtom() {
114
+ return outAtoms.get(0);
115
+ }
116
+
117
+ /**
118
+ * Returns the atom corresponding to the given id assuming the atom the id corresponds to is within the list of fragment in this Buildresults
119
+ * @param id index
120
+ * @return atom
121
+ * @throws StructureBuildingException
122
+ */
123
+ Atom getAtomByIdOrThrow(int id) throws StructureBuildingException {
124
+ for (Fragment fragment : fragments) {
125
+ Atom outAtom =fragment.getAtomByID(id);
126
+ if (outAtom != null){
127
+ return outAtom;
128
+ }
129
+ }
130
+ throw new StructureBuildingException("No fragment contained this id: " + id);
131
+ }
132
+
133
+ void mergeBuildResults(BuildResults otherBR) {
134
+ outAtoms.addAll(otherBR.outAtoms);
135
+ functionalAtoms.addAll(otherBR.functionalAtoms);
136
+ fragments.addAll(otherBR.fragments);
137
+ }
138
+
139
+ /**
140
+ * Returns the sum of the charges of the fragments in the buildResults
141
+ * @return
142
+ */
143
+ int getCharge() {
144
+ int totalCharge = 0;
145
+ for (Fragment frag : fragments) {
146
+ totalCharge += frag.getCharge();
147
+ }
148
+ return totalCharge;
149
+ }
150
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BuildState.java ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.HashMap;
5
+ import java.util.List;
6
+
7
+ import uk.ac.cam.ch.wwmm.opsin.OpsinWarning.OpsinWarningType;
8
+
9
+ /**
10
+ * Used to pass the current configuration and FragmentManager around
11
+ * The currentWordRule can be mutated to keep track of what the parent wordRule is at the given time
12
+ *
13
+ * @author dl387
14
+ *
15
+ */
16
+ class BuildState {
17
+
18
+ final FragmentManager fragManager;
19
+ final HashMap<Element, List<Fragment>> xmlSuffixMap;
20
+ final NameToStructureConfig n2sConfig;
21
+ // counter is used for DL- racemic stereochemistry in oligomers, we place each one in a separate racemic group,
22
+ // there is implicitly one group in-case the input has a combination of (RS)- and then DL-
23
+ int numRacGrps = 1;
24
+ private final List<OpsinWarning> warnings = new ArrayList<>();
25
+
26
+ WordRule currentWordRule = null;
27
+
28
+ BuildState(NameToStructureConfig n2sConfig) {
29
+ this.n2sConfig = n2sConfig;
30
+ IDManager idManager = new IDManager();
31
+ fragManager = new FragmentManager(new SMILESFragmentBuilder(idManager), idManager);
32
+ xmlSuffixMap = new HashMap<>();
33
+ }
34
+
35
+ List<OpsinWarning> getWarnings() {
36
+ return warnings;
37
+ }
38
+
39
+ void addWarning(OpsinWarningType type, String message) {
40
+ warnings.add(new OpsinWarning(type, message));
41
+ }
42
+
43
+ void addIsAmbiguous(String message) {
44
+ warnings.add(new OpsinWarning(OpsinWarningType.APPEARS_AMBIGUOUS, message));
45
+ }
46
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CASTools.java ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.Arrays;
5
+ import java.util.List;
6
+ import java.util.regex.Matcher;
7
+ import java.util.regex.Pattern;
8
+
9
+ /**
10
+ * Tools for converting CAS nomenclature into IUPAC nomenclature.
11
+ * @author dl387
12
+ */
13
+ class CASTools {
14
+
15
+ private static final Pattern matchCasCollectiveIndex = Pattern.compile("([\\[\\(\\{]([1-9][0-9]?[cC][iI][, ]?)+[\\]\\)\\}])+|[1-9][0-9]?[cC][iI]", Pattern.CASE_INSENSITIVE);
16
+ private static final Pattern matchAcid = Pattern.compile("acid[\\]\\)\\}]*", Pattern.CASE_INSENSITIVE);
17
+ private static final Pattern matchCommaSpace = Pattern.compile(", ");
18
+ private static final Pattern matchCompoundWithPhrase = Pattern.compile("(compd\\. with|compound with|and) ", Pattern.CASE_INSENSITIVE);
19
+ private static final Pattern matchFunctionalTermAllowingSubstituentPrefix = Pattern.compile("(amide|hydrazide|(thi|selen|tellur)?oxime|hydrazone|(iso)?(semicarbazone|thiosemicarbazone|selenosemicarbazone|tellurosemicarbazone)|imide|imine|semioxamazone)[\\]\\)\\}]*", Pattern.CASE_INSENSITIVE);
20
+
21
+ /**
22
+ * Inverts a CAS name.
23
+ * Throws an exception is OPSIN is unable to determine whether something is a substituent or functional term
24
+ * or if something unexpected in a CAS name is encountered
25
+ * @param name
26
+ * @return
27
+ * @throws ParsingException
28
+ */
29
+ static String uninvertCASName(String name, ParseRules parseRules) throws ParsingException {
30
+ List<String> nameComponents = new ArrayList<>(Arrays.asList(matchCommaSpace.split(name)));
31
+ List<String> substituents = new ArrayList<>();
32
+ List<String> seperateWordSubstituents = new ArrayList<>();
33
+ List<String> functionalTerms = new ArrayList<>();
34
+
35
+ String parent = nameComponents.get(0);
36
+ String[] parentNameParts = parent.split(" ");
37
+ if (parentNameParts.length != 1) {
38
+ if (matchCasCollectiveIndex.matcher(parentNameParts[parentNameParts.length - 1]).matches()) {//CAS collective index description should be ignored
39
+ StringBuilder parentSB = new StringBuilder();
40
+ for (int i = 0; i < parentNameParts.length - 1; i++) {
41
+ parentSB.append(parentNameParts[i]);
42
+ }
43
+ parent = parentSB.toString();
44
+ parentNameParts = parent.split(" ");
45
+ }
46
+ for (int i = 1; i < parentNameParts.length; i++) {
47
+ if (!matchAcid.matcher(parentNameParts[i]).matches()) {
48
+ ParseRulesResults results = parseRules.getParses(parentNameParts[i]);
49
+ List<ParseTokens> parseTokens = results.getParseTokensList();
50
+ if (parseTokens.isEmpty()) {
51
+ throw new ParsingException("Invalid CAS name. Parent compound was followed by an unexpected term");
52
+ }
53
+ }
54
+ }
55
+ }
56
+ boolean addedBracket = false;
57
+ boolean esterEncountered = false;
58
+ for (int i = 1; i < nameComponents.size(); i++) {
59
+ String nameComponent = nameComponents.get(i);
60
+ Matcher m = matchCompoundWithPhrase.matcher(nameComponent);
61
+ boolean compoundWithcomponent = false;
62
+ if (m.lookingAt()) {
63
+ nameComponent = nameComponent.substring(m.group().length());
64
+ compoundWithcomponent = true;
65
+ }
66
+ String[] components = nameComponents.get(i).split(" ");
67
+ for (int c = 0, componentLen = components.length; c < componentLen; c++) {
68
+ String component = components[c];
69
+ if (compoundWithcomponent) {
70
+ functionalTerms.add(component);
71
+ continue;
72
+ }
73
+ if (component.endsWith("-")) {
74
+ Character missingCloseBracket = missingCloseBracketCharIfApplicable(component);
75
+ if (missingCloseBracket !=null) {
76
+ if (addedBracket) {
77
+ throw new ParsingException("Close bracket appears to be missing");
78
+ }
79
+ parent += missingCloseBracket;
80
+ addedBracket = true;
81
+ }
82
+ substituents.add(component);
83
+ } else {
84
+ ParseRulesResults results = parseRules.getParses(component);
85
+ List<ParseTokens> parseTokens = results.getParseTokensList();
86
+ if (parseTokens.size() > 0) {
87
+ List<ParseWord> parseWords = WordTools.splitIntoParseWords(parseTokens, component);
88
+
89
+ List<ParseTokens> firstParseWordTokens = parseWords.get(0).getParseTokens();
90
+ WordType firstWordType = OpsinTools.determineWordType(firstParseWordTokens.get(0).getAnnotations());
91
+ for (int j = 1; j < firstParseWordTokens.size(); j++) {
92
+ if (!firstWordType.equals(OpsinTools.determineWordType(firstParseWordTokens.get(j).getAnnotations()))) {
93
+ throw new ParsingException(component + "can be interpreted in multiple ways. For the sake of precision OPSIN has decided not to process this as a CAS name");
94
+ }
95
+ }
96
+
97
+ if (parseWords.size() == 1) {
98
+ switch (firstWordType) {
99
+ case functionalTerm:
100
+ if (component.equalsIgnoreCase("ester")) {
101
+ if (seperateWordSubstituents.size() ==0){
102
+ throw new ParsingException("ester encountered but no substituents were specified in potential CAS name!");
103
+ }
104
+ if (esterEncountered) {
105
+ throw new ParsingException("ester formation was mentioned more than once in CAS name!");
106
+ }
107
+ parent = uninvertEster(parent);
108
+ esterEncountered = true;
109
+ } else {
110
+ functionalTerms.add(component);
111
+ }
112
+ break;
113
+ case substituent:
114
+ seperateWordSubstituents.add(component);
115
+ break;
116
+ case full:
117
+ if (StringTools.endsWithCaseInsensitive(component, "ate") || StringTools.endsWithCaseInsensitive(component, "ite")//e.g. Piperazinium, 1,1-dimethyl-, 2,2,2-trifluoroacetate hydrochloride
118
+ || StringTools.endsWithCaseInsensitive(component, "ium")
119
+ || StringTools.endsWithCaseInsensitive(component, "hydrofluoride") || StringTools.endsWithCaseInsensitive(component, "hydrochloride")
120
+ || StringTools.endsWithCaseInsensitive(component, "hydrobromide") || StringTools.endsWithCaseInsensitive(component, "hydroiodide")) {
121
+ functionalTerms.add(component);
122
+ } else if (StringTools.endsWithCaseInsensitive(component, "ic") && c + 1 < componentLen && components[c + 1].equalsIgnoreCase("acid")) {
123
+ functionalTerms.add(component);
124
+ functionalTerms.add(components[++c]);
125
+ } else {
126
+ throw new ParsingException("Unable to interpret: " + component + " (as part of a CAS index name)- A full word was encountered where a substituent or functionalTerm was expected");
127
+ }
128
+ break;
129
+ default:
130
+ throw new ParsingException("Unrecognised CAS index name form");
131
+ }
132
+ }
133
+ else if (parseWords.size() == 2 && firstWordType.equals(WordType.substituent)) {
134
+ //could be something like O-methyloxime which is parsed as [O-methyl] [oxime]
135
+ List<ParseTokens> secondParseWordTokens = parseWords.get(1).getParseTokens();
136
+ WordType secondWordType = OpsinTools.determineWordType(secondParseWordTokens.get(0).getAnnotations());
137
+ for (int j = 1; j < secondParseWordTokens.size(); j++) {
138
+ if (!secondWordType.equals(OpsinTools.determineWordType(secondParseWordTokens.get(j).getAnnotations()))) {
139
+ throw new ParsingException(component + "can be interpreted in multiple ways. For the sake of precision OPSIN has decided not to process this as a CAS name");
140
+ }
141
+ }
142
+ if (secondWordType.equals(WordType.functionalTerm) &&
143
+ matchFunctionalTermAllowingSubstituentPrefix.matcher(parseWords.get(1).getWord()).matches()){
144
+ functionalTerms.add(component);
145
+ }
146
+ else{
147
+ throw new ParsingException("Unrecognised CAS index name form, could have a missing space?");
148
+ }
149
+ }
150
+ else {
151
+ throw new ParsingException("Unrecognised CAS index name form");
152
+ }
153
+ } else {
154
+ if (!matchCasCollectiveIndex.matcher(component).matches()) {//CAS collective index description should be ignored
155
+ throw new ParsingException("Unable to interpret: " + component + " (as part of a CAS index name)");
156
+ }
157
+ }
158
+ }
159
+ }
160
+ }
161
+ StringBuilder casName = new StringBuilder();
162
+ for (String prefixFunctionalTerm : seperateWordSubstituents) {
163
+ casName.append(prefixFunctionalTerm);
164
+ casName.append(" ");
165
+ }
166
+ for (int i = substituents.size() - 1; i >= 0; i--) {
167
+ //stereochemistry term comes after substituent term. In older CAS names (9CI) this stereochemistry term can apply to the substituent term. Hence append in reverse order
168
+ casName.append(substituents.get(i));
169
+ }
170
+ casName.append(parent);
171
+ for (String functionalTerm : functionalTerms) {
172
+ casName.append(" ");
173
+ casName.append(functionalTerm);
174
+ }
175
+ return casName.toString();
176
+ }
177
+
178
+ private static Character missingCloseBracketCharIfApplicable(String component) {
179
+ int bracketLevel =0;
180
+ Character missingCloseBracket =null;
181
+ for (int i = 0, l = component.length(); i < l; i++) {
182
+ char character = component.charAt(i);
183
+ if (character == '(' || character == '[' || character == '{') {
184
+ bracketLevel++;
185
+ if (bracketLevel ==1){
186
+ missingCloseBracket = character;
187
+ }
188
+ }
189
+ if (character == ')' || character == ']' || character == '}') {
190
+ bracketLevel--;
191
+ if (bracketLevel<0){
192
+ return null;
193
+ }
194
+ }
195
+ }
196
+ if (bracketLevel == 1){
197
+ if (missingCloseBracket == '('){
198
+ return ')';
199
+ }
200
+ if (missingCloseBracket == '['){
201
+ return ']';
202
+ }
203
+ if (missingCloseBracket == '{'){
204
+ return '}';
205
+ }
206
+ }
207
+ return null;
208
+ }
209
+
210
+ /**
211
+ * Modifies the name of the parent acid from ic to ate (or ous to ite)
212
+ * hence allowing the formation of the uninverted ester
213
+ * @param parent
214
+ * @return
215
+ * @throws ParsingException
216
+ */
217
+ private static String uninvertEster(String parent) throws ParsingException {
218
+ int len = parent.length();
219
+ if (len == 0) {
220
+ throw new ParsingException("Failed to uninvert CAS ester");
221
+ }
222
+ char lastChar = parent.charAt(len - 1);
223
+ if (lastChar == ')') {
224
+ if (StringTools.endsWithCaseInsensitive(parent, "ic acid)")) {
225
+ parent = parent.substring(0, parent.length() - 8) + "ate)";
226
+ } else if (StringTools.endsWithCaseInsensitive(parent, "ous acid)")) {
227
+ parent = parent.substring(0, parent.length() - 9) + "ite)";
228
+ } else if (StringTools.endsWithCaseInsensitive(parent, "ine)")){//amino acid
229
+ parent = parent.substring(0, parent.length() - 2) + "ate)";
230
+ }
231
+ else{
232
+ throw new ParsingException("Failed to uninvert CAS ester");
233
+ }
234
+ } else {
235
+ if (StringTools.endsWithCaseInsensitive(parent, "ic acid")) {
236
+ parent = parent.substring(0, parent.length() - 7) + "ate";
237
+ } else if (StringTools.endsWithCaseInsensitive(parent, "ous acid")) {
238
+ parent = parent.substring(0, parent.length() - 8) + "ite";
239
+ } else if (StringTools.endsWithCaseInsensitive(parent, "ine")){//amino acid
240
+ parent = parent.substring(0, parent.length() - 1) + "ate";
241
+ }
242
+ else{
243
+ throw new ParsingException("Failed to uninvert CAS ester");
244
+ }
245
+ }
246
+ return parent;
247
+ }
248
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CMLWriter.java ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.io.ByteArrayOutputStream;
4
+ import java.io.UnsupportedEncodingException;
5
+ import java.util.List;
6
+
7
+ import javax.xml.stream.XMLOutputFactory;
8
+ import javax.xml.stream.XMLStreamException;
9
+ import javax.xml.stream.XMLStreamWriter;
10
+
11
+ import com.ctc.wstx.api.WstxOutputProperties;
12
+ import com.ctc.wstx.stax.WstxOutputFactory;
13
+
14
+ class CMLWriter {
15
+ /**
16
+ * CML Elements/Attributes/NameSpace
17
+ */
18
+ static final String CML_NAMESPACE = "http://www.xml-cml.org/schema";
19
+
20
+ private static final XMLOutputFactory factory = new WstxOutputFactory();
21
+ static {
22
+ factory.setProperty(WstxOutputProperties.P_OUTPUT_ESCAPE_CR, false);
23
+ }
24
+
25
+ /**The XML writer*/
26
+ private final XMLStreamWriter writer;
27
+
28
+ /**
29
+ * Creates a CML writer for the given fragment
30
+ * @param writer
31
+
32
+ */
33
+ CMLWriter(XMLStreamWriter writer) {
34
+ this.writer = writer;
35
+ }
36
+
37
+ static String generateCml(Fragment structure, String chemicalName) {
38
+ return generateCml(structure, chemicalName, false);
39
+ }
40
+
41
+ static String generateIndentedCml(Fragment structure, String chemicalName) {
42
+ return generateCml(structure, chemicalName, true);
43
+ }
44
+
45
+ private static String generateCml(Fragment structure, String chemicalName, boolean indent) {
46
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
47
+ try {
48
+ XMLStreamWriter xmlWriter = factory.createXMLStreamWriter(out, "UTF-8");
49
+ if (indent) {
50
+ xmlWriter = new IndentingXMLStreamWriter(xmlWriter, 2);
51
+ }
52
+ CMLWriter cmlWriter = new CMLWriter(xmlWriter);
53
+ cmlWriter.writeCmlStart();
54
+ cmlWriter.writeMolecule(structure, chemicalName, 1);
55
+ cmlWriter.writeCmlEnd();
56
+ xmlWriter.close();
57
+ } catch (XMLStreamException e) {
58
+ throw new RuntimeException(e);
59
+ }
60
+ try {
61
+ return out.toString("UTF-8");
62
+ } catch (UnsupportedEncodingException e) {
63
+ throw new RuntimeException("JVM doesn't support UTF-8...but it should do!");
64
+ }
65
+ }
66
+
67
+ void writeCmlStart(){
68
+ try {
69
+ writer.writeStartElement("cml");
70
+ writer.writeDefaultNamespace(CML_NAMESPACE);
71
+ writer.writeAttribute("convention", "conventions:molecular");
72
+ writer.writeNamespace("conventions", "http://www.xml-cml.org/convention/");
73
+ writer.writeNamespace("cmlDict", "http://www.xml-cml.org/dictionary/cml/");
74
+ writer.writeNamespace("nameDict", "http://www.xml-cml.org/dictionary/cml/name/");
75
+ } catch (XMLStreamException e) {
76
+ throw new RuntimeException(e);
77
+ }
78
+ }
79
+
80
+ void writeCmlEnd(){
81
+ try {
82
+ writer.writeEndElement();
83
+ writer.flush();
84
+ } catch (XMLStreamException e) {
85
+ throw new RuntimeException(e);
86
+ }
87
+ }
88
+
89
+ void writeMolecule(Fragment structure, String chemicalName, int id) throws XMLStreamException {
90
+ writer.writeStartElement("molecule");
91
+ writer.writeAttribute("id", "m" + id);
92
+
93
+ writer.writeStartElement("name");
94
+ writer.writeAttribute("dictRef", "nameDict:unknown");
95
+ writer.writeCharacters(chemicalName);
96
+ writer.writeEndElement();
97
+
98
+ if (structure != null) {
99
+ writer.writeStartElement("atomArray");
100
+ for(Atom atom : structure.getAtomList()) {
101
+ writeAtom(atom);
102
+ }
103
+ writer.writeEndElement();
104
+
105
+ writer.writeStartElement("bondArray");
106
+ for(Bond bond : structure.getBondSet()) {
107
+ writeBond(bond);
108
+ }
109
+ writer.writeEndElement();
110
+ }
111
+
112
+ writer.writeEndElement();
113
+ }
114
+
115
+ private void writeAtom(Atom atom) throws XMLStreamException {
116
+ writer.writeStartElement("atom");
117
+ writer.writeAttribute("id", "a" + Integer.toString(atom.getID()));
118
+ writer.writeAttribute("elementType", atom.getElement().toString());
119
+ if(atom.getCharge() != 0){
120
+ writer.writeAttribute("formalCharge", Integer.toString(atom.getCharge()));
121
+ }
122
+ if(atom.getIsotope() != null){
123
+ writer.writeAttribute("isotopeNumber", Integer.toString(atom.getIsotope()));
124
+ }
125
+ if (atom.getElement() != ChemEl.H){
126
+ int hydrogenCount =0;
127
+ List<Atom> neighbours = atom.getAtomNeighbours();
128
+ for (Atom neighbour : neighbours) {
129
+ if (neighbour.getElement() == ChemEl.H){
130
+ hydrogenCount++;
131
+ }
132
+ }
133
+ if (hydrogenCount==0){//prevent adding of implicit hydrogen
134
+ writer.writeAttribute("hydrogenCount", "0");
135
+ }
136
+ }
137
+ AtomParity atomParity = atom.getAtomParity();
138
+ if(atomParity != null) {
139
+ StereoGroup stereoGroupType = atomParity.getStereoGroup();
140
+ if (!((stereoGroupType == StereoGroup.Rac || stereoGroupType == StereoGroup.Rel) &&
141
+ countStereoGroup(atom) == 1)) {
142
+ writeAtomParity(atomParity);
143
+ }
144
+ }
145
+ for(String locant : atom.getLocants()) {
146
+ writer.writeStartElement("label");
147
+ writer.writeAttribute("value", locant);
148
+ writer.writeAttribute("dictRef", "cmlDict:locant");
149
+ writer.writeEndElement();
150
+ }
151
+ writer.writeEndElement();
152
+ }
153
+
154
+ private int countStereoGroup(Atom atom) {
155
+ int count = 0;
156
+ for (Atom a : atom.getFrag().getAtomList()) {
157
+ if (a.getAtomParity() == null)
158
+ continue;
159
+ if (a.getAtomParity().getStereoGroup().equals(atom.getAtomParity().getStereoGroup()) &&
160
+ a.getAtomParity().getStereoGroupNum() == atom.getAtomParity().getStereoGroupNum())
161
+ count++;
162
+ }
163
+ return count;
164
+ }
165
+
166
+ private void writeAtomParity(AtomParity atomParity) throws XMLStreamException {
167
+ writer.writeStartElement("atomParity");
168
+ writeAtomRefs4(atomParity.getAtomRefs4());
169
+ writer.writeCharacters(Integer.toString(atomParity.getParity()));
170
+ writer.writeEndElement();
171
+ }
172
+
173
+ private void writeBond(Bond bond) throws XMLStreamException {
174
+ writer.writeStartElement("bond");
175
+ writer.writeAttribute("id", "a" + Integer.toString(bond.getFrom()) + "_a" + Integer.toString(bond.getTo()));
176
+ writer.writeAttribute("atomRefs2", "a" + Integer.toString(bond.getFrom()) + " a" + Integer.toString(bond.getTo()));
177
+ switch (bond.getOrder()) {
178
+ case 1:
179
+ writer.writeAttribute("order", "S");
180
+ break;
181
+ case 2:
182
+ writer.writeAttribute("order", "D");
183
+ break;
184
+ case 3:
185
+ writer.writeAttribute("order", "T");
186
+ break;
187
+ default:
188
+ writer.writeAttribute("order", "unknown");
189
+ break;
190
+ }
191
+ BondStereo bondStereo = bond.getBondStereo();
192
+ if (bondStereo != null){
193
+ writeBondStereo(bondStereo);
194
+ }
195
+ writer.writeEndElement();
196
+ }
197
+
198
+ private void writeBondStereo(BondStereo bondStereo) throws XMLStreamException {
199
+ writer.writeStartElement("bondStereo");
200
+ writeAtomRefs4(bondStereo.getAtomRefs4());
201
+ writer.writeCharacters(bondStereo.getBondStereoValue().toString());
202
+ writer.writeEndElement();
203
+ }
204
+
205
+ private void writeAtomRefs4(Atom[] atomRefs4) throws XMLStreamException {
206
+ StringBuilder atomRefsSb = new StringBuilder();
207
+ for(int i = 0; i< atomRefs4.length - 1; i++) {
208
+ atomRefsSb.append('a');
209
+ atomRefsSb.append(atomRefs4[i].getID());
210
+ atomRefsSb.append(' ');
211
+ }
212
+ atomRefsSb.append('a');
213
+ atomRefsSb.append(atomRefs4[atomRefs4.length - 1].getID());
214
+ writer.writeAttribute("atomRefs4", atomRefsSb.toString());
215
+ }
216
+
217
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ChemEl.java ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
/**
 * The chemical elements known to OPSIN, each carrying its atomic number.
 * Constants are declared in order of increasing atomic number, one row per period below.
 * {@code R} has atomic number 0 (presumably a generic placeholder rather than a real element - confirm against its users).
 */
enum ChemEl {
	R(0),

	// period 1
	H(1), He(2),
	// period 2
	Li(3), Be(4), B(5), C(6), N(7), O(8), F(9), Ne(10),
	// period 3
	Na(11), Mg(12), Al(13), Si(14), P(15), S(16), Cl(17), Ar(18),
	// period 4
	K(19), Ca(20), Sc(21), Ti(22), V(23), Cr(24), Mn(25), Fe(26), Co(27),
	Ni(28), Cu(29), Zn(30), Ga(31), Ge(32), As(33), Se(34), Br(35), Kr(36),
	// period 5
	Rb(37), Sr(38), Y(39), Zr(40), Nb(41), Mo(42), Tc(43), Ru(44), Rh(45),
	Pd(46), Ag(47), Cd(48), In(49), Sn(50), Sb(51), Te(52), I(53), Xe(54),
	// period 6
	Cs(55), Ba(56), La(57), Ce(58), Pr(59), Nd(60), Pm(61), Sm(62),
	Eu(63), Gd(64), Tb(65), Dy(66), Ho(67), Er(68), Tm(69), Yb(70),
	Lu(71), Hf(72), Ta(73), W(74), Re(75), Os(76), Ir(77), Pt(78),
	Au(79), Hg(80), Tl(81), Pb(82), Bi(83), Po(84), At(85), Rn(86),
	// period 7
	Fr(87), Ra(88), Ac(89), Th(90), Pa(91), U(92), Np(93), Pu(94),
	Am(95), Cm(96), Bk(97), Cf(98), Es(99), Fm(100), Md(101), No(102),
	Lr(103), Rf(104), Db(105), Sg(106), Bh(107), Hs(108), Mt(109), Ds(110),
	Rg(111), Cn(112), Nh(113), Fl(114), Mc(115), Lv(116), Ts(117), Og(118);

	/** The atomic number of this element (0 for {@link #R}). */
	final int ATOMIC_NUM;

	ChemEl(int atomicNum) {
		this.ATOMIC_NUM = atomicNum;
	}

	/**
	 * @return true if this element is O, S, Se or Te
	 */
	boolean isChalcogen() {
		switch (this) {
		case O:
		case S:
		case Se:
		case Te:
			return true;
		default:
			return false;
		}
	}

	/**
	 * @return true if this element is F, Cl, Br or I
	 */
	boolean isHalogen() {
		switch (this) {
		case F:
		case Cl:
		case Br:
		case I:
			return true;
		default:
			return false;
		}
	}
}
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipOrderingException.java ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ /**Thrown if the ordering of ligands can now be determined by OPSIN's implementation of the CIP rules.
4
+ * This could be due to a limitation of the implementation or ligands actually being indistinguishable
5
+ *
6
+ * @author dl387
7
+ *
8
+ */
9
+ class CipOrderingException extends StereochemistryException {
10
+
11
+ private static final long serialVersionUID = 1L;
12
+
13
+ CipOrderingException() {
14
+ super();
15
+ }
16
+
17
+ CipOrderingException(String message) {
18
+ super(message);
19
+ }
20
+
21
+ CipOrderingException(String message, Throwable cause) {
22
+ super(message, cause);
23
+ }
24
+
25
+ CipOrderingException(Throwable cause) {
26
+ super(cause);
27
+ }
28
+
29
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipSequenceRules.java ADDED
@@ -0,0 +1,470 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.util.ArrayDeque;
4
+ import java.util.ArrayList;
5
+ import java.util.Collections;
6
+ import java.util.Comparator;
7
+ import java.util.Deque;
8
+ import java.util.List;
9
+ import java.util.Queue;
10
+
11
+ /**
12
+ * An implementation of rules 1-2 of the CIP rules i.e. constitutional differences then isotopes if there is a tie
13
+ * Cases that require rules 3-5 to distinguish result in an exception
14
+ *
15
+ * Phantom atoms are not added as I believe that the results of the program will still be the same even in their absence as everything beats a phantom and comparing phantoms to phantoms achieves nothing
16
+ * (higher ligancy beats lower ligancy when comparisons are performed)
17
+ * @author dl387
18
+ *
19
+ */
20
+ class CipSequenceRules {
21
+ private static class CipOrderingRunTimeException extends RuntimeException {
22
+ private static final long serialVersionUID = 1L;
23
+ CipOrderingRunTimeException(String message) {
24
+ super(message);
25
+ }
26
+ }
27
+
28
+ private final Atom chiralAtom;
29
+
30
+ CipSequenceRules(Atom chiralAtom) {
31
+ this.chiralAtom = chiralAtom;
32
+ }
33
+
34
+ /**
35
+ * Returns the chiral atom's neighbours in CIP order from lowest priority to highest priority
36
+ * @return
37
+ * @throws CipOrderingException
38
+ */
39
+ List<Atom> getNeighbouringAtomsInCipOrder() throws CipOrderingException {
40
+ List<Atom> neighbours = chiralAtom.getAtomNeighbours();
41
+ try {
42
+ Collections.sort(neighbours, new SortByCipOrder(chiralAtom));
43
+ }
44
+ catch (CipOrderingRunTimeException e) {
45
+ throw new CipOrderingException(e.getMessage());
46
+ }
47
+ return neighbours;
48
+ }
49
+
50
+ /**
51
+ * Returns the chiral atom's neighbours, with the exception of the given atom, in CIP order from lowest priority to highest priority
52
+ * @param neighbourToIgnore
53
+ * @return
54
+ * @throws CipOrderingException
55
+ */
56
+ List<Atom> getNeighbouringAtomsInCipOrderIgnoringGivenNeighbour(Atom neighbourToIgnore) throws CipOrderingException {
57
+ List<Atom> neighbours = chiralAtom.getAtomNeighbours();
58
+ if (!neighbours.remove(neighbourToIgnore)) {
59
+ throw new IllegalArgumentException("OPSIN bug: Atom" + neighbourToIgnore.getID() +" was not a neighbour of the given stereogenic atom");
60
+ }
61
+ try {
62
+ Collections.sort(neighbours, new SortByCipOrder(chiralAtom));
63
+ }
64
+ catch (CipOrderingRunTimeException e) {
65
+ throw new CipOrderingException(e.getMessage());
66
+ }
67
+ return neighbours;
68
+ }
69
+
70
+
71
+ /**
72
+ * Holds information about what atoms to try next next and how those atoms were reached (to prevent immediate back tracking and to detect cycles)
73
+ * @author dl387
74
+ *
75
+ */
76
+ private static class CipState {
77
+ CipState(List<AtomWithHistory> nextAtoms1, List<AtomWithHistory> nextAtoms2) {
78
+ this.nextAtoms1 = nextAtoms1;
79
+ this.nextAtoms2 = nextAtoms2;
80
+ }
81
+ final List<AtomWithHistory> nextAtoms1;
82
+ final List<AtomWithHistory> nextAtoms2;
83
+ }
84
+
85
+ /**
86
+ * Holds an atom with associated visited atoms
87
+ * @author dl387
88
+ *
89
+ */
90
+ private static class AtomWithHistory {
91
+ AtomWithHistory(Atom atom, List<Atom> visitedAtoms, Integer indexOfOriginalFromRoot) {
92
+ this.atom = atom;
93
+ this.visitedAtoms = visitedAtoms;
94
+ this.indexOfOriginalFromRoot = indexOfOriginalFromRoot;
95
+ }
96
+ final Atom atom;
97
+ final List<Atom> visitedAtoms;
98
+ final Integer indexOfOriginalFromRoot;
99
+ }
100
+
101
+ /**
102
+ * Sorts atoms by their CIP order, low to high
103
+ * @author dl387
104
+ *
105
+ */
106
+ private class SortByCipOrder implements Comparator<Atom> {
107
+ private final Atom chiralAtom;
108
+ private final AtomListCipComparator atomListCipComparator = new AtomListCipComparator();
109
+ private final ListOfAtomListsCipComparator listOfAtomListsCipComparator = new ListOfAtomListsCipComparator();
110
+ private final CipComparator cipComparator = new CipComparator();
111
+ private int rule = 0;
112
+
113
+
114
+ SortByCipOrder(Atom chiralAtom) {
115
+ this.chiralAtom = chiralAtom;
116
+ }
117
+
118
+ public int compare(Atom a, Atom b) {
119
+ /*
120
+ * rule = 0 --> Rule 1a Higher atomic number precedes lower
121
+ * rule = 1 --> Rule 1b A duplicated atom, with its predecessor node having the same label closer to the root, ranks higher than a duplicated atom, with its predecessor node having the same label farther from the root, which ranks higher than any non-duplicated atom node
122
+ * rule = 2 --> Rule 2 Higher atomic mass number precedes lower
123
+ */
124
+ for (rule = 0; rule <= 2; rule++) {
125
+ List<Atom> atomsVisted = new ArrayList<>();
126
+ atomsVisted.add(chiralAtom);
127
+ AtomWithHistory aWithHistory = new AtomWithHistory(a, atomsVisted, null);
128
+ AtomWithHistory bWithHistory = new AtomWithHistory(b, new ArrayList<>(atomsVisted), null);
129
+
130
+ int compare = compareByCipRules(aWithHistory, bWithHistory);
131
+ if (compare != 0) {
132
+ return compare;
133
+ }
134
+
135
+ List<AtomWithHistory> nextAtoms1 = new ArrayList<>();
136
+ nextAtoms1.add(aWithHistory);
137
+
138
+ List<AtomWithHistory> nextAtoms2 = new ArrayList<>();
139
+ nextAtoms2.add(bWithHistory);
140
+
141
+ CipState startingState = new CipState(nextAtoms1, nextAtoms2);
142
+ Deque<CipState> cipStateQueue = new ArrayDeque<>();
143
+ cipStateQueue.add(startingState);
144
+ /* Go through CIP states in a breadth-first manner:
145
+ * Neighbours of the given atom/s (if multiple atoms this is because so far the two paths leading to them have been equivalent) are evaluated for both a and b
146
+ * Neighbours are sorted by CIP priority
147
+ * Comparisons performed between neighbours of a and neighbours of b (will break if compare != 0)
148
+ * Degenerate neighbours grouped together
149
+ * CIP state formed for each list of neighbours and added to queue in order of priority
150
+ *
151
+ */
152
+ while(!cipStateQueue.isEmpty()) {
153
+ CipState currentState = cipStateQueue.removeFirst();
154
+ compare = compareAtNextLevel(currentState, cipStateQueue);
155
+ if (compare != 0) {
156
+ return compare;
157
+ }
158
+ }
159
+ }
160
+ throw new CipOrderingRunTimeException("Failed to assign CIP stereochemistry, this indicates a bug in OPSIN or a limitation in OPSIN's implementation of the sequence rules");
161
+ }
162
+
163
+ /**
164
+ * Compares the neighbours of the atoms specified in nextAtom1/2 in cipstate.
165
+ * Returns the result of the comparison between these neighbours
166
+ * If the comparison returned 0 adds new cipstates to the queue
167
+ * @param cipState
168
+ * @param queue
169
+ * @return
170
+ */
171
+ private int compareAtNextLevel(CipState cipState, Queue<CipState> queue) {
172
+ List<List<AtomWithHistory>> neighbours1 = getNextLevelNeighbours(cipState.nextAtoms1);
173
+ List<List<AtomWithHistory>> neighbours2 = getNextLevelNeighbours(cipState.nextAtoms2);
174
+
175
+ int compare = compareNeighboursByCipPriorityRules(neighbours1, neighbours2);
176
+
177
+ if (compare != 0) {
178
+ return compare;
179
+ }
180
+ List<List<AtomWithHistory>> prioritisedNeighbours1 = formListsWithSamePriority(neighbours1);
181
+ List<List<AtomWithHistory>> prioritisedNeighbours2 = formListsWithSamePriority(neighbours2);
182
+
183
+ //As earlier compare was 0, prioritisedNeighbours1.size() == prioritisedNeighbours2.size()
184
+ for (int i = prioritisedNeighbours1.size() - 1; i >= 0; i--) {
185
+ queue.add(new CipState(prioritisedNeighbours1.get(i), prioritisedNeighbours2.get(i)));
186
+ }
187
+ return 0;
188
+ }
189
+
190
+ private int compareNeighboursByCipPriorityRules(List<List<AtomWithHistory>> neighbours1, List<List<AtomWithHistory>> neighbours2) {
191
+ int difference = listOfAtomListsCipComparator.compare(neighbours1, neighbours2);
192
+ if (difference >0) {
193
+ return 1;
194
+ }
195
+ if (difference < 0) {
196
+ return -1;
197
+ }
198
+ return 0;
199
+ }
200
+
201
+ private List<List<AtomWithHistory>> getNextLevelNeighbours(List<AtomWithHistory> nextAtoms) {
202
+ List<List<AtomWithHistory>> neighbourLists = new ArrayList<>();
203
+ for (AtomWithHistory nextAtom : nextAtoms) {
204
+ neighbourLists.add(getNextAtomsWithAppropriateGhostAtoms(nextAtom));
205
+ }
206
+ Collections.sort(neighbourLists, atomListCipComparator);
207
+ return neighbourLists;
208
+ }
209
+
210
+ /**
211
+ * If given say [H,C,C] this becomes [H] [C,C]
212
+ * If given say [H,C,C] [H,C,C] this becomes [H,H] [C,C,C,C]
213
+ * If given say [H,C,C] [H,C,F] this becomes [H],[C,C][H][C][F]
214
+ * as [H,C,F] is higher priority than [H,C,C] so all its atoms must be evaluated first
215
+ * The input lists of neighbours are assumed to have been presorted.
216
+ * @param neighbourLists
217
+ */
218
+ private List<List<AtomWithHistory>> formListsWithSamePriority(List<List<AtomWithHistory>> neighbourLists) {
219
+ int intialNeighbourListCount = neighbourLists.size();
220
+ if (intialNeighbourListCount > 1) {
221
+ List<List<AtomWithHistory>> listsToRemove = new ArrayList<>();
222
+ for (int i = 0; i < intialNeighbourListCount; i++) {
223
+ List<List<AtomWithHistory>> neighbourListsToCombine = new ArrayList<>();
224
+ List<AtomWithHistory> primaryAtomList = neighbourLists.get(i);
225
+ for (int j = i + 1; j < intialNeighbourListCount; j++) {
226
+ List<AtomWithHistory> neighbourListToCompareWith = neighbourLists.get(j);
227
+ if (atomListCipComparator.compare(primaryAtomList, neighbourListToCompareWith) == 0) {
228
+ neighbourListsToCombine.add(neighbourListToCompareWith);
229
+ i++;
230
+ }
231
+ else {
232
+ break;
233
+ }
234
+ }
235
+ for (List<AtomWithHistory> neighbourList: neighbourListsToCombine) {
236
+ listsToRemove.add(neighbourList);
237
+ primaryAtomList.addAll(neighbourList);
238
+ }
239
+ }
240
+ neighbourLists.removeAll(listsToRemove);
241
+ }
242
+
243
+ List<List<AtomWithHistory>> updatedNeighbourLists = new ArrayList<>();
244
+ //lists of same priority have already been combined (see above) e.g. [H,C,C] [H,C,C] -->[H,C,C,H,C,C]
245
+ //now sort these combined lists by CIP priority
246
+ //then group atoms that have the same CIP priority
247
+ for (int i = 0, lstsLen = neighbourLists.size(); i < lstsLen; i++) {
248
+ List<AtomWithHistory> neighbourList = neighbourLists.get(i);
249
+ Collections.sort(neighbourList, cipComparator);
250
+ AtomWithHistory lastAtom = null;
251
+ List<AtomWithHistory> currentAtomList = new ArrayList<>();
252
+ for (int j = 0, lstLen = neighbourList.size(); j < lstLen; j++) {
253
+ AtomWithHistory a = neighbourList.get(j);
254
+ if (lastAtom != null && compareByCipRules(lastAtom, a) != 0) {
255
+ updatedNeighbourLists.add(currentAtomList);
256
+ currentAtomList = new ArrayList<>();
257
+ }
258
+ currentAtomList.add(a);
259
+ lastAtom = a;
260
+ }
261
+ if (!currentAtomList.isEmpty()) {
262
+ updatedNeighbourLists.add(currentAtomList);
263
+ }
264
+ }
265
+ return updatedNeighbourLists;
266
+ }
267
+
268
+
269
+ /**
270
+ * Sorts atoms by their atomic number, low to high
271
+ * @author dl387
272
+ *
273
+ */
274
+ private class CipComparator implements Comparator<AtomWithHistory> {
275
+ public int compare(AtomWithHistory a, AtomWithHistory b) {
276
+ return compareByCipRules(a, b);
277
+ }
278
+ }
279
+
280
+ /**
281
+ * Sorts atomLists by CIP rules, low to high
282
+ * @author dl387
283
+ *
284
+ */
285
+ private class AtomListCipComparator implements Comparator<List<AtomWithHistory>> {
286
+ public int compare(List<AtomWithHistory> a, List<AtomWithHistory> b) {
287
+ int aSize = a.size();
288
+ int bSize = b.size();
289
+ int differenceInSize = aSize - bSize;
290
+ int maxCommonSize = aSize > bSize ? bSize : aSize;
291
+ for (int i = 1; i <= maxCommonSize; i++) {
292
+ int difference = compareByCipRules(a.get(aSize - i), b.get(bSize - i));
293
+ if (difference > 0) {
294
+ return 1;
295
+ }
296
+ if (difference < 0) {
297
+ return -1;
298
+ }
299
+ }
300
+ if (differenceInSize > 0) {
301
+ return 1;
302
+ }
303
+ if (differenceInSize < 0) {
304
+ return -1;
305
+ }
306
+ return 0;
307
+ }
308
+ }
309
+
310
+ /**
311
+ * Sorts lists of atomLists by CIP rules, low to high
312
+ * @author dl387
313
+ *
314
+ */
315
+ private class ListOfAtomListsCipComparator implements Comparator<List<List<AtomWithHistory>>> {
316
+ public int compare(List<List<AtomWithHistory>> a, List<List<AtomWithHistory>> b) {
317
+ int aSize = a.size();
318
+ int bSize = b.size();
319
+ int differenceInSize = aSize - bSize;
320
+ int maxCommonSize = aSize > bSize ? bSize : aSize;
321
+ for (int i = 1; i <= maxCommonSize; i++) {
322
+ List<AtomWithHistory> aprime = a.get(aSize - i);
323
+ List<AtomWithHistory> bprime = b.get(bSize - i);
324
+ int aprimeSize = aprime.size();
325
+ int bprimeSize = bprime.size();
326
+ int differenceInSizeprime = aprimeSize - bprimeSize;
327
+ int maxCommonSizeprime = aprimeSize > bprimeSize ? bprimeSize : aprimeSize;
328
+ for (int j = 1; j <= maxCommonSizeprime; j++) {
329
+ int difference = compareByCipRules(aprime.get(aprimeSize - j), bprime.get(bprimeSize - j));
330
+ if (difference > 0) {
331
+ return 1;
332
+ }
333
+ if (difference < 0) {
334
+ return -1;
335
+ }
336
+ }
337
+ if (differenceInSizeprime > 0) {
338
+ return 1;
339
+ }
340
+ if (differenceInSizeprime < 0) {
341
+ return -1;
342
+ }
343
+ }
344
+ if (differenceInSize > 0) {
345
+ return 1;
346
+ }
347
+ if (differenceInSize < 0) {
348
+ return -1;
349
+ }
350
+ return 0;
351
+ }
352
+ }
353
+
354
+ /**
355
+ * Gets the neighbouring atoms bar the previous atom in CIP order
356
+ * If the neighbouring atom has already been visited it is replaced with a ghost atom
357
+ * Multiple bonds including those to previous atoms yield ghost atoms unless the bond goes to the chiral atom e.g. in a sulfoxide
358
+ * @param atoms
359
+ * @return
360
+ */
361
+ private List<AtomWithHistory> getNextAtomsWithAppropriateGhostAtoms(AtomWithHistory atomWithHistory) {
362
+ Atom atom = atomWithHistory.atom;
363
+ List<Atom> visitedAtoms = atomWithHistory.visitedAtoms;
364
+ Atom previousAtom = visitedAtoms.get(visitedAtoms.size()-1);
365
+ List<Atom> visitedAtomsIncludingCurrentAtom = new ArrayList<>(visitedAtoms);
366
+ visitedAtomsIncludingCurrentAtom.add(atom);
367
+
368
+ List<AtomWithHistory> neighboursWithHistory = new ArrayList<>();
369
+ for(Bond b : atom.getBonds()) {
370
+ Atom atomBondConnectsTo = b.getOtherAtom(atom);
371
+ if (!atomBondConnectsTo.equals(chiralAtom)) {//P-91.1.4.2.4 (higher order bonds to chiral centre do not involve duplication of atoms)
372
+ for (int j = b.getOrder(); j >1; j--) {//add ghost atoms to represent higher order bonds
373
+ Atom ghost = new Atom(atomBondConnectsTo.getElement());
374
+ if (rule > 0) {
375
+ int indexOfOriginalAtom = visitedAtoms.indexOf(atomBondConnectsTo);
376
+ if (indexOfOriginalAtom != -1) {
377
+ neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, indexOfOriginalAtom));
378
+ }
379
+ else{
380
+ neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, visitedAtoms.size() + 1));
381
+ }
382
+ }
383
+ else{
384
+ neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, null));
385
+ }
386
+ }
387
+ }
388
+ if (!atomBondConnectsTo.equals(previousAtom)) {
389
+ if (visitedAtoms.contains(atomBondConnectsTo)) {//cycle detected, add ghost atom instead
390
+ Atom ghost = new Atom(atomBondConnectsTo.getElement());
391
+ if (rule > 0) {
392
+ neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, visitedAtoms.indexOf(atomBondConnectsTo)));
393
+ }
394
+ else{
395
+ neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, null));
396
+ }
397
+ }
398
+ else{
399
+ neighboursWithHistory.add(new AtomWithHistory(atomBondConnectsTo, visitedAtomsIncludingCurrentAtom, null));
400
+ }
401
+ }
402
+ }
403
+ Collections.sort(neighboursWithHistory, cipComparator);
404
+ return neighboursWithHistory;
405
+ }
406
+
407
+ /**
408
+ * Greater than 0 means a is preferred over b (vice versa for less than 1)
409
+ * @param a
410
+ * @param b
411
+ * @return
412
+ */
413
+ private int compareByCipRules(AtomWithHistory a, AtomWithHistory b) {
414
+ //rule 1a
415
+ //prefer higher atomic number
416
+ int atomicNumber1 = a.atom.getElement().ATOMIC_NUM;
417
+ int atomicNumber2 = b.atom.getElement().ATOMIC_NUM;
418
+ if (atomicNumber1 > atomicNumber2) {
419
+ return 1;
420
+ }
421
+ else if (atomicNumber1 < atomicNumber2) {
422
+ return -1;
423
+ }
424
+ if (rule > 0) {
425
+ //rule 1b
426
+ //prefer duplicate to non-duplicate
427
+ Integer indexFromRoot1 = a.indexOfOriginalFromRoot;
428
+ Integer indexFromRoot2 = b.indexOfOriginalFromRoot;
429
+ if (indexFromRoot1 != null && indexFromRoot2 == null) {
430
+ return 1;
431
+ }
432
+ if (indexFromRoot1 == null && indexFromRoot2 != null) {
433
+ return -1;
434
+ }
435
+ //prefer duplicate of node closer to root
436
+ if (indexFromRoot1 != null && indexFromRoot2 != null) {
437
+ if (indexFromRoot1 < indexFromRoot2 ) {
438
+ return 1;
439
+ }
440
+ if (indexFromRoot1 > indexFromRoot2 ) {
441
+ return -1;
442
+ }
443
+ }
444
+ if (rule > 1) {
445
+ //rule 2
446
+ //prefer higher atomic mass
447
+ Integer atomicMass1 = a.atom.getIsotope();
448
+ Integer atomicMass2 = b.atom.getIsotope();
449
+ if (atomicMass1 != null && atomicMass2 == null) {
450
+ return 1;
451
+ }
452
+ else if (atomicMass1 == null && atomicMass2 != null) {
453
+ return -1;
454
+ }
455
+ else if (atomicMass1 != null && atomicMass2 != null) {
456
+ if (atomicMass1 > atomicMass2) {
457
+ return 1;
458
+ }
459
+ else if (atomicMass1 < atomicMass2) {
460
+ return -1;
461
+ }
462
+ }
463
+ }
464
+
465
+ }
466
+ return 0;
467
+ }
468
+ }
469
+
470
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerationException.java ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
/**
 * Thrown during component generation.
 *
 * @author ptc24
 */
class ComponentGenerationException extends Exception {

	private static final long serialVersionUID = 1L;

	/** Creates an exception with neither a message nor a cause. */
	ComponentGenerationException() {
		super();
	}

	/**
	 * @param message description of the failure
	 */
	ComponentGenerationException(String message) {
		super(message);
	}

	/**
	 * @param message description of the failure
	 * @param cause the underlying failure
	 */
	ComponentGenerationException(String message, Throwable cause) {
		super(message, cause);
	}

	/**
	 * @param cause the underlying failure
	 */
	ComponentGenerationException(Throwable cause) {
		super(cause);
	}

}
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerator.java ADDED
The diff for this file is too large to render. See raw diff
 
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentProcessor.java ADDED
The diff for this file is too large to render. See raw diff
 
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CycleDetector.java ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.util.ArrayDeque;
4
+ import java.util.ArrayList;
5
+ import java.util.Deque;
6
+ import java.util.LinkedHashSet;
7
+ import java.util.List;
8
+ import java.util.Set;
9
+
10
+ /**
11
+ * Assigns whether atoms are in rings or not
12
+ * @author dl387
13
+ *
14
+ */
15
+ class CycleDetector {
16
+
17
+ /**
18
+ * Performs a depth first search for rings hence assigning whether atoms are in rings or not
19
+ * This is necessary for deciding the applicability, and in some cases meaning, of suffixes and to determine what atoms are capable of having spare valency
20
+ * Fragments made of disconnected sections are supported
21
+ * @param frag
22
+ */
23
+ static void assignWhetherAtomsAreInCycles(Fragment frag) {
24
+ List<Atom> atomList = frag.getAtomList();
25
+ for (Atom atom : atomList) {
26
+ atom.setAtomIsInACycle(false);
27
+ atom.setProperty(Atom.VISITED, null);
28
+ }
29
+ for (Atom a : atomList) {//as OPSIN does not disallow disconnected sections within a single "fragment" (e.g. in suffixes) for vigorousness this for loop is required
30
+ if(a.getProperty(Atom.VISITED) == null){//true for only the first atom in a fully connected molecule
31
+ traverseRings(a, null, 0);
32
+ }
33
+ }
34
+ }
35
+
36
+ private static int traverseRings(Atom currentAtom, Atom previousAtom, int depth){
37
+ Integer previouslyAssignedDepth = currentAtom.getProperty(Atom.VISITED);
38
+ if(previouslyAssignedDepth != null){
39
+ return previouslyAssignedDepth;
40
+ }
41
+ currentAtom.setProperty(Atom.VISITED, depth);
42
+ List<Atom> equivalentAtoms = new ArrayList<>();
43
+ equivalentAtoms.add(currentAtom);
44
+
45
+ List<Atom> neighbours;
46
+ for(;;) {
47
+ //Non-recursively process atoms in a chain
48
+ //add the atoms in the chain to equivalentAtoms as either all or none of them are in a ring
49
+ neighbours = currentAtom.getAtomNeighbours();
50
+ neighbours.remove(previousAtom);
51
+ if (neighbours.size() != 1) {
52
+ break;
53
+ }
54
+ Atom nextAtom = neighbours.get(0);
55
+ if (nextAtom.getProperty(Atom.VISITED) != null) {
56
+ //chain reached a previously visited atom, must be a ring
57
+ break;
58
+ }
59
+ previousAtom = currentAtom;
60
+ currentAtom = nextAtom;
61
+ equivalentAtoms.add(currentAtom);
62
+ currentAtom.setProperty(Atom.VISITED, ++depth);
63
+ }
64
+
65
+ int result = depth + 1;
66
+ for (Atom neighbour : neighbours) {
67
+ int temp = traverseRings(neighbour, currentAtom, depth + 1);
68
+ result = Math.min(result, temp);
69
+ }
70
+ if (result < depth){
71
+ for (Atom a : equivalentAtoms) {
72
+ a.setAtomIsInACycle(true);
73
+ }
74
+ } else if (result == depth) {
75
+ currentAtom.setAtomIsInACycle(true);
76
+ }
77
+ return result;
78
+ }
79
+
80
+ private static class PathSearchState{
81
+ final Atom currentAtom;
82
+ final List<Atom> orderAtomsVisited;
83
+ public PathSearchState(Atom currentAtom, List<Atom> orderAtomsVisited ) {
84
+ this.currentAtom = currentAtom;
85
+ this.orderAtomsVisited = orderAtomsVisited;
86
+ }
87
+ Atom getCurrentAtom() {
88
+ return currentAtom;
89
+ }
90
+ List<Atom> getOrderAtomsVisited() {
91
+ return orderAtomsVisited;
92
+ }
93
+ }
94
+
95
+ /**
96
+ * Attempts to find paths from a1 to a2 using only the given bonds
97
+ * @param a1
98
+ * @param a2
99
+ * @param peripheryBonds
100
+ * @return
101
+ */
102
+ static List<List<Atom>> getPathBetweenAtomsUsingBonds(Atom a1, Atom a2, Set<Bond> peripheryBonds){
103
+ List<List<Atom>> paths = new ArrayList<>();
104
+ Deque<PathSearchState> stateStack = new ArrayDeque<>();
105
+ stateStack.add(new PathSearchState(a1, new ArrayList<>()));
106
+ while (stateStack.size()>0){
107
+ PathSearchState state =stateStack.removeLast();//depth first traversal
108
+ List<Atom> orderAtomsVisited = state.getOrderAtomsVisited();
109
+ Atom nextAtom = state.getCurrentAtom();
110
+ orderAtomsVisited.add(nextAtom);
111
+ Set<Bond> neighbourBonds = new LinkedHashSet<>(nextAtom.getBonds());
112
+ neighbourBonds.retainAll(peripheryBonds);
113
+ for (Bond neighbourBond : neighbourBonds) {
114
+ Atom neighbour = neighbourBond.getOtherAtom(nextAtom);
115
+ if (orderAtomsVisited.contains(neighbour)){//atom already visited by this path
116
+ continue;
117
+ }
118
+ if (neighbour ==a2 ){//target atom found
119
+ paths.add(new ArrayList<>(orderAtomsVisited.subList(1, orderAtomsVisited.size())));
120
+ }
121
+ else{//add atom to stack, its neighbours will be recursively investigated shortly
122
+ stateStack.add(new PathSearchState(neighbour, new ArrayList<>(orderAtomsVisited)));
123
+ }
124
+ }
125
+ }
126
+ return paths;
127
+ }
128
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CyclicAtomList.java ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.util.List;
4
+
5
+ /**
6
+ * Convenience class for iterating over a list of atoms that form a ring
7
+ * Doing getNext when the index is the final atom in the list will return the first atom
8
+ * Doing getPrevious when the index is the first atom in the list will return the final atom
9
+ * @author dl387
10
+ *
11
+ */
12
+ class CyclicAtomList{
13
+ private int index = -1;
14
+ private final List<Atom> atomList;
15
+
16
+ /**
17
+ * Construct a cyclicAtomList from an atomList
18
+ * Index defaults to -1
19
+ * @param atomList
20
+ */
21
+ CyclicAtomList(List<Atom> atomList) {
22
+ this.atomList = atomList;
23
+ }
24
+
25
+ /**
26
+ * Construct a cyclicAtomList from an atomList
27
+ * The second parameter sets the current index
28
+ * @param atomList
29
+ * @param index
30
+ */
31
+ CyclicAtomList(List<Atom> atomList, int index) {
32
+ this.atomList = atomList;
33
+ setIndex(index);
34
+ }
35
+
36
+ /**
37
+ * Returns the number of elements in this list. If this list contains more
38
+ * than <tt>Integer.MAX_VALUE</tt> elements, returns
39
+ * <tt>Integer.MAX_VALUE</tt>.
40
+ *
41
+ * @return the number of elements in this list
42
+ */
43
+ int size() {
44
+ return atomList.size();
45
+ }
46
+
47
+ /**
48
+ * Returns the atom at the specified position in this list.
49
+ * @param index index of the element to return
50
+ * @return Atom the atom at the specified position in this list
51
+ * @throws IndexOutOfBoundsException - if the index is out of range (index < 0 || index >= size())
52
+ */
53
+ Atom get(int index) throws IndexOutOfBoundsException {
54
+ return atomList.get(index);
55
+ }
56
+
57
+ /**
58
+ * Return the current index in the list
59
+ * @return
60
+ */
61
+ int getIndex() {
62
+ return index;
63
+ }
64
+
65
+ /**
66
+ * Set the current index
67
+ * @param index
68
+ */
69
+ void setIndex(int index) {
70
+ if (index >= atomList.size()){
71
+ throw new IllegalArgumentException("Specified index is not within ringAtom list");
72
+ }
73
+ this.index = index;
74
+ }
75
+
76
+ /**
77
+ * Increments and returns the atom at the new index in the list (next atom)
78
+ * When the index is the final atom in the list will return the first atom
79
+ * @return
80
+ */
81
+ Atom next() {
82
+ int tempIndex = index + 1;
83
+ if (tempIndex >= atomList.size()){
84
+ tempIndex = 0;
85
+ }
86
+ index = tempIndex;
87
+ return atomList.get(index);
88
+ }
89
+
90
+ /**
91
+ * Decrements and returns the atom at the new index in the list (previous atom)
92
+ * when the index is the first atom in the list will return the final atom
93
+ * @return
94
+ */
95
+ Atom previous() {
96
+ int tempIndex = index - 1;
97
+ if (tempIndex < 0){
98
+ tempIndex = atomList.size() -1 ;
99
+ }
100
+ index = tempIndex;
101
+ return atomList.get(index);
102
+ }
103
+
104
+ /**
105
+ * Returns the next atom in the list
106
+ * When the index is the final atom in the list will return the first atom
107
+ * Doesn't effect the list
108
+ * @return
109
+ */
110
+ Atom peekNext() {
111
+ int tempIndex = index + 1;
112
+ if (tempIndex >= atomList.size()){
113
+ tempIndex = 0;
114
+ }
115
+ return atomList.get(tempIndex);
116
+ }
117
+
118
+ /**
119
+ * Returns the previous atom in the list
120
+ * when the index is the first atom in the list will return the final atom
121
+ * Doesn't effect the list
122
+ * @return
123
+ */
124
+ Atom peekPrevious() {
125
+ int tempIndex = index - 1;
126
+ if (tempIndex < 0){
127
+ tempIndex = atomList.size() -1 ;
128
+ }
129
+ return atomList.get(tempIndex);
130
+ }
131
+
132
+ /**
133
+ * Returns the atom corresponding to the current index
134
+ * Note that CycliAtomLists have a default index of -1
135
+ * @return
136
+ */
137
+ Atom getCurrent() {
138
+ return atomList.get(index);
139
+ }
140
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Element.java ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.List;
5
+
6
+ abstract class Element {
7
+
8
+ protected String name;
9
+ protected Element parent = null;
10
+ protected final List<Attribute> attributes = new ArrayList<>();
11
+
12
+ Element(String name) {
13
+ this.name = name;
14
+ }
15
+
16
+ void addAttribute(Attribute attribute) {
17
+ attributes.add(attribute);
18
+ }
19
+
20
+ void addAttribute(String atrName, String atrValue) {
21
+ attributes.add(new Attribute(atrName, atrValue));
22
+ }
23
+
24
+ /**
25
+ * Adds a child element
26
+ * @param child
27
+ */
28
+ abstract void addChild(Element child);
29
+
30
+ /**
31
+ * Creates a deep copy with no parent
32
+ */
33
+ abstract Element copy();
34
+
35
+ void detach() {
36
+ if (parent != null) {
37
+ parent.removeChild(this);
38
+ }
39
+ }
40
+
41
+ Attribute getAttribute(int index) {
42
+ return attributes.get(index);
43
+ }
44
+
45
+ /**
46
+ * Returns the attribute with the given name
47
+ * or null if the attribute doesn't exist
48
+ * @param name
49
+ * @return
50
+ */
51
+ Attribute getAttribute(String name) {
52
+ for (int i = 0, len = attributes.size(); i < len; i++) {
53
+ Attribute a = attributes.get(i);
54
+ if (a.getName().equals(name)) {
55
+ return a;
56
+ }
57
+ }
58
+ return null;
59
+ }
60
+
61
+ int getAttributeCount() {
62
+ return attributes.size();
63
+ }
64
+
65
+ /**
66
+ * Returns the value of the attribute with the given name
67
+ * or null if the attribute doesn't exist
68
+ * @param name
69
+ * @return
70
+ */
71
+ String getAttributeValue(String name) {
72
+ Attribute attribute = getAttribute(name);
73
+ if (attribute != null) {
74
+ return attribute.getValue();
75
+ }
76
+ return null;
77
+ }
78
+
79
+ /**
80
+ * Returns the child at the given index in the children list
81
+ * @param index
82
+ * @return
83
+ */
84
+ abstract Element getChild(int index);
85
+
86
+ /**
87
+ * Returns the number of children
88
+ * @return
89
+ */
90
+ abstract int getChildCount();
91
+
92
+ /**
93
+ * Returns a copy of the child elements
94
+ *
95
+ * @return
96
+ */
97
+ abstract List<Element> getChildElements();
98
+
99
+ /**
100
+ * Gets child elements with this name (in iteration order)
101
+ * @param name
102
+ * @return
103
+ */
104
+ abstract List<Element> getChildElements(String name);
105
+
106
+ /**
107
+ * Returns the first child element with the specified name
108
+ *
109
+ * @param name
110
+ * @return
111
+ */
112
+ abstract Element getFirstChildElement(String name);
113
+
114
+ /**
115
+ * Returns the fragment associated with this element (only applicable to tokens)
116
+ * @return
117
+ */
118
+ Fragment getFrag() {
119
+ throw new UnsupportedOperationException("Only tokens can have associated fragments");
120
+ }
121
+
122
+ String getName() {
123
+ return name;
124
+ }
125
+
126
+ Element getParent() {
127
+ return this.parent;
128
+ }
129
+
130
+ abstract String getValue();
131
+
132
+ /**
133
+ * Returns the index of the given child in the children list (or -1 if it isn't a child)
134
+ * @param child
135
+ * @return
136
+ */
137
+ abstract int indexOf(Element child);
138
+
139
+ /**
140
+ * Inserts the element at the given index in the children list
141
+ * @param child
142
+ * @param index
143
+ */
144
+ abstract void insertChild(Element child, int index);
145
+
146
+ boolean removeAttribute(Attribute attribute) {
147
+ return attributes.remove(attribute);
148
+ }
149
+
150
+ /**
151
+ * Removes the given child element
152
+ * @param child
153
+ * @return
154
+ */
155
+ abstract boolean removeChild(Element child);
156
+
157
+ /**
158
+ * Removes the element at the given index in the children list
159
+ * @param index
160
+ * @return
161
+ */
162
+ abstract Element removeChild(int index);
163
+
164
+ /**
165
+ * Replaces a child element with another element
166
+ * @param oldChild
167
+ * @param newChild
168
+ */
169
+ abstract void replaceChild(Element oldChild, Element newChild);
170
+
171
+ /**
172
+ * Sets the fragment associated with this element (only applicable to tokens!)
173
+ * @param frag
174
+ */
175
+ void setFrag(Fragment frag) {
176
+ throw new UnsupportedOperationException("Only tokens can have associated fragments");
177
+ }
178
+
179
+ void setName(String name) {
180
+ this.name = name;
181
+ }
182
+
183
+ void setParent(Element newParentEl) {
184
+ this.parent = newParentEl;
185
+ }
186
+
187
+ abstract void setValue(String text);
188
+
189
+ public String toString() {
190
+ return toXML();
191
+ }
192
+
193
+ String toXML() {
194
+ return toXML(0).toString();
195
+ }
196
+
197
+ private StringBuilder toXML(int indent) {
198
+ StringBuilder result = new StringBuilder();
199
+ for (int i = 0; i < indent; i++) {
200
+ result.append(" ");
201
+ }
202
+ result.append('<');
203
+ result.append(name);
204
+ for (Attribute atr : attributes) {
205
+ result.append(' ');
206
+ result.append(atr.toXML());
207
+ }
208
+ result.append('>');
209
+ if (getChildCount() > 0){
210
+ for (Element child : getChildElements()) {
211
+ result.append(OpsinTools.NEWLINE);
212
+ result.append(child.toXML(indent + 1));
213
+ }
214
+ result.append(OpsinTools.NEWLINE);
215
+ for (int i = 0; i < indent; i++) {
216
+ result.append(" ");
217
+ }
218
+ }
219
+ else{
220
+ result.append(getValue());
221
+ }
222
+ result.append("</");
223
+ result.append(name);
224
+ result.append('>');
225
+
226
+ return result;
227
+ }
228
+
229
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Fragment.java ADDED
@@ -0,0 +1,633 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.Collection;
5
+ import java.util.Collections;
6
+ import java.util.HashMap;
7
+ import java.util.Iterator;
8
+ import java.util.LinkedHashMap;
9
+ import java.util.LinkedHashSet;
10
+ import java.util.List;
11
+ import java.util.Map;
12
+ import java.util.Set;
13
+ import java.util.regex.Matcher;
14
+
15
+ import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*;
16
+ import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*;
17
+
18
+ /**A fragment of a molecule, holds bonds and atoms.
19
+ *
20
+ * @author ptc24
21
+ * @author dl387
22
+ *
23
+ */
24
+ class Fragment implements Iterable<Atom> {
25
+
26
+ /**A mapping between IDs and the atoms in this fragment, by default is ordered by the order atoms are added to the fragment*/
27
+ private final Map<Integer, Atom> atomMapFromId = new LinkedHashMap<>();
28
+
29
+ /**Equivalent to and synced to atomMapFromId.values() */
30
+ private final Collection<Atom> atomCollection = atomMapFromId.values();
31
+
32
+ /**A mapping between locants and the atoms in this fragment*/
33
+ private final Map<String, Atom> atomMapFromLocant = new HashMap<>();
34
+
35
+ /**The bonds in the fragment*/
36
+ private final Set<Bond> bondSet = new LinkedHashSet<>();
37
+
38
+ /**The associated token element*/
39
+ private Element tokenEl;
40
+
41
+ /**The atoms that are used when this fragment is connected to another fragment. Unused outAtoms means that the fragment is a radical or an error has occurred
42
+ * Initially empty */
43
+ private final List<OutAtom> outAtoms = new ArrayList<>();
44
+
45
+ /**The atoms that are used on this fragment to form things like esters
46
+ * Initially empty */
47
+ private final List<FunctionalAtom> functionalAtoms = new ArrayList<>();
48
+
49
+ /**The atom that fragments connecting to this fragment should connect to in preference
50
+ * e.g. for amino acids the alpha amino group
51
+ * Null by default*/
52
+ private Atom defaultInAtom = null;
53
+
54
+ /**The atoms in the fragment that have been indicated to have hydrogen at the SMILES level.*/
55
+ private final List<Atom> indicatedHydrogen = new ArrayList<>();
56
+
57
+ /**Pseudo atoms indicating start and end of polymer structure repeat unit*/
58
+ private List<Atom> polymerAttachmentPoints = null;
59
+
60
+ /**
61
+ * DO NOT CALL DIRECTLY EXCEPT FOR TESTING
62
+ * Makes an empty Fragment associated with the given tokenEl
63
+ * @param tokenEl
64
+ */
65
+ Fragment(Element tokenEl) {
66
+ this.tokenEl = tokenEl;
67
+ }
68
+
69
+ /**
70
+ * DO NOT CALL DIRECTLY EXCEPT FOR TESTING
71
+ * Makes an empty Fragment with the given type
72
+ *
73
+ * @param type
74
+ */
75
+ Fragment(String type) {
76
+ this.tokenEl = new TokenEl("");
77
+ this.tokenEl.addAttribute(TYPE_ATR, type);
78
+ }
79
+
80
+ /**Adds an atom to the fragment and associates it with this fragment*/
81
+ void addAtom(Atom atom) {
82
+ List<String> locants =atom.getLocants();
83
+ for (String locant: locants) {
84
+ atomMapFromLocant.put(locant, atom);
85
+ }
86
+ atomMapFromId.put(atom.getID(), atom);
87
+ atom.setFrag(this);
88
+ }
89
+
90
+ /**
91
+ * Return the number of atoms in the fragment
92
+ * @return
93
+ */
94
+ int getAtomCount() {
95
+ return atomCollection.size();
96
+ }
97
+
98
+ /**
99
+ * Returns a copy of the fragment's atoms
100
+ * @return
101
+ */
102
+ List<Atom> getAtomList() {
103
+ return new ArrayList<>(atomCollection);
104
+ }
105
+
106
+
107
+ /**
108
+ * Adds a bond to the fragment.
109
+ * @param bond
110
+ */
111
+ void addBond(Bond bond) {
112
+ bondSet.add(bond);
113
+ }
114
+
115
+ /**Removes a bond to the fragment if it is present.
116
+ * @param bond
117
+ * @return*/
118
+ boolean removeBond(Bond bond) {
119
+ return bondSet.remove(bond);
120
+ }
121
+
122
+ /**Gets bondSet.*/
123
+ Set<Bond> getBondSet() {
124
+ return Collections.unmodifiableSet(bondSet);
125
+ }
126
+
127
+ /**Gets the id of the atom in the fragment with the specified locant.
128
+ *
129
+ * @param locant The locant to look for
130
+ * @return The id of the found atom, or 0 if it is not found
131
+ */
132
+ int getIDFromLocant(String locant) {
133
+ Atom a = getAtomByLocant(locant);
134
+ if (a != null){
135
+ return a.getID();
136
+ }
137
+ return 0;
138
+ }
139
+
140
+ /**Gets the id of the atom in the fragment with the specified locant, throwing if this fails.
141
+ *
142
+ * @param locant The locant to look for
143
+ * @return The id of the found atom
144
+ * @throws StructureBuildingException
145
+ */
146
+ int getIDFromLocantOrThrow(String locant) throws StructureBuildingException {
147
+ int id = getIDFromLocant(locant);
148
+ if(id == 0) {
149
+ throw new StructureBuildingException("Couldn't find id from locant " + locant + ".");
150
+ }
151
+ return id;
152
+ }
153
+
154
+ /**Gets the atom in the fragment with the specified locant.
155
+ *
156
+ * @param locant The locant to look for
157
+ * @return The found atom, or null if it is not found
158
+ */
159
+ Atom getAtomByLocant(String locant) {
160
+ Atom a =atomMapFromLocant.get(locant);
161
+ if (a != null){
162
+ return a;
163
+ }
164
+ Matcher m =MATCH_AMINOACID_STYLE_LOCANT.matcher(locant);
165
+ if (m.matches()){//e.g. N5
166
+ Atom backboneAtom =atomMapFromLocant.get(m.group(3));//the atom corresponding to the numeric or greek component
167
+ if (backboneAtom==null){
168
+ return null;
169
+ }
170
+ a = FragmentTools.getAtomByAminoAcidStyleLocant(backboneAtom, m.group(1), m.group(2));
171
+ if (a != null){
172
+ return a;
173
+ }
174
+ }
175
+ return null;
176
+ }
177
+
178
+ /**Gets the atom in the fragment with the specified locant, throwing if this fails.
179
+ *
180
+ * @param locant The locant to look for
181
+ * @return The found atom
182
+ * @throws StructureBuildingException
183
+ */
184
+ Atom getAtomByLocantOrThrow(String locant) throws StructureBuildingException {
185
+ Atom a = getAtomByLocant(locant);
186
+ if(a == null) {
187
+ throw new StructureBuildingException("Could not find the atom with locant " + locant + ".");
188
+ }
189
+ return a;
190
+ }
191
+
192
+ /**Gets the atom in the fragment with the specified ID.
193
+ *
194
+ * @param id The id of the atom.
195
+ * @return The found atom, or null.
196
+ */
197
+ Atom getAtomByID(int id) {
198
+ return atomMapFromId.get(id);
199
+ }
200
+
201
+ /**Gets the atom in the fragment with the specified ID, throwing if this fails.
202
+ *
203
+ * @param id The id of the atom.
204
+ * @return The found atom
205
+ * @throws StructureBuildingException
206
+ */
207
+ Atom getAtomByIDOrThrow(int id) throws StructureBuildingException {
208
+ Atom a = getAtomByID(id);
209
+ if(a == null) {
210
+ throw new StructureBuildingException("Couldn't find atom with id " + id + ".");
211
+ }
212
+ return a;
213
+ }
214
+
215
+ /**Finds a bond between two specified atoms the first of which must be within the fragment
216
+ *
217
+ * @param ID1 The id of one atom
218
+ * @param ID2 The id of the other atom
219
+ * @return The bond found, or null
220
+ */
221
+ Bond findBond(int ID1, int ID2) {
222
+ Atom a = atomMapFromId.get(ID1);
223
+ if (a != null){
224
+ for (Bond b : a.getBonds()) {
225
+ if((b.getFrom() == ID1 && b.getTo() == ID2) ||
226
+ (b.getTo() == ID1 && b.getFrom() == ID2)) {
227
+ return b;
228
+ }
229
+ }
230
+ }
231
+ return null;
232
+ }
233
+
234
+ /**Finds a bond between two specified atoms the first of which must be within the fragment, throwing if it fails.
235
+ *
236
+ * @param ID1 The id of one atom
237
+ * @param ID2 The id of the other atom
238
+ * @return The bond found
239
+ * @throws StructureBuildingException
240
+ */
241
+ Bond findBondOrThrow(int ID1, int ID2) throws StructureBuildingException {
242
+ Bond b = findBond(ID1, ID2);
243
+ if(b == null) {
244
+ throw new StructureBuildingException("Couldn't find specified bond");
245
+ }
246
+ return b;
247
+ }
248
+
249
+ /**Works out how many atoms there are in the fragment there are
250
+ * with consecutive locants, starting from 1 that are in a chain
251
+ *
252
+ * @return The number of atoms in the locant chain
253
+ */
254
+ int getChainLength() {
255
+ int length = 0;
256
+ Atom next = getAtomByLocant(Integer.toString(length + 1));
257
+ Atom previous = null;
258
+ while (next != null){
259
+ if (previous != null && previous.getBondToAtom(next) == null){
260
+ break;
261
+ }
262
+ length++;
263
+ previous = next;
264
+ next = getAtomByLocant(Integer.toString(length + 1));
265
+ }
266
+ return length;
267
+ }
268
+
269
+ /**
270
+ * Gets the type of the corresponding tokenEl
271
+ * Returns "" if undefined
272
+ * @return
273
+ */
274
+ String getType() {
275
+ String type = tokenEl.getAttributeValue(TYPE_ATR);
276
+ return type != null ? type : "";
277
+ }
278
+
279
+ /**
280
+ * Gets the subType of the corresponding tokenEl
281
+ * Returns "" if undefined
282
+ * @return
283
+ */
284
+ String getSubType() {
285
+ String subType = tokenEl.getAttributeValue(SUBTYPE_ATR);
286
+ return subType != null ? subType : "";
287
+ }
288
+
289
+ /**
290
+ * Gets the associate tokenEl
291
+ * Whether or not this is a real token can be tested by whether it has a parent
292
+ * @return
293
+ */
294
+ Element getTokenEl() {
295
+ return tokenEl;
296
+ }
297
+
298
+ /**
299
+ * Sets the associated tokenEl
300
+ * Type/subType are inherited from the tokenEl
301
+ * @param tokenEl
302
+ */
303
+ void setTokenEl(Element tokenEl) {
304
+ this.tokenEl = tokenEl;
305
+ }
306
+
307
+ /**
308
+ * How many OutAtoms (i.e. radicals) are associated with this fragment
309
+ * @return
310
+ */
311
+ int getOutAtomCount() {
312
+ return outAtoms.size();
313
+ }
314
+
315
+ /**
316
+ * Gets the outAtom at a specific index of the outAtoms linkedList
317
+ * @param i
318
+ * @return
319
+ */
320
+ OutAtom getOutAtom(int i) {
321
+ return outAtoms.get(i);
322
+ }
323
+
324
+ /**
325
+ * Adds an outAtom
326
+ * @param id
327
+ * @param valency
328
+ * @param setExplicitly
329
+ * @throws StructureBuildingException
330
+ */
331
+ void addOutAtom(int id, int valency, Boolean setExplicitly) throws StructureBuildingException {
332
+ addOutAtom(getAtomByIDOrThrow(id), valency, setExplicitly);
333
+ }
334
+
335
+ /**
336
+ * Adds an outAtom
337
+ * @param atom
338
+ * @param valency
339
+ * @param setExplicitly
340
+ */
341
+ void addOutAtom(Atom atom, int valency, Boolean setExplicitly) {
342
+ outAtoms.add(new OutAtom(atom, valency, setExplicitly));
343
+ }
344
+
345
+ /**
346
+ * Includes the OutAtoms of a given fragment into this fragment
347
+ * Note that no OutAtoms are created in doing this
348
+ * @param frag
349
+ */
350
+ void incorporateOutAtoms(Fragment frag) {
351
+ outAtoms.addAll(frag.outAtoms);
352
+ }
353
+
354
+ /**
355
+ * Removes the outAtom at a specific index of the outAtom linkedList
356
+ * @param i
357
+ */
358
+ void removeOutAtom(int i) {
359
+ OutAtom removedOutAtom = outAtoms.remove(i);
360
+ if (removedOutAtom.isSetExplicitly()){
361
+ removedOutAtom.getAtom().addOutValency(-removedOutAtom.getValency());
362
+ }
363
+ }
364
+
365
+ /**
366
+ * Removes the specified outAtom from the outAtoms linkedList
367
+ * @param outAtom
368
+ */
369
+ void removeOutAtom(OutAtom outAtom) {
370
+ if (outAtoms.remove(outAtom) && outAtom.isSetExplicitly()){
371
+ outAtom.getAtom().addOutValency(-outAtom.getValency());
372
+ }
373
+ }
374
+
375
+ /**
376
+ * How many functionalAtoms (i.e. locations that can form esters) are associated with this fragment
377
+ * @return
378
+ */
379
+ int getFunctionalAtomCount() {
380
+ return functionalAtoms.size();
381
+ }
382
+
383
+ /**
384
+ * Gets the functionalAtom at a specific index of the functionalAtoms linkedList
385
+ * @param i
386
+ * @return
387
+ */
388
+ FunctionalAtom getFunctionalAtom(int i) {
389
+ return functionalAtoms.get(i);
390
+ }
391
+
392
+ /**Adds a functionalAtom
393
+ * @param atom*/
394
+ void addFunctionalAtom(Atom atom) {
395
+ functionalAtoms.add(new FunctionalAtom(atom));
396
+ }
397
+
398
+ /**
399
+ * Includes the FunctionalAtoms of a given fragment into this fragment
400
+ * Note that no FunctionalAtoms are created in doing this
401
+ * @param frag
402
+ */
403
+ void incorporateFunctionalAtoms(Fragment frag) {
404
+ functionalAtoms.addAll(frag.functionalAtoms);
405
+ }
406
+
407
+ /**
408
+ * Removes the functionalAtom at a specific index of the functionalAtoms linkedList
409
+ * @param i
410
+ * @return
411
+ */
412
+ FunctionalAtom removeFunctionalAtom(int i) {
413
+ return functionalAtoms.remove(i);
414
+ }
415
+
416
+ /**
417
+ * Removes the specified functionalAtom from the functionalAtoms linkedList
418
+ * @param functionalAtom
419
+ */
420
+ void removeFunctionalAtom(FunctionalAtom functionalAtom) {
421
+ functionalAtoms.remove(functionalAtom);
422
+ }
423
+
424
+ List<Atom> getPolymerAttachmentPoints() {
425
+ return polymerAttachmentPoints;
426
+ }
427
+
428
+ void setPolymerAttachmentPoints(List<Atom> polymerAttachmentPoints) {
429
+ this.polymerAttachmentPoints = polymerAttachmentPoints;
430
+ }
431
+
432
+ /**Gets a list of atoms in the fragment that connect to a specified atom
433
+ *
434
+ * @param atom The reference atom
435
+ * @return The list of atoms connected to the atom
436
+ */
437
+ List<Atom> getIntraFragmentAtomNeighbours(Atom atom) {
438
+ List<Atom> results = new ArrayList<>(atom.getBondCount());
439
+ for(Bond b : atom.getBonds()) {
440
+ Atom otherAtom = b.getOtherAtom(atom);
441
+ if (otherAtom == null) {
442
+ throw new RuntimeException("OPSIN Bug: A bond associated with an atom does not involve it");
443
+ }
444
+ //If the other atom is in atomMapFromId then it is in this fragment
445
+ if (atomMapFromId.get(otherAtom.getID()) != null) {
446
+ results.add(otherAtom);
447
+ }
448
+ }
449
+ return results;
450
+ }
451
+
452
+ /**Calculates the number of bonds connecting to the atom, excluding bonds to implicit
453
+ * hydrogens. Double bonds count as
454
+ * two bonds, etc. Eg ethene - both C's have an incoming valency of 2.
455
+ *
456
+ * Only bonds to atoms within the fragment are counted. Suffix atoms are excluded
457
+ *
458
+ * @param atom
459
+ * @return Incoming Valency
460
+ * @throws StructureBuildingException
461
+ */
462
+ int getIntraFragmentIncomingValency(Atom atom) throws StructureBuildingException {
463
+ int v = 0;
464
+ for(Bond b : atom.getBonds()) {
465
+ //recalled atoms will be null if they are not part of this fragment
466
+ if(b.getFromAtom() == atom) {
467
+ Atom a =getAtomByID(b.getTo());
468
+ if (a != null && !a.getType().equals(SUFFIX_TYPE_VAL)){
469
+ v += b.getOrder();
470
+ }
471
+ } else if(b.getToAtom() == atom) {
472
+ Atom a =getAtomByID(b.getFrom());
473
+ if (a != null && !a.getType().equals(SUFFIX_TYPE_VAL)){
474
+ v += b.getOrder();
475
+ }
476
+ }
477
+ else{
478
+ throw new StructureBuildingException("A bond associated with an atom does not involve it");
479
+ }
480
+ }
481
+ return v;
482
+ }
483
+
484
+ /**
485
+ * Checks valencies are sensible
486
+ * @throws StructureBuildingException
487
+ */
488
+ void checkValencies() throws StructureBuildingException {
489
+ for(Atom a : atomCollection) {
490
+ if(!ValencyChecker.checkValency(a)) {
491
+ throw new StructureBuildingException("Atom is in unphysical valency state! Element: " + a.getElement() + " valency: " + a.getIncomingValency());
492
+ }
493
+ }
494
+ }
495
+
496
+ /**
497
+ * Removes an atom from this fragment
498
+ * @param atom
499
+ */
500
+ void removeAtom(Atom atom) {
501
+ int atomID =atom.getID();
502
+ atomMapFromId.remove(atomID);
503
+ for (String l : atom.getLocants()) {
504
+ atomMapFromLocant.remove(l);
505
+ }
506
+ if (defaultInAtom == atom){
507
+ defaultInAtom = null;
508
+ }
509
+ }
510
+ /**
511
+ * Retrieves the overall charge of the fragment by querying all its atoms
512
+ * @return
513
+ */
514
+ int getCharge() {
515
+ int charge=0;
516
+ for (Atom a : atomCollection) {
517
+ charge+=a.getCharge();
518
+ }
519
+ return charge;
520
+ }
521
+
522
+ Atom getDefaultInAtom() {
523
+ return defaultInAtom;
524
+ }
525
+
526
+ void setDefaultInAtom(Atom inAtom) {
527
+ this.defaultInAtom = inAtom;
528
+ }
529
+
530
+ Atom getDefaultInAtomOrFirstAtom() {
531
+ return defaultInAtom != null ? defaultInAtom : getFirstAtom();
532
+ }
533
+
534
+ /**
535
+ * Adds a mapping between the locant and atom object
536
+ * @param locant A locant as a string
537
+ * @param a An atom
538
+ */
539
+ void addMappingToAtomLocantMap(String locant, Atom a){
540
+ atomMapFromLocant.put(locant, a);
541
+ }
542
+
543
+ /**
544
+ * Removes a mapping between a locant
545
+ * @param locant A locant as a string
546
+ */
547
+ void removeMappingFromAtomLocantMap(String locant){
548
+ atomMapFromLocant.remove(locant);
549
+ }
550
+
551
+ /**
552
+ * Checks to see whether a locant is present on this fragment
553
+ * @param locant
554
+ * @return
555
+ */
556
+ boolean hasLocant(String locant) {
557
+ return getAtomByLocant(locant) != null;
558
+ }
559
+
560
+
561
+ /**
562
+ * Returns an unmodifiable list of the locants associated with this fragment
563
+ * @return
564
+ */
565
+ Set<String> getLocants() {
566
+ return Collections.unmodifiableSet(atomMapFromLocant.keySet());
567
+ }
568
+
569
+ List<Atom> getIndicatedHydrogen() {
570
+ return indicatedHydrogen;
571
+ }
572
+
573
+ void addIndicatedHydrogen(Atom atom) {
574
+ indicatedHydrogen.add(atom);
575
+ }
576
+
577
+ /**
578
+ * Returns the id of the first atom in the fragment
579
+ * @return
580
+ * @throws StructureBuildingException
581
+ */
582
+ int getIdOfFirstAtom() {
583
+ return getFirstAtom().getID();
584
+ }
585
+
586
+ /**
587
+ * Returns the the first atom in the fragment or null if it has no atoms
588
+ * Typically the first atom will be the first atom that was added to the fragment
589
+ * @return firstAtom
590
+ */
591
+ Atom getFirstAtom(){
592
+ Iterator<Atom> atomIterator =atomCollection.iterator();
593
+ if (atomIterator.hasNext()){
594
+ return atomIterator.next();
595
+ }
596
+ return null;
597
+ }
598
+
599
+ /**
600
+ * Clears and recreates atomMapFromId (and hence AtomCollection) using the order of the atoms in atomList
601
+ * @param atomList
602
+ * @throws StructureBuildingException
603
+ */
604
+ void reorderAtomCollection(List<Atom> atomList) throws StructureBuildingException {
605
+ if (atomMapFromId.size() != atomList.size()){
606
+ throw new StructureBuildingException("atom list is not the same size as the number of atoms in the fragment");
607
+ }
608
+ atomMapFromId.clear();
609
+ for (Atom atom : atomList) {
610
+ atomMapFromId.put(atom.getID(), atom);
611
+ }
612
+ }
613
+
614
+ /**
615
+ * Reorders the fragment's internal atomList by the value of the first locant of the atoms
616
+ * e.g. 1,2,3,3a,3b,4
617
+ * Used for assuring the correct order of atom iteration when performing ring fusion
618
+ * @throws StructureBuildingException
619
+ */
620
+ void sortAtomListByLocant() throws StructureBuildingException {
621
+ List<Atom> atomList =getAtomList();
622
+ Collections.sort(atomList, new FragmentTools.SortByLocants());
623
+ reorderAtomCollection(atomList);
624
+ }
625
+
626
+ @Override
627
+ public Iterator<Atom> iterator() {
628
+ return atomCollection.iterator();
629
+ }
630
+ }
631
+
632
+
633
+
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentManager.java ADDED
@@ -0,0 +1,767 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*;
4
+
5
+ import java.util.ArrayList;
6
+ import java.util.Collections;
7
+ import java.util.HashMap;
8
+ import java.util.LinkedHashMap;
9
+ import java.util.LinkedHashSet;
10
+ import java.util.List;
11
+ import java.util.Map;
12
+ import java.util.Map.Entry;
13
+ import java.util.Set;
14
+
15
+ /** Holds the Fragments during the construction of the molecule,
16
+ * handles the building of new fragments and handles the creation/deletion of atoms/bonds
17
+ *
18
+ * @author ptc24
19
+ * @author dl387
20
+ *
21
+ */
22
+ class FragmentManager {
23
+
24
+ /** A mapping between fragments and inter fragment bonds */
25
+ private final Map<Fragment,Set<Bond>> fragToInterFragmentBond = new LinkedHashMap<>();
26
+
27
+ /** All of the atom-containing fragments in the molecule */
28
+ private final Set<Fragment> fragments = fragToInterFragmentBond.keySet();
29
+
30
+ /** A builder for fragments specified as SMILES */
31
+ private final SMILESFragmentBuilder sBuilder;
32
+
33
+ /** A source of unique integers */
34
+ private final IDManager idManager;
35
+
36
/** Sets up a new FragmentManager, containing no fragments.
 *
 * @param sBuilder A SMILESFragmentBuilder - dependency injection.
 * @param idManager An IDManager.
 * @throws IllegalArgumentException if either argument is null
 */
FragmentManager(SMILESFragmentBuilder sBuilder, IDManager idManager) {
	if (sBuilder == null || idManager == null) {
		throw new IllegalArgumentException("FragmentManager was passed a null object in its constructor!");
	}
	this.sBuilder = sBuilder;
	this.idManager = idManager;
}
48
+
49
+ /** Builds a fragment, based on an SMILES string
50
+ * The fragment will not correspond to a token
51
+ *
52
+ * @param smiles The fragment to build
53
+ * @return The built fragment
54
+ * @throws StructureBuildingException
55
+ */
56
+ Fragment buildSMILES(String smiles) throws StructureBuildingException {
57
+ return buildSMILES(smiles, "", NONE_LABELS_VAL);
58
+ }
59
+
60
+ /** Builds a fragment, based on an SMILES string
61
+ * The fragment will not correspond to a token
62
+ *
63
+ * @param smiles
64
+ * @param type
65
+ * @param labelMapping
66
+ * @return
67
+ * @throws StructureBuildingException
68
+ */
69
+ Fragment buildSMILES(String smiles, String type, String labelMapping) throws StructureBuildingException {
70
+ Fragment newFrag = sBuilder.build(smiles, type, labelMapping);
71
+ addFragment(newFrag);
72
+ return newFrag;
73
+ }
74
+
75
+ /** Builds a fragment, based on an SMILES string
76
+ * The fragment will correspond to the given tokenEl
77
+ *
78
+ * @param smiles The fragment to build
79
+ * @param tokenEl The corresponding tokenEl
80
+ * @param labelMapping How to label the fragment
81
+ * @return The built fragment
82
+ * @throws StructureBuildingException
83
+ */
84
+ Fragment buildSMILES(String smiles, Element tokenEl, String labelMapping) throws StructureBuildingException {
85
+ Fragment newFrag = sBuilder.build(smiles, tokenEl, labelMapping);
86
+ addFragment(newFrag);
87
+ return newFrag;
88
+ }
89
+
90
+ /**Creates a new fragment, containing all of the atoms and bonds
91
+ * of all of the other fragments - i.e. the whole molecule. This updates
92
+ * which fragments the atoms think they are in to the new super fragment
93
+ * but does not change the original fragments.
94
+ * Hence the original fragments remain associated with their atoms
95
+ * Atoms and Bonds are not copied.
96
+ *
97
+ * @return The unified fragment
98
+ */
99
+ Fragment getUnifiedFragment() {
100
+ Fragment uniFrag = new Fragment("");
101
+ for (Entry<Fragment, Set<Bond>> entry : fragToInterFragmentBond.entrySet()) {
102
+ Fragment f = entry.getKey();
103
+ Set<Bond> interFragmentBonds = entry.getValue();
104
+ for(Atom atom : f.getAtomList()) {
105
+ uniFrag.addAtom(atom);
106
+ }
107
+ for(Bond bond : f.getBondSet()) {
108
+ uniFrag.addBond(bond);
109
+ }
110
+ uniFrag.incorporateOutAtoms(f);
111
+ uniFrag.incorporateFunctionalAtoms(f);
112
+
113
+ for (Bond interFragmentBond : interFragmentBonds) {
114
+ uniFrag.addBond(interFragmentBond);
115
+ }
116
+ }
117
+ addFragment(uniFrag);
118
+ return uniFrag;
119
+ }
120
+
121
+ /** Incorporates a fragment, usually a suffix, into a parent fragment
122
+ * This does:
123
+ * Imports all of the atoms and bonds from another fragment into this one.
124
+ * Also imports outAtoms and functionalAtoms
125
+ * Reassigns inter-fragment bonds of the child fragment as either intra-fragment bonds
126
+ * of the parent fragment or as inter-fragment bonds of the parent fragment
127
+ *
128
+ * The original fragment still maintains its original atomList/bondList
129
+ *
130
+ * @param childFrag The fragment to be incorporated
131
+ * @param parentFrag The parent fragment
132
+ * @throws StructureBuildingException
133
+ */
134
+ void incorporateFragment(Fragment childFrag, Fragment parentFrag) throws StructureBuildingException {
135
+ for(Atom atom : childFrag.getAtomList()) {
136
+ parentFrag.addAtom(atom);
137
+ }
138
+ for(Bond bond : childFrag.getBondSet()) {
139
+ parentFrag.addBond(bond);
140
+ }
141
+ parentFrag.incorporateOutAtoms(childFrag);
142
+ parentFrag.incorporateFunctionalAtoms(childFrag);
143
+
144
+ Set<Bond> interFragmentBonds = fragToInterFragmentBond.get(childFrag);
145
+ if (interFragmentBonds == null){
146
+ throw new StructureBuildingException("Fragment not registered with this FragmentManager!");
147
+ }
148
+ for (Bond bond : interFragmentBonds) {//reassign inter-fragment bonds of child
149
+ if (bond.getFromAtom().getFrag() == parentFrag && bond.getToAtom().getFrag() == parentFrag){
150
+ //bond is now enclosed within parentFrag so make it an intra-fragment bond
151
+ //and remove it from the inter-fragment set of the parentFrag
152
+ parentFrag.addBond(bond);
153
+ fragToInterFragmentBond.get(parentFrag).remove(bond);
154
+ }
155
+ else{
156
+ //bond was an inter-fragment bond between the childFrag and another frag
157
+ //It is now between the parentFrag and another frag
158
+ addInterFragmentBond(bond);
159
+ }
160
+ }
161
+ fragToInterFragmentBond.remove(childFrag);
162
+ }
163
+
164
+ /** Incorporates a fragment, usually a suffix, into a parent fragment, creating a bond between them.
165
+ *
166
+ * @param childFrag The fragment to be incorporated
167
+ * @param fromAtom An atom on that fragment
168
+ * @param parentFrag The parent fragment
169
+ * @param toAtom An atom on that fragment
170
+ * @param bondOrder The order of the joining bond
171
+ * @throws StructureBuildingException
172
+ */
173
+ void incorporateFragment(Fragment childFrag, Atom fromAtom, Fragment parentFrag, Atom toAtom, int bondOrder) throws StructureBuildingException {
174
+ if (!fromAtom.getFrag().equals(childFrag)){
175
+ throw new StructureBuildingException("OPSIN Bug: fromAtom was not associated with childFrag!");
176
+ }
177
+ if (!toAtom.getFrag().equals(parentFrag)){
178
+ throw new StructureBuildingException("OPSIN Bug: toAtom was not associated with parentFrag!");
179
+ }
180
+ incorporateFragment(childFrag, parentFrag);
181
+ createBond(fromAtom, toAtom, bondOrder);
182
+ }
183
+
184
+ /** Converts an atom in a fragment to a different atomic symbol described by a SMILES string
185
+ * Charged atoms can also be specified eg. [NH4+]
186
+ *
187
+ * @param a The atom to change to a heteroatom
188
+ * @param smiles The SMILES for one atom
189
+ * @throws StructureBuildingException
190
+ */
191
+ void replaceAtomWithSmiles(Atom a, String smiles) throws StructureBuildingException {
192
+ replaceAtomWithAtom(a, getHeteroatom(smiles), false);
193
+ }
194
+
195
+ /**
196
+ * Converts the smiles for a heteroatom to an atom
197
+ * @param smiles
198
+ * @return
199
+ * @throws StructureBuildingException
200
+ */
201
+ Atom getHeteroatom(String smiles) throws StructureBuildingException {
202
+ Fragment heteroAtomFrag = sBuilder.build(smiles);
203
+ if (heteroAtomFrag.getAtomCount() != 1){
204
+ throw new StructureBuildingException("Heteroatom smiles described a fragment with multiple SMILES!");
205
+ }
206
+ return heteroAtomFrag.getFirstAtom();
207
+ }
208
+
209
+ /** Uses the information given in the given heteroatom to change the atomic symbol
210
+ * and charge of the given atom
211
+ *
212
+ * @param a The atom to change to a heteroatom
213
+ * @param heteroAtom The atom to copy element/charge properties from
214
+ * @param assignLocant Whether a locant should be assigned to the heteroatom if the locant is not used elsewhere
215
+ * @throws StructureBuildingException if a charge disagreement occurs
216
+ */
217
+ void replaceAtomWithAtom(Atom a, Atom heteroAtom, boolean assignLocant) throws StructureBuildingException {
218
+ ChemEl chemEl =heteroAtom.getElement();
219
+ int replacementCharge =heteroAtom.getCharge();
220
+ if (replacementCharge!=0){
221
+ if (a.getCharge()==0){
222
+ a.addChargeAndProtons(replacementCharge, heteroAtom.getProtonsExplicitlyAddedOrRemoved());
223
+ }
224
+ else if (a.getCharge()==replacementCharge){
225
+ a.setProtonsExplicitlyAddedOrRemoved(heteroAtom.getProtonsExplicitlyAddedOrRemoved());
226
+ }
227
+ else{
228
+ throw new StructureBuildingException("Charge conflict between replacement term and atom to be replaced");
229
+ }
230
+ }
231
+ a.setElement(chemEl);
232
+ a.removeElementSymbolLocants();
233
+ if (assignLocant){
234
+ String primes = "";
235
+ while (a.getFrag().getAtomByLocant(chemEl.toString() + primes) != null){//if element symbol already assigned, add a prime and try again
236
+ primes += "'";
237
+ }
238
+ a.addLocant(chemEl.toString() + primes);
239
+ }
240
+ }
241
+
242
+ /** Gets an atom, given an id number
243
+ * Use this if you don't know what fragment the atom is in
244
+ * @param id The id of the atom
245
+ * @return The atom, or null if no such atom exists.
246
+ */
247
+ Atom getAtomByID(int id) {
248
+ for(Fragment f : fragments) {
249
+ Atom a = f.getAtomByID(id);
250
+ if(a != null) {
251
+ return a;
252
+ }
253
+ }
254
+ return null;
255
+ }
256
+
257
+ /** Gets an atom, given an id number, throwing if fails.
258
+ * Use this if you don't know what fragment the atom is in
259
+ * @param id The id of the atom
260
+ * @return The atom
261
+ * @throws StructureBuildingException
262
+ */
263
+ Atom getAtomByIDOrThrow(int id) throws StructureBuildingException {
264
+ Atom a = getAtomByID(id);
265
+ if(a == null) {
266
+ throw new StructureBuildingException("Couldn't get atom by id");
267
+ }
268
+ return a;
269
+ }
270
+
271
+ /**Turns all of the spare valencies in the fragments into double bonds.
272
+ *
273
+ * @throws StructureBuildingException
274
+ */
275
+ void convertSpareValenciesToDoubleBonds() throws StructureBuildingException {
276
+ for(Fragment f : fragments) {
277
+ FragmentTools.convertSpareValenciesToDoubleBonds(f);
278
+ }
279
+ }
280
+
281
+ /**
282
+ * Checks valencies are all chemically reasonable. An exception is thrown if any are not
283
+ * @throws StructureBuildingException
284
+ */
285
+ void checkValencies() throws StructureBuildingException {
286
+ for(Fragment f : fragments) {
287
+ f.checkValencies();
288
+ }
289
+ }
290
+
291
+ Set<Fragment> getFragments() {
292
+ return Collections.unmodifiableSet(fragments);
293
+ }
294
+
295
+ /**
296
+ * Registers a fragment
297
+ * @param frag
298
+ */
299
+ private void addFragment(Fragment frag) {
300
+ fragToInterFragmentBond.put(frag, new LinkedHashSet<>());
301
+ }
302
+
303
+ /**
304
+ * Removes a fragment
305
+ * Any inter-fragment bonds of this fragment are removed from the fragments it was connected to
306
+ * Throws an exception if fragment wasn't present
307
+ * @param frag
308
+ * @throws StructureBuildingException
309
+ */
310
+ void removeFragment(Fragment frag) throws StructureBuildingException {
311
+ Set<Bond> interFragmentBondsInvolvingFragmentSet = fragToInterFragmentBond.get(frag);
312
+ if (interFragmentBondsInvolvingFragmentSet == null) {
313
+ throw new StructureBuildingException("Fragment not registered with this FragmentManager!");
314
+ }
315
+ List<Bond> interFragmentBondsInvolvingFragment = new ArrayList<>(interFragmentBondsInvolvingFragmentSet);
316
+ for (Bond bond : interFragmentBondsInvolvingFragment) {
317
+ if (bond.getFromAtom().getFrag() == frag){
318
+ fragToInterFragmentBond.get(bond.getToAtom().getFrag()).remove(bond);
319
+ }
320
+ else{
321
+ fragToInterFragmentBond.get(bond.getFromAtom().getFrag()).remove(bond);
322
+ }
323
+ }
324
+ fragToInterFragmentBond.remove(frag);
325
+ }
326
+
327
+ int getOverallCharge() {
328
+ int totalCharge = 0;
329
+ for (Fragment frag : fragments) {
330
+ totalCharge += frag.getCharge();
331
+ }
332
+ return totalCharge;
333
+ }
334
+
335
+ /**
336
+ * Creates a copy of a fragment by copying data
337
+ * labels the atoms using new ids from the idManager
338
+ * @param originalFragment
339
+ * @return the clone of the fragment
340
+ * @throws StructureBuildingException
341
+ */
342
+ Fragment copyFragment(Fragment originalFragment) throws StructureBuildingException {
343
+ return copyAndRelabelFragment(originalFragment, 0);
344
+ }
345
+
346
+ /**
347
+ * Creates a copy of a fragment by copying data
348
+ * labels the atoms using new ids from the idManager
349
+ * @param originalFragment
350
+ * @param primesToAdd: The minimum number of primes to add to the cloned atoms. More primes will be added if necessary to keep the locants unique e.g. N in the presence of N' becomes N'' when this is 1
351
+ * @return the clone of the fragment
352
+ */
353
+ Fragment copyAndRelabelFragment(Fragment originalFragment, int primesToAdd) {
354
+ Element tokenEl = new TokenEl("");
355
+ tokenEl.addAttribute(TYPE_ATR, originalFragment.getType());
356
+ tokenEl.addAttribute(SUBTYPE_ATR, originalFragment.getSubType());
357
+ Fragment newFragment = new Fragment(tokenEl);
358
+ HashMap<Atom, Atom> oldToNewAtomMap = new HashMap<>();//maps old Atom to new Atom
359
+ List<Atom> atomList =originalFragment.getAtomList();
360
+ for (Atom atom : atomList) {
361
+ int id = idManager.getNextID();
362
+ ArrayList<String> newLocants = new ArrayList<>(atom.getLocants());
363
+ if (primesToAdd !=0){
364
+ for (int i = 0; i < newLocants.size(); i++) {
365
+ String currentLocant = newLocants.get(i);
366
+ int currentPrimes = StringTools.countTerminalPrimes(currentLocant);
367
+ String locantSansPrimes = currentLocant.substring(0, currentLocant.length()-currentPrimes);
368
+ int highestNumberOfPrimesWithThisLocant = currentPrimes;
369
+ while (originalFragment.getAtomByLocant(locantSansPrimes + StringTools.multiplyString("'", highestNumberOfPrimesWithThisLocant +1 ))!=null){
370
+ highestNumberOfPrimesWithThisLocant++;
371
+ }
372
+ newLocants.set(i, locantSansPrimes + StringTools.multiplyString("'", ((highestNumberOfPrimesWithThisLocant +1)*primesToAdd) + currentPrimes));
373
+ }
374
+ }
375
+ Atom newAtom =new Atom(id, atom.getElement(), newFragment);
376
+ for (String newLocant : newLocants) {
377
+ newAtom.addLocant(newLocant);
378
+ }
379
+ newAtom.setCharge(atom.getCharge());
380
+ newAtom.setIsotope(atom.getIsotope());
381
+ newAtom.setSpareValency(atom.hasSpareValency());
382
+ newAtom.setProtonsExplicitlyAddedOrRemoved(atom.getProtonsExplicitlyAddedOrRemoved());
383
+ newAtom.setLambdaConventionValency(atom.getLambdaConventionValency());
384
+ //outValency is derived from the outAtoms so is automatically cloned
385
+ newAtom.setAtomIsInACycle(atom.getAtomIsInACycle());
386
+ newAtom.setType(atom.getType());//may be different from fragment type if the original atom was formerly in a suffix
387
+ newAtom.setMinimumValency(atom.getMinimumValency());
388
+ newAtom.setImplicitHydrogenAllowed(atom.getImplicitHydrogenAllowed());
389
+ newFragment.addAtom(newAtom);
390
+ oldToNewAtomMap.put(atom, newAtom);
391
+ }
392
+ for (Atom atom : atomList) {
393
+ if (atom.getAtomParity() != null){
394
+ Atom[] oldAtomRefs4 = atom.getAtomParity().getAtomRefs4();
395
+ Atom[] newAtomRefs4 = new Atom[4];
396
+ for (int i = 0; i < oldAtomRefs4.length; i++) {
397
+ Atom oldAtom = oldAtomRefs4[i];
398
+ if (oldAtom.equals(AtomParity.hydrogen)){
399
+ newAtomRefs4[i] = AtomParity.hydrogen;
400
+ }
401
+ else if (oldAtom.equals(AtomParity.deoxyHydrogen)){
402
+ newAtomRefs4[i] = AtomParity.deoxyHydrogen;
403
+ }
404
+ else{
405
+ newAtomRefs4[i] = oldToNewAtomMap.get(oldAtom);
406
+ }
407
+ }
408
+ AtomParity newAtomParity =new AtomParity(newAtomRefs4, atom.getAtomParity().getParity());
409
+ newAtomParity.setStereoGroup(atom.getAtomParity().getStereoGroup());
410
+ oldToNewAtomMap.get(atom).setAtomParity(newAtomParity);
411
+ }
412
+ Set<Atom> oldAmbiguousElementAssignmentAtoms = atom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT);
413
+ if (oldAmbiguousElementAssignmentAtoms!=null){
414
+ Set<Atom> newAtoms = new LinkedHashSet<>();
415
+ for (Atom oldAtom : oldAmbiguousElementAssignmentAtoms) {
416
+ newAtoms.add(oldToNewAtomMap.get(oldAtom));
417
+ }
418
+ oldToNewAtomMap.get(atom).setProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT, newAtoms);
419
+ }
420
+ Integer smilesHydrogenCount = atom.getProperty(Atom.SMILES_HYDROGEN_COUNT);
421
+ if (smilesHydrogenCount!=null){
422
+ oldToNewAtomMap.get(atom).setProperty(Atom.SMILES_HYDROGEN_COUNT, smilesHydrogenCount);
423
+ }
424
+ Integer oxidationNumber = atom.getProperty(Atom.OXIDATION_NUMBER);
425
+ if (oxidationNumber!=null){
426
+ oldToNewAtomMap.get(atom).setProperty(Atom.OXIDATION_NUMBER, oxidationNumber);
427
+ }
428
+ Boolean isAldehyde = atom.getProperty(Atom.ISALDEHYDE);
429
+ if (isAldehyde!=null){
430
+ oldToNewAtomMap.get(atom).setProperty(Atom.ISALDEHYDE, isAldehyde);
431
+ }
432
+ Boolean isAnomeric = atom.getProperty(Atom.ISANOMERIC);
433
+ if (isAnomeric!=null){
434
+ oldToNewAtomMap.get(atom).setProperty(Atom.ISANOMERIC, isAnomeric);
435
+ }
436
+ Integer atomClass = atom.getProperty(Atom.ATOM_CLASS);
437
+ if (atomClass!=null){
438
+ oldToNewAtomMap.get(atom).setProperty(Atom.ATOM_CLASS, atomClass);
439
+ }
440
+ String homologyGroup = atom.getProperty(Atom.HOMOLOGY_GROUP);
441
+ if (homologyGroup != null) {
442
+ oldToNewAtomMap.get(atom).setProperty(Atom.HOMOLOGY_GROUP, homologyGroup);
443
+ }
444
+ List<Atom> oldPositionVariationAtoms = atom.getProperty(Atom.POSITION_VARIATION_BOND);
445
+ if (oldPositionVariationAtoms != null) {
446
+ List<Atom> newAtoms = new ArrayList<>();
447
+ for (Atom oldAtom : oldPositionVariationAtoms) {
448
+ newAtoms.add(oldToNewAtomMap.get(oldAtom));
449
+ }
450
+ oldToNewAtomMap.get(atom).setProperty(Atom.POSITION_VARIATION_BOND, newAtoms);
451
+ }
452
+ }
453
+ for (int i = 0, l = originalFragment.getOutAtomCount(); i < l; i++) {
454
+ OutAtom outAtom = originalFragment.getOutAtom(i);
455
+ newFragment.addOutAtom(oldToNewAtomMap.get(outAtom.getAtom()), outAtom.getValency(), outAtom.isSetExplicitly());
456
+ if (outAtom.getLocant() !=null){
457
+ newFragment.getOutAtom(newFragment.getOutAtomCount() -1).setLocant(outAtom.getLocant() + StringTools.multiplyString("'", primesToAdd) );
458
+ }
459
+ }
460
+ for (int i = 0, l = originalFragment.getFunctionalAtomCount(); i < l; i++) {
461
+ FunctionalAtom functionalAtom = originalFragment.getFunctionalAtom(i);
462
+ newFragment.addFunctionalAtom(oldToNewAtomMap.get(functionalAtom.getAtom()));
463
+ }
464
+ if (originalFragment.getDefaultInAtom() != null) {
465
+ newFragment.setDefaultInAtom(oldToNewAtomMap.get(originalFragment.getDefaultInAtom()));
466
+ }
467
+ Set<Bond> bondSet =originalFragment.getBondSet();
468
+ for (Bond bond : bondSet) {
469
+ Bond newBond = createBond(oldToNewAtomMap.get(bond.getFromAtom()), oldToNewAtomMap.get(bond.getToAtom()), bond.getOrder());
470
+ newBond.setSmilesStereochemistry(bond.getSmilesStereochemistry());
471
+ if (bond.getBondStereo() != null){
472
+ Atom[] oldAtomRefs4 = bond.getBondStereo().getAtomRefs4();
473
+ Atom[] newAtomRefs4 = new Atom[4];
474
+ for (int i = 0; i < oldAtomRefs4.length; i++) {
475
+ newAtomRefs4[i] = oldToNewAtomMap.get(oldAtomRefs4[i]);
476
+ }
477
+ newBond.setBondStereoElement(newAtomRefs4, bond.getBondStereo().getBondStereoValue());
478
+ }
479
+ }
480
+ List<Atom> indicatedHydrogenAtoms = originalFragment.getIndicatedHydrogen();
481
+ for (Atom atom : indicatedHydrogenAtoms) {
482
+ newFragment.addIndicatedHydrogen(oldToNewAtomMap.get(atom));
483
+ }
484
+ addFragment(newFragment);
485
+ return newFragment;
486
+ }
487
+
488
+ /**
489
+ * Takes an element and produces a copy of it. Groups and suffixes are copied so that the new element
490
+ * has its own group and suffix fragments
491
+ * @param elementToBeCloned
492
+ * @param state The current buildstate
493
+ * @return
494
+ * @throws StructureBuildingException
495
+ */
496
+ Element cloneElement(BuildState state, Element elementToBeCloned) throws StructureBuildingException {
497
+ return cloneElement(state, elementToBeCloned, 0);
498
+ }
499
+
500
+ /**
501
+ * Takes an element and produces a copy of it. Groups and suffixes are copied so that the new element
502
+ * has its own group and suffix fragments
503
+ * @param elementToBeCloned
504
+ * @param state The current buildstate
505
+ * @param primesToAdd: The minimum number of primes to add to the cloned atoms. More primes will be added if necessary to keep the locants unique e.g. N in the presence of N' becomes N'' when this is 1
506
+ * @return
507
+ * @throws StructureBuildingException
508
+ */
509
+ Element cloneElement(BuildState state, Element elementToBeCloned, int primesToAdd) throws StructureBuildingException {
510
+ Element clone = elementToBeCloned.copy();
511
+ List<Element> originalGroups = OpsinTools.getDescendantElementsWithTagName(elementToBeCloned, XmlDeclarations.GROUP_EL);
512
+ List<Element> clonedGroups = OpsinTools.getDescendantElementsWithTagName(clone, XmlDeclarations.GROUP_EL);
513
+ HashMap<Fragment,Fragment> oldNewFragmentMapping =new LinkedHashMap<>();
514
+ for (int i = 0; i < originalGroups.size(); i++) {
515
+ Fragment originalFragment = originalGroups.get(i).getFrag();
516
+ Fragment newFragment = copyAndRelabelFragment(originalFragment, primesToAdd);
517
+ oldNewFragmentMapping.put(originalFragment, newFragment);
518
+ newFragment.setTokenEl(clonedGroups.get(i));
519
+ clonedGroups.get(i).setFrag(newFragment);
520
+ List<Fragment> originalSuffixes =state.xmlSuffixMap.get(originalGroups.get(i));
521
+ List<Fragment> newSuffixFragments =new ArrayList<>();
522
+ for (Fragment suffix : originalSuffixes) {
523
+ newSuffixFragments.add(copyFragment(suffix));
524
+ }
525
+ state.xmlSuffixMap.put(clonedGroups.get(i), newSuffixFragments);
526
+ }
527
+ Set<Bond> interFragmentBondsToClone = new LinkedHashSet<>();
528
+ for (Fragment originalFragment : oldNewFragmentMapping.keySet()) {//add inter fragment bonds to cloned fragments
529
+ for (Bond bond : fragToInterFragmentBond.get(originalFragment)) {
530
+ interFragmentBondsToClone.add(bond);
531
+ }
532
+ }
533
+ for (Bond bond : interFragmentBondsToClone) {
534
+ Atom originalFromAtom = bond.getFromAtom();
535
+ Atom originalToAtom = bond.getToAtom();
536
+ Fragment originalFragment1 = originalFromAtom.getFrag();
537
+ Fragment originalFragment2 = originalToAtom.getFrag();
538
+ if (!oldNewFragmentMapping.containsKey(originalFragment1) || (!oldNewFragmentMapping.containsKey(originalFragment2))){
539
+ throw new StructureBuildingException("An element that was a clone contained a bond that went outside the scope of the cloning");
540
+ }
541
+ Fragment newFragment1 = oldNewFragmentMapping.get(originalFragment1);
542
+ Fragment newFragment2 = oldNewFragmentMapping.get(originalFragment2);
543
+ Atom fromAtom = newFragment1.getAtomList().get(originalFragment1.getAtomList().indexOf(originalFromAtom));
544
+ Atom toAtom = newFragment2.getAtomList().get(originalFragment2.getAtomList().indexOf(originalToAtom));
545
+ createBond(fromAtom, toAtom, bond.getOrder());
546
+ }
547
+ return clone;
548
+ }
549
+
550
+ /**
551
+ * Takes an atom, removes it and bonds everything that was bonded to it to the replacementAtom with the original bond orders.
552
+ * Non element symbol locants are copied to the replacement atom
553
+ * @param atomToBeReplaced
554
+ * @param replacementAtom
555
+ */
556
+ void replaceAtomWithAnotherAtomPreservingConnectivity(Atom atomToBeReplaced, Atom replacementAtom) {
557
+ atomToBeReplaced.removeElementSymbolLocants();
558
+ List<String> locants = new ArrayList<>(atomToBeReplaced.getLocants());
559
+ for (String locant : locants) {
560
+ atomToBeReplaced.removeLocant(locant);
561
+ replacementAtom.addLocant(locant);
562
+ }
563
+ List<Bond> bonds = atomToBeReplaced.getBonds();
564
+ for (Bond bond : bonds) {
565
+ Atom connectedAtom = bond.getOtherAtom(atomToBeReplaced);
566
+ if (connectedAtom.getAtomParity() != null){
567
+ Atom[] atomRefs4 = connectedAtom.getAtomParity().getAtomRefs4();
568
+ for (int i = 0 ; i < 4; i++) {
569
+ if (atomRefs4[i] == atomToBeReplaced){
570
+ atomRefs4[i] = replacementAtom;
571
+ break;
572
+ }
573
+ }
574
+ }
575
+ if (bond.getBondStereo() != null){
576
+ Atom[] atomRefs4 = bond.getBondStereo().getAtomRefs4();
577
+ for (int i = 0 ; i < 4; i++) {
578
+ if (atomRefs4[i] == atomToBeReplaced){
579
+ atomRefs4[i] = replacementAtom;
580
+ break;
581
+ }
582
+ }
583
+ }
584
+ createBond(replacementAtom, bond.getOtherAtom(atomToBeReplaced), bond.getOrder());
585
+ }
586
+ removeAtomAndAssociatedBonds(atomToBeReplaced);
587
+ }
588
+
589
+ /**
590
+ * Removes a bond from the inter-fragment bond mappings if it was present
591
+ * @param bond
592
+ */
593
+ private void removeInterFragmentBondIfPresent(Bond bond) {
594
+ fragToInterFragmentBond.get(bond.getFromAtom().getFrag()).remove(bond);
595
+ fragToInterFragmentBond.get(bond.getToAtom().getFrag()).remove(bond);
596
+ }
597
+
598
+ /**
599
+ * Adds a bond to the fragment to inter-fragment bond mappings
600
+ * @param bond
601
+ */
602
+ private void addInterFragmentBond(Bond bond) {
603
+ fragToInterFragmentBond.get(bond.getFromAtom().getFrag()).add(bond);
604
+ fragToInterFragmentBond.get(bond.getToAtom().getFrag()).add(bond);
605
+ }
606
+
607
+ /**
608
+ * Gets an unmodifiable view of the set of the inter-fragment bonds a fragment is involved in
609
+ * @param frag
610
+ * @return set of inter fragment bonds
611
+ */
612
+ Set<Bond> getInterFragmentBonds(Fragment frag) {
613
+ Set<Bond> interFragmentBonds = fragToInterFragmentBond.get(frag);
614
+ if (interFragmentBonds == null) {
615
+ throw new IllegalArgumentException("Fragment not registered with this FragmentManager!");
616
+ }
617
+ return Collections.unmodifiableSet(interFragmentBonds);
618
+ }
619
+
620
+ /**
621
+ * Create a new Atom of the given element belonging to the given fragment
622
+ * @param chemEl
623
+ * @param frag
624
+ * @return Atom
625
+ */
626
+ Atom createAtom(ChemEl chemEl, Fragment frag) {
627
+ Atom a = new Atom(idManager.getNextID(), chemEl, frag);
628
+ frag.addAtom(a);
629
+ return a;
630
+ }
631
+
632
+ /**
633
+ * Create a new bond between two atoms.
634
+ * The bond is associated with these atoms.
635
+ * It is also listed as an inter-fragment bond or associated with a fragment
636
+ * @param fromAtom
637
+ * @param toAtom
638
+ * @param bondOrder
639
+ * @return Bond
640
+ */
641
+ Bond createBond(Atom fromAtom, Atom toAtom, int bondOrder) {
642
+ Bond b = new Bond(fromAtom, toAtom, bondOrder);
643
+ fromAtom.addBond(b);
644
+ toAtom.addBond(b);
645
+ if (fromAtom.getFrag() == toAtom.getFrag()){
646
+ fromAtom.getFrag().addBond(b);
647
+ }
648
+ else{
649
+ addInterFragmentBond(b);
650
+ }
651
+ return b;
652
+ }
653
+
654
+ void removeAtomAndAssociatedBonds(Atom atom){
655
+ List<Bond> bondsToBeRemoved = new ArrayList<>(atom.getBonds());
656
+ for (Bond bond : bondsToBeRemoved) {
657
+ removeBond(bond);
658
+ }
659
+ atom.getFrag().removeAtom(atom);
660
+ Set<Atom> ambiguousElementAssignment = atom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT);
661
+ if (ambiguousElementAssignment != null){
662
+ ambiguousElementAssignment.remove(atom);
663
+ if (ambiguousElementAssignment.size() == 1){
664
+ ambiguousElementAssignment.iterator().next().setProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT, null);
665
+ }
666
+ }
667
+ }
668
+
669
+ void removeBond(Bond bond){
670
+ bond.getFromAtom().getFrag().removeBond(bond);
671
+ bond.getFromAtom().removeBond(bond);
672
+ bond.getToAtom().removeBond(bond);
673
+ removeInterFragmentBondIfPresent(bond);
674
+ }
675
+
676
+ /**
677
+ * Valency is used to determine the expected number of hydrogen
678
+ * Hydrogens are then added to bring the number of connections up to the minimum required to satisfy the atom's valency
679
+ * This allows the valency of the atom to be encoded e.g. phopshane-3 hydrogen, phosphorane-5 hydrogen.
680
+ * It is also necessary when considering stereochemistry as a hydrogen beats nothing in the CIP rules
681
+ * @throws StructureBuildingException
682
+ */
683
+ void makeHydrogensExplicit() throws StructureBuildingException {
684
+ for (Fragment fragment : fragments) {
685
+ List<Atom> atomList = fragment.getAtomList();
686
+ for (Atom parentAtom : atomList) {
687
+ int explicitHydrogensToAdd = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(parentAtom);
688
+ for (int i = 0; i < explicitHydrogensToAdd; i++) {
689
+ Atom hydrogen = createAtom(ChemEl.H, fragment);
690
+ createBond(parentAtom, hydrogen, 1);
691
+ }
692
+ if (parentAtom.getAtomParity() != null){
693
+ if (explicitHydrogensToAdd > 1) {
694
+ //Cannot have tetrahedral chirality and more than 2 hydrogens
695
+ parentAtom.setAtomParity(null);//probably caused by deoxy
696
+ }
697
+ else {
698
+ modifyAtomParityToTakeIntoAccountExplicitHydrogen(parentAtom);
699
+ }
700
+ }
701
+ }
702
+ }
703
+ }
704
+
705
+ private void modifyAtomParityToTakeIntoAccountExplicitHydrogen(Atom atom) throws StructureBuildingException {
706
+ AtomParity atomParity = atom.getAtomParity();
707
+ if (!StereoAnalyser.isPossiblyStereogenic(atom)){
708
+ //no longer a stereoCentre e.g. due to unsaturation
709
+ atom.setAtomParity(null);
710
+ }
711
+ else{
712
+ Atom[] atomRefs4 = atomParity.getAtomRefs4();
713
+ Integer positionOfImplicitHydrogen = null;
714
+ Integer positionOfDeoxyHydrogen = null;
715
+ for (int i = 0; i < atomRefs4.length; i++) {
716
+ Atom a = atomRefs4[i];
717
+ if (a.equals(AtomParity.hydrogen)){
718
+ positionOfImplicitHydrogen = i;
719
+ }
720
+ else if (a.equals(AtomParity.deoxyHydrogen)){
721
+ positionOfDeoxyHydrogen = i;
722
+ }
723
+ }
724
+ if (positionOfImplicitHydrogen != null || positionOfDeoxyHydrogen != null) {
725
+ //atom parity was set in SMILES, the dummy hydrogen atom has now been substituted
726
+ List<Atom> neighbours = atom.getAtomNeighbours();
727
+ for (Atom atomRef : atomRefs4) {
728
+ neighbours.remove(atomRef);
729
+ }
730
+ if (neighbours.size() == 0) {
731
+ throw new StructureBuildingException("OPSIN Bug: Unable to determine which atom has substituted a hydrogen at stereocentre");
732
+ }
733
+ else if (neighbours.size() == 1 && positionOfDeoxyHydrogen != null) {
734
+ atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(0);
735
+ if (positionOfImplicitHydrogen != null){
736
+ throw new StructureBuildingException("OPSIN Bug: Unable to determine which atom has substituted a hydrogen at stereocentre");
737
+ }
738
+ }
739
+ else if (neighbours.size() == 1 && positionOfImplicitHydrogen != null) {
740
+ atomRefs4[positionOfImplicitHydrogen] = neighbours.get(0);
741
+ }
742
+ else if (neighbours.size() == 2 && positionOfDeoxyHydrogen != null && positionOfImplicitHydrogen != null) {
743
+ try{
744
+ List<Atom> cipOrderedAtoms = new CipSequenceRules(atom).getNeighbouringAtomsInCipOrder();
745
+ //higher priority group replaces the former hydroxy groups (deoxyHydrogen)
746
+ if (cipOrderedAtoms.indexOf(neighbours.get(0)) > cipOrderedAtoms.indexOf(neighbours.get(1))) {
747
+ atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(0);
748
+ atomRefs4[positionOfImplicitHydrogen] = neighbours.get(1);
749
+ }
750
+ else{
751
+ atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(1);
752
+ atomRefs4[positionOfImplicitHydrogen] = neighbours.get(0);
753
+ }
754
+ }
755
+ catch (CipOrderingException e){
756
+ //assume ligands equivalent so it makes no difference which is which
757
+ atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(0);
758
+ atomRefs4[positionOfImplicitHydrogen] = neighbours.get(1);
759
+ }
760
+ }
761
+ else{
762
+ throw new StructureBuildingException("OPSIN Bug: Unable to determine which atom has substituted a hydrogen at stereocentre");
763
+ }
764
+ }
765
+ }
766
+ }
767
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentTools.java ADDED
@@ -0,0 +1,1242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.util.ArrayDeque;
4
+ import java.util.ArrayList;
5
+ import java.util.Collections;
6
+ import java.util.Comparator;
7
+ import java.util.Deque;
8
+ import java.util.HashMap;
9
+ import java.util.HashSet;
10
+ import java.util.List;
11
+ import java.util.Map;
12
+ import java.util.Set;
13
+ import java.util.regex.Matcher;
14
+ import java.util.regex.Pattern;
15
+
16
+ import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*;
17
+ import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*;
18
+
19
+ /**
20
+ * Sorts a list of atoms such that their order agrees with the order symbolic locants are typically assigned
21
+ *
22
+ * Preferred atoms are sorted to the START of the list
23
+ * @author dl387
24
+ *
25
+ */
26
+ class SortAtomsForElementSymbols implements Comparator<Atom> {
27
+
28
+ public int compare(Atom a, Atom b){
29
+ int bondOrderA = a.getProperty(Atom.VISITED);
30
+ int bondOrderB = b.getProperty(Atom.VISITED);
31
+ if (bondOrderA > bondOrderB) {//lower order bond is preferred
32
+ return 1;
33
+ }
34
+ if (bondOrderA < bondOrderB) {
35
+ return -1;
36
+ }
37
+
38
+ if (a.getOutValency() > b.getOutValency()) {//prefer atoms with outValency
39
+ return -1;
40
+ }
41
+ if (a.getOutValency() < b.getOutValency()) {
42
+ return 1;
43
+ }
44
+
45
+ int expectedHydrogenA = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(a);
46
+ int expectedHydrogenB = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(b);
47
+
48
+ if (expectedHydrogenA > expectedHydrogenB) {//prefer atoms with more hydrogen
49
+ return -1;
50
+ }
51
+ if (expectedHydrogenA < expectedHydrogenB) {
52
+ return 1;
53
+ }
54
+ return 0;
55
+ }
56
+ }
57
+
58
+ /**
59
+ * Performs a very crude sort of atoms such that those that are more likely to be substitued are preferred for low locants
60
+ * Preferred atoms are sorted to the START of the list
61
+ * @author dl387
62
+ *
63
+ */
64
+ class SortAtomsForMainGroupElementSymbols implements Comparator<Atom> {
65
+
66
+ public int compare(Atom a, Atom b){
67
+ int compare = a.getElement().compareTo(b.getElement());
68
+ if (compare != 0) {//only bother comparing properly if elements are the same
69
+ return compare;
70
+ }
71
+
72
+ int aExpectedHydrogen = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(a);
73
+ int bExpectedHydrogen = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(b);
74
+ if (aExpectedHydrogen > 0 && bExpectedHydrogen == 0) {//having substitutable hydrogen preferred
75
+ return -1;
76
+ }
77
+ if (aExpectedHydrogen == 0 && bExpectedHydrogen > 0) {
78
+ return 1;
79
+ }
80
+ List<String> locantsA = a.getLocants();
81
+ List<String> locantsB = b.getLocants();
82
+ if (locantsA.size() == 0 && locantsB.size() > 0) {//having no locants preferred
83
+ return -1;
84
+ }
85
+ if (locantsA.size() > 0 && locantsB.size() == 0) {
86
+ return 1;
87
+ }
88
+ return 0;
89
+ }
90
+ }
91
+
92
+ class FragmentTools {
93
+ /**
94
+ * Sorts by number, then by letter e.g. 4,3,3b,5,3a,2 -->2,3,3a,3b,4,5
95
+ * @author dl387
96
+ *
97
+ */
98
+ static class SortByLocants implements Comparator<Atom> {
99
+ static final Pattern locantSegmenter =Pattern.compile("(\\d+)([a-z]?)('*)");
100
+
101
+ public int compare(Atom atoma, Atom atomb){
102
+ if (atoma.getType().equals(SUFFIX_TYPE_VAL) && !atomb.getType().equals(SUFFIX_TYPE_VAL)){//suffix atoms go to the back
103
+ return 1;
104
+ }
105
+ if (atomb.getType().equals(SUFFIX_TYPE_VAL) && !atoma.getType().equals(SUFFIX_TYPE_VAL)){
106
+ return -1;
107
+ }
108
+
109
+ String locanta =atoma.getFirstLocant();
110
+ String locantb =atomb.getFirstLocant();
111
+ if (locanta==null|| locantb==null){
112
+ return 0;
113
+ }
114
+
115
+ Matcher m1 =locantSegmenter.matcher(locanta);
116
+ Matcher m2 =locantSegmenter.matcher(locantb);
117
+ if (!m1.matches()|| !m2.matches()){//inappropriate locant
118
+ return 0;
119
+ }
120
+ String locantaPrimes = m1.group(3);
121
+ String locantbPrimes = m2.group(3);
122
+ if (locantaPrimes.compareTo(locantbPrimes)>=1) {
123
+ return 1;//e.g. 1'' vs 1'
124
+ } else if (locantbPrimes.compareTo(locantaPrimes)>=1) {
125
+ return -1;//e.g. 1' vs 1''
126
+ }
127
+ else{
128
+ int locantaNumber = Integer.parseInt(m1.group(1));
129
+ int locantbNumber = Integer.parseInt(m2.group(1));
130
+
131
+ if (locantaNumber >locantbNumber) {
132
+ return 1;//e.g. 3 vs 2 or 3a vs 2
133
+ } else if (locantbNumber >locantaNumber) {
134
+ return -1;//e.g. 2 vs 3 or 2 vs 3a
135
+ }
136
+ else{
137
+ String locantaLetter = m1.group(2);
138
+ String locantbLetter = m2.group(2);
139
+ if (locantaLetter.compareTo(locantbLetter)>=1) {
140
+ return 1;//e.g. 1b vs 1a
141
+ } else if (locantbLetter.compareTo(locantaLetter)>=1) {
142
+ return -1;//e.g. 1a vs 1b
143
+ }
144
+ return 0;
145
+ }
146
+ }
147
+ }
148
+ }
149
+
150
+ /**
151
+ * Assign element locants to groups/suffixes. These are in addition to any numerical locants that are present.
152
+ * Adds primes to make each locant unique.
153
+ * For groups a locant is not given to carbon atoms
154
+ * If an element appears in a suffix then element locants are not assigned to occurrences of that element in the parent group
155
+ * HeteroAtoms in acidStems connected to the first Atom of the fragment are treated as if they were suffix atoms
156
+ * @param suffixableFragment
157
+ * @param suffixFragments
158
+ * @throws StructureBuildingException
159
+ */
160
+ static void assignElementLocants(Fragment suffixableFragment, List<Fragment> suffixFragments) throws StructureBuildingException {
161
+
162
+ Map<String,Integer> elementCount = new HashMap<>();//keeps track of how many times each element has been seen
163
+ Set<Atom> atomsToIgnore = new HashSet<>();//atoms which already have a symbolic locant
164
+
165
+ List<Fragment> allFragments = new ArrayList<>(suffixFragments);
166
+ allFragments.add(suffixableFragment);
167
+ /*
168
+ * First check whether any element locants have already been assigned, these will take precedence
169
+ */
170
+ for (Fragment fragment : allFragments) {
171
+ List<Atom> atomList = fragment.getAtomList();
172
+ for (Atom atom : atomList) {
173
+ List<String> elementSymbolLocants = atom.getElementSymbolLocants();
174
+ for (String locant : elementSymbolLocants) {
175
+ int primeCount = StringTools.countTerminalPrimes(locant);
176
+ String element = locant.substring(0, locant.length() - primeCount);
177
+ Integer seenCount = elementCount.get(element);
178
+ if (seenCount == null || (seenCount < primeCount + 1)){
179
+ elementCount.put(element, primeCount + 1);
180
+ }
181
+ atomsToIgnore.add(atom);
182
+ }
183
+ }
184
+ }
185
+
186
+ {
187
+ Set<String> elementsToIgnore = elementCount.keySet();
188
+
189
+ for (Fragment fragment : allFragments) {
190
+ List<Atom> atomList = fragment.getAtomList();
191
+ for (Atom atom : atomList) {
192
+ if (elementsToIgnore.contains(atom.getElement().toString())){
193
+ atomsToIgnore.add(atom);
194
+ }
195
+ }
196
+ }
197
+ }
198
+
199
+ String fragType = suffixableFragment.getType();
200
+ if (fragType.equals(NONCARBOXYLICACID_TYPE_VAL) || fragType.equals(CHALCOGENACIDSTEM_TYPE_VAL)){
201
+ if (suffixFragments.size() != 0){
202
+ throw new StructureBuildingException("No suffix fragments were expected to be present on non carboxylic acid");
203
+ }
204
+ processNonCarboxylicAcidLabelling(suffixableFragment, elementCount, atomsToIgnore);
205
+ }
206
+ else{
207
+ if (suffixFragments.size() > 0){
208
+ processSuffixLabelling(suffixFragments, elementCount, atomsToIgnore);
209
+ Integer seenCount = elementCount.get("N");
210
+ if (seenCount != null && seenCount > 1){//look for special case violation of IUPAC rule, =(N)=(NN) is N//N' in practice rather than N/N'/N''
211
+ //this method will put both locants on the N with substituable hydrogen
212
+ detectAndCorrectHydrazoneDerivativeViolation(suffixFragments);
213
+ }
214
+ }
215
+ processMainGroupLabelling(suffixableFragment, elementCount, atomsToIgnore);
216
+ }
217
+ }
218
+
219
+ private static void detectAndCorrectHydrazoneDerivativeViolation(List<Fragment> suffixFragments) {
220
+ fragmentLoop: for (Fragment suffixFrag : suffixFragments) {
221
+ List<Atom> atomList = suffixFrag.getAtomList();
222
+ for (Atom atom : atomList) {
223
+ if (atom.getElement() == ChemEl.N && atom.getIncomingValency() ==3 ){
224
+ List<String> locants =atom.getLocants();
225
+ if (locants.size()==1 && MATCH_ELEMENT_SYMBOL_LOCANT.matcher(locants.get(0)).matches()){
226
+ List<Atom> neighbours = atom.getAtomNeighbours();
227
+ for (Atom neighbour : neighbours) {
228
+ if (neighbour.getElement() == ChemEl.N && neighbour.getIncomingValency()==1){
229
+ String locantToAdd = locants.get(0);
230
+ atom.clearLocants();
231
+ neighbour.addLocant(locantToAdd);
232
+ continue fragmentLoop;
233
+ }
234
+ }
235
+ }
236
+ }
237
+ }
238
+ }
239
+ }
240
+
241
+ private static void processMainGroupLabelling(Fragment suffixableFragment, Map<String, Integer> elementCount, Set<Atom> atomsToIgnore) {
242
+ Set<String> elementToIgnore = new HashSet<>(elementCount.keySet());
243
+ List<Atom> atomList = suffixableFragment.getAtomList();
244
+ Collections.sort(atomList, new SortAtomsForMainGroupElementSymbols());
245
+ Atom atomToAddCLabelTo = null;//only add a C label if there is only one C in the main group
246
+ boolean seenMoreThanOneC = false;
247
+ for (Atom atom : atomList) {
248
+ if (atomsToIgnore.contains(atom)){
249
+ continue;
250
+ }
251
+ ChemEl chemEl = atom.getElement();
252
+ if (elementToIgnore.contains(chemEl.toString())){
253
+ continue;
254
+ }
255
+ if (chemEl == ChemEl.C) {
256
+ if (seenMoreThanOneC) {
257
+ continue;
258
+ }
259
+ if (atomToAddCLabelTo != null){
260
+ atomToAddCLabelTo = null;
261
+ seenMoreThanOneC = true;
262
+ }
263
+ else{
264
+ atomToAddCLabelTo = atom;
265
+ }
266
+ }
267
+ else{
268
+ assignLocant(atom, elementCount);
269
+ }
270
+ }
271
+ if (atomToAddCLabelTo != null){
272
+ atomToAddCLabelTo.addLocant("C");
273
+ }
274
+ }
275
+
276
+ private static void processSuffixLabelling(List<Fragment> suffixFragments, Map<String, Integer> elementCount, Set<Atom> atomsToIgnore) {
277
+ List<Atom> startingAtoms = new ArrayList<>();
278
+ Set<Atom> atomsVisited = new HashSet<>();
279
+ for (Fragment fragment : suffixFragments) {
280
+ Atom rAtom = fragment.getFirstAtom();
281
+ List<Atom> nextAtoms = getIntraFragmentNeighboursAndSetVisitedBondOrder(rAtom);
282
+ atomsVisited.addAll(nextAtoms);
283
+ startingAtoms.addAll(nextAtoms);
284
+ }
285
+ Collections.sort(startingAtoms, new SortAtomsForElementSymbols());
286
+
287
+ Deque<Atom> atomsToConsider = new ArrayDeque<>(startingAtoms);
288
+ while (atomsToConsider.size() > 0){
289
+ assignLocantsAndExploreNeighbours(elementCount, atomsToIgnore, atomsVisited, atomsToConsider);
290
+ }
291
+ }
292
+
293
+ private static void processNonCarboxylicAcidLabelling(Fragment suffixableFragment, Map<String, Integer> elementCount, Set<Atom> atomsToIgnore) {
294
+ Set<Atom> atomsVisited = new HashSet<>();
295
+ Atom firstAtom = suffixableFragment.getFirstAtom();
296
+ List<Atom> startingAtoms = getIntraFragmentNeighboursAndSetVisitedBondOrder(firstAtom);
297
+
298
+ Collections.sort(startingAtoms, new SortAtomsForElementSymbols());
299
+ atomsVisited.add(firstAtom);
300
+ Deque<Atom> atomsToConsider = new ArrayDeque<>(startingAtoms);
301
+ while (atomsToConsider.size() > 0){
302
+ assignLocantsAndExploreNeighbours(elementCount, atomsToIgnore, atomsVisited, atomsToConsider);
303
+ }
304
+ if (!atomsToIgnore.contains(firstAtom) && firstAtom.determineValency(true) > firstAtom.getIncomingValency()) {
305
+ //e.g. carbonimidoyl the carbon has locant C
306
+ assignLocant(firstAtom, elementCount);
307
+ }
308
+ }
309
+
310
+ private static void assignLocantsAndExploreNeighbours(Map<String, Integer> elementCount, Set<Atom> atomsToIgnore, Set<Atom> atomsVisited, Deque<Atom> atomsToConsider) {
311
+ Atom atom = atomsToConsider.removeFirst();
312
+ atomsVisited.add(atom);
313
+ if (!atomsToIgnore.contains(atom)) {//assign locant
314
+ assignLocant(atom, elementCount);
315
+ }
316
+ List<Atom> atomsToExplore = getIntraFragmentNeighboursAndSetVisitedBondOrder(atom);
317
+ atomsToExplore.removeAll(atomsVisited);
318
+ Collections.sort(atomsToExplore, new SortAtomsForElementSymbols());
319
+ for (int i = atomsToExplore.size() - 1; i >= 0; i--) {
320
+ atomsToConsider.addFirst(atomsToExplore.get(i));
321
+ }
322
+ }
323
+
324
+ /**
325
+ * Gets the neighbours of an atom that claim to be within the same frag
326
+ * The order of bond taken to get to the neighbour is set on the neighbours Atom.VISITED property
327
+ * @param atom
328
+ * @return
329
+ */
330
+ private static List<Atom> getIntraFragmentNeighboursAndSetVisitedBondOrder(Atom atom) {
331
+ List<Atom> atomsToExplore = new ArrayList<>();
332
+ List<Bond> bonds = atom.getBonds();
333
+ for (Bond bond : bonds) {
334
+ Atom neighbour = bond.getOtherAtom(atom);
335
+ if (neighbour.getFrag().equals(atom.getFrag())) {
336
+ atomsToExplore.add(neighbour);
337
+ neighbour.setProperty(Atom.VISITED, bond.getOrder());
338
+ }
339
+ }
340
+ return atomsToExplore;
341
+ }
342
+
343
+ private static void assignLocant(Atom atom, Map<String, Integer> elementCount) {
344
+ String element = atom.getElement().toString();
345
+ Integer count = elementCount.get(element);
346
+ if (count == null){
347
+ atom.addLocant(element);
348
+ elementCount.put(element, 1);
349
+ }
350
+ else{
351
+ atom.addLocant(element + StringTools.multiplyString("'", count));
352
+ elementCount.put(element, count + 1);
353
+ }
354
+ }
355
+
356
+ /** Adjusts the order of a bond in a fragment.
357
+ *
358
+ * @param fromAtom The lower-numbered atom in the bond
359
+ * @param bondOrder The new bond order
360
+ * @param fragment The fragment
361
+ * @return The bond that was unsaturated
362
+ * @throws StructureBuildingException
363
+ */
364
+ static Bond unsaturate(Atom fromAtom, int bondOrder, Fragment fragment) throws StructureBuildingException {
365
+ Atom toAtom = null;
366
+ Integer locant = null;
367
+ try{
368
+ String primes ="";
369
+ String locantStr = fromAtom.getFirstLocant();
370
+ int numberOfPrimes = StringTools.countTerminalPrimes(locantStr);
371
+ locant = Integer.parseInt(locantStr.substring(0, locantStr.length()-numberOfPrimes));
372
+ primes = StringTools.multiplyString("'", numberOfPrimes);
373
+ Atom possibleToAtom = fragment.getAtomByLocant(String.valueOf(locant +1)+primes);
374
+ if (possibleToAtom !=null && fromAtom.getBondToAtom(possibleToAtom)!=null){
375
+ toAtom = possibleToAtom;
376
+ }
377
+ else if (possibleToAtom ==null && fromAtom.getAtomIsInACycle()){//allow something like cyclohexan-6-ene, something like butan-4-ene will still fail
378
+ possibleToAtom = fragment.getAtomByLocant("1" + primes);
379
+ if (possibleToAtom !=null && fromAtom.getBondToAtom(possibleToAtom)!=null){
380
+ toAtom =possibleToAtom;
381
+ }
382
+ }
383
+ }
384
+ catch (Exception e) {
385
+ List<Atom> atomList = fragment.getAtomList();
386
+ int initialIndice = atomList.indexOf(fromAtom);
387
+ if (initialIndice +1 < atomList.size() && fromAtom.getBondToAtom(atomList.get(initialIndice +1))!=null){
388
+ toAtom = atomList.get(initialIndice +1);
389
+ }
390
+ }
391
+ if (toAtom==null){
392
+ if (locant!=null){
393
+ throw new StructureBuildingException("Could not find bond to unsaturate starting from the atom with locant: " +locant);
394
+ }
395
+ else{
396
+ throw new StructureBuildingException("Could not find bond to unsaturate");
397
+ }
398
+ }
399
+ Bond b = fromAtom.getBondToAtomOrThrow(toAtom);
400
+ if (b.getOrder() != 1) {
401
+ throw new StructureBuildingException("Bond indicated to be unsaturated was already unsaturated");
402
+ }
403
+ b.setOrder(bondOrder);
404
+ return b;
405
+ }
406
+
407
+ /** Adjusts the order of a bond in a fragment.
408
+ *
409
+ * @param fromAtom The first atom in the bond
410
+ * @param locantTo The locant of the other atom in the bond
411
+ * @param bondOrder The new bond order
412
+ * @param fragment The fragment
413
+ * @throws StructureBuildingException
414
+ */
415
+ static void unsaturate(Atom fromAtom, String locantTo, int bondOrder, Fragment fragment) throws StructureBuildingException {
416
+ Atom toAtom = fragment.getAtomByLocantOrThrow(locantTo);
417
+ Bond b = fromAtom.getBondToAtomOrThrow(toAtom);
418
+ if (b.getOrder() != 1) {
419
+ throw new StructureBuildingException("Bond indicated to be unsaturated was already unsaturated");
420
+ }
421
+ b.setOrder(bondOrder);
422
+ }
423
+
424
+ /**Adjusts the labeling on a fused ring system, such that bridgehead atoms
425
+ * have locants endings in 'a' or 'b' etc. Example: naphthalene
426
+ * 1,2,3,4,5,6,7,8,9,10->1,2,3,4,4a,5,6,7,8,8a
427
+ * @param atomList
428
+ */
429
+ static void relabelLocantsAsFusedRingSystem(List<Atom> atomList) {
430
+ int locantVal = 0;
431
+ char locantLetter = 'a';
432
+ for (Atom atom : atomList) {
433
+ atom.clearLocants();
434
+ }
435
+ for (Atom atom : atomList) {
436
+ if(atom.getElement() != ChemEl.C || atom.getBondCount() < 3) {
437
+ locantVal++;
438
+ locantLetter = 'a';
439
+ atom.addLocant(Integer.toString(locantVal));
440
+ } else {
441
+ atom.addLocant(Integer.toString(locantVal) + locantLetter);
442
+ locantLetter++;
443
+ }
444
+ }
445
+ }
446
+
447
+ /**
448
+ * Adds the given string to all the locants of the atoms.
449
+ * @param atomList
450
+ * @param stringToAdd
451
+ */
452
+ static void relabelLocants(List<Atom> atomList, String stringToAdd) {
453
+ for (Atom atom : atomList) {
454
+ List<String> locants = new ArrayList<>(atom.getLocants());
455
+ atom.clearLocants();
456
+ for (String locant : locants) {
457
+ atom.addLocant(locant + stringToAdd);
458
+ }
459
+ }
460
+ }
461
+
462
+ /**
463
+ * Adds the given string to all the numeric locants of the atoms.
464
+ * @param atomList
465
+ * @param stringToAdd
466
+ */
467
+ static void relabelNumericLocants(List<Atom> atomList, String stringToAdd) {
468
+ for (Atom atom : atomList) {
469
+ List<String> locants = new ArrayList<>(atom.getLocants());
470
+ for (String locant : locants) {
471
+ if (MATCH_NUMERIC_LOCANT.matcher(locant).matches()){
472
+ atom.removeLocant(locant);
473
+ atom.addLocant(locant + stringToAdd);
474
+ }
475
+ }
476
+ }
477
+ }
478
+
479
+
480
+ static void splitOutAtomIntoValency1OutAtoms(OutAtom outAtom) {
481
+ Fragment frag =outAtom.getAtom().getFrag();
482
+ for (int i = 1; i < outAtom.getValency(); i++) {
483
+ frag.addOutAtom(outAtom.getAtom(), 1, outAtom.isSetExplicitly());
484
+ }
485
+ outAtom.setValency(1);
486
+ }
487
+
488
+ /**
489
+ * Checks if the specified Nitrogen is potentially involved in [NH]C=N <-> N=C[NH] tautomerism
490
+ * Given the starting nitrogen returns the other nitrogen or null if that nitrogen does not appear to be involved in such tautomerism
491
+ * @param nitrogen
492
+ * @return null or the other nitrogen
493
+ */
494
+ static Atom detectSimpleNitrogenTautomer(Atom nitrogen) {
495
+ if (nitrogen.getElement() == ChemEl.N && nitrogen.getAtomIsInACycle()){
496
+ for (Atom neighbour : nitrogen.getAtomNeighbours()) {
497
+ if (neighbour.hasSpareValency() && neighbour.getElement() == ChemEl.C && neighbour.getAtomIsInACycle()){
498
+ List<Atom> distance2Neighbours = neighbour.getAtomNeighbours();
499
+ distance2Neighbours.remove(nitrogen);
500
+ for (Atom distance2Neighbour : distance2Neighbours) {
501
+ if (distance2Neighbour.hasSpareValency() && distance2Neighbour.getElement() == ChemEl.N && distance2Neighbour.getAtomIsInACycle() && distance2Neighbour.getCharge()==0){
502
+ return distance2Neighbour;
503
+ }
504
+ }
505
+ }
506
+ }
507
+ }
508
+ return null;
509
+ }
510
+
511
+ /**Increases the order of bonds joining atoms with spareValencies,
512
+ * and uses up said spareValencies.
513
+ * [spare valency is an indication of the atom's desire to form the maximum number of non-cumulative double bonds]
514
+ * @param frag
515
+ * @throws StructureBuildingException If the algorithm can't work out where to put the bonds
516
+ */
517
+ static void convertSpareValenciesToDoubleBonds(Fragment frag) throws StructureBuildingException {
518
+ List<Atom> atomCollection = frag.getAtomList();
519
+ /* pick atom, getAtomNeighbours, decideIfTerminal, resolve */
520
+
521
+ /*
522
+ * Remove spare valency on atoms with valency precluding creation of double bonds
523
+ */
524
+ for(Atom a : atomCollection) {
525
+ a.ensureSVIsConsistantWithValency(true);
526
+ }
527
+
528
+ /*
529
+ * Remove spare valency on atoms that are not adjacent to another atom with spare valency
530
+ */
531
+ atomLoop: for(Atom a : atomCollection) {
532
+ if(a.hasSpareValency()) {
533
+ for(Atom aa : frag.getIntraFragmentAtomNeighbours(a)) {
534
+ if(aa.hasSpareValency()) {
535
+ continue atomLoop;
536
+ }
537
+ }
538
+ a.setSpareValency(false);
539
+ }
540
+ }
541
+
542
+ /*
543
+ * The indicated hydrogen from the original SMILES definition of the fragment e.g. [nH] are used to disambiguate if there are
544
+ * an odd number of atoms with spare valency. Hence pyrrole is unambiguously 1H-pyrrole unless specified otherwise
545
+ * Things gets more complicated if the input contained multiple indicated hydrogen as it is unclear whether these still apply to the final molecule
546
+ */
547
+ Atom atomToReduceValencyAt = null;
548
+ List<Atom> originalIndicatedHydrogen = frag.getIndicatedHydrogen();
549
+ List<Atom> indicatedHydrogen = new ArrayList<>(originalIndicatedHydrogen.size());
550
+ for (Atom atom : frag.getIndicatedHydrogen()) {
551
+ if (atom.hasSpareValency() && atom.getCharge() == 0) {
552
+ indicatedHydrogen.add(atom);
553
+ }
554
+ }
555
+ if (indicatedHydrogen.size() > 0) {
556
+ //typically there will be only one indicated hydrogen
557
+ if (indicatedHydrogen.size() > 1) {
558
+ for (Atom indicatedAtom : indicatedHydrogen) {
559
+ boolean couldBeInvolvedInSimpleNitrogenTautomerism = false;//fix for guanine like purine derivatives
560
+ if (indicatedAtom.getElement() == ChemEl.N && indicatedAtom.getAtomIsInACycle()) {
561
+ atomloop : for (Atom neighbour : indicatedAtom.getAtomNeighbours()) {
562
+ if (neighbour.getElement() == ChemEl.C && neighbour.getAtomIsInACycle()) {
563
+ List<Atom> distance2Neighbours = neighbour.getAtomNeighbours();
564
+ distance2Neighbours.remove(indicatedAtom);
565
+ for (Atom distance2Neighbour : distance2Neighbours) {
566
+ if (distance2Neighbour.getElement() == ChemEl.N && distance2Neighbour.getAtomIsInACycle() && !originalIndicatedHydrogen.contains(distance2Neighbour)){
567
+ couldBeInvolvedInSimpleNitrogenTautomerism = true;
568
+ break atomloop;
569
+ }
570
+ }
571
+ }
572
+ }
573
+ }
574
+ //retain spare valency if has the cyclic [NH]C=N moiety but substitution has meant that this tautomerism doesn't actually occur cf. 8-oxoguanine
575
+ if (!couldBeInvolvedInSimpleNitrogenTautomerism || detectSimpleNitrogenTautomer(indicatedAtom) != null) {
576
+ indicatedAtom.setSpareValency(false);
577
+ }
578
+ }
579
+ }
580
+ else{
581
+ atomToReduceValencyAt = indicatedHydrogen.get(0);
582
+ }
583
+ }
584
+
585
+ int svCount = 0;
586
+ for(Atom a : atomCollection) {
587
+ svCount += a.hasSpareValency() ? 1 :0;
588
+ }
589
+
590
+ /*
591
+ * Double-bonds go between pairs of atoms so if there are an off number of candidate atoms (e.g. pyrrole) an atom must be chosen
592
+ * The atom with indicated hydrogen (see above) is used in preference else heuristics are used to chose a candidate
593
+ */
594
+ if((svCount & 1) == 1) {
595
+ if (atomToReduceValencyAt == null) {
596
+ atomToReduceValencyAt = findBestAtomToRemoveSpareValencyFrom(frag, atomCollection);
597
+ }
598
+ atomToReduceValencyAt.setSpareValency(false);
599
+ svCount--;
600
+ }
601
+
602
+ while(svCount > 0) {
603
+ boolean foundTerminalFlag = false;
604
+ boolean foundNonBridgeHeadFlag = false;
605
+ boolean foundBridgeHeadFlag = false;
606
+ //First handle cases where double bond placement is completely unambiguous i.e. an atom where only one neighbour has spare valency
607
+ for(Atom a : atomCollection) {
608
+ if(a.hasSpareValency()) {
609
+ int count = 0;
610
+ for(Atom aa : frag.getIntraFragmentAtomNeighbours(a)) {
611
+ if(aa.hasSpareValency()) {
612
+ count++;
613
+ }
614
+ }
615
+ if(count == 1) {
616
+ for(Atom aa : frag.getIntraFragmentAtomNeighbours(a)) {
617
+ if(aa.hasSpareValency()) {
618
+ foundTerminalFlag = true;
619
+ a.setSpareValency(false);
620
+ aa.setSpareValency(false);
621
+ a.getBondToAtomOrThrow(aa).addOrder(1);
622
+ svCount -= 2;//Two atoms where for one of them this bond is the only double bond it can possible form
623
+ break;
624
+ }
625
+ }
626
+ }
627
+ }
628
+ }
629
+ if(foundTerminalFlag) {
630
+ continue;
631
+ }
632
+
633
+ //Find two atoms where one, or both, of them are not bridgeheads
634
+ for(Atom a : atomCollection) {
635
+ List<Atom> neighbours = frag.getIntraFragmentAtomNeighbours(a);
636
+ if(a.hasSpareValency() && neighbours.size() < 3) {
637
+ for(Atom aa : neighbours) {
638
+ if(aa.hasSpareValency()) {
639
+ foundNonBridgeHeadFlag = true;
640
+ a.setSpareValency(false);
641
+ aa.setSpareValency(false);
642
+ a.getBondToAtomOrThrow(aa).addOrder(1);
643
+ svCount -= 2;//Two atoms where one of them is not a bridge head
644
+ break;
645
+ }
646
+ }
647
+ }
648
+ if(foundNonBridgeHeadFlag) {
649
+ break;
650
+ }
651
+ }
652
+ if(foundNonBridgeHeadFlag) {
653
+ continue;
654
+ }
655
+
656
+ //Find two atoms where both of them are bridgheads
657
+ for(Atom a : atomCollection) {
658
+ List<Atom> neighbours = frag.getIntraFragmentAtomNeighbours(a);
659
+ if(a.hasSpareValency()) {
660
+ for(Atom aa : neighbours) {
661
+ if(aa.hasSpareValency()) {
662
+ foundBridgeHeadFlag = true;
663
+ a.setSpareValency(false);
664
+ aa.setSpareValency(false);
665
+ a.getBondToAtomOrThrow(aa).addOrder(1);
666
+ svCount -= 2;//Two atoms where both of them are a bridge head e.g. necessary for something like coronene
667
+ break;
668
+ }
669
+ }
670
+ }
671
+ if(foundBridgeHeadFlag) {
672
+ break;
673
+ }
674
+ }
675
+ if(!foundBridgeHeadFlag) {
676
+ throw new StructureBuildingException("Failed to assign all double bonds! (Check that indicated hydrogens have been appropriately specified)");
677
+ }
678
+ }
679
+ }
680
+
681
+ private static Atom findBestAtomToRemoveSpareValencyFrom(Fragment frag, List<Atom> atomCollection) {
682
+ for(Atom a : atomCollection) {//try and find an atom with SV that neighbours only one atom with SV
683
+ if(a.hasSpareValency()) {
684
+ int atomsWithSV = 0;
685
+ for(Atom aa : frag.getIntraFragmentAtomNeighbours(a)) {
686
+ if(aa.hasSpareValency()) {
687
+ atomsWithSV++;
688
+ }
689
+ }
690
+ if (atomsWithSV == 1) {
691
+ return a;
692
+ }
693
+ }
694
+ }
695
+ atomLoop: for(Atom a : atomCollection) {//try and find an atom with bridgehead atoms with SV on both sides c.f. phenoxastibinine == 10H-phenoxastibinine
696
+ if(a.hasSpareValency()) {
697
+ List<Atom> neighbours = frag.getIntraFragmentAtomNeighbours(a);
698
+ if (neighbours.size() == 2) {
699
+ for(Atom aa : neighbours) {
700
+ if(frag.getIntraFragmentAtomNeighbours(aa).size() < 3){
701
+ continue atomLoop;
702
+ }
703
+ }
704
+ return a;
705
+ }
706
+ }
707
+ }
708
+ //Prefer nitrogen to carbon e.g. get NHC=C rather than N=CCH
709
+ Atom firstAtomWithSpareValency = null;
710
+ Atom firstHeteroAtomWithSpareValency = null;
711
+ for(Atom a : atomCollection) {
712
+ if(a.hasSpareValency()) {
713
+ if (a.getElement() != ChemEl.C) {
714
+ if (a.getCharge() == 0) {
715
+ return a;
716
+ }
717
+ if(firstHeteroAtomWithSpareValency == null) {
718
+ firstHeteroAtomWithSpareValency = a;
719
+ }
720
+ }
721
+ if(firstAtomWithSpareValency == null) {
722
+ firstAtomWithSpareValency = a;
723
+ }
724
+ }
725
+ }
726
+ if (firstAtomWithSpareValency == null) {
727
+ throw new IllegalArgumentException("OPSIN Bug: No atom had spare valency!");
728
+ }
729
+ return firstHeteroAtomWithSpareValency != null ? firstHeteroAtomWithSpareValency : firstAtomWithSpareValency;
730
+ }
731
+
732
+
733
+ static Atom getAtomByAminoAcidStyleLocant(Atom backboneAtom, String elementSymbol, String primes) {
734
+ //Search for appropriate atom by using the same algorithm as is used to assign locants initially
735
+
736
+ List<Atom> startingAtoms = new ArrayList<>();
737
+ Set<Atom> atomsVisited = new HashSet<>();
738
+ List<Atom> neighbours = getIntraFragmentNeighboursAndSetVisitedBondOrder(backboneAtom);
739
+ mainLoop: for (Atom neighbour : neighbours) {
740
+ atomsVisited.add(neighbour);
741
+ if (!neighbour.getType().equals(SUFFIX_TYPE_VAL)){
742
+ for (String neighbourLocant : neighbour.getLocants()) {
743
+ if (MATCH_NUMERIC_LOCANT.matcher(neighbourLocant).matches()){//gone to an inappropriate atom
744
+ continue mainLoop;
745
+ }
746
+ }
747
+ }
748
+ startingAtoms.add(neighbour);
749
+ }
750
+
751
+ Collections.sort(startingAtoms, new SortAtomsForElementSymbols());
752
+ Map<String,Integer> elementCount = new HashMap<>();//keeps track of how many times each element has been seen
753
+
754
+ Deque<Atom> atomsToConsider = new ArrayDeque<>(startingAtoms);
755
+ boolean hydrazoneSpecialCase =false;//look for special case violation of IUPAC rule where the locant of the =N- atom is skipped. This flag is set when =N- is encountered
756
+ while (atomsToConsider.size() > 0){
757
+ Atom atom = atomsToConsider.removeFirst();
758
+ atomsVisited.add(atom);
759
+ int primesOnPossibleAtom =0;
760
+ String element =atom.getElement().toString();
761
+ if (elementCount.get(element)==null){
762
+ elementCount.put(element,1);
763
+ }
764
+ else{
765
+ int count =elementCount.get(element);
766
+ primesOnPossibleAtom =count;
767
+ elementCount.put(element, count +1);
768
+ }
769
+ if (hydrazoneSpecialCase){
770
+ if (element.equals(elementSymbol) && primes.length() == primesOnPossibleAtom -1){
771
+ return atom;
772
+ }
773
+ hydrazoneSpecialCase =false;
774
+ }
775
+
776
+ List<Atom> atomNeighbours = getIntraFragmentNeighboursAndSetVisitedBondOrder(atom);
777
+ atomNeighbours.removeAll(atomsVisited);
778
+ for (int i = atomNeighbours.size() -1; i >=0; i--) {
779
+ Atom neighbour = atomNeighbours.get(i);
780
+ if (!neighbour.getType().equals(SUFFIX_TYPE_VAL)){
781
+ for (String neighbourLocant : neighbour.getLocants()) {
782
+ if (MATCH_NUMERIC_LOCANT.matcher(neighbourLocant).matches()){//gone to an inappropriate atom
783
+ atomNeighbours.remove(i);
784
+ break;
785
+ }
786
+ }
787
+ }
788
+ }
789
+ if (atom.getElement() == ChemEl.N && atom.getIncomingValency() ==3 && atom.getCharge()==0
790
+ && atomNeighbours.size()==1 && atomNeighbours.get(0).getElement() == ChemEl.N){
791
+ hydrazoneSpecialCase =true;
792
+ }
793
+ else{
794
+ if (element.equals(elementSymbol)){
795
+ if (primes.length() == primesOnPossibleAtom){
796
+ return atom;
797
+ }
798
+ }
799
+ }
800
+
801
+ Collections.sort(atomNeighbours, new SortAtomsForElementSymbols());
802
+ for (int i = atomNeighbours.size() - 1; i >= 0; i--) {
803
+ atomsToConsider.addFirst(atomNeighbours.get(i));
804
+ }
805
+ }
806
+
807
+ if (primes.equals("") && backboneAtom.getElement().toString().equals(elementSymbol)){//maybe it meant the starting atom
808
+ return backboneAtom;
809
+ }
810
+ return null;
811
+ }
812
+
813
+
814
+ /**
815
+ * Determines whether the bond between two elements is likely to be covalent
816
+ * This is crudely determined based on whether the combination of elements fall outside the ionic and
817
+ * metallic sections of a van Arkel diagram
818
+ * @param chemEl1
819
+ * @param chemEl2
820
+ * @return
821
+ */
822
+ static boolean isCovalent(ChemEl chemEl1, ChemEl chemEl2) {
823
+ Double atom1Electrongegativity = AtomProperties.getPaulingElectronegativity(chemEl1);
824
+ Double atom2Electrongegativity = AtomProperties.getPaulingElectronegativity(chemEl2);
825
+ if (atom1Electrongegativity!=null && atom2Electrongegativity !=null){
826
+ double halfSum = (atom1Electrongegativity + atom2Electrongegativity)/2;
827
+ double difference = Math.abs(atom1Electrongegativity - atom2Electrongegativity);
828
+ if (halfSum < 1.6){
829
+ return false;//probably metallic
830
+ }
831
+ if (difference < 1.76 * halfSum - 3.03){
832
+ return true;
833
+ }
834
+ }
835
+ return false;
836
+ }
837
+
838
+ /**
839
+ * Is the atom a suffix atom/carbon of an aldehyde atom/chalcogen functional atom/hydroxy (or chalcogen equivalent)
840
+ * (by special step heterostems are not considered hydroxy e.g. disulfane)
841
+ * @param atom
842
+ * @return
843
+ */
844
+ static boolean isCharacteristicAtom(Atom atom) {
845
+ if (atom.getType().equals(SUFFIX_TYPE_VAL) ||
846
+ (atom.getElement().isChalcogen() && !HETEROSTEM_SUBTYPE_VAL.equals(atom.getFrag().getSubType()) &&
847
+ atom.getIncomingValency() == 1 &&
848
+ atom.getOutValency() == 0 && atom.getCharge() == 0)) {
849
+ return true;
850
+ }
851
+ return isFunctionalAtomOrAldehyde(atom);
852
+ }
853
+
854
+ /**
855
+ * Is the atom an aldehyde atom or a chalcogen functional atom
856
+ * @param atom
857
+ * @return
858
+ */
859
+ static boolean isFunctionalAtomOrAldehyde(Atom atom) {
860
+ if (Boolean.TRUE.equals(atom.getProperty(Atom.ISALDEHYDE))){//substituting an aldehyde would make it no longer an aldehyde
861
+ return true;
862
+ }
863
+ return isFunctionalAtom(atom);
864
+ }
865
+
866
+ /**
867
+ * Is the atom a chalcogen functional atom
868
+ * @param atom
869
+ * @return
870
+ */
871
+ static boolean isFunctionalAtom(Atom atom) {
872
+ ChemEl chemEl = atom.getElement();
873
+ if (chemEl.isChalcogen()) {//potential chalcogen functional atom
874
+ Fragment frag = atom.getFrag();
875
+ for (int i = 0, l = frag.getFunctionalAtomCount(); i < l; i++) {
876
+ if (atom.equals(frag.getFunctionalAtom(i).getAtom())){
877
+ return true;
878
+ }
879
+ }
880
+ }
881
+ return false;
882
+ }
883
+
884
+
885
+ /**
886
+ * Checks that all atoms in a ring appear to be equivalent
887
+ * @param ring
888
+ * @return true if all equivalent, else false
889
+ */
890
+ static boolean allAtomsInRingAreIdentical(Fragment ring){
891
+ List<Atom> atomList = ring.getAtomList();
892
+ Atom firstAtom = atomList.get(0);
893
+ ChemEl chemEl = firstAtom.getElement();
894
+ int valency = firstAtom.getIncomingValency();
895
+ boolean spareValency = firstAtom.hasSpareValency();
896
+ for (Atom atom : atomList) {
897
+ if (atom.getElement() != chemEl){
898
+ return false;
899
+ }
900
+ if (atom.getIncomingValency() != valency){
901
+ return false;
902
+ }
903
+ if (atom.hasSpareValency() != spareValency){
904
+ return false;
905
+ }
906
+ }
907
+ return true;
908
+ }
909
+
910
+ static void removeTerminalAtom(BuildState state, Atom atomToRemove) {
911
+ AtomParity atomParity = atomToRemove.getAtomNeighbours().get(0).getAtomParity();
912
+ if (atomParity!=null){//replace reference to atom with reference to implicit hydrogen
913
+ Atom[] atomRefs4= atomParity.getAtomRefs4();
914
+ for (int i = 0; i < atomRefs4.length; i++) {
915
+ if (atomRefs4[i]==atomToRemove){
916
+ atomRefs4[i] = AtomParity.deoxyHydrogen;
917
+ break;
918
+ }
919
+ }
920
+ }
921
+ state.fragManager.removeAtomAndAssociatedBonds(atomToRemove);
922
+ }
923
+
924
+ /**
925
+ * Removes a terminal oxygen from the atom
926
+ * An exception is thrown if no suitable oxygen could be found connected to the atom
927
+ * Note that [N+][O-] is treated as N=O
928
+ * @param state
929
+ * @param atom
930
+ * @param desiredBondOrder
931
+ * @throws StructureBuildingException
932
+ */
933
+ static void removeTerminalOxygen(BuildState state, Atom atom, int desiredBondOrder) throws StructureBuildingException {
934
+ //TODO prioritise [N+][O-]
935
+ List<Atom> neighbours = atom.getAtomNeighbours();
936
+ for (Atom neighbour : neighbours) {
937
+ if (neighbour.getElement() == ChemEl.O && neighbour.getBondCount()==1){
938
+ Bond b = atom.getBondToAtomOrThrow(neighbour);
939
+ if (b.getOrder()==desiredBondOrder && neighbour.getCharge()==0){
940
+ FragmentTools.removeTerminalAtom(state, neighbour);
941
+ if (atom.getLambdaConventionValency()!=null){//corrects valency for phosphin/arsin/stibin
942
+ atom.setLambdaConventionValency(atom.getLambdaConventionValency()-desiredBondOrder);
943
+ }
944
+ if (atom.getMinimumValency()!=null){//corrects valency for phosphin/arsin/stibin
945
+ atom.setMinimumValency(atom.getMinimumValency()-desiredBondOrder);
946
+ }
947
+ return;
948
+ }
949
+ else if (neighbour.getCharge() ==-1 && b.getOrder()==1 && desiredBondOrder == 2){
950
+ if (atom.getCharge() ==1 && atom.getElement() == ChemEl.N){
951
+ FragmentTools.removeTerminalAtom(state, neighbour);
952
+ atom.neutraliseCharge();
953
+ return;
954
+ }
955
+ }
956
+ }
957
+ }
958
+ if (desiredBondOrder ==2){
959
+ throw new StructureBuildingException("Double bonded oxygen not found at suffix attachment position. Perhaps a suffix has been used inappropriately");
960
+ }
961
+ else if (desiredBondOrder ==1){
962
+ throw new StructureBuildingException("Hydroxy oxygen not found at suffix attachment position. Perhaps a suffix has been used inappropriately");
963
+ }
964
+ else {
965
+ throw new StructureBuildingException("Suitable oxygen not found at suffix attachment position Perhaps a suffix has been used inappropriately");
966
+ }
967
+
968
+ }
969
+
970
+
971
+ /**
972
+ * Finds terminal atoms of the given element type from the list given
973
+ * The terminal atoms be single bonded, not radicals and uncharged
974
+ * @param atoms
975
+ * @param chemEl
976
+ * @return
977
+ */
978
+ static List<Atom> findHydroxyLikeTerminalAtoms(List<Atom> atoms, ChemEl chemEl) {
979
+ List<Atom> matches =new ArrayList<>();
980
+ for (Atom atom : atoms) {
981
+ if (atom.getElement() == chemEl && atom.getIncomingValency() == 1 &&
982
+ atom.getOutValency() == 0 && atom.getCharge() == 0){
983
+ matches.add(atom);
984
+ }
985
+ }
986
+ return matches;
987
+ }
988
+
989
+ /**
990
+ * Checks whether a bond is part of a 6 member or smaller ring.
991
+ * This is necessary as such double bonds are assumed to not be capable of having E/Z stereochemistry
992
+ * @param bond
993
+ * @return true unless in a 6 member or smaller rings
994
+ */
995
+ static boolean notIn6MemberOrSmallerRing(Bond bond) {
996
+ Atom fromAtom =bond.getFromAtom();
997
+ Atom toAtom = bond.getToAtom();
998
+ if (fromAtom.getAtomIsInACycle() && toAtom.getAtomIsInACycle()){//obviously both must be in rings
999
+ //attempt to get from the fromAtom to the toAtom in 6 or fewer steps.
1000
+ List<Atom> visitedAtoms = new ArrayList<>();
1001
+ Deque<Atom> atomsToInvestigate = new ArrayDeque<>();//A queue is not used as I need to make sure that only up to depth 6 is investigated
1002
+ List<Atom> neighbours =fromAtom.getAtomNeighbours();
1003
+ neighbours.remove(toAtom);
1004
+ for (Atom neighbour : neighbours) {
1005
+ atomsToInvestigate.add(neighbour);
1006
+ }
1007
+ visitedAtoms.add(fromAtom);
1008
+ for (int i = 0; i < 5; i++) {//up to 5 bonds from the neighbours of the fromAtom i.e. up to ring size 6
1009
+ if (atomsToInvestigate.isEmpty()){
1010
+ break;
1011
+ }
1012
+ Deque<Atom> atomsToInvestigateNext = new ArrayDeque<>();
1013
+ while (!atomsToInvestigate.isEmpty()) {
1014
+ Atom currentAtom =atomsToInvestigate.removeFirst();
1015
+ if (currentAtom == toAtom){
1016
+ return false;
1017
+ }
1018
+ visitedAtoms.add(currentAtom);
1019
+ neighbours =currentAtom.getAtomNeighbours();
1020
+ for (Atom neighbour : neighbours) {
1021
+ if (!visitedAtoms.contains(neighbour) && neighbour.getAtomIsInACycle()){
1022
+ atomsToInvestigateNext.add(neighbour);
1023
+ }
1024
+ }
1025
+ }
1026
+ atomsToInvestigate = atomsToInvestigateNext;
1027
+ }
1028
+ }
1029
+ return true;
1030
+ }
1031
+
1032
+ /**
1033
+ * Finds the hydroxy atom of all hydroxy functional groups in a fragment
1034
+ * i.e. not in carboxylic acid or oxime
1035
+ * @param frag
1036
+ * @return
1037
+ * @throws StructureBuildingException
1038
+ */
1039
+ static List<Atom> findHydroxyGroups(Fragment frag) throws StructureBuildingException {
1040
+ List<Atom> hydroxyAtoms = new ArrayList<>();
1041
+ List<Atom> atoms = frag.getAtomList();
1042
+ for (Atom atom : atoms) {
1043
+ if (atom.getElement() == ChemEl.O && atom.getIncomingValency() == 1 && atom.getOutValency() == 0 && atom.getCharge() == 0){
1044
+ Atom adjacentAtom = atom.getAtomNeighbours().get(0);
1045
+ List<Atom> neighbours = adjacentAtom.getAtomNeighbours();
1046
+ if (adjacentAtom.getElement() == ChemEl.C){
1047
+ neighbours.remove(atom);
1048
+ if (neighbours.size() >= 1 && neighbours.get(0).getElement() == ChemEl.O && adjacentAtom.getBondToAtomOrThrow(neighbours.get(0)).getOrder()==2){
1049
+ continue;
1050
+ }
1051
+ if (neighbours.size() >= 2 && neighbours.get(1).getElement() == ChemEl.O && adjacentAtom.getBondToAtomOrThrow(neighbours.get(1)).getOrder()==2){
1052
+ continue;
1053
+ }
1054
+ hydroxyAtoms.add(atom);
1055
+ }
1056
+ }
1057
+ }
1058
+ return hydroxyAtoms;
1059
+ }
1060
+
1061
+ static List<Atom> findnAtomsForSubstitution(List<Atom> atomList, Atom preferredAtom, int numberOfSubstitutionsRequired, int bondOrder, boolean takeIntoAccountOutValency, boolean preserveValency) {
1062
+ int atomCount = atomList.size();
1063
+ int startingIndex = preferredAtom != null ? atomList.indexOf(preferredAtom) : 0;
1064
+ if (startingIndex < 0){
1065
+ throw new IllegalArgumentException("OPSIN Bug: preferredAtom should be part of the list of atoms to search through");
1066
+ }
1067
+ CyclicAtomList atoms = new CyclicAtomList(atomList, startingIndex - 1);//next() will retrieve the atom at the startingIndex
1068
+ List<Atom> substitutableAtoms = new ArrayList<>();
1069
+ if (atomCount == 1 && ELEMENTARYATOM_TYPE_VAL.equals(atomList.get(0).getFrag().getType())) {
1070
+ Atom atom = atomList.get(0);
1071
+ int timesAtomCanBeSubstituted = getTimesElementaryAtomCanBeSubstituted(atom);
1072
+ for (int j = 1; j <= timesAtomCanBeSubstituted; j++) {
1073
+ substitutableAtoms.add(atom);
1074
+ }
1075
+ }
1076
+ else {
1077
+ for (int i = 0; i < atomCount; i++) {//aromaticity preserved, standard valency assumed, characteristic atoms ignored
1078
+ Atom atom = atoms.next();
1079
+ if (!FragmentTools.isCharacteristicAtom(atom) || (numberOfSubstitutionsRequired == 1 && atom == preferredAtom)) {
1080
+ int currentExpectedValency = atom.determineValency(takeIntoAccountOutValency);
1081
+ int usedValency = atom.getIncomingValency() + (atom.hasSpareValency() ? 1 : 0) + (takeIntoAccountOutValency ? atom.getOutValency() : 0);
1082
+ int timesAtomCanBeSubstituted = ((currentExpectedValency - usedValency)/ bondOrder);
1083
+ for (int j = 1; j <= timesAtomCanBeSubstituted; j++) {
1084
+ substitutableAtoms.add(atom);
1085
+ }
1086
+ }
1087
+ }
1088
+ }
1089
+ if (substitutableAtoms.size() >= numberOfSubstitutionsRequired){
1090
+ return substitutableAtoms;
1091
+ }
1092
+ substitutableAtoms.clear();
1093
+ for (int i = 0; i < atomCount; i++) {//aromaticity preserved, standard valency assumed, functional suffixes ignored
1094
+ Atom atom = atoms.next();
1095
+ if (!FragmentTools.isFunctionalAtomOrAldehyde(atom) || (numberOfSubstitutionsRequired == 1 && atom == preferredAtom)) {
1096
+ int currentExpectedValency = atom.determineValency(takeIntoAccountOutValency);
1097
+ int usedValency = atom.getIncomingValency() + (atom.hasSpareValency() ? 1 : 0) + (takeIntoAccountOutValency ? atom.getOutValency() : 0);
1098
+ int timesAtomCanBeSubstituted = ((currentExpectedValency - usedValency)/ bondOrder);
1099
+ for (int j = 1; j <= timesAtomCanBeSubstituted; j++) {
1100
+ substitutableAtoms.add(atom);
1101
+ }
1102
+ }
1103
+ }
1104
+ if (substitutableAtoms.size() >= numberOfSubstitutionsRequired){
1105
+ return substitutableAtoms;
1106
+ }
1107
+ if (preserveValency) {
1108
+ return null;
1109
+ }
1110
+ substitutableAtoms.clear();
1111
+
1112
+ for (int i = 0; i < atomCount; i++) {//aromaticity preserved, any sensible valency allowed, anything substitutable
1113
+ Atom atom = atoms.next();
1114
+ Integer maximumValency = ValencyChecker.getMaximumValency(atom);
1115
+ if (maximumValency != null) {
1116
+ int usedValency = atom.getIncomingValency() + (atom.hasSpareValency() ? 1 : 0) + (takeIntoAccountOutValency ? atom.getOutValency() : 0);
1117
+ int timesAtomCanBeSubstituted = ((maximumValency - usedValency)/ bondOrder);
1118
+ for (int j = 1; j <= timesAtomCanBeSubstituted; j++) {
1119
+ substitutableAtoms.add(atom);
1120
+ }
1121
+ }
1122
+ else{
1123
+ for (int j = 0; j < numberOfSubstitutionsRequired; j++) {
1124
+ substitutableAtoms.add(atom);
1125
+ }
1126
+ }
1127
+ }
1128
+ if (substitutableAtoms.size() >= numberOfSubstitutionsRequired){
1129
+ return substitutableAtoms;
1130
+ }
1131
+ substitutableAtoms.clear();
1132
+
1133
+ for (int i = 0; i < atomCount; i++) {//aromaticity dropped, any sensible valency allowed, anything substitutable
1134
+ Atom atom = atoms.next();
1135
+ Integer maximumValency = ValencyChecker.getMaximumValency(atom);
1136
+ if (maximumValency != null) {
1137
+ int usedValency = atom.getIncomingValency() + (takeIntoAccountOutValency ? atom.getOutValency() : 0);
1138
+ int timesAtomCanBeSubstituted = ((maximumValency - usedValency)/ bondOrder);
1139
+ for (int j = 1; j <= timesAtomCanBeSubstituted; j++) {
1140
+ substitutableAtoms.add(atom);
1141
+ }
1142
+ }
1143
+ else {
1144
+ for (int j = 0; j < numberOfSubstitutionsRequired; j++) {
1145
+ substitutableAtoms.add(atom);
1146
+ }
1147
+ }
1148
+ }
1149
+ if (substitutableAtoms.size() >= numberOfSubstitutionsRequired){
1150
+ return substitutableAtoms;
1151
+ }
1152
+ return null;
1153
+ }
1154
+
1155
+ private static int getTimesElementaryAtomCanBeSubstituted(Atom atom) {
1156
+ Integer oxidationNumber = atom.getProperty(Atom.OXIDATION_NUMBER);//explicitly set oxidation state
1157
+ if (oxidationNumber == null) {
1158
+ String oxidationStates = atom.getFrag().getTokenEl().getAttributeValue(COMMONOXIDATIONSTATESANDMAX_ATR);//properties of this element
1159
+ if (oxidationStates != null) {
1160
+ String[] commonOxidationStates = oxidationStates.split(":")[0].split(",");
1161
+ //highest common oxidation state
1162
+ oxidationNumber = Integer.parseInt(commonOxidationStates[commonOxidationStates.length - 1]);
1163
+ }
1164
+ else {
1165
+ oxidationNumber = 0;
1166
+ }
1167
+ }
1168
+
1169
+ int usedValency = atom.getIncomingValency();
1170
+ return (oxidationNumber > usedValency) ? oxidationNumber - usedValency : 0;
1171
+ }
1172
+
1173
+ static List<Atom> findnAtomsForSubstitution(List<Atom> atomList, Atom preferredAtom, int numberOfSubstitutionsRequired, int bondOrder, boolean takeIntoAccountOutValency) {
1174
+ return findnAtomsForSubstitution(atomList, preferredAtom, numberOfSubstitutionsRequired, bondOrder, takeIntoAccountOutValency, false);
1175
+ }
1176
+
1177
+ static List<Atom> findnAtomsForSubstitution(Fragment frag, Atom preferredAtom, int numberOfSubstitutionsRequired, int bondOrder, boolean takeIntoAccountOutValency) {
1178
+ return findnAtomsForSubstitution(frag.getAtomList(), preferredAtom, numberOfSubstitutionsRequired, bondOrder, takeIntoAccountOutValency);
1179
+ }
1180
+
1181
+ /**
1182
+ * Returns a list of atoms of size >= numberOfSubstitutionsDesired (or null if this not possible)
1183
+ * An atom must have have sufficient valency to support a substituent requiring a bond of order bondOrder
1184
+ * If an atom can support multiple substituents it will appear in the list multiple times
1185
+ * This method iterates over the the fragment atoms attempting to fulfil these requirements with incrementally more lenient constraints:
1186
+ * aromaticity preserved, standard valency assumed, characteristic atoms ignored
1187
+ * aromaticity preserved, standard valency assumed, functional suffixes ignored
1188
+ * aromaticity preserved, any sensible valency allowed, anything substitutable
1189
+ * aromaticity dropped, any sensible valency allowed, anything substitutable
1190
+ *
1191
+ * Iteration starts from the defaultInAtom (if applicable, else the first atom) i.e. the defaultInAtom if substitutable will be the first atom in the list
1192
+ * @param frag
1193
+ * @param numberOfSubstitutionsRequired
1194
+ * @param bondOrder
1195
+ * @return
1196
+ */
1197
+ static List<Atom> findnAtomsForSubstitution(Fragment frag, int numberOfSubstitutionsRequired, int bondOrder) {
1198
+ return findnAtomsForSubstitution(frag.getAtomList(), frag.getDefaultInAtom(), numberOfSubstitutionsRequired, bondOrder, true);
1199
+ }
1200
+
1201
+ /**
1202
+ * Returns a list of the most preferable atoms for substitution (empty list if none are)
1203
+ * An atom must have have sufficient valency to support a substituent requiring a bond of order bondOrder
1204
+ * If an atom can support multiple substituents it will appear in the list multiple times
1205
+ * This method iterates over the the fragment atoms attempting to fulfil these requirements with incrementally more lenient constraints:
1206
+ * aromaticity preserved, standard valency assumed, characteristic atoms ignored
1207
+ * aromaticity preserved, standard valency assumed, functional suffixes ignored
1208
+ * aromaticity preserved, any sensible valency allowed, anything substitutable
1209
+ * aromaticity dropped, any sensible valency allowed, anything substitutable
1210
+ *
1211
+ * Iteration starts from the defaultInAtom (if applicable, else the first atom) i.e. the defaultInAtom if substitutable will be the first atom in the list
1212
+ * @param frag
1213
+ * @param bondOrder
1214
+ * @return
1215
+ */
1216
+ static List<Atom> findSubstituableAtoms(Fragment frag, int bondOrder) {
1217
+ List<Atom> potentialAtoms = findnAtomsForSubstitution(frag, 1, bondOrder);
1218
+ if (potentialAtoms == null) {
1219
+ return Collections.emptyList();
1220
+ }
1221
+ return potentialAtoms;
1222
+ }
1223
+
1224
+ static Atom lastNonSuffixCarbonWithSufficientValency(Fragment conjunctiveFragment) {
1225
+ List<Atom> atomList = conjunctiveFragment.getAtomList();
1226
+ for (int i = atomList.size()-1; i >=0; i--) {
1227
+ Atom a = atomList.get(i);
1228
+ if (a.getType().equals(SUFFIX_TYPE_VAL)){
1229
+ continue;
1230
+ }
1231
+ if (a.getElement() != ChemEl.C){
1232
+ continue;
1233
+ }
1234
+ if (ValencyChecker.checkValencyAvailableForBond(a, 1)){
1235
+ return a;
1236
+ }
1237
+ }
1238
+ return null;
1239
+ }
1240
+
1241
+
1242
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FunctionalAtom.java ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ /**
4
+ * Struct for a FunctionalAtom. As expected holds the atom.
5
+ * This is used to indicate, for example, that this atom may form an ester
6
+ *
7
+ * @author dl387
8
+ *
9
+ */
10
+ class FunctionalAtom {
11
+ private final Atom atom;
12
+
13
+ FunctionalAtom(Atom atom) {
14
+ this.atom = atom;
15
+ }
16
+
17
+ Atom getAtom() {
18
+ return atom;
19
+ }
20
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FunctionalReplacement.java ADDED
@@ -0,0 +1,1176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*;
4
+
5
+ import java.util.ArrayList;
6
+ import java.util.Collections;
7
+ import java.util.Comparator;
8
+ import java.util.Iterator;
9
+ import java.util.LinkedHashSet;
10
+ import java.util.LinkedList;
11
+ import java.util.List;
12
+ import java.util.Set;
13
+ import java.util.regex.Pattern;
14
+
15
+ /**
16
+ * Methods for performing functional replacement
17
+ * @author dl387
18
+ *
19
+ */
20
+ class FunctionalReplacement {
21
+
22
+ /**
23
+ * Sorts infix transformations by the number of acceptable inputs for the transformation.
24
+ * e.g. thio ends up towards the end of the list as it accepts both -O or =O whilst say imido only accepts =O
25
+ * @author dl387
26
+ *
27
+ */
28
+ private static class SortInfixTransformations implements Comparator<String> {
29
+ public int compare(String infixTransformation1, String infixTransformation2) {
30
+ int allowedInputs1 = infixTransformation1.split(",").length;
31
+ int allowedInputs2 = infixTransformation2.split(",").length;
32
+ if (allowedInputs1 < allowedInputs2){//infixTransformation1 preferred
33
+ return -1;
34
+ }
35
+ if (allowedInputs1 > allowedInputs2){//infixTransformation2 preferred
36
+ return 1;
37
+ }
38
+ else{
39
+ return 0;
40
+ }
41
+ }
42
+ }
43
/**
 * The kinds of prefix that processPrefixFunctionalReplacementNomenclature considers
 * as potentially indicating functional replacement
 */
private static enum PREFIX_REPLACEMENT_TYPE{
	chalcogen,//ambiguous: thio/seleno/telluro may be functional replacement or substituents
	halideOrPseudoHalide,//only mean functional replacement when applied to non carboxylic acids
	dedicatedFunctionalReplacementPrefix,//no ambiguity exists; use in an inappropriate position is an error
	hydrazono,//ambiguous, only applies to non carboxylic acid
	peroxy//ambiguous, also applies to etheric oxygen
}
50
+
51
/** Pattern for the chalcogen replacement prefixes: thio, seleno and telluro */
static final Pattern matchChalcogenReplacement= Pattern.compile("thio|seleno|telluro");

/** The build state that this instance operates on */
private final BuildState state;

/**
 * @param state The build state that functional replacement will operate on
 */
FunctionalReplacement(BuildState state) {
	this.state = state;
}
58
+
59
+ /**
60
+ * Applies the effects of acid replacing functional class nomenclature
61
+ * This must be performed early so that prefix/infix functional replacement is performed correctly
62
+ * and so that element symbol locants are assigned appropriately
63
+ * @param finalSubOrRootInWord
64
+ * @param word
65
+ * @throws ComponentGenerationException
66
+ * @throws StructureBuildingException
67
+ */
68
+ void processAcidReplacingFunctionalClassNomenclature(Element finalSubOrRootInWord, Element word) throws ComponentGenerationException, StructureBuildingException {
69
+ Element wordRule = OpsinTools.getParentWordRule(word);
70
+ if (WordRule.valueOf(wordRule.getAttributeValue(WORDRULE_ATR)) == WordRule.acidReplacingFunctionalGroup){
71
+ Element parentWordRule = word.getParent();
72
+ if (parentWordRule.indexOf(word)==0){
73
+ for (int i = 1, l = parentWordRule.getChildCount(); i < l ; i++) {
74
+ Element acidReplacingWord = parentWordRule.getChild(i);
75
+ if (!acidReplacingWord.getName().equals(WORD_EL)) {
76
+ throw new RuntimeException("OPSIN bug: problem with acidReplacingFunctionalGroup word rule");
77
+ }
78
+ String type = acidReplacingWord.getAttributeValue(TYPE_ATR);
79
+ if (type.equals(WordType.full.toString())) {
80
+ //case where functionalTerm is substituted
81
+ //as words are processed from right to left in cases like phosphoric acid tri(ethylamide) this will be phosphoric acid ethylamide ethylamide ethylamide
82
+ processAcidReplacingFunctionalClassNomenclatureFullWord(finalSubOrRootInWord, acidReplacingWord);
83
+ }
84
+ else if (type.equals(WordType.functionalTerm.toString())) {
85
+ processAcidReplacingFunctionalClassNomenclatureFunctionalWord(finalSubOrRootInWord, acidReplacingWord);
86
+ }
87
+ else {
88
+ throw new RuntimeException("OPSIN bug: problem with acidReplacingFunctionalGroup word rule");
89
+ }
90
+ }
91
+ }
92
+ }
93
+ }
94
+
95
/**
 * Performs prefix functional replacement e.g. thio in thioacetic acid replaces an O with S
 * Prefixes will present themselves as substituents. There is potential ambiguity between usage as a substituent
 * and as a functional replacement term in some cases. If the substituent is deemed to indicate functional replacement
 * it will be detached and its effects applied to the subsequent group
 *
 * The list of groups and substituents given to this method will be mutated in the process.
 *
 * For heterocyclic rings functional replacement should technically be limited to :
 * pyran, morpholine, chromene, isochromene and xanthene, chromane and isochromane.
 * but this is not currently enforced
 *
 * Groups are examined from last to first. A dedicated functional replacement prefix that cannot be
 * interpreted as functional replacement results in a ComponentGenerationException; the other
 * prefix types are simply left as substituents in that case.
 * @param groups
 * @param substituents
 * @return boolean: has any functional replacement occurred (i.e. has the number of groups changed)
 * @throws StructureBuildingException
 * @throws ComponentGenerationException
 */
boolean processPrefixFunctionalReplacementNomenclature(List<Element> groups, List<Element> substituents) throws StructureBuildingException, ComponentGenerationException {
	int originalNumberOfGroups = groups.size();
	for (int i = originalNumberOfGroups-1; i >=0; i--) {
		Element group =groups.get(i);
		String groupValue = group.getValue();
		//classify the group; replacementType stays null if it is not a candidate replacement prefix
		PREFIX_REPLACEMENT_TYPE replacementType = null;
		if (matchChalcogenReplacement.matcher(groupValue).matches() && !isChalcogenSubstituent(group) || groupValue.equals("thiono")){
			replacementType =PREFIX_REPLACEMENT_TYPE.chalcogen;
		}
		else if (HALIDEORPSEUDOHALIDE_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){
			replacementType =PREFIX_REPLACEMENT_TYPE.halideOrPseudoHalide;
		}
		else if (DEDICATEDFUNCTIONALREPLACEMENTPREFIX_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){
			replacementType =PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix;
		}
		else if (groupValue.equals("hydrazono")){
			replacementType =PREFIX_REPLACEMENT_TYPE.hydrazono;
		}
		else if (groupValue.equals("peroxy")){
			replacementType =PREFIX_REPLACEMENT_TYPE.peroxy;
		}
		if (replacementType != null) {
			//need to check whether this is an instance of functional replacement by checking the substituent/root it is applying to
			Element substituent = group.getParent();
			Element nextSubOrBracket = OpsinTools.getNextSibling(substituent);
			if (nextSubOrBracket!=null && (nextSubOrBracket.getName().equals(ROOT_EL) || nextSubOrBracket.getName().equals(SUBSTITUENT_EL))){
				Element groupToBeModified = nextSubOrBracket.getFirstChildElement(GROUP_EL);
				if (groupPrecededByElementThatBlocksPrefixReplacementInterpetation(groupToBeModified)) {
					if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){
						throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue);
					}
					continue;//not 2,2'-thiodipyran
				}
				Element locantEl = null;//null unless a locant that agrees with the multiplier is present
				Element multiplierEl = null;
				int numberOfAtomsToReplace = 1;//the number of atoms to be functionally replaced, modified by a multiplier e.g. dithio
				//the element before the group may be a multiplier (itself optionally preceded by a locant) or a locant
				Element possibleMultiplier = OpsinTools.getPreviousSibling(group);
				if (possibleMultiplier != null) {
					Element possibleLocant;
					if (possibleMultiplier.getName().equals(MULTIPLIER_EL)) {
						numberOfAtomsToReplace = Integer.valueOf(possibleMultiplier.getAttributeValue(VALUE_ATR));
						possibleLocant = OpsinTools.getPreviousSibling(possibleMultiplier);
						multiplierEl = possibleMultiplier;
					}
					else{
						possibleLocant = possibleMultiplier;
					}
					//only locant elements without a type attribute are considered
					if (possibleLocant !=null && possibleLocant.getName().equals(LOCANT_EL) && possibleLocant.getAttribute(TYPE_ATR) == null) {
						int numberOfLocants = possibleLocant.getValue().split(",").length;
						if (numberOfLocants == numberOfAtomsToReplace){//locants and number of replacements agree
							locantEl = possibleLocant;
						}
						else if (numberOfAtomsToReplace > 1) {//doesn't look like prefix functional replacement
							if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){
								throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue);
							}
							continue;
						}
					}
				}

				//the value returned by whichever replacement routine is applicable; a value of 0 means the prefix is left in place
				int oxygenReplaced;
				if (replacementType == PREFIX_REPLACEMENT_TYPE.chalcogen) {
					oxygenReplaced = performChalcogenFunctionalReplacement(groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR));
				}
				else if (replacementType == PREFIX_REPLACEMENT_TYPE.peroxy) {
					if (nextSubOrBracket.getName().equals(SUBSTITUENT_EL)) {
						//peroxy replacement is only attempted when the following element is the root
						continue;
					}
					oxygenReplaced = performPeroxyFunctionalReplacement(groupToBeModified, locantEl, numberOfAtomsToReplace);
				}
				else if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){
					//only allowed on non carboxylic acids, with an exception made for imido applied to "form"
					if (!groupToBeModified.getAttributeValue(TYPE_ATR).equals(NONCARBOXYLICACID_TYPE_VAL)
						&& !(groupToBeModified.getValue().equals("form") && groupValue.equals("imido"))){
						throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue);
					}
					oxygenReplaced = performFunctionalReplacementOnAcid(groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR));
					if (oxygenReplaced==0){
						throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue);
					}
				}
				else if (replacementType == PREFIX_REPLACEMENT_TYPE.hydrazono || replacementType == PREFIX_REPLACEMENT_TYPE.halideOrPseudoHalide){
					Fragment acidFrag = groupToBeModified.getFrag();
					if (!groupToBeModified.getAttributeValue(TYPE_ATR).equals(NONCARBOXYLICACID_TYPE_VAL) ||
						acidHasSufficientHydrogenForSubstitutionInterpretation(acidFrag, group.getFrag().getOutAtom(0).getValency(), locantEl)){
						//hydrazono replacement only applies to non carboxylic acids e.g. hydrazonooxalic acid
						//need to be careful to note that something like chlorophosphonic acid isn't functional replacement
						continue;
					}
					oxygenReplaced = performFunctionalReplacementOnAcid(groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR));
				}
				else{
					throw new StructureBuildingException("OPSIN bug: Unexpected prefix replacement type");
				}
				if (oxygenReplaced>0){
					//the prefix has been applied: remove its fragment and group, then dissolve its substituent
					state.fragManager.removeFragment(group.getFrag());
					substituent.removeChild(group);
					groups.remove(group);
					List<Element> remainingChildren =substituent.getChildElements();//there may be a locant that should be moved
					//moving children from last to first, each to index 0, keeps their relative order
					for (int j = remainingChildren.size()-1; j>=0; j--){
						Element child =substituent.getChild(j);
						child.detach();
						nextSubOrBracket.insertChild(child, 0);
					}
					substituents.remove(substituent);
					substituent.detach();
					if (oxygenReplaced>1){
						//NOTE(review): assumes a multiplier element was found whenever more than one replacement occurred - confirm
						multiplierEl.detach();
					}
				}
			}
			else if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){
				throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue);
			}
		}
	}
	return groups.size() != originalNumberOfGroups;
}
230
+
231
+ private boolean isChalcogenSubstituent(Element group) {
232
+ //Is this group followed by a hyphen and directly preceded by a substituent i.e. no multiplier/locant
233
+ //e.g. methylthio-
234
+ Element next = OpsinTools.getNextSibling(group);
235
+ if (next != null && next.getName().equals(HYPHEN_EL) &&
236
+ OpsinTools.getPreviousSibling(group) == null) {
237
+ Element previousGroup = OpsinTools.getPreviousGroup(group);
238
+ if (previousGroup != null) {
239
+ //TODO We actually want to know if a carbon atom is the attachment point... but we don't know the attachment point locations at this point
240
+ Element suffix = OpsinTools.getNextSibling(previousGroup, SUFFIX_EL);
241
+ if (suffix == null || suffix.getFrag() == null) {
242
+ for (Atom a : previousGroup.getFrag()) {
243
+ if (a.getElement() == ChemEl.C) {
244
+ return true;
245
+ }
246
+ }
247
+ }
248
+ }
249
+ }
250
+ return false;
251
+ }
252
+
253
+ /**
254
+ * Currently prefix replacement terms must be directly adjacent to the groupToBeModified with an exception made
255
+ * for carbohydrate stereochemistry prefixes e.g. 'gluco' and for substractive prefixes e.g. 'deoxy'
256
+ * @param groupToBeModified
257
+ * @return
258
+ */
259
+ private boolean groupPrecededByElementThatBlocksPrefixReplacementInterpetation(Element groupToBeModified) {
260
+ Element previous = OpsinTools.getPreviousSibling(groupToBeModified);
261
+ while (previous !=null && (previous.getName().equals(SUBTRACTIVEPREFIX_EL)
262
+ || (previous.getName().equals(STEREOCHEMISTRY_EL) && previous.getAttributeValue(TYPE_ATR).equals(CARBOHYDRATECONFIGURATIONPREFIX_TYPE_VAL)))){
263
+ previous = OpsinTools.getPreviousSibling(previous);
264
+ }
265
+ return previous != null;
266
+ }
267
+
268
+
269
+ /*
270
+ *
271
+ */
272
+
273
+ /**
274
+ * Performs functional replacement using infixes e.g. thio in ethanthioic acid replaces an O with S
275
+ * @param suffixFragments May be modified if a multiplier is determined to mean multiplication of a suffix, usually untouched
276
+ * @param suffixes The suffix elements May be modified if a multiplier is determined to mean multiplication of a suffix, usually untouched
277
+ * @throws StructureBuildingException
278
+ * @throws ComponentGenerationException
279
+ */
280
+ void processInfixFunctionalReplacementNomenclature(List<Element> suffixes, List<Fragment> suffixFragments) throws StructureBuildingException, ComponentGenerationException {
281
+ for (int i = 0; i < suffixes.size(); i++) {
282
+ Element suffix = suffixes.get(i);
283
+ if (suffix.getAttribute(INFIX_ATR) != null){
284
+ Fragment fragToApplyInfixTo = suffix.getFrag();
285
+ Element possibleAcidGroup = OpsinTools.getPreviousSiblingIgnoringCertainElements(suffix, new String[]{MULTIPLIER_EL, INFIX_EL, SUFFIX_EL});
286
+ if (possibleAcidGroup !=null && possibleAcidGroup.getName().equals(GROUP_EL) &&
287
+ (possibleAcidGroup.getAttributeValue(TYPE_ATR).equals(NONCARBOXYLICACID_TYPE_VAL)|| possibleAcidGroup.getAttributeValue(TYPE_ATR).equals(CHALCOGENACIDSTEM_TYPE_VAL))){
288
+ fragToApplyInfixTo = possibleAcidGroup.getFrag();
289
+ }
290
+ if (fragToApplyInfixTo ==null){
291
+ throw new ComponentGenerationException("infix has erroneously been assigned to a suffix which does not correspond to a suffix fragment. suffix: " + suffix.getValue());
292
+ }
293
+ //e.g. =O:S,-O:S (which indicates replacing either a double or single bonded oxygen with S)
294
+ //This is semicolon delimited for each infix
295
+ List<String> infixTransformations = StringTools.arrayToList(suffix.getAttributeValue(INFIX_ATR).split(";"));
296
+
297
+ List<Atom> atomList =fragToApplyInfixTo.getAtomList();
298
+ LinkedList<Atom> singleBondedOxygen = new LinkedList<>();
299
+ LinkedList<Atom> doubleBondedOxygen = new LinkedList<>();
300
+ populateTerminalSingleAndDoubleBondedOxygen(atomList, singleBondedOxygen, doubleBondedOxygen);
301
+ int oxygenAvailable = singleBondedOxygen.size() +doubleBondedOxygen.size();
302
+
303
+ /*
304
+ * Modifies suffixes, suffixFragments, suffix and infixTransformations as appropriate
305
+ */
306
+ disambiguateMultipliedInfixMeaning(suffixes, suffixFragments, suffix, infixTransformations, oxygenAvailable);
307
+
308
+ /*
309
+ * Sort infixTransformations so more specific transformations are performed first
310
+ * e.g. ethanthioimidic acid-->ethanimidthioic acid as imid can only apply to the double bonded oxygen
311
+ */
312
+ Collections.sort(infixTransformations, new SortInfixTransformations());
313
+
314
+ for (String infixTransformation : infixTransformations) {
315
+ String[] transformationArray = infixTransformation.split(":");
316
+ if (transformationArray.length !=2){
317
+ throw new StructureBuildingException("Atom to be replaced and replacement not specified correctly in infix: " + infixTransformation);
318
+ }
319
+ String[] transformations = transformationArray[0].split(",");
320
+ String replacementSMILES = transformationArray[1];
321
+ boolean acceptDoubleBondedOxygen = false;
322
+ boolean acceptSingleBondedOxygen = false;
323
+ boolean nitrido =false;
324
+ for (String transformation : transformations) {
325
+ if (transformation.startsWith("=")){
326
+ acceptDoubleBondedOxygen = true;
327
+ }
328
+ else if (transformation.startsWith("-")){
329
+ acceptSingleBondedOxygen = true;
330
+ }
331
+ else if (transformation.startsWith("#")){
332
+ nitrido =true;
333
+ }
334
+ else{
335
+ throw new StructureBuildingException("Malformed infix transformation. Expected to start with either - or =. Transformation was: " +transformation);
336
+ }
337
+ if (transformation.length()<2 || transformation.charAt(1)!='O'){
338
+ throw new StructureBuildingException("Only replacement by oxygen is supported. Check infix defintions");
339
+ }
340
+ }
341
+ boolean infixAssignmentAmbiguous =false;
342
+ if ((acceptSingleBondedOxygen ||nitrido) && !acceptDoubleBondedOxygen){
343
+ if (singleBondedOxygen.size() ==0){
344
+ throw new StructureBuildingException("Cannot find single bonded oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!");
345
+ }
346
+ if (singleBondedOxygen.size() !=1){
347
+ infixAssignmentAmbiguous=true;
348
+ }
349
+ }
350
+ if (!acceptSingleBondedOxygen && (acceptDoubleBondedOxygen || nitrido)){
351
+ if (doubleBondedOxygen.size()==0){
352
+ throw new StructureBuildingException("Cannot find double bonded oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!");
353
+ }
354
+ if (doubleBondedOxygen.size() != 1){
355
+ infixAssignmentAmbiguous=true;
356
+ }
357
+ }
358
+ if (acceptSingleBondedOxygen && acceptDoubleBondedOxygen){
359
+ if (oxygenAvailable ==0){
360
+ throw new StructureBuildingException("Cannot find oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!");
361
+ }
362
+ if (oxygenAvailable !=1){
363
+ infixAssignmentAmbiguous=true;
364
+ }
365
+ }
366
+
367
+ Set<Atom> ambiguousElementAtoms = new LinkedHashSet<>();
368
+ Atom atomToUse = null;
369
+ if ((acceptDoubleBondedOxygen || nitrido) && doubleBondedOxygen.size()>0 ){
370
+ atomToUse = doubleBondedOxygen.removeFirst();
371
+ }
372
+ else if (acceptSingleBondedOxygen && singleBondedOxygen.size()>0 ){
373
+ atomToUse = singleBondedOxygen.removeFirst();
374
+ }
375
+ else{
376
+ throw new StructureBuildingException("Cannot find oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!");//this would be a bug
377
+ }
378
+ Fragment replacementFrag = state.fragManager.buildSMILES(replacementSMILES, SUFFIX_TYPE_VAL, NONE_LABELS_VAL);
379
+ if (replacementFrag.getOutAtomCount()>0){//SMILES include an indication of the bond order the replacement fragment will have, this is not intended to be an outatom
380
+ replacementFrag.removeOutAtom(0);
381
+ }
382
+ Atom atomThatWillReplaceOxygen =replacementFrag.getFirstAtom();
383
+ if (replacementFrag.getAtomCount()==1 && atomThatWillReplaceOxygen.getElement().isChalcogen()){
384
+ atomThatWillReplaceOxygen.setCharge(atomToUse.getCharge());
385
+ atomThatWillReplaceOxygen.setProtonsExplicitlyAddedOrRemoved(atomToUse.getProtonsExplicitlyAddedOrRemoved());
386
+ }
387
+ removeOrMoveObsoleteFunctionalAtoms(atomToUse, replacementFrag);//also will move charge if necessary
388
+ moveObsoleteOutAtoms(atomToUse, replacementFrag);//if the replaced atom was an outatom the fragments outatom list need to be corrected
389
+ if (nitrido){
390
+ atomToUse.getFirstBond().setOrder(3);
391
+ Atom removedHydroxy = singleBondedOxygen.removeFirst();
392
+ state.fragManager.removeAtomAndAssociatedBonds(removedHydroxy);
393
+ removeAssociatedFunctionalAtom(removedHydroxy);
394
+ }
395
+ state.fragManager.incorporateFragment(replacementFrag, atomToUse.getFrag());
396
+ state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(atomToUse, atomThatWillReplaceOxygen);
397
+ if (infixAssignmentAmbiguous){
398
+ ambiguousElementAtoms.add(atomThatWillReplaceOxygen);
399
+ if (atomThatWillReplaceOxygen.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)!=null){
400
+ ambiguousElementAtoms.addAll(atomThatWillReplaceOxygen.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT));
401
+ }
402
+ }
403
+ if (infixAssignmentAmbiguous){//record what atoms could have been replaced. Often this ambiguity is resolved later e.g. S-methyl ethanthioate
404
+ for (Atom a : doubleBondedOxygen) {
405
+ ambiguousElementAtoms.add(a);
406
+ if (a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)!=null){
407
+ ambiguousElementAtoms.addAll(a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT));
408
+ }
409
+ }
410
+ for (Atom a : singleBondedOxygen) {
411
+ ambiguousElementAtoms.add(a);
412
+ if (a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)!=null){
413
+ ambiguousElementAtoms.addAll(a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT));
414
+ }
415
+ }
416
+ for (Atom atom : ambiguousElementAtoms) {
417
+ atom.setProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT, ambiguousElementAtoms);
418
+ }
419
+ }
420
+ }
421
+ }
422
+ }
423
+ }
424
+
425
+ /*
426
+ * Functional class nomenclature
427
+ */
428
+
429
+ /**
430
+ * Replaces the appropriate number of functional oxygen atoms with the corresponding fragment
431
+ * @param acidContainingRoot
432
+ * @param acidReplacingWord
433
+ * @throws ComponentGenerationException
434
+ * @throws StructureBuildingException
435
+ */
436
+ private void processAcidReplacingFunctionalClassNomenclatureFullWord(Element acidContainingRoot, Element acidReplacingWord) throws ComponentGenerationException, StructureBuildingException {
437
+ String locant = acidReplacingWord.getAttributeValue(LOCANT_ATR);
438
+ Element acidReplacingGroup = StructureBuildingMethods.findRightMostGroupInBracket(acidReplacingWord);
439
+ if (acidReplacingGroup ==null){
440
+ throw new ComponentGenerationException("OPSIN bug: acid replacing group not found where one was expected for acidReplacingFunctionalGroup wordRule");
441
+ }
442
+ String functionalGroupName = acidReplacingGroup.getValue();
443
+ Fragment acidReplacingFrag = acidReplacingGroup.getFrag();
444
+ if (acidReplacingGroup.getParent().getChildCount() != 1){
445
+ throw new ComponentGenerationException("Unexpected qualifier to: " + functionalGroupName);
446
+ }
447
+
448
+ Element groupToBeModified = acidContainingRoot.getFirstChildElement(GROUP_EL);
449
+ List<Atom> oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(groupToBeModified);
450
+ if (oxygenAtoms.size() == 0){
451
+ oxygenAtoms = findFunctionalOxygenAtomsInGroup(groupToBeModified);
452
+ }
453
+ if (oxygenAtoms.size() == 0){
454
+ List<Element> conjunctiveSuffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, CONJUNCTIVESUFFIXGROUP_EL);
455
+ for (Element conjunctiveSuffixElement : conjunctiveSuffixElements) {
456
+ oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(conjunctiveSuffixElement));
457
+ }
458
+ }
459
+ if (oxygenAtoms.size() < 1){
460
+ throw new ComponentGenerationException("Insufficient oxygen to replace with " + functionalGroupName +"s in " + acidContainingRoot.getFirstChildElement(GROUP_EL).getValue());
461
+ }
462
+
463
+ boolean isAmide = functionalGroupName.equals("amide") || functionalGroupName.equals("amid");
464
+ if (isAmide) {
465
+ if (acidReplacingFrag.getAtomCount()!=1){
466
+ throw new ComponentGenerationException("OPSIN bug: " + functionalGroupName + " not found where expected");
467
+ }
468
+ Atom amideNitrogen = acidReplacingFrag.getFirstAtom();
469
+ amideNitrogen.neutraliseCharge();
470
+ amideNitrogen.clearLocants();
471
+ acidReplacingFrag.addMappingToAtomLocantMap("N", amideNitrogen);
472
+ }
473
+ Atom chosenOxygen = locant != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locant) : oxygenAtoms.get(0);
474
+ state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(chosenOxygen, acidReplacingFrag.getFirstAtom());
475
+ removeAssociatedFunctionalAtom(chosenOxygen);
476
+ }
477
+
478
+
479
+ /**
480
+ * Replaces the appropriate number of functional oxygen atoms with the corresponding fragment
481
+ * @param acidContainingRoot
482
+ * @param functionalWord
483
+ * @throws ComponentGenerationException
484
+ * @throws StructureBuildingException
485
+ */
486
+ private void processAcidReplacingFunctionalClassNomenclatureFunctionalWord(Element acidContainingRoot, Element functionalWord) throws ComponentGenerationException, StructureBuildingException {
487
+ if (functionalWord !=null && functionalWord.getAttributeValue(TYPE_ATR).equals(WordType.functionalTerm.toString())){
488
+ Element functionalTerm = functionalWord.getFirstChildElement(FUNCTIONALTERM_EL);
489
+ if (functionalTerm ==null){
490
+ throw new ComponentGenerationException("OPSIN bug: functionalTerm word not found where one was expected for acidReplacingFunctionalGroup wordRule");
491
+ }
492
+ Element acidReplacingGroup = functionalTerm.getFirstChildElement(FUNCTIONALGROUP_EL);
493
+ String functionalGroupName = acidReplacingGroup.getValue();
494
+ Element possibleLocantOrMultiplier = OpsinTools.getPreviousSibling(acidReplacingGroup);
495
+ int numberOfAcidicHydroxysToReplace = 1;
496
+ String[] locants = null;
497
+ if (possibleLocantOrMultiplier != null){
498
+ if (possibleLocantOrMultiplier.getName().equals(MULTIPLIER_EL)){
499
+ numberOfAcidicHydroxysToReplace = Integer.parseInt(possibleLocantOrMultiplier.getAttributeValue(VALUE_ATR));
500
+ possibleLocantOrMultiplier.detach();
501
+ possibleLocantOrMultiplier = OpsinTools.getPreviousSibling(acidReplacingGroup);
502
+ }
503
+ if (possibleLocantOrMultiplier != null){
504
+ if (possibleLocantOrMultiplier.getName().equals(LOCANT_EL)){
505
+ locants = StringTools.removeDashIfPresent(possibleLocantOrMultiplier.getValue()).split(",");
506
+ possibleLocantOrMultiplier.detach();
507
+ }
508
+ else {
509
+ throw new ComponentGenerationException("Unexpected qualifier to acidReplacingFunctionalGroup functionalTerm");
510
+ }
511
+ }
512
+ }
513
+ if (functionalTerm.getChildCount() != 1){
514
+ throw new ComponentGenerationException("Unexpected qualifier to acidReplacingFunctionalGroup functionalTerm");
515
+ }
516
+
517
+ Element groupToBeModified = acidContainingRoot.getFirstChildElement(GROUP_EL);
518
+ List<Atom> oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(groupToBeModified);
519
+ if (oxygenAtoms.size()==0) {
520
+ oxygenAtoms = findFunctionalOxygenAtomsInGroup(groupToBeModified);
521
+ }
522
+ if (oxygenAtoms.size()==0) {
523
+ List<Element> conjunctiveSuffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, CONJUNCTIVESUFFIXGROUP_EL);
524
+ for (Element conjunctiveSuffixElement : conjunctiveSuffixElements) {
525
+ oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(conjunctiveSuffixElement));
526
+ }
527
+ }
528
+ if (numberOfAcidicHydroxysToReplace > oxygenAtoms.size()){
529
+ throw new ComponentGenerationException("Insufficient oxygen to replace with nitrogen in " + acidContainingRoot.getFirstChildElement(GROUP_EL).getValue());
530
+ }
531
+ boolean isAmide = functionalGroupName.equals("amide") || functionalGroupName.equals("amid");
532
+ if (isAmide) {
533
+ for (int i = 0; i < numberOfAcidicHydroxysToReplace; i++) {
534
+ Atom functionalOxygenToReplace = locants != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locants[i]) : oxygenAtoms.get(i);
535
+ removeAssociatedFunctionalAtom(functionalOxygenToReplace);
536
+ functionalOxygenToReplace.setElement(ChemEl.N);
537
+ }
538
+ }
539
+ else{
540
+ String groupValue = acidReplacingGroup.getAttributeValue(VALUE_ATR);
541
+ String labelsValue = acidReplacingGroup.getAttributeValue(LABELS_ATR);
542
+ Fragment acidReplacingFrag = state.fragManager.buildSMILES(groupValue, SUFFIX_TYPE_VAL, labelsValue != null ? labelsValue : NONE_LABELS_VAL);
543
+ Fragment acidFragment = groupToBeModified.getFrag();
544
+ if (acidFragment.hasLocant("2")){//prefer numeric locants on group to those of replacing group
545
+ for (Atom atom : acidReplacingFrag.getAtomList()) {
546
+ atom.clearLocants();
547
+ }
548
+ }
549
+ Atom firstFunctionalOxygenToReplace = locants != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locants[0]) : oxygenAtoms.get(0);
550
+ state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(firstFunctionalOxygenToReplace, acidReplacingFrag.getFirstAtom());
551
+ removeAssociatedFunctionalAtom(firstFunctionalOxygenToReplace);
552
+ for (int i = 1; i < numberOfAcidicHydroxysToReplace; i++) {
553
+ Fragment clonedHydrazide = state.fragManager.copyAndRelabelFragment(acidReplacingFrag, i);
554
+ Atom functionalOxygenToReplace = locants != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locants[i]) : oxygenAtoms.get(i);
555
+ state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(functionalOxygenToReplace, clonedHydrazide.getFirstAtom());
556
+ state.fragManager.incorporateFragment(clonedHydrazide, functionalOxygenToReplace.getFrag());
557
+ removeAssociatedFunctionalAtom(functionalOxygenToReplace);
558
+ }
559
+ state.fragManager.incorporateFragment(acidReplacingFrag, firstFunctionalOxygenToReplace.getFrag());
560
+ }
561
+ }
562
+ else{
563
+ throw new ComponentGenerationException("amide word not found where expected, bug?");
564
+ }
565
+ }
566
+
567
+ private Atom removeOxygenWithAppropriateLocant(List<Atom> oxygenAtoms, String locant) throws ComponentGenerationException {
568
+ for (Iterator<Atom> iterator = oxygenAtoms.iterator(); iterator.hasNext();) {
569
+ Atom atom = iterator.next();
570
+ if (atom.hasLocant(locant)) {
571
+ iterator.remove();
572
+ return atom;
573
+ }
574
+ }
575
+ //Look for the case whether the locant refers to the backbone
576
+ for (Iterator<Atom> iterator = oxygenAtoms.iterator(); iterator.hasNext();) {
577
+ Atom atom = iterator.next();
578
+ if (OpsinTools.depthFirstSearchForNonSuffixAtomWithLocant(atom, locant) != null){
579
+ iterator.remove();
580
+ return atom;
581
+ }
582
+ }
583
+ throw new ComponentGenerationException("Failed to find acid group at locant: " + locant);
584
+ }
585
+
586
+
587
+ /*
588
+ * Prefix functional replacement nomenclature
589
+ */
590
+
591
+
592
+ private boolean acidHasSufficientHydrogenForSubstitutionInterpretation(Fragment acidFrag, int hydrogenRequiredForSubstitutionInterpretation, Element locantEl) {
593
+ List<Atom> atomsThatWouldBeSubstituted = new ArrayList<>();
594
+ if (locantEl !=null){
595
+ String[] possibleLocants = locantEl.getValue().split(",");
596
+ for (String locant : possibleLocants) {
597
+ Atom atomToBeSubstituted = acidFrag.getAtomByLocant(locant);
598
+ if (atomToBeSubstituted !=null){
599
+ atomsThatWouldBeSubstituted.add(atomToBeSubstituted);
600
+ }
601
+ else{
602
+ atomsThatWouldBeSubstituted.clear();
603
+ atomsThatWouldBeSubstituted.add(acidFrag.getDefaultInAtomOrFirstAtom());
604
+ break;
605
+ }
606
+ }
607
+ }
608
+ else{
609
+ atomsThatWouldBeSubstituted.add(acidFrag.getDefaultInAtomOrFirstAtom());
610
+ }
611
+ for (Atom atom : atomsThatWouldBeSubstituted) {
612
+ if (StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(atom) < hydrogenRequiredForSubstitutionInterpretation){
613
+ return false;//insufficient hydrogens for substitution interpretation
614
+ }
615
+ }
616
+ return true;
617
+ }
618
+
619
+ /**
620
+ * Performs replacement of oxygen atoms by chalogen atoms
621
+ * If this is ambiguous e.g. thioacetate then Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT is populated
622
+ * @param groupToBeModified
623
+ * @param locantEl
624
+ * @param numberOfAtomsToReplace
625
+ * @param replacementSmiles
626
+ * @return
627
+ * @throws StructureBuildingException
628
+ */
629
+ private int performChalcogenFunctionalReplacement(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles) throws StructureBuildingException {
630
+ List<Atom> oxygenAtoms = findOxygenAtomsInApplicableSuffixes(groupToBeModified);
631
+ if (oxygenAtoms.size() == 0) {
632
+ oxygenAtoms = findOxygenAtomsInGroup(groupToBeModified);
633
+ }
634
+ if (locantEl != null) {//locants are used to indicate replacement on trivial groups
635
+ List<Atom> oxygenWithAppropriateLocants = pickOxygensWithAppropriateLocants(locantEl, oxygenAtoms);
636
+ if(oxygenWithAppropriateLocants.size() < numberOfAtomsToReplace) {
637
+ numberOfAtomsToReplace = 1;
638
+ //e.g. -1-thioureidomethyl
639
+ }
640
+ else{
641
+ locantEl.detach();
642
+ oxygenAtoms = oxygenWithAppropriateLocants;
643
+ }
644
+ }
645
+ List<Atom> replaceableAtoms = new ArrayList<>();
646
+ if (replacementSmiles.startsWith("=")) {
647
+ //e.g. thiono
648
+ replacementSmiles = replacementSmiles.substring(1);
649
+ for (Atom oxygen : oxygenAtoms) {
650
+ int incomingValency = oxygen.getIncomingValency();
651
+ int bondCount = oxygen.getBondCount();
652
+ if (bondCount == 1 && incomingValency == 2) {
653
+ replaceableAtoms.add(oxygen);
654
+ }
655
+ }
656
+ }
657
+ else {
658
+ List<Atom> doubleBondedOxygen = new ArrayList<>();
659
+ List<Atom> singleBondedOxygen = new ArrayList<>();
660
+ List<Atom> ethericOxygen = new ArrayList<>();
661
+ for (Atom oxygen : oxygenAtoms) {
662
+ int incomingValency = oxygen.getIncomingValency();
663
+ int bondCount = oxygen.getBondCount();
664
+ if (bondCount == 1 && incomingValency ==2 ) {
665
+ doubleBondedOxygen.add(oxygen);
666
+ }
667
+ else if (bondCount == 1 && incomingValency == 1) {
668
+ singleBondedOxygen.add(oxygen);
669
+ }
670
+ else if (bondCount == 2 && incomingValency == 2) {
671
+ ethericOxygen.add(oxygen);
672
+ }
673
+ }
674
+ replaceableAtoms.addAll(doubleBondedOxygen);
675
+ replaceableAtoms.addAll(singleBondedOxygen);
676
+ replaceableAtoms.addAll(ethericOxygen);
677
+ }
678
+
679
+ int totalOxygen = replaceableAtoms.size();
680
+ if (numberOfAtomsToReplace >1){
681
+ if (totalOxygen < numberOfAtomsToReplace){
682
+ numberOfAtomsToReplace=1;
683
+ }
684
+ }
685
+
686
+ int atomsReplaced =0;
687
+ if (totalOxygen >=numberOfAtomsToReplace){//check that there atleast as many oxygens as requested replacements
688
+ boolean prefixAssignmentAmbiguous =false;
689
+ Set<Atom> ambiguousElementAtoms = new LinkedHashSet<>();
690
+ if (totalOxygen != numberOfAtomsToReplace){
691
+ prefixAssignmentAmbiguous=true;
692
+ }
693
+
694
+ for (Atom atomToReplace : replaceableAtoms) {
695
+ if (atomsReplaced == numberOfAtomsToReplace){
696
+ ambiguousElementAtoms.add(atomToReplace);
697
+ continue;
698
+ }
699
+ else{
700
+ state.fragManager.replaceAtomWithSmiles(atomToReplace, replacementSmiles);
701
+ if (prefixAssignmentAmbiguous){
702
+ ambiguousElementAtoms.add(atomToReplace);
703
+ }
704
+ }
705
+ atomsReplaced++;
706
+ }
707
+
708
+ if (prefixAssignmentAmbiguous){//record what atoms could have been replaced. Often this ambiguity is resolved later e.g. S-methyl thioacetate
709
+ for (Atom atom : ambiguousElementAtoms) {
710
+ atom.setProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT, ambiguousElementAtoms);
711
+ }
712
+ }
713
+ }
714
+ return atomsReplaced;
715
+ }
716
+
717
+
718
+ /**
719
+ * Converts functional oxygen to peroxy e.g. peroxybenzoic acid
720
+ * Returns the number of oxygen replaced
721
+ * @param groupToBeModified
722
+ * @param locantEl
723
+ * @param numberOfAtomsToReplace
724
+ * @return
725
+ * @throws StructureBuildingException
726
+ */
727
+ private int performPeroxyFunctionalReplacement(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace) throws StructureBuildingException {
728
+ List<Atom> oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(groupToBeModified);
729
+ if (oxygenAtoms.size()==0){
730
+ oxygenAtoms = findEthericOxygenAtomsInGroup(groupToBeModified);
731
+ oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(groupToBeModified));
732
+ }
733
+ if (locantEl !=null){
734
+ List<Atom> oxygenWithAppropriateLocants = pickOxygensWithAppropriateLocants(locantEl, oxygenAtoms);
735
+ if(oxygenWithAppropriateLocants.size() < numberOfAtomsToReplace){
736
+ numberOfAtomsToReplace =1;
737
+ }
738
+ else{
739
+ locantEl.detach();
740
+ oxygenAtoms = oxygenWithAppropriateLocants;
741
+ }
742
+ }
743
+ if (numberOfAtomsToReplace >1 && oxygenAtoms.size() < numberOfAtomsToReplace){
744
+ numberOfAtomsToReplace=1;
745
+ }
746
+ int atomsReplaced = 0;
747
+ if (oxygenAtoms.size() >=numberOfAtomsToReplace){//check that there atleast as many oxygens as requested replacements
748
+ atomsReplaced = numberOfAtomsToReplace;
749
+ for (int j = 0; j < numberOfAtomsToReplace; j++) {
750
+ Atom oxygenToReplace = oxygenAtoms.get(j);
751
+ if (oxygenToReplace.getBondCount()==2){//etheric oxygen
752
+ Fragment newOxygen = state.fragManager.buildSMILES("O", SUFFIX_TYPE_VAL, NONE_LABELS_VAL);
753
+ Bond bondToRemove = oxygenToReplace.getFirstBond();
754
+ Atom atomToAttachTo = bondToRemove.getFromAtom() == oxygenToReplace ? bondToRemove.getToAtom() : bondToRemove.getFromAtom();
755
+ state.fragManager.createBond(atomToAttachTo, newOxygen.getFirstAtom(), 1);
756
+ state.fragManager.createBond(newOxygen.getFirstAtom(), oxygenToReplace, 1);
757
+ state.fragManager.removeBond(bondToRemove);
758
+ state.fragManager.incorporateFragment(newOxygen, groupToBeModified.getFrag());
759
+ }
760
+ else{
761
+ Fragment replacementFrag = state.fragManager.buildSMILES("OO", SUFFIX_TYPE_VAL, NONE_LABELS_VAL);
762
+ removeOrMoveObsoleteFunctionalAtoms(oxygenToReplace, replacementFrag);
763
+ state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(oxygenToReplace, replacementFrag.getFirstAtom());
764
+ state.fragManager.incorporateFragment(replacementFrag, groupToBeModified.getFrag());
765
+ }
766
+ }
767
+ }
768
+ return atomsReplaced;
769
+ }
770
+
771
+ /**
772
+ * Replaces double bonded oxygen and/or single bonded oxygen depending on the input SMILES
773
+ * SMILES with a valency 1 outAtom replace -O, SMILES with a valency 2 outAtom replace =O
774
+ * SMILES with a valency 3 outAtom replace -O and =O (nitrido)
775
+ * Returns the number of oxygen replaced
776
+ * @param groupToBeModified
777
+ * @param locantEl
778
+ * @param numberOfAtomsToReplace
779
+ * @param replacementSmiles
780
+ * @return
781
+ * @throws StructureBuildingException
782
+ */
783
+ private int performFunctionalReplacementOnAcid(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles) throws StructureBuildingException {
784
+ int outValency;
785
+ if (replacementSmiles.startsWith("-")){
786
+ outValency =1;
787
+ }
788
+ else if (replacementSmiles.startsWith("=")){
789
+ outValency =2;
790
+ }
791
+ else if (replacementSmiles.startsWith("#")){
792
+ outValency =3;
793
+ }
794
+ else{
795
+ throw new StructureBuildingException("OPSIN bug: Unexpected valency on fragment for prefix functional replacement");
796
+ }
797
+ replacementSmiles = replacementSmiles.substring(1);
798
+ List<Atom> oxygenAtoms = findOxygenAtomsInApplicableSuffixes(groupToBeModified);
799
+ if (oxygenAtoms.size()==0){
800
+ oxygenAtoms = findOxygenAtomsInGroup(groupToBeModified);
801
+ }
802
+ if (locantEl !=null){//locants are used to indicate replacement on trivial groups
803
+ List<Atom> oxygenWithAppropriateLocants = pickOxygensWithAppropriateLocants(locantEl, oxygenAtoms);
804
+ List<Atom> singleBondedOxygen = new ArrayList<>();
805
+ List<Atom> terminalDoubleBondedOxygen = new ArrayList<>();
806
+ populateTerminalSingleAndDoubleBondedOxygen(oxygenWithAppropriateLocants, singleBondedOxygen, terminalDoubleBondedOxygen);
807
+ if (outValency ==1){
808
+ oxygenWithAppropriateLocants.removeAll(terminalDoubleBondedOxygen);
809
+ }
810
+ else if (outValency ==2){
811
+ oxygenWithAppropriateLocants.removeAll(singleBondedOxygen);
812
+ }
813
+ if(oxygenWithAppropriateLocants.size() < numberOfAtomsToReplace){
814
+ numberOfAtomsToReplace =1;
815
+ //e.g. -1-thioureidomethyl
816
+ }
817
+ else{
818
+ locantEl.detach();
819
+ oxygenAtoms = oxygenWithAppropriateLocants;
820
+ }
821
+ }
822
+ List<Atom> singleBondedOxygen = new ArrayList<>();
823
+ List<Atom> terminalDoubleBondedOxygen = new ArrayList<>();
824
+ populateTerminalSingleAndDoubleBondedOxygen(oxygenAtoms, singleBondedOxygen, terminalDoubleBondedOxygen);
825
+ if (outValency ==1){
826
+ oxygenAtoms.removeAll(terminalDoubleBondedOxygen);
827
+ }
828
+ else if (outValency ==2){
829
+ oxygenAtoms.removeAll(singleBondedOxygen);
830
+ //favour bridging oxygen over double bonded oxygen c.f. imidodicarbonate
831
+ oxygenAtoms.removeAll(terminalDoubleBondedOxygen);
832
+ oxygenAtoms.addAll(terminalDoubleBondedOxygen);
833
+ }
834
+ else {
835
+ if (singleBondedOxygen.size()==0 || terminalDoubleBondedOxygen.size()==0){
836
+ throw new StructureBuildingException("Both a -OH and =O are required for nitrido prefix functional replacement");
837
+ }
838
+ oxygenAtoms.removeAll(singleBondedOxygen);
839
+ }
840
+ if (numberOfAtomsToReplace >1 && oxygenAtoms.size() < numberOfAtomsToReplace){
841
+ numberOfAtomsToReplace=1;
842
+ }
843
+
844
+ int atomsReplaced =0;
845
+ if (oxygenAtoms.size() >=numberOfAtomsToReplace){//check that there atleast as many oxygens as requested replacements
846
+ for (Atom atomToReplace : oxygenAtoms) {
847
+ if (atomsReplaced == numberOfAtomsToReplace){
848
+ continue;
849
+ }
850
+ else{
851
+ Fragment replacementFrag = state.fragManager.buildSMILES(replacementSmiles, atomToReplace.getFrag().getTokenEl(), NONE_LABELS_VAL);
852
+ if (outValency ==3){//special case for nitrido
853
+ atomToReplace.getFirstBond().setOrder(3);
854
+ Atom removedHydroxy = singleBondedOxygen.remove(0);
855
+ state.fragManager.removeAtomAndAssociatedBonds(removedHydroxy);
856
+ removeAssociatedFunctionalAtom(removedHydroxy);
857
+ }
858
+ state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(atomToReplace, replacementFrag.getFirstAtom());
859
+ if (outValency ==1){
860
+ removeOrMoveObsoleteFunctionalAtoms(atomToReplace, replacementFrag);
861
+ }
862
+ moveObsoleteOutAtoms(atomToReplace, replacementFrag);
863
+ state.fragManager.incorporateFragment(replacementFrag, atomToReplace.getFrag());
864
+ }
865
+ atomsReplaced++;
866
+ }
867
+ }
868
+ return atomsReplaced;
869
+ }
870
+
871
+ /*
872
+ * Infix functional replacement nomenclature
873
+ */
874
+
875
+ /**
876
+ * This block handles infix multiplication. Unless brackets are provided this is ambiguous without knowledge of the suffix that is being modified
877
+ * For example butandithione could be intepreted as butandi(thione) or butan(dithi)one.
878
+ * Obviously the latter is wrong in this case but it is the correct interpretation for butandithiate
879
+ * @param suffixes
880
+ * @param suffixFragments
881
+ * @param suffix
882
+ * @param infixTransformations
883
+ * @param oxygenAvailable
884
+ * @throws ComponentGenerationException
885
+ * @throws StructureBuildingException
886
+ */
887
+ private void disambiguateMultipliedInfixMeaning(List<Element> suffixes,
888
+ List<Fragment> suffixFragments,Element suffix, List<String> infixTransformations, int oxygenAvailable)
889
+ throws ComponentGenerationException, StructureBuildingException {
890
+ Element possibleInfix = OpsinTools.getPreviousSibling(suffix);
891
+ if (possibleInfix.getName().equals(INFIX_EL)){//the infix is only left when there was ambiguity
892
+ Element possibleMultiplier = OpsinTools.getPreviousSibling(possibleInfix);
893
+ if (possibleMultiplier.getName().equals(MULTIPLIER_EL)){
894
+ int multiplierValue =Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR));
895
+ if (infixTransformations.size() + multiplierValue-1 <=oxygenAvailable){//multiplier means multiply the infix e.g. butandithiate
896
+ for (int j = 1; j < multiplierValue; j++) {
897
+ infixTransformations.add(0, infixTransformations.get(0));
898
+ }
899
+ }
900
+ else{
901
+ Element possibleLocant = OpsinTools.getPreviousSibling(possibleMultiplier);
902
+ String[] locants = null;
903
+ if (possibleLocant.getName().equals(LOCANT_EL)) {
904
+ locants = possibleLocant.getValue().split(",");
905
+ }
906
+ if (locants !=null){
907
+ if (locants.length!=multiplierValue){
908
+ throw new ComponentGenerationException("Multiplier/locant disagreement when multiplying infixed suffix");
909
+ }
910
+ suffix.addAttribute(new Attribute(LOCANT_ATR, locants[0]));
911
+ }
912
+ suffix.addAttribute(new Attribute(MULTIPLIED_ATR, "multiplied"));
913
+ for (int j = 1; j < multiplierValue; j++) {//multiplier means multiply the infixed suffix e.g. butandithione
914
+ Element newSuffix = suffix.copy();
915
+ Fragment newSuffixFrag = state.fragManager.copyFragment(suffix.getFrag());
916
+ newSuffix.setFrag(newSuffixFrag);
917
+ suffixFragments.add(newSuffixFrag);
918
+ OpsinTools.insertAfter(suffix, newSuffix);
919
+ suffixes.add(newSuffix);
920
+ if (locants !=null){//assign locants if available
921
+ newSuffix.getAttribute(LOCANT_ATR).setValue(locants[j]);
922
+ }
923
+ }
924
+ if (locants!=null){
925
+ possibleLocant.detach();
926
+ }
927
+ }
928
+ possibleMultiplier.detach();
929
+ possibleInfix.detach();
930
+ }
931
+ else{
932
+ throw new ComponentGenerationException("Multiplier expected in front of ambiguous infix");
933
+ }
934
+ }
935
+ }
936
+
937
+ /*
938
+ * Convenience Methods
939
+ */
940
+
941
+ /**
942
+ * Given an atom that is to be replaced by a functional replacement fragment
943
+ * determines whether this atom is a functional atom and, if it is, performs the following processes:
944
+ * The functionalAtom is removed. If the the replacement fragment is an atom of O/S/Se/Te or the
945
+ * the terminal atom of the fragment is a single bonded O/S/Se/Te a functionAom is added to this atom.
946
+ * @param atomToBeReplaced
947
+ * @param replacementFrag
948
+ */
949
+ private void removeOrMoveObsoleteFunctionalAtoms(Atom atomToBeReplaced, Fragment replacementFrag){
950
+ List<Atom> replacementAtomList = replacementFrag.getAtomList();
951
+ Fragment origFrag = atomToBeReplaced.getFrag();
952
+ for (int i = origFrag.getFunctionalAtomCount() - 1; i >=0; i--) {
953
+ FunctionalAtom functionalAtom = origFrag.getFunctionalAtom(i);
954
+ if (atomToBeReplaced.equals(functionalAtom.getAtom())){
955
+ atomToBeReplaced.getFrag().removeFunctionalAtom(i);
956
+ Atom terminalAtomOfReplacementFrag = replacementAtomList.get(replacementAtomList.size()-1);
957
+ if ((terminalAtomOfReplacementFrag.getIncomingValency() ==1 || replacementAtomList.size()==1)&& terminalAtomOfReplacementFrag.getElement().isChalcogen()){
958
+ replacementFrag.addFunctionalAtom(terminalAtomOfReplacementFrag);
959
+ terminalAtomOfReplacementFrag.setCharge(atomToBeReplaced.getCharge());
960
+ terminalAtomOfReplacementFrag.setProtonsExplicitlyAddedOrRemoved(atomToBeReplaced.getProtonsExplicitlyAddedOrRemoved());
961
+ }
962
+ atomToBeReplaced.neutraliseCharge();
963
+ }
964
+ }
965
+ }
966
+
967
+ /**
968
+ * Given an atom that is to be replaced by a functional replacement fragment
969
+ * determines whether this atom has outvalency and if it does removes the outatom from the atom's fragment
970
+ * and adds an outatom to the replacementFrag
971
+ * @param atomToBeReplaced
972
+ * @param replacementFrag
973
+ */
974
+ private void moveObsoleteOutAtoms(Atom atomToBeReplaced, Fragment replacementFrag){
975
+ if (atomToBeReplaced.getOutValency() >0){//this is not known to occur in well formed IUPAC names but would occur in thioxy (as a suffix)
976
+ List<Atom> replacementAtomList = replacementFrag.getAtomList();
977
+ Fragment origFrag = atomToBeReplaced.getFrag();
978
+ for (int i = origFrag.getOutAtomCount() - 1; i >=0; i--) {
979
+ OutAtom outAtom = origFrag.getOutAtom(i);
980
+ if (atomToBeReplaced.equals(outAtom.getAtom())){
981
+ atomToBeReplaced.getFrag().removeOutAtom(i);
982
+ Atom terminalAtomOfReplacementFrag = replacementAtomList.get(replacementAtomList.size()-1);
983
+ replacementFrag.addOutAtom(terminalAtomOfReplacementFrag, outAtom.getValency(), outAtom.isSetExplicitly());
984
+ }
985
+ }
986
+ }
987
+ }
988
+
989
+ private void removeAssociatedFunctionalAtom(Atom atomWithFunctionalAtom) throws StructureBuildingException {
990
+ Fragment frag = atomWithFunctionalAtom.getFrag();
991
+ for (int i = frag.getFunctionalAtomCount() - 1; i >=0; i--) {
992
+ FunctionalAtom functionalAtom = frag.getFunctionalAtom(i);
993
+ if (atomWithFunctionalAtom.equals(functionalAtom.getAtom())){
994
+ atomWithFunctionalAtom.getFrag().removeFunctionalAtom(i);
995
+ return;
996
+ }
997
+ }
998
+ throw new StructureBuildingException("OPSIN bug: Unable to find associated functionalAtom");
999
+ }
1000
+
1001
+
1002
+ /**
1003
+ * Returns the subset of oxygenAtoms that possess one of the locants in locantEl
1004
+ * Searches for locant on nearest non suffix atom in case of suffixes
1005
+ * @param locantEl
1006
+ * @param oxygenAtoms
1007
+ * @return
1008
+ */
1009
+ private List<Atom> pickOxygensWithAppropriateLocants(Element locantEl, List<Atom> oxygenAtoms) {
1010
+ String[] possibleLocants = locantEl.getValue().split(",");
1011
+ boolean pLocantSpecialCase = allLocantsP(possibleLocants);
1012
+ List<Atom> oxygenWithAppropriateLocants = new ArrayList<>();
1013
+ for (Atom atom : oxygenAtoms) {
1014
+ List<String> atomlocants = atom.getLocants();
1015
+ if (atomlocants.size() > 0) {
1016
+ for (String locantVal : possibleLocants) {
1017
+ if (atomlocants.contains(locantVal)) {
1018
+ oxygenWithAppropriateLocants.add(atom);
1019
+ break;
1020
+ }
1021
+ }
1022
+ }
1023
+ else if (pLocantSpecialCase) {
1024
+ for (Atom neighbour : atom.getAtomNeighbours()) {
1025
+ if (neighbour.getElement() == ChemEl.P) {
1026
+ oxygenWithAppropriateLocants.add(atom);
1027
+ break;
1028
+ }
1029
+ }
1030
+ }
1031
+ else {
1032
+ Atom atomWithNumericLocant = OpsinTools.depthFirstSearchForAtomWithNumericLocant(atom);
1033
+ if (atomWithNumericLocant != null) {
1034
+ List<String> atomWithNumericLocantLocants = atomWithNumericLocant.getLocants();
1035
+ for (String locantVal : possibleLocants) {
1036
+ if (atomWithNumericLocantLocants.contains(locantVal)) {
1037
+ oxygenWithAppropriateLocants.add(atom);
1038
+ break;
1039
+ }
1040
+ }
1041
+ }
1042
+ }
1043
+ }
1044
+ return oxygenWithAppropriateLocants;
1045
+ }
1046
+
1047
+ private boolean allLocantsP(String[] locants) {
1048
+ if (locants.length == 0) {
1049
+ return false;
1050
+ }
1051
+ for (String locant : locants) {
1052
+ if (!locant.equals("P")) {
1053
+ return false;
1054
+ }
1055
+ }
1056
+ return true;
1057
+ }
1058
+
1059
+ /**
1060
+ * Returns oxygen atoms in suffixes with functionalAtoms
1061
+ * @param groupToBeModified
1062
+ * @return
1063
+ */
1064
+ private List<Atom> findFunctionalOxygenAtomsInApplicableSuffixes(Element groupToBeModified) {
1065
+ List<Element> suffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, SUFFIX_EL);
1066
+ List<Atom> oxygenAtoms = new ArrayList<>();
1067
+ for (Element suffix : suffixElements) {
1068
+ Fragment suffixFrag = suffix.getFrag();
1069
+ if (suffixFrag != null) {//null for non carboxylic acids
1070
+ for (int i = 0, l = suffixFrag.getFunctionalAtomCount(); i < l; i++) {
1071
+ Atom a = suffixFrag.getFunctionalAtom(i).getAtom();
1072
+ if (a.getElement() == ChemEl.O) {
1073
+ oxygenAtoms.add(a);
1074
+ }
1075
+ }
1076
+ }
1077
+ }
1078
+ return oxygenAtoms;
1079
+ }
1080
+
1081
+ /**
1082
+ * Returns functional oxygen atoms in groupToBeModified
1083
+ * @param groupToBeModified
1084
+ * @return
1085
+ */
1086
+ private List<Atom> findFunctionalOxygenAtomsInGroup(Element groupToBeModified) {
1087
+ List<Atom> oxygenAtoms = new ArrayList<>();
1088
+ Fragment frag = groupToBeModified.getFrag();
1089
+ for (int i = 0, l = frag.getFunctionalAtomCount(); i < l; i++) {
1090
+ Atom a = frag.getFunctionalAtom(i).getAtom();
1091
+ if (a.getElement() == ChemEl.O){
1092
+ oxygenAtoms.add(a);
1093
+ }
1094
+ }
1095
+ return oxygenAtoms;
1096
+ }
1097
+
1098
+
1099
+ /**
1100
+ * Returns etheric oxygen atoms in groupToBeModified
1101
+ * @param groupToBeModified
1102
+ * @return
1103
+ */
1104
+ private List<Atom> findEthericOxygenAtomsInGroup(Element groupToBeModified) {
1105
+ List<Atom> oxygenAtoms = new ArrayList<>();
1106
+ List<Atom> atomList = groupToBeModified.getFrag().getAtomList();
1107
+ for (Atom a: atomList) {
1108
+ if (a.getElement() == ChemEl.O && a.getBondCount()==2 && a.getCharge()==0 && a.getIncomingValency()==2){
1109
+ oxygenAtoms.add(a);
1110
+ }
1111
+ }
1112
+ return oxygenAtoms;
1113
+ }
1114
+
1115
+
1116
+ /**
1117
+ * Returns oxygen atoms in suffixes with functionalAtoms or acidStem suffixes or aldehyde suffixes (1979 C-531)
1118
+ * @param groupToBeModified
1119
+ * @return
1120
+ */
1121
+ private List<Atom> findOxygenAtomsInApplicableSuffixes(Element groupToBeModified) {
1122
+ List<Element> suffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, SUFFIX_EL);
1123
+ List<Atom> oxygenAtoms = new ArrayList<>();
1124
+ for (Element suffix : suffixElements) {
1125
+ Fragment suffixFrag = suffix.getFrag();
1126
+ if (suffixFrag != null) {//null for non carboxylic acids
1127
+ if (suffixFrag.getFunctionalAtomCount() > 0 || groupToBeModified.getAttributeValue(TYPE_ATR).equals(ACIDSTEM_TYPE_VAL) || suffix.getAttributeValue(VALUE_ATR).equals("aldehyde")) {
1128
+ List<Atom> atomList = suffixFrag.getAtomList();
1129
+ for (Atom a : atomList) {
1130
+ if (a.getElement() == ChemEl.O) {
1131
+ oxygenAtoms.add(a);
1132
+ }
1133
+ }
1134
+ }
1135
+ }
1136
+ }
1137
+ return oxygenAtoms;
1138
+ }
1139
+
1140
+ /**
1141
+ * Returns oxygen atoms in groupToBeModified
1142
+ * @param groupToBeModified
1143
+ * @return
1144
+ */
1145
+ private List<Atom> findOxygenAtomsInGroup(Element groupToBeModified) {
1146
+ List<Atom> oxygenAtoms = new ArrayList<>();
1147
+ List<Atom> atomList = groupToBeModified.getFrag().getAtomList();
1148
+ for (Atom a : atomList) {
1149
+ if (a.getElement() == ChemEl.O){
1150
+ oxygenAtoms.add(a);
1151
+ }
1152
+ }
1153
+ return oxygenAtoms;
1154
+ }
1155
+
1156
+
1157
+ private void populateTerminalSingleAndDoubleBondedOxygen(List<Atom> atomList, List<Atom> singleBondedOxygen, List<Atom> doubleBondedOxygen) throws StructureBuildingException {
1158
+ for (Atom a : atomList) {
1159
+ if (a.getElement() == ChemEl.O){//find terminal oxygens
1160
+ if (a.getBondCount()==1){
1161
+ int incomingValency = a.getIncomingValency();
1162
+ if (incomingValency ==2){
1163
+ doubleBondedOxygen.add(a);
1164
+ }
1165
+ else if (incomingValency ==1){
1166
+ singleBondedOxygen.add(a);
1167
+ }
1168
+ else{
1169
+ throw new StructureBuildingException("Unexpected bond order to oxygen; excepted 1 or 2 found: " +incomingValency);
1170
+ }
1171
+
1172
+ }
1173
+ }
1174
+ }
1175
+ }
1176
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingBuilder.java ADDED
@@ -0,0 +1,1030 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.Arrays;
5
+ import java.util.Collections;
6
+ import java.util.HashMap;
7
+ import java.util.HashSet;
8
+ import java.util.LinkedHashSet;
9
+ import java.util.List;
10
+ import java.util.Locale;
11
+ import java.util.Map;
12
+ import java.util.Set;
13
+
14
+ import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*;
15
+ import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*;
16
+
17
+ /**
18
+ * Assembles fused rings named using fusion nomenclature
19
+ * @author dl387
20
+ *
21
+ */
22
+ class FusedRingBuilder {
23
+ private final BuildState state;
24
+ private final List<Element> groupsInFusedRing;
25
+ private final Element lastGroup;
26
+ private final Fragment parentRing;
27
+ private final Map<Integer,Fragment> fragmentInScopeForEachFusionLevel = new HashMap<>();
28
+ private final Map<Atom, Atom> atomsToRemoveToReplacementAtom = new HashMap<>();
29
+
30
+ private FusedRingBuilder(BuildState state, List<Element> groupsInFusedRing) {
31
+ this.state = state;
32
+ this.groupsInFusedRing = groupsInFusedRing;
33
+ lastGroup = groupsInFusedRing.get(groupsInFusedRing.size()-1);
34
+ parentRing = lastGroup.getFrag();
35
+ fragmentInScopeForEachFusionLevel.put(0, parentRing);
36
+ }
37
+
38
+ /**
39
+ * Master method for processing fused rings. Fuses groups together
40
+ * @param state: contains the current id and fragment manager
41
+ * @param subOrRoot Element (substituent or root)
42
+ * @throws StructureBuildingException
43
+ */
44
+ static void processFusedRings(BuildState state, Element subOrRoot) throws StructureBuildingException {
45
+ List<Element> groups = subOrRoot.getChildElements(GROUP_EL);
46
+ if (groups.size() < 2){
47
+ return;//nothing to fuse
48
+ }
49
+ List<Element> groupsInFusedRing =new ArrayList<>();
50
+ for (int i = groups.size()-1; i >=0; i--) {//group groups into fused rings
51
+ Element group =groups.get(i);
52
+ groupsInFusedRing.add(0, group);
53
+ if (i!=0){
54
+ Element startingEl = group;
55
+ if ((group.getValue().equals("benz") || group.getValue().equals("benzo")) && FUSIONRING_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){
56
+ Element beforeBenzo = OpsinTools.getPreviousSibling(group);
57
+ if (beforeBenzo !=null && beforeBenzo.getName().equals(LOCANT_EL)){
58
+ startingEl = beforeBenzo;
59
+ }
60
+ }
61
+ Element possibleGroup = OpsinTools.getPreviousSiblingIgnoringCertainElements(startingEl, new String[]{MULTIPLIER_EL, FUSION_EL});
62
+ if (!groups.get(i-1).equals(possibleGroup)){//end of fused ring system
63
+ if (groupsInFusedRing.size()>=2){
64
+ //This will be invoked in cases where there are multiple fused ring systems in the same subOrRoot such as some spiro systems
65
+ new FusedRingBuilder(state, groupsInFusedRing).buildFusedRing();
66
+ }
67
+ groupsInFusedRing.clear();
68
+ }
69
+ }
70
+ }
71
+ if (groupsInFusedRing.size()>=2){
72
+ new FusedRingBuilder(state, groupsInFusedRing).buildFusedRing();
73
+ }
74
+ }
75
+
76
+ /**
77
+ * Combines the groups given in the {@link FusedRingBuilder} constructor to destructively create the fused ring system
78
+ * This fused ring is then numbered
79
+ * @throws StructureBuildingException
80
+ */
81
+ void buildFusedRing() throws StructureBuildingException{
82
+ /*
83
+ * Apply any nonstandard ring numbering, sorts atomOrder by locant
84
+ * Aromatises appropriate cycloalkane rings, Rejects groups with acyclic atoms
85
+ */
86
+ processRingNumberingAndIrregularities();
87
+ processBenzoFusions();//FR-2.2.8 e.g. in 2H-[1,3]benzodioxino[6',5',4':10,5,6]anthra[2,3-b]azepine benzodioxino is one component
88
+ List<Element> nameComponents = formNameComponentList();
89
+ nameComponents.remove(lastGroup);
90
+
91
+ List<Fragment> componentFragments = new ArrayList<>();//all the ring fragments (other than the parentRing). These will later be merged into the parentRing
92
+ List<Fragment> parentFragments = new ArrayList<>();
93
+ parentFragments.add(parentRing);
94
+
95
+ int numberOfParents = 1;
96
+ Element possibleMultiplier = OpsinTools.getPreviousSibling(lastGroup);
97
+ if (nameComponents.size()>0 && possibleMultiplier !=null && possibleMultiplier.getName().equals(MULTIPLIER_EL)){
98
+ numberOfParents = Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR));
99
+ possibleMultiplier.detach();
100
+ for (int j = 1; j < numberOfParents; j++) {
101
+ Fragment copyOfParentRing =state.fragManager.copyFragment(parentRing);
102
+ parentFragments.add(copyOfParentRing);
103
+ componentFragments.add(copyOfParentRing);
104
+ }
105
+ }
106
+
107
+ /*The indice from nameComponents to use next. Work from right to left i.e. starts at nameComponents.size()-1*/
108
+ int ncIndice = processMultiParentSystem(parentFragments, nameComponents, componentFragments);//handle multiparent systems
109
+ /*
110
+ * The number of primes on the component to be connected.
111
+ * This is initially 0 indicating fusion of unprimed locants with the letter locants of the parentRing
112
+ * Subsequently it will switch to 1 indicating fusion of a second order component (primed locants) with a
113
+ * first order component (unprimed locants)
114
+ * Next would be double primed fusing to single primed locants etc.
115
+ *
116
+ */
117
+ int fusionLevel = (nameComponents.size()-1 -ncIndice)/2;
118
+ for (; ncIndice>=0; ncIndice--) {
119
+ Element fusion = null;
120
+ if (nameComponents.get(ncIndice).getName().equals(FUSION_EL)){
121
+ fusion = nameComponents.get(ncIndice--);
122
+ }
123
+ if (ncIndice <0 || !nameComponents.get(ncIndice).getName().equals(GROUP_EL)){
124
+ throw new StructureBuildingException("Group not found where group expected. This is probably a bug");
125
+ }
126
+ Fragment nextComponent = nameComponents.get(ncIndice).getFrag();
127
+ int multiplier = 1;
128
+ Element possibleMultiplierEl = OpsinTools.getPreviousSibling(nameComponents.get(ncIndice));//e.g. the di of difuro
129
+ if (possibleMultiplierEl != null && possibleMultiplierEl.getName().equals(MULTIPLIER_EL)){
130
+ multiplier = Integer.parseInt(possibleMultiplierEl.getAttributeValue(VALUE_ATR));
131
+ }
132
+ String[] fusionDescriptors =null;
133
+ if (fusion !=null){
134
+ String fusionDescriptorString = fusion.getValue().toLowerCase(Locale.ROOT).substring(1, fusion.getValue().length()-1);
135
+ if (multiplier ==1){
136
+ fusionDescriptors = new String[]{fusionDescriptorString};
137
+ }
138
+ else{
139
+ if (fusionDescriptorString.split(";").length >1){
140
+ fusionDescriptors = fusionDescriptorString.split(";");
141
+ }
142
+ else if (fusionDescriptorString.split(":").length >1){
143
+ fusionDescriptors = fusionDescriptorString.split(":");
144
+ }
145
+ else if (fusionDescriptorString.split(",").length >1){
146
+ fusionDescriptors = fusionDescriptorString.split(",");
147
+ }
148
+ else{//multiplier does not appear to mean multiplied component. Could be indicating multiplication of the whole fused ring system
149
+ if (ncIndice!=0){
150
+ throw new StructureBuildingException("Unexpected multiplier: " + possibleMultiplierEl.getValue() +" or incorrect fusion descriptor: " + fusionDescriptorString);
151
+ }
152
+ multiplier =1;
153
+ fusionDescriptors = new String[]{fusionDescriptorString};
154
+ }
155
+ }
156
+ }
157
+ if (multiplier >1){
158
+ possibleMultiplierEl.detach();
159
+ }
160
+ Fragment[] fusionComponents = new Fragment[multiplier];
161
+ for (int j = 0; j < multiplier; j++) {
162
+ if (j>0){
163
+ fusionComponents[j] = state.fragManager.copyAndRelabelFragment(nextComponent, j);
164
+ }
165
+ else{
166
+ fusionComponents[j] = nextComponent;
167
+ }
168
+ }
169
+
170
+ for (int j = 0; j < multiplier; j++) {
171
+ Fragment component = fusionComponents[j];
172
+ componentFragments.add(component);
173
+ if (fusion !=null){
174
+ if (fusionDescriptors[j].split(":").length==1){//A fusion bracket without a colon is used when applying to the parent component (except in a special case where locants are ommitted)
175
+ //check for case of omitted locant from a higher order fusion bracket e.g. cyclopenta[4,5]pyrrolo[2,3-c]pyridine
176
+ if (fusionDescriptors[j].split("-").length==1 &&
177
+ fusionDescriptors[j].split(",").length >1 &&
178
+ FragmentTools.allAtomsInRingAreIdentical(component)
179
+ && ((StringTools.countTerminalPrimes(fusionDescriptors[j].split(",")[0])) != fusionLevel) ){//Could be like cyclopenta[3,4]cyclobuta[1,2]benzene where the first fusion to occur has parent locants omitted not child locants
180
+ int numberOfPrimes = StringTools.countTerminalPrimes(fusionDescriptors[j].split(",")[0]);
181
+ //note that this is the number of primes on the parent ring. So would expect the child ring and hence the fusionLevel to be 1 higher
182
+ if (numberOfPrimes + 1 != fusionLevel){
183
+ if (numberOfPrimes + 2 == fusionLevel){//ring could be in previous fusion level e.g. the benzo in benzo[10,11]phenanthro[2',3',4',5',6':4,5,6,7]chryseno[1,2,3-bc]coronene
184
+ fusionLevel--;
185
+ }
186
+ else{
187
+ throw new StructureBuildingException("Incorrect number of primes in fusion bracket: " +fusionDescriptors[j]);
188
+ }
189
+ }
190
+ relabelAccordingToFusionLevel(component, fusionLevel);
191
+ List<String> numericalLocantsOfParent = Arrays.asList(fusionDescriptors[j].split(","));
192
+ List<String> numericalLocantsOfChild = findPossibleNumericalLocants(component, determineAtomsToFuse(fragmentInScopeForEachFusionLevel.get(fusionLevel), numericalLocantsOfParent, null).size()-1);
193
+ processHigherOrderFusionDescriptors(component, fragmentInScopeForEachFusionLevel.get(fusionLevel), numericalLocantsOfChild, numericalLocantsOfParent);
194
+ }
195
+ else{
196
+ fusionLevel = 0;
197
+ relabelAccordingToFusionLevel(component, fusionLevel);
198
+ String fusionDescriptor = fusionDescriptors[j];
199
+ String[] fusionArray = determineNumericalAndLetterComponents(fusionDescriptor);
200
+ int numberOfPrimes =0;
201
+ if (!fusionArray[1].equals("")){
202
+ numberOfPrimes =StringTools.countTerminalPrimes(fusionArray[1]);
203
+ if (fusionArray[0].equals("")){
204
+ fusionDescriptor = fusionArray[1].replaceAll("'", "");
205
+ }
206
+ else{
207
+ fusionDescriptor = fusionArray[0]+ "-" +fusionArray[1].replaceAll("'", "");
208
+ }
209
+ if (numberOfPrimes >= parentFragments.size()){
210
+ throw new StructureBuildingException("Unexpected prime in fusion descriptor");
211
+ }
212
+ }
213
+ performSimpleFusion(fusionDescriptor, component, parentFragments.get(numberOfPrimes));//e.g. pyrano[3,2-b]imidazo[4,5-e]pyridine where both are level 0 fusions
214
+ }
215
+ }
216
+ else{
217
+ //determine number of primes in fusor and hence determine fusion level
218
+ int numberOfPrimes = -j + StringTools.countTerminalPrimes(fusionDescriptors[j].split(",")[0]);
219
+ if (numberOfPrimes != fusionLevel){
220
+ if (fusionLevel == numberOfPrimes +1){
221
+ fusionLevel--;
222
+ }
223
+ else{
224
+ throw new StructureBuildingException("Incorrect number of primes in fusion bracket: " +fusionDescriptors[j]);
225
+ }
226
+ }
227
+ relabelAccordingToFusionLevel(component, fusionLevel);
228
+ performHigherOrderFusion(fusionDescriptors[j], component, fragmentInScopeForEachFusionLevel.get(fusionLevel));
229
+ }
230
+ }
231
+ else{
232
+ relabelAccordingToFusionLevel(component, fusionLevel);
233
+ performSimpleFusion(null, component, fragmentInScopeForEachFusionLevel.get(fusionLevel));
234
+ }
235
+ }
236
+ fusionLevel++;
237
+ if (multiplier ==1){//multiplied components may not be substituted onto
238
+ fragmentInScopeForEachFusionLevel.put(fusionLevel, fusionComponents[0]);
239
+ }
240
+ }
241
+ for (Fragment ring : componentFragments) {
242
+ state.fragManager.incorporateFragment(ring, parentRing);
243
+ }
244
+ removeMergedAtoms();
245
+
246
+ FusedRingNumberer.numberFusedRing(parentRing);//numbers the fused ring;
247
+
248
+ StringBuilder fusedRingName = new StringBuilder();
249
+ for (Element element : nameComponents) {
250
+ fusedRingName.append(element.getValue());
251
+ }
252
+ fusedRingName.append(lastGroup.getValue());
253
+
254
+ Element fusedRingEl =lastGroup;//reuse this element to save having to remap suffixes...
255
+ fusedRingEl.getAttribute(VALUE_ATR).setValue(fusedRingName.toString());
256
+ fusedRingEl.getAttribute(TYPE_ATR).setValue(RING_TYPE_VAL);
257
+ fusedRingEl.setValue(fusedRingName.toString());
258
+
259
+ for (Element element : nameComponents) {
260
+ element.detach();
261
+ }
262
+ }
263
+
264
+ private void removeMergedAtoms() {
265
+ for (Atom a : atomsToRemoveToReplacementAtom.keySet()) {
266
+ state.fragManager.removeAtomAndAssociatedBonds(a);
267
+ }
268
+ atomsToRemoveToReplacementAtom.clear();
269
+ }
270
+
271
+ /**
272
+ * Forms a list a list of all group and fusion elements between the first and last group in the fused ring
273
+ * @return
274
+ */
275
+ private List<Element> formNameComponentList() {
276
+ List<Element> nameComponents = new ArrayList<>();
277
+ Element currentEl = groupsInFusedRing.get(0);
278
+ while(currentEl != lastGroup){
279
+ if (currentEl.getName().equals(GROUP_EL) || currentEl.getName().equals(FUSION_EL)){
280
+ nameComponents.add(currentEl);
281
+ }
282
+ currentEl = OpsinTools.getNextSibling(currentEl);
283
+ }
284
+ return nameComponents;
285
+ }
286
+
287
+ private void processRingNumberingAndIrregularities() throws StructureBuildingException {
288
+ for (Element group : groupsInFusedRing) {
289
+ Fragment ring = group.getFrag();
290
+ if (ALKANESTEM_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){
291
+ aromatiseCyclicAlkane(group);
292
+ }
293
+ processPartiallyUnsaturatedHWSystems(group, ring);
294
+ if (group == lastGroup) {
295
+ //perform a quick check that every atom in this group is infact cyclic. Fusion components are enumerated and hence all guaranteed to be purely cyclic
296
+ List<Atom> atomList = ring.getAtomList();
297
+ for (Atom atom : atomList) {
298
+ if (!atom.getAtomIsInACycle()) {
299
+ throw new StructureBuildingException("Inappropriate group used in fusion nomenclature. Only groups composed entirely of atoms in cycles may be used. i.e. not: " + group.getValue());
300
+ }
301
+ }
302
+ if (group.getAttribute(FUSEDRINGNUMBERING_ATR) != null) {
303
+ String[] standardNumbering = group.getAttributeValue(FUSEDRINGNUMBERING_ATR).split("/", -1);
304
+ for (int j = 0; j < standardNumbering.length; j++) {
305
+ atomList.get(j).replaceLocants(standardNumbering[j]);
306
+ }
307
+ } else {
308
+ ring.sortAtomListByLocant();//for those where the order the locants are in is sensible }
309
+ }
310
+ for (Atom atom : atomList) {
311
+ atom.clearLocants();//the parentRing does not have locants, letters are used to indicate the edges
312
+ }
313
+ } else if (group.getAttribute(FUSEDRINGNUMBERING_ATR) == null) {
314
+ ring.sortAtomListByLocant();//for those where the order the locants are in is sensible
315
+ }
316
+ }
317
+ }
318
+
319
+ /**
320
+ * Interprets the unlocanted unsaturator after a partially unsaturated HW Rings as indication of spare valency and detaches it
321
+ * This is necessary as this unsaturator can only refer to the HW ring and for names like 2-Benzoxazolinone to avoid confusion as to what the 2 refers to.
322
+ * @param group
323
+ * @param ring
324
+ */
325
+ private void processPartiallyUnsaturatedHWSystems(Element group, Fragment ring) {
326
+ if (HANTZSCHWIDMAN_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) && group.getAttribute(ADDBOND_ATR)!=null){
327
+ List<Element> unsaturators = OpsinTools.getNextAdjacentSiblingsOfType(group, UNSATURATOR_EL);
328
+ if (unsaturators.size()>0){
329
+ Element unsaturator = unsaturators.get(0);
330
+ if (unsaturator.getAttribute(LOCANT_ATR)==null && unsaturator.getAttributeValue(VALUE_ATR).equals("2")){
331
+ unsaturator.detach();
332
+ List<Bond> bondsToUnsaturate = StructureBuildingMethods.findBondsToUnSaturate(ring, 2, true);
333
+ if (bondsToUnsaturate.size() == 0) {
334
+ throw new RuntimeException("Failed to find bond to unsaturate on partially saturated HW ring");
335
+ }
336
+ Bond b = bondsToUnsaturate.get(0);
337
+ b.getFromAtom().setSpareValency(true);
338
+ b.getToAtom().setSpareValency(true);
339
+ }
340
+ }
341
+ }
342
+ }
343
+
344
+ /**
345
+ * Given a cyclicAlkaneGroup determines whether or not it should be aromatised. Unlocanted ene will be detached if it is an aromatisation hint
346
+ * No unsaturators -->aromatise
347
+ * Just ane -->don't
348
+ * More than 1 ene or locants on ene -->don't
349
+ * yne --> don't
350
+ * @param cyclicAlkaneGroup
351
+ */
352
+ private void aromatiseCyclicAlkane(Element cyclicAlkaneGroup) {
353
+ Element next = OpsinTools.getNextSibling(cyclicAlkaneGroup);
354
+ List<Element> unsaturators = new ArrayList<>();
355
+ while (next!=null && next.getName().equals(UNSATURATOR_EL)){
356
+ unsaturators.add(next);
357
+ next = OpsinTools.getNextSibling(next);
358
+ }
359
+ boolean conjugate =true;
360
+ if (unsaturators.size()==1){
361
+ int value = Integer.parseInt(unsaturators.get(0).getAttributeValue(VALUE_ATR));
362
+ if (value !=2){
363
+ conjugate =false;
364
+ }
365
+ else if (unsaturators.get(0).getAttribute(LOCANT_ATR)!=null){
366
+ conjugate =false;
367
+ }
368
+ }
369
+ else if (unsaturators.size()==2){
370
+ int value1 = Integer.parseInt(unsaturators.get(0).getAttributeValue(VALUE_ATR));
371
+ if (value1 !=1){
372
+ conjugate =false;
373
+ }
374
+ else{
375
+ int value2 = Integer.parseInt(unsaturators.get(1).getAttributeValue(VALUE_ATR));
376
+ if (value2 !=2 || unsaturators.get(1).getAttribute(LOCANT_ATR)!=null){
377
+ conjugate =false;
378
+ }
379
+ }
380
+ }
381
+ else if (unsaturators.size() >2){
382
+ conjugate =false;
383
+ }
384
+ if (conjugate){
385
+ for (Element unsaturator : unsaturators) {
386
+ unsaturator.detach();
387
+ }
388
+ List<Atom> atomList = cyclicAlkaneGroup.getFrag().getAtomList();
389
+ for (Atom atom : atomList) {
390
+ atom.setSpareValency(true);
391
+ }
392
+ }
393
+ }
394
+
395
+ private int processMultiParentSystem(List<Fragment> parentFragments, List<Element> nameComponents, List<Fragment> componentFragments) throws StructureBuildingException {
396
+ int i = nameComponents.size()-1;
397
+ int fusionLevel =0;
398
+ if (i>=0 && parentFragments.size()>1){
399
+ List<Fragment> previousFusionLevelFragments = parentFragments;
400
+ for (; i>=0; i--) {
401
+ if (previousFusionLevelFragments.size()==1){//completed multi parent system
402
+ fragmentInScopeForEachFusionLevel.put(fusionLevel, previousFusionLevelFragments.get(0));
403
+ break;
404
+ }
405
+ Element fusion = null;
406
+ if (nameComponents.get(i).getName().equals(FUSION_EL)){
407
+ fusion = nameComponents.get(i--);
408
+ }
409
+ else{
410
+ throw new StructureBuildingException("Fusion bracket not found where fusion bracket expected");
411
+ }
412
+ if (i <0 || !nameComponents.get(i).getName().equals(GROUP_EL)){
413
+ throw new StructureBuildingException("Group not found where group expected. This is probably a bug");
414
+ }
415
+ Fragment nextComponent = nameComponents.get(i).getFrag();
416
+ relabelAccordingToFusionLevel(nextComponent, fusionLevel);
417
+ int multiplier = 1;
418
+ Element possibleMultiplierEl = OpsinTools.getPreviousSibling(nameComponents.get(i));
419
+ if (possibleMultiplierEl != null && possibleMultiplierEl.getName().equals(MULTIPLIER_EL)){
420
+ multiplier = Integer.parseInt(possibleMultiplierEl.getAttributeValue(VALUE_ATR));
421
+ possibleMultiplierEl.detach();
422
+ }
423
+ List<Fragment> fusionComponents = new ArrayList<>();
424
+ for (int j = 0; j < multiplier; j++) {
425
+ if (j>0){
426
+ Fragment clonedFrag = state.fragManager.copyFragment(nextComponent);
427
+ relabelAccordingToFusionLevel(clonedFrag, j);//fusionLevels worth of primes already added
428
+ fusionComponents.add(clonedFrag);
429
+ }
430
+ else{
431
+ fusionComponents.add(nextComponent);
432
+ }
433
+ }
434
+ fusionLevel+=multiplier;
435
+ if (multiplier>1 && multiplier != previousFusionLevelFragments.size()){
436
+ throw new StructureBuildingException("Mismatch between number of components and number of parents in fused ring system");
437
+ }
438
+ String fusionDescriptorString = fusion.getValue().toLowerCase(Locale.ROOT).substring(1, fusion.getValue().length()-1);
439
+ String[] fusionDescriptors =null;
440
+ if (fusionDescriptorString.split(";").length >1){
441
+ fusionDescriptors = fusionDescriptorString.split(";");
442
+ }
443
+ else if (fusionDescriptorString.split(":").length >1){
444
+ fusionDescriptors = fusionDescriptorString.split(":");
445
+ }
446
+ else if (fusionDescriptorString.split(",").length >1){
447
+ fusionDescriptors = fusionDescriptorString.split(",");
448
+ }
449
+ else{
450
+ throw new StructureBuildingException("Invalid fusion descriptor: " + fusionDescriptorString);
451
+ }
452
+ if (fusionDescriptors.length != previousFusionLevelFragments.size()){
453
+ throw new StructureBuildingException("Invalid fusion descriptor: "+fusionDescriptorString +"(Number of locants disagrees with number of parents)");
454
+ }
455
+ for (int j = 0; j < fusionDescriptors.length; j++) {
456
+ String fusionDescriptor = fusionDescriptors[j];
457
+ Fragment component = multiplier>1 ? fusionComponents.get(j) : nextComponent;
458
+ Fragment parentToUse = previousFusionLevelFragments.get(j);
459
+ boolean simpleFusion = fusionDescriptor.split(":").length <= 1;
460
+ if (simpleFusion){
461
+ String[] fusionArray = determineNumericalAndLetterComponents(fusionDescriptor);
462
+ if (fusionArray[1].length() != 0){
463
+ int numberOfPrimes =StringTools.countTerminalPrimes(fusionArray[1]);
464
+ if (fusionArray[0].length() == 0){
465
+ fusionDescriptor = fusionArray[1].replaceAll("'", "");
466
+ }
467
+ else{
468
+ fusionDescriptor = fusionArray[0]+ "-" +fusionArray[1].replaceAll("'", "");
469
+ }
470
+ if (numberOfPrimes !=j){//check the number of primes on the letter part agree with the parent to use e.g.[4,5-bcd:1,2-c']difuran
471
+ throw new StructureBuildingException("Incorrect number of primes in fusion descriptor: " + fusionDescriptor);
472
+ }
473
+ }
474
+ performSimpleFusion(fusionDescriptor, component, parentToUse);
475
+ }
476
+ else{
477
+ performHigherOrderFusion(fusionDescriptor, component, parentToUse);
478
+ }
479
+ }
480
+ previousFusionLevelFragments = fusionComponents;
481
+ componentFragments.addAll(fusionComponents);
482
+ }
483
+ if (previousFusionLevelFragments.size()!=1){
484
+ throw new StructureBuildingException("Invalid fused ring system. Incomplete multiparent system");
485
+ }
486
+ }
487
+ return i;
488
+ }
489
+
490
+ /**
491
+ * Splits a first order fusion component into it's numerical and letter parts
492
+ * Either one of these can be the blank string as they may have been omitted
493
+ * The first entry in the array is the numbers and the second the letters
494
+ * @param fusionDescriptor
495
+ * @return
496
+ */
497
+ private String[] determineNumericalAndLetterComponents(String fusionDescriptor) {
498
+ String[] fusionArray = fusionDescriptor.split("-");
499
+ if (fusionArray.length ==2){
500
+ return fusionArray;
501
+ }
502
+ else{
503
+ String[] components = new String[2];
504
+ if (fusionArray[0].contains(",")){//the digit section
505
+ components[0]=fusionArray[0];
506
+ components[1]="";
507
+ }
508
+ else{
509
+ components[0]="";
510
+ components[1]=fusionArray[0];
511
+ }
512
+ return components;
513
+ }
514
+ }
515
+
516
+ /**
517
+ * Searches groups for benz(o) components and fuses them in accordance with
518
+ * FR-2.2.8 Heterobicyclic components with a benzene ring
519
+ * @throws StructureBuildingException
520
+ */
521
+ private void processBenzoFusions() throws StructureBuildingException {
522
+ for(int i = groupsInFusedRing.size() - 2; i >= 0; i--) {
523
+ Element group = groupsInFusedRing.get(i);
524
+ if (group.getValue().equals("benz") || group.getValue().equals("benzo")) {
525
+ Element possibleFusionbracket = OpsinTools.getNextSibling(group);
526
+ if (!possibleFusionbracket.getName().equals(FUSION_EL)) {
527
+ Element possibleMultiplier = OpsinTools.getPreviousSibling(group);
528
+ if (possibleMultiplier == null || !possibleMultiplier.getName().equals(MULTIPLIER_EL) || possibleMultiplier.getAttributeValue(TYPE_ATR).equals(GROUP_TYPE_VAL)) {
529
+ //e.g. 2-benzofuran. Fused rings of this type are a special case treated as being a single component
530
+ //and have a special convention for indicating the position of heteroatoms
531
+ benzoSpecificFusion(group, groupsInFusedRing.get(i + 1));
532
+ group.detach();
533
+ groupsInFusedRing.remove(i);
534
+ }
535
+ }
536
+ }
537
+ }
538
+ }
539
+
540
+ /**
541
+ * Modifies nextComponent's locants according to the fusionLevel.
542
+ * @param component
543
+ * @param fusionLevel
544
+ */
545
+ private void relabelAccordingToFusionLevel(Fragment component, int fusionLevel) {
546
+ if (fusionLevel > 0){
547
+ FragmentTools.relabelNumericLocants(component.getAtomList(), StringTools.multiplyString("'", fusionLevel));
548
+ }
549
+ }
550
+
551
+ /**
552
+ * Handles fusion between components where the fusion descriptor is of the form:
553
+ * comma separated locants dash letters
554
+ * e.g imidazo[4,5-d]pyridine
555
+ * The fusionDescriptor may be given as null or the letter/numerical part omitted.
556
+ * Sensible defaults will be found instead
557
+ * @param fusionDescriptor
558
+ * @param childRing
559
+ * @param parentRing
560
+ * @throws StructureBuildingException
561
+ */
562
+ private void performSimpleFusion(String fusionDescriptor, Fragment childRing, Fragment parentRing) throws StructureBuildingException {
563
+ List<String> numericalLocantsOfChild = null;
564
+ List<String> letterLocantsOfParent = null;
565
+ if (fusionDescriptor != null){
566
+ String[] fusionArray = fusionDescriptor.split("-");
567
+ if (fusionArray.length ==2){
568
+ numericalLocantsOfChild = Arrays.asList(fusionArray[0].split(","));
569
+ char[] tempLetterLocantsOfParent = fusionArray[1].toCharArray();
570
+ letterLocantsOfParent = new ArrayList<>();
571
+ for (char letterLocantOfParent : tempLetterLocantsOfParent) {
572
+ letterLocantsOfParent.add(String.valueOf(letterLocantOfParent));
573
+ }
574
+ }
575
+ else{
576
+ if (fusionArray[0].contains(",")){//only has digits
577
+ String[] numericalLocantsOfChildTemp = fusionArray[0].split(",");
578
+ numericalLocantsOfChild = Arrays.asList(numericalLocantsOfChildTemp);
579
+ }
580
+ else{//only has letters
581
+ char[] tempLetterLocantsOfParentCharArray = fusionArray[0].toCharArray();
582
+ letterLocantsOfParent = new ArrayList<>();
583
+ for (char letterLocantOfParentCharArray : tempLetterLocantsOfParentCharArray) {
584
+ letterLocantsOfParent.add(String.valueOf(letterLocantOfParentCharArray));
585
+ }
586
+ }
587
+ }
588
+ }
589
+
590
+ int edgeLength =1;
591
+ if (numericalLocantsOfChild != null){
592
+ if (numericalLocantsOfChild.size() <=1){
593
+ throw new StructureBuildingException("At least two numerical locants must be provided to perform fusion!");
594
+ }
595
+ edgeLength = numericalLocantsOfChild.size()-1;
596
+ }
597
+ else if (letterLocantsOfParent != null){
598
+ edgeLength = letterLocantsOfParent.size();
599
+ }
600
+
601
+ if (numericalLocantsOfChild == null){
602
+ numericalLocantsOfChild = findPossibleNumericalLocants(childRing, edgeLength);
603
+ }
604
+
605
+ if (letterLocantsOfParent == null){
606
+ letterLocantsOfParent = findPossibleLetterLocants(parentRing, edgeLength);
607
+ }
608
+ if (numericalLocantsOfChild == null || letterLocantsOfParent ==null){
609
+ throw new StructureBuildingException("Unable to find bond to form fused ring system. Some information for forming fused ring system was only supplyed implicitly");
610
+ }
611
+
612
+ processFirstOrderFusionDescriptors(childRing, parentRing, numericalLocantsOfChild, letterLocantsOfParent);//fuse the rings
613
+ }
614
+
615
+ /**
616
+ * Takes a ring an returns and array with one letter corresponding to a side/s
617
+ * that contains two adjacent non bridgehead carbons
618
+ * The number of sides is specified by edgeLength
619
+ * @param ring
620
+ * @param edgeLength The number of bonds to be fused along
621
+ * @return
622
+ */
623
+ private List<String> findPossibleLetterLocants(Fragment ring, int edgeLength) {
624
+ List<Integer> carbonAtomIndexes = new ArrayList<>();
625
+ int numberOfAtoms = ring.getAtomCount();
626
+ CyclicAtomList cyclicAtomList = new CyclicAtomList(ring.getAtomList());
627
+ for (int i = 0; i <= numberOfAtoms; i++) {
628
+ //iterate backwards in list to use highest locanted edge in preference.
629
+ //this retains what is currently locant 1 on the parent ring as locant 1 if the first two atoms found match
630
+ //the last atom in the list is potentially tested twice e.g. on a 6 membered ring, 6-5 and 1-6 are both possible
631
+ Atom atom = cyclicAtomList.previous();
632
+ //want non-bridgehead carbon atoms. Double-check that these carbon atoms are actually bonded (e.g. von baeyer systems have non-consecutive atom numbering!)
633
+ if (atom.getElement() == ChemEl.C && atom.getBondCount() == 2
634
+ && (carbonAtomIndexes.size() == 0 || atom.getAtomNeighbours().contains(cyclicAtomList.peekNext()))){
635
+ carbonAtomIndexes.add(cyclicAtomList.getIndex());
636
+ if (carbonAtomIndexes.size() == edgeLength + 1){//as many carbons in a row as to give that edgelength ->use these side/s
637
+ Collections.reverse(carbonAtomIndexes);
638
+ List<String> letterLocantsOfParent = new ArrayList<>();
639
+ for (int j = 0; j < edgeLength; j++) {
640
+ letterLocantsOfParent.add(String.valueOf((char)(97 + carbonAtomIndexes.get(j))));//97 is ascii for a
641
+ }
642
+ return letterLocantsOfParent;
643
+ }
644
+ }
645
+ else{
646
+ carbonAtomIndexes.clear();
647
+ }
648
+ }
649
+ return null;
650
+ }
651
+
652
+ /**
653
+ * Takes a ring and returns an array of numbers corresponding to a side/s
654
+ * that contains two adjacent non bridgehead carbons
655
+ * The number of sides is specified by edgeLength
656
+ * @param ring
657
+ * @param edgeLength The number of bonds to be fused along
658
+ * @return
659
+ */
660
+ private List<String> findPossibleNumericalLocants(Fragment ring, int edgeLength) {
661
+ List<String> carbonLocants = new ArrayList<>();
662
+ int numberOfAtoms = ring.getAtomCount();
663
+ CyclicAtomList cyclicAtomList = new CyclicAtomList(ring.getAtomList());
664
+ for (int i = 0; i <= numberOfAtoms; i++) {
665
+ //the last atom in the list is potentially tested twice e.g. on a 6 membered ring, 1-2 and 6-1 are both possible
666
+ Atom atom = cyclicAtomList.next();
667
+ //want non-bridgehead carbon atoms. Double-check that these carbon atoms are actually bonded (e.g. von baeyer systems have non-consecutive atom numbering!)
668
+ if (atom.getElement() == ChemEl.C && atom.getBondCount() == 2
669
+ && (carbonLocants.size() == 0 || atom.getAtomNeighbours().contains(cyclicAtomList.peekPrevious()))){
670
+ carbonLocants.add(atom.getFirstLocant());
671
+ if (carbonLocants.size() == edgeLength + 1){//as many carbons in a row as to give that edgelength ->use these side/s
672
+ List<String> numericalLocantsOfChild = new ArrayList<>();
673
+ for (String locant : carbonLocants) {
674
+ numericalLocantsOfChild.add(locant);
675
+ }
676
+ return numericalLocantsOfChild;
677
+ }
678
+ }
679
+ else{
680
+ carbonLocants.clear();
681
+ }
682
+ }
683
+ return null;
684
+ }
685
+
686
+ /**
687
+ * Performs a single ring fusion using the values in numericalLocantsOfChild/letterLocantsOfParent
688
+ * @param childRing
689
+ * @param parentRing
690
+ * @param numericalLocantsOfChild
691
+ * @param letterLocantsOfParent
692
+ * @throws StructureBuildingException
693
+ */
694
+ private void processFirstOrderFusionDescriptors(Fragment childRing, Fragment parentRing, List<String> numericalLocantsOfChild, List<String> letterLocantsOfParent) throws StructureBuildingException {
695
+ List<Atom> childAtoms = determineAtomsToFuse(childRing, numericalLocantsOfChild, letterLocantsOfParent.size() +1);
696
+ if (childAtoms ==null){
697
+ throw new StructureBuildingException("Malformed fusion bracket!");
698
+ }
699
+
700
+ List<Atom> parentAtoms = new ArrayList<>();
701
+ List<Atom> parentPeripheralAtomList = getPeripheralAtoms(parentRing.getAtomList());
702
+ CyclicAtomList cyclicListAtomsOnSurfaceOfParent = new CyclicAtomList(parentPeripheralAtomList, (int)letterLocantsOfParent.get(0).charAt(0) -97);//convert from lower case character through ascii to 0-23
703
+ parentAtoms.add(cyclicListAtomsOnSurfaceOfParent.getCurrent());
704
+ for (int i = 0; i < letterLocantsOfParent.size(); i++) {
705
+ parentAtoms.add(cyclicListAtomsOnSurfaceOfParent.next());
706
+ }
707
+ fuseRings(childAtoms, parentAtoms);
708
+ }
709
+
710
+ /**
711
+ * Returns the sublist of the given atoms that are peripheral atoms given that the list is ordered such that the interior atoms are at the end of the list
712
+ * @param atomList
713
+ * @return
714
+ */
715
+ private List<Atom> getPeripheralAtoms(List<Atom> atomList) {
716
+ //find the indice of the last atom on the surface of the ring. This obviously connects to the first atom. The objective is to exclude any interior atoms.
717
+ List<Atom> neighbours = atomList.get(0).getAtomNeighbours();
718
+ int indice = Integer.MAX_VALUE;
719
+ for (Atom atom : neighbours) {
720
+ int indexOfAtom =atomList.indexOf(atom);
721
+ if (indexOfAtom ==1){//not the next atom
722
+ continue;
723
+ }
724
+ else if (indexOfAtom ==-1){//not in parentRing
725
+ continue;
726
+ }
727
+ if (atomList.indexOf(atom)< indice){
728
+ indice = indexOfAtom;
729
+ }
730
+ }
731
+ return atomList.subList(0, indice +1);
732
+ }
733
+
734
+ /**
735
+ * Handles fusion between components where the fusion descriptor is of the form:
736
+ * comma separated locants colon comma separated locants
737
+ * e.g pyrido[1'',2'':1',2']imidazo
738
+ * @param fusionDescriptor
739
+ * @param nextComponent
740
+ * @param fusedRing
741
+ * @throws StructureBuildingException
742
+ */
743
+ private void performHigherOrderFusion(String fusionDescriptor, Fragment nextComponent, Fragment fusedRing) throws StructureBuildingException {
744
+ List<String> numericalLocantsOfChild = null;
745
+ List<String> numericalLocantsOfParent = null;
746
+ String[] fusionArray = fusionDescriptor.split(":");
747
+ if (fusionArray.length ==2){
748
+ numericalLocantsOfChild = Arrays.asList(fusionArray[0].split(","));
749
+ numericalLocantsOfParent = Arrays.asList(fusionArray[1].split(","));
750
+ }
751
+ else{
752
+ throw new StructureBuildingException("Malformed fusion bracket: This is an OPSIN bug, check regexTokens.xml");
753
+ }
754
+ processHigherOrderFusionDescriptors(nextComponent, fusedRing, numericalLocantsOfChild, numericalLocantsOfParent);//fuse the rings
755
+ }
756
+
757
+ /**
758
+ * Performs a single ring fusion using the values in numericalLocantsOfChild/numericalLocantsOfParent
759
+ * @param childRing
760
+ * @param parentRing
761
+ * @param numericalLocantsOfChild
762
+ * @param numericalLocantsOfParent
763
+ * @throws StructureBuildingException
764
+ */
765
+ private void processHigherOrderFusionDescriptors(Fragment childRing, Fragment parentRing, List<String> numericalLocantsOfChild, List<String> numericalLocantsOfParent) throws StructureBuildingException {
766
+ List<Atom> childAtoms =determineAtomsToFuse(childRing, numericalLocantsOfChild, null);
767
+ if (childAtoms ==null){
768
+ throw new StructureBuildingException("Malformed fusion bracket!");
769
+ }
770
+
771
+ List<Atom> parentAtoms = determineAtomsToFuse(parentRing, numericalLocantsOfParent, childAtoms.size());
772
+ if (parentAtoms ==null){
773
+ throw new StructureBuildingException("Malformed fusion bracket!");
774
+ }
775
+ fuseRings(childAtoms, parentAtoms);
776
+ }
777
+
778
+ /**
779
+ * Determines which atoms on a ring should be used for fusion given a set of numerical locants.
780
+ * If from the other ring involved in the fusion it is known how many atoms are expected to be found this should be provided
781
+ * If this is not known it should be set to null and the smallest number of fusion atoms will be returned.
782
+ * @param ring
783
+ * @param numericalLocantsOnRing
784
+ * @param expectedNumberOfAtomsToBeUsedForFusion
785
+ * @return
786
+ * @throws StructureBuildingException
787
+ */
788
+ private List<Atom> determineAtomsToFuse(Fragment ring, List<String> numericalLocantsOnRing, Integer expectedNumberOfAtomsToBeUsedForFusion) throws StructureBuildingException {
789
+ List<Atom> parentPeripheralAtomList = getPeripheralAtoms(ring.getAtomList());
790
+ String firstLocant = numericalLocantsOnRing.get(0);
791
+ String lastLocant = numericalLocantsOnRing.get(numericalLocantsOnRing.size() - 1);
792
+ int indexfirst = parentPeripheralAtomList.indexOf(ring.getAtomByLocantOrThrow(firstLocant));
793
+ if (indexfirst == -1) {
794
+ throw new StructureBuildingException(firstLocant + " refers to an atom that is not a peripheral atom!");
795
+ }
796
+ int indexfinal = parentPeripheralAtomList.indexOf(ring.getAtomByLocantOrThrow(lastLocant));
797
+ if (indexfinal == -1) {
798
+ throw new StructureBuildingException(lastLocant + " refers to an atom that is not a peripheral atom!");
799
+ }
800
+ CyclicAtomList cyclicRingAtomList = new CyclicAtomList(parentPeripheralAtomList, indexfirst);
801
+ List<Atom> fusionAtoms = null;
802
+
803
+ List<Atom> potentialFusionAtomsAscending = new ArrayList<>();
804
+ potentialFusionAtomsAscending.add(cyclicRingAtomList.getCurrent());
805
+ while (cyclicRingAtomList.getIndex() != indexfinal){//assume numbers are ascending
806
+ potentialFusionAtomsAscending.add(cyclicRingAtomList.next());
807
+ }
808
+ if (expectedNumberOfAtomsToBeUsedForFusion ==null ||expectedNumberOfAtomsToBeUsedForFusion == potentialFusionAtomsAscending.size()){
809
+ boolean notInPotentialParentAtoms =false;
810
+ for (int i =1; i < numericalLocantsOnRing.size()-1 ; i ++){
811
+ if (!potentialFusionAtomsAscending.contains(ring.getAtomByLocantOrThrow(numericalLocantsOnRing.get(i)))){
812
+ notInPotentialParentAtoms =true;
813
+ }
814
+ }
815
+ if (!notInPotentialParentAtoms){
816
+ fusionAtoms = potentialFusionAtomsAscending;
817
+ }
818
+ }
819
+
820
+ if (fusionAtoms ==null || expectedNumberOfAtomsToBeUsedForFusion ==null){//that didn't work, so try assuming the numbers are descending
821
+ cyclicRingAtomList.setIndex(indexfirst);
822
+ List<Atom> potentialFusionAtomsDescending = new ArrayList<>();
823
+ potentialFusionAtomsDescending.add(cyclicRingAtomList.getCurrent());
824
+ while (cyclicRingAtomList.getIndex() != indexfinal){//assume numbers are descending
825
+ potentialFusionAtomsDescending.add(cyclicRingAtomList.previous());
826
+ }
827
+ if (expectedNumberOfAtomsToBeUsedForFusion ==null || expectedNumberOfAtomsToBeUsedForFusion == potentialFusionAtomsDescending.size()){
828
+ boolean notInPotentialParentAtoms =false;
829
+ for (int i =1; i < numericalLocantsOnRing.size()-1 ; i ++){
830
+ if (!potentialFusionAtomsDescending.contains(ring.getAtomByLocantOrThrow(numericalLocantsOnRing.get(i)))){
831
+ notInPotentialParentAtoms =true;
832
+ }
833
+ }
834
+ if (!notInPotentialParentAtoms){
835
+ if (fusionAtoms!=null && expectedNumberOfAtomsToBeUsedForFusion ==null){
836
+ //prefer less fusion atoms
837
+ if (potentialFusionAtomsDescending.size()< fusionAtoms.size()){
838
+ fusionAtoms = potentialFusionAtomsDescending;
839
+ }
840
+ }
841
+ else{
842
+ fusionAtoms = potentialFusionAtomsDescending;
843
+ }
844
+ }
845
+ }
846
+ }
847
+ return fusionAtoms;
848
+ }
849
+
850
+ /**
851
+ * Creates the bonds required to fuse two rings together.
852
+ * The child atoms are recorded as atoms that should be removed later
853
+ * @param childAtoms
854
+ * @param parentAtoms
855
+ * @throws StructureBuildingException
856
+ */
857
+ private void fuseRings(List<Atom> childAtoms, List<Atom> parentAtoms) throws StructureBuildingException {
858
+ if (parentAtoms.size()!=childAtoms.size()){
859
+ throw new StructureBuildingException("Problem with fusion descriptors: Parent atoms specified: " + parentAtoms.size() +" Child atoms specified: " + childAtoms.size() + " These should have been identical!");
860
+ }
861
+ //replace parent atoms if the atom has already been used in fusion with the original atom
862
+ //This will occur if fusion has resulted in something resembling a spiro centre e.g. cyclopenta[1,2-b:5,1-b']bis[1,4]oxathiine
863
+ for (int i = parentAtoms.size() -1; i >=0; i--) {
864
+ if (atomsToRemoveToReplacementAtom.get(parentAtoms.get(i))!=null){
865
+ parentAtoms.set(i, atomsToRemoveToReplacementAtom.get(parentAtoms.get(i)));
866
+ }
867
+ if (atomsToRemoveToReplacementAtom.get(childAtoms.get(i))!=null){
868
+ childAtoms.set(i, atomsToRemoveToReplacementAtom.get(childAtoms.get(i)));
869
+ }
870
+ }
871
+
872
+ //sync spareValency and check that element type matches
873
+ for (int i = 0; i < childAtoms.size(); i++) {
874
+ Atom parentAtom = parentAtoms.get(i);
875
+ Atom childAtom = childAtoms.get(i);
876
+ if (childAtom.hasSpareValency()){
877
+ parentAtom.setSpareValency(true);
878
+ }
879
+ if (parentAtom.getElement() != childAtom.getElement()){
880
+ throw new StructureBuildingException("Invalid fusion descriptor: Heteroatom placement is ambiguous as it is not present in both components of the fusion");
881
+ }
882
+ atomsToRemoveToReplacementAtom.put(childAtom, parentAtom);
883
+ }
884
+
885
+ Set<Bond> fusionEdgeBonds = new HashSet<>();//these bonds already exist in both the child and parent atoms
886
+ for (int i = 0; i < childAtoms.size() -1; i++) {
887
+ fusionEdgeBonds.add(childAtoms.get(i).getBondToAtomOrThrow(childAtoms.get(i+1)));
888
+ fusionEdgeBonds.add(parentAtoms.get(i).getBondToAtomOrThrow(parentAtoms.get(i+1)));
889
+ }
890
+
891
+ Set<Bond> bondsToAddToParentAtoms = new LinkedHashSet<>();
892
+ for (Atom childAtom : childAtoms) {
893
+ for (Bond b : childAtom.getBonds()) {
894
+ if (!fusionEdgeBonds.contains(b)){
895
+ bondsToAddToParentAtoms.add(b);
896
+ }
897
+ }
898
+ }
899
+
900
+ Set<Bond> bondsToAddToChildAtoms = new LinkedHashSet<>();
901
+ for (Atom parentAtom : parentAtoms) {
902
+ for (Bond b : parentAtom.getBonds()) {
903
+ if (!fusionEdgeBonds.contains(b)){
904
+ bondsToAddToChildAtoms.add(b);
905
+ }
906
+ }
907
+ }
908
+
909
+ for (Bond bond : bondsToAddToParentAtoms) {
910
+ Atom from = bond.getFromAtom();
911
+ int indiceInChildAtoms = childAtoms.indexOf(from);
912
+ if (indiceInChildAtoms !=-1){
913
+ from = parentAtoms.get(indiceInChildAtoms);
914
+ }
915
+ Atom to = bond.getToAtom();
916
+ indiceInChildAtoms = childAtoms.indexOf(to);
917
+ if (indiceInChildAtoms !=-1){
918
+ to = parentAtoms.get(indiceInChildAtoms);
919
+ }
920
+ state.fragManager.createBond(from, to, 1);
921
+ }
922
+
923
+ for (Bond bond : bondsToAddToChildAtoms) {
924
+ Atom from = bond.getFromAtom();
925
+ int indiceInParentAtoms = parentAtoms.indexOf(from);
926
+ if (indiceInParentAtoms !=-1){
927
+ from = childAtoms.get(indiceInParentAtoms);
928
+ }
929
+ Atom to = bond.getToAtom();
930
+ indiceInParentAtoms = parentAtoms.indexOf(to);
931
+ if (indiceInParentAtoms !=-1){
932
+ to = childAtoms.get(indiceInParentAtoms);
933
+ }
934
+ Bond newBond = new Bond(from, to, 1);
935
+ if (childAtoms.contains(from)){
936
+ from.addBond(newBond);
937
+ }
938
+ else{
939
+ to.addBond(newBond);
940
+ }
941
+ }
942
+ }
943
+
944
+ /**
945
+ * Fuse the benzo with the subsequent ring
946
+ * Uses locants in front of the benz/benzo group to assign heteroatoms on the now numbered fused ring system
947
+ * @param benzoEl
948
+ * @param parentEl
949
+ * @throws StructureBuildingException
950
+ */
951
+ private void benzoSpecificFusion(Element benzoEl, Element parentEl) throws StructureBuildingException {
952
+ /*
953
+ * Perform the fusion, number it and associate it with the parentEl
954
+ */
955
+ Fragment benzoRing = benzoEl.getFrag();
956
+ Fragment parentRing = parentEl.getFrag();
957
+ performSimpleFusion(null, benzoRing , parentRing);
958
+ state.fragManager.incorporateFragment(benzoRing, parentRing);
959
+ removeMergedAtoms();
960
+ FusedRingNumberer.numberFusedRing(parentRing);//numbers the fused ring;
961
+ Fragment fusedRing =parentRing;
962
+ setBenzoHeteroatomPositioning(benzoEl, fusedRing);
963
+ }
964
+
965
+ /**
966
+ * Checks for locant(s) before benzo and uses these to set
967
+ * @param benzoEl
968
+ * @param fusedRing
969
+ * @throws StructureBuildingException
970
+ */
971
+ private void setBenzoHeteroatomPositioning(Element benzoEl, Fragment fusedRing) throws StructureBuildingException {
972
+ Element locantEl = OpsinTools.getPreviousSibling(benzoEl);
973
+ if (locantEl != null && locantEl.getName().equals(LOCANT_EL)) {
974
+ String[] locants = locantEl.getValue().split(",");
975
+ if (locantsCouldApplyToHeteroatomPositions(locants, benzoEl)) {
976
+ List<Atom> atomList =fusedRing.getAtomList();
977
+ List<Atom> heteroatoms = new ArrayList<>();
978
+ List<ChemEl> elementOfHeteroAtom = new ArrayList<>();
979
+ for (Atom atom : atomList) {//this iterates in the same order as the numbering system
980
+ if (atom.getElement() != ChemEl.C){
981
+ heteroatoms.add(atom);
982
+ elementOfHeteroAtom.add(atom.getElement());
983
+ }
984
+ }
985
+ if (locants.length == heteroatoms.size()){//as many locants as there are heteroatoms to assign
986
+ //check for special case of a single locant indicating where the group substitutes e.g. 4-benzofuran-2-yl
987
+ if (!(locants.length == 1 && OpsinTools.getPreviousSibling(locantEl) == null
988
+ && ComponentProcessor.checkLocantPresentOnPotentialRoot(state, benzoEl.getParent(), locants[0]))) {
989
+ for (Atom atom : heteroatoms) {
990
+ atom.setElement(ChemEl.C);
991
+ }
992
+ for (int i=0; i< heteroatoms.size(); i++) {
993
+ fusedRing.getAtomByLocantOrThrow(locants[i]).setElement(elementOfHeteroAtom.get(i));
994
+ }
995
+ locantEl.detach();
996
+ }
997
+ }
998
+ else if (locants.length > 1){
999
+ throw new StructureBuildingException("Unable to assign all locants to benzo-fused ring or multiplier was mising");
1000
+ }
1001
+ }
1002
+ }
1003
+ }
1004
+
1005
+ private boolean locantsCouldApplyToHeteroatomPositions(String[] locants, Element benzoEl) {
1006
+ if (!locantsAreAllNumeric(locants)) {
1007
+ return false;
1008
+ }
1009
+ List<Element> suffixes = benzoEl.getParent().getChildElements(SUFFIX_EL);
1010
+ int suffixesWithoutLocants = 0;
1011
+ for (Element suffix : suffixes) {
1012
+ if (suffix.getAttribute(LOCANT_ATR)==null){
1013
+ suffixesWithoutLocants++;
1014
+ }
1015
+ }
1016
+ if (locants.length == suffixesWithoutLocants){//In preference locants will be assigned to suffixes rather than to this nomenclature
1017
+ return false;
1018
+ }
1019
+ return true;
1020
+ }
1021
+
1022
+ private boolean locantsAreAllNumeric(String[] locants) {
1023
+ for (String locant : locants) {
1024
+ if (!MATCH_NUMERIC_LOCANT.matcher(locant).matches()){
1025
+ return false;
1026
+ }
1027
+ }
1028
+ return true;
1029
+ }
1030
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumberer.java ADDED
@@ -0,0 +1,1849 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.Collections;
5
+ import java.util.Comparator;
6
+ import java.util.EnumMap;
7
+ import java.util.HashMap;
8
+ import java.util.LinkedHashMap;
9
+ import java.util.List;
10
+ import java.util.Map;
11
+ import java.util.Map.Entry;
12
+
13
+ import org.apache.logging.log4j.LogManager;
14
+ import org.apache.logging.log4j.Logger;
15
+
16
+ /**
17
+ * Numbers fusedRings
18
+ * @author aa593
19
+ * @author dl387
20
+ *
21
+ */
22
+ class FusedRingNumberer {
23
+
24
+ private static final Logger LOG = LogManager.getLogger(FusedRingNumberer.class);
25
+ private static class RingConnectivityTable {
26
+ final List<RingShape> ringShapes = new ArrayList<>();
27
+ final List<Ring> neighbouringRings = new ArrayList<>();
28
+ final List<Integer> directionFromRingToNeighbouringRing = new ArrayList<>();
29
+ final List<Ring> usedRings = new ArrayList<>();
30
+
31
+ RingConnectivityTable copy(){
32
+ RingConnectivityTable copy = new RingConnectivityTable();
33
+ copy.ringShapes.addAll(ringShapes);
34
+ copy.neighbouringRings.addAll(neighbouringRings);
35
+ copy.directionFromRingToNeighbouringRing.addAll(directionFromRingToNeighbouringRing);
36
+ copy.usedRings.addAll(usedRings);
37
+ return copy;
38
+ }
39
+ }
40
+
41
+ /**
42
+ * Wrapper for a ring of a fused ring system with the shape that ring is currently being treated as having
43
+ * @author dl387
44
+ *
45
+ */
46
+ private static class RingShape{
47
+ private final Ring ring;
48
+ private final FusionRingShape shape;
49
+ public RingShape(Ring ring, FusionRingShape shape) {
50
+ this.ring = ring;
51
+ this.shape = shape;
52
+ }
53
+ Ring getRing() {
54
+ return ring;
55
+ }
56
+ FusionRingShape getShape() {
57
+ return shape;
58
+ }
59
+ }
60
+
61
/**
 * The depictions a ring may be given when working out directions between fused rings.
 * The "House" constants apply to 5 membered rings, the "SevenMembered" constants to
 * 7 membered rings, and {@link #standard} to rings of every other size.
 */
enum FusionRingShape {
    /* 5 membered ring */
    enterFromLeftHouse,
    enterFromTopLeftHouse,
    enterFromTopRightHouse,
    enterFromRightHouse,

    /* 7 membered ring */
    enterFromLeftSevenMembered,
    enterFromTopSevenMembered,
    enterFromRightSevenMembered,
    enterFromBottomRightSevenMembered,
    enterFromBottomLeftSevenMembered,

    /* any other ring size */
    standard
}
73
+
74
+ private static class Chain {
75
+ private final int length;
76
+ private final int startingX;
77
+ private final int y;
78
+
79
+ Chain(int length, int startingX, int y) {
80
+ this.length = length;
81
+ this.startingX = startingX;
82
+ this.y = y;
83
+ }
84
+
85
+ int getLength() {
86
+ return length;
87
+ }
88
+ int getStartingX() {
89
+ return startingX;
90
+ }
91
+ int getY() {
92
+ return y;
93
+ }
94
+ }
95
+
96
+ /**
97
+ * Sorts by atomSequences by the IUPAC rules for determining the preferred labelling
98
+ * The most preferred will be sorted to the back (0th position)
99
+ * @author dl387
100
+ *
101
+ */
102
+ private static class SortAtomSequences implements Comparator<List<Atom>> {
103
+
104
+ public int compare(List<Atom> sequenceA, List<Atom> sequenceB){
105
+ if (sequenceA.size() != sequenceB.size()){
106
+ //Error in fused ring building. Identified ring sequences not the same lengths!
107
+ return 0;
108
+ }
109
+
110
+ int i=0;
111
+ int j=0;
112
+ //Give low numbers for the heteroatoms as a set.
113
+ while(i < sequenceA.size()){
114
+ Atom atomA=sequenceA.get(i);
115
+ boolean isAaHeteroatom = atomA.getElement() != ChemEl.C;
116
+
117
+
118
+ //bridgehead carbon do not increment numbering
119
+ if (!isAaHeteroatom && atomA.getBondCount()>=3){
120
+ i++;
121
+ continue;
122
+ }
123
+
124
+ Atom atomB=sequenceB.get(j);
125
+ boolean isBaHeteroatom =atomB.getElement() != ChemEl.C;
126
+ if (!isBaHeteroatom && atomB.getBondCount()>=3){
127
+ j++;
128
+ continue;
129
+ }
130
+
131
+ if (isAaHeteroatom && !isBaHeteroatom){
132
+ return -1;
133
+ }
134
+ if (isBaHeteroatom && !isAaHeteroatom){
135
+ return 1;
136
+ }
137
+ i++;j++;
138
+ }
139
+
140
+ i=0;
141
+ j=0;
142
+ //Give low numbers for heteroatoms when considered in the order: O, S, Se, Te, N, P, As, Sb, Bi, Si, Ge, Sn, Pb, B, Hg
143
+ while(i < sequenceA.size()){
144
+ Atom atomA=sequenceA.get(i);
145
+
146
+ //bridgehead carbon do not increment numbering
147
+ if (atomA.getElement() == ChemEl.C && atomA.getBondCount()>=3){
148
+ i++;
149
+ continue;
150
+ }
151
+
152
+ Atom atomB=sequenceB.get(j);
153
+ if (atomB.getElement() == ChemEl.C && atomB.getBondCount()>=3){
154
+ j++;
155
+ continue;
156
+ }
157
+
158
+ Integer heteroAtomPriorityA = heteroAtomValues.get(atomA.getElement());
159
+ int atomAElementValue = heteroAtomPriorityA != null ? heteroAtomPriorityA : 0;
160
+
161
+ Integer heteroAtomPriorityB = heteroAtomValues.get(atomB.getElement());
162
+ int atomBElementValue = heteroAtomPriorityB != null ? heteroAtomPriorityB : 0;
163
+
164
+ if (atomAElementValue > atomBElementValue){
165
+ return -1;
166
+ }
167
+ if (atomAElementValue < atomBElementValue){
168
+ return 1;
169
+ }
170
+ i++;j++;
171
+ }
172
+
173
+ //Give low numbers to fusion carbon atoms.
174
+ for ( i = 0; i < sequenceA.size(); i++) {
175
+ Atom atomA=sequenceA.get(i);
176
+ Atom atomB=sequenceB.get(i);
177
+ if (atomA.getBondCount()>=3 && atomA.getElement() == ChemEl.C){
178
+ if (!(atomB.getBondCount()>=3 && atomB.getElement() == ChemEl.C)){
179
+ return -1;
180
+ }
181
+ }
182
+ if (atomB.getBondCount()>=3 && atomB.getElement() == ChemEl.C){
183
+ if (!(atomA.getBondCount()>=3 && atomA.getElement() == ChemEl.C)){
184
+ return 1;
185
+ }
186
+ }
187
+ }
188
+ //Note that any sequences still unsorted at this step will have fusion carbon atoms in the same places
189
+ //which means you can go through both sequences without constantly looking for fusion carbons i.e. the variable j is no longer needed
190
+
191
+ //Give low numbers to fusion rather than non-fusion atoms of the same heteroelement.
192
+ for (i = 0; i < sequenceA.size(); i++) {
193
+ Atom atomA=sequenceA.get(i);
194
+ Atom atomB=sequenceB.get(i);
195
+ if (atomA.getBondCount()>=3){
196
+ if (!(atomB.getBondCount()>=3)){
197
+ return -1;
198
+ }
199
+ }
200
+ if (atomB.getBondCount()>=3){
201
+ if (!(atomA.getBondCount()>=3)){
202
+ return 1;
203
+ }
204
+ }
205
+ }
206
+ //TODO consider heteroatoms FR5.4d
207
+ return 0;
208
+ }
209
+ }
210
+
211
/**
 * Value assigned to each heteroatom element; when sequences are compared the atom with the
 * larger value is preferred.
 * unknown heteroatoms or carbon are given a value of 0
 */
private static final Map<ChemEl, Integer> heteroAtomValues = new EnumMap<>(ChemEl.class);
static {
    //listed from the lowest value (2) to the highest value (24)
    ChemEl[] elementsInAscendingValue = {
        ChemEl.Hg, ChemEl.Tl, ChemEl.In, ChemEl.Ga, ChemEl.Al, ChemEl.B,
        ChemEl.Pb, ChemEl.Sn, ChemEl.Ge, ChemEl.Si,
        ChemEl.Bi, ChemEl.Sb, ChemEl.As, ChemEl.P, ChemEl.N,
        ChemEl.Te, ChemEl.Se, ChemEl.S, ChemEl.O,
        ChemEl.I, ChemEl.Br, ChemEl.Cl, ChemEl.F
    };
    int value = 2;
    for (ChemEl element : elementsInAscendingValue) {
        heteroAtomValues.put(element, value);
        value++;
    }
}
238
/*
 * The meaning of the integers used is as follows:
 *
 *            2
 *        3   ^   1
 *          \ | /
 *   +-4 <-       -> 0
 *          / | \
 *       -3   v  -1
 *           -2
 *
 * They indicate the relative directions between rings
 * Possibly enums should be used...
 */
251
+
252
+ /**
253
+ * Numbers the fused ring
254
+ * Works reliably for all common ring systems.
255
+ * Some complex fused ring systems involving multiple connections to rings with an odd number of edges may still be wrong
256
+ * @param fusedRing
257
+ * @throws StructureBuildingException
258
+ */
259
+ static void numberFusedRing(Fragment fusedRing) throws StructureBuildingException {
260
+ List<Ring> rings = SSSRFinder.getSetOfSmallestRings(fusedRing);
261
+ if (rings.size() <2) {
262
+ throw new StructureBuildingException("Ring perception system found less than 2 rings within input fragment!");
263
+ }
264
+ List<Atom> atomList = fusedRing.getAtomList();
265
+ setupAdjacentFusedRingProperties(rings);
266
+ if (!checkRingApplicability(rings)) {
267
+ for (Atom atom : atomList) {
268
+ atom.clearLocants();
269
+ }
270
+ return;
271
+ }
272
+ List<List<Atom>> atomSequences = determinePossiblePeripheryAtomOrders(rings, atomList.size());
273
+ if (atomSequences.size()==0){
274
+ for (Atom atom : atomList) {
275
+ atom.clearLocants();
276
+ }
277
+ return;
278
+ }
279
+
280
+ // add missing atoms to each path
281
+ for (List<Atom> path : atomSequences) {//TODO properly support interior atom labelling
282
+ for(Atom atom : atomList) {
283
+ if(!path.contains(atom)) {
284
+ path.add(atom);
285
+ }
286
+ }
287
+ }
288
+ // find the preferred numbering scheme then relabel with this scheme
289
+ Collections.sort(atomSequences, new SortAtomSequences());
290
+ FragmentTools.relabelLocantsAsFusedRingSystem(atomSequences.get(0));
291
+ fusedRing.reorderAtomCollection(atomSequences.get(0));
292
+ }
293
+
294
+ /**
295
+ * Populates rings with their neighbouring fused rings and the bonds involved
296
+ * @param rings
297
+ */
298
+ static void setupAdjacentFusedRingProperties(List<Ring> rings){
299
+ for (int i = 0, l = rings.size(); i < l; i++) {
300
+ Ring curRing = rings.get(i);
301
+ bondLoop : for (Bond bond : curRing.getBondList()) { // go through all the bonds for the current ring
302
+ for (int j = i + 1; j < l; j++) {
303
+ Ring otherRing = rings.get(j);
304
+ if (otherRing.getBondList().contains(bond)) { // check if this bond belongs to any other ring
305
+ otherRing.addNeighbour(bond, curRing);
306
+ curRing.addNeighbour(bond, otherRing); // if so, then associate the bond with the adjacent ring
307
+ continue bondLoop;
308
+ }
309
+ }
310
+ }
311
+ }
312
+ }
313
+
314
+ /**
315
+ * Checks that all the rings are of sizes 3-8 or if larger than 8 are involved in 2 or fewer fused bonds
316
+ * @param rings
317
+ * @return
318
+ */
319
+ private static boolean checkRingApplicability(List<Ring> rings) {
320
+ for (Ring ring : rings) {
321
+ if (ring.size() <=2){
322
+ throw new RuntimeException("Invalid ring size: " +ring.size());
323
+ }
324
+ if (ring.size() >8 && ring.getNumberOfFusedBonds() > 2){
325
+ return false;
326
+ }
327
+ }
328
+ return true;
329
+ }
330
+
331
+ /**
332
+ * Returns possible enumerations of atoms. Currently Interior atoms are not considered.
333
+ * These enumerations will be compliant with rules FR5.1-FR5.3 of the fused ring nomenclature guidelines
334
+ * http://www.chem.qmul.ac.uk/iupac/fusedring/FR51.html
335
+ * @param rings
336
+ * @param atomCountOfFusedRingSystem
337
+ * @return
338
+ * @throws StructureBuildingException
339
+ */
340
+ private static List<List<Atom>> determinePossiblePeripheryAtomOrders(List<Ring> rings, int atomCountOfFusedRingSystem) throws StructureBuildingException {
341
+ List<Ring> tRings = findTerminalRings(rings);
342
+ if (tRings.size()<1) {
343
+ throw new RuntimeException("OPSIN bug: Unable to find a terminal ring in fused ring system");
344
+ }
345
+ Ring tRing = tRings.get(0);
346
+ Bond b1 = getStartingNonFusedBond(tRing);
347
+ if(b1 == null) {
348
+ throw new RuntimeException("OPSIN Bug: Non-fused bond from terminal ring not found");
349
+ }
350
+
351
+ List<RingConnectivityTable> cts = new ArrayList<>();
352
+ RingConnectivityTable startingCT = new RingConnectivityTable();
353
+ cts.add(startingCT);
354
+ buildRingConnectionTables(tRing, null, 0, b1, b1.getFromAtom(), startingCT, cts);
355
+ //The preference against fusion to elongated edges is built into the construction of the ring table
356
+
357
+ /* FR 5.1.1/FR 5.1.2 Preferred shapes preferred to distorted shapes */
358
+ removeCTsWithDistortedRingShapes(cts);
359
+ //TODO better implement the corner cases of FR 5.1.3-5.1.5
360
+
361
+ /* FR-5.2a. Maximum number of rings in a horizontal row */
362
+ Map<RingConnectivityTable, List<Integer>> horizonalRowDirections = findLongestChainDirections(cts);
363
+ List<Ring[][]> ringMaps = createRingMapsAlignedAlongGivenhorizonalRowDirections(horizonalRowDirections);
364
+ /* FR-5.2b-d */
365
+ return findPossiblePaths(ringMaps, atomCountOfFusedRingSystem);
366
+ }
367
+
368
+ /**
369
+ * Finds the rings with the minimum number of fused bonds
370
+ * @param rings
371
+ * @return
372
+ */
373
+ private static List<Ring> findTerminalRings(List<Ring> rings) {
374
+ List<Ring> tRings = new ArrayList<>();
375
+
376
+ int minFusedBonds = Integer.MAX_VALUE;
377
+ for (Ring ring : rings){
378
+ if (ring.getNumberOfFusedBonds() < minFusedBonds) {
379
+ minFusedBonds = ring.getNumberOfFusedBonds();
380
+ }
381
+ }
382
+
383
+ for (Ring ring : rings){
384
+ if (ring.getNumberOfFusedBonds() == minFusedBonds) {
385
+ tRings.add(ring);
386
+ }
387
+ }
388
+ return tRings;
389
+ }
390
+
391
+ /**
392
+ * Recursive function to create the connectivity table of the rings, for each connection includes both directions
393
+ * @param currentRing
394
+ * @param previousRing
395
+ * @param previousDir
396
+ * @param previousBond
397
+ * @param atom
398
+ * @param ct
399
+ * @param cts
400
+ * @return
401
+ */
402
+ private static List<RingConnectivityTable> buildRingConnectionTables(Ring currentRing, Ring previousRing, int previousDir, Bond previousBond, Atom atom, RingConnectivityTable ct, List<RingConnectivityTable> cts) {
403
+ // order atoms and bonds in the ring
404
+ currentRing.makeCyclicLists(previousBond, atom);
405
+ List<RingConnectivityTable> generatedCts = new ArrayList<>();
406
+ List<FusionRingShape> allowedShapes = getAllowedShapesForRing(currentRing, previousBond);
407
+ if (allowedShapes.size() == 0) {
408
+ throw new RuntimeException("OPSIN limitation, unsupported ring size in fused ring numbering");
409
+ }
410
+ ct.usedRings.add(currentRing);
411
+ for (int i = allowedShapes.size() - 1; i >=0; i--) {
412
+ FusionRingShape fusionRingShape = allowedShapes.get(i);
413
+ RingConnectivityTable currentCT;
414
+ if (i==0) {
415
+ currentCT = ct;
416
+ }
417
+ else{
418
+ currentCT = ct.copy();
419
+ cts.add(currentCT);
420
+ generatedCts.add(currentCT);
421
+ }
422
+ RingShape ringShape = new RingShape(currentRing, fusionRingShape);
423
+ List<RingConnectivityTable> ctsToExpand = new ArrayList<>();
424
+ ctsToExpand.add(currentCT);//all the cts to consider, the currentCT and generated clones
425
+ for (Ring neighbourRing : currentRing.getNeighbours()) {
426
+ //find the directions between the current ring and all neighbouring rings including the previous ring
427
+ // this means that the direction to the previous ring will then be known in both directions
428
+
429
+ // find direction
430
+ Bond currentBond = findFusionBond(currentRing, neighbourRing);
431
+
432
+ int dir = 0;
433
+ if (neighbourRing == previousRing) {
434
+ dir = getOppositeDirection(previousDir);
435
+ }
436
+ else {
437
+ dir = calculateRingDirection(ringShape, previousBond, currentBond, previousDir);
438
+ }
439
+ //System.out.println(currentRing +"|" +neighbourRing +"|" +dir +"|" +(neighbourRing==previousRing));
440
+
441
+ // place into connectivity table, like graph, rings and their connection
442
+ for (RingConnectivityTable ctToExpand : ctsToExpand) {
443
+ ctToExpand.ringShapes.add(ringShape);
444
+ ctToExpand.neighbouringRings.add(neighbourRing);
445
+ ctToExpand.directionFromRingToNeighbouringRing.add(dir);
446
+ }
447
+ if (!currentCT.usedRings.contains(neighbourRing)) {
448
+ List<RingConnectivityTable> newCts = new ArrayList<>();
449
+ for (RingConnectivityTable ctToExpand : ctsToExpand) {
450
+ Atom a = getAtomFromBond(currentRing, currentBond);
451
+ List<RingConnectivityTable> generatedDownStreamCts = buildRingConnectionTables(neighbourRing, currentRing, dir, currentBond, a, ctToExpand, cts);
452
+ newCts.addAll(generatedDownStreamCts);
453
+ }
454
+ ctsToExpand.addAll(newCts);
455
+ generatedCts.addAll(newCts);
456
+ }
457
+ }
458
+ }
459
+ return generatedCts;
460
+ }
461
+
462
+ /**
463
+ * Returns the allowed shapes for the given ring.
464
+ * The starting bond is required to assured that elongated bonds do not unnecesarily correspond to fusions
465
+ * Currently only 5 membered rings are considered in multiple orientations but the same
466
+ * is probably required for 7+ member rings
467
+ * @param ring
468
+ * @param startingBond
469
+ * @return
470
+ */
471
+ private static List<FusionRingShape> getAllowedShapesForRing(Ring ring, Bond startingBond) {
472
+ List<FusionRingShape> allowedRingShapes = new ArrayList<>();
473
+ int size = ring.size();
474
+ if (size==5){
475
+ List<Bond> fusedBonds = ring.getFusedBonds();
476
+ int fusedBondCount = fusedBonds.size();
477
+ if (fusedBondCount==1){
478
+ allowedRingShapes.add(FusionRingShape.enterFromLeftHouse);
479
+ }
480
+ else if (fusedBondCount==2 || fusedBondCount==3 || fusedBondCount==4){
481
+ List<Integer> distances = new ArrayList<>();//one distance is likely to be 0
482
+ for (Bond fusedBond : fusedBonds) {
483
+ distances.add(calculateDistanceBetweenBonds(startingBond, fusedBond, ring));
484
+ }
485
+ if (!distances.contains(1)){
486
+ allowedRingShapes.add(FusionRingShape.enterFromLeftHouse);
487
+ }
488
+ if (!distances.contains(4)){
489
+ allowedRingShapes.add(FusionRingShape.enterFromRightHouse);
490
+ }
491
+
492
+ if (!distances.contains(2)){
493
+ allowedRingShapes.add(FusionRingShape.enterFromTopLeftHouse);
494
+ }
495
+ else if (!distances.contains(3)){
496
+ allowedRingShapes.add(FusionRingShape.enterFromTopRightHouse);
497
+ }
498
+ allowedRingShapes = removeDegenerateRingShapes(allowedRingShapes, distances, 5);
499
+ }
500
+ else if (fusedBondCount==5){
501
+ allowedRingShapes.add(FusionRingShape.enterFromLeftHouse);
502
+ allowedRingShapes.add(FusionRingShape.enterFromRightHouse);
503
+ //top left and top right are the same other than position of the elongated bond which will invariably be used anyway
504
+ allowedRingShapes.add(FusionRingShape.enterFromTopLeftHouse);
505
+ }
506
+ }
507
+ else if (size==7){
508
+ List<Bond> fusedBonds = ring.getFusedBonds();
509
+ int fusedBondCount = fusedBonds.size();
510
+ if (fusedBondCount==1){
511
+ allowedRingShapes.add(FusionRingShape.enterFromLeftSevenMembered);
512
+ }
513
+ else{
514
+ List<Integer> distances = new ArrayList<>();//one distance is likely to be 0
515
+ for (Bond fusedBond : fusedBonds) {
516
+ distances.add(calculateDistanceBetweenBonds(startingBond, fusedBond, ring));
517
+ }
518
+ if (!distances.contains(4) && !distances.contains(6)){
519
+ allowedRingShapes.add(FusionRingShape.enterFromLeftSevenMembered);
520
+ }
521
+ if (!distances.contains(1) && !distances.contains(6)){
522
+ allowedRingShapes.add(FusionRingShape.enterFromTopSevenMembered);
523
+ }
524
+ if (!distances.contains(1) && !distances.contains(3)){
525
+ allowedRingShapes.add(FusionRingShape.enterFromRightSevenMembered);
526
+ }
527
+ if (!distances.contains(2) && !distances.contains(4)){
528
+ allowedRingShapes.add(FusionRingShape.enterFromBottomRightSevenMembered);
529
+ }
530
+ if (!distances.contains(3) && !distances.contains(5)){
531
+ allowedRingShapes.add(FusionRingShape.enterFromBottomLeftSevenMembered);
532
+ }
533
+ allowedRingShapes = removeDegenerateRingShapes(allowedRingShapes, distances, 7);
534
+ }
535
+ }
536
+ else{
537
+ allowedRingShapes.add(FusionRingShape.standard);
538
+ }
539
+ return allowedRingShapes;
540
+ }
541
+
542
+ /**
543
+ * Removes the ring shapes that for given distances have identical properties
544
+ * @param allowedRingShapes
545
+ * @param distances
546
+ * @param ringSize
547
+ */
548
+ private static List<FusionRingShape> removeDegenerateRingShapes(List<FusionRingShape> allowedRingShapes, List<Integer> distances, int ringSize) {
549
+ distances = new ArrayList<>(distances);
550
+ distances.remove((Integer)0);//remove distance 0 if present, this invariably comes from the starting bond and is not of interest (and breaks getDirectionFromDist)
551
+ for (int i = allowedRingShapes.size() - 1; i >=0; i--) {
552
+ FusionRingShape shapeToConsiderRemoving = allowedRingShapes.get(i);
553
+ for (int j = i - 1; j >=0; j--) {
554
+ FusionRingShape shapeToCompareWith = allowedRingShapes.get(j);
555
+ boolean foundDifference = false;
556
+ for (Integer distance : distances) {
557
+ if (getDirectionFromDist(shapeToConsiderRemoving, ringSize, distance) != getDirectionFromDist(shapeToCompareWith, ringSize, distance)){
558
+ foundDifference = true;
559
+ break;
560
+ }
561
+ }
562
+ if (!foundDifference){
563
+ allowedRingShapes.remove(i);
564
+ break;
565
+ }
566
+ }
567
+ }
568
+
569
+ return allowedRingShapes;
570
+ }
571
+
572
+ /**
573
+ * Calculates the direction of the next ring according to the distance between fusion bonds and the previous direction
574
+ * @param ringShape
575
+ * @param previousBond
576
+ * @param currentBond
577
+ * @param previousDir
578
+ * @return
579
+ */
580
+ private static int calculateRingDirection(RingShape ringShape, Bond previousBond, Bond currentBond, int previousDir) {
581
+ // take the ring fused to one from the previous loop step
582
+ Ring ring = ringShape.getRing();
583
+ if (ring.getCyclicBondList() == null ) {
584
+ throw new RuntimeException("OPSIN bug: cyclic bond set should have already been populated");
585
+ }
586
+
587
+ int dist = calculateDistanceBetweenBonds(previousBond, currentBond, ring);
588
+
589
+ if (dist == 0) {
590
+ throw new RuntimeException("OPSIN bug: Distance between bonds is equal to 0");
591
+ }
592
+
593
+ int relativeDir = getDirectionFromDist(ringShape.getShape(), ring.size(), dist);
594
+ return determineAbsoluteDirectionUsingPreviousDirection(ringShape.getShape(), ring.size(), relativeDir, previousDir);
595
+ }
596
+
597
+ /**
598
+ * Given two bonds on a ring returns the distance (in bonds) between them
599
+ * @param bond1
600
+ * @param bond2
601
+ * @param ring
602
+ * @return
603
+ */
604
+ private static int calculateDistanceBetweenBonds(Bond bond1, Bond bond2, Ring ring) {
605
+ List<Bond> cyclicBondList =ring.getCyclicBondList();
606
+ int previousBondIndice = cyclicBondList.indexOf(bond1);
607
+ int currentBondIndice = cyclicBondList.indexOf(bond2);
608
+ if (previousBondIndice==-1 || currentBondIndice==-1){
609
+ throw new RuntimeException("OPSIN bug: previous and current bond were not present in the cyclic bond list of the current ring");
610
+ }
611
+ int ringSize =ring.size();
612
+ int dist = (ringSize + currentBondIndice - previousBondIndice) % ringSize;
613
+ return dist;
614
+ }
615
+
616
+ /**
617
+ * Uses the ring shape, the ring size and distance between the incoming and outgoing fused bond to determine
618
+ * the relative direction between the entry point on the ring and the exit point
619
+ * @param fusionRingShape
620
+ * @param ringSize
621
+ * @param dist
622
+ * @return
623
+ */
624
+ private static int getDirectionFromDist(FusionRingShape fusionRingShape, int ringSize, int dist) {
625
+ int dir=0;
626
+ if (ringSize == 3) { // 3 member ring
627
+ if (dist == 1) {
628
+ dir = -1;
629
+ }
630
+ else if (dist == 2) {
631
+ dir = 1;
632
+ }
633
+ else throw new RuntimeException("Impossible distance between bonds for a 3 membered ring");
634
+ }
635
+ else if (ringSize == 4) { // 4 member ring
636
+ if (dist ==1) {
637
+ dir = -2;
638
+ }
639
+ else if (dist == 2) {
640
+ dir = 0;
641
+ }
642
+ else if (dist ==3) {
643
+ dir = 2;
644
+ }
645
+ else throw new RuntimeException("Impossible distance between bonds for a 4 membered ring");
646
+ }
647
+ else if (ringSize == 5) { // 5 member ring
648
+ switch (fusionRingShape) {
649
+ case enterFromLeftHouse:
650
+ if (dist ==1){
651
+ dir = -2;//fusion to an elongated bond
652
+ }
653
+ else if (dist ==2){
654
+ dir = 0;
655
+ }
656
+ else if (dist ==3){
657
+ dir = 1;
658
+ }
659
+ else if (dist ==4){
660
+ dir = 3;
661
+ }
662
+ else {
663
+ throw new RuntimeException("Impossible distance between bonds for a 5 membered ring");
664
+ }
665
+ break;
666
+ case enterFromTopLeftHouse:
667
+ if (dist ==1){
668
+ dir = -3;
669
+ }
670
+ else if (dist ==2){
671
+ dir = -1;//fusion to an elongated bond
672
+ }
673
+ else if (dist ==3){
674
+ dir = 1;
675
+ }
676
+ else if (dist ==4){
677
+ dir = 3;
678
+ }
679
+ else {
680
+ throw new RuntimeException("Impossible distance between bonds for a 5 membered ring");
681
+ }
682
+ break;
683
+ case enterFromTopRightHouse:
684
+ if (dist ==1){
685
+ dir = -3;
686
+ }
687
+ else if (dist ==2){
688
+ dir = -1;
689
+ }
690
+ else if (dist ==3){
691
+ dir = 1;//fusion to an elongated bond
692
+ }
693
+ else if (dist ==4){
694
+ dir = 3;
695
+ }
696
+ else {
697
+ throw new RuntimeException("Impossible distance between bonds for a 5 membered ring");
698
+ }
699
+ break;
700
+ case enterFromRightHouse:
701
+ if (dist ==1){
702
+ dir = -3;
703
+ }
704
+ else if (dist ==2){
705
+ dir = -1;
706
+ }
707
+ else if (dist ==3){
708
+ dir = 0;
709
+ }
710
+ else if (dist ==4){
711
+ dir = 2;//fusion to an elongated bond
712
+ }
713
+ else {
714
+ throw new RuntimeException("Impossible distance between bonds for a 5 membered ring");
715
+ }
716
+ break;
717
+ default :
718
+ throw new RuntimeException("OPSIN Bug: Unrecognised fusion ring shape for 5 membered ring");
719
+ }
720
+ }
721
+ else if (ringSize == 7) { // 7 member ring
722
+ switch (fusionRingShape) {
723
+ case enterFromLeftSevenMembered:
724
+ if (dist ==1){
725
+ dir = -3;
726
+ }
727
+ else if (dist ==2){
728
+ dir = -1;
729
+ }
730
+ else if (dist ==3){
731
+ dir = 0;
732
+ }
733
+ else if (dist ==4){
734
+ dir = 1;//fusion to an abnormally angled bond
735
+ }
736
+ else if (dist ==5){
737
+ dir = 2;
738
+ }
739
+ else if (dist ==6){
740
+ dir = 3;//fusion to an abnormally angled bond
741
+ }
742
+ else {
743
+ throw new RuntimeException("Impossible distance between bonds for a 7 membered ring");
744
+ }
745
+ break;
746
+ case enterFromTopSevenMembered:
747
+ if (dist ==1){
748
+ dir = -3;//fusion to an abnormally angled bond
749
+ }
750
+ else if (dist ==2){
751
+ dir = -2;
752
+ }
753
+ else if (dist ==3){
754
+ dir = -1;
755
+ }
756
+ else if (dist ==4){
757
+ dir = 1;
758
+ }
759
+ else if (dist ==5){
760
+ dir = 2;
761
+ }
762
+ else if (dist ==6){
763
+ dir = 3;//fusion to an abnormally angled bond
764
+ }
765
+ else {
766
+ throw new RuntimeException("Impossible distance between bonds for a 7 membered ring");
767
+ }
768
+ break;
769
+ case enterFromRightSevenMembered:
770
+ if (dist ==1){
771
+ dir = -3;//fusion to an abnormally angled bond
772
+ }
773
+ else if (dist ==2){
774
+ dir = -2;
775
+ }
776
+ else if (dist ==3){
777
+ dir = -1;//fusion to an abnormally angled bond
778
+ }
779
+ else if (dist ==4){
780
+ dir = 0;
781
+ }
782
+ else if (dist ==5){
783
+ dir = 1;
784
+ }
785
+ else if (dist ==6){
786
+ dir = 3;
787
+ }
788
+ else {
789
+ throw new RuntimeException("Impossible distance between bonds for a 7 membered ring");
790
+ }
791
+ break;
792
+ case enterFromBottomRightSevenMembered:
793
+ if (dist ==1){
794
+ dir = -3;
795
+ }
796
+ else if (dist ==2){
797
+ dir = -2;//fusion to an abnormally angled bond
798
+ }
799
+ else if (dist ==3){
800
+ dir = -1;
801
+ }
802
+ else if (dist ==4){
803
+ dir = 0;//fusion to an abnormally angled bond
804
+ }
805
+ else if (dist ==5){
806
+ dir = 1;
807
+ }
808
+ else if (dist ==6){
809
+ dir = 3;
810
+ }
811
+ else {
812
+ throw new RuntimeException("Impossible distance between bonds for a 7 membered ring");
813
+ }
814
+ break;
815
+ case enterFromBottomLeftSevenMembered:
816
+ if (dist ==1){
817
+ dir = -3;
818
+ }
819
+ else if (dist ==2){
820
+ dir = -1;
821
+ }
822
+ else if (dist ==3){
823
+ dir = 0;//fusion to an abnormally angled bond
824
+ }
825
+ else if (dist ==4){
826
+ dir = 1;
827
+ }
828
+ else if (dist ==5){
829
+ dir = 2;//fusion to an abnormally angled bond
830
+ }
831
+ else if (dist ==6){
832
+ dir = 3;
833
+ }
834
+ else {
835
+ throw new RuntimeException("Impossible distance between bonds for a 7 membered ring");
836
+ }
837
+ break;
838
+ default:
839
+ throw new RuntimeException("OPSIN Bug: Unrecognised fusion ring shape for 7 membered ring");
840
+ }
841
+ }
842
+ else if (ringSize % 2 == 0) {//general case even number of atoms ring (a 6 membered ring or distortion of)
843
+ if (dist == 1) {
844
+ dir = -3;
845
+ }
846
+ else if (dist == ringSize-1) {
847
+ dir = 3;
848
+ }
849
+ else {
850
+ dir = dist - ringSize/2;
851
+ if (Math.abs(dir) > 2 && ringSize >= 8){// 8 and more neighbours
852
+ dir = -2 * Integer.signum(dir);
853
+ }
854
+ }
855
+ }
856
+ else {// general case odd number of atoms ring (distortion of an even numbered ring by insertion of one atom).
857
+ if (dist == 1) {
858
+ dir = -3;
859
+ }
860
+ else if (dist == ringSize/2 || dist == ringSize/2 + 1) {//0 in both cases as effectively we are using a different depiction of the ring system. See FR-5.1.1 (this is done to give the longest horizontal row)
861
+ dir = 0;
862
+ }
863
+ else if (dist == ringSize-1) {
864
+ dir = 3;
865
+ }
866
+ else if(dist < ringSize/2) {
867
+ dir = -2;
868
+ }
869
+ else if(dist > ringSize/2+1) {
870
+ dir = 2;
871
+ }
872
+ else{
873
+ throw new RuntimeException("OPSIN Bug: Unable to determine direction between odd number of atoms ring and next ring");
874
+ }
875
+ }
876
+ return dir;
877
+ }
878
+
879
+ private static void removeCTsWithDistortedRingShapes(List<RingConnectivityTable> cts) {
880
+ Map<RingConnectivityTable, List<Integer>> ctToDistortedRings = new HashMap<>();
881
+ for (RingConnectivityTable ct : cts) {
882
+ List<Integer> distortedRingSizes = new ArrayList<>();
883
+ ctToDistortedRings.put(ct, distortedRingSizes);
884
+ List<RingShape> ringShapes = ct.ringShapes;
885
+ for (int i = 0; i < ringShapes.size(); i++) {
886
+ Ring r1 = ringShapes.get(i).getRing();
887
+ Ring r2 = ct.neighbouringRings.get(i);
888
+ for (int j = i +1; j < ringShapes.size(); j++) {
889
+ if (ringShapes.get(j).getRing().equals(r2) && ct.neighbouringRings.get(j).equals(r1)){//look for the reverse entry in the ring connection table
890
+ int expectedDir = getOppositeDirection(ct.directionFromRingToNeighbouringRing.get(i));
891
+ if (expectedDir != ct.directionFromRingToNeighbouringRing.get(j)){
892
+ distortedRingSizes.add(r2.size());
893
+ }
894
+ }
895
+ }
896
+ }
897
+ }
898
+ int minDistortedRings = Integer.MAX_VALUE;//find the minimum number of distorted rings
899
+ for (List<Integer> distortedRingSizes : ctToDistortedRings.values()) {
900
+ if (distortedRingSizes.size() < minDistortedRings){
901
+ minDistortedRings = distortedRingSizes.size();
902
+ }
903
+ }
904
+ for (int i = cts.size()-1; i>=0; i--) {
905
+ if (ctToDistortedRings.get(cts.get(i)).size()>minDistortedRings){
906
+ cts.remove(i);
907
+ }
908
+ }
909
+ }
910
+
911
+ /**
912
+ * Given a list of cts find the longest chain of rings in a line. This can be used to find a possible horizontal row
913
+ * The output is a map between the connection tables and the directions which give the longest chains
914
+ * Some cts may have no directions that give a chain of rings of this length
915
+ *
916
+ * @param cts
917
+ * @return
918
+ */
919
+ private static Map<RingConnectivityTable, List<Integer>> findLongestChainDirections(List<RingConnectivityTable> cts){
920
+ Map<RingConnectivityTable, List<Integer>> horizonalRowDirections = new LinkedHashMap<>();
921
+ int maxChain = 0;
922
+ for (RingConnectivityTable ct : cts) {
923
+ if (ct.ringShapes.size() != ct.neighbouringRings.size() || ct.neighbouringRings.size() != ct.directionFromRingToNeighbouringRing.size()) {
924
+ throw new RuntimeException("OPSIN Bug: Sizes of arrays in fused ring numbering connection table are not equal");
925
+ }
926
+ int ctEntriesSize = ct.ringShapes.size();
927
+ List<Integer> directions = new ArrayList<>();
928
+ horizonalRowDirections.put(ct, directions);
929
+
930
+ for (int i = 0; i < ctEntriesSize; i++) {
931
+ Ring neighbour = ct.neighbouringRings.get(i);
932
+ int curChain = 1;
933
+ int curDir = ct.directionFromRingToNeighbouringRing.get(i);
934
+
935
+ nextRingInChainLoop: for (int k = 0; k <= ct.usedRings.size(); k++) {//<= rather than < so buggy behaviour can be caught
936
+ int indexOfNeighbour = indexOfCorrespondingRingshape(ct.ringShapes, neighbour);
937
+
938
+ if (indexOfNeighbour >= 0) {
939
+ for (int j = indexOfNeighbour; j < ctEntriesSize; j++) {
940
+ if (ct.ringShapes.get(j).getRing() == neighbour && ct.directionFromRingToNeighbouringRing.get(j) == curDir) {
941
+ curChain++;
942
+ neighbour = ct.neighbouringRings.get(j);
943
+ continue nextRingInChainLoop;
944
+ }
945
+ }
946
+ }
947
+ else{
948
+ throw new RuntimeException("OPSIN bug: fused ring numbering: Ring missing from connection table");
949
+ }
950
+ if (curChain >= maxChain ) {
951
+ int oDir = getOppositeDirection(curDir);
952
+ if(curChain > maxChain){//new longest chain found
953
+ for (List<Integer> previousDirections: horizonalRowDirections.values()) {
954
+ previousDirections.clear();
955
+ }
956
+ }
957
+ // if we has this direction before or its opposite, it is the same orientation
958
+ if(curChain > maxChain || (!directions.contains(curDir) && !directions.contains(oDir))) {
959
+ directions.add(curDir);
960
+ }
961
+ maxChain = curChain;
962
+ }
963
+ break;
964
+ }
965
+ if (maxChain > ct.usedRings.size()){
966
+ throw new RuntimeException("OPSIN bug: fused ring layout contained a loop: more rings in a chain than there were rings!");
967
+ }
968
+ }
969
+ }
970
+ return horizonalRowDirections;
971
+ }
972
+
973
+ /**
974
+ * Given a list of ringShapes finds the indice of the ringShape corresponding to the given ring
975
+ * returns -1 if this is not possible
976
+ * @param ringShapes
977
+ * @param ring
978
+ * @return
979
+ */
980
+ private static int indexOfCorrespondingRingshape(List<RingShape> ringShapes, Ring ring) {
981
+ for (int i = 0; i < ringShapes.size(); i++) {
982
+ if (ringShapes.get(i).getRing().equals(ring)){
983
+ return i;
984
+ }
985
+ }
986
+ return -1;
987
+ }
988
+
989
+
990
+ /**
991
+ * For each RingConnectivityTable and for each horizontal row direction creates a ringMap aligned along the given horizontal row direction
992
+ * @param horizonalRowDirectionsMap
993
+ * @return
994
+ * @throws StructureBuildingException
995
+ */
996
+ private static List<Ring[][]> createRingMapsAlignedAlongGivenhorizonalRowDirections(Map<RingConnectivityTable, List<Integer>> horizonalRowDirectionsMap) throws StructureBuildingException {
997
+ List<Ring[][]> ringMaps = new ArrayList<>();
998
+ for (Entry<RingConnectivityTable, List<Integer>> entry : horizonalRowDirectionsMap.entrySet()) {
999
+ RingConnectivityTable ct = entry.getKey();
1000
+ if ( ct.ringShapes.size() != ct.neighbouringRings.size() || ct.neighbouringRings.size() != ct.directionFromRingToNeighbouringRing.size() || ct.ringShapes.size() <= 0) {
1001
+ throw new RuntimeException("OPSIN Bug: Sizes of arrays in fused ring numbering connection table are not equal");
1002
+ }
1003
+ int ctEntriesSize = ct.ringShapes.size();
1004
+ for (Integer horizonalRowDirection : entry.getValue()) {
1005
+ int[] directionFromRingToNeighbouringRing = new int[ctEntriesSize];
1006
+ // turn the ring system such as to be aligned along the horizonalRowDirection
1007
+ for(int i=0; i<ctEntriesSize; i++){
1008
+ RingShape ringShape = ct.ringShapes.get(i);
1009
+ directionFromRingToNeighbouringRing[i] = determineAbsoluteDirectionUsingPreviousDirection(ringShape.getShape(), ringShape.getRing().size(), ct.directionFromRingToNeighbouringRing.get(i), -horizonalRowDirection);
1010
+ }
1011
+ Ring[][] ringMap = generateRingMap(ct, directionFromRingToNeighbouringRing);
1012
+ if (ringMap !=null){//null if overlapping bonds rings present
1013
+ ringMaps.add(ringMap);
1014
+ }
1015
+ }
1016
+ }
1017
+ if (ringMaps.size()==0){
1018
+ throw new StructureBuildingException("Fused ring systems with overlapping rings such as in helices cannot currently be numbered");
1019
+ }
1020
+ return ringMaps;
1021
+ }
1022
+
1023
+ /**
1024
+ * Applies FR5.2 B, C and D to determine the preferred orientation and returns lists of potential peripheral atom orderings
1025
+ * @param ringMaps
1026
+ * @param atomCountOfFusedRingSystem
1027
+ * @return
1028
+ */
1029
+ private static List<List<Atom>> findPossiblePaths(List<Ring[][]> ringMaps, int atomCountOfFusedRingSystem){
1030
+ List<Double[]> chainQs = new ArrayList<>();
1031
+ List<Ring[][]> correspondingRingMap = new ArrayList<>();
1032
+ for (Ring[][] ringMap : ringMaps) {
1033
+ List<Chain> chains = findChainsOfMaximumLengthInHorizontalDir(ringMap);
1034
+ // For each chain count the number of rings in each quadrant
1035
+ for (Chain chain : chains) {
1036
+ int midChainXcoord = chain.getLength() + chain.getStartingX() - 1;//Remember the X axis is measured in 1/2s so don't need to 1/2 length
1037
+
1038
+ Double[] qs = countQuadrants(ringMap, midChainXcoord, chain.getY());
1039
+ chainQs.add(qs);
1040
+ correspondingRingMap.add(ringMap);
1041
+ }
1042
+ }
1043
+
1044
+ /*
1045
+ * The quadrant numbers are as follows:
1046
+ *
1047
+ * 1 | 0
1048
+ * ----+----
1049
+ * 2 | 3
1050
+ *
1051
+ * But at this stage it is not known what the mapping between these numbers and the/a preferred orientation of the structure is
1052
+ */
1053
+ // order for each right corner candidates for each chain
1054
+ List<List<Integer>> allowedUpperRightQuadrantsForEachChain =rulesBCD(chainQs);
1055
+
1056
+ List<List<Atom>> paths = new ArrayList<> ();
1057
+ for (int c=0; c < chainQs.size(); c++) {
1058
+ Ring[][] ringMap = correspondingRingMap.get(c);
1059
+ List<Integer> allowedUpperRightQuadrants = allowedUpperRightQuadrantsForEachChain.get(c);
1060
+
1061
+ for (Integer upperRightQuadrant : allowedUpperRightQuadrants) {
1062
+ Ring[][] qRingMap = transformQuadrantToUpperRightOfRingMap(ringMap, upperRightQuadrant);
1063
+ if (LOG.isTraceEnabled()){
1064
+ debugRingMap(qRingMap);
1065
+ }
1066
+ boolean inverseAtoms = (upperRightQuadrant == 2 || upperRightQuadrant == 0);
1067
+ List<Atom> peripheralAtomPath = orderAtoms(qRingMap, inverseAtoms, atomCountOfFusedRingSystem);
1068
+ paths.add(peripheralAtomPath);
1069
+ }
1070
+ }
1071
+
1072
+ return paths;
1073
+ }
1074
+
1075
+ private static Ring[][] generateRingMap(RingConnectivityTable ct, int[] directionFromRingToNeighbouringRing) {
1076
+ int ctEntriesSize = ct.ringShapes.size();
1077
+ // Find max and min coordinates for ringMap
1078
+ // we put the first ring into takenRings to start with it in the connection table
1079
+ int nRings = ct.usedRings.size();
1080
+ int[][] coordinates = new int[nRings][]; // correspondent to usedRings
1081
+ Ring[] takenRings = new Ring[nRings];
1082
+ int takenRingsCnt = 0;
1083
+ int maxX = 0;
1084
+ int minX = 0;
1085
+ int maxY = 0;
1086
+ int minY = 0;
1087
+
1088
+ takenRings[takenRingsCnt++] = ct.ringShapes.get(0).getRing();
1089
+ coordinates[0] = new int[]{0,0};
1090
+
1091
+ // Go through the rings in a system
1092
+ // Find the rings connected to them and assign coordinates according to the direction
1093
+ // Each time we go to the ring, whose coordinates were already identified.
1094
+ for(int tr=0; tr<nRings-1; tr++) {
1095
+ Ring currentRing = takenRings[tr];
1096
+ if (currentRing == null){
1097
+ throw new RuntimeException("OPSIN bug: Unexpected null ring in fused ring numbering");
1098
+ }
1099
+
1100
+ int indexOfCurrentRing = indexOfCorrespondingRingshape(ct.ringShapes, currentRing);
1101
+
1102
+ int xy[] = coordinates[tr]; // find the correspondent coordinates for the ring
1103
+
1104
+ if (indexOfCurrentRing >= 0) {
1105
+ for (int j=indexOfCurrentRing; j< ctEntriesSize; j++) {
1106
+ if (ct.ringShapes.get(j).getRing() == currentRing) {
1107
+ Ring neighbour = ct.neighbouringRings.get(j);
1108
+ if (arrayContains(takenRings, neighbour)) {
1109
+ continue;
1110
+ }
1111
+
1112
+ int[] newXY = new int[2];
1113
+ newXY[0] = xy[0] + Math.round(2 * countDX(directionFromRingToNeighbouringRing[j]));
1114
+ newXY[1] = xy[1] + countDY(directionFromRingToNeighbouringRing[j]);
1115
+
1116
+ if(takenRingsCnt > takenRings.length) {
1117
+ throw new RuntimeException("OPSIN Bug: Fused ring numbering bug");
1118
+ }
1119
+ takenRings[takenRingsCnt] = neighbour;
1120
+ coordinates[takenRingsCnt] = newXY;
1121
+ takenRingsCnt++;
1122
+
1123
+ if (newXY[0] > maxX){
1124
+ maxX = newXY[0];
1125
+ }
1126
+ else if (newXY[0] < minX) {
1127
+ minX = newXY[0];
1128
+ }
1129
+
1130
+ if (newXY[1] > maxY){
1131
+ maxY = newXY[1];
1132
+ }
1133
+ else if (newXY[1] < minY) {
1134
+ minY = newXY[1];
1135
+ }
1136
+ }
1137
+ }
1138
+ }
1139
+ else{
1140
+ throw new RuntimeException("OPSIN bug: fused ring numbering: Ring missing from connection table");
1141
+ }
1142
+ }
1143
+ // the height and the width of the map
1144
+ int h = maxY - minY + 1;
1145
+ int w = maxX - minX + 1;
1146
+
1147
+ Ring[][] ringMap = new Ring[w][h];
1148
+
1149
+ // Map rings using coordinates calculated in the previous step, and transform them according to found minX and minY
1150
+
1151
+ int ix = -minX;
1152
+ int iy = -minY;
1153
+ if (ix >= w || iy >= h) {
1154
+ throw new RuntimeException("OPSIN Bug: Fused ring numbering bug, Coordinates have been calculated wrongly");
1155
+ }
1156
+
1157
+ int curX = 0;
1158
+ int curY = 0;
1159
+ for (int ti = 0; ti < takenRings.length; ti++){
1160
+ int[] xy = coordinates[ti];
1161
+ curX = xy[0] - minX;
1162
+ curY = xy[1] - minY;
1163
+ if(curX <0 || curX > w || curY < 0 || curY > h) {
1164
+ throw new RuntimeException("OPSIN Bug: Fused ring numbering bug, Coordinates have been calculated wrongly");
1165
+ }
1166
+ if (ringMap[curX][curY] != null){
1167
+ return null;
1168
+ }
1169
+ ringMap[curX][curY] = takenRings[ti];
1170
+ }
1171
+ return ringMap;
1172
+ }
1173
+
1174
+ /**
1175
+ * Finds all the chains of maximum length for the current direction
1176
+ * @param ringMap
1177
+ * @return
1178
+ */
1179
+ private static List<Chain> findChainsOfMaximumLengthInHorizontalDir(Ring[][] ringMap){
1180
+ int w = ringMap.length;
1181
+ int h = ringMap[0].length;
1182
+
1183
+ List<Chain> chains = new ArrayList<>();
1184
+
1185
+ int maxChain = 0;
1186
+ int chain = 0;
1187
+
1188
+ // Find the longest chain
1189
+ for (int j=0; j<h; j++) {
1190
+ for (int i=0; i<w; i++) {
1191
+ if(ringMap[i][j] != null) {
1192
+ chain = 1;
1193
+ while(i + 2*chain < w && ringMap[i + 2*chain][j] != null ) {
1194
+ chain++; // *2 because along the x axis the step is 2
1195
+ }
1196
+ if (chain > maxChain){
1197
+ chains.clear();
1198
+ maxChain = chain;
1199
+ }
1200
+ if(chain >= maxChain) {
1201
+ chains.add(new Chain(chain, i, j));
1202
+ }
1203
+ i += 2*chain;
1204
+ }
1205
+ }
1206
+ }
1207
+ return chains;
1208
+ }
1209
+
1210
+ /**
1211
+ * Counts number of rings in each quadrant
1212
+ * @param ringMap
1213
+ * @param midChainXcoord
1214
+ * @param yChain
1215
+ * @return
1216
+ */
1217
+ private static Double[] countQuadrants(Ring[][] ringMap, int midChainXcoord, int yChain){
1218
+ Double[] qs = new Double[4];
1219
+ qs[0] = 0d;
1220
+ qs[1] = 0d;
1221
+ qs[2] = 0d;
1222
+ qs[3] = 0d;
1223
+ int w = ringMap.length;
1224
+ int h = ringMap[0].length;
1225
+
1226
+ // Count rings in each quadrants
1227
+ for (int x=0; x<w; x++) {
1228
+ for (int y=0; y<h; y++) {
1229
+ if (ringMap[x][y] == null) {
1230
+ continue;
1231
+ }
1232
+
1233
+ if (x == midChainXcoord || y == yChain ) {// if the ring is on the axis
1234
+ if( x == midChainXcoord && y > yChain ) {
1235
+ qs[0]+=0.5;
1236
+ qs[1]+=0.5;
1237
+ }
1238
+ else if( x == midChainXcoord && y < yChain ) {
1239
+ qs[2]+=0.5;
1240
+ qs[3]+=0.5;
1241
+ }
1242
+ else if( x < midChainXcoord && y == yChain ) {
1243
+ qs[1]+=0.5;
1244
+ qs[2]+=0.5;
1245
+ }
1246
+ else if( x > midChainXcoord && y == yChain ) {
1247
+ qs[0]+=0.5;
1248
+ qs[3]+=0.5;
1249
+ }
1250
+ if (x==midChainXcoord && y==yChain ){
1251
+ qs[0]+=0.25;
1252
+ qs[1]+=0.25;
1253
+ qs[2]+=0.25;
1254
+ qs[3]+=0.25;
1255
+ }
1256
+ }
1257
+ else if(x > midChainXcoord && y > yChain) {
1258
+ qs[0]++;
1259
+ }
1260
+ else if(x < midChainXcoord && y > yChain) {
1261
+ qs[1]++;
1262
+ }
1263
+ else if(x < midChainXcoord && y < yChain) {
1264
+ qs[2]++;
1265
+ }
1266
+ else if(x > midChainXcoord && y < yChain) {
1267
+ qs[3]++;
1268
+ }
1269
+ }
1270
+ }
1271
+
1272
+ return qs;
1273
+ }
1274
+
1275
+ /**
1276
+ * Applying rules FR5.2 B, C and D to the ring system.
1277
+ * Return a list of possible upper right quadrants for each chain given. A chain may have multiple possible upper right quadrants (due to symmetry)
1278
+ * or none if other chains can be shown to be preferable by application of the rules
1279
+ * @param chainQs - array with number of ring in each quadrant for each chain.
1280
+ */
1281
+ private static List<List<Integer>> rulesBCD(List<Double[]> chainQs) {
1282
+ List<List<Integer>> possibleUpperRightQuadrantsForEachChain = new ArrayList<>();
1283
+ int nChains = chainQs.size();
1284
+ if (nChains==0){
1285
+ throw new RuntimeException("OPSIN Bug: Fused ring numbering, no chains found?");
1286
+ }
1287
+
1288
+ // Rule B: Maximum number of rings in upper right quadrant. Upper right corner candidates (it is not at this stage known which quadrant is the upper right one)
1289
+ double qmax = 0;
1290
+
1291
+ for (Double[] chainQ : chainQs) {
1292
+ for (int j = 0; j < 4; j++) {
1293
+ Double q = chainQ[j];
1294
+ if(q > qmax) {
1295
+ qmax = q;
1296
+ }
1297
+ }
1298
+ }
1299
+
1300
+ for (Double[] chainQ : chainQs) {
1301
+ List<Integer> allowedUpperRightQuadrants = new ArrayList<>();
1302
+ for (int j = 0; j < 4; j++){
1303
+ if (chainQ[j] == qmax) {
1304
+ allowedUpperRightQuadrants.add(j);
1305
+ }
1306
+ }
1307
+ possibleUpperRightQuadrantsForEachChain.add(allowedUpperRightQuadrants);
1308
+ }
1309
+
1310
+ // Rule C: Minimum number of rings in lower left quadrant
1311
+ double qmin = Double.MAX_VALUE;
1312
+
1313
+ for (int c = 0; c < nChains; c++) {
1314
+ List<Integer> possibleUpperRightQuadrant = possibleUpperRightQuadrantsForEachChain.get(c);
1315
+ for (Integer upperRightQuad : possibleUpperRightQuadrant) {
1316
+ int qdiagonal = (upperRightQuad + 2) % 4;
1317
+ if (chainQs.get(c)[qdiagonal] < qmin){
1318
+ qmin = chainQs.get(c)[qdiagonal];
1319
+ }
1320
+ }
1321
+ }
1322
+ for (int c = 0; c < nChains; c++) {
1323
+ List<Integer> possibleUpperRightQuadrant = possibleUpperRightQuadrantsForEachChain.get(c);
1324
+ List<Integer> allowedUpperRightQuadrants = new ArrayList<>();
1325
+ for (Integer upperRightQuad : possibleUpperRightQuadrant) {
1326
+ int qdiagonal = (upperRightQuad + 2) % 4;
1327
+ if (chainQs.get(c)[qdiagonal]==qmin) {
1328
+ allowedUpperRightQuadrants.add(upperRightQuad);
1329
+ }
1330
+ }
1331
+ possibleUpperRightQuadrantsForEachChain.set(c, allowedUpperRightQuadrants);
1332
+ }
1333
+
1334
+ // Rule D: Maximum number of rings above the horizontal row
1335
+ double rMax = 0;
1336
+ for (int c = 0; c < nChains; c++) {
1337
+ List<Integer> possibleUpperRightQuadrant = possibleUpperRightQuadrantsForEachChain.get(c);
1338
+ for (Integer upperRightQuad : possibleUpperRightQuadrant) {
1339
+ int upperLeftQuad;
1340
+ if (upperRightQuad % 2 == 0) {
1341
+ upperLeftQuad = upperRightQuad + 1;
1342
+ }
1343
+ else {
1344
+ upperLeftQuad = upperRightQuad - 1;
1345
+ }
1346
+
1347
+ if (chainQs.get(c)[upperLeftQuad] + chainQs.get(c)[upperRightQuad] > rMax) {
1348
+ rMax = chainQs.get(c)[upperLeftQuad] + chainQs.get(c)[upperRightQuad];
1349
+ }
1350
+ }
1351
+ }
1352
+ for (int c = 0; c < nChains; c++) {
1353
+ List<Integer> possibleUpperRightQuadrant = possibleUpperRightQuadrantsForEachChain.get(c);
1354
+ List<Integer> allowedUpperRightQuadrants = new ArrayList<>();
1355
+ for (Integer upperRightQuad : possibleUpperRightQuadrant) {
1356
+ int upperLeftQuad;
1357
+ if (upperRightQuad % 2 == 0) {
1358
+ upperLeftQuad = upperRightQuad + 1;
1359
+ }
1360
+ else {
1361
+ upperLeftQuad = upperRightQuad - 1;
1362
+ }
1363
+
1364
+ if (chainQs.get(c)[upperLeftQuad] + chainQs.get(c)[upperRightQuad] == rMax) {
1365
+ allowedUpperRightQuadrants.add(upperRightQuad);
1366
+ }
1367
+ }
1368
+ possibleUpperRightQuadrantsForEachChain.set(c, allowedUpperRightQuadrants);
1369
+ }
1370
+ return possibleUpperRightQuadrantsForEachChain;
1371
+ }
1372
+
1373
+ /**
1374
+ * Enumerates the peripheral atoms in a system in accordance with FR-5.3:
1375
+ * First finds the uppermost right ring, takes the next neighbour in the clockwise direction, and so on until the starting atom is reached
1376
+ * @param ringMap
1377
+ * @param inverseAtoms The direction in which the periphery atoms should be enumerated. Anticlockwise by default
1378
+ * @param atomCountOfFusedRingSystem
1379
+ * @return
1380
+ */
1381
+ private static List<Atom> orderAtoms(Ring[][] ringMap, boolean inverseAtoms, int atomCountOfFusedRingSystem){
1382
+ int w = ringMap.length;
1383
+ int h = ringMap[0].length;
1384
+
1385
+ // find upper right ring
1386
+ Ring upperRightRing = null;
1387
+ for (int i=w-1; i>=0; i--) {
1388
+ if (ringMap[i][h-1] != null) {
1389
+ upperRightRing = ringMap[i][h-1];
1390
+ break;
1391
+ }
1392
+ }
1393
+ if (upperRightRing == null) {
1394
+ throw new RuntimeException("OPSIN Bug: Upper right ring not found when performing fused ring numbering");
1395
+ }
1396
+ List<Ring> visitedRings = new ArrayList<>();
1397
+ visitedRings.add(upperRightRing);
1398
+ while (isEntirelyFusionAtoms(upperRightRing)){//c.f cyclopropa[de]anthracene
1399
+ upperRightRing = findClockwiseRingFromUpperRightRing(ringMap, upperRightRing, visitedRings);
1400
+ if (upperRightRing==null){
1401
+ throw new RuntimeException("OPSIN Bug: Unabled to find clockwise ring without fusion atoms");
1402
+ }
1403
+ visitedRings.add(upperRightRing);
1404
+ }
1405
+
1406
+ Ring prevRing = findUpperLeftNeighbourOfUpperRightRing(ringMap, upperRightRing);
1407
+ Bond prevBond = findFusionBond(upperRightRing, prevRing);
1408
+ Bond nextBond = null;
1409
+
1410
+ Ring currentRing = upperRightRing;
1411
+ Ring nextRing = null;
1412
+ List<Atom> atomPath = new ArrayList<>();
1413
+ int count = 0;
1414
+ mainLoop: for (; count <= atomCountOfFusedRingSystem; count++) {
1415
+ int ringSize = currentRing.size();
1416
+
1417
+ int startingBondIndex = currentRing.getBondIndex(prevBond) ;
1418
+
1419
+ List<Bond> cyclicBonds = currentRing.getCyclicBondList();
1420
+ List<Bond> fusedBonds = currentRing.getFusedBonds();
1421
+ if (!inverseAtoms) {
1422
+ for(int bondIndex = 0; bondIndex < ringSize; bondIndex++) {
1423
+ int i = (startingBondIndex + bondIndex + 1) % ringSize; // +1 because we start from the bond next to stBond and end with it
1424
+ // if this bond is fused then it indicates the next ring to move to
1425
+ Bond bond = cyclicBonds.get(i);
1426
+ if(fusedBonds.contains(bond)) {
1427
+ nextBond = bond;
1428
+ break;
1429
+ }
1430
+ }
1431
+ }
1432
+ else {
1433
+ for(int bondIndex = 0; bondIndex < ringSize; bondIndex++) {
1434
+ int i = (startingBondIndex - bondIndex -1 + ringSize) % ringSize; // -1 because we start from the bond next to stBond and end with it
1435
+ // if this bond is fused then it indicates the next ring to move to
1436
+ Bond bond = cyclicBonds.get(i);
1437
+ if(fusedBonds.contains(bond)) {
1438
+ nextBond = bond;
1439
+ break;
1440
+ }
1441
+ }
1442
+ }
1443
+ if (nextBond == null) {
1444
+ throw new RuntimeException("OPSIN Bug: None of the bonds from this ring were fused, but this is not possible ");
1445
+ }
1446
+
1447
+ // next ring
1448
+ nextRing = currentRing.getNeighbourOfFusedBond(nextBond);
1449
+
1450
+ int endNumber = currentRing.getBondIndex(nextBond) ;
1451
+
1452
+ // Add atoms in order, considering inverse or not inverse
1453
+ if (!inverseAtoms) {
1454
+ // if distance between prev bond and cur bond = 1 (it means that fused bonds are next to each other) i.e. come under interior atom numbering
1455
+ // we don't add that atom, cause it was added already
1456
+ if ( (endNumber - startingBondIndex + ringSize) % ringSize != 1) {
1457
+ startingBondIndex = (startingBondIndex + 1) % ringSize;
1458
+ endNumber = (endNumber - 1 + ringSize ) % ringSize;
1459
+ if (startingBondIndex > endNumber) {
1460
+ endNumber += ringSize;
1461
+ }
1462
+
1463
+ // start from the atom next to fusion
1464
+ for (int j = startingBondIndex; j <= endNumber; j++) {
1465
+ Atom atom = currentRing.getCyclicAtomList().get(j % ringSize);
1466
+ if (atomPath.contains(atom)) {
1467
+ break mainLoop;
1468
+ }
1469
+ atomPath.add(atom);
1470
+ }
1471
+ }
1472
+ }
1473
+ else {
1474
+ if ( ( startingBondIndex - endNumber + ringSize) % ringSize != 1) {
1475
+ startingBondIndex = (startingBondIndex - 2 + ringSize ) % ringSize;
1476
+ endNumber = endNumber % ringSize;
1477
+ if (startingBondIndex < endNumber) {
1478
+ startingBondIndex += ringSize;
1479
+ }
1480
+
1481
+ for (int j = startingBondIndex; j >= endNumber; j-- ) {
1482
+ Atom atom = currentRing.getCyclicAtomList().get(j % ringSize);
1483
+ if (atomPath.contains(atom)) {
1484
+ break mainLoop;
1485
+ }
1486
+ atomPath.add(atom);
1487
+ }
1488
+ }
1489
+ }
1490
+ prevBond = nextBond;
1491
+ prevRing = currentRing;
1492
+ currentRing = nextRing;
1493
+ }
1494
+ if (count ==atomCountOfFusedRingSystem){
1495
+ throw new RuntimeException("OPSIN Bug: Fused ring numbering may have been stuck in an infinite loop while enumerating peripheral numbering");
1496
+ }
1497
+ return atomPath;
1498
+ }
1499
+
1500
+ private static boolean isEntirelyFusionAtoms(Ring upperRightRing) {
1501
+ List<Atom> atomList = upperRightRing.getAtomList();
1502
+ for (Atom atom : atomList) {
1503
+ if (atom.getBondCount() < 3){
1504
+ return false;
1505
+ }
1506
+ }
1507
+ return true;
1508
+ }
1509
+
1510
+ /**
1511
+ * Finds the neighbour ring, which is the clockwise of the given ring.
1512
+ * @param ringMap
1513
+ * @param upperRightRing
1514
+ * @param visitedRings
1515
+ * @return
1516
+ */
1517
+ private static Ring findClockwiseRingFromUpperRightRing (Ring[][] ringMap, Ring upperRightRing, List<Ring> visitedRings){
1518
+ Ring clockwiseRing = null;
1519
+ int maxX = 0;
1520
+ int maxY = 0;
1521
+
1522
+ for (Ring ring : upperRightRing.getNeighbours()) {
1523
+ if (visitedRings.contains(ring)){
1524
+ continue;
1525
+ }
1526
+ int xy[] = findRingPosition(ringMap, ring);
1527
+ if (xy==null) {
1528
+ throw new RuntimeException("OPSIN Bug: Ring not found in ringMap when performing fused ring numbering");
1529
+ }
1530
+
1531
+ if (xy[0] > maxX || xy[0] == maxX && xy[1] > maxY ) {
1532
+ maxX = xy[0];
1533
+ maxY = xy[1];
1534
+ clockwiseRing = ring;
1535
+ }
1536
+ }
1537
+ return clockwiseRing;
1538
+ }
1539
+
1540
+ /**
1541
+ * Finds the neighbour ring, which is the uppermost and on the left side from the given ring. Used to find previous bond for the uppermost right ring, from which we start to enumerate
1542
+ * @param ringMap
1543
+ * @param upperRightRing
1544
+ * @return
1545
+ */
1546
+ private static Ring findUpperLeftNeighbourOfUpperRightRing (Ring[][] ringMap, Ring upperRightRing){
1547
+ Ring nRing = null;
1548
+ int minX = Integer.MAX_VALUE;
1549
+ int maxY = 0;
1550
+
1551
+ for (Ring ring : upperRightRing.getNeighbours()) {
1552
+ // upper left would be previous ring
1553
+ int xy[] = findRingPosition(ringMap, ring);
1554
+ if (xy==null) {
1555
+ throw new RuntimeException("OPSIN Bug: Ring not found in ringMap when performing fused ring numbering");
1556
+ }
1557
+
1558
+ if (xy[1] > maxY || xy[1] == maxY && xy[0] < minX ) {
1559
+ minX = xy[0];
1560
+ maxY = xy[1];
1561
+ nRing = ring;
1562
+ }
1563
+ }
1564
+ return nRing;
1565
+ }
1566
+
1567
+ /**
1568
+ * Finds the position(i,j) of the ring in the map
1569
+ * @param ringMap
1570
+ * @param ring
1571
+ * @return
1572
+ */
1573
+ private static int[] findRingPosition(Ring[][] ringMap, Ring ring) {
1574
+ int w = ringMap.length;
1575
+ int h = ringMap[0].length;
1576
+
1577
+ for(int i=0; i<w; i++) {
1578
+ for(int j=0; j<h; j++) {
1579
+ if (ringMap[i][j] == ring) {
1580
+ return new int[]{i,j};
1581
+ }
1582
+ }
1583
+ }
1584
+ return null;
1585
+ }
1586
+
1587
+ /**
1588
+ * Transform the map such that the candidate upper right quadrant actually is in the upper right corner
1589
+ * @param ringMap
1590
+ * @param upperRightQuadrant
1591
+ * @return
1592
+ */
1593
+ private static Ring[][] transformQuadrantToUpperRightOfRingMap(Ring[][] ringMap, int upperRightQuadrant){
1594
+ int w = ringMap.length;
1595
+ int h = ringMap[0].length;
1596
+
1597
+ Ring[][] rearrangedMap = new Ring[w][h];
1598
+ for (int i=0; i < w; i++) {
1599
+ for (int j=0; j < h; j++) {
1600
+ if (upperRightQuadrant == 0) {//already is in the upper right
1601
+ rearrangedMap[i][j] = ringMap[i][j];
1602
+ }
1603
+ if(upperRightQuadrant == 1) {//flip in y axis
1604
+ rearrangedMap[w-i-1][j] = ringMap[i][j];
1605
+ }
1606
+ else if(upperRightQuadrant == 2) {//flip in x and y axes
1607
+ rearrangedMap[w-i-1][h-j-1] = ringMap[i][j];
1608
+ }
1609
+ else if(upperRightQuadrant == 3) {//flip in x axis
1610
+ rearrangedMap[i][h-j-1] = ringMap[i][j];
1611
+ }
1612
+ }
1613
+ }
1614
+
1615
+ return rearrangedMap;
1616
+ }
1617
+
1618
+ /**
1619
+ * Checks if array contains an object
1620
+ * @param array
1621
+ * @param obj
1622
+ * @return
1623
+ */
1624
+ private static boolean arrayContains(Object[] array, Object obj) {
1625
+ for (Object arrObj : array) {
1626
+ if (arrObj == obj) {
1627
+ return true;
1628
+ }
1629
+ }
1630
+ return false;
1631
+ }
1632
+
1633
+ /**
1634
+ * Returns a bond which is not a bond that is in two rings
1635
+ * Preference is given to a bond that is at least a bond away from a fused bond to avoid problems with 5 member rings starting in bad orientations
1636
+ * @param tRing
1637
+ * @return
1638
+ */
1639
+ private static Bond getStartingNonFusedBond(Ring tRing){
1640
+ List<Bond> allBonds = new ArrayList<>(tRing.getBondList());
1641
+ for (Bond fusedBond : tRing.getFusedBonds()) {
1642
+ List<Bond> neighbouringBonds = fusedBond.getFromAtom().getBonds();
1643
+ for (Bond bond : neighbouringBonds) {
1644
+ allBonds.remove(bond);
1645
+ }
1646
+ neighbouringBonds = fusedBond.getToAtom().getBonds();
1647
+ for (Bond bond : neighbouringBonds) {
1648
+ allBonds.remove(bond);
1649
+ }
1650
+ }
1651
+ if (allBonds.size() > 0){
1652
+ return allBonds.get(0);
1653
+ }
1654
+ for (Bond bond : tRing.getBondList()) {
1655
+ if(tRing.getNeighbourOfFusedBond(bond) == null){
1656
+ // return a non-fused bond
1657
+ return bond;
1658
+ }
1659
+ }
1660
+ return null;
1661
+ }
1662
+
1663
+ /**
1664
+ * Given the direction of the bond from ring1 to ring2, returns the opposite direction: from ring2 to ring1
1665
+ * @param prevDir
1666
+ * @return
1667
+ */
1668
+ static int getOppositeDirection(int prevDir) {
1669
+ int dir;
1670
+ if (prevDir == 0) {
1671
+ dir = 4;
1672
+ }
1673
+ else if (Math.abs(prevDir) == 4){
1674
+ dir =0;
1675
+ }
1676
+ else if (Math.abs(prevDir) == 2){
1677
+ dir = 2 * -1 * Integer.signum(prevDir);
1678
+ }
1679
+ else if (Math.abs(prevDir) == 1){
1680
+ dir = 3 * -1 * Integer.signum(prevDir);
1681
+ }
1682
+ else {//prevDir will be +-3
1683
+ dir = 1 * -1 * Integer.signum(prevDir);
1684
+ }
1685
+ return dir;
1686
+ }
1687
+
1688
+ /**
1689
+ * Finds the atom connected to the bond, takes into account the order of the bonds and atoms in the ring
1690
+ * @param ring
1691
+ * @param curBond
1692
+ * @return
1693
+ */
1694
+ private static Atom getAtomFromBond(Ring ring, Bond curBond) {
1695
+ if (ring.getCyclicBondList() == null) {
1696
+ throw new RuntimeException("The cyclic bond list should already have been generated");
1697
+ }
1698
+ int bondIndice= ring.getCyclicBondList().indexOf(curBond);
1699
+ int atomIndice = ( bondIndice - 1 + ring.size() ) % ring.size();
1700
+ return ring.getCyclicAtomList().get(atomIndice);
1701
+ }
1702
+
1703
+ /**
1704
+ * Finds the fusion bond between 2 rings
1705
+ * @param r1
1706
+ * @param r2
1707
+ * @return
1708
+ */
1709
+ private static Bond findFusionBond (Ring r1, Ring r2) {
1710
+ List<Bond> b2 = r2.getBondList();
1711
+ for(Bond bond : r1.getBondList()){
1712
+ if (b2.contains(bond)) {
1713
+ return bond;
1714
+ }
1715
+ }
1716
+ return null;
1717
+ }
1718
+
1719
+ /**
1720
+ * Counts delta x distance between previous and next rings
1721
+ * @param val
1722
+ * @return
1723
+ */
1724
+ private static float countDX (int val) {
1725
+ float dX = 0;
1726
+ if (Math.abs(val) == 1) {
1727
+ dX += 0.5f;
1728
+ }
1729
+ else if (Math.abs(val) == 3) {
1730
+ dX -= 0.5f;
1731
+ }
1732
+ else if (Math.abs(val) == 0) {
1733
+ dX += 1f;
1734
+ }
1735
+ else if (Math.abs(val) == 4) {
1736
+ dX -= 1f;
1737
+ }
1738
+ return dX;
1739
+ }
1740
+
1741
+ /**
1742
+ * Counts delta y distance (height) between previous and next rings
1743
+ * @param val
1744
+ * @return
1745
+ */
1746
+
1747
+ private static int countDY (int val) {
1748
+ int dY = 0;
1749
+ if (Math.abs(val) != 4) {
1750
+ if (val > 0) {
1751
+ dY = 1;
1752
+ }
1753
+ if (val < 0) {
1754
+ dY = -1;
1755
+ }
1756
+ }
1757
+ return dY;
1758
+ }
1759
+
1760
+ /**
1761
+ * Take into account the previous direction to convert the given relative direction into a direction that is absolute for the fused ring system
1762
+ * @param fusionRingShape
1763
+ * @param ringSize
1764
+ * @param relativeDirection
1765
+ * @param previousDir
1766
+ * @return
1767
+ */
1768
+ static int determineAbsoluteDirectionUsingPreviousDirection(FusionRingShape fusionRingShape, int ringSize, int relativeDirection, int previousDir){
1769
+ int interimDirection;
1770
+ if (Math.abs(previousDir) == 4) {
1771
+ if (relativeDirection == 0) {
1772
+ interimDirection = 4;
1773
+ }
1774
+ else {
1775
+ interimDirection = relativeDirection + 4 * -1 * Integer.signum(relativeDirection); // if dir<0 we add 4, if dir>0 we add -4
1776
+ }
1777
+ }
1778
+ else {
1779
+ interimDirection = relativeDirection + previousDir;
1780
+ }
1781
+ if (Math.abs(interimDirection) > 4) {// Added
1782
+ interimDirection = (8 - Math.abs(interimDirection)) * Integer.signum(interimDirection) * -1;
1783
+ }
1784
+ //TODO investigate this function and unit test
1785
+ /* Even numbered rings when angled do not have direction 2.
1786
+ * Almost true for 5 member except for corner case where fusion to elongated bond occurs
1787
+ */
1788
+ if (Math.abs(interimDirection) == 2 && ((ringSize % 2 ==0) || ringSize==5 || ringSize==7)) {
1789
+ // if (one of them equal to 1 and another is equal to 3, we decrease absolute value and conserve the sign)
1790
+ if (Math.abs(relativeDirection)==1 && Math.abs(previousDir)==3 || Math.abs(relativeDirection)==3 && Math.abs(previousDir)==1) {
1791
+ interimDirection = 1 * Integer.signum(interimDirection);
1792
+ }
1793
+ // if both are equal to 1
1794
+ else if(Math.abs(relativeDirection)==1 && Math.abs(previousDir)==1 ) {
1795
+ interimDirection = 3 * Integer.signum(interimDirection);
1796
+ }
1797
+ // if both are equal to 3
1798
+ else if(Math.abs(relativeDirection)==3 && Math.abs(previousDir)==3 ) {
1799
+ interimDirection = 3 * Integer.signum(interimDirection);
1800
+ }
1801
+ // else it is correctly 2
1802
+ }
1803
+
1804
+ if (interimDirection == -4) {
1805
+ interimDirection = 4;
1806
+ }
1807
+
1808
+ return interimDirection;
1809
+ }
1810
+
1811
+ private static void debugRingMap(Ring[][] ringMap) {
1812
+ Ring[][] yxOrdered = new Ring[ringMap[0].length][ringMap.length];
1813
+ for (int x = 0; x < ringMap.length; x++) {
1814
+ Ring[] yRings = ringMap[x];
1815
+ for (int y = 0; y < yRings.length; y++) {
1816
+ yxOrdered[y][x] =yRings[y];
1817
+ }
1818
+ }
1819
+ for (int y = yxOrdered.length-1; y >=0 ; y--) {
1820
+ Ring[] xRings = yxOrdered[y];
1821
+ StringBuilder sb = new StringBuilder();
1822
+ for (Ring ring : xRings) {
1823
+ if (ring!=null){
1824
+ int size = ring.size();
1825
+ if (size>9){
1826
+ if (size==10){
1827
+ sb.append("0");
1828
+ }
1829
+ else if (size % 2 ==0){
1830
+ sb.append("2");
1831
+ }
1832
+ else{
1833
+ sb.append("1");
1834
+ }
1835
+ }
1836
+ else{
1837
+ sb.append(size);
1838
+ }
1839
+ }
1840
+ else{
1841
+ sb.append(" ");
1842
+ }
1843
+ }
1844
+ LOG.trace(sb.toString());
1845
+ }
1846
+ LOG.trace("#########");
1847
+
1848
+ }
1849
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/GroupingEl.java ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.List;
5
+
6
+ class GroupingEl extends Element{
7
+
8
+ private final List<Element> children = new ArrayList<>();
9
+
10
+ GroupingEl(String name) {
11
+ super(name);
12
+ }
13
+
14
+ @Override
15
+ void addChild(Element child) {
16
+ child.setParent(this);
17
+ children.add(child);
18
+ }
19
+
20
+ @Override
21
+ Element copy() {
22
+ GroupingEl copy = new GroupingEl(this.name);
23
+ for (Element childEl : this.children) {
24
+ Element newChild = childEl.copy();
25
+ newChild.setParent(copy);
26
+ copy.addChild(newChild);
27
+ }
28
+ for (int i = 0, len = this.attributes.size(); i < len; i++) {
29
+ Attribute atr = this.attributes.get(i);
30
+ copy.addAttribute(new Attribute(atr));
31
+ }
32
+ return copy;
33
+ }
34
+
35
+ @Override
36
+ Element getChild(int index) {
37
+ return children.get(index);
38
+ }
39
+
40
+ @Override
41
+ int getChildCount() {
42
+ return children.size();
43
+ }
44
+
45
+ @Override
46
+ List<Element> getChildElements() {
47
+ return new ArrayList<>(children);
48
+ }
49
+
50
+ @Override
51
+ List<Element> getChildElements(String name) {
52
+ List<Element> elements = new ArrayList<>(1);
53
+ for (Element element : children) {
54
+ if (element.name.equals(name)) {
55
+ elements.add(element);
56
+ }
57
+ }
58
+ return elements;
59
+ }
60
+
61
+ @Override
62
+ Element getFirstChildElement(String name) {
63
+ for (Element child : children) {
64
+ if (child.getName().equals(name)) {
65
+ return child;
66
+ }
67
+ }
68
+ return null;
69
+ }
70
+
71
+ String getValue() {
72
+ int childCount = getChildCount();
73
+ if (childCount == 0) {
74
+ return "";
75
+ }
76
+ StringBuilder result = new StringBuilder();
77
+ for (int i = 0; i < childCount; i++) {
78
+ result.append(children.get(i).getValue());
79
+ }
80
+ return result.toString();
81
+ }
82
+
83
+ @Override
84
+ int indexOf(Element child) {
85
+ return children.indexOf(child);
86
+ }
87
+
88
+ @Override
89
+ void insertChild(Element child, int index) {
90
+ child.setParent(this);
91
+ children.add(index, child);
92
+ }
93
+
94
+ @Override
95
+ boolean removeChild(Element child) {
96
+ child.setParent(null);
97
+ return children.remove(child);
98
+ }
99
+
100
+ @Override
101
+ Element removeChild(int index) {
102
+ Element removed = children.remove(index);
103
+ removed.setParent(null);
104
+ return removed;
105
+ }
106
+
107
+ @Override
108
+ void replaceChild(Element oldChild, Element newChild) {
109
+ int index = indexOf(oldChild);
110
+ if (index == -1) {
111
+ throw new RuntimeException("oldChild is not a child of this element.");
112
+ }
113
+ removeChild(index);
114
+ insertChild(newChild, index);
115
+ }
116
+
117
+ void setValue(String text) {
118
+ throw new UnsupportedOperationException("Token groups do not have a value");
119
+ }
120
+
121
+ }
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IDManager.java ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
/**
 * Hands out unique integers in increasing order. The first integer issued is 1.
 *
 * @author ptc24
 *
 */
class IDManager {
	/** The most recently issued integer; 0 while none has been issued yet. */
	private int currentID;

	/** Creates a manager whose first call to {@link #getNextID()} returns 1. */
	IDManager() {
		this.currentID = 0;
	}

	/**
	 * Reports the most recently issued integer without issuing a new one.
	 * @return The last integer issued, or 0 if none has been issued.
	 */
	int getCurrentID() {
		return this.currentID;
	}

	/**
	 * Issues the next integer, which is one greater than the last one issued.
	 * @return The newly issued integer.
	 */
	int getNextID() {
		return ++this.currentID;
	}

}
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IndentingXMLStreamWriter.java ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package uk.ac.cam.ch.wwmm.opsin;
2
+
3
+ import javax.xml.stream.XMLStreamException;
4
+ import javax.xml.stream.XMLStreamWriter;
5
+
6
+ import org.codehaus.stax2.util.StreamWriterDelegate;
7
+
8
+ /**
9
+ * This only overrides the commands actually used by the CmlWriter i.e. it isn't general
10
+ */
11
+ class IndentingXMLStreamWriter extends StreamWriterDelegate {
12
+
13
+ private final int indentSize;
14
+ private int depth = 0;
15
+ private boolean atStartOfNewline = false;
16
+
17
+ IndentingXMLStreamWriter(XMLStreamWriter writer, int indentSize) {
18
+ super(writer);
19
+ this.indentSize = indentSize;
20
+ }
21
+
22
+ @Override
23
+ public void writeStartElement(String arg0) throws XMLStreamException {
24
+ if (!atStartOfNewline){
25
+ super.writeCharacters(OpsinTools.NEWLINE);
26
+ }
27
+ super.writeCharacters(StringTools.multiplyString(" ", depth * indentSize));
28
+ super.writeStartElement(arg0);
29
+ atStartOfNewline = false;
30
+ depth++;
31
+ }
32
+
33
+ @Override
34
+ public void writeEndElement() throws XMLStreamException {
35
+ depth--;
36
+ if (atStartOfNewline) {
37
+ super.writeCharacters(StringTools.multiplyString(" ", depth * indentSize));
38
+ }
39
+ super.writeEndElement();
40
+ super.writeCharacters(OpsinTools.NEWLINE);
41
+ atStartOfNewline = true;
42
+ }
43
+
44
+ @Override
45
+ public void writeCharacters(String arg0) throws XMLStreamException {
46
+ super.writeCharacters(arg0);
47
+ atStartOfNewline = false;
48
+ }
49
+
50
+ }