maojiashun
committed on
Commit
•
f25ddd1
1
Parent(s):
529df0a
Upload 350 files
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +3 -0
- TransAntivirus/best_in_dataset.py +130 -0
- TransAntivirus/data_utils.py +206 -0
- TransAntivirus/download_pubchem/SARS0729_canon_desc.csv +185 -0
- TransAntivirus/download_pubchem/download.sh +34 -0
- TransAntivirus/download_pubchem/extract_info.py +82 -0
- TransAntivirus/download_pubchem/finetunev1_new.csv +71 -0
- TransAntivirus/download_pubchem/opsin-master.zip +3 -0
- TransAntivirus/download_pubchem/opsin-master/.github/workflows/maven.yml +29 -0
- TransAntivirus/download_pubchem/opsin-master/.gitignore +5 -0
- TransAntivirus/download_pubchem/opsin-master/LICENSE.txt +7 -0
- TransAntivirus/download_pubchem/opsin-master/README.md +186 -0
- TransAntivirus/download_pubchem/opsin-master/ReleaseNotes.txt +332 -0
- TransAntivirus/download_pubchem/opsin-master/fullAssembly.xml +49 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-cli/pom.xml +58 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-cli/src/main/java/uk/ac/cam/ch/wwmm/opsin/Cli.java +268 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-cli/src/main/resources/log4j2.xml +13 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/pom.xml +67 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AmbiguityChecker.java +214 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AnnotatorState.java +72 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Atom.java +647 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AtomParity.java +64 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AtomProperties.java +160 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Attribute.java +74 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AutomatonInitialiser.java +105 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Bond.java +184 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BondStereo.java +58 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BuildResults.java +150 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BuildState.java +46 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CASTools.java +248 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CMLWriter.java +217 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ChemEl.java +138 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipOrderingException.java +29 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipSequenceRules.java +470 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerationException.java +28 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerator.java +0 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentProcessor.java +0 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CycleDetector.java +128 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CyclicAtomList.java +140 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Element.java +229 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Fragment.java +633 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentManager.java +767 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentTools.java +1242 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FunctionalAtom.java +20 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FunctionalReplacement.java +1176 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingBuilder.java +1030 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumberer.java +1849 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/GroupingEl.java +121 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IDManager.java +30 -0
- TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IndentingXMLStreamWriter.java +50 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemical_reversed_SerialisedAutomaton.aut filter=lfs diff=lfs merge=lfs -text
|
37 |
+
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemicalSerialisedAutomaton.aut filter=lfs diff=lfs merge=lfs -text
|
38 |
+
TransAntivirus/download_pubchem/pubchem_30m_new.csv filter=lfs diff=lfs merge=lfs -text
|
TransAntivirus/best_in_dataset.py
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import HfArgumentParser
|
2 |
+
|
3 |
+
import torch
|
4 |
+
import torch.multiprocessing
|
5 |
+
torch.multiprocessing.set_sharing_strategy("file_system")
|
6 |
+
from torch.utils.data import DataLoader, ConcatDataset
|
7 |
+
|
8 |
+
from t5 import T5IUPACTokenizer, T5Collator
|
9 |
+
from iupac_dataset import IUPACDataset
|
10 |
+
from physprop_exp import levenshtein_distance
|
11 |
+
|
12 |
+
from dataclasses import dataclass, field
|
13 |
+
from typing import Dict, Optional
|
14 |
+
|
15 |
+
import sys
|
16 |
+
import os
|
17 |
+
import itertools
|
18 |
+
from itertools import dropwhile
|
19 |
+
from multiprocessing import Pool
|
20 |
+
|
21 |
+
import numpy as np
|
22 |
+
from scipy import ndimage
|
23 |
+
|
24 |
+
@dataclass
class IUPACArguments:
    """Command-line arguments, parsed into this dataclass by HfArgumentParser.

    Fields without a default (``dataset_dir`` and ``vocab_fn``) are required.
    """

    # directory handed to IUPACDataset as ``dataset_dir``
    dataset_dir: str = field(
        metadata={"help": "Directory where dataset is located"}
    )
    # path handed to T5IUPACTokenizer as ``vocab_file``
    vocab_fn: str = field(
        metadata={"help": "File containing sentencepiece model"}
    )
    dataset_filename: str = field(
        default="iupacs_logp.txt",
        metadata={"help": "Filename within dataset_dir containing the data"}
    )
    name_col: Optional[str] = field(
        default="Preferred",  # for logp
        metadata={"help": "Name of column with IUPAC names"}
    )
|
40 |
+
|
41 |
+
|
42 |
+
def main():
    """Compare every dataset molecule against the IUPAC names read from stdin.

    Reads one IUPAC name per line from stdin, tokenizes them, loads the train
    and validation splits of the IUPAC dataset, and runs
    ``check_if_reachable`` on every (dataset molecule, input molecule) pair in
    a process pool. Matches are reported by ``check_if_reachable`` itself
    (it prints them); the boolean results are not used here.
    """
    parser = HfArgumentParser(IUPACArguments)
    iupac_args, = parser.parse_args_into_dataclasses()

    # check_if_reachable reads the tokenizer through this module-level global,
    # so it has to be assigned before the worker pool is created.
    global tokenizer
    tokenizer = T5IUPACTokenizer(vocab_file=iupac_args.vocab_fn)

    unk = tokenizer._convert_token_to_id("<unk>")

    dataset_kwargs = {
        "dataset_dir": iupac_args.dataset_dir,
        "tokenizer": tokenizer,
        "max_length": 128,
        "prepend_target": False,
        "mean_span_length": 3,
        "mask_probability": 0,
    }

    pubchem_train = IUPACDataset(train=True, **dataset_kwargs)
    pubchem_val = IUPACDataset(train=False, **dataset_kwargs)
    pubchem = ConcatDataset([pubchem_train, pubchem_val])

    batch_size = 2048

    def collate(batch):
        # Pack a whole batch into one flat tensor:
        #   [batch size, length of each sequence..., all token ids...]
        # [:-1] to remove </s>
        input_ids = [d["input_ids"][:-1] for d in batch]
        lengths = torch.tensor([d.numel() for d in input_ids])
        return torch.hstack([torch.tensor([len(batch)]), lengths] + input_ids)

    loader = DataLoader(pubchem,
                        batch_size=batch_size,
                        num_workers=72,
                        collate_fn=collate)

    # we'll find clusters for each input molecule
    input_iupacs = [n.strip() for n in sys.stdin.readlines()]
    # [:-1] to get rid of </s>
    base_tokenizeds = [tokenizer(b)["input_ids"][:-1] for b in input_iupacs]
    # skip inputs that are very short or contain unknown tokens
    base_tokenizeds = [torch.tensor(t)
                       for t in base_tokenizeds if len(t) >= 10 and unk not in t]

    potentially_reachables = []
    for batch in loader:
        # undo the flat packing done in collate()
        bs = int(batch[0])
        lengths = batch[1:bs + 1]
        tokenizeds = torch.split(batch[bs + 1:], lengths.tolist())
        potentially_reachables += tokenizeds

    pairs = itertools.product(potentially_reachables, base_tokenizeds)
    # The context manager terminates the workers if starmap raises, so a
    # failure no longer leaves the pool running.
    with Pool(144) as pool:
        pool.starmap(check_if_reachable, pairs)
        # On success, shut down gracefully before the context manager's
        # terminate() so workers can exit normally after printing matches.
        pool.close()
        pool.join()
|
103 |
+
|
104 |
+
def check_if_reachable(tokenized, base_tokenized):
    """Report whether ``tokenized`` differs from ``base_tokenized`` by one short edit.

    Both arguments are 1-D token-id tensors. When the pair matches, a line
    of the form ``"<base name>","<candidate name>"`` is printed using the
    module-level ``tokenizer``, and True is returned; otherwise False.
    """
    candidate_vocab = set(tokenized.tolist())
    base_vocab = set(base_tokenized.tolist())

    # Cheap rejections before running the edit-distance computation:
    # too many tokens present in only one of the two sequences...
    if len(candidate_vocab.symmetric_difference(base_vocab)) >= 15:
        return False
    # ...or sequence lengths too far apart.
    if abs(len(tokenized) - len(base_tokenized)) > 15:
        return False

    # NOTE(review): src_mask presumably flags the base positions touched by
    # the edit script -- confirm against physprop_exp.levenshtein_distance.
    _, src_mask, _ = levenshtein_distance(base_tokenized, tokenized)
    filled = ndimage.binary_fill_holes(src_mask).astype(int)
    edited = filled.sum()

    # we used span lengths 1-5 in gen_t5.py
    if edited < 1 or edited > 5:
        return False

    # this is a match
    base_iupac = tokenizer.decode(base_tokenized)
    decoded = tokenizer.decode(tokenized)
    print('"{}","{}"'.format(base_iupac, decoded))
    return True
|
128 |
+
|
129 |
+
if __name__ == "__main__":
|
130 |
+
main()
|
TransAntivirus/data_utils.py
ADDED
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
|
3 |
+
def mask_spans(tokenizer, input_ids, mask_probability, mean_span_length):
    """Randomly mask spans of ``input_ids`` and build the matching target.

    Args:
        tokenizer: object whose ``sentinels(index_tensor)`` maps sentinel
            indices to sentinel token ids.
        input_ids: 1-D tensor of token ids.
        mask_probability: fraction of tokens to mask.
        mean_span_length: desired average length of a masked span.

    Returns:
        ``(masked_input, target_ids)``. In ``masked_input`` every masked span
        is replaced by one sentinel; in ``target_ids`` every unmasked span is
        replaced by one sentinel. If the input has fewer than two tokens, or
        no token ends up masked, ``input_ids`` is returned unchanged together
        with ``tokenizer.sentinels(torch.tensor([0]))``.
    """
    length = input_ids.numel()
    if length < 2:
        return input_ids, tokenizer.sentinels(torch.tensor([0]))

    num_noise_tokens = round(length * mask_probability)
    # always leave at least one token unmasked
    num_noise_tokens = min(max(num_noise_tokens, 0), length - 1)
    if num_noise_tokens == 0:
        return input_ids, tokenizer.sentinels(torch.tensor([0]))
    num_nonnoise_tokens = length - num_noise_tokens

    num_noise_spans = max(round(num_noise_tokens / mean_span_length), 1)
    # Every span needs at least one token. Without this clamp the two
    # segmentations below can come back with different lengths and
    # torch.stack fails (e.g. length 4, mask_probability 0.75, span length 1).
    num_noise_spans = min(num_noise_spans, num_noise_tokens, num_nonnoise_tokens)

    # With probability mask_probability the sequence ends on a noise span;
    # otherwise one extra non-noise span closes it. The extra span is only
    # possible when there are enough non-noise tokens to fill it.
    ends_with_noise = torch.rand(1).item() < mask_probability
    if ends_with_noise or num_noise_spans + 1 > num_nonnoise_tokens:
        num_nonnoise_spans = num_noise_spans
    else:
        num_nonnoise_spans = num_noise_spans + 1

    def _random_segmentation(num_items, num_segments):
        # Randomly split num_items into num_segments non-empty runs and
        # return the run lengths.
        ones = (torch.arange(num_items - 1) < num_segments - 1).int()
        first_in_segment = torch.cat([torch.tensor([0]).int(),
                                      ones[torch.randperm(num_items - 1)]])
        segment_id = torch.cumsum(first_in_segment, dim=0)
        _, lengths = segment_id.unique_consecutive(return_counts=True)
        return lengths

    noise_span_lengths = _random_segmentation(num_noise_tokens,
                                              num_noise_spans)
    nonnoise_span_lengths = _random_segmentation(num_nonnoise_tokens,
                                                 num_nonnoise_spans)

    has_trailing_nonnoise = num_nonnoise_spans > num_noise_spans
    if has_trailing_nonnoise:
        # pad with an empty noise span so both tensors can be stacked
        noise_span_lengths = torch.cat([noise_span_lengths,
                                        torch.tensor([0])])
    # non-noise, noise, non-noise, noise, ...
    interleaved_span_lengths = torch.stack([
        nonnoise_span_lengths, noise_span_lengths
    ], dim=1).view(-1)
    if has_trailing_nonnoise:
        # drop the padding span again
        interleaved_span_lengths = interleaved_span_lengths[:-1]

    span_starts = torch.cumsum(interleaved_span_lengths, dim=0)[:-1]
    span_start_indicator = torch.zeros(length).bool()
    span_start_indicator[span_starts] = 1
    span_num = torch.cumsum(span_start_indicator, dim=0)
    # spans alternate starting with non-noise, so odd-numbered spans are noise
    is_noise = span_num % 2 == 1

    def sentinelify(tokens, noise_mask):
        # Replace each run of masked tokens with a single sentinel: the first
        # token of the run becomes the sentinel, the rest are dropped.
        prev_token_is_noise = torch.cat([torch.tensor([0]).bool(),
                                         noise_mask[:-1]])
        first_noise_tokens = noise_mask & ~prev_token_is_noise
        subsequent_noise_tokens = noise_mask & prev_token_is_noise
        sentinels = tokenizer.sentinels(
            torch.cumsum(first_noise_tokens, dim=0) - 1
        )
        tokens = torch.where(first_noise_tokens, sentinels, tokens)
        return tokens[~subsequent_noise_tokens]

    masked_input = sentinelify(input_ids, is_noise)
    target_ids = sentinelify(input_ids, ~is_noise)

    return masked_input, target_ids
|
80 |
+
|
81 |
+
|
82 |
+
def collapse_sentinels(tokenizer, input_ids, target_ids):
    """Merge a sentinel-masked input and its target back into one sequence.

    Each sentinel in ``input_ids`` is replaced by the tokens that follow the
    same sentinel in ``target_ids``.

    Args:
        tokenizer: object providing ``eos_token_id``, ``_extra_ids`` (number
            of sentinels), ``sentinels(index_tensor)`` and
            ``sentinel_mask(ids)``.
        input_ids: 1-D tensor of token ids containing sentinels.
        target_ids: 1-D tensor of token ids containing the masked-out spans,
            separated by sentinels.

    Returns:
        1-D tensor with the sentinels of ``input_ids`` filled in.

    Raises:
        ValueError: if the sentinels are out of order, more numerous than the
            tokenizer defines, adjacent to each other, or not interleaved
            consistently between the two sequences.
    """
    def remove_extraneous(ids):
        # drop the first </s> and everything after it
        eos = tokenizer.eos_token_id
        pad_mask = (ids == eos).cumsum(dim=0).clamp(0, 1).bool()
        ids = ids[:ids.numel() - pad_mask.sum()]
        return ids

    input_ids = remove_extraneous(input_ids)
    target_ids = remove_extraneous(target_ids)

    num_sentinels = tokenizer._extra_ids
    all_sentinel_ids = tokenizer.sentinels(
        torch.arange(num_sentinels).to(input_ids.device)
    )

    def validate(ids, name="ids"):
        # Sentinels must appear in the tokenizer's order, starting with the
        # first one. Returns the boolean sentinel mask of ``ids``.
        mask = tokenizer.sentinel_mask(ids)
        sentinels = ids[mask]
        msg = "sentinels in {} are in the wrong order"
        # More sentinels than exist cannot be in order; checking the count
        # first avoids a shape-mismatch RuntimeError in the comparison below.
        if sentinels.numel() > all_sentinel_ids.numel():
            raise ValueError(msg.format(name))
        if not torch.all(sentinels == all_sentinel_ids[:sentinels.numel()]):
            raise ValueError(msg.format(name))
        return mask

    input_sentinel_mask = validate(input_ids, "input_ids")
    target_sentinel_mask = validate(target_ids, "target_ids")

    # run-length encode both masks: span type (True = sentinel) and length
    input_span_types, input_span_lengths = \
        input_sentinel_mask.unique_consecutive(return_counts=True)
    target_span_types, target_span_lengths = \
        target_sentinel_mask.unique_consecutive(return_counts=True)

    # every sentinel run must have length one
    input_sentinel_span_lengths = input_span_lengths[input_span_types]
    target_sentinel_span_lengths = target_span_lengths[target_span_types]
    if input_sentinel_span_lengths.sum() != input_span_types.sum():
        raise ValueError("consecutive sentinel tokens in input_ids")
    if target_sentinel_span_lengths.sum() != target_span_types.sum():
        raise ValueError("consecutive sentinel tokens in target_ids")

    # where the input has a sentinel the target must have tokens, and vice versa
    msg = "invalid interleaving of sentinels between inputs and target"
    if input_span_types.numel() != target_span_types.numel():
        raise ValueError(msg)
    xor = torch.logical_xor(input_span_types, target_span_types)
    if xor.sum() != input_span_types.numel():
        raise ValueError(msg)

    # Repeat each input sentinel once per token of the target span that
    # replaces it; ordinary tokens are kept once.
    input_repeat = input_sentinel_mask.long()
    input_repeat[input_sentinel_mask] = target_span_lengths[~target_span_types]
    input_repeat[input_repeat == 0] = 1

    # Same for the target, so both expanded sequences line up position by
    # position.
    target_repeat = target_sentinel_mask.long()
    target_repeat[target_sentinel_mask] = input_span_lengths[~input_span_types]
    target_repeat[target_repeat == 0] = 1

    input_repeated = input_ids.repeat_interleave(input_repeat)
    target_repeated = target_ids.repeat_interleave(target_repeat)

    # take the target token wherever the expanded input still holds a sentinel
    use_target = tokenizer.sentinel_mask(input_repeated)
    collapsed = torch.where(use_target, target_repeated, input_repeated)

    return collapsed
|
147 |
+
|
148 |
+
|
149 |
+
|
150 |
+
def recoverd(x,y):
    """Fill the sentinels of masked sequence ``x`` with the spans from ``y``.

    Both arguments are 1-D tensors of token ids. Each is cut after its first
    token with id 1, and ids above 1400 are treated as sentinels.
    NOTE(review): 1 is presumably the </s> id and 1400 the start of the
    sentinel range -- confirm against the tokenizer's vocabulary.

    Returns:
        1-D tensor: ``x`` (up to and including its first id-1 token) with
        every sentinel replaced by the tokens that follow that sentinel
        in ``y``.

    Raises:
        IndexError: if ``x`` or ``y`` has no token with id 1, or a sentinel
            taken from ``y`` is missing from ``x``.
        ValueError: if ``y`` contains no sentinel.
    """
    # this function needs numpy; imported here to keep the block self-contained
    import numpy as np

    # detach/cpu so tensors on an accelerator or tracking gradients also work
    x = x.detach().cpu().numpy()
    y = y.detach().cpu().numpy()

    # keep everything up to and including the first id-1 token
    x_drop_index = np.where(x==1)[0][0]
    x = x[:x_drop_index+1]

    y_drop_index = np.where(y==1)[0][0]
    y = y[:y_drop_index+1]

    # distinct sentinels found in y, largest id first
    z = sorted({i for i in y if i > 1400}, reverse=True)

    final_pos = min(z)

    if final_pos not in x:
        # the smallest sentinel only closes y: drop it and the id-1 token
        y = y[:-2]
        z = z[:-1]
    else:
        # drop only the id-1 token
        y = y[:-1]

    final_list = []

    index_in_x_last = 0

    if len(z)==1:
        # single sentinel: its span is everything after the first token of y
        index_in_x_current = np.where(x==z[0])[0][0]
        final_list = (list(x[index_in_x_last:index_in_x_current].flatten())
                      + list(y[1:].flatten())
                      + list(x[index_in_x_current+1:].flatten()))
    else:
        for i in range(len(z)):
            index_in_x_current = np.where(x==z[i])[0][0]
            # the span starts right after the sentinel in y...
            index_in_y_current = np.where(y==z[i])[0][0]+1

            # ...and runs up to the next sentinel, or to the end of y
            if i==len(z)-1:
                next_y_index = len(y)
            else:
                next_y_index = np.where(y==z[i+1])[0][0]

            final_list = (final_list
                          + list(x[index_in_x_last:index_in_x_current].flatten())
                          + list(y[index_in_y_current:next_y_index].flatten()))
            index_in_x_last = index_in_x_current + 1

        # tail of x after the last sentinel
        final_list = final_list + list(x[index_in_x_last:].flatten())
    final_list = np.array(final_list)
    c = torch.from_numpy(final_list)
    return c
|
TransAntivirus/download_pubchem/SARS0729_canon_desc.csv
ADDED
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
PubChem CID|name |Preferred|Canonical<|NumAtoms|MolWt|Log P|TPSA|HBA|HBD|Fsp3|ROTB|NumRings|AROM|NumHeavyAtom|NumStereo|QED|QEPPI|SAscore|NPscore|Lipinski|Ro4|PAINS
|
2 |
+
290486.0|Triciribine|2-(5-amino-7-methyl-2,6,7,9,11-pentazatricyclo[6.3.1.04,12]dodeca-1(12),3,5,8,10-pentaen-2-yl)-5-(hydroxymethyl)oxolane-3,4-diol|CN1N=C(N)c2cn(C3OC(CO)C(O)C3O)c3ncnc1c23|23|320.3090000000001|-1.8870999999999987|142.25|10|4|0.46153846153846156|2|4|2|23|8|0.5030837169270522|0.2230377496655855|4.206336221285245|0.5141541547619305|1|0|False
|
3 |
+
400633.0|Capecitabine|pentyl N-[1-(3,4-dihydroxy-5-methyloxolan-2-yl)-5-fluoro-2-oxopyrimidin-4-yl]carbamate|CCCCCOC(=O)Nc1nc(=O)n(C2OC(C)C(O)C2O)cc1F|25|359.35400000000004|0.7601999999999995|122.91000000000001|8|3|0.6666666666666666|6|2|1|25|8|0.640839151584694|0.3477485501574143|3.70159793901294|0.379648786773868|1|0|False
|
4 |
+
1546.0|Cladribine|5-(6-amino-2-chloropurin-9-yl)-2-(hydroxymethyl)oxolan-3-ol |Nc1nc(Cl)nc2c1ncn2C1CC(O)C(CO)O1|19|285.69100000000003|-0.29740000000000044|119.31000000000002|8|3|0.5|2|3|2|19|6|0.6481668237451135|0.3813779140915048|3.622779613665122|0.7589084087510317|1|0|False
|
5 |
+
16886.0|Decitabine|4-amino-1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-1,3,5-triazin-2-one|Nc1ncn(C2CC(O)C(CO)O2)c(=O)n1|16|228.20799999999997|-2.1388|123.49|8|3|0.625|2|2|1|16|6|0.5273624425093522|0.2441325945298081|3.643221050753482|1.1104565214827065|1|0|False
|
6 |
+
3385.0|Fluorouracil (5-Fluoracil, 5-FU)|5-fluoro-1H-pyrimidine-2,4-dione|O=c1[nH]cc(F)c(=O)[nH]1|9|130.078|-0.7977000000000001|65.72|2|2|0.0|0|1|1|9|0|0.482564502908463|0.22429291396082054|2.6590876226101727|-0.7992390609873111|1|0|False
|
7 |
+
248862.0|Nelarabine|2-(2-amino-6-methoxypurin-9-yl)-5-(hydroxymethyl)oxolane-3,4-diol|COc1nc(N)nc2c1ncn2C1OC(CO)C(O)C1O|21|297.271|-1.9713999999999994|148.76999999999998|10|4|0.5454545454545454|3|3|2|21|8|0.5056130599451503|0.23058502859583221|3.669441345268779|0.8720015392381619|1|0|False
|
8 |
+
354624.0|Clofarabine|5-(6-amino-2-chloropurin-9-yl)-4-fluoro-2-(hydroxymethyl)oxolan-3-ol|Nc1nc(Cl)nc2c1ncn2C1OC(CO)C(O)C1F|20|303.681|-0.34940000000000027|119.31000000000002|8|3|0.5|2|3|2|20|8|0.648649936215886|0.3883700063918832|3.8712345426538963|0.6251566198496301|1|0|False
|
9 |
+
3368.0|Fludarabine Phosphate|[5-(6-amino-2-fluoropurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl dihydrogen phosphate|Nc1nc(F)nc2c1ncn2C1OC(COP(=O)(O)O)C(O)C1O|24|365.21400000000006|-1.7239000000000007|186.07|10|5|0.5|4|3|2|24|8|0.30617189529250766|0.17310948790732877|3.9840913898753945|1.0864995352244873|0|0|False
|
10 |
+
135605572.0|Entecavir Hydrate|2-amino-9-[4-hydroxy-3-(hydroxymethyl)-2-methylidenecyclopentyl]-1H-purin-6-one|C=C1C(CO)C(O)CC1n1cnc2c(=O)nc(N)[nH]c21|20|277.28399999999993|-0.8278000000000001|130.04999999999998|7|4|0.4166666666666667|2|3|2|20|6|0.5301148369563783|0.3229952345490085|4.052892286880503|0.9583882509483649|1|0|False
|
11 |
+
2754.0|Cilostazol|6-[4-(1-cyclohexyltetrazol-5-yl)butoxy]-3,4-dihydro-1H-quinolin-2-one|O=C1CCc2cc(OCCCCc3nnnn3C3CCCCC3)ccc2N1|27|369.4690000000002|3.4647000000000014|81.93|6|1|0.6|7|4|2|27|0|0.755913406639335|0.7771342880231443|2.508630577222638|-1.5209469226199739|1|0|False
|
12 |
+
3363.0|Floxuridine|5-fluoro-1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidine-2,4-dione|O=c1[nH]c(=O)n(C2CC(O)C(CO)O2)cc1F|17|246.194|-1.6836|104.55000000000001|6|3|0.5555555555555556|2|2|1|17|6|0.5776171321259124|0.3356934148814366|3.473377071505359|0.6963745697774709|1|0|False
|
13 |
+
5386.0|Tegafur (FT-207, NSC 148958)|5-fluoro-1-(oxolan-2-yl)pyrimidine-2,4-dione|O=c1[nH]c(=O)n(C2CCCO2)cc1F|14|200.16899999999998|-0.015300000000000313|64.09|4|1|0.5|1|2|1|14|2|0.6927125296086032|0.3535172217146212|2.9469611714043964|-0.4032937319470785|1|0|False
|
14 |
+
5155.0|Stavudine (d4T)|1-[5-(hydroxymethyl)-2,5-dihydrofuran-2-yl]-5-methylpyrimidine-2,4-dione|Cc1cn(C2C=CC(CO)O2)c(=O)[nH]c1=O|16|224.21599999999998|-0.7090799999999999|84.32|5|2|0.4|2|2|1|16|4|0.6498890462135971|0.406330949617662|3.582011471393738|0.952976324214325|1|0|False
|
15 |
+
3367.0|Fludarabine|2-(6-amino-2-fluoropurin-9-yl)-5-(hydroxymethyl)oxolane-3,4-diol|Nc1nc(F)nc2c1ncn2C1OC(CO)C(O)C1O|20|285.235|-1.8409000000000002|139.54000000000002|9|4|0.5|2|3|2|20|8|0.47183364718578485|0.24118104715477695|3.742329367828722|1.06082713921308|1|0|False
|
16 |
+
5281078.0|Mycophenolate Mofetil|2-morpholin-4-ylethyl (E)-6-(4-hydroxy-6-methoxy-7-methyl-3-oxo-1H-2-benzofuran-5-yl)-4-methylhex-4-enoate|COc1c(C)c2c(c(O)c1CC=C(C)CCC(=O)OCCN1CCOCC1)C(=O)OC2|31|433.5010000000003|2.52402|94.53000000000002|8|1|0.5652173913043478|9|3|1|31|0|0.46901748000616883|0.37568520022091145|2.909809273894176|0.7259426339747483|1|0|False
|
17 |
+
191.0|Adenosine|2-(6-aminopurin-9-yl)-5-(hydroxymethyl)oxolane-3,4-diol|Nc1ncnc2c1ncn2C1OC(CO)C(O)C1O|19|267.245|-1.9800000000000006|139.54000000000002|9|4|0.5|2|3|2|19|8|0.49051986767922323|0.23617270345702296|3.5313095881349117|1.3138998457117157|1|0|False
|
18 |
+
1134.0|Telbivudine|1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidine-2,4-dione|Cc1cn(C2CC(O)C(CO)O2)c(=O)[nH]c1=O|17|242.23099999999997|-1.5142799999999992|104.54999999999998|6|3|0.6|2|2|1|17|6|0.5842742137821719|0.33499679157314843|3.3763667036123843|1.0380300294610116|1|0|False
|
19 |
+
135422442.0|Didanosine|9-[5-(hydroxymethyl)oxolan-2-yl]-1H-purin-6-one|O=c1[nH]cnc2c1ncn2C1CCC(CO)O1|17|236.23099999999997|-0.21050000000000008|93.03|6|2|0.5|2|3|2|17|4|0.7521450742487874|0.5432250422691189|3.611021286177671|0.6773893007177353|1|0|False
|
20 |
+
9837769.0|Emtricitabine|4-amino-5-fluoro-1-[2-(hydroxymethyl)-1,3-oxathiolan-5-yl]pyrimidin-2-one|Nc1nc(=O)n(C2CSC(CO)O2)cc1F|16|247.25099999999998|-0.4550000000000003|90.37|7|2|0.5|2|2|1|16|4|0.7349748054075844|0.3453875663184652|3.8873632196454855|0.6995396761378563|1|0|False
|
21 |
+
3387.0|Lamivudine|4-amino-1-[2-(hydroxymethyl)-1,3-oxathiolan-5-yl]pyrimidin-2-one|Nc1ccn(C2CSC(CO)O2)c(=O)n1|15|229.261|-0.5941000000000001|90.36999999999999|7|2|0.5|2|2|1|15|4|0.7038615838122525|0.3378624805129178|3.8064405703937787|1.4615989713898194|1|0|False
|
22 |
+
3461.0|Gemcitabine|4-amino-1-[3,3-difluoro-4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|Nc1ccn(C2OC(CO)C(O)C2(F)F)c(=O)n1|18|263.2|-1.2886|110.60000000000001|7|3|0.5555555555555556|2|2|1|18|6|0.6120873727300361|0.30156266317008307|3.8632725293690937|1.2986545260508495|1|0|False
|
23 |
+
5718.0|Zalcitabine|4-amino-1-[5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|Nc1ccn(C2CCC(CO)O2)c(=O)n1|15|211.22099999999998|-0.5046000000000002|90.36999999999999|6|2|0.5555555555555556|2|2|1|15|4|0.6875458801290977|0.3738843645665796|3.417990020943229|1.2753722241588197|1|0|False
|
24 |
+
4463.0|Nevirapine|2-cyclopropyl-7-methyl-2,4,9,15-tetrazatricyclo[9.4.0.03,8]pentadeca-1(11),3,5,7,12,14-hexaen-10-one|Cc1ccnc2c1NC(=O)c1cccnc1N2C1CC1|20|266.30400000000003|2.6512200000000004|58.120000000000005|4|1|0.26666666666666666|1|4|2|20|0|0.861716125086419|0.5536172692074276|2.5944323945817214|-0.24728332568993|1|0|False
|
25 |
+
253083.0|Trifluridine|1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-(trifluoromethyl)pyrimidine-2,4-dione|O=c1[nH]c(=O)n(C2CC(O)C(CO)O2)cc1C(F)(F)F|20|296.20099999999996|-0.8039000000000001|104.55000000000001|6|3|0.6|2|2|1|20|6|0.6622919739385443|0.35932251164721696|3.6118403697410404|0.4994653045681|1|0|False
|
26 |
+
1805.0|Azacitidine|4-amino-1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-1,3,5-triazin-2-one|Nc1ncn(C2OC(CO)C(O)C2O)c(=O)n1|17|244.207|-3.1679999999999997|143.72000000000003|9|4|0.625|2|2|1|17|8|0.4262474653048269|0.16004433948003915|3.687407323630228|1.3188208399256056|1|0|False
|
27 |
+
191.0|Vidarabine|2-(6-aminopurin-9-yl)-5-(hydroxymethyl)oxolane-3,4-diol|Nc1ncnc2c1ncn2C1OC(CO)C(O)C1O|19|267.245|-1.9800000000000006|139.54000000000002|9|4|0.5|2|3|2|19|8|0.49051986767922323|0.23617270345702296|3.5313095881349117|1.3138998457117157|1|0|False
|
28 |
+
266934.0|AICAR (Acadesine)|5-amino-1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]imidazole-4-carboxamide|NC(=O)c1ncn(C2OC(CO)C(O)C2O)c1N|18|258.23400000000004|-2.8242999999999996|156.85000000000002|8|5|0.5555555555555556|3|2|1|18|8|0.3908498923708794|0.14438999553237833|3.7095421627648637|0.8286879931867945|0|0|False
|
29 |
+
135398513.0|Aciclovir|2-amino-9-(2-hydroxyethoxymethyl)-1H-purin-6-one|Nc1nc2c(ncn2COCCO)c(=O)[nH]1|16|225.20799999999997|-1.3318000000000003|119.04999999999998|7|3|0.375|4|2|2|16|0|0.554385971786765|0.45613595294095116|2.7707884753483327|-0.10317179830629376|1|0|False
|
30 |
+
135398740.0|Ganciclovir|2-amino-9-(1,3-dihydroxypropan-2-yloxymethyl)-1H-purin-6-one|Nc1nc2c(ncn2COC(CO)CO)c(=O)[nH]1|18|255.23399999999998|-1.970899999999999|139.28|8|4|0.4444444444444444|5|2|2|18|0|0.4854930576221492|0.30914081370444985|2.9975492721457613|0.3788503786253501|1|0|False
|
31 |
+
3687.0|Idoxuridine|1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-iodopyrimidine-2,4-dione|O=c1[nH]c(=O)n(C2CC(O)C(CO)O2)cc1I|17|354.1|-1.2181000000000002|104.55000000000001|6|3|0.5555555555555556|2|2|1|17|6|0.578029884636738|0.3781134158688138|3.6278549979267645|0.910464343233765|1|0|False
|
32 |
+
3657.0|Hydroxyurea|hydroxyurea|NC(=O)NO|5|76.05499999999999|-0.9561000000000002|75.35000000000001|2|3|0.0|0|0|0|5|0|0.25664360627911675|0.1190984427030672|2.5247164941764755|-0.31540413797906003|1|0|False
|
33 |
+
279063.0|Cyclocytidine HCl|4-(hydroxymethyl)-10-imino-3,7-dioxa-1,9-diazatricyclo[6.4.0.02,6]dodeca-8,11-dien-5-ol|N=c1ccn2c(n1)OC1C(O)C(CO)OC12|16|225.204|-1.62583|100.59000000000002|7|3|0.5555555555555556|1|3|1|16|8|0.5326902884382276|0.27411721766208946|4.446202327505897|1.497338959013144|1|0|False
|
34 |
+
1177.0|Uridine|1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidine-2,4-dione|O=c1ccn(C2OC(CO)C(O)C2O)c(=O)[nH]1|17|244.20300000000003|-2.8519|124.78|7|4|0.5555555555555556|2|2|1|17|8|0.4435105912731592|0.21867146597756254|3.4839049824931045|1.4898002285532645|1|0|False
|
35 |
+
3159.0|Doxifluridine|1-(3,4-dihydroxy-5-methyloxolan-2-yl)-5-fluoropyrimidine-2,4-dione|CC1OC(n2cc(F)c(=O)[nH]c2=O)C(O)C1O|17|246.19400000000002|-1.6851999999999996|104.55000000000001|6|3|0.5555555555555556|1|2|1|17|8|0.5484019207554993|0.3126320658718193|3.610332741690428|0.7868512061868354|1|0|False
|
36 |
+
596.0|Cytidine|4-amino-1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|Nc1ccn(C2OC(CO)C(O)C2O)c(=O)n1|17|243.21900000000002|-2.563|130.82999999999998|8|4|0.5555555555555556|2|2|1|17|8|0.4489304892314893|0.18860344125062514|3.548894614600127|1.6478124261033116|1|0|False
|
37 |
+
2691.0|CGS 21680 HCl|3-[4-[2-[[6-amino-9-[5-(ethylcarbamoyl)-3,4-dihydroxyoxolan-2-yl]purin-2-yl]amino]ethyl]phenyl]propanoic acid|CCNC(=O)C1OC(n2cnc3c(N)nc(NCCc4ccc(CCC(=O)O)cc4)nc32)C(O)C1O|36|499.52800000000025|-0.16439999999999827|197.73999999999995|11|6|0.43478260869565216|10|4|3|36|8|0.21654103037466466|0.1239959413009151|3.773892423920943|0.05996866085087501|0|0|False
|
38 |
+
135402034.0|Guanosine|2-amino-9-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-1H-purin-6-one|Nc1nc2c(ncn2C2OC(CO)C(O)C2O)c(=O)[nH]1|20|283.24399999999997|-2.6867000000000005|159.51|9|5|0.5|2|3|2|20|8|0.3981374161148042|0.16955766100498496|3.720175521674877|1.2488959303664646|0|0|False
|
39 |
+
135402037.0|Inosine|9-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-1H-purin-6-one|O=c1[nH]cnc2c1ncn2C1OC(CO)C(O)C1O|19|268.229|-2.2689|133.49|8|4|0.5|2|3|2|19|8|0.4822946675996651|0.27226976952981763|3.7484460177430403|1.1878722340647632|1|0|False
|
40 |
+
5064.0|Ribavirin|1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-1,2,4-triazole-3-carboxamide|NC(=O)c1ncn(C2OC(CO)C(O)C2O)n1|17|244.20700000000002|-3.0114999999999994|143.72000000000003|8|4|0.625|3|2|1|17|8|0.44284203167437103|0.19640354038822913|3.8687708687807296|0.6906384280796763|1|0|False
|
41 |
+
5726.0|Zidovudine|1-[4-azido-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidine-2,4-dione|Cc1cn(C2CC(N=[N+]=[N-])C(CO)O2)c(=O)[nH]c1=O|19|267.245|-0.19628000000000012|133.07999999999998|6|2|0.6|3|2|1|19|6|0.4454004294146897|0.3925697774093064|3.849317511398737|0.8935864361663792|1|0|True
|
42 |
+
91302628.0|Sofosbuvir (PSI-7977, GS-7977)|propan-2-yl 2-[[[5-(2,4-dioxopyrimidin-1-yl)-4-fluoro-3-hydroxy-4-methyloxolan-2-yl]methoxy-phenoxyphosphoryl]amino]propanoate|CC(C)OC(=O)C(C)NP(=O)(OCC1OC(n2ccc(=O)[nH]c2=O)C(C)(F)C1O)Oc1ccccc1|36|529.4580000000002|1.6565000000000003|158.17999999999995|10|3|0.5|10|3|2|36|12|0.30528322976796235|0.29165853488216725|4.375073076955571|0.24310435922934737|0|0|False
|
43 |
+
214347.0|Dapivirine (TMC120)|4-[[4-(2,4,6-trimethylanilino)pyrimidin-2-yl]amino]benzonitrile|Cc1cc(C)c(Nc2ccnc(Nc3ccc(C#N)cc3)n2)c(C)c1|25|329.4070000000001|4.760740000000003|73.63|5|2|0.15|4|3|3|25|0|0.7216163650237382|0.8678943244497731|2.2542436522113114|-1.5749294902855966|1|0|False
|
44 |
+
352992.0|Clevudine|1-[3-fluoro-4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidine-2,4-dione|Cc1cn(C2OC(CO)C(O)C2F)c(=O)[nH]c1=O|18|260.221|-1.5662799999999997|104.54999999999998|6|3|0.6|2|2|1|18|8|0.5958623283349754|0.3413235419835224|3.6783565858417866|0.7867064816243999|1|0|False
|
45 |
+
193962.0|Etravirine (TMC125)|4-[6-amino-5-bromo-2-(4-cyanoanilino)pyrimidin-4-yl]oxy-3,5-dimethylbenzonitrile|Cc1cc(C#N)cc(C)c1Oc1nc(Nc2ccc(C#N)cc2)nc(N)c1Br|28|435.2850000000001|4.717400000000002|120.63999999999999|7|2|0.1|4|3|3|28|0|0.6084537970432328|0.7274550650547328|2.701548964781006|-1.0535754913840394|1|0|False
|
46 |
+
1869.0|N6-methyladenosine (m6A)|2-(hydroxymethyl)-5-[6-(methylamino)purin-9-yl]oxolane-3,4-diol|CNc1ncnc2c1ncn2C1OC(CO)C(O)C1O|20|281.272|-1.5205000000000009|125.55000000000001|9|4|0.5454545454545454|3|3|2|20|8|0.5342275014319904|0.2703216790234994|3.6073528343621897|1.041930725635605|1|0|False
|
47 |
+
248010.0|Cordycepin|2-(6-aminopurin-9-yl)-5-(hydroxymethyl)oxolan-3-ol|Nc1ncnc2c1ncn2C1OC(CO)CC1O|18|251.24599999999998|-0.9508000000000008|119.31000000000002|8|3|0.5|2|3|2|18|6|0.628993302830891|0.36275129625779995|3.6322713415862893|1.3733080654408107|1|0|False
|
48 |
+
4602.0|Osalmid|2-hydroxy-N-(4-hydroxyphenyl)benzamide|O=C(Nc1ccc(O)cc1)c1ccccc1O|17|229.23499999999999|2.3501000000000007|69.56|3|3|0.0|2|2|2|17|0|0.6921374334367943|0.46153746799921075|1.5130819664559425|-0.744133777598706|1|0|False
|
49 |
+
9679.0|4-Amino-5-imidazolecarboxamide|4-amino-1H-imidazole-5-carboxamide|NC(=O)c1nc[nH]c1N|9|126.119|-0.9092000000000005|97.79|3|3|0.0|1|1|1|9|0|0.4539721011557528|0.2552272944309776|2.9954741610717104|-0.5140951039502445|1|0|False
|
50 |
+
135402019.0|2'-Deoxyinosine| 9-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-1H-purin-6-one|O=c1[nH]cnc2c1ncn2C1CC(O)C(CO)O1|18|252.23|-1.2397000000000002|113.26|7|3|0.5|2|3|2|18|6|0.6158237953049276|0.4186149308280351|3.709847812174523|0.9179637393863056|1|0|False
|
51 |
+
72830388.0|Valganciclovir HCl|[2-[(2-amino-6-oxo-4,5-dihydro-1H-purin-9-yl)methoxy]-3-hydroxypropyl] 2-amino-3-methylbutanoate|CC(C)C(N)C(=O)OCC(CO)OCN1C=NC2C(=O)NC(N)=NC21|25|356.38300000000004|-2.668999999999996|164.86|10|4|0.7142857142857143|8|2|0|25|8|0.3418489997940892|0.10362369303515996|4.612915133583076|0.8576886024449053|0|0|False
|
52 |
+
135398748.0|Penciclovir|2-amino-9-[4-hydroxy-3-(hydroxymethyl)butyl]-1H-purin-6-one|Nc1nc(=O)c2ncn(CCC(CO)CO)c2[nH]1|18|253.26199999999997|-1.3073|130.05|7|4|0.5|5|2|2|18|0|0.5232780259906572|0.36417216397814534|2.885575425991915|-0.025008256510316646|1|0|False
|
53 |
+
135403646.0|Azaguanine-8|5-amino-2,6-dihydrotriazolo[4,5-d]pyrimidin-7-one|Nc1nc2[nH]nnc2c(=O)[nH]1|11|152.11700000000002|-1.3766000000000003|113.34|5|3|0.0|0|2|2|11|0|0.4303161239320503|0.4073247679104095|3.506466837537925|-0.6790146471510455|1|0|False
|
54 |
+
22138239.0|Ademetionine disulfate tosylate|2-amino-4-[[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl-methylsulfonio]butanoate|C[S+](CCC(N)C(=O)[O-])CC1OC(n2cnc3c(N)ncnc32)C(O)C1O|27|398.4450000000001|-3.256899999999996|185.45999999999998|11|4|0.6|7|3|2|27|12|0.34496993620130295|0.2235483545193125|4.7565494434378035|0.9079142947079224|0|0|False
|
55 |
+
3203.0|Efavirenz|6-chloro-4-(2-cyclopropylethynyl)-4-(trifluoromethyl)-1H-3,1-benzoxazin-2-one|O=C1Nc2ccc(Cl)cc2C(C#CC2CC2)(C(F)(F)F)O1|21|315.67799999999994|4.073100000000001|38.33|2|1|0.35714285714285715|0|3|1|21|2|0.7328090954055055|0.2532354094918868|3.5657078249547665|0.06666647788737144|1|0|False
|
56 |
+
72661.0|Nicotinamide N-oxide|1-oxidopyridin-1-ium-3-carboxamide|NC(=O)c1ccc[n+]([O-])c1|10|138.12599999999998|-0.5811|70.03|2|1|0.0|1|1|1|10|0|0.4162538873709143|0.23477866525463617|2.587100135906283|-1.3023827034912496|1|0|False
|
57 |
+
1134.0|Thymidine|1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidine-2,4-dione|Cc1cn(C2CC(O)C(CO)O2)c(=O)[nH]c1=O|17|242.23099999999997|-1.5142799999999992|104.54999999999998|6|3|0.6|2|2|1|17|6|0.5842742137821719|0.33499679157314843|3.3763667036123843|1.0380300294610116|1|0|False
|
58 |
+
1177.0|Uracil 1-β-D-arabinofuranoside|1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidine-2,4-dione|O=c1ccn(C2OC(CO)C(O)C2O)c(=O)[nH]1|17|244.20300000000003|-2.8519|124.78|7|4|0.5555555555555556|2|2|1|17|8|0.4435105912731592|0.21867146597756254|3.4839049824931045|1.4898002285532645|1|0|False
|
59 |
+
98961.0|2,2'-Cyclouridine|5-hydroxy-4-(hydroxymethyl)-3,7-dioxa-1,9-diazatricyclo[6.4.0.02,6]dodeca-8,11-dien-10-one|O=c1ccn2c(n1)OC1C(O)C(CO)OC12|16|226.18800000000002|-1.7451000000000005|93.81000000000002|7|2|0.5555555555555556|1|3|1|16|8|0.589765502233582|0.30625756971028406|4.082753096736665|1.3490016836473129|1|0|False
|
60 |
+
5353599.0|Brivudine|5-[(E)-2-bromoethenyl]-1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidine-2,4-dione|O=c1[nH]c(=O)n(C2CC(O)C(CO)O2)cc1C=CBr|19|333.13800000000003|-0.45710000000000006|104.55000000000001|6|3|0.45454545454545453|3|2|1|19|6|0.6945612204028229|0.40567778230739426|3.850417215540748|1.1374943613292103|1|0|False
|
61 |
+
23700083.0|Ganciclovir sodium|2-amino-9-(1,3-dihydroxypropan-2-yloxymethyl)purin-6-olate|Nc1nc([O-])c2ncn(COC(CO)CO)c2n1|18|254.22599999999997|-2.1905999999999994|142.37|9|3|0.4444444444444444|5|2|2|18|0|0.545640459838781|0.34612172025925764|3.4886751182996063|0.03842276244240006|1|0|False
|
62 |
+
5136.0|Ademetionine|2-amino-4-[[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl-methylsulfonio]butanoate|C[S+](CCC(N)C(=O)[O-])CC1OC(n2cnc3c(N)ncnc32)C(O)C1O|27|398.4450000000001|-3.256899999999996|185.45999999999998|11|4|0.6|7|3|2|27|12|0.34496993620130295|0.2235483545193125|4.7565494434378035|0.9079142947079224|0|0|False
|
63 |
+
1971.0|Abacavir|[4-[2-amino-6-(cyclopropylamino)purin-9-yl]cyclopent-2-en-1-yl]methanol|Nc1nc(NC2CC2)c2ncn(C3C=CC(CO)C3)c2n1|21|286.33900000000006|1.0922999999999998|101.88|7|3|0.5|4|4|2|21|4|0.7272302824304794|0.532844209200051|3.688999973120933|0.026597937866581003|1|0|False
|
64 |
+
135605572.0|Entecavir|2-amino-9-[4-hydroxy-3-(hydroxymethyl)-2-methylidenecyclopentyl]-1H-purin-6-one|C=C1C(CO)C(O)CC1n1cnc2c(=O)nc(N)[nH]c21|20|277.28399999999993|-0.8278000000000001|130.04999999999998|7|4|0.4166666666666667|2|3|2|20|6|0.5301148369563783|0.3229952345490085|4.052892286880503|0.9583882509483649|1|0|False
|
65 |
+
224.0|Adenosine 5'-monophosphate monohydrate|[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl dihydrogen phosphate|Nc1ncnc2c1ncn2C1OC(COP(=O)(O)O)C(O)C1O|23|347.224|-1.8630000000000009|186.07|10|5|0.5|4|3|2|23|8|0.39017854457244017|0.17004544392240475|3.8048894493072236|1.2966757882455953|0|0|False
|
66 |
+
191.0|Vidarabine monohydrate|2-(6-aminopurin-9-yl)-5-(hydroxymethyl)oxolane-3,4-diol|Nc1ncnc2c1ncn2C1OC(CO)C(O)C1O|19|267.245|-1.9800000000000006|139.54000000000002|9|4|0.5|2|3|2|19|8|0.49051986767922323|0.23617270345702296|3.5313095881349117|1.3138998457117157|1|0|False
|
67 |
+
44399265.0|PSI-6206 (RO-2433, GS-331007)|1-[3-fluoro-4-hydroxy-5-(hydroxymethyl)-3-methyloxolan-2-yl]pyrimidine-2,4-dione|CC1(F)C(O)C(CO)OC1n1ccc(=O)[nH]c1=O|18|260.221|-1.4846000000000001|104.54999999999998|6|3|0.6|2|2|1|18|8|0.6009771935170747|0.3417002787027479|3.9142777396879405|1.009482945923539|1|0|False
|
68 |
+
22451303.0|Regadenoson|1-[6-amino-9-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]purin-2-yl]-N-methylpyrazole-4-carboxamide|CNC(=O)c1cnn(-c2nc(N)c3ncn(C4OC(CO)C(O)C4O)c3n2)c1|28|390.36000000000007|-2.4346999999999985|186.45999999999998|12|5|0.4|4|4|3|28|8|0.32033113268604546|0.16974155969442686|3.8655802133882755|-0.1474028333819392|0|0|False
|
69 |
+
56640146.0|Dasabuvir(ABT-333)|N-[6-[3-tert-butyl-5-(2,4-dioxopyrimidin-1-yl)-2-methoxyphenyl]naphthalen-2-yl]methanesulfonamide|COc1c(-c2ccc3cc(NS(C)(=O)=O)ccc3c2)cc(-n2ccc(=O)[nH]c2=O)cc1C(C)(C)C|35|493.58500000000026|4.023600000000002|110.25999999999999|6|2|0.23076923076923078|5|4|4|35|0|0.4363022246167922|0.771847321512829|2.7135256012508666|-0.6786278979178144|1|1|False
|
70 |
+
4483256.0|Cimicifugoside|[2-hydroxy-1,4',6',12',17',17'-hexamethyl-18'-(3,4,5-trihydroxyoxan-2-yl)oxyspiro[3,6-dioxabicyclo[3.1.0]hexane-4,8'-9-oxahexacyclo[11.9.0.01,21.04,12.05,10.016,21]docos-13-ene]-3'-yl] acetate|CC(=O)OC1CC23CC24CCC(OC2OCC(O)C(O)C2O)C(C)(C)C4CC=C3C2(C)CC3OC4(CC(C)C3C12C)OC(O)C1(C)OC41|48|674.8280000000003|2.9487000000000005|156.67000000000002|11|4|0.918918918918919|3|9|0|48|36|0.1983763747315937|0.09644092817770795|7.190522840793108|3.4998626023441517|0|0|False
|
71 |
+
640.0|2’-deoxyuridine|1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidine-2,4-dione|O=c1ccn(C2CC(O)C(CO)O2)c(=O)[nH]1|16|228.20399999999998|-1.8226999999999993|104.55000000000001|6|3|0.5555555555555556|2|2|1|16|6|0.552382719533219|0.32881089343575925|3.4575535682360012|1.247112165436718|1|0|False
|
72 |
+
596.0|Cytarabine hydrochloride|4-amino-1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|Nc1ccn(C2OC(CO)C(O)C2O)c(=O)n1|17|243.21900000000002|-2.563|130.82999999999998|8|4|0.5555555555555556|2|2|1|17|8|0.4489304892314893|0.18860344125062514|3.548894614600127|1.6478124261033116|1|0|False
|
73 |
+
5375662.0|trans-Zeatin-riboside|2-(hydroxymethyl)-5-[6-[[(E)-4-hydroxy-3-methylbut-2-enyl]amino]purin-9-yl]oxolane-3,4-diol|CC(=CCNc1ncnc2c1ncn2C1OC(CO)C(O)C1O)CO|25|351.36300000000006|-1.2117|145.78|10|5|0.5333333333333333|6|3|2|25|8|0.40449115551681625|0.20327430040647024|3.9559301432792626|1.339358707119148|1|0|False
|
74 |
+
1835.0|5-Methyl-2'-deoxycytidine|4-amino-1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidin-2-one|Cc1cn(C2CC(O)C(CO)O2)c(=O)nc1N|17|241.24699999999996|-1.2253799999999995|110.6|7|3|0.6|2|2|1|17|6|0.5979214123745736|0.29479101867497837|3.566450315652517|1.1019204230282296|1|0|False
|
75 |
+
53398647.0|Cytidine 5'-triphosphate (disodium salt)|[[[5-(4-amino-2-oxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl]oxy-oxidophosphoryl] hydrogen phosphate |Nc1ccn(C2OC(COP(=O)(O)OP(=O)([O-])OP(=O)([O-])O)C(O)C2O)c(=O)n1|29|481.14000000000016|-3.4759999999999986|276.0799999999999|15|5|0.5555555555555556|8|2|1|29|14|0.22541264416469642|0.1419968753465064|5.071692352902783|1.1942632211265276|0|0|False
|
76 |
+
262543.0|5-Methylcytidine|4-amino-1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidin-2-one|Cc1cn(C2OC(CO)C(O)C2O)c(=O)nc1N|18|257.246|-2.2545799999999994|130.83|8|4|0.6|2|2|1|18|8|0.4701900203991378|0.19188032083627155|3.6185412012264013|1.2991831736939943|1|0|False
|
77 |
+
249989.0|5-Methyluridine|1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidine-2,4-dione|Cc1cn(C2OC(CO)C(O)C2O)c(=O)[nH]c1=O|18|258.23|-2.5434799999999993|124.78|7|4|0.6|2|2|1|18|8|0.4628801121855684|0.22236210404580592|3.4482133166110174|1.2290593987107328|1|0|False
|
78 |
+
13401.0|3-Hydroxypicolinic acid|3-hydroxypyridine-2-carboxylic acid|O=C(O)c1ncccc1O|10|139.10999999999999|0.4853999999999999|70.42|3|2|0.0|1|1|1|10|0|0.5933874162122502|0.298934256110774|2.01811039231654|-0.41014214009500005|1|0|False
|
79 |
+
4349538.0|NADH, disodium salt hydrate|[[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-oxidophosphoryl] [5-(3-carbamoyl-4H-pyridin-1-yl)-3,4-dihydroxyoxolan-2-yl]methyl phosphate|NC(=O)C1=CN(C2OC(COP(=O)([O-])OP(=O)([O-])OCC3OC(n4cnc5c(N)ncnc54)C(O)C3O)C(O)C2O)C=CC1|44|663.4300000000004|-3.950399999999993|323.2800000000001|20|6|0.5238095238095238|11|5|2|44|20|0.12490354999455602|0.11711205769249591|5.678699754875055|0.6523253136204249|0|0|False
|
80 |
+
5625.0|Delavirdine (mesylate)|N-[2-[4-[3-(propan-2-ylamino)pyridin-2-yl]piperazine-1-carbonyl]-1H-indol-5-yl]methanesulfonamide|CC(C)Nc1cccnc1N1CCN(C(=O)c2cc3cc(NS(C)(=O)=O)ccc3[nH]2)CC1|32|456.5720000000002|2.7171000000000003|110.43|6|3|0.36363636363636365|6|4|3|32|0|0.5260863337385334|0.7849383937725847|2.5284878962420905|-1.7469754852284307|1|0|False
|
81 |
+
254731.0|uridine triacetate|[3,4-diacetyloxy-5-(2,4-dioxopyrimidin-1-yl)oxolan-2-yl]methyl acetate|CC(=O)OCC1OC(n2ccc(=O)[nH]c2=O)C(OC(C)=O)C1OC(C)=O|26|370.31400000000014|-1.1394999999999988|142.99|10|1|0.5333333333333333|5|2|1|26|8|0.5022752412372868|0.27310239595191593|3.644050594368478|1.1717654190198035|0|0|False
|
82 |
+
58460047.0|Doravirine (MK-1439)|3-chloro-5-[1-[(4-methyl-5-oxo-1H-1,2,4-triazol-3-yl)methyl]-2-oxo-4-(trifluoromethyl)pyridin-3-yl]oxybenzonitrile|Cn1c(Cn2ccc(C(F)(F)F)c(Oc3cc(Cl)cc(C#N)c3)c2=O)n[nH]c1=O|29|425.7540000000001|2.654580000000001|105.69999999999999|7|1|0.17647058823529413|4|3|3|29|0|0.6914051268589833|0.7336281656644693|3.0219252915309625|-1.6455219788390623|1|0|False
|
83 |
+
76450047.0|SGC 0946|1-[3-[[5-(4-amino-5-bromopyrrolo[2,3-d]pyrimidin-7-yl)-3,4-dihydroxyoxolan-2-yl]methyl-propan-2-ylamino]propyl]-3-(4-tert-butylphenyl)urea|CC(C)N(CCCNC(=O)Nc1ccc(C(C)(C)C)cc1)CC1OC(n2cc(Br)c3c(N)ncnc32)C(O)C1O|40|618.5770000000002|3.614900000000002|150.79|9|5|0.5357142857142857|9|4|3|40|8|0.22841744599826647|0.22982258021111773|4.078247297329462|-0.7007523337719948|0|0|False
|
84 |
+
328839.0|Zebularine|1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|O=c1ncccn1C1OC(CO)C(O)C1O|16|228.204|-2.1451999999999996|104.81000000000002|7|3|0.5555555555555556|2|2|1|16|8|0.5330176479739586|0.29115142413250616|3.5563030967366647|0.9772741509174374|1|0|False
|
85 |
+
6451164.0|Rilpivirine|4-[[4-[4-[(E)-2-cyanoethenyl]-2,6-dimethylanilino]pyrimidin-2-yl]amino]benzonitrile|Cc1cc(C=CC#N)cc(C)c1Nc1ccnc(Nc2ccc(C#N)cc2)n1|28|366.4280000000001|4.989100000000003|97.42|6|2|0.09090909090909091|5|3|3|28|0|0.6174794624332792|0.8431090497228149|2.682510236970355|-1.1293515643212821|1|0|False
|
86 |
+
76450046.0|EPZ004777|1-[3-[[5-(4-aminopyrrolo[2,3-d]pyrimidin-7-yl)-3,4-dihydroxyoxolan-2-yl]methyl-propan-2-ylamino]propyl]-3-(4-tert-butylphenyl)urea|CC(C)N(CCCNC(=O)Nc1ccc(C(C)(C)C)cc1)CC1OC(n2ccc3c(N)ncnc32)C(O)C1O|39|539.6810000000004|2.8524000000000007|150.79|9|5|0.5357142857142857|9|4|3|39|8|0.26053822184509995|0.24195225681633897|3.956960961496261|-0.6697887882081127|0|0|False
|
87 |
+
4984.0|Puromycin 2HCl|2-amino-N-[5-[6-(dimethylamino)purin-9-yl]-4-hydroxy-2-(hydroxymethyl)oxolan-3-yl]-3-(4-methoxyphenyl)propanamide|COc1ccc(CC(N)C(=O)NC2C(CO)OC(n3cnc4c(N(C)C)ncnc43)C2O)cc1|34|471.5180000000002|-0.7936999999999972|160.88|11|4|0.45454545454545453|8|4|3|34|10|0.32750374532544374|0.2740730496068429|3.938746143561504|0.4059082870695618|0|0|False
|
88 |
+
5468049.0|Triapine|[(Z)-(3-aminopyridin-2-yl)methylideneamino]thiourea|NC(=S)NN=Cc1ncccc1N|13|195.251|-0.16910000000000014|89.32|4|3|0.0|2|1|1|13|0|0.3461858313292753|0.3468357875734713|2.880743596162672|-1.803547330412908|1|0|False
|
89 |
+
74405855.0|VER155008|4-[[5-[6-amino-8-[(3,4-dichlorophenyl)methylamino]purin-9-yl]-3,4-dihydroxyoxolan-2-yl]methoxymethyl]benzonitrile|N#Cc1ccc(COCC2OC(n3c(NCc4ccc(Cl)c(Cl)c4)nc4c(N)ncnc43)C(O)C2O)cc1|38|556.4100000000003|3.0350800000000007|164.35999999999999|11|4|0.28|8|5|4|38|8|0.2535931056491399|0.25407821914119455|3.881323921608492|-0.48547704742053677|0|0|False
|
90 |
+
3032861.0|6-Thio-dG|2-amino-9-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-3H-purine-6-thione|Nc1nc(=S)c2ncn(C3CC(O)C(CO)O3)c2[nH]1|19|283.313|-0.28821000000000024|122.21000000000001|8|4|0.5|2|3|2|19|6|0.5585666589379309|0.29376434618526803|3.8831343306462536|0.7695013335480106|1|0|False
|
91 |
+
23712387.0|8-Bromo-cAMP|6-(6-amino-8-bromopurin-9-yl)-2-oxido-2-oxo-4a,6,7,7a-tetrahydro-4H-furo[3,2-d][1,3,2]dioxaphosphinin-7-ol|Nc1ncnc2c1nc(Br)n2C1OC2COP(=O)([O-])OC2C1O|23|407.0970000000001|-0.6869000000000003|157.67|11|2|0.5|1|4|2|23|10|0.4580747351009518|0.28611028834400404|4.886716744658878|0.5502166703091175|0|0|False
|
92 |
+
16760396.0|Dibutyryl-cAMP (Bucladesine)|[6-[6-(butanoylamino)purin-9-yl]-2-oxido-2-oxo-4a,6,7,7a-tetrahydro-4H-furo[3,2-d][1,3,2]dioxaphosphinin-7-yl] butanoate|CCCC(=O)Nc1ncnc2c1ncn2C1OC2COP(=O)([O-])OC2C1OC(=O)CCC|32|468.3830000000003|1.057999999999999|166.82|12|1|0.6111111111111112|7|4|2|32|10|0.45563025867212836|0.35203094284843184|4.558877319850212|0.39303810441434994|0|0|False
|
93 |
+
236184.0|Bromodeoxyuridine (BrdU)|5-bromo-1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidine-2,4-dione|O=c1[nH]c(=O)n(C2CC(O)C(CO)O2)cc1Br|17|307.1|-1.0602|104.55000000000001|6|3|0.5555555555555556|2|2|1|17|6|0.6458903815058055|0.3614417237796252|3.5611586768565298|0.865449848697765|1|0|False
|
94 |
+
409805.0|NSC 23766|6-N-[2-[5-(diethylamino)pentan-2-ylamino]-6-methylpyrimidin-4-yl]-2-methylquinoline-4,6-diamine|CCN(CC)CCCC(C)Nc1nc(C)cc(Nc2ccc3nc(C)cc(N)c3c2)n1|31|421.5930000000002|4.889840000000003|91.99|7|3|0.4583333333333333|10|3|3|31|2|0.42909503459369336|0.5500110053919122|3.111060885927593|-1.299117956324184|1|0|False
|
95 |
+
5604.0|Tubercidin|2-(4-aminopyrrolo[2,3-d]pyrimidin-7-yl)-5-(hydroxymethyl)oxolane-3,4-diol|Nc1ncnc2c1ccn2C1OC(CO)C(O)C1O|19|266.25699999999995|-1.3750000000000007|126.65|8|4|0.45454545454545453|2|3|2|19|8|0.5275658981283201|0.2793960532890429|3.649636728918657|1.0957341600283055|1|0|False
|
96 |
+
5270.0|SQ22536|9-(oxolan-2-yl)purin-6-amine|Nc1ncnc2c1ncn2C1CCCO1|15|205.22099999999995|0.7174999999999998|78.85000000000001|6|1|0.4444444444444444|1|3|2|15|2|0.7415418091420238|0.5011659803359577|2.957435140772226|0.09555946645130006|1|0|False
|
97 |
+
1830.0|5-Iodotubercidin|2-(4-amino-5-iodopyrrolo[2,3-d]pyrimidin-7-yl)-5-(hydroxymethyl)oxolane-3,4-diol|Nc1ncnc2c1c(I)cn2C1OC(CO)C(O)C1O|20|392.1530000000001|-0.7703999999999998|126.65|8|4|0.45454545454545453|2|3|2|20|8|0.4946949398053289|0.31898773411320597|3.8417019552413096|0.9361303704689201|1|0|False
|
98 |
+
137795344.0|LLY-284|2-(4-aminopyrrolo[2,3-d]pyrimidin-7-yl)-5-[hydroxy(phenyl)methyl]oxolane-3,4-diol|Nc1ncnc2c1ccn2C1OC(C(O)c2ccccc2)C(O)C1O|25|342.355|0.3663000000000001|126.65|8|4|0.29411764705882354|3|4|3|25|10|0.5413254255627207|0.37218328519283805|3.804013205830268|0.7875002540522721|1|0|False
|
99 |
+
22608122.0|A-317491|5-[(3-phenoxyphenyl)methyl-(1,2,3,4-tetrahydronaphthalen-1-yl)carbamoyl]benzene-1,2,4-tricarboxylic acid|O=C(O)c1cc(C(=O)O)c(C(=O)N(Cc2cccc(Oc3ccccc3)c2)C2CCCc3ccccc32)cc1C(=O)O|42|565.5780000000003|6.293500000000005|141.43999999999997|5|3|0.15151515151515152|9|5|4|42|2|0.21488624386430227|0.5815074358568025|3.049057713232612|-0.6471647529123546|0|1|False
|
100 |
+
135402018.0|2'-Deoxyguanosine monohydrate|2-amino-9-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-1H-purin-6-one|Nc1nc2c(ncn2C2CC(O)C(CO)O2)c(=O)[nH]1|19|267.24499999999995|-1.6575000000000002|139.28|8|4|0.5|2|3|2|19|6|0.5103728382686842|0.269347900063449|3.674451892329852|0.9964049194238052|1|0|False
|
101 |
+
3479482.0|Khasianine|2-[4,5-dihydroxy-2-(hydroxymethyl)-6-(5',7,9,13-tetramethylspiro[5-oxapentacyclo[10.8.0.02,9.04,8.013,18]icos-18-ene-6,2'-piperidine]-16-yl)oxyoxan-3-yl]oxy-6-methyloxane-3,4,5-triol|CC1CCC2(NC1)OC1CC3C4CC=C5CC(OC6OC(CO)C(OC7OC(C)C(O)C(O)C7O)C(O)C6O)CCC5(C)C4CCC3(C)C1C2C|51|721.9289999999999|1.9629000000000019|179.56|12|7|0.9487179487179487|5|8|0|51|42|0.20575693831306613|0.05320714136035518|6.299089879544575|2.778465708811112|0|0|False
|
102 |
+
223996.0|Isoguanosine|6-amino-9-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-1H-purin-2-one|Nc1nc(=O)[nH]c2c1ncn2C1OC(CO)C(O)C1O|20|283.244|-2.6866999999999996|159.51|9|5|0.5|2|3|2|20|8|0.39813741611480424|0.16955766100498496|3.9266040531434063|1.0924191136221846|0|0|False
|
103 |
+
636.0|2'-Deoxyadenosine monohydrate|5-(6-aminopurin-9-yl)-2-(hydroxymethyl)oxolan-3-ol|Nc1ncnc2c1ncn2C1CC(O)C(CO)O1|18|251.24599999999998|-0.9508000000000003|119.31000000000002|8|3|0.5|2|3|2|18|6|0.628993302830891|0.36275129625779995|3.4841962284641177|1.0509928850136445|1|0|False
|
104 |
+
224.0|5'-Adenylic acid|[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl dihydrogen phosphate|Nc1ncnc2c1ncn2C1OC(COP(=O)(O)O)C(O)C1O|23|347.224|-1.8630000000000009|186.07|10|5|0.5|4|3|2|23|8|0.39017854457244017|0.17004544392240475|3.8048894493072236|1.2966757882455953|0|0|False
|
105 |
+
197.0|ADP|[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl phosphono hydrogen phosphate|Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OP(=O)(O)O)C(O)C1O|27|427.20300000000003|-1.7460000000000009|232.59999999999997|12|6|0.5|6|3|2|27|10|0.28079075856620234|0.1854502601232538|4.125297661526227|1.3883535027781002|0|0|False
|
106 |
+
1189.0|Xanthosine Dihydrate|9-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-3H-purine-2,6-dione|O=c1[nH]c(=O)c2ncn(C3OC(CO)C(O)C3O)c2[nH]1|20|284.228|-2.9755999999999996|153.46|8|5|0.5|2|3|2|20|8|0.39414005568585614|0.19608362822222536|3.7807159412552958|1.01042710944925|1|0|False
|
107 |
+
1172.0|Uridine 5'-monophosphate| [5-(2,4-dioxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methyl dihydrogen phosphate|O=c1ccn(C2OC(COP(=O)(O)O)C(O)C2O)c(=O)[nH]1|21|324.182|-2.7349|171.31|8|5|0.5555555555555556|4|2|1|21|8|0.3684552666679203|0.15597877417689962|3.7734762637985257|1.4404129497273093|0|0|False
|
108 |
+
5270766.0|Besifovir| [1-[(2-aminopurin-9-yl)methyl]cyclopropyl]oxymethylphosphonic acid|Nc1ncc2ncn(CC3(OCP(=O)(O)O)CC3)c2n1|20|299.2270000000001|0.09289999999999987|136.38|7|3|0.5|5|3|2|20|0|0.6607224923488081|0.4978291568100905|3.3061717394473806|-0.07424561820598499|1|0|False
|
109 |
+
135398661.0|3-Methyladenine (3-MA)|3-methyl-7H-purin-6-imine|Cn1cnc(N)c2ncnc1-2|11|149.15699999999998|-0.1029000000000001|69.62|5|1|0.16666666666666666|0|2|0|11|0|0.56733716665046|0.2032784259567452|2.723105050937429|-0.6977762718276544|1|0|False
|
110 |
+
1188.0|2,6-Dihydroxypurine|3,7-dihydropurine-2,6-dione|O=c1[nH]c(=O)c2[nH]cnc2[nH]1|11|152.113|-1.0605000000000004|94.4|3|3|0.0|0|2|2|11|0|0.44866812387763694|0.35339783622062465|2.855621676247601|-0.5009022124224091|1|0|False
|
111 |
+
93556.0|Dihydrothymine|5-methyl-1,3-diazinane-2,4-dione|CC1CNC(=O)NC1=O|9|128.131|-0.5381|58.2|2|2|0.6|0|1|0|9|2|0.46007898954400794|0.1326482996202467|3.3119003089141454|0.44655419103448896|1|0|False
|
112 |
+
667490.0|6-Mercaptopurine (6-MP) Monohydrate|3,7-dihydropurine-6-thione|S=c1nc[nH]c2nc[nH]c12|10|152.182|1.0154899999999998|57.36|3|2|0.0|0|2|2|10|0|0.5538546640663037|0.401710178865939|3.3698736107073435|-0.5813401541192601|1|0|False
|
113 |
+
5359277.0|6-Chloropurine|6-chloro-7H-purine|Clc1ncnc2[nH]cnc12|10|154.56|1.0062999999999998|54.46|3|1|0.0|0|2|2|10|0|0.5764610700745062|0.39268763760375136|2.8791181730415616|-0.8371085619823599|1|0|False
|
114 |
+
3758.0|IBMX|1-methyl-3-(2-methylpropyl)-7H-purine-2,6-dione|CC(C)Cn1c(=O)n(C)c(=O)c2nc[nH]c21|16|222.248|0.07929999999999976|72.68|5|1|0.5|2|2|2|16|0|0.7816409579492779|0.5763174668836328|2.741826113988761|-0.5844837553057187|1|0|False
|
115 |
+
3134.0|6-(Dimethylamino)purine|N,N-dimethyl-7H-purin-6-amine|CN(C)c1ncnc2nc[nH]c12|12|163.18400000000003|0.4189|57.7|4|1|0.2857142857142857|1|2|2|12|0|0.6614854966833106|0.4787635273265147|2.72771441208657|-1.0546352869607836|1|0|False
|
116 |
+
66950.0|Isocytosine|2-amino-1H-pyrimidin-6-one|Nc1nccc(=O)[nH]1|8|111.10399999999998|-0.6479000000000001|71.77|3|2|0.0|0|1|1|8|0|0.4670416161550293|0.27448926713621147|2.683688749895339|-0.2670902200110874|1|0|False
|
117 |
+
940.0|Nicotinamide Riboside Chloride (NIAGEN)|1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyridin-1-ium-3-carboxamide|NC(=O)c1ccc[n+](C2OC(CO)C(O)C2O)c1|18|255.25|-2.3154999999999983|116.89000000000001|5|4|0.45454545454545453|3|2|1|18|8|0.45250669782783515|0.2942152228724691|3.86876227657962|0.937203306924639|1|0|False
|
118 |
+
537159.0|Solasonine|2-[5-hydroxy-6-(hydroxymethyl)-2-(5',7,9,13-tetramethylspiro[5-oxapentacyclo[10.8.0.02,9.04,8.013,18]icos-18-ene-6,2'-piperidine]-16-yl)oxy-4-[3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxyoxan-3-yl]oxy-6-methyloxane-3,4,5-triol|CC1CCC2(NC1)OC1CC3C4CC=C5CC(OC6OC(CO)C(O)C(OC7OC(CO)C(O)C(O)C7O)C6OC6OC(C)C(O)C(O)C6O)CCC5(C)C4CCC3(C)C1C2C|62|884.07|-0.2128999999999921|258.71|17|10|0.9555555555555556|8|9|0|62|52|0.14154743306960962|0.051891635005697746|6.761253268994176|2.31431842126156|0|0|False
|
119 |
+
437080.0|Solamargine|2-[4-hydroxy-2-(hydroxymethyl)-6-(5',7,9,13-tetramethylspiro[5-oxapentacyclo[10.8.0.02,9.04,8.013,18]icos-18-ene-6,2'-piperidine]-16-yl)oxy-5-(3,4,5-trihydroxy-6-methyloxan-2-yl)oxyoxan-3-yl]oxy-6-methyloxane-3,4,5-triol|CC1CCC2(NC1)OC1CC3C4CC=C5CC(OC6OC(CO)C(OC7OC(C)C(O)C(O)C7O)C(O)C6OC6OC(C)C(O)C(O)C6O)CCC5(C)C4CCC3(C)C1C2C|61|868.071|0.8147000000000062|238.47999999999996|16|9|0.9555555555555556|7|9|0|61|52|0.16112638285309447|0.05545407431461643|6.750529522128671|2.3842641223563397|0|0|False
|
120 |
+
240980.0|2'-Deoxy-5-Fluorocytidine|4-amino-5-fluoro-1-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|Nc1nc(=O)n(C2CC(O)C(CO)O2)cc1F|17|245.20999999999998|-1.3947000000000003|110.60000000000001|7|3|0.5555555555555556|2|2|1|17|6|0.5910181091914157|0.2952824050893773|3.606938275518738|0.7886448709701591|1|0|False
|
121 |
+
284240.0|3'-Fluoro-3'-deoxythymidine (Alovudine)|1-[4-fluoro-5-(hydroxymethyl)oxolan-2-yl]-5-methylpyrimidine-2,4-dione|Cc1cn(C2CC(F)C(CO)O2)c(=O)[nH]c1=O|17|244.22199999999998|-0.53708|84.32|5|2|0.6|2|2|1|17|6|0.731622172408428|0.41646450691211045|3.586677740401681|0.507872196497247|1|0|False
|
122 |
+
137795344.0|LLY-283|2-(4-aminopyrrolo[2,3-d]pyrimidin-7-yl)-5-[hydroxy(phenyl)methyl]oxolane-3,4-diol|Nc1ncnc2c1ccn2C1OC(C(O)c2ccccc2)C(O)C1O|25|342.355|0.3663000000000001|126.65|8|4|0.29411764705882354|3|4|3|25|10|0.5413254255627207|0.37218328519283805|3.804013205830268|0.7875002540522721|1|0|False
|
123 |
+
155886644.0|EIDD-2801|[3,4-dihydroxy-5-[4-(hydroxyamino)-2-oxopyrimidin-1-yl]oxolan-2-yl]methyl 2-methylpropanoate|CC(C)C(=O)OCC1OC(n2ccc(=NO)[nH]c2=O)C(O)C1O|23|329.309|-1.7152999999999974|146.37|9|4|0.6153846153846154|4|2|1|23|8|0.29191347249939087|0.20002777173354394|4.189860275255468|1.2390387878063869|1|0|False
|
124 |
+
1651.0|3-Deazaadenosine hydrochloride|2-(4-aminoimidazo[4,5-c]pyridin-1-yl)-5-(hydroxymethyl)oxolane-3,4-diol |Nc1nccc2c1ncn2C1OC(CO)C(O)C1O|19|266.257|-1.3750000000000007|126.65|8|4|0.45454545454545453|2|3|2|19|8|0.5275658981283201|0.2793960532890429|3.656019022097176|1.2390070639558106|1|0|False
|
125 |
+
|Uridine 5-diphosphoglucose disodium salt|[[(2R,3S,4R,5R)-5-(2,4-dioxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-[(2R,3S,4S,5R)-1,3,4,5-tetrahydroxy-6-oxohexan-2-yl]oxyphosphoryl] phosphate|O=CC(O)C(O)C(O)C(CO)OP(=O)(OCC1OC(n2ccc(=O)[nH]c2=O)C(O)C1O)OP(=O)([O-])[O-]|36|564.2860000000002|-6.215099999999993|310.48999999999995|18|7|0.6666666666666666|13|2|1|36|18|0.08650496257970032|0.08720964070568987|5.369017676264923|1.3690714143354032|0|0|False
|
126 |
+
70639.0|3-Methylxanthine|3-methyl-7H-purine-2,6-dione|Cn1c(=O)[nH]c(=O)c2[nH]cnc21|12|166.14|-1.0501000000000005|83.54|4|2|0.16666666666666666|0|2|2|12|0|0.5233953855323817|0.4649418539506477|2.6826240788903544|-0.9379367321269417|1|0|False
|
127 |
+
72386069.0|Acelarin (NUC-1031)|benzyl 2-[[[5-(4-amino-2-oxopyrimidin-1-yl)-4,4-difluoro-3-hydroxyoxolan-2-yl]methoxy-phenoxyphosphoryl]amino]propanoate|CC(NP(=O)(OCC1OC(n2ccc(N)nc2=O)C(F)(F)C1O)Oc1ccccc1)C(=O)OCc1ccccc1|40|580.4810000000003|2.6443000000000003|164.22999999999996|11|3|0.32|11|4|3|40|10|0.22561375399287723|0.27837300531820436|4.196538827820978|0.34838553412737994|0|0|False
|
128 |
+
135401907.0|Allopurinol|1,5-dihydropyrazolo[3,4-d]pyrimidin-4-one|O=c1ncnc2[nH][nH]cc1-2|10|136.11399999999998|-0.4022000000000001|74.43|3|2|0.0|0|2|0|10|0|0.5169670612450268|0.16887078714484646|3.3223383321927544|0.49540116952077007|1|0|False
|
129 |
+
3366.0|Flucytosine|6-amino-5-fluoro-1H-pyrimidin-2-one|Nc1nc(=O)[nH]cc1F|9|129.09399999999997|-0.5088000000000001|71.77|3|2|0.0|0|1|1|9|0|0.4952172017431901|0.27854681358196376|3.1539953149178643|-0.7210492950621221|1|0|False
|
130 |
+
60871.0|Adefovir Dipivoxil|[2-(6-aminopurin-9-yl)ethoxymethyl-(2,2-dimethylpropanoyloxymethoxy)phosphoryl]oxymethyl 2,2-dimethylpropanoate|CC(C)(C)C(=O)OCOP(=O)(COCCn1cnc2c(N)ncnc21)OCOC(=O)C(C)(C)C|34|501.47700000000026|2.7025000000000006|166.97999999999996|13|1|0.65|11|2|2|34|0|0.2071524581096599|0.26242117414727406|3.2764101894129833|-0.33118745107923225|0|0|False
|
131 |
+
2265.0|Azathioprine| 6-(3-methyl-5-nitroimidazol-4-yl)sulfanyl-7H-purine|Cn1cnc([N+](=O)[O-])c1Sc1ncnc2nc[nH]c12|19|277.269|1.1457999999999997|115.41999999999999|8|1|0.1111111111111111|3|3|3|19|0|0.43328794180573177|0.5104763505055192|3.144692533268106|-1.257220744395016|1|0|False
|
132 |
+
2723601.0|Thioguanine|2-amino-3,7-dihydropurine-6-thione|Nc1nc(=S)c2[nH]cnc2[nH]1|11|167.197|0.5976899999999998|83.38|4|3|0.0|0|2|2|11|0|0.5014913838271434|0.43666862873652124|3.3037189467190657|-0.34551430559848184|1|0|False
|
133 |
+
135483437.0|Valaciclovir HCl|2-[(2-amino-6-oxo-1H-purin-9-yl)methoxy]ethyl 2-amino-3-methylbutanoate|CC(C)C(N)C(=O)OCCOCn1cnc2c(=O)[nH]c(N)nc21|23|324.341|-0.7975999999999985|151.14000000000004|9|3|0.5384615384615384|7|2|2|23|2|0.44214276395859164|0.3730958971915234|3.2609439427942917|-0.16316017429853905|1|0|False
|
134 |
+
657298.0|Propylthiouracil|6-propyl-2-sulfanylidene-1H-pyrimidin-4-one|CCCc1cc(=O)[nH]c(=S)[nH]1|11|170.237|1.38499|48.65|2|2|0.42857142857142855|2|1|1|11|0|0.6587322532597096|0.2559861332659623|2.683909567066461|-0.9512439041419182|1|0|False
|
135 |
+
3830.0|Kinetin|N-(furan-2-ylmethyl)-7H-purin-6-amine|c1coc(CNc2ncnc3nc[nH]c23)c1|16|215.216|1.5579999999999996|79.63|5|2|0.1|3|3|3|16|0|0.6925924860226649|0.711699598509513|2.490935013809871|-1.402265304972444|1|0|False
|
136 |
+
3324.0|Famciclovir|[2-(acetyloxymethyl)-4-(2-aminopurin-9-yl)butyl] acetate|CC(=O)OCC(CCn1cnc2cnc(N)nc21)COC(C)=O|23|321.33700000000016|0.5409999999999998|122.22|9|1|0.5|7|2|2|23|0|0.733115342152729|0.45204840142043734|2.806858282589218|-0.19600642021383782|1|0|False
|
137 |
+
1269845.0|2-Thiouracil|2-sulfanylidene-1H-pyrimidin-4-one |O=c1cc[nH]c(=S)[nH]1|8|128.156|0.43249|48.65|2|2|0.0|0|1|1|8|0|0.49770208173400393|0.21652997475728783|3.059775463182052|-0.5315741715414|1|0|False
|
138 |
+
667493.0|Methylthiouracil|6-methyl-2-sulfanylidene-1H-pyrimidin-4-one|Cc1cc(=O)[nH]c(=S)[nH]1|9|142.18300000000002|0.74091|48.65|2|2|0.2|0|1|1|9|0|0.5247689157489369|0.2203039974925121|3.0118414687640174|-1.153615214888089|1|0|False
|
139 |
+
1174.0|Uracil|1H-pyrimidine-2,4-dione|O=c1cc[nH]c(=O)[nH]1|8|112.088|-0.9368000000000001|65.72|2|2|0.0|0|1|1|8|0|0.4546939811074878|0.22106494195155435|2.763417421224011|0.034934357644975|1|0|False
|
140 |
+
62389.0|6-Benzylaminopurine|N-benzyl-7H-purin-6-amine|c1ccc(CNc2ncnc3nc[nH]c23)cc1|17|225.25500000000002|1.9649999999999996|66.49000000000001|4|2|0.08333333333333333|3|3|3|17|0|0.7147392861192281|0.6930151711707544|2.118574274148248|-0.9996364494840941|1|0|False
|
141 |
+
597.0|Cytosine|6-amino-1H-pyrimidin-2-one|Nc1cc[nH]c(=O)n1|8|111.104|-0.6479000000000001|71.77000000000001|3|2|0.0|0|1|1|8|0|0.4670416161550293|0.27448926713621147|3.295071966678556|0.31089129653276254|1|0|False
|
142 |
+
4564.0|NU2058|6-(cyclohexylmethoxy)-7H-purin-2-amine|Nc1nc(OCC2CCCCC2)c2nc[nH]c2n1|18|247.30199999999996|1.8941999999999999|89.71|5|2|0.5833333333333334|3|3|2|18|0|0.8640548613284358|0.6675384454567892|2.7087684013184763|-0.5693644300994334|1|0|False
|
143 |
+
99920.0|Adenosine Dialdehyde (ADOX)|2-[1-(6-aminopurin-9-yl)-2-oxoethoxy]-3-hydroxypropanal|Nc1ncnc2c1ncn2C(C=O)OC(C=O)CO|19|265.229|-1.3176|133.22|9|2|0.3|6|2|2|19|4|0.6142899241146795|0.40414391704379254|4.0613930320623375|0.27190180955298426|1|0|False
|
144 |
+
1135.0|Thymine|5-methyl-1H-pyrimidine-2,4-dione|Cc1c[nH]c(=O)[nH]c1=O|9|126.115|-0.6283800000000002|65.72|2|2|0.2|0|1|1|9|0|0.4857813089921775|0.22442590613459015|2.4711895456870963|-0.39816338972551113|1|0|False
|
145 |
+
393593.0|Namodenoson (CF-102)|5-[2-chloro-6-[(3-iodophenyl)methylamino]purin-9-yl]-3,4-dihydroxy-N-methyloxolane-2-carboxamide|CNC(=O)C1OC(n2cnc3c(NCc4cccc(I)c4)nc(Cl)nc32)C(O)C1O|30|544.7370000000002|1.061599999999999|134.42|9|4|0.3333333333333333|5|4|3|30|8|0.27679674050364184|0.3780075367245393|3.858289643137513|-0.35311044843938993|0|0|False
|
146 |
+
190.0|Adenine|7H-purin-6-amine|Nc1ncnc2[nH]cnc12|10|135.13|-0.0648999999999999|80.48|4|2|0.0|0|2|2|10|0|0.5296759293780585|0.46383358553871024|2.745044432988511|-0.22216883360209003|1|0|False
|
147 |
+
22041878.0|2'-Deoxyuridine 5'-monophosphate disodium salt|[5-(2,4-dioxopyrimidin-1-yl)-3-hydroxyoxolan-2-yl]methyl phosphate|O=c1ccn(C2CC(O)C(COP(=O)([O-])[O-])O2)c(=O)[nH]1|20|306.167|-2.9696999999999987|156.74|9|2|0.5555555555555556|4|2|1|20|6|0.5581147155997511|0.2665509998411342|4.253945014153231|0.8390224194285251|1|0|False
|
148 |
+
621.0|2'-Deoxyadenosine 5'-monophosphate|[5-(6-aminopurin-9-yl)-3-hydroxyoxolan-2-yl]methyl dihydrogen phosphate|Nc1ncnc2c1ncn2C1CC(O)C(COP(=O)(O)O)O1|22|331.225|-0.8338000000000003|165.83999999999997|9|4|0.5|4|3|2|22|6|0.518964726677587|0.2703973142533638|3.7330177714088277|1.0058182745873498|0|0|False
|
149 |
+
624.0|2'-Deoxycytidine 5'-monophosphate|[5-(4-amino-2-oxopyrimidin-1-yl)-3-hydroxyoxolan-2-yl]methyl dihydrogen phosphate|Nc1ccn(C2CC(O)C(COP(=O)(O)O)O2)c(=O)n1|20|307.199|-1.4168|157.13|8|4|0.5555555555555556|4|2|1|20|6|0.48991422293348935|0.21590188566756152|3.7558655269737438|1.3705532333776147|1|0|False
|
150 |
+
193.0|S-Adenosyl-L-homocysteine (SAH)|2-amino-4-[[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methylsulfanyl]butanoic acid|Nc1ncnc2c1ncn2C1OC(CSCCC(N)C(=O)O)C(O)C1O|26|384.41800000000006|-1.4370999999999983|182.62999999999997|11|5|0.5714285714285714|7|3|2|26|10|0.3524104766503097|0.1706266479700945|3.9878562207640913|0.8322716321994499|0|0|False
|
151 |
+
5513.0|Tomatine|2-[2-[4,5-dihydroxy-2-(hydroxymethyl)-6-(5',7,9,13-tetramethylspiro[5-oxapentacyclo[10.8.0.02,9.04,8.013,18]icosane-6,2'-piperidine]-16-yl)oxyoxan-3-yl]oxy-5-hydroxy-6-(hydroxymethyl)-4-(3,4,5-trihydroxyoxan-2-yl)oxyoxan-3-yl]oxy-6-(hydroxymethyl)oxane-3,4,5-triol|CC1CCC2(NC1)OC1CC3C4CCC5CC(OC6OC(CO)C(OC7OC(CO)C(O)C(OC8OCC(O)C(O)C8O)C7OC7OC(CO)C(O)C(O)C7O)C(O)C6O)CCC5(C)C4CCC3(C)C1C2C|72|1034.2000000000003|-2.6972999999999807|337.86|22|13|1.0|11|10|0|72|62|0.09370727646314425|0.03741186437663462|7.196015861300958|2.0025666159277615|0|0|False
|
152 |
+
4739.0|Pentostatin|3-[4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]-7,8-dihydro-4H-imidazo[4,5-d][1,3]diazepin-8-ol|OCC1OC(n2cnc3c2NC=NCC3O)CC1O|19|268.273|-0.9890000000000005|112.13000000000001|8|4|0.6363636363636364|2|3|1|19|8|0.5477635402256686|0.20674293672099894|4.441045872605159|1.0618509123565736|1|0|False
|
153 |
+
263976.0|2',3'-Dideoxyadenosine|[5-(6-aminopurin-9-yl)oxolan-2-yl]methanol|Nc1ncnc2c1ncn2C1CCC(CO)O1|17|235.24699999999996|0.07839999999999986|99.08000000000001|7|2|0.5|2|3|2|17|4|0.7642641295225605|0.4804378807684162|3.37615943374439|0.818243690205506|1|0|False
|
154 |
+
1599.0|Puromycin aminonucleoside|4-amino-2-[6-(dimethylamino)purin-9-yl]-5-(hydroxymethyl)oxolan-3-ol|CN(C)c1ncnc2c1ncn2C1OC(CO)C(N)C1O|21|294.315|-1.5297999999999983|122.55000000000001|9|3|0.5833333333333334|3|3|2|21|8|0.6229174346076342|0.35532968935631826|3.892502883730317|0.5775648178855525|1|0|False
|
155 |
+
3461.0|Gemcitabine|4-amino-1-[3,3-difluoro-4-hydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|Nc1ccn(C2OC(CO)C(O)C2(F)F)c(=O)n1|18|263.2|-1.2886|110.60000000000001|7|3|0.5555555555555556|2|2|1|18|6|0.6120873727300361|0.30156266317008307|3.8632725293690937|1.2986545260508495|1|0|False
|
156 |
+
4213.0|Mizoribine|1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]-5-hydroxyimidazole-4-carboxamide|NC(=O)c1ncn(C2OC(CO)C(O)C2O)c1O|18|259.218|-2.7009|151.06|8|5|0.5555555555555556|3|2|1|18|8|0.3957886051129175|0.147172771208249|3.773041201226402|0.9518781068023058|0|0|False
|
157 |
+
596.0|Cytarabine|4-amino-1-[3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one|Nc1ccn(C2OC(CO)C(O)C2O)c(=O)n1|17|243.21900000000002|-2.563|130.82999999999998|8|4|0.5555555555555556|2|2|1|17|8|0.4489304892314893|0.18860344125062514|3.548894614600127|1.6478124261033116|1|0|False
|
158 |
+
24208296.0|ATP disodium|[[[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl]oxy-oxidophosphoryl] hydrogen phosphate|Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OP(=O)([O-])OP(=O)([O-])O)C(O)C1O|31|505.1660000000001|-2.8929999999999993|284.79|16|5|0.5|8|3|2|31|14|0.22489036963823456|0.2003365594519469|5.011918199029612|1.0168482980546354|0|0|False
|
159 |
+
925.0|NAD+| [[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl] [5-(3-carbamoylpyridin-1-ium-1-yl)-3,4-dihydroxyoxolan-2-yl]methyl phosphate|NC(=O)c1ccc[n+](C2OC(COP(=O)([O-])OP(=O)(O)OCC3OC(n4cnc5c(N)ncnc54)C(O)C3O)C(O)C2O)c1|44|663.4300000000004|-3.6478999999999964|321.0900000000001|18|7|0.47619047619047616|11|5|3|44|20|0.0767773571622719|0.12396315529857062|5.338067205606141|0.7011583949037886|0|0|False
|
160 |
+
9896099.0|Abacavir sulfate|[(1S,4R)-4-[2-amino-6-(cyclopropylamino)purin-9-yl]cyclopent-2-en-1-yl]methanol|Nc1nc(NC2CC2)c2ncn(C3C=CC(CO)C3)c2n1|21|286.33900000000006|1.0922999999999998|101.88|7|3|0.5|4|4|2|21|4|0.7272302824304794|0.532844209200051|3.688999973120933|0.026597937866581003|1|0|False
|
161 |
+
3743107.0|Citicoline sodium|[[5-(4-amino-2-oxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-oxidophosphoryl] 2-(trimethylazaniumyl)ethyl phosphate|C[N+](C)(C)CCOP(=O)([O-])OP(=O)([O-])OCC1OC(n2ccc(N)nc2=O)C(O)C1O|31|487.3190000000002|-2.862499999999996|218.55|14|3|0.7142857142857143|10|2|1|31|12|0.22357629050096392|0.11977524271877953|5.089091608912945|1.1662562054042096|0|0|False
|
162 |
+
156610574.0|Cangrelor Tetrasodium|[dichloro(phosphonato)methyl]-[[3,4-dihydroxy-5-[6-(2-methylsulfanylethylamino)-2-(3,3,3-trifluoropropylsulfanyl)purin-9-yl]oxolan-2-yl]methoxy-oxidophosphoryl]oxyphosphinate|CSCCNc1nc(SCCC(F)(F)F)nc2c1ncn2C1OC(COP(=O)([O-])OP(=O)([O-])C(Cl)(Cl)P(=O)([O-])[O-])C(O)C1O|44|772.3340000000004|0.32249999999999895|267.23|19|3|0.7058823529411765|15|3|2|44|12|0.0723261808950979|0.21464495331026204|5.531489339568375|-0.1466218200172386|0|0|False
|
163 |
+
3986128.0|Flavin mononucleotide|[5-(7,8-dimethyl-2,4-dioxobenzo[g]pteridin-10-yl)-2,3,4-trihydroxypentyl] hydrogen phosphate|Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(CC(O)C(O)C(O)COP(=O)([O-])O)c2cc1C|31|455.3400000000002|-2.2385599999999988|210.92|11|5|0.4117647058823529|7|3|1|31|8|0.18980818802860375|0.10778902916070311|4.425392476983317|0.08008486944120105|0|0|False
|
164 |
+
4661174.0|beta-Nicotinamide Mononucleotide|[5-(3-carbamoylpyridin-1-ium-1-yl)-3,4-dihydroxyoxolan-2-yl]methyl hydrogen phosphate|NC(=O)c1ccc[n+](C2OC(COP(=O)([O-])O)C(O)C2O)c1|22|334.22099999999995|-2.830499999999999|166.25|7|4|0.45454545454545453|5|2|1|22|10|0.33008457497205185|0.2491576086775115|4.570243080664501|0.7442067405707499|1|0|False
|
165 |
+
13013858.0|Citicholine|[[5-(4-amino-2-oxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl] 2-(trimethylazaniumyl)ethyl phosphate|C[N+](C)(C)CCOP(=O)([O-])OP(=O)(O)OCC1OC(n2ccc(N)nc2=O)C(O)C1O|31|488.32700000000017|-2.2304999999999957|215.71999999999997|13|4|0.7142857142857143|10|2|1|31|12|0.20278188342870285|0.09819520093202008|4.814314685836021|1.371965225840339|0|0|False
|
166 |
+
314.0|5'-Cytidylic acid|[5-(4-amino-2-oxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methyl dihydrogen phosphate|Nc1ccn(C2OC(COP(=O)(O)O)C(O)C2O)c(=O)n1|21|323.19800000000004|-2.446|177.36|9|5|0.5555555555555556|4|2|1|21|8|0.3735901348140529|0.13458832466329937|3.826860604457865|1.5683275858392525|0|0|False
|
167 |
+
5644.0|UTP, Trisodium Salt|[[[5-(2,4-dioxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-oxidophosphoryl]oxy-oxidophosphoryl] hydrogen phosphate|O=c1ccn(C2OC(COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])O)C(O)C2O)c(=O)[nH]1|29|481.1160000000001|-4.396899999999998|272.86|15|4|0.5555555555555556|8|2|1|29|14|0.2551718313877904|0.18854093107244396|5.328826360999949|0.9171505343682239|0|0|False
|
168 |
+
136005379.0|Inosine 5'-triphosphate trisodium salt|[[[3,4-dihydroxy-5-(6-oxo-1H-purin-9-yl)oxolan-2-yl]methoxy-oxidophosphoryl]oxy-oxidophosphoryl] hydrogen phosphate|O=c1[nH]cnc2c1ncn2C1OC(COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])O)C(O)C1O|31|505.1420000000001|-3.8139|281.57|16|4|0.5|8|3|2|31|14|0.2500591032919711|0.2656860254054147|5.422037939530075|0.769043034145274|0|0|False
|
169 |
+
135441845.0|Guanosine 5'-monophosphate disodium salt|[5-(2-amino-6-oxo-1H-purin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl phosphate|Nc1nc(=O)c2ncn(C3OC(COP(=O)([O-])[O-])C(O)C3O)c2[nH]1|24|361.20700000000005|-3.8336999999999994|211.7|12|4|0.5|4|3|2|24|8|0.38704670523942886|0.17415969497343173|4.404662987508528|0.877560502907929|0|0|False
|
170 |
+
197.0|Adenosine 5'-diphosphate sodium salt|[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methyl phosphono hydrogen phosphate|Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OP(=O)(O)O)C(O)C1O|27|427.20300000000003|-1.7460000000000009|232.59999999999997|12|6|0.5|6|3|2|27|10|0.28079075856620234|0.1854502601232538|4.125297661526227|1.3883535027781002|0|0|False
|
171 |
+
44134852.0|Triphosphopyridine nucleotide disodium salt|[2-(6-aminopurin-9-yl)-5-[[[[5-(3-carbamoylpyridin-1-ium-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-oxidophosphoryl]oxy-oxidophosphoryl]oxymethyl]-4-hydroxyoxolan-3-yl] hydrogen phosphate|NC(=O)c1ccc[n+](C2OC(COP(=O)([O-])OP(=O)([O-])OCC3OC(n4cnc5c(N)ncnc54)C(OP(=O)([O-])O)C3O)C(O)C2O)c1|48|741.3930000000005|-4.794899999999992|373.2800000000001|21|6|0.47619047619047616|13|5|3|48|22|0.07066446785799431|0.11623903144299709|5.927167227885624|0.5859635499812229|0|0|False
|
172 |
+
137795696.0|Diquafosol Tetrasodium|[[5-(2,4-dioxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-oxidophosphoryl] [[[5-(2,4-dioxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-oxidophosphoryl]oxy-oxidophosphoryl] phosphate|O=c1ccn(C2OC(COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])OP(=O)([O-])OCC3OC(n4ccc(=O)[nH]c4=O)C(O)C3O)C(O)C2O)c(=O)[nH]1|49|786.2750000000004|-6.718199999999994|415.7700000000001|25|6|0.5555555555555556|14|4|2|49|24|0.09690594623547874|0.1005939099621791|6.025108139184198|0.5218458453224182|0|0|False
|
173 |
+
258.0|Blasticidin S|3-[[3-amino-5-[carbamimidoyl(methyl)amino]pentanoyl]amino]-6-(4-amino-2-oxopyrimidin-1-yl)-3,6-dihydro-2H-pyran-2-carboxylic acid |CN(CCC(N)CC(=O)NC1C=CC(n2ccc(N)nc2=O)OC1C(=O)O)C(=N)N|30|422.44600000000025|-2.2187299999999945|215.67|9|6|0.47058823529411764|8|2|1|30|8|0.150577431129479|0.10154891406114437|4.490427372075242|1.0509590801570832|0|0|False
|
174 |
+
135545622.0|Disodium 5'-Inosinate|[3,4-dihydroxy-5-(6-oxo-1H-purin-9-yl)oxolan-2-yl]methyl phosphate|O=c1[nH]cnc2c1ncn2C1OC(COP(=O)([O-])[O-])C(O)C1O|23|346.192|-3.4158999999999984|185.67999999999998|11|3|0.5|4|3|2|23|8|0.4706220858439375|0.2667159039917633|4.448493844911617|0.9150235712723259|0|0|False
|
175 |
+
4014956.0|Disodium uridine-5'-monophosphate|[5-(2,4-dioxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methyl phosphate|O=c1ccn(C2OC(COP(=O)([O-])[O-])C(O)C2O)c(=O)[nH]1|21|322.166|-3.9988999999999972|176.97|10|3|0.5555555555555556|4|2|1|21|8|0.4658768727043418|0.2057858099091487|4.292027087974349|1.133455884639928|0|0|False
|
176 |
+
24208296.0|Adenosine disodium triphosphate|[[[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl]oxy-oxidophosphoryl] hydrogen phosphate|Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OP(=O)([O-])OP(=O)([O-])O)C(O)C1O|31|505.1660000000001|-2.8929999999999993|284.79|16|5|0.5|8|3|2|31|14|0.22489036963823456|0.2003365594519469|5.011918199029612|1.0168482980546354|0|0|False
|
177 |
+
65040.0|5-Methylcytosine|6-amino-5-methyl-1H-pyrimidin-2-one|Cc1cnc(=O)[nH]c1N|9|125.131|-0.33948000000000017|71.77|3|2|0.2|0|1|1|9|0|0.4979194736965626|0.27879359761277905|2.819639545687096|-0.4087731108036112|1|0|False
|
178 |
+
135432442.0|Guanosine 5'-triphosphate trisodium salt|[[[5-(2-amino-6-oxo-1H-purin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl]oxy-oxidophosphoryl] phosphate|Nc1nc2c(ncn2C2OC(COP(=O)(O)OP(=O)([O-])OP(=O)([O-])[O-])C(O)C2O)c(=O)[nH]1|32|520.1570000000002|-4.231699999999997|307.59|17|5|0.5|8|3|2|32|12|0.2058114066028432|0.198961655563169|5.131529164316129|0.9874606492575717|0|0|False
|
179 |
+
4073694.0|Thymidine 5'-monophosphate disodium salt|[3-hydroxy-5-(5-methyl-2,4-dioxopyrimidin-1-yl)oxolan-2-yl]methyl phosphate|Cc1cn(C2CC(O)C(COP(=O)([O-])[O-])O2)c(=O)[nH]c1=O|21|320.194|-2.6612799999999988|156.73999999999998|9|2|0.6|4|2|1|21|6|0.5689477538848637|0.27086147634366586|4.168970366666491|0.6891982972581047|1|0|False
|
180 |
+
135545622.0|Disodium 5'-inosinate monohydrate|[3,4-dihydroxy-5-(6-oxo-1H-purin-9-yl)oxolan-2-yl]methyl phosphate|O=c1[nH]cnc2c1ncn2C1OC(COP(=O)([O-])[O-])C(O)C1O|23|346.192|-3.4158999999999984|185.67999999999998|11|3|0.5|4|3|2|23|8|0.4706220858439375|0.2667159039917633|4.448493844911617|0.9150235712723259|0|0|False
|
181 |
+
44287897.0|Uridine-5'-diphosphate disodium salt|[[5-(2,4-dioxopyrimidin-1-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-oxidophosphoryl] hydrogen phosphate |O=c1ccn(C2OC(COP(=O)([O-])OP(=O)([O-])O)C(O)C2O)c(=O)[nH]1|25|402.14500000000004|-3.881899999999999|223.49999999999997|12|4|0.5555555555555556|6|2|1|25|12|0.33530080341395063|0.10747213319277059|4.854100270601977|1.0638946198671397|0|0|False
|
182 |
+
238.0|ATP|[[5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl] phosphono hydrogen phosphate|Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OP(=O)(O)OP(=O)(O)O)C(O)C1O|31|507.1820000000001|-1.629000000000001|279.13|14|7|0.5|8|3|2|31|12|0.19742203378388068|0.16844205143530697|4.41361201850629|1.3438955794867324|0|0|False
|
183 |
+
135398638.0|Hypoxanthine|1,7-dihydropurin-6-one|O=c1[nH]cnc2nc[nH]c12|10|136.114|-0.35380000000000006|74.43|3|2|0.0|0|2|2|10|0|0.5192368565638923|0.39726961313423786|2.9399791809991207|-0.25903651864364996|1|0|False
|
184 |
+
312827.0|Tomatidine|5',7,9,13-tetramethylspiro[5-oxapentacyclo[10.8.0.02,9.04,8.013,18]icosane-6,2'-piperidine]-16-ol|CC1CCC2(NC1)OC1CC3C4CCC5CC(O)CCC5(C)C4CCC3(C)C1C2C|30|415.66200000000026|5.366800000000006|41.489999999999995|3|2|1.0|0|6|0|30|24|0.5589811430146876|0.21002733429425308|5.369563102779233|2.885757885743727|0|0|False
|
185 |
+
9449.0|Adenine sulfate|7H-purin-6-amine|Nc1ncnc2nc[nH]c12|10|135.13|-0.0648999999999999|80.47999999999999|4|2|0.0|0|2|2|10|0|0.5296759293780585|0.46383358553871024|2.6602057061980595|-0.48016737706869|1|0|False
|
TransAntivirus/download_pubchem/download.sh
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Fetch PubChem "CURRENT-Full" compound XML archives one at a time, verify each
# against its published MD5, decompress it, append the fields pulled out by
# extract_info.py to iupacs_properties.txt, and delete the XML again.
#
# extract_info.py is invoked by relative path, so run this script from the
# directory that contains it.

# Range bounds, in units of 100000 compound IDs (they are zero-padded to four
# digits and followed by "00000"/"00001" to build the file name).
MIN=0
MAX=1555
# Each archive spans STEP units, e.g. Compound_000000001_000500000.xml.
STEP=5

PREFIX="ftp://ftp.ncbi.nlm.nih.gov/pubchem/Compound/CURRENT-Full/XML/"
# fill this in
#DOWNLOAD_DIR=/user4/c5t5-main/download_pubchem/
DOWNLOAD_DIR=/root/autodl-tmp/c5t5-main/download_pubchem/

for i in $(seq "$MIN" "$STEP" "$MAX"); do
    # The lower bound of a file is always one step below its upper bound.
    # Deriving it here (instead of carrying it over from the previous
    # iteration) keeps the name correct for any MIN, and lets us skip the
    # i=0 iteration, which has no corresponding file.
    lower=$((i - STEP))
    if ((lower < 0)); then
        continue
    fi
    prev_num=$(printf "%04d" "$lower")
    num=$(printf "%04d" "$i")
    fn="Compound_${prev_num}00001_${num}00000.xml"
    echo "getting" "$fn"

    if ! [[ -f "$DOWNLOAD_DIR$fn" ]]; then
        # Download and verify inside a subshell so the working directory of
        # the main script is never changed, even on failure.
        if ! (
            cd "$DOWNLOAD_DIR" || exit 1
            wget "${PREFIX}${fn}.gz" || exit 1
            wget "${PREFIX}${fn}.gz.md5" || exit 1
            if md5sum -c "${fn}.gz.md5"; then
                echo md5 passed
                rm "${fn}.gz.md5"
                #gunzip $fn
                pigz -d -p 8 "${fn}.gz"
            else
                echo md5 failed
                exit 1
            fi
        ); then
            # Drop whatever was fetched so a later run starts clean instead of
            # re-checking a stale archive, and do not run the extractor on a
            # file that is missing or corrupt.
            rm -f "${DOWNLOAD_DIR}${fn}.gz" "${DOWNLOAD_DIR}${fn}.gz.md5"
            echo "skipping" "$fn"
            continue
        fi
    fi

    python extract_info.py "$DOWNLOAD_DIR$fn" "<PC-Compound>" Preferred 11 34 -26 Traditional 11 34 -26 "Canonical<" 11 34 -26 Mass 12 34 -26 Formula 11 34 -26 "Log P" 11 34 -26 >> "${DOWNLOAD_DIR}iupacs_properties.txt"
    rm "$DOWNLOAD_DIR$fn"
done
|
TransAntivirus/download_pubchem/extract_info.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
import copy
|
3 |
+
from multiprocessing import Pool
|
4 |
+
import numpy as np
|
5 |
+
import itertools
|
6 |
+
|
7 |
+
# look through fn for all the provided search terms (keys),
|
8 |
+
# and extract values as directed by the offset, start & end cols
|
9 |
+
|
10 |
+
# example usage to get IUPAC:
|
11 |
+
# python extract_info.py Compounds.xml "<PC-Compound>" Systematic 11 34 -26
|
12 |
+
|
13 |
+
# Systematic 11 34 -26
|
14 |
+
# Mass 12 34 -26
|
15 |
+
# Formula 11 34 -26
|
16 |
+
# Log P 11 34 -26
|
17 |
+
|
18 |
+
# chemicals are separated by the 2nd arg
|
19 |
+
|
20 |
+
# Number of consecutive input lines scanned by one find_relevant call.
LINES_PER_PROC = 10000

# Check the command line before indexing into it, so that a call with too few
# arguments fails with the usage message rather than a bare IndexError.
assert len(sys.argv) > 3, "need to provide search terms, etc."
assert len(sys.argv[3:]) % 4 == 0, "each search term needs offset & cols"

# Positional arguments: input file, then the marker that starts a new chemical.
fn = sys.argv[1]
chemical_separator = sys.argv[2]

# Everything after that comes in groups of four:
#   <search term> <line offset> <start column> <end column>
# The four lists below are parallel; index j describes the j-th search term.
search_terms = []
line_offsets = []
start_cols = []
end_cols = []

for i in range(3, len(sys.argv), 4):
    search_terms.append(sys.argv[i])
    line_offsets.append(int(sys.argv[i+1]))
    start_cols.append(int(sys.argv[i+2]))
    end_cols.append(int(sys.argv[i+3]))

# Filled with the contents of the input file once it has been read.
lines = []
|
41 |
+
|
42 |
+
def find_relevant(start_line):
    """Return the indices of interesting lines in one chunk of the input.

    Scans at most LINES_PER_PROC entries of the module-level ``lines`` list,
    beginning at ``start_line``, and collects the index of every line that
    contains ``chemical_separator`` or any of ``search_terms``.

    Each index is reported once, in ascending order, even when a line matches
    several terms; reporting it repeatedly would make the caller process the
    same line (and, for a separator line, emit an extra empty row) more than
    once.

    :param start_line: index into ``lines`` at which this chunk begins.
    :return: list of matching line indices; empty if ``start_line`` is at or
        past the end of ``lines``.
    """
    stop = min(start_line + LINES_PER_PROC, len(lines))
    hits = []
    for idx in range(start_line, stop):
        text = lines[idx]
        if chemical_separator in text or any(term in text for term in search_terms):
            hits.append(idx)
    return hits
|
55 |
+
|
56 |
+
# Load the whole file before starting the workers: find_relevant reads the
# module-level ``lines`` list.
# NOTE(review): this presumably relies on the worker processes inheriting
# ``lines`` (fork start method) -- confirm before running on another platform.
with open(fn, "r") as xml_file:
    lines = xml_file.readlines()

# first line is headers
found_values = copy.deepcopy(search_terms)
seen_separator = False

# Scan the file in LINES_PER_PROC-sized chunks in parallel; the context manager
# shuts the pool down once all chunk results are back.
with Pool(32) as p:
    relevant_lines = p.map(find_relevant,
                           range(0, len(lines), LINES_PER_PROC))

# p.map keeps chunk order, so chaining the per-chunk lists yields the matching
# line indices in file order.
for i in itertools.chain.from_iterable(relevant_lines):
    line = lines[i]
    if chemical_separator in line:
        # new chemical -- emit what was collected so far (the header row the
        # first time) and reset found_values
        print("|".join(found_values))
        found_values = ["" for _ in search_terms]
        seen_separator = True
        continue

    for j, search_term in enumerate(search_terms):
        if search_term in line:
            # found the jth search term on line i; its value sits
            # line_offsets[j] lines away, between the configured columns
            found = i + line_offsets[j]
            # Ignore a value line that would fall outside the file (e.g. a
            # truncated download) instead of crashing or wrapping around.
            if 0 <= found < len(lines):
                found_values[j] = lines[found][start_cols[j]:end_cols[j]]

# Rows are only flushed when the next separator is met, so the final chemical
# has to be written out explicitly once the input is exhausted.
if seen_separator:
    print("|".join(found_values))
|
81 |
+
|
82 |
+
|
TransAntivirus/download_pubchem/finetunev1_new.csv
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
smiles|aLogP|canonical_smiles|CanonicalSMILES|IUPACName|XLogP
|
2 |
+
CCN(CC)CCCC(C)Nc1ccnc2cc(Cl)ccc12|4.810600000000004|CCN(CC)CCCC(C)Nc1ccnc2cc(Cl)ccc12|CCN(CC)CCCC(C)NC1=C2C=CC(=CC2=NC=C1)Cl|4-N-(7-chloroquinolin-4-yl)-1-N,1-N-diethylpentane-1,4-diamine|4.6
|
3 |
+
Cc1cccc(C)c1OCC(=O)NC(Cc1ccccc1)C(O)CC(Cc1ccccc1)NC(=O)C(C(C)C)N1CCCNC1=O|4.328140000000003|Cc1cccc(C)c1OCC(=O)NC(Cc1ccccc1)C(O)CC(Cc1ccccc1)NC(=O)C(C(C)C)N1CCCNC1=O|CC1=C(C(=CC=C1)C)OCC(=O)NC(CC2=CC=CC=C2)C(CC(CC3=CC=CC=C3)NC(=O)C(C(C)C)N4CCCNC4=O)O|N-[5-[[2-(2,6-dimethylphenoxy)acetyl]amino]-4-hydroxy-1,6-diphenylhexan-2-yl]-3-methyl-2-(2-oxo-1,3-diazinan-1-yl)butanamide|5.9
|
4 |
+
O=C(Nc1ccc([N+](=O)[O-])cc1Cl)c1cc(Cl)ccc1O|3.859500000000001|O=C(Nc1ccc([N+](=O)[O-])cc1Cl)c1cc(Cl)ccc1O|C1=CC(=C(C=C1[N+](=O)[O-])Cl)NC(=O)C2=C(C=CC(=C2)Cl)O|5-chloro-N-(2-chloro-4-nitrophenyl)-2-hydroxybenzamide|4.0
|
5 |
+
CN(C)C(=O)C(CCN1CCC(O)(c2ccc(Cl)cc2)CC1)(c1ccccc1)c1ccccc1|5.088000000000005|CN(C)C(=O)C(CCN1CCC(O)(c2ccc(Cl)cc2)CC1)(c1ccccc1)c1ccccc1|CN(C)C(=O)C(CCN1CCC(CC1)(C2=CC=C(C=C2)Cl)O)(C3=CC=CC=C3)C4=CC=CC=C4|4-[4-(4-chlorophenyl)-4-hydroxypiperidin-1-yl]-N,N-dimethyl-2,2-diphenylbutanamide|5.0
|
6 |
+
CC1OC(OC2CC(O)C3(CO)C4C(O)CC5(C)C(C6=CC(=O)OC6)CCC5(O)C4CCC3(O)C2)C(O)C(O)C1O|-1.5149999999999972|CC1OC(OC2CC(O)C3(CO)C4C(O)CC5(C)C(C6=CC(=O)OC6)CCC5(O)C4CCC3(O)C2)C(O)C(O)C1O|CC1C(C(C(C(O1)OC2CC(C3(C4C(CCC3(C2)O)C5(CCC(C5(CC4O)C)C6=CC(=O)OC6)O)CO)O)O)O)O|3-[1,5,11,14-tetrahydroxy-10-(hydroxymethyl)-13-methyl-3-(3,4,5-trihydroxy-6-methyloxan-2-yl)oxy-2,3,4,6,7,8,9,11,12,15,16,17-dodecahydro-1H-cyclopenta[a]phenanthren-17-yl]-2H-furan-5-one|-1.7
|
7 |
+
CCN(CC)CCOc1ccc(C(O)(Cc2ccc(Cl)cc2)c2ccc(C)cc2)cc1|5.847620000000007|CCN(CC)CCOc1ccc(C(O)(Cc2ccc(Cl)cc2)c2ccc(C)cc2)cc1|CCN(CC)CCOC1=CC=C(C=C1)C(CC2=CC=C(C=C2)Cl)(C3=CC=C(C=C3)C)O|2-(4-chlorophenyl)-1-[4-[2-(diethylamino)ethoxy]phenyl]-1-(4-methylphenyl)ethanol|6.2
|
8 |
+
COc1ccc2cc1Oc1ccc(cc1)CC1c3cc(c(OC)cc3CCN1C)Oc1c(OC)c(OC)cc3c1C(C2)N(C)CC3|7.162400000000009|COc1ccc2cc1Oc1ccc(cc1)CC1c3cc(c(OC)cc3CCN1C)Oc1c(OC)c(OC)cc3c1C(C2)N(C)CC3|CN1CCC2=CC(=C3C=C2C1CC4=CC=C(C=C4)OC5=C(C=CC(=C5)CC6C7=C(O3)C(=C(C=C7CCN6C)OC)OC)OC)OC|9,20,21,25-tetramethoxy-15,30-dimethyl-7,23-dioxa-15,30-diazaheptacyclo[22.6.2.23,6.18,12.114,18.027,31.022,33]hexatriaconta-3(36),4,6(35),8,10,12(34),18,20,22(33),24,26,31-dodecaene|6.4
|
9 |
+
CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O|5.179200000000004|CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O|CCN(CC)CC1=C(C=CC(=C1)NC2=C3C=CC(=CC3=NC=C2)Cl)O|4-[(7-chloroquinolin-4-yl)amino]-2-(diethylaminomethyl)phenol|2.6
|
10 |
+
O=C1NCN(c2ccccc2)C12CCN(CCCC(c1ccc(F)cc1)c1ccc(F)cc1)CC2|5.305400000000005|O=C1NCN(c2ccccc2)C12CCN(CCCC(c1ccc(F)cc1)c1ccc(F)cc1)CC2|C1CN(CCC12C(=O)NCN2C3=CC=CC=C3)CCCC(C4=CC=C(C=C4)F)C5=CC=C(C=C5)F|8-[4,4-bis(4-fluorophenyl)butyl]-1-phenyl-1,3,8-triazaspiro[4.5]decan-4-one|5.6
|
11 |
+
Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1|4.590320000000004|Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1|CC1=C(C=C(C=C1)NC(=O)C2=CC=C(C=C2)CN3CCN(CC3)C)NC4=NC=CC(=N4)C5=CN=CC=C5|4-[(4-methylpiperazin-1-yl)methyl]-N-[4-methyl-3-[(4-pyridin-3-ylpyrimidin-2-yl)amino]phenyl]benzamide|3.5
|
12 |
+
CCN(CC)CCOc1ccc2c(c1)C(=O)c1cc(OCCN(CC)CC)ccc1-2|4.339200000000004|CCN(CC)CCOc1ccc2c(c1)C(=O)c1cc(OCCN(CC)CC)ccc1-2|CCN(CC)CCOC1=CC2=C(C=C1)C3=C(C2=O)C=C(C=C3)OCCN(CC)CC|2,7-bis[2-(diethylamino)ethoxy]fluoren-9-one|4.7
|
13 |
+
COc1cc2c3cc1Oc1c(OC)c(OC)cc4c1C(Cc1ccc(O)c(c1)Oc1ccc(cc1)CC3N(C)CC2)N(C)CC4|6.859400000000009|COc1cc2c3cc1Oc1c(OC)c(OC)cc4c1C(Cc1ccc(O)c(c1)Oc1ccc(cc1)CC3N(C)CC2)N(C)CC4|CN1CCC2=CC(=C3C=C2C1CC4=CC=C(C=C4)OC5=C(C=CC(=C5)CC6C7=C(O3)C(=C(C=C7CCN6C)OC)OC)O)OC|20,21,25-trimethoxy-15,30-dimethyl-7,23-dioxa-15,30-diazaheptacyclo[22.6.2.23,6.18,12.114,18.027,31.022,33]hexatriaconta-3(36),4,6(35),8,10,12(34),18,20,22(33),24,26,31-dodecaen-9-ol|6.1
|
14 |
+
OC(c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)C1CCCCN1|4.447900000000003|OC(c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)C1CCCCN1|C1CCNC(C1)C(C2=CC(=NC3=C2C=CC=C3C(F)(F)F)C(F)(F)F)O|[2,8-bis(trifluoromethyl)quinolin-4-yl]-piperidin-2-ylmethanol|3.6
|
15 |
+
OCCN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1|4.308100000000003|OCCN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1|C1CN(CCN1CCCN2C3=CC=CC=C3SC4=C2C=C(C=C4)C(F)(F)F)CCO|2-[4-[3-[2-(trifluoromethyl)phenothiazin-10-yl]propyl]piperazin-1-yl]ethanol|4.4
|
16 |
+
O=C(Nc1cc(Cl)cc(Cl)c1O)c1c(O)c(Cl)cc(Cl)c1Cl|5.6171000000000015|O=C(Nc1cc(Cl)cc(Cl)c1O)c1c(O)c(Cl)cc(Cl)c1Cl|C1=C(C=C(C(=C1NC(=O)C2=C(C(=CC(=C2Cl)Cl)Cl)O)O)Cl)Cl|2,3,5-trichloro-N-(3,5-dichloro-2-hydroxyphenyl)-6-hydroxybenzamide|5.7
|
17 |
+
CCN(CCO)CCCC(C)Nc1ccnc2cc(Cl)ccc12|3.783000000000002|CCN(CCO)CCCC(C)Nc1ccnc2cc(Cl)ccc12|CCN(CCCC(C)NC1=C2C=CC(=CC2=NC=C1)Cl)CCO|2-[4-[(7-chloroquinolin-4-yl)amino]pentyl-ethylamino]ethanol|3.6
|
18 |
+
CC(CN1c2ccccc2Sc2ccccc21)N(C)C|4.239400000000003|CC(CN1c2ccccc2Sc2ccccc21)N(C)C|CC(CN1C2=CC=CC=C2SC3=CC=CC=C31)N(C)C|N,N-dimethyl-1-phenothiazin-10-ylpropan-2-amine|4.8
|
19 |
+
CCSc1ccc2c(c1)N(CCCN1CCN(C)CC1)c1ccccc1S2|5.0388000000000055|CCSc1ccc2c(c1)N(CCCN1CCN(C)CC1)c1ccccc1S2|CCSC1=CC2=C(C=C1)SC3=CC=CC=C3N2CCCN4CCN(CC4)C|2-ethylsulfanyl-10-[3-(4-methylpiperazin-1-yl)propyl]phenothiazine|5.4
|
20 |
+
CC1OC(OC2C(O)CC(OC3C(O)CC(OC4CCC5(C)C(CCC6C5CC(O)C5(C)C(C7=CC(=O)OC7)CCC65O)C4)OC3C)OC2C)CC(O)C1O|2.218100000000003|CC1OC(OC2C(O)CC(OC3C(O)CC(OC4CCC5(C)C(CCC6C5CC(O)C5(C)C(C7=CC(=O)OC7)CCC65O)C4)OC3C)OC2C)CC(O)C1O|CC1C(C(CC(O1)OC2C(OC(CC2O)OC3C(OC(CC3O)OC4CCC5(C(C4)CCC6C5CC(C7(C6(CCC7C8=CC(=O)OC8)O)C)O)C)C)C)O)O|3-[3-[5-[5-(4,5-dihydroxy-6-methyloxan-2-yl)oxy-4-hydroxy-6-methyloxan-2-yl]oxy-4-hydroxy-6-methyloxan-2-yl]oxy-12,14-dihydroxy-10,13-dimethyl-1,2,3,4,5,6,7,8,9,11,12,15,16,17-tetradecahydrocyclopenta[a]phenanthren-17-yl]-2H-furan-5-one|1.3
|
21 |
+
CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21|4.528400000000004|CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21|CN(C)CCCN1C2=CC=CC=C2CCC3=C1C=C(C=C3)Cl|3-(2-chloro-5,6-dihydrobenzo[b][1]benzazepin-11-yl)-N,N-dimethylpropan-1-amine|5.2
|
22 |
+
CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21|4.894400000000004|CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21|CN(C)CCCN1C2=CC=CC=C2SC3=C1C=C(C=C3)Cl|3-(2-chlorophenothiazin-10-yl)-N,N-dimethylpropan-1-amine|5.2
|
23 |
+
CCN(CC)CCOc1ccc(C(=C(Cl)c2ccccc2)c2ccccc2)cc1|6.562600000000006|CCN(CC)CCOc1ccc(C(=C(Cl)c2ccccc2)c2ccccc2)cc1|CCN(CC)CCOC1=CC=C(C=C1)C(=C(C2=CC=CC=C2)Cl)C3=CC=CC=C3|2-[4-(2-chloro-1,2-diphenylethenyl)phenoxy]-N,N-diethylethanamine|7.2
|
24 |
+
CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(O)CC(C)CN(C)C(C)C(O)C1(C)O|1.9007000000000054|CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(O)CC(C)CN(C)C(C)C(O)C1(C)O|CCC1C(C(C(N(CC(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)C)O)(C)O|11-[4-(dimethylamino)-3-hydroxy-6-methyloxan-2-yl]oxy-2-ethyl-3,4,10-trihydroxy-13-(5-hydroxy-4-methoxy-4,6-dimethyloxan-2-yl)oxy-3,5,6,8,10,12,14-heptamethyl-1-oxa-6-azacyclopentadecan-15-one|4.0
|
25 |
+
COc1ncnc(NS(=O)(=O)c2ccc(N)cc2)c1OC|0.8768|COc1ncnc(NS(=O)(=O)c2ccc(N)cc2)c1OC|COC1=C(N=CN=C1OC)NS(=O)(=O)C2=CC=C(C=C2)N|4-amino-N-(5,6-dimethoxypyrimidin-4-yl)benzenesulfonamide|0.7
|
26 |
+
COc1ccc2nc(S(=O)Cc3ncc(C)c(OC)c3C)[nH]c2c1|2.8997400000000004|COc1ccc2nc(S(=O)Cc3ncc(C)c(OC)c3C)[nH]c2c1|CC1=CN=C(C(=C1OC)C)CS(=O)C2=NC3=C(N2)C=C(C=C3)OC|6-methoxy-2-[(4-methoxy-3,5-dimethylpyridin-2-yl)methylsulfinyl]-1H-benzimidazole|2.2
|
27 |
+
CN(C)CCOc1ccc(C(=C(CCCl)c2ccccc2)c2ccccc2)cc1|6.215000000000006|CN(C)CCOc1ccc(C(=C(CCCl)c2ccccc2)c2ccccc2)cc1|CN(C)CCOC1=CC=C(C=C1)C(=C(CCCl)C2=CC=CC=C2)C3=CC=CC=C3|2-[4-(4-chloro-1,2-diphenylbut-1-enyl)phenoxy]-N,N-dimethylethanamine|7.2
|
28 |
+
CSc1ccc2c(c1)N(CCC1CCCCN1C)c1ccccc1S2|5.885600000000005|CSc1ccc2c(c1)N(CCC1CCCCN1C)c1ccccc1S2|CN1CCCCC1CCN2C3=CC=CC=C3SC4=C2C=C(C=C4)SC|10-[2-(1-methylpiperidin-2-yl)ethyl]-2-methylsulfanylphenothiazine|5.9
|
29 |
+
CC=CCC(C)C(O)C1C(=O)NC(CC)C(=O)N(C)CC(=O)N(C)C(CC(C)C)C(=O)NC(C(C)C)C(=O)N(C)C(CC(C)C)C(=O)NC(C)C(=O)NC(C)C(=O)N(C)C(CC(C)C)C(=O)N(C)C(CC(C)C)C(=O)N(C)C(C(C)C)C(=O)N1C|3.2690000000000046|CC=CCC(C)C(O)C1C(=O)NC(CC)C(=O)N(C)CC(=O)N(C)C(CC(C)C)C(=O)NC(C(C)C)C(=O)N(C)C(CC(C)C)C(=O)NC(C)C(=O)NC(C)C(=O)N(C)C(CC(C)C)C(=O)N(C)C(CC(C)C)C(=O)N(C)C(C(C)C)C(=O)N1C|CCC1C(=O)N(CC(=O)N(C(C(=O)NC(C(=O)N(C(C(=O)NC(C(=O)NC(C(=O)N(C(C(=O)N(C(C(=O)N(C(C(=O)N(C(C(=O)N1)C(C(C)CC=CC)O)C)C(C)C)C)CC(C)C)C)CC(C)C)C)C)C)CC(C)C)C)C(C)C)CC(C)C)C)C|30-ethyl-33-(1-hydroxy-2-methylhex-4-enyl)-1,4,7,10,12,15,19,25,28-nonamethyl-6,9,18,24-tetrakis(2-methylpropyl)-3,21-di(propan-2-yl)-1,4,7,10,13,16,19,22,25,28,31-undecazacyclotritriacontane-2,5,8,11,14,17,20,23,26,29,32-undecone|7.5
|
30 |
+
CC(C)=CCCC1(C)C=Cc2c(O)c3c(c(CC=C(C)C)c2O1)OC12C(=CC4CC1C(C)(C)OC2(CC=C(C)C(=O)O)C4O)C3=O|7.031100000000008|CC(C)=CCCC1(C)C=Cc2c(O)c3c(c(CC=C(C)C)c2O1)OC12C(=CC4CC1C(C)(C)OC2(CC=C(C)C(=O)O)C4O)C3=O|CC(=CCCC1(C=CC2=C(C3=C(C(=C2O1)CC=C(C)C)OC45C6CC(C=C4C3=O)C(C5(OC6(C)C)CC=C(C)C(=O)O)O)O)C)C|4-[12,18-dihydroxy-8,21,21-trimethyl-5-(3-methylbut-2-enyl)-8-(4-methylpent-3-enyl)-14-oxo-3,7,20-trioxahexacyclo[15.4.1.02,15.02,19.04,13.06,11]docosa-4(13),5,9,11,15-pentaen-19-yl]-2-methylbut-2-enoic acid|7.0
|
31 |
+
C=CC1CN2CCC1CC2C(O)c1ccnc2ccc(OC)cc12|3.1732000000000014|C=CC1CN2CCC1CC2C(O)c1ccnc2ccc(OC)cc12|COC1=CC2=C(C=CN=C2C=C1)C(C3CC4CCN3CC4C=C)O|(5-ethenyl-1-azabicyclo[2.2.2]octan-2-yl)-(6-methoxyquinolin-4-yl)methanol|2.9
|
32 |
+
CCCCCC(=O)OC1(C(C)=O)CCC2C3CCC4=CC(=O)CCC4(C)C3CCC21C|5.969600000000007|CCCCCC(=O)OC1(C(C)=O)CCC2C3CCC4=CC(=O)CCC4(C)C3CCC21C|CCCCCC(=O)OC1(CCC2C1(CCC3C2CCC4=CC(=O)CCC34C)C)C(=O)C|(17-acetyl-10,13-dimethyl-3-oxo-2,6,7,8,9,11,12,14,15,16-decahydro-1H-cyclopenta[a]phenanthren-17-yl) hexanoate|5.7
|
33 |
+
COc1ccc2cc1Oc1ccc(cc1)CC1c3c(cc4c(c3Oc3cc5c(cc3OC)CCN(C)C5C2)OCO4)CCN1C|6.873900000000009|COc1ccc2cc1Oc1ccc(cc1)CC1c3c(cc4c(c3Oc3cc5c(cc3OC)CCN(C)C5C2)OCO4)CCN1C|CN1CCC2=CC3=C(C4=C2C1CC5=CC=C(C=C5)OC6=C(C=CC(=C6)CC7C8=CC(=C(C=C8CCN7C)OC)O4)OC)OCO3|22,33-dimethoxy-13,28-dimethyl-2,5,7,20-tetraoxa-13,28-diazaoctacyclo[25.6.2.216,19.13,10.121,25.04,8.031,35.014,39]nonatriaconta-1(33),3(39),4(8),9,16(38),17,19(37),21,23,25(36),31,34-dodecaene|6.5
|
34 |
+
CCC(=C(c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1|5.9961000000000055|CCC(=C(c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1|CCC(=C(C1=CC=CC=C1)C2=CC=C(C=C2)OCCN(C)C)C3=CC=CC=C3|2-[4-(1,2-diphenylbut-1-enyl)phenoxy]-N,N-dimethylethanamine|7.1
|
35 |
+
OC1(c2ccc(Cl)c(C(F)(F)F)c2)CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1|7.532700000000006|OC1(c2ccc(Cl)c(C(F)(F)F)c2)CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1|C1CN(CCC1(C2=CC(=C(C=C2)Cl)C(F)(F)F)O)CCCC(C3=CC=C(C=C3)F)C4=CC=C(C=C4)F|1-[4,4-bis(4-fluorophenyl)butyl]-4-[4-chloro-3-(trifluoromethyl)phenyl]piperidin-4-ol|7.3
|
36 |
+
Clc1ccc(Cn2c(CN3CCCC3)nc3ccccc32)cc1|4.333800000000003|Clc1ccc(Cn2c(CN3CCCC3)nc3ccccc32)cc1|C1CCN(C1)CC2=NC3=CC=CC=C3N2CC4=CC=C(C=C4)Cl|1-[(4-chlorophenyl)methyl]-2-(pyrrolidin-1-ylmethyl)benzimidazole|4.0
|
37 |
+
C1CCC(C(CC2CCCCN2)C2CCCCC2)CC1|5.295400000000005|C1CCC(C(CC2CCCCN2)C2CCCCC2)CC1|C1CCC(CC1)C(CC2CCCCN2)C3CCCCC3|2-(2,2-dicyclohexylethyl)piperidine|6.8
|
38 |
+
Oc1c(Cl)cc(Cl)c(Cl)c1Cc1c(O)c(Cl)cc(Cl)c1Cl|6.609000000000001|Oc1c(Cl)cc(Cl)c(Cl)c1Cc1c(O)c(Cl)cc(Cl)c1Cl|C1=C(C(=C(C(=C1Cl)Cl)CC2=C(C(=CC(=C2Cl)Cl)Cl)O)O)Cl|3,4,6-trichloro-2-[(2,3,5-trichloro-6-hydroxyphenyl)methyl]phenol|7.5
|
39 |
+
CCC(C(=O)O)C1CCC(C)C(C(C)C(O)C(C)C(=O)C(CC)C2OC3(C=CC(O)C4(CCC(C)(C5CCC(O)(CC)C(C)O5)O4)O3)C(C)CC2C)O1|6.188000000000006|CCC(C(=O)O)C1CCC(C)C(C(C)C(O)C(C)C(=O)C(CC)C2OC3(C=CC(O)C4(CCC(C)(C5CCC(O)(CC)C(C)O5)O4)O3)C(C)CC2C)O1|CCC(C1CCC(C(O1)C(C)C(C(C)C(=O)C(CC)C2C(CC(C3(O2)C=CC(C4(O3)CCC(O4)(C)C5CCC(C(O5)C)(CC)O)O)C)C)O)C)C(=O)O|2-[6-[6-[3-(5-ethyl-5-hydroxy-6-methyloxan-2-yl)-15-hydroxy-3,10,12-trimethyl-4,6,8-trioxadispiro[4.1.57.35]pentadec-13-en-9-yl]-3-hydroxy-4-methyl-5-oxooctan-2-yl]-5-methyloxan-2-yl]butanoic acid|5.7
|
40 |
+
Cc1cc(Nc2ncc(Cl)c(Nc3ccccc3S(=O)(=O)C(C)C)n2)c(OC(C)C)cc1C1CCNCC1|6.361920000000006|Cc1cc(Nc2ncc(Cl)c(Nc3ccccc3S(=O)(=O)C(C)C)n2)c(OC(C)C)cc1C1CCNCC1|CC1=CC(=C(C=C1C2CCNCC2)OC(C)C)NC3=NC=C(C(=N3)NC4=CC=CC=C4S(=O)(=O)C(C)C)Cl|5-chloro-2-N-(5-methyl-4-piperidin-4-yl-2-propan-2-yloxyphenyl)-4-N-(2-propan-2-ylsulfonylphenyl)pyrimidine-2,4-diamine|6.4
|
41 |
+
CC1(C)C=Cc2c(c3c(c4c(=O)c(-c5ccc(O)cc5)coc24)OC(C)(C)CC3)O1|5.453400000000006|CC1(C)C=Cc2c(c3c(c4c(=O)c(-c5ccc(O)cc5)coc24)OC(C)(C)CC3)O1|CC1(CCC2=C3C(=C4C(=C2O1)C(=O)C(=CO4)C5=CC=C(C=C5)O)C=CC(O3)(C)C)C|5-(4-hydroxyphenyl)-10,10,16,16-tetramethyl-3,9,15-trioxatetracyclo[12.4.0.02,7.08,13]octadeca-1,4,7,13,17-pentaen-6-one|4.7
|
42 |
+
C=CC(=O)Nc1cc(Nc2nccc(-c3cn(C)c4ccccc34)n2)c(OC)cc1N(C)CCN(C)C|4.509800000000003|C=CC(=O)Nc1cc(Nc2nccc(-c3cn(C)c4ccccc34)n2)c(OC)cc1N(C)CCN(C)C|CN1C=C(C2=CC=CC=C21)C3=NC(=NC=C3)NC4=C(C=C(C(=C4)NC(=O)C=C)N(C)CCN(C)C)OC|N-[2-[2-(dimethylamino)ethyl-methylamino]-4-methoxy-5-[[4-(1-methylindol-3-yl)pyrimidin-2-yl]amino]phenyl]prop-2-enamide|3.7
|
43 |
+
Cc1c(-c2ccc(O)cc2)n(Cc2ccc(OCCN3CCCCCC3)cc2)c2ccc(O)cc12|6.331020000000006|Cc1c(-c2ccc(O)cc2)n(Cc2ccc(OCCN3CCCCCC3)cc2)c2ccc(O)cc12|CC1=C(N(C2=C1C=C(C=C2)O)CC3=CC=C(C=C3)OCCN4CCCCCC4)C5=CC=C(C=C5)O|1-[[4-[2-(azepan-1-yl)ethoxy]phenyl]methyl]-2-(4-hydroxyphenyl)-3-methylindol-5-ol|6.1
|
44 |
+
CCCCCCOC(C)c1cccc(-c2csc(NC(=O)c3cc(Cl)c(C=C(C)C(=O)O)c(Cl)c3)n2)c1OC|8.523600000000004|CCCCCCOC(C)c1cccc(-c2csc(NC(=O)c3cc(Cl)c(C=C(C)C(=O)O)c(Cl)c3)n2)c1OC|CCCCCCOC(C)C1=CC=CC(=C1OC)C2=CSC(=N2)NC(=O)C3=CC(=C(C(=C3)Cl)C=C(C)C(=O)O)Cl|3-[2,6-dichloro-4-[[4-[3-(1-hexoxyethyl)-2-methoxyphenyl]-1,3-thiazol-2-yl]carbamoyl]phenyl]-2-methylprop-2-enoic acid|7.7
|
45 |
+
CC(C)=CCc1c2c(c3occ(-c4ccc(O)cc4)c(=O)c3c1O)C=CC(C)(C)O2|5.564100000000006|CC(C)=CCc1c2c(c3occ(-c4ccc(O)cc4)c(=O)c3c1O)C=CC(C)(C)O2|CC(=CCC1=C2C(=C3C(=C1O)C(=O)C(=CO3)C4=CC=C(C=C4)O)C=CC(O2)(C)C)C|5-hydroxy-3-(4-hydroxyphenyl)-8,8-dimethyl-6-(3-methylbut-2-enyl)pyrano[2,3-h]chromen-4-one|5.9
|
46 |
+
CCCCc1oc2ccc(NS(C)(=O)=O)cc2c1C(=O)c1ccc(OCCCN(CCCC)CCCC)cc1|7.0490000000000075|CCCCc1oc2ccc(NS(C)(=O)=O)cc2c1C(=O)c1ccc(OCCCN(CCCC)CCCC)cc1|CCCCC1=C(C2=C(O1)C=CC(=C2)NS(=O)(=O)C)C(=O)C3=CC=C(C=C3)OCCCN(CCCC)CCCC|N-[2-butyl-3-[4-[3-(dibutylamino)propoxy]benzoyl]-1-benzofuran-5-yl]methanesulfonamide|7.2
|
47 |
+
CC(C)C(=O)OCC(=O)C12OC(C3CCCCC3)OC1CC1C3CCC4=CC(=O)C=CC4(C)C3C(O)CC12C|4.703900000000005|CC(C)C(=O)OCC(=O)C12OC(C3CCCCC3)OC1CC1C3CCC4=CC(=O)C=CC4(C)C3C(O)CC12C|CC(C)C(=O)OCC(=O)C12C(CC3C1(CC(C4C3CCC5=CC(=O)C=CC45C)O)C)OC(O2)C6CCCCC6|[2-(6-cyclohexyl-11-hydroxy-9,13-dimethyl-16-oxo-5,7-dioxapentacyclo[10.8.0.02,9.04,8.013,18]icosa-14,17-dien-8-yl)-2-oxoethyl] 2-methylpropanoate|5.3
|
48 |
+
CC1(C)C=Cc2c(c3c(c4c(=O)c(-c5ccc(O)c(O)c5)coc24)OC(C)(C)CC3)O1|5.159000000000005|CC1(C)C=Cc2c(c3c(c4c(=O)c(-c5ccc(O)c(O)c5)coc24)OC(C)(C)CC3)O1|CC1(CCC2=C3C(=C4C(=C2O1)C(=O)C(=CO4)C5=CC(=C(C=C5)O)O)C=CC(O3)(C)C)C|5-(3,4-dihydroxyphenyl)-10,10,16,16-tetramethyl-3,9,15-trioxatetracyclo[12.4.0.02,7.08,13]octadeca-1,4,7,13,17-pentaen-6-one|4.4
|
49 |
+
CCCCCOc1ccc(-c2ccc(-c3ccc(C(=O)NC4CC(O)C(O)NC(=O)C5C(O)C(C)CN5C(=O)C(C(C)O)NC(=O)C(C(O)C(O)c5ccc(O)cc5)NC(=O)C5CC(O)CN5C(=O)C(C(C)O)NC4=O)cc3)cc2)cc1|-0.927099999999986|CCCCCOc1ccc(-c2ccc(-c3ccc(C(=O)NC4CC(O)C(O)NC(=O)C5C(O)C(C)CN5C(=O)C(C(C)O)NC(=O)C(C(O)C(O)c5ccc(O)cc5)NC(=O)C5CC(O)CN5C(=O)C(C(C)O)NC4=O)cc3)cc2)cc1|CCCCCOC1=CC=C(C=C1)C2=CC=C(C=C2)C3=CC=C(C=C3)C(=O)NC4CC(C(NC(=O)C5C(C(CN5C(=O)C(NC(=O)C(NC(=O)C6CC(CN6C(=O)C(NC4=O)C(C)O)O)C(C(C7=CC=C(C=C7)O)O)O)C(C)O)C)O)O)O|N-[6-[1,2-dihydroxy-2-(4-hydroxyphenyl)ethyl]-11,20,21,25-tetrahydroxy-3,15-bis(1-hydroxyethyl)-26-methyl-2,5,8,14,17,23-hexaoxo-1,4,7,13,16,22-hexazatricyclo[22.3.0.09,13]heptacosan-18-yl]-4-[4-(4-pentoxyphenyl)phenyl]benzamide|2.3
|
50 |
+
CC(C)(C)c1cc(C(C)(C)C)c(NC(=O)c2c[nH]c3ccccc3c2=O)cc1O|5.081000000000005|CC(C)(C)c1cc(C(C)(C)C)c(NC(=O)c2c[nH]c3ccccc3c2=O)cc1O|CC(C)(C)C1=CC(=C(C=C1NC(=O)C2=CNC3=CC=CC=C3C2=O)O)C(C)(C)C|N-(2,4-ditert-butyl-5-hydroxyphenyl)-4-oxo-1H-quinoline-3-carboxamide|5.6
|
51 |
+
CCC(=C(c1ccc(OCCN(C)C)cc1)c1cccc(O)c1)c1ccccc1|5.701700000000006|CCC(=C(c1ccc(OCCN(C)C)cc1)c1cccc(O)c1)c1ccccc1|CCC(=C(C1=CC=C(C=C1)OCCN(C)C)C2=CC(=CC=C2)O)C3=CC=CC=C3|3-[1-[4-[2-(dimethylamino)ethoxy]phenyl]-2-phenylbut-1-enyl]phenol|6.8
|
52 |
+
CCN1CCN(Cc2ccc(Nc3ncc(F)c(-c4cc(F)c5nc(C)n(C(C)C)c5c4)n3)nc2)CC1|4.936920000000004|CCN1CCN(Cc2ccc(Nc3ncc(F)c(-c4cc(F)c5nc(C)n(C(C)C)c5c4)n3)nc2)CC1|CCN1CCN(CC1)CC2=CN=C(C=C2)NC3=NC=C(C(=N3)C4=CC5=C(C(=C4)F)N=C(N5C(C)C)C)F|N-[5-[(4-ethylpiperazin-1-yl)methyl]pyridin-2-yl]-5-fluoro-4-(7-fluoro-2-methyl-3-propan-2-ylbenzimidazol-5-yl)pyrimidin-2-amine|3.8
|
53 |
+
CCc1nc(C(N)=O)c(Nc2ccc(N3CCC(N4CCN(C)CC4)CC3)c(OC)c2)nc1NC1CCOCC1|2.6972000000000014|CCc1nc(C(N)=O)c(Nc2ccc(N3CCC(N4CCN(C)CC4)CC3)c(OC)c2)nc1NC1CCOCC1|CCC1=C(N=C(C(=N1)C(=O)N)NC2=CC(=C(C=C2)N3CCC(CC3)N4CCN(CC4)C)OC)NC5CCOCC5|6-ethyl-3-[3-methoxy-4-[4-(4-methylpiperazin-1-yl)piperidin-1-yl]anilino]-5-(oxan-4-ylamino)pyrazine-2-carboxamide|3.5
|
54 |
+
CC(C)(C)c1ccc(C(=O)CCCN2CCC(OC(c3ccccc3)c3ccccc3)CC2)cc1|7.217600000000008|CC(C)(C)c1ccc(C(=O)CCCN2CCC(OC(c3ccccc3)c3ccccc3)CC2)cc1|CC(C)(C)C1=CC=C(C=C1)C(=O)CCCN2CCC(CC2)OC(C3=CC=CC=C3)C4=CC=CC=C4|4-(4-benzhydryloxypiperidin-1-yl)-1-(4-tert-butylphenyl)butan-1-one|7.2
|
55 |
+
c1ccc2c(c1)Sc1ccccc1N2CC1CN2CCC1CC2|4.631100000000005|c1ccc2c(c1)Sc1ccccc1N2CC1CN2CCC1CC2|C1CN2CCC1C(C2)CN3C4=CC=CC=C4SC5=CC=CC=C53|10-(1-azabicyclo[2.2.2]octan-3-ylmethyl)phenothiazine|4.6
|
56 |
+
CC1=NN(c2ccc(C)c(C)c2)C(=O)C1=NNc1cccc(-c2cccc(C(=O)O)c2)c1O|4.564840000000005|CC1=NN(c2ccc(C)c(C)c2)C(=O)C1=NNc1cccc(-c2cccc(C(=O)O)c2)c1O|CC1=C(C=C(C=C1)N2C(=O)C(=C(N2)C)N=NC3=CC=CC(=C3O)C4=CC(=CC=C4)C(=O)O)C|3-[3-[[2-(3,4-dimethylphenyl)-5-methyl-3-oxo-1H-pyrazol-4-yl]diazenyl]-2-hydroxyphenyl]benzoic acid|5.4
|
57 |
+
COC(=O)NC(C(=O)NC(Cc1ccccc1)C(O)CN(Cc1ccc(-c2ccccn2)cc1)NC(=O)C(NC(=O)OC)C(C)(C)C)C(C)(C)C|4.2116000000000025|COC(=O)NC(C(=O)NC(Cc1ccccc1)C(O)CN(Cc1ccc(-c2ccccn2)cc1)NC(=O)C(NC(=O)OC)C(C)(C)C)C(C)(C)C|CC(C)(C)C(C(=O)NC(CC1=CC=CC=C1)C(CN(CC2=CC=C(C=C2)C3=CC=CC=N3)NC(=O)C(C(C)(C)C)NC(=O)OC)O)NC(=O)OC|methyl N-[1-[2-[2-hydroxy-3-[[2-(methoxycarbonylamino)-3,3-dimethylbutanoyl]amino]-4-phenylbutyl]-2-[(4-pyridin-2-ylphenyl)methyl]hydrazinyl]-3,3-dimethyl-1-oxobutan-2-yl]carbamate|5.6
|
58 |
+
CN1C2CCC1CC(OC(c1ccccc1)c1ccccc1)C2|4.417800000000004|CN1C2CCC1CC(OC(c1ccccc1)c1ccccc1)C2|CN1C2CCC1CC(C2)OC(C3=CC=CC=C3)C4=CC=CC=C4|3-benzhydryloxy-8-methyl-8-azabicyclo[3.2.1]octane|4.5
|
59 |
+
CC(C)N1CCN(c2ccc(OCC3COC(Cn4cncn4)(c4ccc(Cl)cc4Cl)O3)cc2)CC1|4.462700000000003|CC(C)N1CCN(c2ccc(OCC3COC(Cn4cncn4)(c4ccc(Cl)cc4Cl)O3)cc2)CC1|CC(C)N1CCN(CC1)C2=CC=C(C=C2)OCC3COC(O3)(CN4C=NC=N4)C5=C(C=C(C=C5)Cl)Cl|1-[4-[[2-(2,4-dichlorophenyl)-2-(1,2,4-triazol-1-ylmethyl)-1,3-dioxolan-4-yl]methoxy]phenyl]-4-propan-2-ylpiperazine|4.8
|
60 |
+
C=CCOc1ccccc1OCC(O)CNC(C)C|1.9890999999999999|C=CCOc1ccccc1OCC(O)CNC(C)C|CC(C)NCC(COC1=CC=CC=C1OCC=C)O|1-(propan-2-ylamino)-3-(2-prop-2-enoxyphenoxy)propan-2-ol|2.1
|
61 |
+
CCCCCC(O)C=CC1C(O)CC(=O)C1CCCCCCC(=O)O|3.475100000000002|CCCCCC(O)C=CC1C(O)CC(=O)C1CCCCCCC(=O)O|CCCCCC(C=CC1C(CC(=O)C1CCCCCCC(=O)O)O)O|7-[3-hydroxy-2-(3-hydroxyoct-1-enyl)-5-oxocyclopentyl]heptanoic acid|3.2
|
62 |
+
CC1CCOC2Cn3cc(C(=O)NCc4ccc(F)cc4F)c(=O)c(O)c3C(=O)N12|1.3528|CC1CCOC2Cn3cc(C(=O)NCc4ccc(F)cc4F)c(=O)c(O)c3C(=O)N12|CC1CCOC2N1C(=O)C3=C(C(=O)C(=CN3C2)C(=O)NCC4=C(C=C(C=C4)F)F)O|N-[(2,4-difluorophenyl)methyl]-11-hydroxy-7-methyl-9,12-dioxo-4-oxa-1,8-diazatricyclo[8.4.0.03,8]tetradeca-10,13-diene-13-carboxamide|2.4
|
63 |
+
OCCN1CCN(CCCN2c3ccccc3C=Cc3ccccc32)CC1|3.308500000000002|OCCN1CCN(CCCN2c3ccccc3C=Cc3ccccc32)CC1|C1CN(CCN1CCCN2C3=CC=CC=C3C=CC4=CC=CC=C42)CCO|2-[4-(3-benzo[b][1]benzazepin-11-ylpropyl)piperazin-1-yl]ethanol|3.6
|
64 |
+
CCOC(=O)c1c(CSc2ccccc2)n(C)c2cc(Br)c(O)c(CN(C)C)c12|5.177000000000005|CCOC(=O)c1c(CSc2ccccc2)n(C)c2cc(Br)c(O)c(CN(C)C)c12|CCOC(=O)C1=C(N(C2=CC(=C(C(=C21)CN(C)C)O)Br)C)CSC3=CC=CC=C3|ethyl 6-bromo-4-[(dimethylamino)methyl]-5-hydroxy-1-methyl-2-(phenylsulfanylmethyl)indole-3-carboxylate|4.4
|
65 |
+
CC(C)c1nc(CN(C)C(=O)NC(C(=O)NC(Cc2ccccc2)CC(O)C(Cc2ccccc2)NC(=O)OCc2cncs2)C(C)C)cs1|5.905200000000005|CC(C)c1nc(CN(C)C(=O)NC(C(=O)NC(Cc2ccccc2)CC(O)C(Cc2ccccc2)NC(=O)OCc2cncs2)C(C)C)cs1|CC(C)C1=NC(=CS1)CN(C)C(=O)NC(C(C)C)C(=O)NC(CC2=CC=CC=C2)CC(C(CC3=CC=CC=C3)NC(=O)OCC4=CN=CS4)O|1,3-thiazol-5-ylmethyl N-[3-hydroxy-5-[[3-methyl-2-[[methyl-[(2-propan-2-yl-1,3-thiazol-4-yl)methyl]carbamoyl]amino]butanoyl]amino]-1,6-diphenylhexan-2-yl]carbamate|6.0
|
66 |
+
Cc1c(O)cccc1C(=O)NC(CSc1ccccc1)C(O)CN1CC2CCCCC2CC1C(=O)NC(C)(C)C|4.747620000000004|Cc1c(O)cccc1C(=O)NC(CSc1ccccc1)C(O)CN1CC2CCCCC2CC1C(=O)NC(C)(C)C|CC1=C(C=CC=C1O)C(=O)NC(CSC2=CC=CC=C2)C(CN3CC4CCCCC4CC3C(=O)NC(C)(C)C)O|N-tert-butyl-2-[2-hydroxy-3-[(3-hydroxy-2-methylbenzoyl)amino]-4-phenylsulfanylbutyl]-3,4,4a,5,6,7,8,8a-octahydro-1H-isoquinoline-3-carboxamide|5.7
|
67 |
+
CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(O)C(Cc1ccccc1)NC(=O)C(CC(N)=O)NC(=O)c1ccc2ccccc2n1|3.092400000000003|CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(O)C(Cc1ccccc1)NC(=O)C(CC(N)=O)NC(=O)c1ccc2ccccc2n1|CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(C(CC3=CC=CC=C3)NC(=O)C(CC(=O)N)NC(=O)C4=NC5=CC=CC=C5C=C4)O|N-[4-[3-(tert-butylcarbamoyl)-3,4,4a,5,6,7,8,8a-octahydro-1H-isoquinolin-2-yl]-3-hydroxy-1-phenylbutan-2-yl]-2-(quinoline-2-carbonylamino)butanediamide|4.2
|
68 |
+
CCCC1(CCc2ccccc2)CC(O)=C(C(CC)c2cccc(NS(=O)(=O)c3ccc(C(F)(F)F)cn3)c2)C(=O)O1|7.325500000000007|CCCC1(CCc2ccccc2)CC(O)=C(C(CC)c2cccc(NS(=O)(=O)c3ccc(C(F)(F)F)cn3)c2)C(=O)O1|CCCC1(CC(=C(C(=O)O1)C(CC)C2=CC(=CC=C2)NS(=O)(=O)C3=NC=C(C=C3)C(F)(F)F)O)CCC4=CC=CC=C4|N-[3-[1-[4-hydroxy-6-oxo-2-(2-phenylethyl)-2-propyl-3H-pyran-5-yl]propyl]phenyl]-5-(trifluoromethyl)pyridine-2-sulfonamide|7.0
|
69 |
+
CC(C)CN(CC(O)C(Cc1ccccc1)NC(=O)OC1CCOC1)S(=O)(=O)c1ccc(N)cc1|2.4028|CC(C)CN(CC(O)C(Cc1ccccc1)NC(=O)OC1CCOC1)S(=O)(=O)c1ccc(N)cc1|CC(C)CN(CC(C(CC1=CC=CC=C1)NC(=O)OC2CCOC2)O)S(=O)(=O)C3=CC=C(C=C3)N|oxolan-3-yl N-[4-[(4-aminophenyl)sulfonyl-(2-methylpropyl)amino]-3-hydroxy-1-phenylbutan-2-yl]carbamate|2.9
|
70 |
+
CC(C)CN(CC(O)C(Cc1ccccc1)NC(=O)OC1COC2OCCC12)S(=O)(=O)c1ccc(N)cc1|2.3753|CC(C)CN(CC(O)C(Cc1ccccc1)NC(=O)OC1COC2OCCC12)S(=O)(=O)c1ccc(N)cc1|CC(C)CN(CC(C(CC1=CC=CC=C1)NC(=O)OC2COC3C2CCO3)O)S(=O)(=O)C4=CC=C(C=C4)N|2,3,3a,4,5,6a-hexahydrofuro[2,3-b]furan-4-yl N-[4-[(4-aminophenyl)sulfonyl-(2-methylpropyl)amino]-3-hydroxy-1-phenylbutan-2-yl]carbamate|2.9
|
71 |
+
CC(C)(C)NC(=O)C1CN(Cc2cccnc2)CCN1CC(O)CC(Cc1ccccc1)C(=O)NC1c2ccccc2CC1O|2.8669000000000016|CC(C)(C)NC(=O)C1CN(Cc2cccnc2)CCN1CC(O)CC(Cc1ccccc1)C(=O)NC1c2ccccc2CC1O|CC(C)(C)NC(=O)C1CN(CCN1CC(CC(CC2=CC=CC=C2)C(=O)NC3C(CC4=CC=CC=C34)O)O)CC5=CN=CC=C5|1-[4-benzyl-2-hydroxy-5-[(2-hydroxy-2,3-dihydro-1H-inden-1-yl)amino]-5-oxopentyl]-N-tert-butyl-4-(pyridin-3-ylmethyl)piperazine-2-carboxamide|2.8
|
TransAntivirus/download_pubchem/opsin-master.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdb2b66ea8ba08e07da78fc5dff4efa219034281c4776410539d172b9831b198
|
3 |
+
size 2056951
|
TransAntivirus/download_pubchem/opsin-master/.github/workflows/maven.yml
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This workflow will build a Java project with Maven
|
2 |
+
# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven
|
3 |
+
|
4 |
+
name: Java CI with Maven
|
5 |
+
|
6 |
+
on: [push, pull_request]
|
7 |
+
|
8 |
+
jobs:
|
9 |
+
build:
|
10 |
+
runs-on: ubuntu-20.04
|
11 |
+
strategy:
|
12 |
+
matrix:
|
13 |
+
# test against latest update of each major Java version:
|
14 |
+
java: [ 8, 11, 17 ]
|
15 |
+
name: Java ${{ matrix.java }}
|
16 |
+
steps:
|
17 |
+
- uses: actions/checkout@v2
|
18 |
+
- uses: actions/cache@v1
|
19 |
+
with:
|
20 |
+
path: ~/.m2/repository
|
21 |
+
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
|
22 |
+
restore-keys: |
|
23 |
+
${{ runner.os }}-maven-
|
24 |
+
- name: Setup java
|
25 |
+
uses: actions/setup-java@v1
|
26 |
+
with:
|
27 |
+
java-version: ${{ matrix.java }}
|
28 |
+
- name: Build with Maven
|
29 |
+
run: mvn -B clean test javadoc:javadoc package assembly:assembly
|
TransAntivirus/download_pubchem/opsin-master/.gitignore
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
target/
|
2 |
+
opsin-cli/src/main/java/dl/
|
3 |
+
.classpath
|
4 |
+
.project
|
5 |
+
.settings
|
TransAntivirus/download_pubchem/opsin-master/LICENSE.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Copyright 2017 Daniel Lowe
|
2 |
+
|
3 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
4 |
+
|
5 |
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
6 |
+
|
7 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
TransAntivirus/download_pubchem/opsin-master/README.md
ADDED
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
OPSIN - Open Parser for Systematic IUPAC Nomenclature
|
2 |
+
=====================================================
|
3 |
+
__Version 2.5.0 (see ReleaseNotes.txt for what's new in this version)__
|
4 |
+
__Source code: <https://github.com/dan2097/opsin>__
|
5 |
+
__Web interface and informational site: <http://opsin.ch.cam.ac.uk/>__
|
6 |
+
__License: [MIT License](https://opensource.org/licenses/MIT)__
|
7 |
+
|
8 |
+
OPSIN is a Java library for IUPAC name-to-structure conversion offering high recall and precision on organic chemical nomenclature.
|
9 |
+
|
10 |
+
Java 7 (or higher) is required for OPSIN 2.5.0
|
11 |
+
|
12 |
+
Supported outputs are SMILES, CML (Chemical Markup Language) and InChI (IUPAC International Chemical Identifier)
|
13 |
+
|
14 |
+
### Simple Usage Examples
|
15 |
+
#### Convert a chemical name to SMILES
|
16 |
+
`java -jar opsin-2.5.0-jar-with-dependencies.jar -osmi input.txt output.txt`
|
17 |
+
where input.txt contains chemical name/s, one per line
|
18 |
+
|
19 |
+
NameToStructure nts = NameToStructure.getInstance();
|
20 |
+
String smiles = nts.parseToSmiles("acetonitrile");
|
21 |
+
|
22 |
+
#### Convert a chemical name to CML
|
23 |
+
`java -jar opsin-2.5.0-jar-with-dependencies.jar -ocml input.txt output.txt`
|
24 |
+
where input.txt contains chemical name/s, one per line
|
25 |
+
|
26 |
+
NameToStructure nts = NameToStructure.getInstance();
|
27 |
+
String cml = nts.parseToCML("acetonitrile");
|
28 |
+
|
29 |
+
#### Convert a chemical name to StdInChI/StdInChIKey/InChI with FixedH
|
30 |
+
`java -jar opsin-2.5.0-jar-with-dependencies.jar -ostdinchi input.txt output.txt`
|
31 |
+
`java -jar opsin-2.5.0-jar-with-dependencies.jar -ostdinchikey input.txt output.txt`
|
32 |
+
`java -jar opsin-2.5.0-jar-with-dependencies.jar -oinchi input.txt output.txt`
|
33 |
+
where input.txt contains chemical name/s, one per line
|
34 |
+
|
35 |
+
NameToInchi nti = new NameToInchi();
|
36 |
+
String stdinchi = nti.parseToStdInchi("acetonitrile");
|
37 |
+
String stdinchikey = nti.parseToStdInchiKey("acetonitrile");
|
38 |
+
String inchi = nti.parseToInchi("acetonitrile");
|
39 |
+
|
40 |
+
NOTE: OPSIN's non-standard InChI includes an additional layer (FixedH) that indicates which tautomer the chemical name described. StdInChI aims to be tautomer independent.
|
41 |
+
### Advanced Usage
|
42 |
+
OPSIN 2.5.0 allows enabling of the following options:
|
43 |
+
|
44 |
+
* allowRadicals: Allows substituents to be interpretable e.g. allows interpretation of "ethyl"
|
45 |
+
* wildcardRadicals: If allowRadicals is enabled, this option uses atoms in the output to represent radicals: 'R' in CML and '*' in SMILES e.g. changes the output of ethyl from C[CH2] to CC\*
|
46 |
+
* detailedFailureAnalysis: Provides a potentially more accurate reason as to why a chemical name could not be parsed. This is done by parsing the chemical name from right to left. The trade-off for enabling this is slightly increased memory usage.
|
47 |
+
* allowAcidsWithoutAcid: Allows interpretation of acids without the word acid e.g. "acetic"
|
48 |
+
* allowUninterpretableStereo: Allows stereochemistry uninterpretable by OPSIN to be ignored (When used as a library the OpsinResult has a status of WARNING if stereochemistry was ignored)
|
49 |
+
* verbose: Enables debugging output\*
|
50 |
+
|
51 |
+
\*When used as a library this is done by modifying Log4J's logging level e.g. `Logger.getLogger("uk.ac.cam.ch.wwmm.opsin").setLevel(Level.DEBUG);`
|
52 |
+
|
53 |
+
The usage of these options on the command line is described in the command line's help dialog accessible via:
|
54 |
+
`java -jar opsin-2.5.0-jar-with-dependencies.jar -h`
|
55 |
+
|
56 |
+
These options may be controlled using the following code:
|
57 |
+
|
58 |
+
NameToStructure nts = NameToStructure.getInstance();
|
59 |
+
NameToStructureConfig ntsconfig = new NameToStructureConfig();
|
60 |
+
//a new NameToStructureConfig starts as a copy of OPSIN's default configuration
|
61 |
+
ntsconfig.setAllowRadicals(true);
|
62 |
+
OpsinResult result = nts.parseChemicalName("acetonitrile", ntsconfig);
|
63 |
+
String cml = result.getCml();
|
64 |
+
String smiles = result.getSmiles();
|
65 |
+
String stdinchi = NameToInchi.convertResultToStdInChI(result);
|
66 |
+
|
67 |
+
`result.getStatus()` may be checked to see if the conversion was successful.
|
68 |
+
If a structure was generated but OPSIN believes there may be a problem a status of WARNING is returned. Currently this may occur if the name appeared to be ambiguous or stereochemistry was ignored.
|
69 |
+
By default only optical rotation specification is ignored (this cannot be converted to stereo-configuration algorithmically).
|
70 |
+
|
71 |
+
Convenience methods like `result.nameAppearsToBeAmbiguous()` may be used to check the cause of the warning.
|
72 |
+
|
73 |
+
NOTE: (Std)InChI cannot be generated for polymers or radicals generated in combination with the wildcardRadicals option
|
74 |
+
|
75 |
+
### Availability
|
76 |
+
OPSIN is available as a standalone JAR from GitHub, <https://github.com/dan2097/opsin/releases>
|
77 |
+
`opsin-2.5.0-jar-with-dependencies.jar` can be executed as a commandline application or added to the classpath for library usage.
|
78 |
+
OPSIN is also available from the Maven Central Repository for users of Apache Maven.
|
79 |
+
|
80 |
+
If you are using Maven then add the following to your pom.xml:
|
81 |
+
|
82 |
+
<dependency>
|
83 |
+
<groupId>uk.ac.cam.ch.opsin</groupId>
|
84 |
+
<artifactId>opsin-core</artifactId>
|
85 |
+
<version>2.5.0</version>
|
86 |
+
</dependency>
|
87 |
+
|
88 |
+
If you need just CML or SMILES output support
|
89 |
+
|
90 |
+
or
|
91 |
+
|
92 |
+
<dependency>
|
93 |
+
<groupId>uk.ac.cam.ch.opsin</groupId>
|
94 |
+
<artifactId>opsin-inchi</artifactId>
|
95 |
+
<version>2.5.0</version>
|
96 |
+
</dependency>
|
97 |
+
|
98 |
+
if you also need InChI output support.
|
99 |
+
|
100 |
+
#### Building from source
|
101 |
+
To build OPSIN from source, download Maven 3 and download OPSIN's source code.
|
102 |
+
|
103 |
+
Running `mvn package assembly:assembly` in the root of OPSIN's source will build the jar with dependencies
|
104 |
+
|
105 |
+
Running `mvn assembly:assembly` in the opsin-core folder will build the "excludingInChI-jar-with-dependencies"
|
106 |
+
|
107 |
+
### About OPSIN
|
108 |
+
|
109 |
+
The workings of OPSIN are more fully described in:
|
110 |
+
|
111 |
+
Chemical Name to Structure: OPSIN, an Open Source Solution
|
112 |
+
Daniel M. Lowe, Peter T. Corbett, Peter Murray-Rust, Robert C. Glen
|
113 |
+
Journal of Chemical Information and Modeling 2011 51 (3), 739-753
|
114 |
+
|
115 |
+
If you use OPSIN in your work, then it would be great if you could cite us.
|
116 |
+
|
117 |
+
The following list broadly summarises what OPSIN can currently do and what will be worked on in the future.
|
118 |
+
|
119 |
+
#### Supported nomenclature includes:
|
120 |
+
* alkanes/alkenes/alkynes/heteroatom chains e.g. hexane, hex-1-ene, tetrasiloxane and their cyclic analogues e.g. cyclopropane
|
121 |
+
* All IUPAC 1993 recommended rings
|
122 |
+
* Trivial acids
|
123 |
+
* Hantzsch-Widman e.g. 1,3-oxazole
|
124 |
+
* Spiro systems
|
125 |
+
* All von Baeyer rings e.g. bicyclo[2.2.2]octane
|
126 |
+
* Hydro e.g. 2,3-dihydropyridine
|
127 |
+
* Indicated hydrogen e.g. 1H-benzoimidazole
|
128 |
+
* Heteroatom replacement
|
129 |
+
* Specification of charge e.g. ium/ide/ylium/uide
|
130 |
+
* Multiplicative nomenclature e.g. ethylenediaminetetraacetic acid
|
131 |
+
* Conjunctive nomenclature e.g. cyclohexaneethanol
|
132 |
+
* Fused ring systems e.g. imidazo[4,5-d]pyridine
|
133 |
+
* Ring assemblies e.g. biphenyl
|
134 |
+
* Most prefix and infix functional replacement nomenclature
|
135 |
+
* The following functional classes: acetals, acids, alcohols, amides, anhydrides, anilides, azetidides, azides, bromides, chlorides,
|
136 |
+
cyanates, cyanides, esters, di/tri/tetra esters, ethers, fluorides, fulminates, glycol ethers, glycols, hemiacetals, hemiketals,
|
137 |
+
hydrazides, hydrazones, hydrides, hydroperoxides, hydroxides, imides, iodides, isocyanates, isocyanides, isoselenocyanates, isothiocyanates,
|
138 |
+
ketals, ketones, lactams, lactims, lactones, mercaptans, morpholides, oxides, oximes, peroxides, piperazides, piperidides, pyrrolidides,
|
139 |
+
selenides, selenocyanates, selenoketones, selenols, selenosemicarbazones, selenones, selenoxides, selones, semicarbazones, sulfides, sulfones,
|
140 |
+
sulfoxides, sultams, sultims, sultines, sultones, tellurides, telluroketones, tellurones, tellurosemicarbazones, telluroxides, thiocyanates,
|
141 |
+
thioketones, thiols, thiosemicarbazones
|
142 |
+
* Greek letters
|
143 |
+
* Lambda convention
|
144 |
+
* Amino Acids and derivatives
|
145 |
+
* Structure-based polymer names e.g. poly(2,2'-diamino-5-hexadecylbiphenyl-3,3'-diyl)
|
146 |
+
* Bridge prefixes e.g. methano
|
147 |
+
* Specification of oxidation numbers and charge on elements
|
148 |
+
* Perhalogeno terms
|
149 |
+
* Subtractive prefixes: deoxy, dehydro, anhydro, demethyl, deamino
|
150 |
+
* Stoichiometry ratios and mixture indicators
|
151 |
+
* Nucleosides, (oligo)nucleotides and their esters
|
152 |
+
* Carbohydrate nomenclature
|
153 |
+
* Simple CAS names including inverted CAS names
|
154 |
+
* Steroids including alpha/beta stereochemistry
|
155 |
+
* Isotopic labelling
|
156 |
+
* E/Z/R/S stereochemistry
|
157 |
+
* cis/trans indicating relative stereochemistry on rings and as a synonym of E/Z
|
158 |
+
|
159 |
+
#### Currently UNsupported nomenclature includes:
|
160 |
+
* Other less common stereochemical terms
|
161 |
+
* Most alkaloids/terpenoids
|
162 |
+
* Natural product specific nomenclature operations
|
163 |
+
|
164 |
+
### Developers and Contributors
|
165 |
+
* Rich Apodaca
|
166 |
+
* Albina Asadulina
|
167 |
+
* Peter Corbett
|
168 |
+
* Daniel Lowe (Current maintainer)
|
169 |
+
* John Mayfield
|
170 |
+
* Peter Murray-Rust
|
171 |
+
* Noel O'Boyle
|
172 |
+
* Mark Williamson
|
173 |
+
|
174 |
+
Thanks also to the many users who have contributed through suggestions and bug reporting.
|
175 |
+
|
176 |
+
![YourKit Logo](https://www.yourkit.com/images/yklogo.png)
|
177 |
+
|
178 |
+
OPSIN's developers use YourKit to profile and optimise code.
|
179 |
+
|
180 |
+
YourKit supports open source projects with its full-featured Java Profiler.
|
181 |
+
YourKit, LLC is the creator of [YourKit Java Profiler](https://www.yourkit.com/java/profiler/index.jsp) and [YourKit .NET Profiler](https://www.yourkit.com/.net/profiler/index.jsp), innovative and intelligent tools for profiling Java and .NET applications.
|
182 |
+
|
183 |
+
Good Luck and let us know if you have problems, comments or suggestions!
|
184 |
+
Bugs may be reported on the project's [issue tracker](https://github.com/dan2097/opsin/issues).
|
185 |
+
|
186 |
+
![Build Status](https://github.com/dan2097/opsin/workflows/Java%20CI%20with%20Maven/badge.svg)
|
TransAntivirus/download_pubchem/opsin-master/ReleaseNotes.txt
ADDED
@@ -0,0 +1,332 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Version 2.5.0 (2020-10-04)
|
2 |
+
OPSIN now requires Java 7 (or higher)
|
3 |
+
Support for traditional oxidation state names e.g. ferric
|
4 |
+
Added support for defining the stereochemistry of phosphines/arsines
|
5 |
+
Added newly discovered elements
|
6 |
+
Improved algorithm for correctly interpreting ester names with a missing space e.g. 3-aminophenyl-4-aminobenzenesulfonate
|
7 |
+
Fixed structure of canavanine
|
8 |
+
Corrected interpretation of silver oxide
|
9 |
+
Vocabulary improvements
|
10 |
+
Minor improvements/bug fixes
|
11 |
+
|
12 |
+
Internal XML Changes:
|
13 |
+
tokenList files now all use the same schema (tokenLists.dtd)
|
14 |
+
|
15 |
+
Version 2.4.0 (2018-12-23)
|
16 |
+
OPSIN is now licensed under the MIT License
|
17 |
+
Locant labels included in extended SMILES output
|
18 |
+
Command-line now has a name flag to include the input name in SMILES/InChI output (tab delimited)
|
19 |
+
Added support for carotenoids
|
20 |
+
Added support for Vitamin B-6 related compounds
|
21 |
+
Added support for more fused ring system bridge prefixes
|
22 |
+
Added support for anilide as a functional replacement group
|
23 |
+
Allow heteroatom replacement as a detachable prefix e.g. 3,6,9-triaza-2-(4-phenylbutyl)undecanoic acid
|
24 |
+
Support Boughton system isotopic suffixes for 13C/14C/15N/17O/18O
|
25 |
+
Support salts of acids in CAS inverted names
|
26 |
+
Improved support for implicitly positively charged purine nucleosides/nucleotides
|
27 |
+
Added various biochemical groups/substituents
|
28 |
+
Improved logic for determining intended substitution in names with too few brackets
|
29 |
+
Incorrectly capitalized locants can now be used to reference ring fusion atoms
|
30 |
+
Some names no longer allow substitution e.g. water, hydrochloride
|
31 |
+
Many minor precision/recall improvements
|
32 |
+
|
33 |
+
Version 2.3.1 (2017-07-23)
|
34 |
+
Fixed fused ring numbering algorithm incorrectly numbering some ortho- and peri-fused fused systems involving 7-membered rings
|
35 |
+
Support P-thio to indicate thiophosphate linkage
|
36 |
+
Count of isotopic replacements no longer required if locants given
|
37 |
+
Fixed bug where CIP algorithm could assign priorities to identical substituents
|
38 |
+
Fixed "DL" before a substituent not assigning the substituted alpha-carbon as racemic stereo
|
39 |
+
L-stereochemistry no longer assumed on semi-systematic glycine derivatives e.g. phenylglycine
|
40 |
+
Fixed some cases where substituents like carbonyl should have been part of an implicitly bracketed section
|
41 |
+
Fixed interpretation of leucinic acid and 3/4/5-pyrazolone
|
42 |
+
|
43 |
+
Version 2.3.0 (2017-02-23)
|
44 |
+
D/L stereochemistry can now be assigned algorithmically e.g. L-2-aminobutyric acid
|
45 |
+
Other minor improvements to amino acid support e.g. homoproline added
|
46 |
+
Extended SMILES added to command-line interface
|
47 |
+
Names intended to include the triiodide/tribromide anion no longer erroneously have three monohalides
|
48 |
+
Ambiguity detected when applying unlocanted subtractive prefixes
|
49 |
+
Better support for adjacent multipliers e.g. ditrifluoroacetic acid
|
50 |
+
deoxynucleosides are now implicitly 2'-deoxynucleosides
|
51 |
+
Added support for <number> as a syntax for a superscripted number
|
52 |
+
Added support for amidrazones
|
53 |
+
Aluminium hydrides/chlorides/bromides/iodides are now covalently bonded
|
54 |
+
Fixed names with isotopes less than 10 not being supported
|
55 |
+
Fixed interpretation of some trivial names that clash with systematic names
|
56 |
+
|
57 |
+
Version 2.2.0 (2016-10-16)
|
58 |
+
Added support for IUPAC system for isotope specification e.g. (3-14C,2,2-2H2)butane
|
59 |
+
Added support for specifying deuteration using the Boughton system e.g. butane-2,2-d2
|
60 |
+
Added support for multiplied bridges e.g. 1,2:3,4-diepoxy
|
61 |
+
Front locants after a von baeyer descriptor are now supported e.g. bicyclo[2.2.2]-7-octene
|
62 |
+
onosyl substituents now supported e.g. glucuronosyl
|
63 |
+
More sugar substituents e.g. glucosaminyl
|
64 |
+
Improved support for malformed polycyclic spiro names
|
65 |
+
Support for oximino as a suffix
|
66 |
+
Added method [NameToStructure.getVersion()] to retrieve OPSIN version number
|
67 |
+
Allowed bridges to be used as detachable prefixes
|
68 |
+
Allow odd numbers of hydro to be added e.g. trihydro
|
69 |
+
Added support for unbracketed R stereochemistry (but not S, for the moment, due to the ambiguity with sulfur locants)
|
70 |
+
Various minor bug fixes e.g. stereochemistry was incorrect for isovaline
|
71 |
+
Minor vocabulary improvements
|
72 |
+
|
73 |
+
Version 2.1.0 (2016-03-12)
|
74 |
+
Added support for fractional multipliers e.g. hemihydrochloride
|
75 |
+
Added support for abbreviated common salts e.g. HCl
|
76 |
+
Added support for sandwich compounds e.g. ferrocene
|
77 |
+
Improved recognition of names missing the last 'e' (common in German)
|
78 |
+
Support for E/Z directly before double bond indication e.g. 2Z-ylidene, 2Z-ene
|
79 |
+
Improved support for functional class ethers e.g. "glycerol triglycidyl ether"
|
80 |
+
Added general support for names involving an ester formed from an alcohol and an ate group
|
81 |
+
Grignard reagents and certain compounds (e.g. uranium hexafluoride) are now treated as covalent rather than ionic
|
82 |
+
Added experimental support for outputting extended SMILES. Polymers and attachment points are annotated explicitly
|
83 |
+
Polymers when output as SMILES now have atom classes to indicate which end of the repeat unit is which
|
84 |
+
Support * as a superscript indicator e.g. *6* to mean superscript 6
|
85 |
+
Improved recognition of racemic stereochemistry terms
|
86 |
+
Added general support for names like "beta-alanine N,N-diacetic acid"
|
87 |
+
Allowed "one" and "ol" suffixes to be used in more cases where another suffix is also present
|
88 |
+
"ic acid halide" is not interpreted the same as "ic halide"
|
89 |
+
Fixed some cases where ambiguous operations were not considered ambiguous e.g. monosubstituted phenyl
|
90 |
+
Improvements/bug fixes to heuristics for detecting when spaces are omitted from ether/ester names
|
91 |
+
Improved support for stereochemistry in older CAS index names
|
92 |
+
Many precision improvements e.g. cyclotriphosphazene, thiazoline, TBDMS/TBDPS protecting groups, S-substituted-methionine
|
93 |
+
Various minor bug fixes e.g. names containing "SULPH" not recognized
|
94 |
+
Minor vocabulary improvements
|
95 |
+
|
96 |
+
Internal XML Changes:
|
97 |
+
Synonyms of the same concept are now or-ed rather than being separate entities e.g. <token>tertiary|tert-|t-</token>
|
98 |
+
|
99 |
+
Version 2.0.0 (2015-07-10)
|
100 |
+
MAJOR CHANGES:
|
101 |
+
Requires Java 1.6 or higher
|
102 |
+
CML (Chemical Markup Language) is now returned as a String rather than a XOM Element
|
103 |
+
OPSIN now attempts to identify if a chemical name is ambiguous. Names that appear ambiguous return with a status of WARNING with the structure provided being one interpretation of the name
|
104 |
+
|
105 |
+
Added support for "alcohol esters" e.g. phenol acetate [meaning phenyl acetate]
|
106 |
+
Multiplied unlocanted substitution is now more intelligent e.g. all substituents must connect to same group, and degeneracy of atom environments is taken into account
|
107 |
+
The ester interpretation is now preferred in more cases where a name does not contain a space but the parent is methanoate/ethanoate/formate/acetate/carbamate
|
108 |
+
Inorganic oxides are now interpreted, yielding structures with [O-2] ions
|
109 |
+
Added more trivial names of simple molecules
|
110 |
+
Support for nitrolic acids
|
111 |
+
Fixed parsing issue where a directly substituted acetal was not interpretable
|
112 |
+
Fixed certain groups e.g. phenethyl, not having their suffix attached to a specific location
|
113 |
+
Corrected interpretation of xanthyl, and various trivial names that look systematic
|
114 |
+
Name to structure is now ~20% faster
|
115 |
+
Initialisation time reduced by a third
|
116 |
+
InChI generation is now ~20% faster
|
117 |
+
XML processing dependency changed from XOM to Woodstox
|
118 |
+
Significant internal refactoring
|
119 |
+
Utility functions designed for internal use are no longer on the public API
|
120 |
+
Various minor bug fixes
|
121 |
+
|
122 |
+
Internal XML Changes:
|
123 |
+
Groups lacking a labels attribute now have no locants (previously had ascending numeric locants)
|
124 |
+
Syntax for addGroup/addHeteroAtom/addBond attributes changed to be easier to parse and allow specification of whether the name is ambiguous if a locant is not provided
|
125 |
+
|
126 |
+
Version 1.6.0 (2014-04-26)
|
127 |
+
Added API/command-line options to generate StdInchiKeys
|
128 |
+
Added support for the IUPAC recommended nomenclature for carbohydrate lactones
|
129 |
+
Added support for boronic acid pinacol esters
|
130 |
+
Added basic support for specifying chalcogen acid tautomer form e.g. thioacetic S-acid
|
131 |
+
Fused ring bridges are now numbered
|
132 |
+
Names with Endo/Exo/Syn/Anti stereochemistry can now be partially interpreted if warnRatherThanFailOnUninterpretableStereochemistry is used
|
133 |
+
The warnRatherThanFailOnUninterpretableStereochemistry option will now assign as much stereochemistry as OPSIN understands (All ignored stereochemistry terms are mentioned in the OpsinResult message)
|
134 |
+
Many minor nomenclature support improvements e.g. succinic imide; hexaldehyde; phenyldiazonium, organotrifluoroborates etc.
|
135 |
+
Added more trivial names that can be confused with systematic names e.g. Imidazolidinyl urea
|
136 |
+
Fixed StackOverFlowError that could occur when processing molecules with over 5000 atoms
|
137 |
+
Many minor bug fixes
|
138 |
+
Minor vocabulary improvements
|
139 |
+
Minor speed improvements
|
140 |
+
NOTE: This is the last release to support Java 1.5
|
141 |
+
|
142 |
+
Version 1.5.0 (2013-07-21)
|
143 |
+
Command line interface now accepts files to read and write to as arguments
|
144 |
+
Added option to allow interpretation of acids missing the word acid e.g. "acetic" (off by default)
|
145 |
+
Added option to treat uninterpretable stereochemistry as a warning rather than a failure (off by default)
|
146 |
+
Added support for nucleotide chains e.g. guanylyl(3'-5')uridine
|
147 |
+
Added support for parabens, azetidides, morpholides, piperazides, piperidides and pyrrolidides
|
148 |
+
Vocabulary improvements e.g. homo/beta amino acids
|
149 |
+
Many minor bug fixes e.g. fulminic acid correctly interpreted
|
150 |
+
|
151 |
+
Version 1.4.0 (2013-01-27)
|
152 |
+
Added support for dialdoses, diketoses, ketoaldoses, alditols, aldonic acids, uronic acids, aldaric acids, glycosides, oligosaccharides, named systematically or from trivial stems, in cyclic or acyclic form
|
153 |
+
Added support for ketoses named using dehydro
|
154 |
+
Added support for anhydro
|
155 |
+
Added more trivial carbohydrate names
|
156 |
+
Added support for sn-glycerol
|
157 |
+
Improved heuristics for phospho substitution
|
158 |
+
Added hydrazido and anilate suffixes
|
159 |
+
Allowed more functional class nomenclature to apply to amino acids
|
160 |
+
Added support for inverting CAS names with substituted functional terms e.g. Acetaldehyde, O-methyloxime
|
161 |
+
Double substitution of a deoxy chiral centre now uses the CIP rules to decide which substituent replaced the hydroxy group
|
162 |
+
Unicode right arrows, superscripts and the soft hyphen are now recognised
|
163 |
+
|
164 |
+
Version 1.3.0 (2012-09-16)
|
165 |
+
Added option to output radicals as R groups (* in SMILES)
|
166 |
+
Added support for carbolactone/dicarboximide/lactam/lactim/lactone/olide/sultam/sultim/sultine/sultone suffixes
|
167 |
+
Resolved some cases of ambiguity in the grammar; the program's capability to handle longer peptide names is improved
|
168 |
+
Allowed one (as in ketone) before yl e.g. indol-2-on-3-yl
|
169 |
+
Allowed primed locants to be used as unprimed locants in a bracket e.g. 2-(4'-methylphenyl)pyridine
|
170 |
+
Vocabulary improvements
|
171 |
+
SMILES writer will no longer reuse ring closures on the same atom
|
172 |
+
Fixed case where a name formed of many words that could be parsed ambiguously would cause OPSIN to run out of memory
|
173 |
+
NameToStructure.getInstance() no longer throws a checked exception
|
174 |
+
Many minor bug fixes
|
175 |
+
|
176 |
+
Version 1.2.0 (2011-12-06)
|
177 |
+
OPSIN is now available from Maven Central
|
178 |
+
Basic support for cyclised carbohydrates e.g. alpha-D-glucopyranose
|
179 |
+
Basic support for systematic carbohydrate stems e.g. D-glycero-D-gluco-Heptose
|
180 |
+
Added heuristic for correcting esters with omitted spaces
|
181 |
+
Added support for xanthates/xanthic acid
|
182 |
+
Minor vocabulary improvements
|
183 |
+
Fixed a few minor bugs/limitations in the Cahn-Ingold-Prelog rules implementation and made more memory efficient
|
184 |
+
Many minor improvements and bug fixes
|
185 |
+
|
186 |
+
Version 1.1.0 (2011-06-16)
|
187 |
+
Significant improvements to fused ring numbering code, specifically 3/4/5/7/8 member rings are no longer only allowed in chains of rings
|
188 |
+
Added support for outputting to StdInChI
|
189 |
+
Small improvements to fused ring building code
|
190 |
+
Improvements to heuristics for disambiguating what group is being referred to by a locant
|
191 |
+
Lower case indicated hydrogen is now recognised
|
192 |
+
Improvements to parsing speed
|
193 |
+
Many minor improvements and bug fixes
|
194 |
+
|
195 |
+
Version 1.0.0 (2011-03-09)
|
196 |
+
Added native isomeric SMILES output
|
197 |
+
Improved command-line interface. The desired format i.e. CML/SMILES/InChI as well as options such as allowing radicals can now all be specified via flags
|
198 |
+
Debugging is now performed using log4j rather than by passing a verbose flag
|
199 |
+
Added traditional locants to carboxylic acids and alkanes e.g. beta-hydroxybutyric acid
|
200 |
+
Added support for cis/trans indicating the relative stereochemistry of two substituents on rings and fused ring systems
|
201 |
+
Added support for stoichiometry ratios and mixture indicators
|
202 |
+
Added support for alpha/beta stereochemistry on steroids
|
203 |
+
Added support for the method for naming spiro systems described in the 1979 recommendations rule A-42
|
204 |
+
Added detailedFailureAnalysis option to detect the part of a chemical name that fails to parse
|
205 |
+
Added support for deoxy
|
206 |
+
Added open-chain saccharides
|
207 |
+
Improvements to CAS index name uninversion algorithm
|
208 |
+
Added support for isotopes into the program allowing deuterio/tritio
|
209 |
+
Added support for R/S stereochemistry indicated by a locant which is also used to indicate the point of substitution for a substituent
|
210 |
+
Many minor improvements and bug fixes
|
211 |
+
|
212 |
+
Version 0.9.0 (2010-11-01)
|
213 |
+
Added transition metals/f-block elements and noble gases
|
214 |
+
Added support for specifying the charge or oxidation number on elements e.g. aluminium(3+), iron(II)
|
215 |
+
Calculations based off a van Arkel diagram are now used to determine whether functional bonds to metals should be treated as ionic or covalent
|
216 |
+
Improved support for prefix functional replacement e.g. hydrazono/amido/imido/hydrazido/nitrido/pseudohalides can now be used for functional replacement on appropriate acids
|
217 |
+
Ortho/meta/para handling improved - can now only apply to six membered rings
|
218 |
+
Added support for methylenedioxy
|
219 |
+
Added support for simple bridge prefixes e.g. methano as in 2,3-methanoindene
|
220 |
+
Added support for perfluoro/perchloro/perbromo/periodo
|
221 |
+
Generalised alkane support to allow alkanes of lengths up to 9999 to be described without enumeration
|
222 |
+
Updated dependency on JNI-InChI to 0.7, hence InChI 1.03 is now used.
|
223 |
+
Improved algorithm for assigning unlocanted hydro terms
|
224 |
+
Improved heuristic for determining the meaning of oxido
|
225 |
+
Improved charge balancing e.g. ionic substance of an implicit ratio 2:3 can now be handled rather than being represented as a net charged 1:1 mixture
|
226 |
+
Grammar is a bit more lenient of placement of stereochemistry and multipliers
|
227 |
+
Vocabulary improvements especially in the area of nucleosides and nucleotides
|
228 |
+
Esters of biochemical compounds e.g. triphosphates are now supported
|
229 |
+
Many minor improvements and bug fixes
|
230 |
+
|
231 |
+
Version 0.8.0 (2010-07-16)
|
232 |
+
NameToStructureConfig can now be used to configure whether radicals e.g. ethyl are output or not.
|
233 |
+
Names like carbon tetrachloride are now supported
|
234 |
+
glycol ethers e.g. ethylene glycol ethyl ether are now supported
|
235 |
+
Prefix functional replacement support now includes halogens e.g. chlorophosphate
|
236 |
+
Added support for epoxy/epithio/episeleno/epitelluro
|
237 |
+
Added support for hydrazides/fluorohydrins/chlorohydrins/bromohydrins/iodohydrins/cyanohydrins/acetals/ketals/hemiacetals/hemiketals/diketones/disulfones named using functional class nomenclature
|
238 |
+
Improvements to algorithm for assigning and finding atoms corresponding to element symbol locants
|
239 |
+
Added experimental right to left parser (ReverseParseRules.java)
|
240 |
+
Vocabulary improvements
|
241 |
+
Parsing is now even faster
|
242 |
+
Various bug fixes and name interpretation fixes
|
243 |
+
|
244 |
+
Version 0.7.0 (2010-06-09)
|
245 |
+
Added full support for conjunctive nomenclature e.g. 1,3,5-benzenetriacetic acid
|
246 |
+
Added basic support for CAS names
|
247 |
+
Added trivial poly-noncarboxylic acids and more trivial carboxylic acids
|
248 |
+
Added support for spirobi/spiroter/dispiroter and the majority of spiro(ring-locant-ring) nomenclature
|
249 |
+
Indicators of the direction that a chemical rotates plane polarised light are now detected and ignored
|
250 |
+
Fixed many cases of trivial names being interpreted systematically by adding more trivial names and detecting such cases
|
251 |
+
Names such as oxalic bromide cyanide where a halide/pseudohalide replaces an oxygen are now supported
|
252 |
+
Amino acid esters named from the neutral amino acid are now supported e.g. glycine ethyl ester
|
253 |
+
Added more heteroatom replacement terms
|
254 |
+
Allowed creation of an OPSIN parser through NameToStructure.getOpsinParser()
|
255 |
+
Added support for dehydro - for unsaturating bonds
|
256 |
+
Improvements to element symbol locant assignment and retrieving appropriate atoms from locants like N2
|
257 |
+
OPSIN's SMILES parser now accepts specification of number of hydrogens in cases other than chiral atoms
|
258 |
+
Mixtures specified by separating components by semicolonspace are now supported
|
259 |
+
Many internal improvements and bug fixes
|
260 |
+
|
261 |
+
Version 0.6.1 (2010-03-18)
|
262 |
+
Counter ions are now duplicated so as to yield, if possible, a neutral compound
|
263 |
+
In names like nitrous amide the atoms modified by the functional replacement can now be substituted
|
264 |
+
Allowed ~number~ for specifying superscripts
|
265 |
+
Vocabulary improvements
|
266 |
+
Added quinone suffix
|
267 |
+
Tetrahedral sulfur stereochemistry is now recognised
|
268 |
+
Bug fixes to fix incorrect interpretation of some names e.g. triphosgene is now unparseable rather than 3 x phosgene, phospho has different meanings depending on whether it is used on an amino acid or another group etc.
|
269 |
+
|
270 |
+
Version 0.6.0 (2010-02-18)
|
271 |
+
OPSIN is now a mavenised project consisting of two modules: core and inchi. Core does name -->CML, inchi depends on core and allows conversion to inchi
|
272 |
+
Instead of CML an OpsinResult can be returned which can yield information as to why a name was not interpretable
|
273 |
+
Added support for unlocanted R/S/E/Z stereochemistry. Removed limit on number of atoms that stereochemistry code can handle
|
274 |
+
Added support for polymers e.g. poly(ethylene)
|
275 |
+
Improvements in handling of multiplicative nomenclature
|
276 |
+
Improvements to fusion nomenclature handling: multiplied components and multi parent systems are now supported
|
277 |
+
Improved support for functional class nomenclature; space detection has been improved and support has been added for anhydride,oxide,oxime,hydrazone,semicarbazone,thiosemicarbazone,selenosemicarbazone,tellurosemicarbazone,imide
|
278 |
+
Support for the lambda convention
|
279 |
+
Locanted esters
|
280 |
+
Improvements in dearomatisation code
|
281 |
+
CML output changed to being CML-Lite compliant
|
282 |
+
Speed improvements
|
283 |
+
Support for greek letters e.g. as alpha or $a or α
|
284 |
+
Added more infixes
|
285 |
+
Added more suffixes
|
286 |
+
Vocabulary improvements
|
287 |
+
Systematic handling of amino acid nomenclature
|
288 |
+
Added support for perhydro
|
289 |
+
Support for ylium/uide
|
290 |
+
Support for locants like N-1 (instead of N1)
|
291 |
+
Fixed potential infinite loop in fused ring numbering
|
292 |
+
Made grammar more lenient in many places e.g. euphonic o, optional square brackets
|
293 |
+
Sulph is now treated like sulf as in sulphuric acid
|
294 |
+
and many misc fixes and improvements
|
295 |
+
|
296 |
+
Version 0.5.3 (2009-10-22)
|
297 |
+
Added support for amic, aldehydic, anilic, anilide, carboxanilide and amoyl suffixes
|
298 |
+
Added support for cyclic imides e.g. succinimide/succinimido
|
299 |
+
Added support for amide functional class
|
300 |
+
Support for locants such as N5 which means a nitrogen that is attached in some way to position 5. Locants of this type may also be used in ester formation.
|
301 |
+
Some improvements to functional replacement using prefixes e.g. thioethanoic acid now works
|
302 |
+
Disabled stereochemistry in molecules with over 300 atoms as a temporary fix to the problem in 0.5.2
|
303 |
+
Slight improvement in method for deciding which group detachable hydro prefixes apply to.
|
304 |
+
Minor vocabulary update
|
305 |
+
|
306 |
+
Version 0.5.2 (2009-10-04)
|
307 |
+
Outputting directly to InChI is now supported using the separately available nameToInchi jar (an OPSIN jar is expected in the same location as the nameToInchi jar)
|
308 |
+
Fused rings with any number of rings in a chain or formed entirely of 6 membered rings can now be numbered
|
309 |
+
Added support for E/Z/R/S where locants are given. Unlocanted cases will be dealt with in a subsequent release. In very large molecules a lack of memory may be encountered, this will be resolved in a subsequent release
|
310 |
+
Some Infixes are now supported e.g. ethanthioic acid
|
311 |
+
All spiro systems with Von Baeyer brackets are now supported e.g. dispiro[4.2.4.2]tetradecane
|
312 |
+
Vocabulary increase (especially: terpenes, inorganic acids, fused ring components)
|
313 |
+
Fixed some problems with components with both acyclic and cyclic sections e.g. trityl
|
314 |
+
Improved locant assignments e.g. 2-furyl is now also fur-2-yl
|
315 |
+
Speed improvements
|
316 |
+
Removed dependence on Nux/Saxon
|
317 |
+
Misc minor fixes
|
318 |
+
|
319 |
+
Version 0.5.1 (2009-07-20)
|
320 |
+
Huge reduction in OPSIN initialisation time (typical ~7 seconds -->800ms)
|
321 |
+
Allowed thio/seleno/telluro as divalent linkers and for functional replacement when used as prefixes. Peroxy can now be used for functional replacement
|
322 |
+
Better support for semi-trivially named hydrocarbon fused rings e.g. tetracene
|
323 |
+
Better handling of carbonic acid derivatives
|
324 |
+
Improvements to locant assignment
|
325 |
+
Support for names like triethyltetramine and triethylene glycol
|
326 |
+
Misc other fixes to prevent OPSIN generating the wrong structure for certain types of names
|
327 |
+
|
328 |
+
Version 0.5 (2009-06-23)
|
329 |
+
Too many changes to list
|
330 |
+
|
331 |
+
Version 0.1 (2006-10-11)
|
332 |
+
Initial release
|
TransAntivirus/download_pubchem/opsin-master/fullAssembly.xml
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<assembly>
|
2 |
+
<id>jar-with-dependencies</id>
|
3 |
+
<formats>
|
4 |
+
<format>jar</format>
|
5 |
+
</formats>
|
6 |
+
<moduleSets>
|
7 |
+
<moduleSet>
|
8 |
+
<includes>
|
9 |
+
<include>uk.ac.cam.ch.opsin:opsin-core</include>
|
10 |
+
<include>uk.ac.cam.ch.opsin:opsin-inchi</include>
|
11 |
+
<include>uk.ac.cam.ch.opsin:opsin-cli</include>
|
12 |
+
</includes>
|
13 |
+
<sources>
|
14 |
+
<includeModuleDirectory>false</includeModuleDirectory>
|
15 |
+
<fileSets>
|
16 |
+
<fileSet>
|
17 |
+
<directory>src/main/java</directory>
|
18 |
+
</fileSet>
|
19 |
+
</fileSets>
|
20 |
+
</sources>
|
21 |
+
<binaries>
|
22 |
+
<unpack>true</unpack>
|
23 |
+
</binaries>
|
24 |
+
</moduleSet>
|
25 |
+
</moduleSets>
|
26 |
+
<files>
|
27 |
+
<file>
|
28 |
+
<source>LICENSE.txt</source>
|
29 |
+
</file>
|
30 |
+
<file>
|
31 |
+
<source>README.md</source>
|
32 |
+
</file>
|
33 |
+
<file>
|
34 |
+
<source>ReleaseNotes.txt</source>
|
35 |
+
</file>
|
36 |
+
</files>
|
37 |
+
<includeBaseDirectory>false</includeBaseDirectory>
|
38 |
+
<dependencySets>
|
39 |
+
<dependencySet>
|
40 |
+
<unpack>true</unpack>
|
41 |
+
<scope>runtime</scope>
|
42 |
+
</dependencySet>
|
43 |
+
</dependencySets>
|
44 |
+
<fileSets>
|
45 |
+
<fileSet>
|
46 |
+
<directory>${project.build.outputDirectory}</directory>
|
47 |
+
</fileSet>
|
48 |
+
</fileSets>
|
49 |
+
</assembly>
|
TransAntivirus/download_pubchem/opsin-master/opsin-cli/pom.xml
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
2 |
+
<modelVersion>4.0.0</modelVersion>
|
3 |
+
<parent>
|
4 |
+
<artifactId>opsin</artifactId>
|
5 |
+
<groupId>uk.ac.cam.ch.opsin</groupId>
|
6 |
+
<version>3.0-SNAPSHOT</version>
|
7 |
+
</parent>
|
8 |
+
<artifactId>opsin-cli</artifactId>
|
9 |
+
<name>OPSIN Command Line interface</name>
|
10 |
+
<description>Command line interface for using OPSIN to convert names to SMILES/InChI/InChIKey/CML</description>
|
11 |
+
<build>
|
12 |
+
<plugins>
|
13 |
+
<plugin>
|
14 |
+
<groupId>org.apache.maven.plugins</groupId>
|
15 |
+
<artifactId>maven-shade-plugin</artifactId>
|
16 |
+
<version>3.2.4</version>
|
17 |
+
<executions>
|
18 |
+
<execution>
|
19 |
+
<phase>package</phase>
|
20 |
+
<goals>
|
21 |
+
<goal>shade</goal>
|
22 |
+
</goals>
|
23 |
+
<configuration>
|
24 |
+
<finalName>opsin-${project.version}</finalName>
|
25 |
+
<createDependencyReducedPom>false</createDependencyReducedPom>
|
26 |
+
<transformers>
|
27 |
+
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
28 |
+
<manifestEntries>
|
29 |
+
<Main-Class>uk.ac.cam.ch.wwmm.opsin.Cli</Main-Class>
|
30 |
+
</manifestEntries>
|
31 |
+
</transformer>
|
32 |
+
</transformers>
|
33 |
+
</configuration>
|
34 |
+
</execution>
|
35 |
+
</executions>
|
36 |
+
</plugin>
|
37 |
+
</plugins>
|
38 |
+
</build>
|
39 |
+
<dependencies>
|
40 |
+
<dependency>
|
41 |
+
<groupId>uk.ac.cam.ch.opsin</groupId>
|
42 |
+
<artifactId>opsin-inchi</artifactId>
|
43 |
+
</dependency>
|
44 |
+
<dependency>
|
45 |
+
<groupId>commons-cli</groupId>
|
46 |
+
<artifactId>commons-cli</artifactId>
|
47 |
+
</dependency>
|
48 |
+
<dependency>
|
49 |
+
<groupId>org.apache.logging.log4j</groupId>
|
50 |
+
<artifactId>log4j-core</artifactId>
|
51 |
+
</dependency>
|
52 |
+
<dependency>
|
53 |
+
<groupId>org.junit.jupiter</groupId>
|
54 |
+
<artifactId>junit-jupiter</artifactId>
|
55 |
+
<scope>test</scope>
|
56 |
+
</dependency>
|
57 |
+
</dependencies>
|
58 |
+
</project>
|
TransAntivirus/download_pubchem/opsin-master/opsin-cli/src/main/java/uk/ac/cam/ch/wwmm/opsin/Cli.java
ADDED
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.io.BufferedReader;
|
4 |
+
import java.io.BufferedWriter;
|
5 |
+
import java.io.File;
|
6 |
+
import java.io.FileInputStream;
|
7 |
+
import java.io.FileOutputStream;
|
8 |
+
import java.io.IOException;
|
9 |
+
import java.io.InputStream;
|
10 |
+
import java.io.InputStreamReader;
|
11 |
+
import java.io.OutputStream;
|
12 |
+
import java.io.OutputStreamWriter;
|
13 |
+
import java.lang.reflect.Method;
|
14 |
+
import java.nio.charset.StandardCharsets;
|
15 |
+
|
16 |
+
import javax.xml.stream.XMLOutputFactory;
|
17 |
+
import javax.xml.stream.XMLStreamException;
|
18 |
+
import javax.xml.stream.XMLStreamWriter;
|
19 |
+
|
20 |
+
import org.apache.commons.cli.CommandLine;
|
21 |
+
import org.apache.commons.cli.CommandLineParser;
|
22 |
+
import org.apache.commons.cli.DefaultParser;
|
23 |
+
import org.apache.commons.cli.HelpFormatter;
|
24 |
+
import org.apache.commons.cli.Option;
|
25 |
+
import org.apache.commons.cli.Option.Builder;
|
26 |
+
import org.apache.commons.cli.Options;
|
27 |
+
import org.apache.commons.cli.UnrecognizedOptionException;
|
28 |
+
import org.apache.logging.log4j.Level;
|
29 |
+
import org.apache.logging.log4j.core.config.Configurator;
|
30 |
+
|
31 |
+
import com.ctc.wstx.api.WstxOutputProperties;
|
32 |
+
import com.ctc.wstx.stax.WstxOutputFactory;
|
33 |
+
|
34 |
+
public class Cli {
|
35 |
+
|
36 |
+
private enum InchiType {
|
37 |
+
inchiWithFixedH, stdInchi, stdInchiKey
|
38 |
+
}
|
39 |
+
|
40 |
+
/**
|
41 |
+
* Run OPSIN as a command-line application.
|
42 |
+
*
|
43 |
+
* @param args
|
44 |
+
* @throws Exception
|
45 |
+
*/
|
46 |
+
public static void main(String[] args) throws Exception {
|
47 |
+
Options options = buildCommandLineOptions();
|
48 |
+
CommandLineParser parser = new DefaultParser();
|
49 |
+
CommandLine cmd = null;
|
50 |
+
try {
|
51 |
+
cmd = parser.parse(options, args);
|
52 |
+
} catch (UnrecognizedOptionException e) {
|
53 |
+
System.err.println(e.getMessage());
|
54 |
+
System.exit(1);
|
55 |
+
}
|
56 |
+
if (cmd.hasOption("h")) {
|
57 |
+
displayUsage(options);
|
58 |
+
}
|
59 |
+
if (cmd.hasOption("v")) {
|
60 |
+
Configurator.setLevel("uk.ac.cam.ch.wwmm.opsin", Level.DEBUG);
|
61 |
+
}
|
62 |
+
|
63 |
+
NameToStructureConfig n2sconfig = generateOpsinConfigObjectFromCmd(cmd);
|
64 |
+
|
65 |
+
InputStream input = System.in;
|
66 |
+
OutputStream output = System.out;
|
67 |
+
String[] unparsedArgs = cmd.getArgs();
|
68 |
+
if (unparsedArgs.length == 0) {
|
69 |
+
System.err.println("Run the jar using the -h flag for help. Enter a chemical name to begin:");
|
70 |
+
} else if (unparsedArgs.length == 1) {
|
71 |
+
input = new FileInputStream(new File(unparsedArgs[0]));
|
72 |
+
} else if (unparsedArgs.length == 2) {
|
73 |
+
input = new FileInputStream(new File(unparsedArgs[0]));
|
74 |
+
output = new FileOutputStream(new File(unparsedArgs[1]));
|
75 |
+
} else {
|
76 |
+
displayUsage(options);
|
77 |
+
}
|
78 |
+
try {
|
79 |
+
String outputType = cmd.getOptionValue("o", "smi");
|
80 |
+
boolean outputName = cmd.hasOption("n");
|
81 |
+
if (outputType.equalsIgnoreCase("cml")) {
|
82 |
+
interactiveCmlOutput(input, output, n2sconfig);
|
83 |
+
} else if (outputType.equalsIgnoreCase("smi") || outputType.equalsIgnoreCase("smiles")) {
|
84 |
+
interactiveSmilesOutput(input, output, n2sconfig, false, outputName);
|
85 |
+
} else if (outputType.equalsIgnoreCase("inchi")) {
|
86 |
+
interactiveInchiOutput(input, output, n2sconfig, InchiType.inchiWithFixedH, outputName);
|
87 |
+
} else if (outputType.equalsIgnoreCase("stdinchi")) {
|
88 |
+
interactiveInchiOutput(input, output, n2sconfig, InchiType.stdInchi, outputName);
|
89 |
+
} else if (outputType.equalsIgnoreCase("stdinchikey")) {
|
90 |
+
interactiveInchiOutput(input, output, n2sconfig, InchiType.stdInchiKey, outputName);
|
91 |
+
} else if (outputType.equalsIgnoreCase("extendedsmi") || outputType.equalsIgnoreCase("extendedsmiles")
|
92 |
+
|| outputType.equalsIgnoreCase("cxsmi") || outputType.equalsIgnoreCase("cxsmiles")) {
|
93 |
+
interactiveSmilesOutput(input, output, n2sconfig, true, outputName);
|
94 |
+
} else {
|
95 |
+
System.err.println("Unrecognised output format: " + outputType);
|
96 |
+
System.err.println(
|
97 |
+
"Expected output types are \"cml\", \"smi\", \"inchi\", \"stdinchi\" and \"stdinchikey\"");
|
98 |
+
System.exit(1);
|
99 |
+
}
|
100 |
+
} finally {
|
101 |
+
if (output != System.out) {
|
102 |
+
output.close();
|
103 |
+
}
|
104 |
+
if (input != System.in) {
|
105 |
+
input.close();
|
106 |
+
}
|
107 |
+
}
|
108 |
+
}
|
109 |
+
|
110 |
+
private static void displayUsage(Options options) {
|
111 |
+
HelpFormatter formatter = new HelpFormatter();
|
112 |
+
String version = NameToStructure.getVersion();
|
113 |
+
formatter.printHelp("java -jar opsin-" + (version != null ? version : "[version]")
|
114 |
+
+ "-jar-with-dependencies.jar [options] [inputfile] [outputfile]" + OpsinTools.NEWLINE
|
115 |
+
+ "OPSIN converts systematic chemical names to CML, SMILES or InChI/StdInChI/StdInChIKey"
|
116 |
+
+ OpsinTools.NEWLINE
|
117 |
+
+ "Names should be new line delimited and may be read from stdin (default) or a file and output to stdout (default) or a file",
|
118 |
+
options);
|
119 |
+
System.exit(0);
|
120 |
+
}
|
121 |
+
|
122 |
+
private static Options buildCommandLineOptions() {
|
123 |
+
Options options = new Options();
|
124 |
+
Builder outputBuilder = Option.builder("o");
|
125 |
+
outputBuilder.longOpt("output");
|
126 |
+
outputBuilder.hasArg();
|
127 |
+
outputBuilder.argName("format");
|
128 |
+
StringBuilder outputOptionsDesc = new StringBuilder();
|
129 |
+
outputOptionsDesc.append("Sets OPSIN's output format (default smi)").append(OpsinTools.NEWLINE);
|
130 |
+
outputOptionsDesc.append("Allowed values are:").append(OpsinTools.NEWLINE);
|
131 |
+
outputOptionsDesc.append("cml for Chemical Markup Language").append(OpsinTools.NEWLINE);
|
132 |
+
outputOptionsDesc.append("smi for SMILES").append(OpsinTools.NEWLINE);
|
133 |
+
outputOptionsDesc.append("extendedsmi for Extended SMILES").append(OpsinTools.NEWLINE);
|
134 |
+
outputOptionsDesc.append("inchi for InChI (with FixedH)").append(OpsinTools.NEWLINE);
|
135 |
+
outputOptionsDesc.append("stdinchi for StdInChI").append(OpsinTools.NEWLINE);
|
136 |
+
outputOptionsDesc.append("stdinchikey for StdInChIKey");
|
137 |
+
outputBuilder.desc(outputOptionsDesc.toString());
|
138 |
+
options.addOption(outputBuilder.build());
|
139 |
+
options.addOption("h", "help", false, "Displays the allowed command line flags");
|
140 |
+
options.addOption("v", "verbose", false, "Enables debugging");
|
141 |
+
|
142 |
+
options.addOption("a", "allowAcidsWithoutAcid", false,
|
143 |
+
"Allows interpretation of acids without the word acid e.g. \"acetic\"");
|
144 |
+
options.addOption("f", "detailedFailureAnalysis", false,
|
145 |
+
"Enables reverse parsing to more accurately determine why parsing failed");
|
146 |
+
options.addOption("n", "name", false, "Include name in SMILES/InChI output (tab delimited)");
|
147 |
+
options.addOption("r", "allowRadicals", false, "Enables interpretation of radicals");
|
148 |
+
options.addOption("s", "allowUninterpretableStereo", false,
|
149 |
+
"Allows stereochemistry uninterpretable by OPSIN to be ignored");
|
150 |
+
options.addOption("w", "wildcardRadicals", false, "Radicals are output as wildcard atoms");
|
151 |
+
return options;
|
152 |
+
}
|
153 |
+
|
154 |
+
/**
|
155 |
+
* Uses the command line parameters to configure a new NameToStructureConfig
|
156 |
+
*
|
157 |
+
* @param cmd
|
158 |
+
* @return The configured NameToStructureConfig
|
159 |
+
*/
|
160 |
+
private static NameToStructureConfig generateOpsinConfigObjectFromCmd(CommandLine cmd) {
|
161 |
+
NameToStructureConfig n2sconfig = new NameToStructureConfig();
|
162 |
+
n2sconfig.setInterpretAcidsWithoutTheWordAcid(cmd.hasOption("a"));
|
163 |
+
n2sconfig.setDetailedFailureAnalysis(cmd.hasOption("f"));
|
164 |
+
n2sconfig.setAllowRadicals(cmd.hasOption("r"));
|
165 |
+
n2sconfig.setWarnRatherThanFailOnUninterpretableStereochemistry(cmd.hasOption("s"));
|
166 |
+
n2sconfig.setOutputRadicalsAsWildCardAtoms(cmd.hasOption("w"));
|
167 |
+
return n2sconfig;
|
168 |
+
}
|
169 |
+
|
170 |
+
private static void interactiveCmlOutput(InputStream input, OutputStream out, NameToStructureConfig n2sconfig) throws IOException, XMLStreamException {
|
171 |
+
NameToStructure nts = NameToStructure.getInstance();
|
172 |
+
BufferedReader inputReader = new BufferedReader(new InputStreamReader(input, StandardCharsets.UTF_8));
|
173 |
+
XMLOutputFactory factory = new WstxOutputFactory();
|
174 |
+
factory.setProperty(WstxOutputProperties.P_OUTPUT_ESCAPE_CR, false);
|
175 |
+
XMLStreamWriter writer = factory.createXMLStreamWriter(out, "UTF-8");
|
176 |
+
writer = new IndentingXMLStreamWriter(writer, 2);
|
177 |
+
writer.writeStartDocument();
|
178 |
+
CMLWriter cmlWriter = new CMLWriter(writer);
|
179 |
+
cmlWriter.writeCmlStart();
|
180 |
+
int id = 1;
|
181 |
+
String line;
|
182 |
+
while ((line = inputReader.readLine()) != null) {
|
183 |
+
int splitPoint = line.indexOf('\t');
|
184 |
+
String name = splitPoint >= 0 ? line.substring(0, splitPoint) : line;
|
185 |
+
OpsinResult result = nts.parseChemicalName(name, n2sconfig);
|
186 |
+
Fragment structure = result.getStructure();
|
187 |
+
cmlWriter.writeMolecule(structure, name, id++);
|
188 |
+
writer.flush();
|
189 |
+
if (structure == null) {
|
190 |
+
System.err.println(result.getMessage());
|
191 |
+
}
|
192 |
+
}
|
193 |
+
cmlWriter.writeCmlEnd();
|
194 |
+
writer.writeEndDocument();
|
195 |
+
writer.flush();
|
196 |
+
writer.close();
|
197 |
+
}
|
198 |
+
|
199 |
+
private static void interactiveSmilesOutput(InputStream input, OutputStream out, NameToStructureConfig n2sconfig, boolean extendedSmiles, boolean outputName) throws IOException {
|
200 |
+
NameToStructure nts = NameToStructure.getInstance();
|
201 |
+
BufferedReader inputReader = new BufferedReader(new InputStreamReader(input, StandardCharsets.UTF_8));
|
202 |
+
BufferedWriter outputWriter = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8));
|
203 |
+
String line;
|
204 |
+
while ((line = inputReader.readLine()) != null) {
|
205 |
+
int splitPoint = line.indexOf('\t');
|
206 |
+
String name = splitPoint >= 0 ? line.substring(0, splitPoint) : line;
|
207 |
+
OpsinResult result = nts.parseChemicalName(name, n2sconfig);
|
208 |
+
String output = extendedSmiles ? result.getExtendedSmiles() : result.getSmiles();
|
209 |
+
if (output == null) {
|
210 |
+
System.err.println(result.getMessage());
|
211 |
+
} else {
|
212 |
+
outputWriter.write(output);
|
213 |
+
}
|
214 |
+
if (outputName) {
|
215 |
+
outputWriter.write('\t');
|
216 |
+
outputWriter.write(line);
|
217 |
+
}
|
218 |
+
outputWriter.newLine();
|
219 |
+
outputWriter.flush();
|
220 |
+
}
|
221 |
+
}
|
222 |
+
|
223 |
+
private static void interactiveInchiOutput(InputStream input, OutputStream out, NameToStructureConfig n2sconfig, InchiType inchiType, boolean outputName) throws Exception {
|
224 |
+
NameToStructure nts = NameToStructure.getInstance();
|
225 |
+
BufferedReader inputReader = new BufferedReader(new InputStreamReader(input, StandardCharsets.UTF_8));
|
226 |
+
BufferedWriter outputWriter = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8));
|
227 |
+
Class<?> c;
|
228 |
+
try {
|
229 |
+
c = Class.forName("uk.ac.cam.ch.wwmm.opsin.NameToInchi");
|
230 |
+
} catch (ClassNotFoundException e) {
|
231 |
+
System.err.println("Could not initialise NameToInChI module. Is it on your classpath?");
|
232 |
+
throw new RuntimeException(e);
|
233 |
+
}
|
234 |
+
Method m;
|
235 |
+
switch (inchiType) {
|
236 |
+
case inchiWithFixedH:
|
237 |
+
m = c.getMethod("convertResultToInChI", new Class[] { OpsinResult.class });
|
238 |
+
break;
|
239 |
+
case stdInchi:
|
240 |
+
m = c.getMethod("convertResultToStdInChI", new Class[] { OpsinResult.class });
|
241 |
+
break;
|
242 |
+
case stdInchiKey:
|
243 |
+
m = c.getMethod("convertResultToStdInChIKey", new Class[] { OpsinResult.class });
|
244 |
+
break;
|
245 |
+
default:
|
246 |
+
throw new IllegalArgumentException("Unexepected enum value: " + inchiType);
|
247 |
+
}
|
248 |
+
|
249 |
+
String line;
|
250 |
+
while ((line = inputReader.readLine()) != null) {
|
251 |
+
int splitPoint = line.indexOf('\t');
|
252 |
+
String name = splitPoint >= 0 ? line.substring(0, splitPoint) : line;
|
253 |
+
OpsinResult result = nts.parseChemicalName(name, n2sconfig);
|
254 |
+
String output = (String) m.invoke(null, result);
|
255 |
+
if (output == null) {
|
256 |
+
System.err.println(result.getMessage());
|
257 |
+
} else {
|
258 |
+
outputWriter.write(output);
|
259 |
+
}
|
260 |
+
if (outputName) {
|
261 |
+
outputWriter.write('\t');
|
262 |
+
outputWriter.write(line);
|
263 |
+
}
|
264 |
+
outputWriter.newLine();
|
265 |
+
outputWriter.flush();
|
266 |
+
}
|
267 |
+
}
|
268 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-cli/src/main/resources/log4j2.xml
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<Configuration status="WARN">
|
3 |
+
<Appenders>
|
4 |
+
<Console name="Console" target="SYSTEM_ERR">
|
5 |
+
<PatternLayout pattern="%level - %m%n"/>
|
6 |
+
</Console>
|
7 |
+
</Appenders>
|
8 |
+
<Loggers>
|
9 |
+
<Root level="warn">
|
10 |
+
<AppenderRef ref="Console"/>
|
11 |
+
</Root>
|
12 |
+
</Loggers>
|
13 |
+
</Configuration>
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/pom.xml
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
2 |
+
<modelVersion>4.0.0</modelVersion>
|
3 |
+
<parent>
|
4 |
+
<artifactId>opsin</artifactId>
|
5 |
+
<groupId>uk.ac.cam.ch.opsin</groupId>
|
6 |
+
<version>3.0-SNAPSHOT</version>
|
7 |
+
</parent>
|
8 |
+
<artifactId>opsin-core</artifactId>
|
9 |
+
<name>OPSIN Core</name>
|
10 |
+
<description>Core files of OPSIN. Allows conversion of chemical names to CML (Chemical Markup Language)</description>
|
11 |
+
<build>
|
12 |
+
<resources>
|
13 |
+
<resource>
|
14 |
+
<directory>src/main/resources</directory>
|
15 |
+
<filtering>true</filtering>
|
16 |
+
<includes>
|
17 |
+
<include>**/*.props</include>
|
18 |
+
</includes>
|
19 |
+
</resource>
|
20 |
+
<resource>
|
21 |
+
<directory>src/main/resources</directory>
|
22 |
+
<filtering>false</filtering>
|
23 |
+
<excludes>
|
24 |
+
<exclude>**/*.props</exclude>
|
25 |
+
</excludes>
|
26 |
+
</resource>
|
27 |
+
</resources>
|
28 |
+
</build>
|
29 |
+
<dependencies>
|
30 |
+
<dependency>
|
31 |
+
<groupId>dk.brics</groupId>
|
32 |
+
<artifactId>automaton</artifactId>
|
33 |
+
</dependency>
|
34 |
+
<dependency>
|
35 |
+
<groupId>org.codehaus.woodstox</groupId>
|
36 |
+
<artifactId>woodstox-core-asl</artifactId>
|
37 |
+
</dependency>
|
38 |
+
<dependency>
|
39 |
+
<groupId>commons-io</groupId>
|
40 |
+
<artifactId>commons-io</artifactId>
|
41 |
+
</dependency>
|
42 |
+
<dependency>
|
43 |
+
<groupId>org.apache.logging.log4j</groupId>
|
44 |
+
<artifactId>log4j-api</artifactId>
|
45 |
+
</dependency>
|
46 |
+
<dependency>
|
47 |
+
<groupId>org.junit.jupiter</groupId>
|
48 |
+
<artifactId>junit-jupiter</artifactId>
|
49 |
+
<scope>test</scope>
|
50 |
+
</dependency>
|
51 |
+
<dependency>
|
52 |
+
<groupId>org.hamcrest</groupId>
|
53 |
+
<artifactId>hamcrest-library</artifactId>
|
54 |
+
<scope>test</scope>
|
55 |
+
</dependency>
|
56 |
+
<dependency>
|
57 |
+
<groupId>org.mockito</groupId>
|
58 |
+
<artifactId>mockito-core</artifactId>
|
59 |
+
<scope>test</scope>
|
60 |
+
</dependency>
|
61 |
+
<dependency>
|
62 |
+
<groupId>org.apache.logging.log4j</groupId>
|
63 |
+
<artifactId>log4j-core</artifactId>
|
64 |
+
<scope>test</scope>
|
65 |
+
</dependency>
|
66 |
+
</dependencies>
|
67 |
+
</project>
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AmbiguityChecker.java
ADDED
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.util.ArrayDeque;
|
4 |
+
import java.util.ArrayList;
|
5 |
+
import java.util.Collection;
|
6 |
+
import java.util.Deque;
|
7 |
+
import java.util.HashMap;
|
8 |
+
import java.util.HashSet;
|
9 |
+
import java.util.LinkedHashSet;
|
10 |
+
import java.util.List;
|
11 |
+
import java.util.Map;
|
12 |
+
import java.util.Set;
|
13 |
+
|
14 |
+
class AmbiguityChecker {
|
15 |
+
|
16 |
+
static boolean isSubstitutionAmbiguous(List<Atom> substitutableAtoms, int numberToBeSubstituted) {
|
17 |
+
if (substitutableAtoms.size() == 0) {
|
18 |
+
throw new IllegalArgumentException("OPSIN Bug: Must provide at least one substituable atom");
|
19 |
+
}
|
20 |
+
if (substitutableAtoms.size() < numberToBeSubstituted) {
|
21 |
+
throw new IllegalArgumentException("OPSIN Bug: substitutableAtoms must be >= numberToBeSubstituted");
|
22 |
+
}
|
23 |
+
if (substitutableAtoms.size() == numberToBeSubstituted){
|
24 |
+
return false;
|
25 |
+
}
|
26 |
+
if (allAtomsConnectToDefaultInAtom(substitutableAtoms, numberToBeSubstituted)) {
|
27 |
+
return false;
|
28 |
+
}
|
29 |
+
Set<Atom> uniqueAtoms = new HashSet<>(substitutableAtoms);
|
30 |
+
if (uniqueAtoms.size() == 1) {
|
31 |
+
return false;
|
32 |
+
}
|
33 |
+
if (allAtomsEquivalent(uniqueAtoms) && (numberToBeSubstituted == 1 || numberToBeSubstituted == substitutableAtoms.size() - 1)){
|
34 |
+
return false;
|
35 |
+
}
|
36 |
+
return true;
|
37 |
+
}
|
38 |
+
|
39 |
+
static boolean allAtomsEquivalent(Collection<Atom> atoms) {
|
40 |
+
StereoAnalyser analyser = analyseRelevantAtomsAndBonds(atoms);
|
41 |
+
Set<String> uniqueEnvironments = new HashSet<>();
|
42 |
+
for (Atom a : atoms) {
|
43 |
+
uniqueEnvironments.add(getAtomEnviron(analyser, a));
|
44 |
+
}
|
45 |
+
return uniqueEnvironments.size() == 1;
|
46 |
+
}
|
47 |
+
|
48 |
+
static boolean allBondsEquivalent(Collection<Bond> bonds) {
|
49 |
+
Set<Atom> relevantAtoms = new HashSet<>();
|
50 |
+
for (Bond b : bonds) {
|
51 |
+
relevantAtoms.add(b.getFromAtom());
|
52 |
+
relevantAtoms.add(b.getToAtom());
|
53 |
+
}
|
54 |
+
StereoAnalyser analyser = analyseRelevantAtomsAndBonds(relevantAtoms);
|
55 |
+
Set<String> uniqueBonds = new HashSet<>();
|
56 |
+
for (Bond b : bonds) {
|
57 |
+
uniqueBonds.add(bondToCanonicalEnvironString(analyser, b));
|
58 |
+
}
|
59 |
+
return uniqueBonds.size() == 1;
|
60 |
+
}
|
61 |
+
|
62 |
+
private static String bondToCanonicalEnvironString(StereoAnalyser analyser, Bond b) {
|
63 |
+
String s1 = getAtomEnviron(analyser, b.getFromAtom());
|
64 |
+
String s2 = getAtomEnviron(analyser, b.getToAtom());
|
65 |
+
if (s1.compareTo(s2) > 0){
|
66 |
+
return s1 + s2;
|
67 |
+
}
|
68 |
+
else {
|
69 |
+
return s2 + s1;
|
70 |
+
}
|
71 |
+
}
|
72 |
+
|
73 |
+
static String getAtomEnviron(StereoAnalyser analyser, Atom a) {
|
74 |
+
Integer env = analyser.getAtomEnvironmentNumber(a);
|
75 |
+
if (env == null) {
|
76 |
+
throw new RuntimeException("OPSIN Bug: Atom was not part of ambiguity analysis");
|
77 |
+
}
|
78 |
+
//"identical" atoms may be distinguished by bonds yet to be formed, hence split by outvalency
|
79 |
+
// e.g. [PH3] vs [PH3]=
|
80 |
+
return env + "\t" + a.getOutValency();
|
81 |
+
}
|
82 |
+
|
83 |
+
private static boolean allAtomsConnectToDefaultInAtom(List<Atom> substitutableAtoms, int numberToBeSubstituted) {
|
84 |
+
Atom defaultInAtom = substitutableAtoms.get(0).getFrag().getDefaultInAtom();
|
85 |
+
if (defaultInAtom != null) {
|
86 |
+
for (int i = 0; i < numberToBeSubstituted; i++) {
|
87 |
+
if (!substitutableAtoms.get(i).equals(defaultInAtom)) {
|
88 |
+
return false;
|
89 |
+
}
|
90 |
+
}
|
91 |
+
return true;
|
92 |
+
}
|
93 |
+
return false;
|
94 |
+
}
|
95 |
+
|
96 |
+
static StereoAnalyser analyseRelevantAtomsAndBonds(Collection<Atom> startingAtoms) {
|
97 |
+
Set<Atom> atoms = new HashSet<>();
|
98 |
+
Set<Bond> bonds = new HashSet<>();
|
99 |
+
Deque<Atom> stack = new ArrayDeque<>(startingAtoms);
|
100 |
+
while (!stack.isEmpty()) {
|
101 |
+
Atom a = stack.removeLast();
|
102 |
+
if (!atoms.contains(a)) {
|
103 |
+
atoms.add(a);
|
104 |
+
for (Bond b : a.getBonds()) {
|
105 |
+
bonds.add(b);
|
106 |
+
stack.add(b.getOtherAtom(a));
|
107 |
+
}
|
108 |
+
}
|
109 |
+
}
|
110 |
+
|
111 |
+
List<Atom> ghostHydrogens = new ArrayList<>();
|
112 |
+
for (Atom atom : atoms) {
|
113 |
+
int explicitHydrogensToAdd = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(atom);
|
114 |
+
for (int i = 0; i < explicitHydrogensToAdd; i++) {
|
115 |
+
Atom ghostHydrogen = new Atom(ChemEl.H);
|
116 |
+
Bond b = new Bond(ghostHydrogen, atom, 1);
|
117 |
+
atom.addBond(b);
|
118 |
+
ghostHydrogen.addBond(b);
|
119 |
+
ghostHydrogens.add(ghostHydrogen);
|
120 |
+
}
|
121 |
+
}
|
122 |
+
atoms.addAll(ghostHydrogens);
|
123 |
+
StereoAnalyser analyzer = new StereoAnalyser(atoms, bonds);
|
124 |
+
for (Atom ghostHydrogen : ghostHydrogens) {
|
125 |
+
Bond b = ghostHydrogen.getFirstBond();
|
126 |
+
b.getOtherAtom(ghostHydrogen).removeBond(b);
|
127 |
+
}
|
128 |
+
return analyzer;
|
129 |
+
}
|
130 |
+
|
131 |
+
static List<Atom> useAtomEnvironmentsToGivePlausibleSubstitution(List<Atom> substitutableAtoms, int numberToBeSubstituted) {
|
132 |
+
if (substitutableAtoms.size() == 0) {
|
133 |
+
throw new IllegalArgumentException("OPSIN Bug: Must provide at least one substituable atom");
|
134 |
+
}
|
135 |
+
if (substitutableAtoms.size() < numberToBeSubstituted) {
|
136 |
+
throw new IllegalArgumentException("OPSIN Bug: substitutableAtoms must be >= numberToBeSubstituted");
|
137 |
+
}
|
138 |
+
if (substitutableAtoms.size() == numberToBeSubstituted){
|
139 |
+
return substitutableAtoms;
|
140 |
+
}
|
141 |
+
|
142 |
+
List<Atom> preferredAtoms = findPlausibleSubstitutionPatternUsingSymmmetry(substitutableAtoms, numberToBeSubstituted);
|
143 |
+
if (preferredAtoms != null){
|
144 |
+
return preferredAtoms;
|
145 |
+
}
|
146 |
+
return findPlausibleSubstitutionPatternUsingLocalEnvironment(substitutableAtoms, numberToBeSubstituted);
|
147 |
+
}
|
148 |
+
|
149 |
+
private static List<Atom> findPlausibleSubstitutionPatternUsingSymmmetry(List<Atom> substitutableAtoms, int numberToBeSubstituted) {
|
150 |
+
//cf. octaethylporphyrin (8 identical atoms capable of substitution)
|
151 |
+
StereoAnalyser analyser = analyseRelevantAtomsAndBonds(new HashSet<>(substitutableAtoms));
|
152 |
+
Map<String, List<Atom>> atomsInEachEnvironment = new HashMap<>();
|
153 |
+
for (Atom a : substitutableAtoms) {
|
154 |
+
String env = getAtomEnviron(analyser, a);
|
155 |
+
List<Atom> atomsInEnvironment = atomsInEachEnvironment.get(env);
|
156 |
+
if (atomsInEnvironment == null) {
|
157 |
+
atomsInEnvironment = new ArrayList<>();
|
158 |
+
atomsInEachEnvironment.put(env, atomsInEnvironment);
|
159 |
+
}
|
160 |
+
atomsInEnvironment.add(a);
|
161 |
+
}
|
162 |
+
List<Atom> preferredAtoms = null;
|
163 |
+
for (List<Atom> atoms : atomsInEachEnvironment.values()) {
|
164 |
+
if (atoms.size() == numberToBeSubstituted){
|
165 |
+
if (preferredAtoms != null){
|
166 |
+
return null;
|
167 |
+
}
|
168 |
+
preferredAtoms = atoms;
|
169 |
+
}
|
170 |
+
}
|
171 |
+
if (preferredAtoms == null) {
|
172 |
+
//check for environments with double the required atoms where this means each atom can support two substitutions c.f. cyclohexane
|
173 |
+
for (List<Atom> atoms : atomsInEachEnvironment.values()) {
|
174 |
+
if (atoms.size() == (numberToBeSubstituted * 2)){
|
175 |
+
Set<Atom> uniquified = new LinkedHashSet<>(atoms);//retain deterministic atom ordering
|
176 |
+
if (uniquified.size() == numberToBeSubstituted) {
|
177 |
+
if (preferredAtoms != null){
|
178 |
+
return null;
|
179 |
+
}
|
180 |
+
preferredAtoms = new ArrayList<>(uniquified);
|
181 |
+
}
|
182 |
+
}
|
183 |
+
}
|
184 |
+
}
|
185 |
+
return preferredAtoms;
|
186 |
+
}
|
187 |
+
|
188 |
+
private static List<Atom> findPlausibleSubstitutionPatternUsingLocalEnvironment(List<Atom> substitutableAtoms, int numberToBeSubstituted) {
|
189 |
+
//cf. pentachlorotoluene (5 sp2 carbons vs sp3 methyl)
|
190 |
+
Map<String, List<Atom>> atomsInEachLocalEnvironment = new HashMap<>();
|
191 |
+
for (Atom a : substitutableAtoms) {
|
192 |
+
int valency = a.determineValency(true);
|
193 |
+
int currentValency = a.getIncomingValency() + a.getOutValency();
|
194 |
+
int numOfBonds = (valency - currentValency) + a.getBondCount();//distinguish sp2 and sp3 atoms
|
195 |
+
String s = a.getElement().toString() +"\t" + valency + "\t" + numOfBonds + "\t" + a.hasSpareValency();
|
196 |
+
List<Atom> atomsInEnvironment = atomsInEachLocalEnvironment.get(s);
|
197 |
+
if (atomsInEnvironment == null) {
|
198 |
+
atomsInEnvironment = new ArrayList<>();
|
199 |
+
atomsInEachLocalEnvironment.put(s, atomsInEnvironment);
|
200 |
+
}
|
201 |
+
atomsInEnvironment.add(a);
|
202 |
+
}
|
203 |
+
List<Atom> preferredAtoms = null;
|
204 |
+
for (List<Atom> atoms : atomsInEachLocalEnvironment.values()) {
|
205 |
+
if (atoms.size() == numberToBeSubstituted){
|
206 |
+
if (preferredAtoms != null){
|
207 |
+
return null;
|
208 |
+
}
|
209 |
+
preferredAtoms = atoms;
|
210 |
+
}
|
211 |
+
}
|
212 |
+
return preferredAtoms;
|
213 |
+
}
|
214 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AnnotatorState.java
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
|
4 |
+
/**
 * Immutable record of the state held while a chemical name is parsed by the finite-state machine.
 * The token strings and their semantics can be generated from it.
 * @author Daniel
 *
 */
class AnnotatorState {

	/** The current state of the DFA. */
	private final int state;

	/** The annotation so far. */
	private final char annot;

	/** The index of the first char in the chemical name that has yet to be tokenised */
	private final int posInName;

	/** Whether the corresponding token is case sensitive */
	private final boolean isCaseSensitive;

	/** The last annotator state for the previous token, or null if there is none */
	private final AnnotatorState previousAs;

	/**
	 * Creates a state from its five components; each is stored as given.
	 * @param state the DFA state
	 * @param annot the annotation
	 * @param posInName index of the first char of the name that has yet to be tokenised
	 * @param isCaseSensitive whether the corresponding token is case sensitive
	 * @param previousAs the last annotator state for the previous token (may be null)
	 */
	AnnotatorState(int state, char annot, int posInName, boolean isCaseSensitive, AnnotatorState previousAs) {
		this.previousAs = previousAs;
		this.isCaseSensitive = isCaseSensitive;
		this.posInName = posInName;
		this.annot = annot;
		this.state = state;
	}

	/**
	 * The current state in the DFA
	 * @return the DFA state
	 */
	int getState() {
		return this.state;
	}

	/**
	 * The annotation that was consumed to transition to this state
	 * @return the annotation character
	 */
	char getAnnot() {
		return this.annot;
	}

	/**
	 * The index of the first char in the chemical name that has yet to be tokenised (at the point of creating this AnnotatorState)
	 * @return the index into the chemical name
	 */
	int getPosInName() {
		return this.posInName;
	}

	/**
	 * Whether the corresponding token is case sensitive
	 * @return the case sensitivity flag given at construction
	 */
	boolean isCaseSensitive() {
		return this.isCaseSensitive;
	}

	/**
	 * The last annotator state for the previous token (or null if this is the first)
	 * @return the previous annotator state, possibly null
	 */
	AnnotatorState getPreviousAs() {
		return this.previousAs;
	}
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Atom.java
ADDED
@@ -0,0 +1,647 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.util.ArrayList;
|
4 |
+
import java.util.Collections;
|
5 |
+
import java.util.HashMap;
|
6 |
+
import java.util.List;
|
7 |
+
import java.util.Map;
|
8 |
+
import java.util.Set;
|
9 |
+
import java.util.regex.Matcher;
|
10 |
+
|
11 |
+
import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*;
|
12 |
+
|
13 |
+
/**
|
14 |
+
* An atom. Carries information about which fragment it is in, and an ID
|
15 |
+
* number and a list of bonds that it is involved. It may also have other information such as
|
16 |
+
* whether it has "spare valencies" due to unsaturation, its charge, locant labels, stereochemistry and notes
|
17 |
+
*
|
18 |
+
* @author ptc24
|
19 |
+
* @author dl387
|
20 |
+
*
|
21 |
+
*/
|
22 |
+
class Atom {
|
23 |
+
|
24 |
+
/**The (unique over the molecule) ID of the atom.*/
|
25 |
+
private final int id;
|
26 |
+
|
27 |
+
/**The chemical element of the atom. */
|
28 |
+
private ChemEl chemEl;
|
29 |
+
|
30 |
+
/**The locants that pertain to the atom.*/
|
31 |
+
private final List<String> locants = new ArrayList<>(2);
|
32 |
+
|
33 |
+
/**The formal charge on the atom.*/
|
34 |
+
private int charge = 0;
|
35 |
+
|
36 |
+
/**The isotope of the atom. Null if not defined explicitly.*/
|
37 |
+
private Integer isotope = null;
|
38 |
+
|
39 |
+
/**
|
40 |
+
* Holds the atomParity object associated with this object
|
41 |
+
* null by default
|
42 |
+
*/
|
43 |
+
private AtomParity atomParity = null;
|
44 |
+
|
45 |
+
/**The bonds that involve the atom*/
|
46 |
+
private final List<Bond> bonds = new ArrayList<>(4);
|
47 |
+
|
48 |
+
/**A map between PropertyKey s as declared here and useful atom properties, usually relating to some kind of special case. */
|
49 |
+
@SuppressWarnings("rawtypes")
|
50 |
+
private final Map<PropertyKey, Object> properties = new HashMap<>();
|
51 |
+
/** A set of atoms that were equally plausible to perform functional replacement on */
|
52 |
+
static final PropertyKey<Set<Atom>> AMBIGUOUS_ELEMENT_ASSIGNMENT = new PropertyKey<>("ambiguousElementAssignment");
|
53 |
+
/** The atom class which will be output when serialised to SMILES. Useful for distinguishing attachment points */
|
54 |
+
static final PropertyKey<Integer> ATOM_CLASS = new PropertyKey<>("atomClass");
|
55 |
+
/** Used on wildcard atoms to indicate their meaning */
|
56 |
+
static final PropertyKey<String> HOMOLOGY_GROUP = new PropertyKey<>("homologyGroup");
|
57 |
+
/** Used on wildcard atoms to indicate that they are a position variation bond */
|
58 |
+
static final PropertyKey<List<Atom>> POSITION_VARIATION_BOND = new PropertyKey<>("positionVariationBond");
|
59 |
+
/** The hydrogen count as set in the SMILES*/
|
60 |
+
static final PropertyKey<Integer> SMILES_HYDROGEN_COUNT = new PropertyKey<>("smilesHydrogenCount");
|
61 |
+
/** The oxidation number as specified by Roman numerals in the name*/
|
62 |
+
static final PropertyKey<Integer> OXIDATION_NUMBER = new PropertyKey<>("oxidationNumber");
|
63 |
+
/** Is this atom the carbon of an aldehyde? (however NOT formaldehyde)*/
|
64 |
+
static final PropertyKey<Boolean> ISALDEHYDE = new PropertyKey<>("isAldehyde");
|
65 |
+
/** Indicates that this atom is an anomeric atom in a cyclised carbohydrate*/
|
66 |
+
static final PropertyKey<Boolean> ISANOMERIC = new PropertyKey<>("isAnomeric");
|
67 |
+
/** Transient integer used to indicate traversal of fragments*/
|
68 |
+
static final PropertyKey<Integer> VISITED = new PropertyKey<>("visited");
|
69 |
+
|
70 |
+
/**The fragment to which the atom belongs.*/
|
71 |
+
private Fragment frag;
|
72 |
+
|
73 |
+
/** Whether an atom is part of a delocalised set of double bonds. A double bond in a kekule structure
|
74 |
+
* can be mapped to a single bond with this attribute set to true on both atoms that were in the double bond
|
75 |
+
* For example, benzene could be temporarily represented by six singly-bonded atoms, each with a set
|
76 |
+
* spare valency attribute , and later converted into a fully-specified valence structure.*/
|
77 |
+
private boolean spareValency = false;
|
78 |
+
|
79 |
+
/**The total bond order of all bonds that are expected to be used for inter fragment bonding
|
80 |
+
* e.g. in butan-2-ylidene this would be 2 for the atom at position 2 and 0 for the other 3 */
|
81 |
+
private int outValency = 0;
|
82 |
+
|
83 |
+
/** Null by default or set by the lambda convention.*/
|
84 |
+
private Integer lambdaConventionValency;
|
85 |
+
|
86 |
+
/** Null by default or set by the SMILES builder*/
|
87 |
+
private Integer minimumValency;
|
88 |
+
|
89 |
+
/** Can this atom have implicit hydrogen? True unless explicitly set otherwise otherwise*/
|
90 |
+
private boolean implicitHydrogenAllowed = true;
|
91 |
+
|
92 |
+
/** This is modified by ium/ide/ylium/uide and is used to choose the appropriate valency for the atom*/
|
93 |
+
private int protonsExplicitlyAddedOrRemoved = 0;
|
94 |
+
|
95 |
+
/**
|
96 |
+
* Takes same values as type in Fragment. Useful for discriminating suffix atoms from other atoms when a suffix is incorporated into another fragments
|
97 |
+
*/
|
98 |
+
private String type;
|
99 |
+
|
100 |
+
/**
|
101 |
+
* Is this atom in a ring. Default false. Set by the CycleDetector.
|
102 |
+
* Double bonds are only converted to spareValency if atom is in a ring
|
103 |
+
* Some suffixes have different meanings if an atom is part of a ring or not c.g. cyclohexanal vs ethanal
|
104 |
+
*/
|
105 |
+
private boolean atomIsInACycle = false;
|
106 |
+
|
107 |
+
/**
|
108 |
+
* Builds an Atom from scratch.
|
109 |
+
* GENERALLY EXCEPT FOR TESTING SHOULD NOT BE CALLED EXCEPT FROM THE FRAGMANAGER
|
110 |
+
* @param id The ID number, unique to the atom in the molecule being built
|
111 |
+
* @param chemlEl The chemical element
|
112 |
+
* @param frag the Fragment to contain the Atom
|
113 |
+
*/
|
114 |
+
Atom(int id, ChemEl chemlEl, Fragment frag) {
|
115 |
+
if (frag == null){
|
116 |
+
throw new IllegalArgumentException("Atom is not in a fragment!");
|
117 |
+
}
|
118 |
+
if (chemlEl == null){
|
119 |
+
throw new IllegalArgumentException("Atom does not have an element!");
|
120 |
+
}
|
121 |
+
this.frag = frag;
|
122 |
+
this.id = id;
|
123 |
+
this.chemEl = chemlEl;
|
124 |
+
this.type =frag.getType();
|
125 |
+
}
|
126 |
+
|
127 |
+
/** Used to build a DUMMY atom.
|
128 |
+
* Does not have an id/frag/type as would be expected for a proper atom
|
129 |
+
* @param chemlEl The chemical element
|
130 |
+
*/
|
131 |
+
Atom(ChemEl chemlEl){
|
132 |
+
this.chemEl = chemlEl;
|
133 |
+
this.id = 0;
|
134 |
+
}
|
135 |
+
|
136 |
+
/**
|
137 |
+
* Uses the lambdaConventionValency or if that is not available
|
138 |
+
* the default valency assuming this is >= the current valency
|
139 |
+
* If not then allowed the chemically sensible valencies of the atom are checked with the one that is closest and >= to the current valency
|
140 |
+
* being returned. If the valency has still not been determined the current valency i.e. assuming the atom to have 0 implicit hydrogen is returned.
|
141 |
+
* This is the correct behaviour for inorganics. For p block elements it means that OPSIN does not believe the atom to be in a valid valency (too high)
|
142 |
+
*
|
143 |
+
* if considerOutValency is true, the valency that will be used to form bonds using the outAtoms is
|
144 |
+
* taken into account i.e. if any radicals were used to form bonds
|
145 |
+
* @param considerOutValency
|
146 |
+
* @return
|
147 |
+
*/
|
148 |
+
int determineValency(boolean considerOutValency) {
|
149 |
+
if (lambdaConventionValency != null){
|
150 |
+
return lambdaConventionValency + protonsExplicitlyAddedOrRemoved;
|
151 |
+
}
|
152 |
+
int currentValency = getIncomingValency();
|
153 |
+
if (considerOutValency){
|
154 |
+
currentValency += outValency;
|
155 |
+
}
|
156 |
+
Integer calculatedMinValency = minimumValency == null ? null : minimumValency + protonsExplicitlyAddedOrRemoved;
|
157 |
+
if (charge ==0 || protonsExplicitlyAddedOrRemoved != 0){
|
158 |
+
Integer defaultValency = ValencyChecker.getDefaultValency(chemEl);
|
159 |
+
if (defaultValency != null){
|
160 |
+
defaultValency += protonsExplicitlyAddedOrRemoved;
|
161 |
+
if (currentValency <= defaultValency && (calculatedMinValency == null || defaultValency >= calculatedMinValency)){
|
162 |
+
return defaultValency;
|
163 |
+
}
|
164 |
+
}
|
165 |
+
}
|
166 |
+
Integer[] possibleValencies = ValencyChecker.getPossibleValencies(chemEl, charge);
|
167 |
+
if (possibleValencies != null) {
|
168 |
+
if (calculatedMinValency != null && calculatedMinValency >= currentValency){
|
169 |
+
return calculatedMinValency;
|
170 |
+
}
|
171 |
+
for (Integer possibleValency : possibleValencies) {
|
172 |
+
if (calculatedMinValency != null && possibleValency < calculatedMinValency){
|
173 |
+
continue;
|
174 |
+
}
|
175 |
+
if (currentValency <= possibleValency){
|
176 |
+
return possibleValency;
|
177 |
+
}
|
178 |
+
}
|
179 |
+
}
|
180 |
+
if (calculatedMinValency != null && calculatedMinValency >= currentValency){
|
181 |
+
return calculatedMinValency;
|
182 |
+
}
|
183 |
+
else{
|
184 |
+
return currentValency;
|
185 |
+
}
|
186 |
+
}
|
187 |
+
|
188 |
+
/**Adds a locant to the Atom. Other locants are preserved.
|
189 |
+
* Also associates the locant with the atom in the parent fragments hash
|
190 |
+
*
|
191 |
+
* @param locant The new locant
|
192 |
+
*/
|
193 |
+
void addLocant(String locant) {
|
194 |
+
locants.add(locant);
|
195 |
+
frag.addMappingToAtomLocantMap(locant, this);
|
196 |
+
}
|
197 |
+
|
198 |
+
/**Replaces all existing locants with a new one.
|
199 |
+
*
|
200 |
+
* @param locant The new locant
|
201 |
+
*/
|
202 |
+
void replaceLocants(String locant) {
|
203 |
+
clearLocants();
|
204 |
+
addLocant(locant);
|
205 |
+
}
|
206 |
+
|
207 |
+
void removeLocant(String locantToRemove) {
|
208 |
+
int locantArraySize = locants.size();
|
209 |
+
for (int i = locantArraySize -1; i >=0 ; i--) {
|
210 |
+
if (locants.get(i).equals(locantToRemove)){
|
211 |
+
locants.remove(i);
|
212 |
+
frag.removeMappingFromAtomLocantMap(locantToRemove);
|
213 |
+
}
|
214 |
+
}
|
215 |
+
}
|
216 |
+
|
217 |
+
/**Removes all locants from the Atom.
|
218 |
+
*
|
219 |
+
*/
|
220 |
+
void clearLocants() {
|
221 |
+
for (int i = 0, l = locants.size(); i < l; i++) {
|
222 |
+
frag.removeMappingFromAtomLocantMap(locants.get(i));
|
223 |
+
}
|
224 |
+
locants.clear();
|
225 |
+
}
|
226 |
+
|
227 |
+
/**
|
228 |
+
* Removes only elementSymbolLocants: e.g. N, S', Se
|
229 |
+
*/
|
230 |
+
void removeElementSymbolLocants() {
|
231 |
+
for (int i = locants.size() - 1; i >= 0; i--) {
|
232 |
+
String locant = locants.get(i);
|
233 |
+
if (MATCH_ELEMENT_SYMBOL_LOCANT.matcher(locant).matches()){
|
234 |
+
frag.removeMappingFromAtomLocantMap(locant);
|
235 |
+
locants.remove(i);
|
236 |
+
}
|
237 |
+
}
|
238 |
+
}
|
239 |
+
|
240 |
+
/**
|
241 |
+
* Removes all locants other than elementSymbolLocants (e.g. N, S', Se)
|
242 |
+
* Hence removes numeric locants and greek locants
|
243 |
+
*/
|
244 |
+
void removeLocantsOtherThanElementSymbolLocants() {
|
245 |
+
for (int i = locants.size() - 1; i >= 0; i--) {
|
246 |
+
String locant = locants.get(i);
|
247 |
+
if (!MATCH_ELEMENT_SYMBOL_LOCANT.matcher(locant).matches()){
|
248 |
+
frag.removeMappingFromAtomLocantMap(locant);
|
249 |
+
locants.remove(i);
|
250 |
+
}
|
251 |
+
}
|
252 |
+
}
|
253 |
+
|
254 |
+
/**Checks if the Atom has a given locant.
|
255 |
+
*
|
256 |
+
* @param locant The locant to test for
|
257 |
+
* @return true if it has, false if not
|
258 |
+
*/
|
259 |
+
boolean hasLocant(String locant) {
|
260 |
+
if (locants.contains(locant)) {
|
261 |
+
return true;
|
262 |
+
}
|
263 |
+
Matcher m = MATCH_AMINOACID_STYLE_LOCANT.matcher(locant);
|
264 |
+
if (m.matches()){//e.g. N'5
|
265 |
+
if (chemEl.toString().equals(m.group(1))){//element symbol
|
266 |
+
if (!m.group(2).equals("") && (!hasLocant(m.group(1) +m.group(2)))){//has primes
|
267 |
+
return false;//must have exact locant e.g. N'
|
268 |
+
}
|
269 |
+
if (OpsinTools.depthFirstSearchForNonSuffixAtomWithLocant(this, m.group(3)) != null){
|
270 |
+
return true;
|
271 |
+
}
|
272 |
+
}
|
273 |
+
}
|
274 |
+
return false;
|
275 |
+
}
|
276 |
+
|
277 |
+
/**Gets the first locant for the Atom. This may be the locant that was initially
|
278 |
+
* specified, or the most recent locant specified using replaceLocant, or first
|
279 |
+
* locant to be added since the last invocation of clearLocants.
|
280 |
+
*
|
281 |
+
* @return The locant, or null if there is no locant
|
282 |
+
*/
|
283 |
+
String getFirstLocant() {
|
284 |
+
return locants.size() > 0 ? locants.get(0) : null;
|
285 |
+
}
|
286 |
+
|
287 |
+
/**Returns the array of locants containing all locants associated with the atom
|
288 |
+
*
|
289 |
+
* @return The list of locants (may be empty)
|
290 |
+
*/
|
291 |
+
List<String> getLocants() {
|
292 |
+
return Collections.unmodifiableList(locants);
|
293 |
+
}
|
294 |
+
|
295 |
+
/**Returns the subset of the locants which are element symbol locants e.g. N, S', Se
|
296 |
+
*
|
297 |
+
* @return The list of locants (may be empty)
|
298 |
+
*/
|
299 |
+
List<String> getElementSymbolLocants() {
|
300 |
+
List<String> elementSymbolLocants = new ArrayList<>(1);
|
301 |
+
for (int i = 0, l = locants.size(); i < l; i++) {
|
302 |
+
String locant = locants.get(i);
|
303 |
+
if (MATCH_ELEMENT_SYMBOL_LOCANT.matcher(locant).matches()) {
|
304 |
+
elementSymbolLocants.add(locant);
|
305 |
+
}
|
306 |
+
}
|
307 |
+
return elementSymbolLocants;
|
308 |
+
}
|
309 |
+
|
310 |
+
void setFrag(Fragment f) {
|
311 |
+
frag = f;
|
312 |
+
}
|
313 |
+
|
314 |
+
Fragment getFrag() {
|
315 |
+
return frag;
|
316 |
+
}
|
317 |
+
|
318 |
+
/**Gets the ID of the atom.
|
319 |
+
*
|
320 |
+
* @return The ID of the atom
|
321 |
+
*/
|
322 |
+
int getID() {
|
323 |
+
return id;
|
324 |
+
}
|
325 |
+
|
326 |
+
/**Gets the chemical element corresponding to the element of the atom.
|
327 |
+
*
|
328 |
+
* @return The chemical element corresponding to the element of the atom
|
329 |
+
*/
|
330 |
+
ChemEl getElement() {
|
331 |
+
return chemEl;
|
332 |
+
}
|
333 |
+
|
334 |
+
/**Sets the chemical element corresponding to the element of the atom.
|
335 |
+
*
|
336 |
+
* @param chemEl The chemical element corresponding to the element of the atom
|
337 |
+
*/
|
338 |
+
void setElement(ChemEl chemEl) {
|
339 |
+
this.chemEl = chemEl;
|
340 |
+
}
|
341 |
+
|
342 |
+
/**Gets the formal charge on the atom.
|
343 |
+
*
|
344 |
+
* @return The formal charge on the atom
|
345 |
+
*/
|
346 |
+
int getCharge() {
|
347 |
+
return charge;
|
348 |
+
}
|
349 |
+
|
350 |
+
/**Modifies the charge of this atom by the amount given. This can be any integer
|
351 |
+
* The number of protons changed is noted so as to calculate the correct valency for the atom. This can be any integer.
|
352 |
+
* For example ide is the loss of a proton so is charge=-1, protons =-1
|
353 |
+
* @param charge
|
354 |
+
* @param protons
|
355 |
+
*/
|
356 |
+
void addChargeAndProtons(int charge, int protons){
|
357 |
+
this.charge += charge;
|
358 |
+
protonsExplicitlyAddedOrRemoved+=protons;
|
359 |
+
}
|
360 |
+
|
361 |
+
/**Sets the formal charge on the atom.
|
362 |
+
* NOTE: make sure to update protonsExplicitlyAddedOrRemoved if necessary
|
363 |
+
*
|
364 |
+
* @param c The formal charge on the atom
|
365 |
+
*/
|
366 |
+
void setCharge(int c) {
|
367 |
+
charge = c;
|
368 |
+
}
|
369 |
+
|
370 |
+
/**
|
371 |
+
* Sets the formal charge and number of protonsExplicitlyAddedOrRemoved to 0
|
372 |
+
*/
|
373 |
+
void neutraliseCharge() {
|
374 |
+
charge = 0;
|
375 |
+
protonsExplicitlyAddedOrRemoved = 0;
|
376 |
+
}
|
377 |
+
|
378 |
+
/**
|
379 |
+
* Gets the mass number of the atom or null if not explicitly defined
|
380 |
+
* e.g. 3 for tritium
|
381 |
+
* @return
|
382 |
+
*/
|
383 |
+
Integer getIsotope() {
|
384 |
+
return isotope;
|
385 |
+
}
|
386 |
+
|
387 |
+
/**
|
388 |
+
* Sets the mass number of the atom explicitly
|
389 |
+
* @param isotope
|
390 |
+
*/
|
391 |
+
void setIsotope(Integer isotope) {
|
392 |
+
if (isotope != null && isotope < chemEl.ATOMIC_NUM) {
|
393 |
+
throw new RuntimeException("Isotopic mass cannot be less than the element's number of protons: " + chemEl.toString() + " " + isotope + " < " + chemEl.ATOMIC_NUM );
|
394 |
+
}
|
395 |
+
this.isotope = isotope;
|
396 |
+
}
|
397 |
+
|
398 |
+
/**Adds a bond to the atom
|
399 |
+
*
|
400 |
+
* @param b The bond to be added
|
401 |
+
*/
|
402 |
+
void addBond(Bond b) {
|
403 |
+
if (bonds.contains(b)){
|
404 |
+
throw new IllegalArgumentException("Atom already has given bond (This is not allowed as this would give two bonds between the same atoms!)");
|
405 |
+
}
|
406 |
+
bonds.add(b);
|
407 |
+
}
|
408 |
+
|
409 |
+
/**Removes a bond to the atom
|
410 |
+
*
|
411 |
+
* @param b The bond to be removed
|
412 |
+
* @return whether bond was present
|
413 |
+
*/
|
414 |
+
boolean removeBond(Bond b) {
|
415 |
+
return bonds.remove(b);
|
416 |
+
}
|
417 |
+
|
418 |
+
/**Calculates the number of bonds connecting to the atom, excluding bonds to implicit
|
419 |
+
* hydrogens. Double bonds count as
|
420 |
+
* two bonds, etc. Eg ethene - both C's have an incoming valency of 2.
|
421 |
+
*
|
422 |
+
* @return Incoming Valency
|
423 |
+
*/
|
424 |
+
int getIncomingValency() {
|
425 |
+
int v = 0;
|
426 |
+
for (int i = 0, len = bonds.size(); i < len; i++) {
|
427 |
+
v += bonds.get(i).getOrder();
|
428 |
+
}
|
429 |
+
return v;
|
430 |
+
}
|
431 |
+
|
432 |
+
int getProtonsExplicitlyAddedOrRemoved() {
|
433 |
+
return protonsExplicitlyAddedOrRemoved;
|
434 |
+
}
|
435 |
+
|
436 |
+
void setProtonsExplicitlyAddedOrRemoved(int protonsExplicitlyAddedOrRemoved) {
|
437 |
+
this.protonsExplicitlyAddedOrRemoved = protonsExplicitlyAddedOrRemoved;
|
438 |
+
}
|
439 |
+
|
440 |
+
/**Does the atom have spare valency to form double bonds?
|
441 |
+
*
|
442 |
+
* @return true if atom has spare valency
|
443 |
+
*/
|
444 |
+
boolean hasSpareValency() {
|
445 |
+
return spareValency;
|
446 |
+
}
|
447 |
+
|
448 |
+
/**Set whether an atom has spare valency
|
449 |
+
*
|
450 |
+
* @param sv The spare valency
|
451 |
+
*/
|
452 |
+
void setSpareValency(boolean sv) {
|
453 |
+
spareValency = sv;
|
454 |
+
}
|
455 |
+
|
456 |
+
/**Gets the total bond order of the bonds expected to be created from this atom for inter fragment bonding
|
457 |
+
*
|
458 |
+
* @return The outValency
|
459 |
+
*/
|
460 |
+
int getOutValency() {
|
461 |
+
return outValency;
|
462 |
+
}
|
463 |
+
|
464 |
+
/**Adds to the total bond order of the bonds expected to be created from this atom for inter fragment bonding
|
465 |
+
*
|
466 |
+
* @param outV The outValency to be added
|
467 |
+
*/
|
468 |
+
void addOutValency(int outV) {
|
469 |
+
outValency += outV;
|
470 |
+
}
|
471 |
+
|
472 |
+
List<Bond> getBonds() {
|
473 |
+
return Collections.unmodifiableList(bonds);
|
474 |
+
}
|
475 |
+
|
476 |
+
int getBondCount() {
|
477 |
+
return bonds.size();
|
478 |
+
}
|
479 |
+
|
480 |
+
/**Gets a list of atoms that connect to the atom
|
481 |
+
*
|
482 |
+
* @return The list of atoms connected to the atom
|
483 |
+
*/
|
484 |
+
List<Atom> getAtomNeighbours(){
|
485 |
+
int bondCount = bonds.size();
|
486 |
+
List<Atom> results = new ArrayList<>(bondCount);
|
487 |
+
for (int i = 0; i < bondCount; i++) {
|
488 |
+
results.add(bonds.get(i).getOtherAtom(this));
|
489 |
+
}
|
490 |
+
return results;
|
491 |
+
}
|
492 |
+
|
493 |
+
Integer getLambdaConventionValency() {
|
494 |
+
return lambdaConventionValency;
|
495 |
+
}
|
496 |
+
|
497 |
+
void setLambdaConventionValency(Integer valency) {
|
498 |
+
this.lambdaConventionValency = valency;
|
499 |
+
}
|
500 |
+
|
501 |
+
String getType() {
|
502 |
+
return type;
|
503 |
+
}
|
504 |
+
|
505 |
+
void setType(String type) {
|
506 |
+
this.type = type;
|
507 |
+
}
|
508 |
+
|
509 |
+
boolean getAtomIsInACycle() {
|
510 |
+
return atomIsInACycle;
|
511 |
+
}
|
512 |
+
|
513 |
+
/**
|
514 |
+
* Sets whether atom is in a cycle, true if it is
|
515 |
+
* @param atomIsInACycle
|
516 |
+
*/
|
517 |
+
void setAtomIsInACycle(boolean atomIsInACycle) {
|
518 |
+
this.atomIsInACycle = atomIsInACycle;
|
519 |
+
}
|
520 |
+
|
521 |
+
AtomParity getAtomParity() {
|
522 |
+
return atomParity;
|
523 |
+
}
|
524 |
+
|
525 |
+
void setAtomParity(AtomParity atomParity) {
|
526 |
+
this.atomParity = atomParity;
|
527 |
+
}
|
528 |
+
|
529 |
+
void setAtomParity(Atom[] atomRefs4, int parity) {
|
530 |
+
atomParity = new AtomParity(atomRefs4, parity);
|
531 |
+
}
|
532 |
+
|
533 |
+
Integer getMinimumValency() {
|
534 |
+
return minimumValency;
|
535 |
+
}
|
536 |
+
|
537 |
+
void setMinimumValency(Integer minimumValency) {
|
538 |
+
this.minimumValency = minimumValency;
|
539 |
+
}
|
540 |
+
|
541 |
+
boolean getImplicitHydrogenAllowed() {
|
542 |
+
return implicitHydrogenAllowed;
|
543 |
+
}
|
544 |
+
|
545 |
+
void setImplicitHydrogenAllowed(boolean implicitHydrogenAllowed) {
|
546 |
+
this.implicitHydrogenAllowed = implicitHydrogenAllowed;
|
547 |
+
}
|
548 |
+
|
549 |
+
@SuppressWarnings("unchecked")
|
550 |
+
<T> T getProperty(PropertyKey<T> propertyKey) {
|
551 |
+
return (T) properties.get(propertyKey);
|
552 |
+
}
|
553 |
+
|
554 |
+
<T> void setProperty(PropertyKey<T> propertyKey, T value) {
|
555 |
+
properties.put(propertyKey, value);
|
556 |
+
}
|
557 |
+
|
558 |
+
/**
|
559 |
+
* Checks if the valency of this atom allows it to have the amount of spare valency that the atom currently has
|
560 |
+
* May reduce the spare valency on the atom to be consistent with the valency of the atom
|
561 |
+
* Does nothing if the atom has no spare valency
|
562 |
+
* @param takeIntoAccountExternalBonds
|
563 |
+
* @throws StructureBuildingException
|
564 |
+
*/
|
565 |
+
void ensureSVIsConsistantWithValency(boolean takeIntoAccountExternalBonds) throws StructureBuildingException {
|
566 |
+
if (spareValency) {
|
567 |
+
Integer maxValency;
|
568 |
+
if (lambdaConventionValency != null) {
|
569 |
+
maxValency = lambdaConventionValency + protonsExplicitlyAddedOrRemoved;
|
570 |
+
}
|
571 |
+
else{
|
572 |
+
Integer hwValency = ValencyChecker.getHWValency(chemEl);
|
573 |
+
if (hwValency == null) {
|
574 |
+
throw new StructureBuildingException(chemEl + " is not expected to be aromatic!");
|
575 |
+
}
|
576 |
+
maxValency = hwValency + protonsExplicitlyAddedOrRemoved;
|
577 |
+
}
|
578 |
+
int maxSpareValency;
|
579 |
+
if (takeIntoAccountExternalBonds) {
|
580 |
+
maxSpareValency = maxValency - getIncomingValency() - outValency;
|
581 |
+
}
|
582 |
+
else{
|
583 |
+
maxSpareValency = maxValency - frag.getIntraFragmentIncomingValency(this);
|
584 |
+
}
|
585 |
+
if (maxSpareValency < 1) {
|
586 |
+
setSpareValency(false);
|
587 |
+
}
|
588 |
+
}
|
589 |
+
}
|
590 |
+
|
591 |
+
/**
|
592 |
+
* Returns the the first bond in the atom's bond list or null if it has no bonds
|
593 |
+
* @return
|
594 |
+
*/
|
595 |
+
Bond getFirstBond() {
|
596 |
+
if (bonds.size() > 0){
|
597 |
+
return bonds.get(0);
|
598 |
+
}
|
599 |
+
return null;
|
600 |
+
}
|
601 |
+
|
602 |
+
/**Gets the bond between this atom and a given atom
|
603 |
+
*
|
604 |
+
* @param a The atom to find a bond to
|
605 |
+
* @return The bond, or null if there is no bond
|
606 |
+
*/
|
607 |
+
Bond getBondToAtom(Atom a) {
|
608 |
+
for (int i = 0, l = bonds.size(); i < l; i++) {
|
609 |
+
Bond b = bonds.get(i);
|
610 |
+
if(b.getOtherAtom(this) == a){
|
611 |
+
return b;
|
612 |
+
}
|
613 |
+
}
|
614 |
+
return null;
|
615 |
+
}
|
616 |
+
|
617 |
+
/**Gets the bond between this atom and a given atom, throwing if fails.
|
618 |
+
*
|
619 |
+
* @param a The atom to find a bond to
|
620 |
+
* @return The bond found
|
621 |
+
* @throws StructureBuildingException
|
622 |
+
*/
|
623 |
+
Bond getBondToAtomOrThrow(Atom a) throws StructureBuildingException {
|
624 |
+
Bond b = getBondToAtom(a);
|
625 |
+
if(b == null){
|
626 |
+
throw new StructureBuildingException("Couldn't find specified bond");
|
627 |
+
}
|
628 |
+
return b;
|
629 |
+
}
|
630 |
+
|
631 |
+
/**
|
632 |
+
* Set the stereo group, ignored if the atom does not have any parity info.
|
633 |
+
* @param stroGrp the stereo group.
|
634 |
+
*/
|
635 |
+
public void setStereoGroup(StereoGroup stroGrp) {
|
636 |
+
if (atomParity != null)
|
637 |
+
atomParity.setStereoGroup(stroGrp);
|
638 |
+
}
|
639 |
+
|
640 |
+
/**
|
641 |
+
* Access the stereo group on the atom parity info.
|
642 |
+
* @return the stereo group
|
643 |
+
*/
|
644 |
+
public StereoGroup getStereoGroup() {
|
645 |
+
return atomParity != null ? atomParity.getStereoGroup() : StereoGroup.Unk;
|
646 |
+
}
|
647 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AtomParity.java
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
/**
|
4 |
+
* Hold information about 4 atoms and their chiral determinant allowing the description of tetrahedral stereochemistry
|
5 |
+
* @author dl387
|
6 |
+
*
|
7 |
+
*/
|
8 |
+
class AtomParity {
|
9 |
+
/**
|
10 |
+
* A dummy hydrogen atom. Used to represent an implicit hydrogen that is attached to a tetrahedral stereocentre
|
11 |
+
*/
|
12 |
+
static final Atom hydrogen = new Atom(ChemEl.H);
|
13 |
+
/**
|
14 |
+
* A dummy hydrogen atom. Used to represent the hydrogen that replaced a hydroxy at a tetrahedral stereocentre
|
15 |
+
*/
|
16 |
+
static final Atom deoxyHydrogen = new Atom(ChemEl.H);
|
17 |
+
private Atom[] atomRefs4;
|
18 |
+
private int parity;
|
19 |
+
private StereoGroup stereoGroup = StereoGroup.Abs;
|
20 |
+
private int stereoGroupNum = 1;
|
21 |
+
|
22 |
+
/**
|
23 |
+
* Create an atomParity from an array of 4 atoms and the parity of the chiral determinant
|
24 |
+
* @param atomRefs4
|
25 |
+
* @param parity
|
26 |
+
*/
|
27 |
+
AtomParity(Atom[] atomRefs4, int parity){
|
28 |
+
if (atomRefs4.length !=4){
|
29 |
+
throw new IllegalArgumentException("atomRefs4 must contain references to 4 atoms");
|
30 |
+
}
|
31 |
+
this.atomRefs4 = atomRefs4;
|
32 |
+
this.parity = parity;
|
33 |
+
}
|
34 |
+
|
35 |
+
Atom[] getAtomRefs4() {
|
36 |
+
return atomRefs4;
|
37 |
+
}
|
38 |
+
void setAtomRefs4(Atom[] atomRefs4) {
|
39 |
+
this.atomRefs4 = atomRefs4;
|
40 |
+
}
|
41 |
+
int getParity() {
|
42 |
+
return parity;
|
43 |
+
}
|
44 |
+
void setParity(int parity) {
|
45 |
+
this.parity = parity;
|
46 |
+
}
|
47 |
+
|
48 |
+
public void setStereoGroup(StereoGroup stroGrp, int num) {
|
49 |
+
this.stereoGroup = stroGrp;
|
50 |
+
this.stereoGroupNum = num;
|
51 |
+
}
|
52 |
+
|
53 |
+
public void setStereoGroup(StereoGroup stroGrp) {
|
54 |
+
setStereoGroup(stroGrp, 1);
|
55 |
+
}
|
56 |
+
|
57 |
+
public StereoGroup getStereoGroup() {
|
58 |
+
return this.stereoGroup;
|
59 |
+
}
|
60 |
+
|
61 |
+
public int getStereoGroupNum() {
|
62 |
+
return this.stereoGroupNum;
|
63 |
+
}
|
64 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AtomProperties.java
ADDED
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.util.EnumMap;
|
4 |
+
import java.util.Map;
|
5 |
+
|
6 |
+
/**
|
7 |
+
* Holds useful atomic properties
|
8 |
+
* @author dl387
|
9 |
+
*
|
10 |
+
*/
|
11 |
+
class AtomProperties {
|
12 |
+
|
13 |
+
private static final Map<ChemEl, Double> elementToPaulingElectronegativity = new EnumMap<>(ChemEl.class);
|
14 |
+
private static final Map<ChemEl, Integer> elementToHwPriority = new EnumMap<>(ChemEl.class);
|
15 |
+
|
16 |
+
static{
|
17 |
+
elementToPaulingElectronegativity.put(ChemEl.H, 2.20);
|
18 |
+
elementToPaulingElectronegativity.put(ChemEl.Li, 0.98);
|
19 |
+
elementToPaulingElectronegativity.put(ChemEl.Be, 1.57);
|
20 |
+
elementToPaulingElectronegativity.put(ChemEl.B, 2.04);
|
21 |
+
elementToPaulingElectronegativity.put(ChemEl.C, 2.55);
|
22 |
+
elementToPaulingElectronegativity.put(ChemEl.N, 3.04);
|
23 |
+
elementToPaulingElectronegativity.put(ChemEl.O, 3.44);
|
24 |
+
elementToPaulingElectronegativity.put(ChemEl.F, 3.98);
|
25 |
+
elementToPaulingElectronegativity.put(ChemEl.Na, 0.93);
|
26 |
+
elementToPaulingElectronegativity.put(ChemEl.Mg, 1.31);
|
27 |
+
elementToPaulingElectronegativity.put(ChemEl.Al, 1.61);
|
28 |
+
elementToPaulingElectronegativity.put(ChemEl.Si, 1.90);
|
29 |
+
elementToPaulingElectronegativity.put(ChemEl.P, 2.19);
|
30 |
+
elementToPaulingElectronegativity.put(ChemEl.S, 2.58);
|
31 |
+
elementToPaulingElectronegativity.put(ChemEl.Cl, 3.16);
|
32 |
+
elementToPaulingElectronegativity.put(ChemEl.K, 0.82);
|
33 |
+
elementToPaulingElectronegativity.put(ChemEl.Ca, 1.00);
|
34 |
+
elementToPaulingElectronegativity.put(ChemEl.Sc, 1.36);
|
35 |
+
elementToPaulingElectronegativity.put(ChemEl.Ti, 1.54);
|
36 |
+
elementToPaulingElectronegativity.put(ChemEl.V, 1.63);
|
37 |
+
elementToPaulingElectronegativity.put(ChemEl.Cr, 1.66);
|
38 |
+
elementToPaulingElectronegativity.put(ChemEl.Mn, 1.55);
|
39 |
+
elementToPaulingElectronegativity.put(ChemEl.Fe, 1.83);
|
40 |
+
elementToPaulingElectronegativity.put(ChemEl.Co, 1.88);
|
41 |
+
elementToPaulingElectronegativity.put(ChemEl.Ni, 1.91);
|
42 |
+
elementToPaulingElectronegativity.put(ChemEl.Cu, 1.90);
|
43 |
+
elementToPaulingElectronegativity.put(ChemEl.Zn, 1.65);
|
44 |
+
elementToPaulingElectronegativity.put(ChemEl.Ga, 1.81);
|
45 |
+
elementToPaulingElectronegativity.put(ChemEl.Ge, 2.01);
|
46 |
+
elementToPaulingElectronegativity.put(ChemEl.As, 2.18);
|
47 |
+
elementToPaulingElectronegativity.put(ChemEl.Se, 2.55);
|
48 |
+
elementToPaulingElectronegativity.put(ChemEl.Br, 2.96);
|
49 |
+
elementToPaulingElectronegativity.put(ChemEl.Kr, 3.00);
|
50 |
+
elementToPaulingElectronegativity.put(ChemEl.Rb, 0.82);
|
51 |
+
elementToPaulingElectronegativity.put(ChemEl.Sr, 0.95);
|
52 |
+
elementToPaulingElectronegativity.put(ChemEl.Y, 1.22);
|
53 |
+
elementToPaulingElectronegativity.put(ChemEl.Zr, 1.33);
|
54 |
+
elementToPaulingElectronegativity.put(ChemEl.Nb, 1.6);
|
55 |
+
elementToPaulingElectronegativity.put(ChemEl.Mo, 2.16);
|
56 |
+
elementToPaulingElectronegativity.put(ChemEl.Tc, 1.9);
|
57 |
+
elementToPaulingElectronegativity.put(ChemEl.Ru, 2.2);
|
58 |
+
elementToPaulingElectronegativity.put(ChemEl.Rh, 2.28);
|
59 |
+
elementToPaulingElectronegativity.put(ChemEl.Pd, 2.20);
|
60 |
+
elementToPaulingElectronegativity.put(ChemEl.Ag, 1.93);
|
61 |
+
elementToPaulingElectronegativity.put(ChemEl.Cd, 1.69);
|
62 |
+
elementToPaulingElectronegativity.put(ChemEl.In, 1.78);
|
63 |
+
elementToPaulingElectronegativity.put(ChemEl.Sn, 1.96);
|
64 |
+
elementToPaulingElectronegativity.put(ChemEl.Sb, 2.05);
|
65 |
+
elementToPaulingElectronegativity.put(ChemEl.Te, 2.1);
|
66 |
+
elementToPaulingElectronegativity.put(ChemEl.I, 2.66);
|
67 |
+
elementToPaulingElectronegativity.put(ChemEl.Xe, 2.60);
|
68 |
+
elementToPaulingElectronegativity.put(ChemEl.Cs, 0.79);
|
69 |
+
elementToPaulingElectronegativity.put(ChemEl.Ba, 0.89);
|
70 |
+
elementToPaulingElectronegativity.put(ChemEl.La, 1.1);
|
71 |
+
elementToPaulingElectronegativity.put(ChemEl.Ce, 1.12);
|
72 |
+
elementToPaulingElectronegativity.put(ChemEl.Pr, 1.13);
|
73 |
+
elementToPaulingElectronegativity.put(ChemEl.Nd, 1.14);
|
74 |
+
elementToPaulingElectronegativity.put(ChemEl.Pm, 1.13);
|
75 |
+
elementToPaulingElectronegativity.put(ChemEl.Sm, 1.17);
|
76 |
+
elementToPaulingElectronegativity.put(ChemEl.Eu, 1.2);
|
77 |
+
elementToPaulingElectronegativity.put(ChemEl.Gd, 1.2);
|
78 |
+
elementToPaulingElectronegativity.put(ChemEl.Tb, 1.1);
|
79 |
+
elementToPaulingElectronegativity.put(ChemEl.Dy, 1.22);
|
80 |
+
elementToPaulingElectronegativity.put(ChemEl.Ho, 1.23);
|
81 |
+
elementToPaulingElectronegativity.put(ChemEl.Er, 1.24);
|
82 |
+
elementToPaulingElectronegativity.put(ChemEl.Tm, 1.25);
|
83 |
+
elementToPaulingElectronegativity.put(ChemEl.Yb, 1.1);
|
84 |
+
elementToPaulingElectronegativity.put(ChemEl.Lu, 1.27);
|
85 |
+
elementToPaulingElectronegativity.put(ChemEl.Hf, 1.3);
|
86 |
+
elementToPaulingElectronegativity.put(ChemEl.Ta, 1.5);
|
87 |
+
elementToPaulingElectronegativity.put(ChemEl.W, 2.36);
|
88 |
+
elementToPaulingElectronegativity.put(ChemEl.Re, 1.9);
|
89 |
+
elementToPaulingElectronegativity.put(ChemEl.Os, 2.2);
|
90 |
+
elementToPaulingElectronegativity.put(ChemEl.Ir, 2.20);
|
91 |
+
elementToPaulingElectronegativity.put(ChemEl.Pt, 2.28);
|
92 |
+
elementToPaulingElectronegativity.put(ChemEl.Au, 2.54);
|
93 |
+
elementToPaulingElectronegativity.put(ChemEl.Hg, 2.00);
|
94 |
+
elementToPaulingElectronegativity.put(ChemEl.Tl, 1.62);
|
95 |
+
elementToPaulingElectronegativity.put(ChemEl.Pb, 2.33);
|
96 |
+
elementToPaulingElectronegativity.put(ChemEl.Bi, 2.02);
|
97 |
+
elementToPaulingElectronegativity.put(ChemEl.Po, 2.0);
|
98 |
+
elementToPaulingElectronegativity.put(ChemEl.At, 2.2);
|
99 |
+
elementToPaulingElectronegativity.put(ChemEl.Rn, 2.2);
|
100 |
+
elementToPaulingElectronegativity.put(ChemEl.Fr, 0.7);
|
101 |
+
elementToPaulingElectronegativity.put(ChemEl.Ra, 0.9);
|
102 |
+
elementToPaulingElectronegativity.put(ChemEl.Ac, 1.1);
|
103 |
+
elementToPaulingElectronegativity.put(ChemEl.Th, 1.3);
|
104 |
+
elementToPaulingElectronegativity.put(ChemEl.Pa, 1.5);
|
105 |
+
elementToPaulingElectronegativity.put(ChemEl.U, 1.38);
|
106 |
+
elementToPaulingElectronegativity.put(ChemEl.Np, 1.36);
|
107 |
+
elementToPaulingElectronegativity.put(ChemEl.Pu, 1.28);
|
108 |
+
elementToPaulingElectronegativity.put(ChemEl.Am, 1.13);
|
109 |
+
elementToPaulingElectronegativity.put(ChemEl.Cm, 1.28);
|
110 |
+
elementToPaulingElectronegativity.put(ChemEl.Bk, 1.3);
|
111 |
+
elementToPaulingElectronegativity.put(ChemEl.Cf, 1.3);
|
112 |
+
elementToPaulingElectronegativity.put(ChemEl.Es, 1.3);
|
113 |
+
elementToPaulingElectronegativity.put(ChemEl.Fm, 1.3);
|
114 |
+
elementToPaulingElectronegativity.put(ChemEl.Md, 1.3);
|
115 |
+
elementToPaulingElectronegativity.put(ChemEl.No, 1.3);
|
116 |
+
elementToPaulingElectronegativity.put(ChemEl.Lr, 1.3);
|
117 |
+
|
118 |
+
elementToHwPriority.put(ChemEl.F, 23);
|
119 |
+
elementToHwPriority.put(ChemEl.Cl, 22);
|
120 |
+
elementToHwPriority.put(ChemEl.Br, 21);
|
121 |
+
elementToHwPriority.put(ChemEl.I, 20);
|
122 |
+
elementToHwPriority.put(ChemEl.O, 19);
|
123 |
+
elementToHwPriority.put(ChemEl.S, 18);
|
124 |
+
elementToHwPriority.put(ChemEl.Se, 17);
|
125 |
+
elementToHwPriority.put(ChemEl.Te, 16);
|
126 |
+
elementToHwPriority.put(ChemEl.N, 15);
|
127 |
+
elementToHwPriority.put(ChemEl.P, 14);
|
128 |
+
elementToHwPriority.put(ChemEl.As, 13);
|
129 |
+
elementToHwPriority.put(ChemEl.Sb, 12);
|
130 |
+
elementToHwPriority.put(ChemEl.Bi, 11);
|
131 |
+
elementToHwPriority.put(ChemEl.Si, 10);
|
132 |
+
elementToHwPriority.put(ChemEl.Ge, 9);
|
133 |
+
elementToHwPriority.put(ChemEl.Sn, 8);
|
134 |
+
elementToHwPriority.put(ChemEl.Pb, 7);
|
135 |
+
elementToHwPriority.put(ChemEl.B, 6);
|
136 |
+
elementToHwPriority.put(ChemEl.Al, 5);
|
137 |
+
elementToHwPriority.put(ChemEl.Ga, 4);
|
138 |
+
elementToHwPriority.put(ChemEl.In, 3);
|
139 |
+
elementToHwPriority.put(ChemEl.Tl, 2);
|
140 |
+
elementToHwPriority.put(ChemEl.Hg, 1);
|
141 |
+
}
|
142 |
+
|
143 |
+
/**
|
144 |
+
* Useful to give an indication of whether a bond is like to be ionic (diff >1.8), polar or covalent (diff < 1.2)
|
145 |
+
* @param chemEl
|
146 |
+
* @return
|
147 |
+
*/
|
148 |
+
static Double getPaulingElectronegativity(ChemEl chemEl) {
|
149 |
+
return elementToPaulingElectronegativity.get(chemEl);
|
150 |
+
}
|
151 |
+
|
152 |
+
/**
|
153 |
+
* Maps chemEl to the priority of that atom in Hantzch-Widman system. A higher value indicates a higher priority.
|
154 |
+
* @param chemEl
|
155 |
+
* @return
|
156 |
+
*/
|
157 |
+
static Integer getHwpriority(ChemEl chemEl) {
|
158 |
+
return elementToHwPriority.get(chemEl);
|
159 |
+
}
|
160 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Attribute.java
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
class Attribute {
|
4 |
+
|
5 |
+
private final String name;
|
6 |
+
private String value;
|
7 |
+
|
8 |
+
Attribute(String name, String value) {
|
9 |
+
this.name = name;
|
10 |
+
this.value = value;
|
11 |
+
}
|
12 |
+
|
13 |
+
/**
|
14 |
+
* Creates a copy
|
15 |
+
* @param attribute
|
16 |
+
*/
|
17 |
+
Attribute(Attribute attribute) {
|
18 |
+
this.name = attribute.getName();
|
19 |
+
this.value = attribute.getValue();
|
20 |
+
}
|
21 |
+
|
22 |
+
String getValue() {
|
23 |
+
return value;
|
24 |
+
}
|
25 |
+
|
26 |
+
String getName() {
|
27 |
+
return name;
|
28 |
+
}
|
29 |
+
|
30 |
+
void setValue(String value) {
|
31 |
+
this.value = value;
|
32 |
+
}
|
33 |
+
|
34 |
+
String toXML() {
|
35 |
+
return getName() + "=\"" + escapeText(value) + "\"";
|
36 |
+
}
|
37 |
+
|
38 |
+
public String toString() {
|
39 |
+
return name +"\t" + value;
|
40 |
+
}
|
41 |
+
|
42 |
+
private String escapeText(String s) {
|
43 |
+
StringBuilder result = new StringBuilder();
|
44 |
+
for (int i = 0, l = s.length(); i < l; i++) {
|
45 |
+
char c = s.charAt(i);
|
46 |
+
switch (c) {
|
47 |
+
case '\t':
|
48 |
+
result.append("	");
|
49 |
+
break;
|
50 |
+
case '\n':
|
51 |
+
result.append("
");
|
52 |
+
break;
|
53 |
+
case '\r':
|
54 |
+
result.append("
");
|
55 |
+
break;
|
56 |
+
case '"':
|
57 |
+
result.append(""");
|
58 |
+
break;
|
59 |
+
case '&':
|
60 |
+
result.append("&");
|
61 |
+
break;
|
62 |
+
case '<':
|
63 |
+
result.append("<");
|
64 |
+
break;
|
65 |
+
case '>':
|
66 |
+
result.append(">");
|
67 |
+
break;
|
68 |
+
default:
|
69 |
+
result.append(c);
|
70 |
+
}
|
71 |
+
}
|
72 |
+
return result.toString();
|
73 |
+
}
|
74 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AutomatonInitialiser.java
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.io.BufferedInputStream;
|
4 |
+
import java.io.IOException;
|
5 |
+
import java.io.InputStream;
|
6 |
+
import java.io.OutputStream;
|
7 |
+
import java.nio.charset.StandardCharsets;
|
8 |
+
|
9 |
+
import org.apache.logging.log4j.LogManager;
|
10 |
+
import org.apache.logging.log4j.Logger;
|
11 |
+
|
12 |
+
import dk.brics.automaton.Automaton;
|
13 |
+
import dk.brics.automaton.RegExp;
|
14 |
+
import dk.brics.automaton.RunAutomaton;
|
15 |
+
import dk.brics.automaton.SpecialOperations;
|
16 |
+
|
17 |
+
/**
|
18 |
+
* Handles storing and retrieving automata to/from files
|
19 |
+
* This is highly useful to do as building these deterministic automata from scratch can take minutes
|
20 |
+
* @author dl387
|
21 |
+
*
|
22 |
+
*/
|
23 |
+
class AutomatonInitialiser {
|
24 |
+
|
25 |
+
private static final Logger LOG = LogManager.getLogger(AutomatonInitialiser.class);
|
26 |
+
private final ResourceGetter resourceGetter;
|
27 |
+
|
28 |
+
AutomatonInitialiser(String resourcePath) {
|
29 |
+
resourceGetter = new ResourceGetter(resourcePath);
|
30 |
+
}
|
31 |
+
|
32 |
+
/**
|
33 |
+
* In preference serialised automata and their hashes will be looked for in the resource folder in your working directory
|
34 |
+
* If it cannot be found there then these files will be looked for in the standard resource folder
|
35 |
+
* (this is actually the standard behaviour of the resourceGetter but I'm reiterating it here as if the stored hash doesn't match
|
36 |
+
* the current hash then the creation of an updated serialised automaton and hash will occur in the working directory resource folder as the standard
|
37 |
+
* resource folder will not typically be writable)
|
38 |
+
* @param automatonName : A name for the automaton so that it can it can be saved/loaded from disk
|
39 |
+
* @param regex : the regex from which to build the RunAutomaton
|
40 |
+
* @param reverseAutomaton : should the automaton be reversed
|
41 |
+
* @param tableize: if true, a transition table is created which makes the run method faster in return of a higher memory usage (adds ~256kb)
|
42 |
+
* @return A RunAutomaton, may have been built from scratch or loaded from a file
|
43 |
+
*/
|
44 |
+
RunAutomaton loadAutomaton(String automatonName, String regex, boolean tableize, boolean reverseAutomaton) {
|
45 |
+
if (reverseAutomaton){
|
46 |
+
automatonName+="_reversed_";
|
47 |
+
}
|
48 |
+
try{
|
49 |
+
if (isAutomatonCached(automatonName, regex)) {
|
50 |
+
return loadCachedAutomaton(automatonName);
|
51 |
+
}
|
52 |
+
}
|
53 |
+
catch (IOException e) {
|
54 |
+
LOG.warn("Error loading cached automaton: "+automatonName, e);
|
55 |
+
}
|
56 |
+
RunAutomaton automaton = createAutomaton(regex, tableize, reverseAutomaton);
|
57 |
+
cacheAutomaton(automatonName, automaton, regex);
|
58 |
+
return automaton;
|
59 |
+
}
|
60 |
+
|
61 |
+
private boolean isAutomatonCached(String automatonName, String regex) {
|
62 |
+
String currentRegexHash = getRegexHash(regex);
|
63 |
+
String cachedRegexHash = getCachedRegexHash(automatonName);
|
64 |
+
return currentRegexHash.equals(cachedRegexHash);
|
65 |
+
}
|
66 |
+
|
67 |
+
private String getRegexHash(String regex) {
|
68 |
+
return Integer.toString(regex.hashCode());
|
69 |
+
}
|
70 |
+
|
71 |
+
private String getCachedRegexHash(String automatonName) {
|
72 |
+
/*This file contains the hashcode of the regex which was used to generate the automaton on the disk */
|
73 |
+
return resourceGetter.getFileContentsAsString(automatonName + "RegexHash.txt");
|
74 |
+
}
|
75 |
+
|
76 |
+
private RunAutomaton loadCachedAutomaton(String automatonName) throws IOException{
|
77 |
+
try (InputStream automatonInput = resourceGetter.getInputstreamFromFileName(automatonName +"SerialisedAutomaton.aut")){
|
78 |
+
return RunAutomaton.load(new BufferedInputStream(automatonInput));
|
79 |
+
} catch (Exception e) {
|
80 |
+
IOException ioe = new IOException("Error loading automaton");
|
81 |
+
ioe.initCause(e);
|
82 |
+
throw ioe;
|
83 |
+
}
|
84 |
+
}
|
85 |
+
|
86 |
+
private static RunAutomaton createAutomaton(String regex, boolean tableize, boolean reverseAutomaton) {
|
87 |
+
Automaton a = new RegExp(regex).toAutomaton();
|
88 |
+
if (reverseAutomaton){
|
89 |
+
SpecialOperations.reverse(a);
|
90 |
+
}
|
91 |
+
return new RunAutomaton(a, tableize);
|
92 |
+
}
|
93 |
+
|
94 |
+
private void cacheAutomaton(String automatonName, RunAutomaton automaton, String regex) {
|
95 |
+
try (OutputStream regexHashOutputStream = resourceGetter.getOutputStream(automatonName + "RegexHash.txt")) {
|
96 |
+
regexHashOutputStream.write(getRegexHash(regex).getBytes(StandardCharsets.UTF_8));
|
97 |
+
try (OutputStream automatonOutputStream = resourceGetter.getOutputStream(automatonName + "SerialisedAutomaton.aut")) {
|
98 |
+
automaton.store(automatonOutputStream);
|
99 |
+
}
|
100 |
+
} catch (IOException e) {
|
101 |
+
LOG.warn("Error serialising automaton: "+automatonName, e);
|
102 |
+
}
|
103 |
+
}
|
104 |
+
|
105 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Bond.java
ADDED
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import uk.ac.cam.ch.wwmm.opsin.BondStereo.BondStereoValue;
|
4 |
+
|
5 |
+
/**A bond, between two atoms.
|
6 |
+
*
|
7 |
+
* @author ptc24
|
8 |
+
* @author dl387
|
9 |
+
*
|
10 |
+
*/
|
11 |
+
class Bond {
|
12 |
+
/** The Atom the bond comes from */
|
13 |
+
private final Atom from;
|
14 |
+
/** The Atom the bond goes to */
|
15 |
+
private final Atom to;
|
16 |
+
/** The bond order */
|
17 |
+
private int order;
|
18 |
+
|
19 |
+
static enum SMILES_BOND_DIRECTION{
|
20 |
+
RSLASH,
|
21 |
+
LSLASH
|
22 |
+
}
|
23 |
+
/** If this bond was built from SMILES can be set to either RSLASH or LSLASH. Subsequently read to add a bondStereoElement
|
24 |
+
* null by default*/
|
25 |
+
private SMILES_BOND_DIRECTION smilesBondDirection = null;
|
26 |
+
|
27 |
+
/**
|
28 |
+
* Holds the bondStereo object associated with this bond
|
29 |
+
* null by default
|
30 |
+
*/
|
31 |
+
private BondStereo bondStereo = null;
|
32 |
+
|
33 |
+
/** DO NOT CALL DIRECTLY EXCEPT FOR TESTING
|
34 |
+
* Creates a new Bond.
|
35 |
+
*
|
36 |
+
* @param from The Atom the bond comes from.
|
37 |
+
* @param to The Atom the bond goes to.
|
38 |
+
* @param order The bond order.
|
39 |
+
*/
|
40 |
+
Bond(Atom from, Atom to, int order) {
|
41 |
+
if (from == to){
|
42 |
+
throw new IllegalArgumentException("Bonds must be made between different atoms");
|
43 |
+
}
|
44 |
+
if (order < 1 || order > 3){
|
45 |
+
throw new IllegalArgumentException("Bond order must be 1, 2 or 3");
|
46 |
+
}
|
47 |
+
if (from == null){
|
48 |
+
throw new IllegalArgumentException("From atom was null!");
|
49 |
+
}
|
50 |
+
if (to == null){
|
51 |
+
throw new IllegalArgumentException("To atom was null!");
|
52 |
+
}
|
53 |
+
this.from = from;
|
54 |
+
this.to = to;
|
55 |
+
this.order = order;
|
56 |
+
}
|
57 |
+
|
58 |
+
/**
|
59 |
+
* Gets from ID
|
60 |
+
* @return ID
|
61 |
+
*/
|
62 |
+
int getFrom() {
|
63 |
+
return from.getID();
|
64 |
+
}
|
65 |
+
|
66 |
+
/**
|
67 |
+
* Gets to ID
|
68 |
+
* @return ID
|
69 |
+
*/
|
70 |
+
int getTo() {
|
71 |
+
return to.getID();
|
72 |
+
}
|
73 |
+
|
74 |
+
/**Gets order.
|
75 |
+
* @return*/
|
76 |
+
int getOrder() {
|
77 |
+
return order;
|
78 |
+
}
|
79 |
+
|
80 |
+
/**Sets order.
|
81 |
+
* @param order*/
|
82 |
+
void setOrder(int order) {
|
83 |
+
this.order = order;
|
84 |
+
}
|
85 |
+
|
86 |
+
/**
|
87 |
+
* Gets from Atom
|
88 |
+
* @return Atom
|
89 |
+
*/
|
90 |
+
Atom getFromAtom() {
|
91 |
+
return from;
|
92 |
+
}
|
93 |
+
|
94 |
+
/**
|
95 |
+
* Gets to Atom
|
96 |
+
* @return Atom
|
97 |
+
*/
|
98 |
+
Atom getToAtom() {
|
99 |
+
return to;
|
100 |
+
}
|
101 |
+
|
102 |
+
/**Adds to the bond order.
|
103 |
+
*
|
104 |
+
* @param o The value to be added to the bond order.
|
105 |
+
*/
|
106 |
+
void addOrder(int o) {
|
107 |
+
order += o;
|
108 |
+
}
|
109 |
+
|
110 |
+
/**
|
111 |
+
* Returns either null or RSLASH or LSLASH
|
112 |
+
* @return
|
113 |
+
*/
|
114 |
+
SMILES_BOND_DIRECTION getSmilesStereochemistry() {
|
115 |
+
return smilesBondDirection;
|
116 |
+
}
|
117 |
+
|
118 |
+
void setSmilesStereochemistry(SMILES_BOND_DIRECTION bondDirection) {
|
119 |
+
this.smilesBondDirection = bondDirection;
|
120 |
+
}
|
121 |
+
|
122 |
+
BondStereo getBondStereo() {
|
123 |
+
return bondStereo;
|
124 |
+
}
|
125 |
+
|
126 |
+
void setBondStereo(BondStereo bondStereo) {
|
127 |
+
this.bondStereo = bondStereo;
|
128 |
+
}
|
129 |
+
|
130 |
+
void setBondStereoElement(Atom[] atomRefs4, BondStereoValue cOrT) {
|
131 |
+
bondStereo = new BondStereo(atomRefs4, cOrT);
|
132 |
+
}
|
133 |
+
|
134 |
+
/**
|
135 |
+
* Returns the atom at the other end of the bond to given atom
|
136 |
+
* @param atom
|
137 |
+
* @return
|
138 |
+
*/
|
139 |
+
Atom getOtherAtom(Atom atom) {
|
140 |
+
if (from == atom){
|
141 |
+
return to;
|
142 |
+
}
|
143 |
+
else if (to == atom){
|
144 |
+
return from;
|
145 |
+
}
|
146 |
+
else{
|
147 |
+
return null;
|
148 |
+
}
|
149 |
+
}
|
150 |
+
|
151 |
+
@Override
|
152 |
+
public int hashCode() {
|
153 |
+
final int prime = 31;
|
154 |
+
int result = 1;
|
155 |
+
result = prime * result + from.getID();
|
156 |
+
result = prime * result + to.getID();
|
157 |
+
return result;
|
158 |
+
}
|
159 |
+
|
160 |
+
@Override
|
161 |
+
public boolean equals(Object obj) {
|
162 |
+
if (this == obj) {
|
163 |
+
return true;
|
164 |
+
}
|
165 |
+
if (obj == null) {
|
166 |
+
return false;
|
167 |
+
}
|
168 |
+
if (getClass() != obj.getClass()) {
|
169 |
+
return false;
|
170 |
+
}
|
171 |
+
Bond other = (Bond) obj;
|
172 |
+
|
173 |
+
if (from == other.from &&
|
174 |
+
to == other.to){
|
175 |
+
return true;
|
176 |
+
}
|
177 |
+
if (from == other.to &&
|
178 |
+
to == other.from){
|
179 |
+
return true;
|
180 |
+
}
|
181 |
+
|
182 |
+
return false;
|
183 |
+
}
|
184 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BondStereo.java
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
/**
|
4 |
+
* Holds information about the positions of 2 atoms relative to a double bond allowing the specification of cis/trans stereochemistry
|
5 |
+
* @author dl387
|
6 |
+
*
|
7 |
+
*/
|
8 |
+
class BondStereo {
|
9 |
+
|
10 |
+
private Atom[] atomRefs4;
|
11 |
+
private BondStereoValue bondStereoValue;
|
12 |
+
|
13 |
+
/**
|
14 |
+
* Possible values for a bondStereo element
|
15 |
+
* @author dl387
|
16 |
+
*
|
17 |
+
*/
|
18 |
+
enum BondStereoValue{
|
19 |
+
CIS("C"),
|
20 |
+
TRANS("T");
|
21 |
+
|
22 |
+
private final String value;
|
23 |
+
BondStereoValue(String value){
|
24 |
+
this.value = value;
|
25 |
+
}
|
26 |
+
@Override
|
27 |
+
public String toString() {
|
28 |
+
return value;
|
29 |
+
}
|
30 |
+
}
|
31 |
+
|
32 |
+
/**
|
33 |
+
* Create a bondStereo from an array of 4 atoms. The 2nd and 3rd atoms of this array are connected via a double bond.
|
34 |
+
* The 1st and 4th atoms are at either end of this bond and indication is given as to whether they are cis or trans to each other.
|
35 |
+
* @param atomRefs4
|
36 |
+
* @param cOrT
|
37 |
+
*/
|
38 |
+
BondStereo(Atom[] atomRefs4, BondStereoValue cOrT) {
|
39 |
+
if (atomRefs4.length !=4){
|
40 |
+
throw new IllegalArgumentException("atomRefs4 must contain references to 4 atoms");
|
41 |
+
}
|
42 |
+
this.atomRefs4 = atomRefs4;
|
43 |
+
this.bondStereoValue = cOrT;
|
44 |
+
}
|
45 |
+
|
46 |
+
Atom[] getAtomRefs4() {
|
47 |
+
return atomRefs4;
|
48 |
+
}
|
49 |
+
void setAtomRefs4(Atom[] atomRefs4) {
|
50 |
+
this.atomRefs4 = atomRefs4;
|
51 |
+
}
|
52 |
+
BondStereoValue getBondStereoValue() {
|
53 |
+
return bondStereoValue;
|
54 |
+
}
|
55 |
+
void setBondStereoValue(BondStereoValue bondStereoValue) {
|
56 |
+
this.bondStereoValue = bondStereoValue;
|
57 |
+
}
|
58 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BuildResults.java
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.util.ArrayList;
|
4 |
+
import java.util.Collections;
|
5 |
+
import java.util.LinkedHashSet;
|
6 |
+
import java.util.List;
|
7 |
+
import java.util.Set;
|
8 |
+
|
9 |
+
/**
|
10 |
+
* A "struct" to hold the results of fragment building.
|
11 |
+
* @author dl387
|
12 |
+
*
|
13 |
+
*/
|
14 |
+
class BuildResults {
|
15 |
+
/**Holds the atoms that are currently marked as radicals. An atom may be listed twice for say diyl
|
16 |
+
* Typically these will be utilised by a word rule e.g. the ethyl of ethyl ethanoate has one
|
17 |
+
* Also holds the order of the bond that will be created when it is used (valency)
|
18 |
+
* setExplicitly says whether the outAtom absolutely definitely refers to that atom or not.
|
19 |
+
* e.g. propyl is stored as prop-1-yl with this set to false while prop-2-yl has it set to true
|
20 |
+
* These OutAtoms are the same objects as are present in the fragments*/
|
21 |
+
private final List<OutAtom> outAtoms = new ArrayList<>();
|
22 |
+
|
23 |
+
/**The atoms that may be used to from things like esters*/
|
24 |
+
private final List<FunctionalAtom> functionalAtoms = new ArrayList<>();
|
25 |
+
|
26 |
+
/**A list of fragments that have been evaluated to form this BuildResults. They are in the order they would be found in the XML*/
|
27 |
+
private final Set<Fragment> fragments = new LinkedHashSet<>();
|
28 |
+
|
29 |
+
/**A BuildResults is constructed from a list of Fragments.
|
30 |
+
* This constructor creates this list from the groups present in an XML word/bracket/sub element.
|
31 |
+
* @param wordSubOrBracket*/
|
32 |
+
BuildResults(Element wordSubOrBracket) {
|
33 |
+
List<Element> groups = OpsinTools.getDescendantElementsWithTagName(wordSubOrBracket, XmlDeclarations.GROUP_EL);
|
34 |
+
for (Element group : groups) {
|
35 |
+
Fragment frag = group.getFrag();
|
36 |
+
fragments.add(frag);
|
37 |
+
for (int i = 0, l = frag.getOutAtomCount(); i < l; i++) {
|
38 |
+
outAtoms.add(frag.getOutAtom(i));
|
39 |
+
}
|
40 |
+
int functionalAtomCount = frag.getFunctionalAtomCount();
|
41 |
+
if (functionalAtomCount > 0){
|
42 |
+
Element parent = group.getParent();
|
43 |
+
if (parent.getName().equals(XmlDeclarations.ROOT_EL) ||
|
44 |
+
OpsinTools.getNextGroup(group) == null) {
|
45 |
+
for (int i = 0; i < functionalAtomCount; i++) {
|
46 |
+
functionalAtoms.add(frag.getFunctionalAtom(i));
|
47 |
+
}
|
48 |
+
}
|
49 |
+
}
|
50 |
+
}
|
51 |
+
}
|
52 |
+
|
53 |
+
/**
|
54 |
+
* Construct a blank buildResults
|
55 |
+
*/
|
56 |
+
BuildResults() {}
|
57 |
+
|
58 |
+
/**
|
59 |
+
* Returns a read only view of the fragments in this BuildResults
|
60 |
+
* @return
|
61 |
+
*/
|
62 |
+
Set<Fragment> getFragments(){
|
63 |
+
return Collections.unmodifiableSet(fragments);
|
64 |
+
}
|
65 |
+
|
66 |
+
int getFragmentCount(){
|
67 |
+
return fragments.size();
|
68 |
+
}
|
69 |
+
|
70 |
+
OutAtom getOutAtom(int i) {
|
71 |
+
return outAtoms.get(i);
|
72 |
+
}
|
73 |
+
|
74 |
+
int getOutAtomCount() {
|
75 |
+
return outAtoms.size();
|
76 |
+
}
|
77 |
+
|
78 |
+
OutAtom removeOutAtom(int i) {
|
79 |
+
OutAtom outAtom = outAtoms.get(i);
|
80 |
+
outAtom.getAtom().getFrag().removeOutAtom(outAtom);
|
81 |
+
return outAtoms.remove(i);
|
82 |
+
}
|
83 |
+
|
84 |
+
void removeAllOutAtoms() {
|
85 |
+
for (int i = outAtoms.size() -1; i >=0 ; i--) {
|
86 |
+
removeOutAtom(i);
|
87 |
+
}
|
88 |
+
}
|
89 |
+
|
90 |
+
/**
|
91 |
+
* Returns the atom corresponding to position i in the functionalAtoms list
|
92 |
+
* @param i index
|
93 |
+
* @return atom
|
94 |
+
*/
|
95 |
+
Atom getFunctionalAtom(int i) {
|
96 |
+
return functionalAtoms.get(i).getAtom();
|
97 |
+
}
|
98 |
+
|
99 |
+
FunctionalAtom removeFunctionalAtom(int i) {
|
100 |
+
FunctionalAtom functionalAtom = functionalAtoms.get(i);
|
101 |
+
functionalAtom.getAtom().getFrag().removeFunctionalAtom(functionalAtom);
|
102 |
+
return functionalAtoms.remove(i);
|
103 |
+
}
|
104 |
+
|
105 |
+
int getFunctionalAtomCount(){
|
106 |
+
return functionalAtoms.size();
|
107 |
+
}
|
108 |
+
|
109 |
+
/**
|
110 |
+
* Returns the first OutAtom
|
111 |
+
* @return OutAtom
|
112 |
+
*/
|
113 |
+
OutAtom getFirstOutAtom() {
|
114 |
+
return outAtoms.get(0);
|
115 |
+
}
|
116 |
+
|
117 |
+
/**
|
118 |
+
* Returns the atom corresponding to the given id assuming the atom the id corresponds to is within the list of fragment in this Buildresults
|
119 |
+
* @param id index
|
120 |
+
* @return atom
|
121 |
+
* @throws StructureBuildingException
|
122 |
+
*/
|
123 |
+
Atom getAtomByIdOrThrow(int id) throws StructureBuildingException {
|
124 |
+
for (Fragment fragment : fragments) {
|
125 |
+
Atom outAtom =fragment.getAtomByID(id);
|
126 |
+
if (outAtom != null){
|
127 |
+
return outAtom;
|
128 |
+
}
|
129 |
+
}
|
130 |
+
throw new StructureBuildingException("No fragment contained this id: " + id);
|
131 |
+
}
|
132 |
+
|
133 |
+
void mergeBuildResults(BuildResults otherBR) {
|
134 |
+
outAtoms.addAll(otherBR.outAtoms);
|
135 |
+
functionalAtoms.addAll(otherBR.functionalAtoms);
|
136 |
+
fragments.addAll(otherBR.fragments);
|
137 |
+
}
|
138 |
+
|
139 |
+
/**
|
140 |
+
* Returns the sum of the charges of the fragments in the buildResults
|
141 |
+
* @return
|
142 |
+
*/
|
143 |
+
int getCharge() {
|
144 |
+
int totalCharge = 0;
|
145 |
+
for (Fragment frag : fragments) {
|
146 |
+
totalCharge += frag.getCharge();
|
147 |
+
}
|
148 |
+
return totalCharge;
|
149 |
+
}
|
150 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BuildState.java
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.util.ArrayList;
|
4 |
+
import java.util.HashMap;
|
5 |
+
import java.util.List;
|
6 |
+
|
7 |
+
import uk.ac.cam.ch.wwmm.opsin.OpsinWarning.OpsinWarningType;
|
8 |
+
|
9 |
+
/**
|
10 |
+
* Used to pass the current configuration and FragmentManager around
|
11 |
+
* The currentWordRule can be mutated to keep track of what the parent wordRule is at the given time
|
12 |
+
*
|
13 |
+
* @author dl387
|
14 |
+
*
|
15 |
+
*/
|
16 |
+
class BuildState {
|
17 |
+
|
18 |
+
final FragmentManager fragManager;
|
19 |
+
final HashMap<Element, List<Fragment>> xmlSuffixMap;
|
20 |
+
final NameToStructureConfig n2sConfig;
|
21 |
+
// counter is used for DL- racemic stereochemistry in oligomers, we place each one in a separate racemic group,
|
22 |
+
// there is implicitly one group in-case the input has a combination of (RS)- and then DL-
|
23 |
+
int numRacGrps = 1;
|
24 |
+
private final List<OpsinWarning> warnings = new ArrayList<>();
|
25 |
+
|
26 |
+
WordRule currentWordRule = null;
|
27 |
+
|
28 |
+
BuildState(NameToStructureConfig n2sConfig) {
|
29 |
+
this.n2sConfig = n2sConfig;
|
30 |
+
IDManager idManager = new IDManager();
|
31 |
+
fragManager = new FragmentManager(new SMILESFragmentBuilder(idManager), idManager);
|
32 |
+
xmlSuffixMap = new HashMap<>();
|
33 |
+
}
|
34 |
+
|
35 |
+
List<OpsinWarning> getWarnings() {
|
36 |
+
return warnings;
|
37 |
+
}
|
38 |
+
|
39 |
+
void addWarning(OpsinWarningType type, String message) {
|
40 |
+
warnings.add(new OpsinWarning(type, message));
|
41 |
+
}
|
42 |
+
|
43 |
+
void addIsAmbiguous(String message) {
|
44 |
+
warnings.add(new OpsinWarning(OpsinWarningType.APPEARS_AMBIGUOUS, message));
|
45 |
+
}
|
46 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CASTools.java
ADDED
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.util.ArrayList;
|
4 |
+
import java.util.Arrays;
|
5 |
+
import java.util.List;
|
6 |
+
import java.util.regex.Matcher;
|
7 |
+
import java.util.regex.Pattern;
|
8 |
+
|
9 |
+
/**
|
10 |
+
* Tools for converting CAS nomenclature into IUPAC nomenclature.
|
11 |
+
* @author dl387
|
12 |
+
*/
|
13 |
+
class CASTools {
|
14 |
+
|
15 |
+
private static final Pattern matchCasCollectiveIndex = Pattern.compile("([\\[\\(\\{]([1-9][0-9]?[cC][iI][, ]?)+[\\]\\)\\}])+|[1-9][0-9]?[cC][iI]", Pattern.CASE_INSENSITIVE);
|
16 |
+
private static final Pattern matchAcid = Pattern.compile("acid[\\]\\)\\}]*", Pattern.CASE_INSENSITIVE);
|
17 |
+
private static final Pattern matchCommaSpace = Pattern.compile(", ");
|
18 |
+
private static final Pattern matchCompoundWithPhrase = Pattern.compile("(compd\\. with|compound with|and) ", Pattern.CASE_INSENSITIVE);
|
19 |
+
private static final Pattern matchFunctionalTermAllowingSubstituentPrefix = Pattern.compile("(amide|hydrazide|(thi|selen|tellur)?oxime|hydrazone|(iso)?(semicarbazone|thiosemicarbazone|selenosemicarbazone|tellurosemicarbazone)|imide|imine|semioxamazone)[\\]\\)\\}]*", Pattern.CASE_INSENSITIVE);
|
20 |
+
|
21 |
+
/**
|
22 |
+
* Inverts a CAS name.
|
23 |
+
* Throws an exception is OPSIN is unable to determine whether something is a substituent or functional term
|
24 |
+
* or if something unexpected in a CAS name is encountered
|
25 |
+
* @param name
|
26 |
+
* @return
|
27 |
+
* @throws ParsingException
|
28 |
+
*/
|
29 |
+
static String uninvertCASName(String name, ParseRules parseRules) throws ParsingException {
|
30 |
+
List<String> nameComponents = new ArrayList<>(Arrays.asList(matchCommaSpace.split(name)));
|
31 |
+
List<String> substituents = new ArrayList<>();
|
32 |
+
List<String> seperateWordSubstituents = new ArrayList<>();
|
33 |
+
List<String> functionalTerms = new ArrayList<>();
|
34 |
+
|
35 |
+
String parent = nameComponents.get(0);
|
36 |
+
String[] parentNameParts = parent.split(" ");
|
37 |
+
if (parentNameParts.length != 1) {
|
38 |
+
if (matchCasCollectiveIndex.matcher(parentNameParts[parentNameParts.length - 1]).matches()) {//CAS collective index description should be ignored
|
39 |
+
StringBuilder parentSB = new StringBuilder();
|
40 |
+
for (int i = 0; i < parentNameParts.length - 1; i++) {
|
41 |
+
parentSB.append(parentNameParts[i]);
|
42 |
+
}
|
43 |
+
parent = parentSB.toString();
|
44 |
+
parentNameParts = parent.split(" ");
|
45 |
+
}
|
46 |
+
for (int i = 1; i < parentNameParts.length; i++) {
|
47 |
+
if (!matchAcid.matcher(parentNameParts[i]).matches()) {
|
48 |
+
ParseRulesResults results = parseRules.getParses(parentNameParts[i]);
|
49 |
+
List<ParseTokens> parseTokens = results.getParseTokensList();
|
50 |
+
if (parseTokens.isEmpty()) {
|
51 |
+
throw new ParsingException("Invalid CAS name. Parent compound was followed by an unexpected term");
|
52 |
+
}
|
53 |
+
}
|
54 |
+
}
|
55 |
+
}
|
56 |
+
boolean addedBracket = false;
|
57 |
+
boolean esterEncountered = false;
|
58 |
+
for (int i = 1; i < nameComponents.size(); i++) {
|
59 |
+
String nameComponent = nameComponents.get(i);
|
60 |
+
Matcher m = matchCompoundWithPhrase.matcher(nameComponent);
|
61 |
+
boolean compoundWithcomponent = false;
|
62 |
+
if (m.lookingAt()) {
|
63 |
+
nameComponent = nameComponent.substring(m.group().length());
|
64 |
+
compoundWithcomponent = true;
|
65 |
+
}
|
66 |
+
String[] components = nameComponents.get(i).split(" ");
|
67 |
+
for (int c = 0, componentLen = components.length; c < componentLen; c++) {
|
68 |
+
String component = components[c];
|
69 |
+
if (compoundWithcomponent) {
|
70 |
+
functionalTerms.add(component);
|
71 |
+
continue;
|
72 |
+
}
|
73 |
+
if (component.endsWith("-")) {
|
74 |
+
Character missingCloseBracket = missingCloseBracketCharIfApplicable(component);
|
75 |
+
if (missingCloseBracket !=null) {
|
76 |
+
if (addedBracket) {
|
77 |
+
throw new ParsingException("Close bracket appears to be missing");
|
78 |
+
}
|
79 |
+
parent += missingCloseBracket;
|
80 |
+
addedBracket = true;
|
81 |
+
}
|
82 |
+
substituents.add(component);
|
83 |
+
} else {
|
84 |
+
ParseRulesResults results = parseRules.getParses(component);
|
85 |
+
List<ParseTokens> parseTokens = results.getParseTokensList();
|
86 |
+
if (parseTokens.size() > 0) {
|
87 |
+
List<ParseWord> parseWords = WordTools.splitIntoParseWords(parseTokens, component);
|
88 |
+
|
89 |
+
List<ParseTokens> firstParseWordTokens = parseWords.get(0).getParseTokens();
|
90 |
+
WordType firstWordType = OpsinTools.determineWordType(firstParseWordTokens.get(0).getAnnotations());
|
91 |
+
for (int j = 1; j < firstParseWordTokens.size(); j++) {
|
92 |
+
if (!firstWordType.equals(OpsinTools.determineWordType(firstParseWordTokens.get(j).getAnnotations()))) {
|
93 |
+
throw new ParsingException(component + "can be interpreted in multiple ways. For the sake of precision OPSIN has decided not to process this as a CAS name");
|
94 |
+
}
|
95 |
+
}
|
96 |
+
|
97 |
+
if (parseWords.size() == 1) {
|
98 |
+
switch (firstWordType) {
|
99 |
+
case functionalTerm:
|
100 |
+
if (component.equalsIgnoreCase("ester")) {
|
101 |
+
if (seperateWordSubstituents.size() ==0){
|
102 |
+
throw new ParsingException("ester encountered but no substituents were specified in potential CAS name!");
|
103 |
+
}
|
104 |
+
if (esterEncountered) {
|
105 |
+
throw new ParsingException("ester formation was mentioned more than once in CAS name!");
|
106 |
+
}
|
107 |
+
parent = uninvertEster(parent);
|
108 |
+
esterEncountered = true;
|
109 |
+
} else {
|
110 |
+
functionalTerms.add(component);
|
111 |
+
}
|
112 |
+
break;
|
113 |
+
case substituent:
|
114 |
+
seperateWordSubstituents.add(component);
|
115 |
+
break;
|
116 |
+
case full:
|
117 |
+
if (StringTools.endsWithCaseInsensitive(component, "ate") || StringTools.endsWithCaseInsensitive(component, "ite")//e.g. Piperazinium, 1,1-dimethyl-, 2,2,2-trifluoroacetate hydrochloride
|
118 |
+
|| StringTools.endsWithCaseInsensitive(component, "ium")
|
119 |
+
|| StringTools.endsWithCaseInsensitive(component, "hydrofluoride") || StringTools.endsWithCaseInsensitive(component, "hydrochloride")
|
120 |
+
|| StringTools.endsWithCaseInsensitive(component, "hydrobromide") || StringTools.endsWithCaseInsensitive(component, "hydroiodide")) {
|
121 |
+
functionalTerms.add(component);
|
122 |
+
} else if (StringTools.endsWithCaseInsensitive(component, "ic") && c + 1 < componentLen && components[c + 1].equalsIgnoreCase("acid")) {
|
123 |
+
functionalTerms.add(component);
|
124 |
+
functionalTerms.add(components[++c]);
|
125 |
+
} else {
|
126 |
+
throw new ParsingException("Unable to interpret: " + component + " (as part of a CAS index name)- A full word was encountered where a substituent or functionalTerm was expected");
|
127 |
+
}
|
128 |
+
break;
|
129 |
+
default:
|
130 |
+
throw new ParsingException("Unrecognised CAS index name form");
|
131 |
+
}
|
132 |
+
}
|
133 |
+
else if (parseWords.size() == 2 && firstWordType.equals(WordType.substituent)) {
|
134 |
+
//could be something like O-methyloxime which is parsed as [O-methyl] [oxime]
|
135 |
+
List<ParseTokens> secondParseWordTokens = parseWords.get(1).getParseTokens();
|
136 |
+
WordType secondWordType = OpsinTools.determineWordType(secondParseWordTokens.get(0).getAnnotations());
|
137 |
+
for (int j = 1; j < secondParseWordTokens.size(); j++) {
|
138 |
+
if (!secondWordType.equals(OpsinTools.determineWordType(secondParseWordTokens.get(j).getAnnotations()))) {
|
139 |
+
throw new ParsingException(component + "can be interpreted in multiple ways. For the sake of precision OPSIN has decided not to process this as a CAS name");
|
140 |
+
}
|
141 |
+
}
|
142 |
+
if (secondWordType.equals(WordType.functionalTerm) &&
|
143 |
+
matchFunctionalTermAllowingSubstituentPrefix.matcher(parseWords.get(1).getWord()).matches()){
|
144 |
+
functionalTerms.add(component);
|
145 |
+
}
|
146 |
+
else{
|
147 |
+
throw new ParsingException("Unrecognised CAS index name form, could have a missing space?");
|
148 |
+
}
|
149 |
+
}
|
150 |
+
else {
|
151 |
+
throw new ParsingException("Unrecognised CAS index name form");
|
152 |
+
}
|
153 |
+
} else {
|
154 |
+
if (!matchCasCollectiveIndex.matcher(component).matches()) {//CAS collective index description should be ignored
|
155 |
+
throw new ParsingException("Unable to interpret: " + component + " (as part of a CAS index name)");
|
156 |
+
}
|
157 |
+
}
|
158 |
+
}
|
159 |
+
}
|
160 |
+
}
|
161 |
+
StringBuilder casName = new StringBuilder();
|
162 |
+
for (String prefixFunctionalTerm : seperateWordSubstituents) {
|
163 |
+
casName.append(prefixFunctionalTerm);
|
164 |
+
casName.append(" ");
|
165 |
+
}
|
166 |
+
for (int i = substituents.size() - 1; i >= 0; i--) {
|
167 |
+
//stereochemistry term comes after substituent term. In older CAS names (9CI) this stereochemistry term can apply to the substituent term. Hence append in reverse order
|
168 |
+
casName.append(substituents.get(i));
|
169 |
+
}
|
170 |
+
casName.append(parent);
|
171 |
+
for (String functionalTerm : functionalTerms) {
|
172 |
+
casName.append(" ");
|
173 |
+
casName.append(functionalTerm);
|
174 |
+
}
|
175 |
+
return casName.toString();
|
176 |
+
}
|
177 |
+
|
178 |
+
private static Character missingCloseBracketCharIfApplicable(String component) {
|
179 |
+
int bracketLevel =0;
|
180 |
+
Character missingCloseBracket =null;
|
181 |
+
for (int i = 0, l = component.length(); i < l; i++) {
|
182 |
+
char character = component.charAt(i);
|
183 |
+
if (character == '(' || character == '[' || character == '{') {
|
184 |
+
bracketLevel++;
|
185 |
+
if (bracketLevel ==1){
|
186 |
+
missingCloseBracket = character;
|
187 |
+
}
|
188 |
+
}
|
189 |
+
if (character == ')' || character == ']' || character == '}') {
|
190 |
+
bracketLevel--;
|
191 |
+
if (bracketLevel<0){
|
192 |
+
return null;
|
193 |
+
}
|
194 |
+
}
|
195 |
+
}
|
196 |
+
if (bracketLevel == 1){
|
197 |
+
if (missingCloseBracket == '('){
|
198 |
+
return ')';
|
199 |
+
}
|
200 |
+
if (missingCloseBracket == '['){
|
201 |
+
return ']';
|
202 |
+
}
|
203 |
+
if (missingCloseBracket == '{'){
|
204 |
+
return '}';
|
205 |
+
}
|
206 |
+
}
|
207 |
+
return null;
|
208 |
+
}
|
209 |
+
|
210 |
+
/**
|
211 |
+
* Modifies the name of the parent acid from ic to ate (or ous to ite)
|
212 |
+
* hence allowing the formation of the uninverted ester
|
213 |
+
* @param parent
|
214 |
+
* @return
|
215 |
+
* @throws ParsingException
|
216 |
+
*/
|
217 |
+
private static String uninvertEster(String parent) throws ParsingException {
|
218 |
+
int len = parent.length();
|
219 |
+
if (len == 0) {
|
220 |
+
throw new ParsingException("Failed to uninvert CAS ester");
|
221 |
+
}
|
222 |
+
char lastChar = parent.charAt(len - 1);
|
223 |
+
if (lastChar == ')') {
|
224 |
+
if (StringTools.endsWithCaseInsensitive(parent, "ic acid)")) {
|
225 |
+
parent = parent.substring(0, parent.length() - 8) + "ate)";
|
226 |
+
} else if (StringTools.endsWithCaseInsensitive(parent, "ous acid)")) {
|
227 |
+
parent = parent.substring(0, parent.length() - 9) + "ite)";
|
228 |
+
} else if (StringTools.endsWithCaseInsensitive(parent, "ine)")){//amino acid
|
229 |
+
parent = parent.substring(0, parent.length() - 2) + "ate)";
|
230 |
+
}
|
231 |
+
else{
|
232 |
+
throw new ParsingException("Failed to uninvert CAS ester");
|
233 |
+
}
|
234 |
+
} else {
|
235 |
+
if (StringTools.endsWithCaseInsensitive(parent, "ic acid")) {
|
236 |
+
parent = parent.substring(0, parent.length() - 7) + "ate";
|
237 |
+
} else if (StringTools.endsWithCaseInsensitive(parent, "ous acid")) {
|
238 |
+
parent = parent.substring(0, parent.length() - 8) + "ite";
|
239 |
+
} else if (StringTools.endsWithCaseInsensitive(parent, "ine")){//amino acid
|
240 |
+
parent = parent.substring(0, parent.length() - 1) + "ate";
|
241 |
+
}
|
242 |
+
else{
|
243 |
+
throw new ParsingException("Failed to uninvert CAS ester");
|
244 |
+
}
|
245 |
+
}
|
246 |
+
return parent;
|
247 |
+
}
|
248 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CMLWriter.java
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.io.ByteArrayOutputStream;
|
4 |
+
import java.io.UnsupportedEncodingException;
|
5 |
+
import java.util.List;
|
6 |
+
|
7 |
+
import javax.xml.stream.XMLOutputFactory;
|
8 |
+
import javax.xml.stream.XMLStreamException;
|
9 |
+
import javax.xml.stream.XMLStreamWriter;
|
10 |
+
|
11 |
+
import com.ctc.wstx.api.WstxOutputProperties;
|
12 |
+
import com.ctc.wstx.stax.WstxOutputFactory;
|
13 |
+
|
14 |
+
class CMLWriter {
|
15 |
+
/**
|
16 |
+
* CML Elements/Attributes/NameSpace
|
17 |
+
*/
|
18 |
+
static final String CML_NAMESPACE = "http://www.xml-cml.org/schema";
|
19 |
+
|
20 |
+
private static final XMLOutputFactory factory = new WstxOutputFactory();
|
21 |
+
static {
|
22 |
+
factory.setProperty(WstxOutputProperties.P_OUTPUT_ESCAPE_CR, false);
|
23 |
+
}
|
24 |
+
|
25 |
+
/**The XML writer*/
|
26 |
+
private final XMLStreamWriter writer;
|
27 |
+
|
28 |
+
/**
|
29 |
+
* Creates a CML writer for the given fragment
|
30 |
+
* @param writer
|
31 |
+
|
32 |
+
*/
|
33 |
+
CMLWriter(XMLStreamWriter writer) {
|
34 |
+
this.writer = writer;
|
35 |
+
}
|
36 |
+
|
37 |
+
static String generateCml(Fragment structure, String chemicalName) {
|
38 |
+
return generateCml(structure, chemicalName, false);
|
39 |
+
}
|
40 |
+
|
41 |
+
static String generateIndentedCml(Fragment structure, String chemicalName) {
|
42 |
+
return generateCml(structure, chemicalName, true);
|
43 |
+
}
|
44 |
+
|
45 |
+
private static String generateCml(Fragment structure, String chemicalName, boolean indent) {
|
46 |
+
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
47 |
+
try {
|
48 |
+
XMLStreamWriter xmlWriter = factory.createXMLStreamWriter(out, "UTF-8");
|
49 |
+
if (indent) {
|
50 |
+
xmlWriter = new IndentingXMLStreamWriter(xmlWriter, 2);
|
51 |
+
}
|
52 |
+
CMLWriter cmlWriter = new CMLWriter(xmlWriter);
|
53 |
+
cmlWriter.writeCmlStart();
|
54 |
+
cmlWriter.writeMolecule(structure, chemicalName, 1);
|
55 |
+
cmlWriter.writeCmlEnd();
|
56 |
+
xmlWriter.close();
|
57 |
+
} catch (XMLStreamException e) {
|
58 |
+
throw new RuntimeException(e);
|
59 |
+
}
|
60 |
+
try {
|
61 |
+
return out.toString("UTF-8");
|
62 |
+
} catch (UnsupportedEncodingException e) {
|
63 |
+
throw new RuntimeException("JVM doesn't support UTF-8...but it should do!");
|
64 |
+
}
|
65 |
+
}
|
66 |
+
|
67 |
+
void writeCmlStart(){
|
68 |
+
try {
|
69 |
+
writer.writeStartElement("cml");
|
70 |
+
writer.writeDefaultNamespace(CML_NAMESPACE);
|
71 |
+
writer.writeAttribute("convention", "conventions:molecular");
|
72 |
+
writer.writeNamespace("conventions", "http://www.xml-cml.org/convention/");
|
73 |
+
writer.writeNamespace("cmlDict", "http://www.xml-cml.org/dictionary/cml/");
|
74 |
+
writer.writeNamespace("nameDict", "http://www.xml-cml.org/dictionary/cml/name/");
|
75 |
+
} catch (XMLStreamException e) {
|
76 |
+
throw new RuntimeException(e);
|
77 |
+
}
|
78 |
+
}
|
79 |
+
|
80 |
+
void writeCmlEnd(){
|
81 |
+
try {
|
82 |
+
writer.writeEndElement();
|
83 |
+
writer.flush();
|
84 |
+
} catch (XMLStreamException e) {
|
85 |
+
throw new RuntimeException(e);
|
86 |
+
}
|
87 |
+
}
|
88 |
+
|
89 |
+
void writeMolecule(Fragment structure, String chemicalName, int id) throws XMLStreamException {
|
90 |
+
writer.writeStartElement("molecule");
|
91 |
+
writer.writeAttribute("id", "m" + id);
|
92 |
+
|
93 |
+
writer.writeStartElement("name");
|
94 |
+
writer.writeAttribute("dictRef", "nameDict:unknown");
|
95 |
+
writer.writeCharacters(chemicalName);
|
96 |
+
writer.writeEndElement();
|
97 |
+
|
98 |
+
if (structure != null) {
|
99 |
+
writer.writeStartElement("atomArray");
|
100 |
+
for(Atom atom : structure.getAtomList()) {
|
101 |
+
writeAtom(atom);
|
102 |
+
}
|
103 |
+
writer.writeEndElement();
|
104 |
+
|
105 |
+
writer.writeStartElement("bondArray");
|
106 |
+
for(Bond bond : structure.getBondSet()) {
|
107 |
+
writeBond(bond);
|
108 |
+
}
|
109 |
+
writer.writeEndElement();
|
110 |
+
}
|
111 |
+
|
112 |
+
writer.writeEndElement();
|
113 |
+
}
|
114 |
+
|
115 |
+
private void writeAtom(Atom atom) throws XMLStreamException {
|
116 |
+
writer.writeStartElement("atom");
|
117 |
+
writer.writeAttribute("id", "a" + Integer.toString(atom.getID()));
|
118 |
+
writer.writeAttribute("elementType", atom.getElement().toString());
|
119 |
+
if(atom.getCharge() != 0){
|
120 |
+
writer.writeAttribute("formalCharge", Integer.toString(atom.getCharge()));
|
121 |
+
}
|
122 |
+
if(atom.getIsotope() != null){
|
123 |
+
writer.writeAttribute("isotopeNumber", Integer.toString(atom.getIsotope()));
|
124 |
+
}
|
125 |
+
if (atom.getElement() != ChemEl.H){
|
126 |
+
int hydrogenCount =0;
|
127 |
+
List<Atom> neighbours = atom.getAtomNeighbours();
|
128 |
+
for (Atom neighbour : neighbours) {
|
129 |
+
if (neighbour.getElement() == ChemEl.H){
|
130 |
+
hydrogenCount++;
|
131 |
+
}
|
132 |
+
}
|
133 |
+
if (hydrogenCount==0){//prevent adding of implicit hydrogen
|
134 |
+
writer.writeAttribute("hydrogenCount", "0");
|
135 |
+
}
|
136 |
+
}
|
137 |
+
AtomParity atomParity = atom.getAtomParity();
|
138 |
+
if(atomParity != null) {
|
139 |
+
StereoGroup stereoGroupType = atomParity.getStereoGroup();
|
140 |
+
if (!((stereoGroupType == StereoGroup.Rac || stereoGroupType == StereoGroup.Rel) &&
|
141 |
+
countStereoGroup(atom) == 1)) {
|
142 |
+
writeAtomParity(atomParity);
|
143 |
+
}
|
144 |
+
}
|
145 |
+
for(String locant : atom.getLocants()) {
|
146 |
+
writer.writeStartElement("label");
|
147 |
+
writer.writeAttribute("value", locant);
|
148 |
+
writer.writeAttribute("dictRef", "cmlDict:locant");
|
149 |
+
writer.writeEndElement();
|
150 |
+
}
|
151 |
+
writer.writeEndElement();
|
152 |
+
}
|
153 |
+
|
154 |
+
private int countStereoGroup(Atom atom) {
|
155 |
+
int count = 0;
|
156 |
+
for (Atom a : atom.getFrag().getAtomList()) {
|
157 |
+
if (a.getAtomParity() == null)
|
158 |
+
continue;
|
159 |
+
if (a.getAtomParity().getStereoGroup().equals(atom.getAtomParity().getStereoGroup()) &&
|
160 |
+
a.getAtomParity().getStereoGroupNum() == atom.getAtomParity().getStereoGroupNum())
|
161 |
+
count++;
|
162 |
+
}
|
163 |
+
return count;
|
164 |
+
}
|
165 |
+
|
166 |
+
private void writeAtomParity(AtomParity atomParity) throws XMLStreamException {
|
167 |
+
writer.writeStartElement("atomParity");
|
168 |
+
writeAtomRefs4(atomParity.getAtomRefs4());
|
169 |
+
writer.writeCharacters(Integer.toString(atomParity.getParity()));
|
170 |
+
writer.writeEndElement();
|
171 |
+
}
|
172 |
+
|
173 |
+
private void writeBond(Bond bond) throws XMLStreamException {
|
174 |
+
writer.writeStartElement("bond");
|
175 |
+
writer.writeAttribute("id", "a" + Integer.toString(bond.getFrom()) + "_a" + Integer.toString(bond.getTo()));
|
176 |
+
writer.writeAttribute("atomRefs2", "a" + Integer.toString(bond.getFrom()) + " a" + Integer.toString(bond.getTo()));
|
177 |
+
switch (bond.getOrder()) {
|
178 |
+
case 1:
|
179 |
+
writer.writeAttribute("order", "S");
|
180 |
+
break;
|
181 |
+
case 2:
|
182 |
+
writer.writeAttribute("order", "D");
|
183 |
+
break;
|
184 |
+
case 3:
|
185 |
+
writer.writeAttribute("order", "T");
|
186 |
+
break;
|
187 |
+
default:
|
188 |
+
writer.writeAttribute("order", "unknown");
|
189 |
+
break;
|
190 |
+
}
|
191 |
+
BondStereo bondStereo = bond.getBondStereo();
|
192 |
+
if (bondStereo != null){
|
193 |
+
writeBondStereo(bondStereo);
|
194 |
+
}
|
195 |
+
writer.writeEndElement();
|
196 |
+
}
|
197 |
+
|
198 |
+
private void writeBondStereo(BondStereo bondStereo) throws XMLStreamException {
|
199 |
+
writer.writeStartElement("bondStereo");
|
200 |
+
writeAtomRefs4(bondStereo.getAtomRefs4());
|
201 |
+
writer.writeCharacters(bondStereo.getBondStereoValue().toString());
|
202 |
+
writer.writeEndElement();
|
203 |
+
}
|
204 |
+
|
205 |
+
private void writeAtomRefs4(Atom[] atomRefs4) throws XMLStreamException {
|
206 |
+
StringBuilder atomRefsSb = new StringBuilder();
|
207 |
+
for(int i = 0; i< atomRefs4.length - 1; i++) {
|
208 |
+
atomRefsSb.append('a');
|
209 |
+
atomRefsSb.append(atomRefs4[i].getID());
|
210 |
+
atomRefsSb.append(' ');
|
211 |
+
}
|
212 |
+
atomRefsSb.append('a');
|
213 |
+
atomRefsSb.append(atomRefs4[atomRefs4.length - 1].getID());
|
214 |
+
writer.writeAttribute("atomRefs4", atomRefsSb.toString());
|
215 |
+
}
|
216 |
+
|
217 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ChemEl.java
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
/**
 * The chemical elements, each carrying its atomic number.
 * R carries atomic number 0.
 */
enum ChemEl {
	R(0),

	H(1), He(2),
	Li(3), Be(4), B(5), C(6), N(7), O(8), F(9), Ne(10),
	Na(11), Mg(12), Al(13), Si(14), P(15), S(16), Cl(17), Ar(18),
	K(19), Ca(20), Sc(21), Ti(22), V(23), Cr(24), Mn(25), Fe(26), Co(27),
	Ni(28), Cu(29), Zn(30), Ga(31), Ge(32), As(33), Se(34), Br(35), Kr(36),
	Rb(37), Sr(38), Y(39), Zr(40), Nb(41), Mo(42), Tc(43), Ru(44), Rh(45),
	Pd(46), Ag(47), Cd(48), In(49), Sn(50), Sb(51), Te(52), I(53), Xe(54),
	Cs(55), Ba(56),
	La(57), Ce(58), Pr(59), Nd(60), Pm(61), Sm(62), Eu(63), Gd(64),
	Tb(65), Dy(66), Ho(67), Er(68), Tm(69), Yb(70), Lu(71),
	Hf(72), Ta(73), W(74), Re(75), Os(76), Ir(77), Pt(78), Au(79), Hg(80),
	Tl(81), Pb(82), Bi(83), Po(84), At(85), Rn(86),
	Fr(87), Ra(88),
	Ac(89), Th(90), Pa(91), U(92), Np(93), Pu(94), Am(95), Cm(96),
	Bk(97), Cf(98), Es(99), Fm(100), Md(101), No(102), Lr(103),
	Rf(104), Db(105), Sg(106), Bh(107), Hs(108), Mt(109), Ds(110), Rg(111), Cn(112),
	Nh(113), Fl(114), Mc(115), Lv(116), Ts(117), Og(118);

	/** The atomic number given to the constant's constructor */
	final int ATOMIC_NUM;

	ChemEl(int atomicNum) {
		ATOMIC_NUM = atomicNum;
	}

	/**
	 * @return true if this is O, S, Se or Te
	 */
	boolean isChalcogen() {
		switch (this) {
		case O:
		case S:
		case Se:
		case Te:
			return true;
		default:
			return false;
		}
	}

	/**
	 * @return true if this is F, Cl, Br or I
	 */
	boolean isHalogen() {
		switch (this) {
		case F:
		case Cl:
		case Br:
		case I:
			return true;
		default:
			return false;
		}
	}
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipOrderingException.java
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
/**Thrown if the ordering of ligands can now be determined by OPSIN's implementation of the CIP rules.
|
4 |
+
* This could be due to a limitation of the implementation or ligands actually being indistinguishable
|
5 |
+
*
|
6 |
+
* @author dl387
|
7 |
+
*
|
8 |
+
*/
|
9 |
+
class CipOrderingException extends StereochemistryException {
|
10 |
+
|
11 |
+
private static final long serialVersionUID = 1L;
|
12 |
+
|
13 |
+
CipOrderingException() {
|
14 |
+
super();
|
15 |
+
}
|
16 |
+
|
17 |
+
CipOrderingException(String message) {
|
18 |
+
super(message);
|
19 |
+
}
|
20 |
+
|
21 |
+
CipOrderingException(String message, Throwable cause) {
|
22 |
+
super(message, cause);
|
23 |
+
}
|
24 |
+
|
25 |
+
CipOrderingException(Throwable cause) {
|
26 |
+
super(cause);
|
27 |
+
}
|
28 |
+
|
29 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipSequenceRules.java
ADDED
@@ -0,0 +1,470 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.util.ArrayDeque;
|
4 |
+
import java.util.ArrayList;
|
5 |
+
import java.util.Collections;
|
6 |
+
import java.util.Comparator;
|
7 |
+
import java.util.Deque;
|
8 |
+
import java.util.List;
|
9 |
+
import java.util.Queue;
|
10 |
+
|
11 |
+
/**
|
12 |
+
* An implementation of rules 1-2 of the CIP rules i.e. constitutional differences then isotopes if there is a tie
|
13 |
+
* Cases that require rules 3-5 to distinguish result in an exception
|
14 |
+
*
|
15 |
+
* Phantom atoms are not added as I believe that the results of the program will still be the same even in their absence as everything beats a phantom and comparing phantoms to phantoms achieves nothing
|
16 |
+
* (higher ligancy beats lower ligancy when comparisons are performed)
|
17 |
+
* @author dl387
|
18 |
+
*
|
19 |
+
*/
|
20 |
+
class CipSequenceRules {
|
21 |
+
private static class CipOrderingRunTimeException extends RuntimeException {
|
22 |
+
private static final long serialVersionUID = 1L;
|
23 |
+
CipOrderingRunTimeException(String message) {
|
24 |
+
super(message);
|
25 |
+
}
|
26 |
+
}
|
27 |
+
|
28 |
+
private final Atom chiralAtom;
|
29 |
+
|
30 |
+
CipSequenceRules(Atom chiralAtom) {
|
31 |
+
this.chiralAtom = chiralAtom;
|
32 |
+
}
|
33 |
+
|
34 |
+
/**
|
35 |
+
* Returns the chiral atom's neighbours in CIP order from lowest priority to highest priority
|
36 |
+
* @return
|
37 |
+
* @throws CipOrderingException
|
38 |
+
*/
|
39 |
+
List<Atom> getNeighbouringAtomsInCipOrder() throws CipOrderingException {
|
40 |
+
List<Atom> neighbours = chiralAtom.getAtomNeighbours();
|
41 |
+
try {
|
42 |
+
Collections.sort(neighbours, new SortByCipOrder(chiralAtom));
|
43 |
+
}
|
44 |
+
catch (CipOrderingRunTimeException e) {
|
45 |
+
throw new CipOrderingException(e.getMessage());
|
46 |
+
}
|
47 |
+
return neighbours;
|
48 |
+
}
|
49 |
+
|
50 |
+
/**
|
51 |
+
* Returns the chiral atom's neighbours, with the exception of the given atom, in CIP order from lowest priority to highest priority
|
52 |
+
* @param neighbourToIgnore
|
53 |
+
* @return
|
54 |
+
* @throws CipOrderingException
|
55 |
+
*/
|
56 |
+
List<Atom> getNeighbouringAtomsInCipOrderIgnoringGivenNeighbour(Atom neighbourToIgnore) throws CipOrderingException {
|
57 |
+
List<Atom> neighbours = chiralAtom.getAtomNeighbours();
|
58 |
+
if (!neighbours.remove(neighbourToIgnore)) {
|
59 |
+
throw new IllegalArgumentException("OPSIN bug: Atom" + neighbourToIgnore.getID() +" was not a neighbour of the given stereogenic atom");
|
60 |
+
}
|
61 |
+
try {
|
62 |
+
Collections.sort(neighbours, new SortByCipOrder(chiralAtom));
|
63 |
+
}
|
64 |
+
catch (CipOrderingRunTimeException e) {
|
65 |
+
throw new CipOrderingException(e.getMessage());
|
66 |
+
}
|
67 |
+
return neighbours;
|
68 |
+
}
|
69 |
+
|
70 |
+
|
71 |
+
/**
|
72 |
+
* Holds information about which atoms to try next and how those atoms were reached (to prevent immediate backtracking and to detect cycles)
|
73 |
+
* @author dl387
|
74 |
+
*
|
75 |
+
*/
|
76 |
+
private static class CipState {
|
77 |
+
CipState(List<AtomWithHistory> nextAtoms1, List<AtomWithHistory> nextAtoms2) {
|
78 |
+
this.nextAtoms1 = nextAtoms1;
|
79 |
+
this.nextAtoms2 = nextAtoms2;
|
80 |
+
}
|
81 |
+
final List<AtomWithHistory> nextAtoms1;
|
82 |
+
final List<AtomWithHistory> nextAtoms2;
|
83 |
+
}
|
84 |
+
|
85 |
+
/**
|
86 |
+
* Holds an atom with associated visited atoms
|
87 |
+
* @author dl387
|
88 |
+
*
|
89 |
+
*/
|
90 |
+
private static class AtomWithHistory {
|
91 |
+
AtomWithHistory(Atom atom, List<Atom> visitedAtoms, Integer indexOfOriginalFromRoot) {
|
92 |
+
this.atom = atom;
|
93 |
+
this.visitedAtoms = visitedAtoms;
|
94 |
+
this.indexOfOriginalFromRoot = indexOfOriginalFromRoot;
|
95 |
+
}
|
96 |
+
final Atom atom;
|
97 |
+
final List<Atom> visitedAtoms;
|
98 |
+
final Integer indexOfOriginalFromRoot;
|
99 |
+
}
|
100 |
+
|
101 |
+
/**
|
102 |
+
* Sorts atoms by their CIP order, low to high
|
103 |
+
* @author dl387
|
104 |
+
*
|
105 |
+
*/
|
106 |
+
private class SortByCipOrder implements Comparator<Atom> {
|
107 |
+
private final Atom chiralAtom;
|
108 |
+
private final AtomListCipComparator atomListCipComparator = new AtomListCipComparator();
|
109 |
+
private final ListOfAtomListsCipComparator listOfAtomListsCipComparator = new ListOfAtomListsCipComparator();
|
110 |
+
private final CipComparator cipComparator = new CipComparator();
|
111 |
+
private int rule = 0;
|
112 |
+
|
113 |
+
|
114 |
+
SortByCipOrder(Atom chiralAtom) {
|
115 |
+
this.chiralAtom = chiralAtom;
|
116 |
+
}
|
117 |
+
|
118 |
+
public int compare(Atom a, Atom b) {
|
119 |
+
/*
|
120 |
+
* rule = 0 --> Rule 1a Higher atomic number precedes lower
|
121 |
+
* rule = 1 --> Rule 1b A duplicated atom, with its predecessor node having the same label closer to the root, ranks higher than a duplicated atom, with its predecessor node having the same label farther from the root, which ranks higher than any non-duplicated atom node
|
122 |
+
* rule = 2 --> Rule 2 Higher atomic mass number precedes lower
|
123 |
+
*/
|
124 |
+
for (rule = 0; rule <= 2; rule++) {
|
125 |
+
List<Atom> atomsVisted = new ArrayList<>();
|
126 |
+
atomsVisted.add(chiralAtom);
|
127 |
+
AtomWithHistory aWithHistory = new AtomWithHistory(a, atomsVisted, null);
|
128 |
+
AtomWithHistory bWithHistory = new AtomWithHistory(b, new ArrayList<>(atomsVisted), null);
|
129 |
+
|
130 |
+
int compare = compareByCipRules(aWithHistory, bWithHistory);
|
131 |
+
if (compare != 0) {
|
132 |
+
return compare;
|
133 |
+
}
|
134 |
+
|
135 |
+
List<AtomWithHistory> nextAtoms1 = new ArrayList<>();
|
136 |
+
nextAtoms1.add(aWithHistory);
|
137 |
+
|
138 |
+
List<AtomWithHistory> nextAtoms2 = new ArrayList<>();
|
139 |
+
nextAtoms2.add(bWithHistory);
|
140 |
+
|
141 |
+
CipState startingState = new CipState(nextAtoms1, nextAtoms2);
|
142 |
+
Deque<CipState> cipStateQueue = new ArrayDeque<>();
|
143 |
+
cipStateQueue.add(startingState);
|
144 |
+
/* Go through CIP states in a breadth-first manner:
|
145 |
+
* Neighbours of the given atom/s (if multiple atoms this is because so far the two paths leading to them have been equivalent) are evaluated for both a and b
|
146 |
+
* Neighbours are sorted by CIP priority
|
147 |
+
* Comparisons performed between neighbours of a and neighbours of b (will break if compare != 0)
|
148 |
+
* Degenerate neighbours grouped together
|
149 |
+
* CIP state formed for each list of neighbours and added to queue in order of priority
|
150 |
+
*
|
151 |
+
*/
|
152 |
+
while(!cipStateQueue.isEmpty()) {
|
153 |
+
CipState currentState = cipStateQueue.removeFirst();
|
154 |
+
compare = compareAtNextLevel(currentState, cipStateQueue);
|
155 |
+
if (compare != 0) {
|
156 |
+
return compare;
|
157 |
+
}
|
158 |
+
}
|
159 |
+
}
|
160 |
+
throw new CipOrderingRunTimeException("Failed to assign CIP stereochemistry, this indicates a bug in OPSIN or a limitation in OPSIN's implementation of the sequence rules");
|
161 |
+
}
|
162 |
+
|
163 |
+
/**
|
164 |
+
* Compares the neighbours of the atoms specified in nextAtoms1 and nextAtoms2 of the given CipState.
|
165 |
+
* Returns the result of the comparison between these neighbours
|
166 |
+
* If the comparison returned 0 adds new cipstates to the queue
|
167 |
+
* @param cipState
|
168 |
+
* @param queue
|
169 |
+
* @return
|
170 |
+
*/
|
171 |
+
private int compareAtNextLevel(CipState cipState, Queue<CipState> queue) {
|
172 |
+
List<List<AtomWithHistory>> neighbours1 = getNextLevelNeighbours(cipState.nextAtoms1);
|
173 |
+
List<List<AtomWithHistory>> neighbours2 = getNextLevelNeighbours(cipState.nextAtoms2);
|
174 |
+
|
175 |
+
int compare = compareNeighboursByCipPriorityRules(neighbours1, neighbours2);
|
176 |
+
|
177 |
+
if (compare != 0) {
|
178 |
+
return compare;
|
179 |
+
}
|
180 |
+
List<List<AtomWithHistory>> prioritisedNeighbours1 = formListsWithSamePriority(neighbours1);
|
181 |
+
List<List<AtomWithHistory>> prioritisedNeighbours2 = formListsWithSamePriority(neighbours2);
|
182 |
+
|
183 |
+
//As earlier compare was 0, prioritisedNeighbours1.size() == prioritisedNeighbours2.size()
|
184 |
+
for (int i = prioritisedNeighbours1.size() - 1; i >= 0; i--) {
|
185 |
+
queue.add(new CipState(prioritisedNeighbours1.get(i), prioritisedNeighbours2.get(i)));
|
186 |
+
}
|
187 |
+
return 0;
|
188 |
+
}
|
189 |
+
|
190 |
+
private int compareNeighboursByCipPriorityRules(List<List<AtomWithHistory>> neighbours1, List<List<AtomWithHistory>> neighbours2) {
|
191 |
+
int difference = listOfAtomListsCipComparator.compare(neighbours1, neighbours2);
|
192 |
+
if (difference >0) {
|
193 |
+
return 1;
|
194 |
+
}
|
195 |
+
if (difference < 0) {
|
196 |
+
return -1;
|
197 |
+
}
|
198 |
+
return 0;
|
199 |
+
}
|
200 |
+
|
201 |
+
private List<List<AtomWithHistory>> getNextLevelNeighbours(List<AtomWithHistory> nextAtoms) {
|
202 |
+
List<List<AtomWithHistory>> neighbourLists = new ArrayList<>();
|
203 |
+
for (AtomWithHistory nextAtom : nextAtoms) {
|
204 |
+
neighbourLists.add(getNextAtomsWithAppropriateGhostAtoms(nextAtom));
|
205 |
+
}
|
206 |
+
Collections.sort(neighbourLists, atomListCipComparator);
|
207 |
+
return neighbourLists;
|
208 |
+
}
|
209 |
+
|
210 |
+
/**
|
211 |
+
* If given say [H,C,C] this becomes [H] [C,C]
|
212 |
+
* If given say [H,C,C] [H,C,C] this becomes [H,H] [C,C,C,C]
|
213 |
+
* If given say [H,C,C] [H,C,F] this becomes [H] [C,C] [H] [C] [F]
|
214 |
+
* as [H,C,F] is higher priority than [H,C,C] so all its atoms must be evaluated first
|
215 |
+
* The input lists of neighbours are assumed to have been presorted.
|
216 |
+
* @param neighbourLists
|
217 |
+
*/
|
218 |
+
private List<List<AtomWithHistory>> formListsWithSamePriority(List<List<AtomWithHistory>> neighbourLists) {
|
219 |
+
int intialNeighbourListCount = neighbourLists.size();
|
220 |
+
if (intialNeighbourListCount > 1) {
|
221 |
+
List<List<AtomWithHistory>> listsToRemove = new ArrayList<>();
|
222 |
+
for (int i = 0; i < intialNeighbourListCount; i++) {
|
223 |
+
List<List<AtomWithHistory>> neighbourListsToCombine = new ArrayList<>();
|
224 |
+
List<AtomWithHistory> primaryAtomList = neighbourLists.get(i);
|
225 |
+
for (int j = i + 1; j < intialNeighbourListCount; j++) {
|
226 |
+
List<AtomWithHistory> neighbourListToCompareWith = neighbourLists.get(j);
|
227 |
+
if (atomListCipComparator.compare(primaryAtomList, neighbourListToCompareWith) == 0) {
|
228 |
+
neighbourListsToCombine.add(neighbourListToCompareWith);
|
229 |
+
i++;
|
230 |
+
}
|
231 |
+
else {
|
232 |
+
break;
|
233 |
+
}
|
234 |
+
}
|
235 |
+
for (List<AtomWithHistory> neighbourList: neighbourListsToCombine) {
|
236 |
+
listsToRemove.add(neighbourList);
|
237 |
+
primaryAtomList.addAll(neighbourList);
|
238 |
+
}
|
239 |
+
}
|
240 |
+
neighbourLists.removeAll(listsToRemove);
|
241 |
+
}
|
242 |
+
|
243 |
+
List<List<AtomWithHistory>> updatedNeighbourLists = new ArrayList<>();
|
244 |
+
//lists of same priority have already been combined (see above) e.g. [H,C,C] [H,C,C] -->[H,C,C,H,C,C]
|
245 |
+
//now sort these combined lists by CIP priority
|
246 |
+
//then group atoms that have the same CIP priority
|
247 |
+
for (int i = 0, lstsLen = neighbourLists.size(); i < lstsLen; i++) {
|
248 |
+
List<AtomWithHistory> neighbourList = neighbourLists.get(i);
|
249 |
+
Collections.sort(neighbourList, cipComparator);
|
250 |
+
AtomWithHistory lastAtom = null;
|
251 |
+
List<AtomWithHistory> currentAtomList = new ArrayList<>();
|
252 |
+
for (int j = 0, lstLen = neighbourList.size(); j < lstLen; j++) {
|
253 |
+
AtomWithHistory a = neighbourList.get(j);
|
254 |
+
if (lastAtom != null && compareByCipRules(lastAtom, a) != 0) {
|
255 |
+
updatedNeighbourLists.add(currentAtomList);
|
256 |
+
currentAtomList = new ArrayList<>();
|
257 |
+
}
|
258 |
+
currentAtomList.add(a);
|
259 |
+
lastAtom = a;
|
260 |
+
}
|
261 |
+
if (!currentAtomList.isEmpty()) {
|
262 |
+
updatedNeighbourLists.add(currentAtomList);
|
263 |
+
}
|
264 |
+
}
|
265 |
+
return updatedNeighbourLists;
|
266 |
+
}
|
267 |
+
|
268 |
+
|
269 |
+
/**
|
270 |
+
* Sorts atoms by the CIP rules currently being applied (atomic number, then duplicate-node status, then isotope; see compareByCipRules), low to high
|
271 |
+
* @author dl387
|
272 |
+
*
|
273 |
+
*/
|
274 |
+
private class CipComparator implements Comparator<AtomWithHistory> {
|
275 |
+
public int compare(AtomWithHistory a, AtomWithHistory b) {
|
276 |
+
return compareByCipRules(a, b);
|
277 |
+
}
|
278 |
+
}
|
279 |
+
|
280 |
+
/**
|
281 |
+
* Sorts atomLists by CIP rules, low to high
|
282 |
+
* @author dl387
|
283 |
+
*
|
284 |
+
*/
|
285 |
+
private class AtomListCipComparator implements Comparator<List<AtomWithHistory>> {
|
286 |
+
public int compare(List<AtomWithHistory> a, List<AtomWithHistory> b) {
|
287 |
+
int aSize = a.size();
|
288 |
+
int bSize = b.size();
|
289 |
+
int differenceInSize = aSize - bSize;
|
290 |
+
int maxCommonSize = aSize > bSize ? bSize : aSize;
|
291 |
+
for (int i = 1; i <= maxCommonSize; i++) {
|
292 |
+
int difference = compareByCipRules(a.get(aSize - i), b.get(bSize - i));
|
293 |
+
if (difference > 0) {
|
294 |
+
return 1;
|
295 |
+
}
|
296 |
+
if (difference < 0) {
|
297 |
+
return -1;
|
298 |
+
}
|
299 |
+
}
|
300 |
+
if (differenceInSize > 0) {
|
301 |
+
return 1;
|
302 |
+
}
|
303 |
+
if (differenceInSize < 0) {
|
304 |
+
return -1;
|
305 |
+
}
|
306 |
+
return 0;
|
307 |
+
}
|
308 |
+
}
|
309 |
+
|
310 |
+
/**
|
311 |
+
* Sorts lists of atomLists by CIP rules, low to high
|
312 |
+
* @author dl387
|
313 |
+
*
|
314 |
+
*/
|
315 |
+
private class ListOfAtomListsCipComparator implements Comparator<List<List<AtomWithHistory>>> {
|
316 |
+
public int compare(List<List<AtomWithHistory>> a, List<List<AtomWithHistory>> b) {
|
317 |
+
int aSize = a.size();
|
318 |
+
int bSize = b.size();
|
319 |
+
int differenceInSize = aSize - bSize;
|
320 |
+
int maxCommonSize = aSize > bSize ? bSize : aSize;
|
321 |
+
for (int i = 1; i <= maxCommonSize; i++) {
|
322 |
+
List<AtomWithHistory> aprime = a.get(aSize - i);
|
323 |
+
List<AtomWithHistory> bprime = b.get(bSize - i);
|
324 |
+
int aprimeSize = aprime.size();
|
325 |
+
int bprimeSize = bprime.size();
|
326 |
+
int differenceInSizeprime = aprimeSize - bprimeSize;
|
327 |
+
int maxCommonSizeprime = aprimeSize > bprimeSize ? bprimeSize : aprimeSize;
|
328 |
+
for (int j = 1; j <= maxCommonSizeprime; j++) {
|
329 |
+
int difference = compareByCipRules(aprime.get(aprimeSize - j), bprime.get(bprimeSize - j));
|
330 |
+
if (difference > 0) {
|
331 |
+
return 1;
|
332 |
+
}
|
333 |
+
if (difference < 0) {
|
334 |
+
return -1;
|
335 |
+
}
|
336 |
+
}
|
337 |
+
if (differenceInSizeprime > 0) {
|
338 |
+
return 1;
|
339 |
+
}
|
340 |
+
if (differenceInSizeprime < 0) {
|
341 |
+
return -1;
|
342 |
+
}
|
343 |
+
}
|
344 |
+
if (differenceInSize > 0) {
|
345 |
+
return 1;
|
346 |
+
}
|
347 |
+
if (differenceInSize < 0) {
|
348 |
+
return -1;
|
349 |
+
}
|
350 |
+
return 0;
|
351 |
+
}
|
352 |
+
}
|
353 |
+
|
354 |
+
/**
|
355 |
+
* Gets the neighbouring atoms bar the previous atom in CIP order
|
356 |
+
* If the neighbouring atom has already been visited it is replaced with a ghost atom
|
357 |
+
* Multiple bonds including those to previous atoms yield ghost atoms unless the bond goes to the chiral atom e.g. in a sulfoxide
|
358 |
+
* @param atomWithHistory
|
359 |
+
* @return
|
360 |
+
*/
|
361 |
+
private List<AtomWithHistory> getNextAtomsWithAppropriateGhostAtoms(AtomWithHistory atomWithHistory) {
|
362 |
+
Atom atom = atomWithHistory.atom;
|
363 |
+
List<Atom> visitedAtoms = atomWithHistory.visitedAtoms;
|
364 |
+
Atom previousAtom = visitedAtoms.get(visitedAtoms.size()-1);
|
365 |
+
List<Atom> visitedAtomsIncludingCurrentAtom = new ArrayList<>(visitedAtoms);
|
366 |
+
visitedAtomsIncludingCurrentAtom.add(atom);
|
367 |
+
|
368 |
+
List<AtomWithHistory> neighboursWithHistory = new ArrayList<>();
|
369 |
+
for(Bond b : atom.getBonds()) {
|
370 |
+
Atom atomBondConnectsTo = b.getOtherAtom(atom);
|
371 |
+
if (!atomBondConnectsTo.equals(chiralAtom)) {//P-91.1.4.2.4 (higher order bonds to chiral centre do not involve duplication of atoms)
|
372 |
+
for (int j = b.getOrder(); j >1; j--) {//add ghost atoms to represent higher order bonds
|
373 |
+
Atom ghost = new Atom(atomBondConnectsTo.getElement());
|
374 |
+
if (rule > 0) {
|
375 |
+
int indexOfOriginalAtom = visitedAtoms.indexOf(atomBondConnectsTo);
|
376 |
+
if (indexOfOriginalAtom != -1) {
|
377 |
+
neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, indexOfOriginalAtom));
|
378 |
+
}
|
379 |
+
else{
|
380 |
+
neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, visitedAtoms.size() + 1));
|
381 |
+
}
|
382 |
+
}
|
383 |
+
else{
|
384 |
+
neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, null));
|
385 |
+
}
|
386 |
+
}
|
387 |
+
}
|
388 |
+
if (!atomBondConnectsTo.equals(previousAtom)) {
|
389 |
+
if (visitedAtoms.contains(atomBondConnectsTo)) {//cycle detected, add ghost atom instead
|
390 |
+
Atom ghost = new Atom(atomBondConnectsTo.getElement());
|
391 |
+
if (rule > 0) {
|
392 |
+
neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, visitedAtoms.indexOf(atomBondConnectsTo)));
|
393 |
+
}
|
394 |
+
else{
|
395 |
+
neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, null));
|
396 |
+
}
|
397 |
+
}
|
398 |
+
else{
|
399 |
+
neighboursWithHistory.add(new AtomWithHistory(atomBondConnectsTo, visitedAtomsIncludingCurrentAtom, null));
|
400 |
+
}
|
401 |
+
}
|
402 |
+
}
|
403 |
+
Collections.sort(neighboursWithHistory, cipComparator);
|
404 |
+
return neighboursWithHistory;
|
405 |
+
}
|
406 |
+
|
407 |
+
/**
|
408 |
+
* Greater than 0 means a is preferred over b (vice versa for less than 0); 0 means the two could not be distinguished by the rules currently being applied
|
409 |
+
* @param a
|
410 |
+
* @param b
|
411 |
+
* @return
|
412 |
+
*/
|
413 |
+
private int compareByCipRules(AtomWithHistory a, AtomWithHistory b) {
|
414 |
+
//rule 1a
|
415 |
+
//prefer higher atomic number
|
416 |
+
int atomicNumber1 = a.atom.getElement().ATOMIC_NUM;
|
417 |
+
int atomicNumber2 = b.atom.getElement().ATOMIC_NUM;
|
418 |
+
if (atomicNumber1 > atomicNumber2) {
|
419 |
+
return 1;
|
420 |
+
}
|
421 |
+
else if (atomicNumber1 < atomicNumber2) {
|
422 |
+
return -1;
|
423 |
+
}
|
424 |
+
if (rule > 0) {
|
425 |
+
//rule 1b
|
426 |
+
//prefer duplicate to non-duplicate
|
427 |
+
Integer indexFromRoot1 = a.indexOfOriginalFromRoot;
|
428 |
+
Integer indexFromRoot2 = b.indexOfOriginalFromRoot;
|
429 |
+
if (indexFromRoot1 != null && indexFromRoot2 == null) {
|
430 |
+
return 1;
|
431 |
+
}
|
432 |
+
if (indexFromRoot1 == null && indexFromRoot2 != null) {
|
433 |
+
return -1;
|
434 |
+
}
|
435 |
+
//prefer duplicate of node closer to root
|
436 |
+
if (indexFromRoot1 != null && indexFromRoot2 != null) {
|
437 |
+
if (indexFromRoot1 < indexFromRoot2 ) {
|
438 |
+
return 1;
|
439 |
+
}
|
440 |
+
if (indexFromRoot1 > indexFromRoot2 ) {
|
441 |
+
return -1;
|
442 |
+
}
|
443 |
+
}
|
444 |
+
if (rule > 1) {
|
445 |
+
//rule 2
|
446 |
+
//prefer higher atomic mass
|
447 |
+
Integer atomicMass1 = a.atom.getIsotope();
|
448 |
+
Integer atomicMass2 = b.atom.getIsotope();
|
449 |
+
if (atomicMass1 != null && atomicMass2 == null) {
|
450 |
+
return 1;
|
451 |
+
}
|
452 |
+
else if (atomicMass1 == null && atomicMass2 != null) {
|
453 |
+
return -1;
|
454 |
+
}
|
455 |
+
else if (atomicMass1 != null && atomicMass2 != null) {
|
456 |
+
if (atomicMass1 > atomicMass2) {
|
457 |
+
return 1;
|
458 |
+
}
|
459 |
+
else if (atomicMass1 < atomicMass2) {
|
460 |
+
return -1;
|
461 |
+
}
|
462 |
+
}
|
463 |
+
}
|
464 |
+
|
465 |
+
}
|
466 |
+
return 0;
|
467 |
+
}
|
468 |
+
}
|
469 |
+
|
470 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerationException.java
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
/**Thrown during component generation.
|
4 |
+
*
|
5 |
+
* @author ptc24
|
6 |
+
*
|
7 |
+
*/
|
8 |
+
class ComponentGenerationException extends Exception {
|
9 |
+
|
10 |
+
private static final long serialVersionUID = 1L;
|
11 |
+
|
12 |
+
ComponentGenerationException() {
|
13 |
+
super();
|
14 |
+
}
|
15 |
+
|
16 |
+
ComponentGenerationException(String message) {
|
17 |
+
super(message);
|
18 |
+
}
|
19 |
+
|
20 |
+
ComponentGenerationException(String message, Throwable cause) {
|
21 |
+
super(message, cause);
|
22 |
+
}
|
23 |
+
|
24 |
+
ComponentGenerationException(Throwable cause) {
|
25 |
+
super(cause);
|
26 |
+
}
|
27 |
+
|
28 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerator.java
ADDED
The diff for this file is too large to render.
See raw diff
|
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentProcessor.java
ADDED
The diff for this file is too large to render.
See raw diff
|
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CycleDetector.java
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.util.ArrayDeque;
|
4 |
+
import java.util.ArrayList;
|
5 |
+
import java.util.Deque;
|
6 |
+
import java.util.LinkedHashSet;
|
7 |
+
import java.util.List;
|
8 |
+
import java.util.Set;
|
9 |
+
|
10 |
+
/**
|
11 |
+
* Assigns whether atoms are in rings or not
|
12 |
+
* @author dl387
|
13 |
+
*
|
14 |
+
*/
|
15 |
+
class CycleDetector {
|
16 |
+
|
17 |
+
/**
|
18 |
+
* Performs a depth first search for rings hence assigning whether atoms are in rings or not
|
19 |
+
* This is necessary for deciding the applicability, and in some cases meaning, of suffixes and to determine what atoms are capable of having spare valency
|
20 |
+
* Fragments made of disconnected sections are supported
|
21 |
+
* @param frag
|
22 |
+
*/
|
23 |
+
static void assignWhetherAtomsAreInCycles(Fragment frag) {
|
24 |
+
List<Atom> atomList = frag.getAtomList();
|
25 |
+
for (Atom atom : atomList) {
|
26 |
+
atom.setAtomIsInACycle(false);
|
27 |
+
atom.setProperty(Atom.VISITED, null);
|
28 |
+
}
|
29 |
+
for (Atom a : atomList) {//as OPSIN does not disallow disconnected sections within a single "fragment" (e.g. in suffixes) for vigorousness this for loop is required
|
30 |
+
if(a.getProperty(Atom.VISITED) == null){//true for only the first atom in a fully connected molecule
|
31 |
+
traverseRings(a, null, 0);
|
32 |
+
}
|
33 |
+
}
|
34 |
+
}
|
35 |
+
|
36 |
+
private static int traverseRings(Atom currentAtom, Atom previousAtom, int depth){
|
37 |
+
Integer previouslyAssignedDepth = currentAtom.getProperty(Atom.VISITED);
|
38 |
+
if(previouslyAssignedDepth != null){
|
39 |
+
return previouslyAssignedDepth;
|
40 |
+
}
|
41 |
+
currentAtom.setProperty(Atom.VISITED, depth);
|
42 |
+
List<Atom> equivalentAtoms = new ArrayList<>();
|
43 |
+
equivalentAtoms.add(currentAtom);
|
44 |
+
|
45 |
+
List<Atom> neighbours;
|
46 |
+
for(;;) {
|
47 |
+
//Non-recursively process atoms in a chain
|
48 |
+
//add the atoms in the chain to equivalentAtoms as either all or none of them are in a ring
|
49 |
+
neighbours = currentAtom.getAtomNeighbours();
|
50 |
+
neighbours.remove(previousAtom);
|
51 |
+
if (neighbours.size() != 1) {
|
52 |
+
break;
|
53 |
+
}
|
54 |
+
Atom nextAtom = neighbours.get(0);
|
55 |
+
if (nextAtom.getProperty(Atom.VISITED) != null) {
|
56 |
+
//chain reached a previously visited atom, must be a ring
|
57 |
+
break;
|
58 |
+
}
|
59 |
+
previousAtom = currentAtom;
|
60 |
+
currentAtom = nextAtom;
|
61 |
+
equivalentAtoms.add(currentAtom);
|
62 |
+
currentAtom.setProperty(Atom.VISITED, ++depth);
|
63 |
+
}
|
64 |
+
|
65 |
+
int result = depth + 1;
|
66 |
+
for (Atom neighbour : neighbours) {
|
67 |
+
int temp = traverseRings(neighbour, currentAtom, depth + 1);
|
68 |
+
result = Math.min(result, temp);
|
69 |
+
}
|
70 |
+
if (result < depth){
|
71 |
+
for (Atom a : equivalentAtoms) {
|
72 |
+
a.setAtomIsInACycle(true);
|
73 |
+
}
|
74 |
+
} else if (result == depth) {
|
75 |
+
currentAtom.setAtomIsInACycle(true);
|
76 |
+
}
|
77 |
+
return result;
|
78 |
+
}
|
79 |
+
|
80 |
+
private static class PathSearchState{
|
81 |
+
final Atom currentAtom;
|
82 |
+
final List<Atom> orderAtomsVisited;
|
83 |
+
public PathSearchState(Atom currentAtom, List<Atom> orderAtomsVisited ) {
|
84 |
+
this.currentAtom = currentAtom;
|
85 |
+
this.orderAtomsVisited = orderAtomsVisited;
|
86 |
+
}
|
87 |
+
Atom getCurrentAtom() {
|
88 |
+
return currentAtom;
|
89 |
+
}
|
90 |
+
List<Atom> getOrderAtomsVisited() {
|
91 |
+
return orderAtomsVisited;
|
92 |
+
}
|
93 |
+
}
|
94 |
+
|
95 |
+
/**
|
96 |
+
* Attempts to find paths from a1 to a2 using only the given bonds
|
97 |
+
* @param a1
|
98 |
+
* @param a2
|
99 |
+
* @param peripheryBonds
|
100 |
+
* @return
|
101 |
+
*/
|
102 |
+
static List<List<Atom>> getPathBetweenAtomsUsingBonds(Atom a1, Atom a2, Set<Bond> peripheryBonds){
|
103 |
+
List<List<Atom>> paths = new ArrayList<>();
|
104 |
+
Deque<PathSearchState> stateStack = new ArrayDeque<>();
|
105 |
+
stateStack.add(new PathSearchState(a1, new ArrayList<>()));
|
106 |
+
while (stateStack.size()>0){
|
107 |
+
PathSearchState state =stateStack.removeLast();//depth first traversal
|
108 |
+
List<Atom> orderAtomsVisited = state.getOrderAtomsVisited();
|
109 |
+
Atom nextAtom = state.getCurrentAtom();
|
110 |
+
orderAtomsVisited.add(nextAtom);
|
111 |
+
Set<Bond> neighbourBonds = new LinkedHashSet<>(nextAtom.getBonds());
|
112 |
+
neighbourBonds.retainAll(peripheryBonds);
|
113 |
+
for (Bond neighbourBond : neighbourBonds) {
|
114 |
+
Atom neighbour = neighbourBond.getOtherAtom(nextAtom);
|
115 |
+
if (orderAtomsVisited.contains(neighbour)){//atom already visited by this path
|
116 |
+
continue;
|
117 |
+
}
|
118 |
+
if (neighbour ==a2 ){//target atom found
|
119 |
+
paths.add(new ArrayList<>(orderAtomsVisited.subList(1, orderAtomsVisited.size())));
|
120 |
+
}
|
121 |
+
else{//add atom to stack, its neighbours will be recursively investigated shortly
|
122 |
+
stateStack.add(new PathSearchState(neighbour, new ArrayList<>(orderAtomsVisited)));
|
123 |
+
}
|
124 |
+
}
|
125 |
+
}
|
126 |
+
return paths;
|
127 |
+
}
|
128 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CyclicAtomList.java
ADDED
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.util.List;
|
4 |
+
|
5 |
+
/**
|
6 |
+
* Convenience class for iterating over a list of atoms that form a ring
|
7 |
+
* Calling next() when the index is at the final atom in the list will return the first atom
|
8 |
+
* Calling previous() when the index is at the first atom in the list will return the final atom
|
9 |
+
* @author dl387
|
10 |
+
*
|
11 |
+
*/
|
12 |
+
class CyclicAtomList{
|
13 |
+
private int index = -1;
|
14 |
+
private final List<Atom> atomList;
|
15 |
+
|
16 |
+
/**
|
17 |
+
* Construct a cyclicAtomList from an atomList
|
18 |
+
* Index defaults to -1
|
19 |
+
* @param atomList
|
20 |
+
*/
|
21 |
+
CyclicAtomList(List<Atom> atomList) {
|
22 |
+
this.atomList = atomList;
|
23 |
+
}
|
24 |
+
|
25 |
+
/**
|
26 |
+
* Construct a cyclicAtomList from an atomList
|
27 |
+
* The second parameter sets the current index
|
28 |
+
* @param atomList
|
29 |
+
* @param index
|
30 |
+
*/
|
31 |
+
CyclicAtomList(List<Atom> atomList, int index) {
|
32 |
+
this.atomList = atomList;
|
33 |
+
setIndex(index);
|
34 |
+
}
|
35 |
+
|
36 |
+
/**
|
37 |
+
* Returns the number of elements in this list. If this list contains more
|
38 |
+
* than <tt>Integer.MAX_VALUE</tt> elements, returns
|
39 |
+
* <tt>Integer.MAX_VALUE</tt>.
|
40 |
+
*
|
41 |
+
* @return the number of elements in this list
|
42 |
+
*/
|
43 |
+
int size() {
|
44 |
+
return atomList.size();
|
45 |
+
}
|
46 |
+
|
47 |
+
/**
|
48 |
+
* Returns the atom at the specified position in this list.
|
49 |
+
* @param index index of the element to return
|
50 |
+
* @return Atom the atom at the specified position in this list
|
51 |
+
* @throws IndexOutOfBoundsException - if the index is out of range (index < 0 || index >= size())
|
52 |
+
*/
|
53 |
+
Atom get(int index) throws IndexOutOfBoundsException {
|
54 |
+
return atomList.get(index);
|
55 |
+
}
|
56 |
+
|
57 |
+
/**
|
58 |
+
* Return the current index in the list
|
59 |
+
* @return
|
60 |
+
*/
|
61 |
+
int getIndex() {
|
62 |
+
return index;
|
63 |
+
}
|
64 |
+
|
65 |
+
/**
|
66 |
+
* Set the current index
|
67 |
+
* @param index
|
68 |
+
*/
|
69 |
+
void setIndex(int index) {
|
70 |
+
if (index >= atomList.size()){
|
71 |
+
throw new IllegalArgumentException("Specified index is not within ringAtom list");
|
72 |
+
}
|
73 |
+
this.index = index;
|
74 |
+
}
|
75 |
+
|
76 |
+
/**
|
77 |
+
* Increments and returns the atom at the new index in the list (next atom)
|
78 |
+
* When the index is the final atom in the list will return the first atom
|
79 |
+
* @return
|
80 |
+
*/
|
81 |
+
Atom next() {
|
82 |
+
int tempIndex = index + 1;
|
83 |
+
if (tempIndex >= atomList.size()){
|
84 |
+
tempIndex = 0;
|
85 |
+
}
|
86 |
+
index = tempIndex;
|
87 |
+
return atomList.get(index);
|
88 |
+
}
|
89 |
+
|
90 |
+
/**
|
91 |
+
* Decrements and returns the atom at the new index in the list (previous atom)
|
92 |
+
* when the index is the first atom in the list will return the final atom
|
93 |
+
* @return
|
94 |
+
*/
|
95 |
+
Atom previous() {
|
96 |
+
int tempIndex = index - 1;
|
97 |
+
if (tempIndex < 0){
|
98 |
+
tempIndex = atomList.size() -1 ;
|
99 |
+
}
|
100 |
+
index = tempIndex;
|
101 |
+
return atomList.get(index);
|
102 |
+
}
|
103 |
+
|
104 |
+
/**
|
105 |
+
* Returns the next atom in the list
|
106 |
+
* When the index is the final atom in the list will return the first atom
|
107 |
+
* Doesn't affect the list's current index
|
108 |
+
* @return
|
109 |
+
*/
|
110 |
+
Atom peekNext() {
|
111 |
+
int tempIndex = index + 1;
|
112 |
+
if (tempIndex >= atomList.size()){
|
113 |
+
tempIndex = 0;
|
114 |
+
}
|
115 |
+
return atomList.get(tempIndex);
|
116 |
+
}
|
117 |
+
|
118 |
+
/**
|
119 |
+
* Returns the previous atom in the list
|
120 |
+
* when the index is the first atom in the list will return the final atom
|
121 |
+
* Doesn't affect the list's current index
|
122 |
+
* @return
|
123 |
+
*/
|
124 |
+
Atom peekPrevious() {
|
125 |
+
int tempIndex = index - 1;
|
126 |
+
if (tempIndex < 0){
|
127 |
+
tempIndex = atomList.size() -1 ;
|
128 |
+
}
|
129 |
+
return atomList.get(tempIndex);
|
130 |
+
}
|
131 |
+
|
132 |
+
/**
|
133 |
+
* Returns the atom corresponding to the current index
|
134 |
+
* Note that CyclicAtomLists have a default index of -1
|
135 |
+
* @return
|
136 |
+
*/
|
137 |
+
Atom getCurrent() {
|
138 |
+
return atomList.get(index);
|
139 |
+
}
|
140 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Element.java
ADDED
@@ -0,0 +1,229 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.util.ArrayList;
|
4 |
+
import java.util.List;
|
5 |
+
|
6 |
+
abstract class Element {
|
7 |
+
|
8 |
+
protected String name;
|
9 |
+
protected Element parent = null;
|
10 |
+
protected final List<Attribute> attributes = new ArrayList<>();
|
11 |
+
|
12 |
+
Element(String name) {
|
13 |
+
this.name = name;
|
14 |
+
}
|
15 |
+
|
16 |
+
void addAttribute(Attribute attribute) {
|
17 |
+
attributes.add(attribute);
|
18 |
+
}
|
19 |
+
|
20 |
+
void addAttribute(String atrName, String atrValue) {
|
21 |
+
attributes.add(new Attribute(atrName, atrValue));
|
22 |
+
}
|
23 |
+
|
24 |
+
/**
|
25 |
+
* Adds a child element
|
26 |
+
* @param child
|
27 |
+
*/
|
28 |
+
abstract void addChild(Element child);
|
29 |
+
|
30 |
+
/**
|
31 |
+
* Creates a deep copy with no parent
|
32 |
+
*/
|
33 |
+
abstract Element copy();
|
34 |
+
|
35 |
+
void detach() {
|
36 |
+
if (parent != null) {
|
37 |
+
parent.removeChild(this);
|
38 |
+
}
|
39 |
+
}
|
40 |
+
|
41 |
+
Attribute getAttribute(int index) {
|
42 |
+
return attributes.get(index);
|
43 |
+
}
|
44 |
+
|
45 |
+
/**
|
46 |
+
* Returns the attribute with the given name
|
47 |
+
* or null if the attribute doesn't exist
|
48 |
+
* @param name
|
49 |
+
* @return
|
50 |
+
*/
|
51 |
+
Attribute getAttribute(String name) {
|
52 |
+
for (int i = 0, len = attributes.size(); i < len; i++) {
|
53 |
+
Attribute a = attributes.get(i);
|
54 |
+
if (a.getName().equals(name)) {
|
55 |
+
return a;
|
56 |
+
}
|
57 |
+
}
|
58 |
+
return null;
|
59 |
+
}
|
60 |
+
|
61 |
+
int getAttributeCount() {
|
62 |
+
return attributes.size();
|
63 |
+
}
|
64 |
+
|
65 |
+
/**
|
66 |
+
* Returns the value of the attribute with the given name
|
67 |
+
* or null if the attribute doesn't exist
|
68 |
+
* @param name
|
69 |
+
* @return
|
70 |
+
*/
|
71 |
+
String getAttributeValue(String name) {
|
72 |
+
Attribute attribute = getAttribute(name);
|
73 |
+
if (attribute != null) {
|
74 |
+
return attribute.getValue();
|
75 |
+
}
|
76 |
+
return null;
|
77 |
+
}
|
78 |
+
|
79 |
+
/**
|
80 |
+
* Returns the child at the given index in the children list
|
81 |
+
* @param index
|
82 |
+
* @return
|
83 |
+
*/
|
84 |
+
abstract Element getChild(int index);
|
85 |
+
|
86 |
+
/**
|
87 |
+
* Returns the number of children
|
88 |
+
* @return
|
89 |
+
*/
|
90 |
+
abstract int getChildCount();
|
91 |
+
|
92 |
+
/**
|
93 |
+
* Returns a copy of the child elements
|
94 |
+
*
|
95 |
+
* @return
|
96 |
+
*/
|
97 |
+
abstract List<Element> getChildElements();
|
98 |
+
|
99 |
+
/**
|
100 |
+
* Gets child elements with this name (in iteration order)
|
101 |
+
* @param name
|
102 |
+
* @return
|
103 |
+
*/
|
104 |
+
abstract List<Element> getChildElements(String name);
|
105 |
+
|
106 |
+
/**
|
107 |
+
* Returns the first child element with the specified name
|
108 |
+
*
|
109 |
+
* @param name
|
110 |
+
* @return
|
111 |
+
*/
|
112 |
+
abstract Element getFirstChildElement(String name);
|
113 |
+
|
114 |
+
/**
|
115 |
+
* Returns the fragment associated with this element (only applicable to tokens)
|
116 |
+
* @return
|
117 |
+
*/
|
118 |
+
Fragment getFrag() {
|
119 |
+
throw new UnsupportedOperationException("Only tokens can have associated fragments");
|
120 |
+
}
|
121 |
+
|
122 |
+
String getName() {
|
123 |
+
return name;
|
124 |
+
}
|
125 |
+
|
126 |
+
Element getParent() {
|
127 |
+
return this.parent;
|
128 |
+
}
|
129 |
+
|
130 |
+
abstract String getValue();
|
131 |
+
|
132 |
+
/**
|
133 |
+
* Returns the index of the given child in the children list (or -1 if it isn't a child)
|
134 |
+
* @param child
|
135 |
+
* @return
|
136 |
+
*/
|
137 |
+
abstract int indexOf(Element child);
|
138 |
+
|
139 |
+
/**
|
140 |
+
* Inserts the element at the given index in the children list
|
141 |
+
* @param child
|
142 |
+
* @param index
|
143 |
+
*/
|
144 |
+
abstract void insertChild(Element child, int index);
|
145 |
+
|
146 |
+
boolean removeAttribute(Attribute attribute) {
|
147 |
+
return attributes.remove(attribute);
|
148 |
+
}
|
149 |
+
|
150 |
+
/**
|
151 |
+
* Removes the given child element
|
152 |
+
* @param child
|
153 |
+
* @return
|
154 |
+
*/
|
155 |
+
abstract boolean removeChild(Element child);
|
156 |
+
|
157 |
+
/**
|
158 |
+
* Removes the element at the given index in the children list
|
159 |
+
* @param index
|
160 |
+
* @return
|
161 |
+
*/
|
162 |
+
abstract Element removeChild(int index);
|
163 |
+
|
164 |
+
/**
|
165 |
+
* Replaces a child element with another element
|
166 |
+
* @param oldChild
|
167 |
+
* @param newChild
|
168 |
+
*/
|
169 |
+
abstract void replaceChild(Element oldChild, Element newChild);
|
170 |
+
|
171 |
+
/**
|
172 |
+
* Sets the fragment associated with this element (only applicable to tokens!)
|
173 |
+
* @param frag
|
174 |
+
*/
|
175 |
+
void setFrag(Fragment frag) {
|
176 |
+
throw new UnsupportedOperationException("Only tokens can have associated fragments");
|
177 |
+
}
|
178 |
+
|
179 |
+
void setName(String name) {
|
180 |
+
this.name = name;
|
181 |
+
}
|
182 |
+
|
183 |
+
void setParent(Element newParentEl) {
|
184 |
+
this.parent = newParentEl;
|
185 |
+
}
|
186 |
+
|
187 |
+
abstract void setValue(String text);
|
188 |
+
|
189 |
+
public String toString() {
|
190 |
+
return toXML();
|
191 |
+
}
|
192 |
+
|
193 |
+
String toXML() {
|
194 |
+
return toXML(0).toString();
|
195 |
+
}
|
196 |
+
|
197 |
+
private StringBuilder toXML(int indent) {
|
198 |
+
StringBuilder result = new StringBuilder();
|
199 |
+
for (int i = 0; i < indent; i++) {
|
200 |
+
result.append(" ");
|
201 |
+
}
|
202 |
+
result.append('<');
|
203 |
+
result.append(name);
|
204 |
+
for (Attribute atr : attributes) {
|
205 |
+
result.append(' ');
|
206 |
+
result.append(atr.toXML());
|
207 |
+
}
|
208 |
+
result.append('>');
|
209 |
+
if (getChildCount() > 0){
|
210 |
+
for (Element child : getChildElements()) {
|
211 |
+
result.append(OpsinTools.NEWLINE);
|
212 |
+
result.append(child.toXML(indent + 1));
|
213 |
+
}
|
214 |
+
result.append(OpsinTools.NEWLINE);
|
215 |
+
for (int i = 0; i < indent; i++) {
|
216 |
+
result.append(" ");
|
217 |
+
}
|
218 |
+
}
|
219 |
+
else{
|
220 |
+
result.append(getValue());
|
221 |
+
}
|
222 |
+
result.append("</");
|
223 |
+
result.append(name);
|
224 |
+
result.append('>');
|
225 |
+
|
226 |
+
return result;
|
227 |
+
}
|
228 |
+
|
229 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Fragment.java
ADDED
@@ -0,0 +1,633 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.util.ArrayList;
|
4 |
+
import java.util.Collection;
|
5 |
+
import java.util.Collections;
|
6 |
+
import java.util.HashMap;
|
7 |
+
import java.util.Iterator;
|
8 |
+
import java.util.LinkedHashMap;
|
9 |
+
import java.util.LinkedHashSet;
|
10 |
+
import java.util.List;
|
11 |
+
import java.util.Map;
|
12 |
+
import java.util.Set;
|
13 |
+
import java.util.regex.Matcher;
|
14 |
+
|
15 |
+
import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*;
|
16 |
+
import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*;
|
17 |
+
|
18 |
+
/**A fragment of a molecule, holds bonds and atoms.
|
19 |
+
*
|
20 |
+
* @author ptc24
|
21 |
+
* @author dl387
|
22 |
+
*
|
23 |
+
*/
|
24 |
+
class Fragment implements Iterable<Atom> {
|
25 |
+
|
26 |
+
/**A mapping between IDs and the atoms in this fragment, by default is ordered by the order atoms are added to the fragment*/
|
27 |
+
private final Map<Integer, Atom> atomMapFromId = new LinkedHashMap<>();
|
28 |
+
|
29 |
+
/**Equivalent to and synced to atomMapFromId.values() */
|
30 |
+
private final Collection<Atom> atomCollection = atomMapFromId.values();
|
31 |
+
|
32 |
+
/**A mapping between locants and the atoms in this fragment*/
|
33 |
+
private final Map<String, Atom> atomMapFromLocant = new HashMap<>();
|
34 |
+
|
35 |
+
/**The bonds in the fragment*/
|
36 |
+
private final Set<Bond> bondSet = new LinkedHashSet<>();
|
37 |
+
|
38 |
+
/**The associated token element*/
|
39 |
+
private Element tokenEl;
|
40 |
+
|
41 |
+
/**The atoms that are used when this fragment is connected to another fragment. Unused outAtoms means that the fragment is a radical or an error has occurred
|
42 |
+
* Initially empty */
|
43 |
+
private final List<OutAtom> outAtoms = new ArrayList<>();
|
44 |
+
|
45 |
+
/**The atoms that are used on this fragment to form things like esters
|
46 |
+
* Initially empty */
|
47 |
+
private final List<FunctionalAtom> functionalAtoms = new ArrayList<>();
|
48 |
+
|
49 |
+
/**The atom that fragments connecting to this fragment should connect to in preference
|
50 |
+
* e.g. for amino acids the alpha amino group
|
51 |
+
* Null by default*/
|
52 |
+
private Atom defaultInAtom = null;
|
53 |
+
|
54 |
+
/**The atoms in the fragment that have been indicated to have hydrogen at the SMILES level.*/
|
55 |
+
private final List<Atom> indicatedHydrogen = new ArrayList<>();
|
56 |
+
|
57 |
+
/**Pseudo atoms indicating start and end of polymer structure repeat unit*/
|
58 |
+
private List<Atom> polymerAttachmentPoints = null;
|
59 |
+
|
60 |
+
/**
|
61 |
+
* DO NOT CALL DIRECTLY EXCEPT FOR TESTING
|
62 |
+
* Makes an empty Fragment associated with the given tokenEl
|
63 |
+
* @param tokenEl
|
64 |
+
*/
|
65 |
+
Fragment(Element tokenEl) {
|
66 |
+
this.tokenEl = tokenEl;
|
67 |
+
}
|
68 |
+
|
69 |
+
/**
|
70 |
+
* DO NOT CALL DIRECTLY EXCEPT FOR TESTING
|
71 |
+
* Makes an empty Fragment with the given type
|
72 |
+
*
|
73 |
+
* @param type
|
74 |
+
*/
|
75 |
+
Fragment(String type) {
|
76 |
+
this.tokenEl = new TokenEl("");
|
77 |
+
this.tokenEl.addAttribute(TYPE_ATR, type);
|
78 |
+
}
|
79 |
+
|
80 |
+
/**Adds an atom to the fragment and associates it with this fragment*/
|
81 |
+
void addAtom(Atom atom) {
|
82 |
+
List<String> locants =atom.getLocants();
|
83 |
+
for (String locant: locants) {
|
84 |
+
atomMapFromLocant.put(locant, atom);
|
85 |
+
}
|
86 |
+
atomMapFromId.put(atom.getID(), atom);
|
87 |
+
atom.setFrag(this);
|
88 |
+
}
|
89 |
+
|
90 |
+
/**
|
91 |
+
* Return the number of atoms in the fragment
|
92 |
+
* @return
|
93 |
+
*/
|
94 |
+
int getAtomCount() {
|
95 |
+
return atomCollection.size();
|
96 |
+
}
|
97 |
+
|
98 |
+
/**
|
99 |
+
* Returns a copy of the fragment's atoms
|
100 |
+
* @return
|
101 |
+
*/
|
102 |
+
List<Atom> getAtomList() {
|
103 |
+
return new ArrayList<>(atomCollection);
|
104 |
+
}
|
105 |
+
|
106 |
+
|
107 |
+
/**
|
108 |
+
* Adds a bond to the fragment.
|
109 |
+
* @param bond
|
110 |
+
*/
|
111 |
+
void addBond(Bond bond) {
|
112 |
+
bondSet.add(bond);
|
113 |
+
}
|
114 |
+
|
115 |
+
/**Removes a bond to the fragment if it is present.
|
116 |
+
* @param bond
|
117 |
+
* @return*/
|
118 |
+
boolean removeBond(Bond bond) {
|
119 |
+
return bondSet.remove(bond);
|
120 |
+
}
|
121 |
+
|
122 |
+
/**Gets bondSet.*/
|
123 |
+
Set<Bond> getBondSet() {
|
124 |
+
return Collections.unmodifiableSet(bondSet);
|
125 |
+
}
|
126 |
+
|
127 |
+
/**Gets the id of the atom in the fragment with the specified locant.
|
128 |
+
*
|
129 |
+
* @param locant The locant to look for
|
130 |
+
* @return The id of the found atom, or 0 if it is not found
|
131 |
+
*/
|
132 |
+
int getIDFromLocant(String locant) {
|
133 |
+
Atom a = getAtomByLocant(locant);
|
134 |
+
if (a != null){
|
135 |
+
return a.getID();
|
136 |
+
}
|
137 |
+
return 0;
|
138 |
+
}
|
139 |
+
|
140 |
+
/**Gets the id of the atom in the fragment with the specified locant, throwing if this fails.
|
141 |
+
*
|
142 |
+
* @param locant The locant to look for
|
143 |
+
* @return The id of the found atom
|
144 |
+
* @throws StructureBuildingException
|
145 |
+
*/
|
146 |
+
int getIDFromLocantOrThrow(String locant) throws StructureBuildingException {
|
147 |
+
int id = getIDFromLocant(locant);
|
148 |
+
if(id == 0) {
|
149 |
+
throw new StructureBuildingException("Couldn't find id from locant " + locant + ".");
|
150 |
+
}
|
151 |
+
return id;
|
152 |
+
}
|
153 |
+
|
154 |
+
/**Gets the atom in the fragment with the specified locant.
|
155 |
+
*
|
156 |
+
* @param locant The locant to look for
|
157 |
+
* @return The found atom, or null if it is not found
|
158 |
+
*/
|
159 |
+
Atom getAtomByLocant(String locant) {
|
160 |
+
Atom a =atomMapFromLocant.get(locant);
|
161 |
+
if (a != null){
|
162 |
+
return a;
|
163 |
+
}
|
164 |
+
Matcher m =MATCH_AMINOACID_STYLE_LOCANT.matcher(locant);
|
165 |
+
if (m.matches()){//e.g. N5
|
166 |
+
Atom backboneAtom =atomMapFromLocant.get(m.group(3));//the atom corresponding to the numeric or greek component
|
167 |
+
if (backboneAtom==null){
|
168 |
+
return null;
|
169 |
+
}
|
170 |
+
a = FragmentTools.getAtomByAminoAcidStyleLocant(backboneAtom, m.group(1), m.group(2));
|
171 |
+
if (a != null){
|
172 |
+
return a;
|
173 |
+
}
|
174 |
+
}
|
175 |
+
return null;
|
176 |
+
}
|
177 |
+
|
178 |
+
/**Gets the atom in the fragment with the specified locant, throwing if this fails.
|
179 |
+
*
|
180 |
+
* @param locant The locant to look for
|
181 |
+
* @return The found atom
|
182 |
+
* @throws StructureBuildingException
|
183 |
+
*/
|
184 |
+
Atom getAtomByLocantOrThrow(String locant) throws StructureBuildingException {
|
185 |
+
Atom a = getAtomByLocant(locant);
|
186 |
+
if(a == null) {
|
187 |
+
throw new StructureBuildingException("Could not find the atom with locant " + locant + ".");
|
188 |
+
}
|
189 |
+
return a;
|
190 |
+
}
|
191 |
+
|
192 |
+
/**Gets the atom in the fragment with the specified ID.
|
193 |
+
*
|
194 |
+
* @param id The id of the atom.
|
195 |
+
* @return The found atom, or null.
|
196 |
+
*/
|
197 |
+
Atom getAtomByID(int id) {
|
198 |
+
return atomMapFromId.get(id);
|
199 |
+
}
|
200 |
+
|
201 |
+
/**Gets the atom in the fragment with the specified ID, throwing if this fails.
|
202 |
+
*
|
203 |
+
* @param id The id of the atom.
|
204 |
+
* @return The found atom
|
205 |
+
* @throws StructureBuildingException
|
206 |
+
*/
|
207 |
+
Atom getAtomByIDOrThrow(int id) throws StructureBuildingException {
|
208 |
+
Atom a = getAtomByID(id);
|
209 |
+
if(a == null) {
|
210 |
+
throw new StructureBuildingException("Couldn't find atom with id " + id + ".");
|
211 |
+
}
|
212 |
+
return a;
|
213 |
+
}
|
214 |
+
|
215 |
+
/**Finds a bond between two specified atoms the first of which must be within the fragment
|
216 |
+
*
|
217 |
+
* @param ID1 The id of one atom
|
218 |
+
* @param ID2 The id of the other atom
|
219 |
+
* @return The bond found, or null
|
220 |
+
*/
|
221 |
+
Bond findBond(int ID1, int ID2) {
|
222 |
+
Atom a = atomMapFromId.get(ID1);
|
223 |
+
if (a != null){
|
224 |
+
for (Bond b : a.getBonds()) {
|
225 |
+
if((b.getFrom() == ID1 && b.getTo() == ID2) ||
|
226 |
+
(b.getTo() == ID1 && b.getFrom() == ID2)) {
|
227 |
+
return b;
|
228 |
+
}
|
229 |
+
}
|
230 |
+
}
|
231 |
+
return null;
|
232 |
+
}
|
233 |
+
|
234 |
+
/**Finds a bond between two specified atoms the first of which must be within the fragment, throwing if it fails.
|
235 |
+
*
|
236 |
+
* @param ID1 The id of one atom
|
237 |
+
* @param ID2 The id of the other atom
|
238 |
+
* @return The bond found
|
239 |
+
* @throws StructureBuildingException
|
240 |
+
*/
|
241 |
+
Bond findBondOrThrow(int ID1, int ID2) throws StructureBuildingException {
|
242 |
+
Bond b = findBond(ID1, ID2);
|
243 |
+
if(b == null) {
|
244 |
+
throw new StructureBuildingException("Couldn't find specified bond");
|
245 |
+
}
|
246 |
+
return b;
|
247 |
+
}
|
248 |
+
|
249 |
+
/**Works out how many atoms there are in the fragment there are
|
250 |
+
* with consecutive locants, starting from 1 that are in a chain
|
251 |
+
*
|
252 |
+
* @return The number of atoms in the locant chain
|
253 |
+
*/
|
254 |
+
int getChainLength() {
|
255 |
+
int length = 0;
|
256 |
+
Atom next = getAtomByLocant(Integer.toString(length + 1));
|
257 |
+
Atom previous = null;
|
258 |
+
while (next != null){
|
259 |
+
if (previous != null && previous.getBondToAtom(next) == null){
|
260 |
+
break;
|
261 |
+
}
|
262 |
+
length++;
|
263 |
+
previous = next;
|
264 |
+
next = getAtomByLocant(Integer.toString(length + 1));
|
265 |
+
}
|
266 |
+
return length;
|
267 |
+
}
|
268 |
+
|
269 |
+
/**
|
270 |
+
* Gets the type of the corresponding tokenEl
|
271 |
+
* Returns "" if undefined
|
272 |
+
* @return
|
273 |
+
*/
|
274 |
+
String getType() {
|
275 |
+
String type = tokenEl.getAttributeValue(TYPE_ATR);
|
276 |
+
return type != null ? type : "";
|
277 |
+
}
|
278 |
+
|
279 |
+
/**
|
280 |
+
* Gets the subType of the corresponding tokenEl
|
281 |
+
* Returns "" if undefined
|
282 |
+
* @return
|
283 |
+
*/
|
284 |
+
String getSubType() {
|
285 |
+
String subType = tokenEl.getAttributeValue(SUBTYPE_ATR);
|
286 |
+
return subType != null ? subType : "";
|
287 |
+
}
|
288 |
+
|
289 |
+
/**
|
290 |
+
* Gets the associate tokenEl
|
291 |
+
* Whether or not this is a real token can be tested by whether it has a parent
|
292 |
+
* @return
|
293 |
+
*/
|
294 |
+
Element getTokenEl() {
|
295 |
+
return tokenEl;
|
296 |
+
}
|
297 |
+
|
298 |
+
/**
|
299 |
+
* Sets the associated tokenEl
|
300 |
+
* Type/subType are inherited from the tokenEl
|
301 |
+
* @param tokenEl
|
302 |
+
*/
|
303 |
+
void setTokenEl(Element tokenEl) {
|
304 |
+
this.tokenEl = tokenEl;
|
305 |
+
}
|
306 |
+
|
307 |
+
/**
|
308 |
+
* How many OutAtoms (i.e. radicals) are associated with this fragment
|
309 |
+
* @return
|
310 |
+
*/
|
311 |
+
int getOutAtomCount() {
|
312 |
+
return outAtoms.size();
|
313 |
+
}
|
314 |
+
|
315 |
+
/**
|
316 |
+
* Gets the outAtom at a specific index of the outAtoms linkedList
|
317 |
+
* @param i
|
318 |
+
* @return
|
319 |
+
*/
|
320 |
+
OutAtom getOutAtom(int i) {
|
321 |
+
return outAtoms.get(i);
|
322 |
+
}
|
323 |
+
|
324 |
+
/**
|
325 |
+
* Adds an outAtom
|
326 |
+
* @param id
|
327 |
+
* @param valency
|
328 |
+
* @param setExplicitly
|
329 |
+
* @throws StructureBuildingException
|
330 |
+
*/
|
331 |
+
void addOutAtom(int id, int valency, Boolean setExplicitly) throws StructureBuildingException {
|
332 |
+
addOutAtom(getAtomByIDOrThrow(id), valency, setExplicitly);
|
333 |
+
}
|
334 |
+
|
335 |
+
/**
|
336 |
+
* Adds an outAtom
|
337 |
+
* @param atom
|
338 |
+
* @param valency
|
339 |
+
* @param setExplicitly
|
340 |
+
*/
|
341 |
+
void addOutAtom(Atom atom, int valency, Boolean setExplicitly) {
|
342 |
+
outAtoms.add(new OutAtom(atom, valency, setExplicitly));
|
343 |
+
}
|
344 |
+
|
345 |
+
/**
|
346 |
+
* Includes the OutAtoms of a given fragment into this fragment
|
347 |
+
* Note that no OutAtoms are created in doing this
|
348 |
+
* @param frag
|
349 |
+
*/
|
350 |
+
void incorporateOutAtoms(Fragment frag) {
|
351 |
+
outAtoms.addAll(frag.outAtoms);
|
352 |
+
}
|
353 |
+
|
354 |
+
/**
|
355 |
+
* Removes the outAtom at a specific index of the outAtom linkedList
|
356 |
+
* @param i
|
357 |
+
*/
|
358 |
+
void removeOutAtom(int i) {
|
359 |
+
OutAtom removedOutAtom = outAtoms.remove(i);
|
360 |
+
if (removedOutAtom.isSetExplicitly()){
|
361 |
+
removedOutAtom.getAtom().addOutValency(-removedOutAtom.getValency());
|
362 |
+
}
|
363 |
+
}
|
364 |
+
|
365 |
+
/**
|
366 |
+
* Removes the specified outAtom from the outAtoms linkedList
|
367 |
+
* @param outAtom
|
368 |
+
*/
|
369 |
+
void removeOutAtom(OutAtom outAtom) {
|
370 |
+
if (outAtoms.remove(outAtom) && outAtom.isSetExplicitly()){
|
371 |
+
outAtom.getAtom().addOutValency(-outAtom.getValency());
|
372 |
+
}
|
373 |
+
}
|
374 |
+
|
375 |
+
/**
|
376 |
+
* How many functionalAtoms (i.e. locations that can form esters) are associated with this fragment
|
377 |
+
* @return
|
378 |
+
*/
|
379 |
+
int getFunctionalAtomCount() {
|
380 |
+
return functionalAtoms.size();
|
381 |
+
}
|
382 |
+
|
383 |
+
/**
|
384 |
+
* Gets the functionalAtom at a specific index of the functionalAtoms linkedList
|
385 |
+
* @param i
|
386 |
+
* @return
|
387 |
+
*/
|
388 |
+
FunctionalAtom getFunctionalAtom(int i) {
|
389 |
+
return functionalAtoms.get(i);
|
390 |
+
}
|
391 |
+
|
392 |
+
/**Adds a functionalAtom
|
393 |
+
* @param atom*/
|
394 |
+
void addFunctionalAtom(Atom atom) {
|
395 |
+
functionalAtoms.add(new FunctionalAtom(atom));
|
396 |
+
}
|
397 |
+
|
398 |
+
/**
|
399 |
+
* Includes the FunctionalAtoms of a given fragment into this fragment
|
400 |
+
* Note that no FunctionalAtoms are created in doing this
|
401 |
+
* @param frag
|
402 |
+
*/
|
403 |
+
void incorporateFunctionalAtoms(Fragment frag) {
|
404 |
+
functionalAtoms.addAll(frag.functionalAtoms);
|
405 |
+
}
|
406 |
+
|
407 |
+
/**
|
408 |
+
* Removes the functionalAtom at a specific index of the functionalAtoms linkedList
|
409 |
+
* @param i
|
410 |
+
* @return
|
411 |
+
*/
|
412 |
+
FunctionalAtom removeFunctionalAtom(int i) {
|
413 |
+
return functionalAtoms.remove(i);
|
414 |
+
}
|
415 |
+
|
416 |
+
/**
|
417 |
+
* Removes the specified functionalAtom from the functionalAtoms linkedList
|
418 |
+
* @param functionalAtom
|
419 |
+
*/
|
420 |
+
void removeFunctionalAtom(FunctionalAtom functionalAtom) {
|
421 |
+
functionalAtoms.remove(functionalAtom);
|
422 |
+
}
|
423 |
+
|
424 |
+
List<Atom> getPolymerAttachmentPoints() {
|
425 |
+
return polymerAttachmentPoints;
|
426 |
+
}
|
427 |
+
|
428 |
+
void setPolymerAttachmentPoints(List<Atom> polymerAttachmentPoints) {
|
429 |
+
this.polymerAttachmentPoints = polymerAttachmentPoints;
|
430 |
+
}
|
431 |
+
|
432 |
+
/**Gets a list of atoms in the fragment that connect to a specified atom
|
433 |
+
*
|
434 |
+
* @param atom The reference atom
|
435 |
+
* @return The list of atoms connected to the atom
|
436 |
+
*/
|
437 |
+
List<Atom> getIntraFragmentAtomNeighbours(Atom atom) {
|
438 |
+
List<Atom> results = new ArrayList<>(atom.getBondCount());
|
439 |
+
for(Bond b : atom.getBonds()) {
|
440 |
+
Atom otherAtom = b.getOtherAtom(atom);
|
441 |
+
if (otherAtom == null) {
|
442 |
+
throw new RuntimeException("OPSIN Bug: A bond associated with an atom does not involve it");
|
443 |
+
}
|
444 |
+
//If the other atom is in atomMapFromId then it is in this fragment
|
445 |
+
if (atomMapFromId.get(otherAtom.getID()) != null) {
|
446 |
+
results.add(otherAtom);
|
447 |
+
}
|
448 |
+
}
|
449 |
+
return results;
|
450 |
+
}
|
451 |
+
|
452 |
+
/**Calculates the number of bonds connecting to the atom, excluding bonds to implicit
|
453 |
+
* hydrogens. Double bonds count as
|
454 |
+
* two bonds, etc. Eg ethene - both C's have an incoming valency of 2.
|
455 |
+
*
|
456 |
+
* Only bonds to atoms within the fragment are counted. Suffix atoms are excluded
|
457 |
+
*
|
458 |
+
* @param atom
|
459 |
+
* @return Incoming Valency
|
460 |
+
* @throws StructureBuildingException
|
461 |
+
*/
|
462 |
+
int getIntraFragmentIncomingValency(Atom atom) throws StructureBuildingException {
|
463 |
+
int v = 0;
|
464 |
+
for(Bond b : atom.getBonds()) {
|
465 |
+
//recalled atoms will be null if they are not part of this fragment
|
466 |
+
if(b.getFromAtom() == atom) {
|
467 |
+
Atom a =getAtomByID(b.getTo());
|
468 |
+
if (a != null && !a.getType().equals(SUFFIX_TYPE_VAL)){
|
469 |
+
v += b.getOrder();
|
470 |
+
}
|
471 |
+
} else if(b.getToAtom() == atom) {
|
472 |
+
Atom a =getAtomByID(b.getFrom());
|
473 |
+
if (a != null && !a.getType().equals(SUFFIX_TYPE_VAL)){
|
474 |
+
v += b.getOrder();
|
475 |
+
}
|
476 |
+
}
|
477 |
+
else{
|
478 |
+
throw new StructureBuildingException("A bond associated with an atom does not involve it");
|
479 |
+
}
|
480 |
+
}
|
481 |
+
return v;
|
482 |
+
}
|
483 |
+
|
484 |
+
/**
|
485 |
+
* Checks valencies are sensible
|
486 |
+
* @throws StructureBuildingException
|
487 |
+
*/
|
488 |
+
void checkValencies() throws StructureBuildingException {
|
489 |
+
for(Atom a : atomCollection) {
|
490 |
+
if(!ValencyChecker.checkValency(a)) {
|
491 |
+
throw new StructureBuildingException("Atom is in unphysical valency state! Element: " + a.getElement() + " valency: " + a.getIncomingValency());
|
492 |
+
}
|
493 |
+
}
|
494 |
+
}
|
495 |
+
|
496 |
+
/**
|
497 |
+
* Removes an atom from this fragment
|
498 |
+
* @param atom
|
499 |
+
*/
|
500 |
+
void removeAtom(Atom atom) {
|
501 |
+
int atomID =atom.getID();
|
502 |
+
atomMapFromId.remove(atomID);
|
503 |
+
for (String l : atom.getLocants()) {
|
504 |
+
atomMapFromLocant.remove(l);
|
505 |
+
}
|
506 |
+
if (defaultInAtom == atom){
|
507 |
+
defaultInAtom = null;
|
508 |
+
}
|
509 |
+
}
|
510 |
+
/**
|
511 |
+
* Retrieves the overall charge of the fragment by querying all its atoms
|
512 |
+
* @return
|
513 |
+
*/
|
514 |
+
int getCharge() {
|
515 |
+
int charge=0;
|
516 |
+
for (Atom a : atomCollection) {
|
517 |
+
charge+=a.getCharge();
|
518 |
+
}
|
519 |
+
return charge;
|
520 |
+
}
|
521 |
+
|
522 |
+
Atom getDefaultInAtom() {
|
523 |
+
return defaultInAtom;
|
524 |
+
}
|
525 |
+
|
526 |
+
void setDefaultInAtom(Atom inAtom) {
|
527 |
+
this.defaultInAtom = inAtom;
|
528 |
+
}
|
529 |
+
|
530 |
+
Atom getDefaultInAtomOrFirstAtom() {
|
531 |
+
return defaultInAtom != null ? defaultInAtom : getFirstAtom();
|
532 |
+
}
|
533 |
+
|
534 |
+
/**
|
535 |
+
* Adds a mapping between the locant and atom object
|
536 |
+
* @param locant A locant as a string
|
537 |
+
* @param a An atom
|
538 |
+
*/
|
539 |
+
void addMappingToAtomLocantMap(String locant, Atom a){
|
540 |
+
atomMapFromLocant.put(locant, a);
|
541 |
+
}
|
542 |
+
|
543 |
+
/**
|
544 |
+
* Removes a mapping between a locant
|
545 |
+
* @param locant A locant as a string
|
546 |
+
*/
|
547 |
+
void removeMappingFromAtomLocantMap(String locant){
|
548 |
+
atomMapFromLocant.remove(locant);
|
549 |
+
}
|
550 |
+
|
551 |
+
/**
|
552 |
+
* Checks to see whether a locant is present on this fragment
|
553 |
+
* @param locant
|
554 |
+
* @return
|
555 |
+
*/
|
556 |
+
boolean hasLocant(String locant) {
|
557 |
+
return getAtomByLocant(locant) != null;
|
558 |
+
}
|
559 |
+
|
560 |
+
|
561 |
+
/**
|
562 |
+
* Returns an unmodifiable list of the locants associated with this fragment
|
563 |
+
* @return
|
564 |
+
*/
|
565 |
+
Set<String> getLocants() {
|
566 |
+
return Collections.unmodifiableSet(atomMapFromLocant.keySet());
|
567 |
+
}
|
568 |
+
|
569 |
+
List<Atom> getIndicatedHydrogen() {
|
570 |
+
return indicatedHydrogen;
|
571 |
+
}
|
572 |
+
|
573 |
+
void addIndicatedHydrogen(Atom atom) {
|
574 |
+
indicatedHydrogen.add(atom);
|
575 |
+
}
|
576 |
+
|
577 |
+
/**
|
578 |
+
* Returns the id of the first atom in the fragment
|
579 |
+
* @return
|
580 |
+
* @throws StructureBuildingException
|
581 |
+
*/
|
582 |
+
int getIdOfFirstAtom() {
|
583 |
+
return getFirstAtom().getID();
|
584 |
+
}
|
585 |
+
|
586 |
+
/**
|
587 |
+
* Returns the the first atom in the fragment or null if it has no atoms
|
588 |
+
* Typically the first atom will be the first atom that was added to the fragment
|
589 |
+
* @return firstAtom
|
590 |
+
*/
|
591 |
+
Atom getFirstAtom(){
|
592 |
+
Iterator<Atom> atomIterator =atomCollection.iterator();
|
593 |
+
if (atomIterator.hasNext()){
|
594 |
+
return atomIterator.next();
|
595 |
+
}
|
596 |
+
return null;
|
597 |
+
}
|
598 |
+
|
599 |
+
/**
|
600 |
+
* Clears and recreates atomMapFromId (and hence AtomCollection) using the order of the atoms in atomList
|
601 |
+
* @param atomList
|
602 |
+
* @throws StructureBuildingException
|
603 |
+
*/
|
604 |
+
void reorderAtomCollection(List<Atom> atomList) throws StructureBuildingException {
|
605 |
+
if (atomMapFromId.size() != atomList.size()){
|
606 |
+
throw new StructureBuildingException("atom list is not the same size as the number of atoms in the fragment");
|
607 |
+
}
|
608 |
+
atomMapFromId.clear();
|
609 |
+
for (Atom atom : atomList) {
|
610 |
+
atomMapFromId.put(atom.getID(), atom);
|
611 |
+
}
|
612 |
+
}
|
613 |
+
|
614 |
+
/**
|
615 |
+
* Reorders the fragment's internal atomList by the value of the first locant of the atoms
|
616 |
+
* e.g. 1,2,3,3a,3b,4
|
617 |
+
* Used for assuring the correct order of atom iteration when performing ring fusion
|
618 |
+
* @throws StructureBuildingException
|
619 |
+
*/
|
620 |
+
void sortAtomListByLocant() throws StructureBuildingException {
|
621 |
+
List<Atom> atomList =getAtomList();
|
622 |
+
Collections.sort(atomList, new FragmentTools.SortByLocants());
|
623 |
+
reorderAtomCollection(atomList);
|
624 |
+
}
|
625 |
+
|
626 |
+
@Override
|
627 |
+
public Iterator<Atom> iterator() {
|
628 |
+
return atomCollection.iterator();
|
629 |
+
}
|
630 |
+
}
|
631 |
+
|
632 |
+
|
633 |
+
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentManager.java
ADDED
@@ -0,0 +1,767 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*;
|
4 |
+
|
5 |
+
import java.util.ArrayList;
|
6 |
+
import java.util.Collections;
|
7 |
+
import java.util.HashMap;
|
8 |
+
import java.util.LinkedHashMap;
|
9 |
+
import java.util.LinkedHashSet;
|
10 |
+
import java.util.List;
|
11 |
+
import java.util.Map;
|
12 |
+
import java.util.Map.Entry;
|
13 |
+
import java.util.Set;
|
14 |
+
|
15 |
+
/** Holds the Fragments during the construction of the molecule,
|
16 |
+
* handles the building of new fragments and handles the creation/deletion of atoms/bonds
|
17 |
+
*
|
18 |
+
* @author ptc24
|
19 |
+
* @author dl387
|
20 |
+
*
|
21 |
+
*/
|
22 |
+
class FragmentManager {
|
23 |
+
|
24 |
+
/** A mapping between fragments and inter fragment bonds */
|
25 |
+
private final Map<Fragment,Set<Bond>> fragToInterFragmentBond = new LinkedHashMap<>();
|
26 |
+
|
27 |
+
/** All of the atom-containing fragments in the molecule */
|
28 |
+
private final Set<Fragment> fragments = fragToInterFragmentBond.keySet();
|
29 |
+
|
30 |
+
/** A builder for fragments specified as SMILES */
|
31 |
+
private final SMILESFragmentBuilder sBuilder;
|
32 |
+
|
33 |
+
/** A source of unique integers */
|
34 |
+
private final IDManager idManager;
|
35 |
+
|
36 |
+
/** Creates a FragmentManager that initially contains no fragments.
 *
 * @param sBuilder A SMILESFragmentBuilder, used to build fragments from SMILES - dependency injection.
 * @param idManager An IDManager, used as the source of IDs.
 * @throws IllegalArgumentException if either argument is null
 */
FragmentManager(SMILESFragmentBuilder sBuilder, IDManager idManager) {
	boolean missingDependency = (sBuilder == null) || (idManager == null);
	if (missingDependency){
		throw new IllegalArgumentException("FragmentManager was parsed a null object in its constructor!");
	}
	this.idManager = idManager;
	this.sBuilder = sBuilder;
}
|
48 |
+
|
49 |
+
/** Builds a fragment, based on an SMILES string
|
50 |
+
* The fragment will not correspond to a token
|
51 |
+
*
|
52 |
+
* @param smiles The fragment to build
|
53 |
+
* @return The built fragment
|
54 |
+
* @throws StructureBuildingException
|
55 |
+
*/
|
56 |
+
Fragment buildSMILES(String smiles) throws StructureBuildingException {
|
57 |
+
return buildSMILES(smiles, "", NONE_LABELS_VAL);
|
58 |
+
}
|
59 |
+
|
60 |
+
/** Builds a fragment, based on an SMILES string
|
61 |
+
* The fragment will not correspond to a token
|
62 |
+
*
|
63 |
+
* @param smiles
|
64 |
+
* @param type
|
65 |
+
* @param labelMapping
|
66 |
+
* @return
|
67 |
+
* @throws StructureBuildingException
|
68 |
+
*/
|
69 |
+
Fragment buildSMILES(String smiles, String type, String labelMapping) throws StructureBuildingException {
|
70 |
+
Fragment newFrag = sBuilder.build(smiles, type, labelMapping);
|
71 |
+
addFragment(newFrag);
|
72 |
+
return newFrag;
|
73 |
+
}
|
74 |
+
|
75 |
+
/** Builds a fragment, based on an SMILES string
|
76 |
+
* The fragment will correspond to the given tokenEl
|
77 |
+
*
|
78 |
+
* @param smiles The fragment to build
|
79 |
+
* @param tokenEl The corresponding tokenEl
|
80 |
+
* @param labelMapping How to label the fragment
|
81 |
+
* @return The built fragment
|
82 |
+
* @throws StructureBuildingException
|
83 |
+
*/
|
84 |
+
Fragment buildSMILES(String smiles, Element tokenEl, String labelMapping) throws StructureBuildingException {
|
85 |
+
Fragment newFrag = sBuilder.build(smiles, tokenEl, labelMapping);
|
86 |
+
addFragment(newFrag);
|
87 |
+
return newFrag;
|
88 |
+
}
|
89 |
+
|
90 |
+
/**Creates a new fragment, containing all of the atoms and bonds
|
91 |
+
* of all of the other fragments - i.e. the whole molecule. This updates
|
92 |
+
* which fragments the atoms think they are in to the new super fragment
|
93 |
+
* but does not change the original fragments.
|
94 |
+
* Hence the original fragments remain associated with their atoms
|
95 |
+
* Atoms and Bonds are not copied.
|
96 |
+
*
|
97 |
+
* @return The unified fragment
|
98 |
+
*/
|
99 |
+
Fragment getUnifiedFragment() {
|
100 |
+
Fragment uniFrag = new Fragment("");
|
101 |
+
for (Entry<Fragment, Set<Bond>> entry : fragToInterFragmentBond.entrySet()) {
|
102 |
+
Fragment f = entry.getKey();
|
103 |
+
Set<Bond> interFragmentBonds = entry.getValue();
|
104 |
+
for(Atom atom : f.getAtomList()) {
|
105 |
+
uniFrag.addAtom(atom);
|
106 |
+
}
|
107 |
+
for(Bond bond : f.getBondSet()) {
|
108 |
+
uniFrag.addBond(bond);
|
109 |
+
}
|
110 |
+
uniFrag.incorporateOutAtoms(f);
|
111 |
+
uniFrag.incorporateFunctionalAtoms(f);
|
112 |
+
|
113 |
+
for (Bond interFragmentBond : interFragmentBonds) {
|
114 |
+
uniFrag.addBond(interFragmentBond);
|
115 |
+
}
|
116 |
+
}
|
117 |
+
addFragment(uniFrag);
|
118 |
+
return uniFrag;
|
119 |
+
}
|
120 |
+
|
121 |
+
/** Incorporates a fragment, usually a suffix, into a parent fragment
|
122 |
+
* This does:
|
123 |
+
* Imports all of the atoms and bonds from another fragment into this one.
|
124 |
+
* Also imports outAtoms and functionalAtoms
|
125 |
+
* Reassigns inter-fragment bonds of the child fragment as either intra-fragment bonds
|
126 |
+
* of the parent fragment or as inter-fragment bonds of the parent fragment
|
127 |
+
*
|
128 |
+
* The original fragment still maintains its original atomList/bondList
|
129 |
+
*
|
130 |
+
* @param childFrag The fragment to be incorporated
|
131 |
+
* @param parentFrag The parent fragment
|
132 |
+
* @throws StructureBuildingException
|
133 |
+
*/
|
134 |
+
void incorporateFragment(Fragment childFrag, Fragment parentFrag) throws StructureBuildingException {
|
135 |
+
for(Atom atom : childFrag.getAtomList()) {
|
136 |
+
parentFrag.addAtom(atom);
|
137 |
+
}
|
138 |
+
for(Bond bond : childFrag.getBondSet()) {
|
139 |
+
parentFrag.addBond(bond);
|
140 |
+
}
|
141 |
+
parentFrag.incorporateOutAtoms(childFrag);
|
142 |
+
parentFrag.incorporateFunctionalAtoms(childFrag);
|
143 |
+
|
144 |
+
Set<Bond> interFragmentBonds = fragToInterFragmentBond.get(childFrag);
|
145 |
+
if (interFragmentBonds == null){
|
146 |
+
throw new StructureBuildingException("Fragment not registered with this FragmentManager!");
|
147 |
+
}
|
148 |
+
for (Bond bond : interFragmentBonds) {//reassign inter-fragment bonds of child
|
149 |
+
if (bond.getFromAtom().getFrag() == parentFrag && bond.getToAtom().getFrag() == parentFrag){
|
150 |
+
//bond is now enclosed within parentFrag so make it an intra-fragment bond
|
151 |
+
//and remove it from the inter-fragment set of the parentFrag
|
152 |
+
parentFrag.addBond(bond);
|
153 |
+
fragToInterFragmentBond.get(parentFrag).remove(bond);
|
154 |
+
}
|
155 |
+
else{
|
156 |
+
//bond was an inter-fragment bond between the childFrag and another frag
|
157 |
+
//It is now between the parentFrag and another frag
|
158 |
+
addInterFragmentBond(bond);
|
159 |
+
}
|
160 |
+
}
|
161 |
+
fragToInterFragmentBond.remove(childFrag);
|
162 |
+
}
|
163 |
+
|
164 |
+
/** Incorporates a fragment, usually a suffix, into a parent fragment, creating a bond between them.
|
165 |
+
*
|
166 |
+
* @param childFrag The fragment to be incorporated
|
167 |
+
* @param fromAtom An atom on that fragment
|
168 |
+
* @param parentFrag The parent fragment
|
169 |
+
* @param toAtom An atom on that fragment
|
170 |
+
* @param bondOrder The order of the joining bond
|
171 |
+
* @throws StructureBuildingException
|
172 |
+
*/
|
173 |
+
void incorporateFragment(Fragment childFrag, Atom fromAtom, Fragment parentFrag, Atom toAtom, int bondOrder) throws StructureBuildingException {
|
174 |
+
if (!fromAtom.getFrag().equals(childFrag)){
|
175 |
+
throw new StructureBuildingException("OPSIN Bug: fromAtom was not associated with childFrag!");
|
176 |
+
}
|
177 |
+
if (!toAtom.getFrag().equals(parentFrag)){
|
178 |
+
throw new StructureBuildingException("OPSIN Bug: toAtom was not associated with parentFrag!");
|
179 |
+
}
|
180 |
+
incorporateFragment(childFrag, parentFrag);
|
181 |
+
createBond(fromAtom, toAtom, bondOrder);
|
182 |
+
}
|
183 |
+
|
184 |
+
/** Converts an atom in a fragment to a different atomic symbol described by a SMILES string
|
185 |
+
* Charged atoms can also be specified eg. [NH4+]
|
186 |
+
*
|
187 |
+
* @param a The atom to change to a heteroatom
|
188 |
+
* @param smiles The SMILES for one atom
|
189 |
+
* @throws StructureBuildingException
|
190 |
+
*/
|
191 |
+
void replaceAtomWithSmiles(Atom a, String smiles) throws StructureBuildingException {
|
192 |
+
replaceAtomWithAtom(a, getHeteroatom(smiles), false);
|
193 |
+
}
|
194 |
+
|
195 |
+
/**
|
196 |
+
* Converts the smiles for a heteroatom to an atom
|
197 |
+
* @param smiles
|
198 |
+
* @return
|
199 |
+
* @throws StructureBuildingException
|
200 |
+
*/
|
201 |
+
Atom getHeteroatom(String smiles) throws StructureBuildingException {
|
202 |
+
Fragment heteroAtomFrag = sBuilder.build(smiles);
|
203 |
+
if (heteroAtomFrag.getAtomCount() != 1){
|
204 |
+
throw new StructureBuildingException("Heteroatom smiles described a fragment with multiple SMILES!");
|
205 |
+
}
|
206 |
+
return heteroAtomFrag.getFirstAtom();
|
207 |
+
}
|
208 |
+
|
209 |
+
/** Uses the information given in the given heteroatom to change the atomic symbol
|
210 |
+
* and charge of the given atom
|
211 |
+
*
|
212 |
+
* @param a The atom to change to a heteroatom
|
213 |
+
* @param heteroAtom The atom to copy element/charge properties from
|
214 |
+
* @param assignLocant Whether a locant should be assigned to the heteroatom if the locant is not used elsewhere
|
215 |
+
* @throws StructureBuildingException if a charge disagreement occurs
|
216 |
+
*/
|
217 |
+
void replaceAtomWithAtom(Atom a, Atom heteroAtom, boolean assignLocant) throws StructureBuildingException {
|
218 |
+
ChemEl chemEl =heteroAtom.getElement();
|
219 |
+
int replacementCharge =heteroAtom.getCharge();
|
220 |
+
if (replacementCharge!=0){
|
221 |
+
if (a.getCharge()==0){
|
222 |
+
a.addChargeAndProtons(replacementCharge, heteroAtom.getProtonsExplicitlyAddedOrRemoved());
|
223 |
+
}
|
224 |
+
else if (a.getCharge()==replacementCharge){
|
225 |
+
a.setProtonsExplicitlyAddedOrRemoved(heteroAtom.getProtonsExplicitlyAddedOrRemoved());
|
226 |
+
}
|
227 |
+
else{
|
228 |
+
throw new StructureBuildingException("Charge conflict between replacement term and atom to be replaced");
|
229 |
+
}
|
230 |
+
}
|
231 |
+
a.setElement(chemEl);
|
232 |
+
a.removeElementSymbolLocants();
|
233 |
+
if (assignLocant){
|
234 |
+
String primes = "";
|
235 |
+
while (a.getFrag().getAtomByLocant(chemEl.toString() + primes) != null){//if element symbol already assigned, add a prime and try again
|
236 |
+
primes += "'";
|
237 |
+
}
|
238 |
+
a.addLocant(chemEl.toString() + primes);
|
239 |
+
}
|
240 |
+
}
|
241 |
+
|
242 |
+
/** Gets an atom, given an id number
|
243 |
+
* Use this if you don't know what fragment the atom is in
|
244 |
+
* @param id The id of the atom
|
245 |
+
* @return The atom, or null if no such atom exists.
|
246 |
+
*/
|
247 |
+
Atom getAtomByID(int id) {
|
248 |
+
for(Fragment f : fragments) {
|
249 |
+
Atom a = f.getAtomByID(id);
|
250 |
+
if(a != null) {
|
251 |
+
return a;
|
252 |
+
}
|
253 |
+
}
|
254 |
+
return null;
|
255 |
+
}
|
256 |
+
|
257 |
+
/** Gets an atom, given an id number, throwing if fails.
|
258 |
+
* Use this if you don't know what fragment the atom is in
|
259 |
+
* @param id The id of the atom
|
260 |
+
* @return The atom
|
261 |
+
* @throws StructureBuildingException
|
262 |
+
*/
|
263 |
+
Atom getAtomByIDOrThrow(int id) throws StructureBuildingException {
|
264 |
+
Atom a = getAtomByID(id);
|
265 |
+
if(a == null) {
|
266 |
+
throw new StructureBuildingException("Couldn't get atom by id");
|
267 |
+
}
|
268 |
+
return a;
|
269 |
+
}
|
270 |
+
|
271 |
+
/**Turns all of the spare valencies in the fragments into double bonds.
|
272 |
+
*
|
273 |
+
* @throws StructureBuildingException
|
274 |
+
*/
|
275 |
+
void convertSpareValenciesToDoubleBonds() throws StructureBuildingException {
|
276 |
+
for(Fragment f : fragments) {
|
277 |
+
FragmentTools.convertSpareValenciesToDoubleBonds(f);
|
278 |
+
}
|
279 |
+
}
|
280 |
+
|
281 |
+
/**
|
282 |
+
* Checks valencies are all chemically reasonable. An exception is thrown if any are not
|
283 |
+
* @throws StructureBuildingException
|
284 |
+
*/
|
285 |
+
void checkValencies() throws StructureBuildingException {
|
286 |
+
for(Fragment f : fragments) {
|
287 |
+
f.checkValencies();
|
288 |
+
}
|
289 |
+
}
|
290 |
+
|
291 |
+
Set<Fragment> getFragments() {
|
292 |
+
return Collections.unmodifiableSet(fragments);
|
293 |
+
}
|
294 |
+
|
295 |
+
/**
|
296 |
+
* Registers a fragment
|
297 |
+
* @param frag
|
298 |
+
*/
|
299 |
+
private void addFragment(Fragment frag) {
|
300 |
+
fragToInterFragmentBond.put(frag, new LinkedHashSet<>());
|
301 |
+
}
|
302 |
+
|
303 |
+
/**
|
304 |
+
* Removes a fragment
|
305 |
+
* Any inter-fragment bonds of this fragment are removed from the fragments it was connected to
|
306 |
+
* Throws an exception if fragment wasn't present
|
307 |
+
* @param frag
|
308 |
+
* @throws StructureBuildingException
|
309 |
+
*/
|
310 |
+
void removeFragment(Fragment frag) throws StructureBuildingException {
|
311 |
+
Set<Bond> interFragmentBondsInvolvingFragmentSet = fragToInterFragmentBond.get(frag);
|
312 |
+
if (interFragmentBondsInvolvingFragmentSet == null) {
|
313 |
+
throw new StructureBuildingException("Fragment not registered with this FragmentManager!");
|
314 |
+
}
|
315 |
+
List<Bond> interFragmentBondsInvolvingFragment = new ArrayList<>(interFragmentBondsInvolvingFragmentSet);
|
316 |
+
for (Bond bond : interFragmentBondsInvolvingFragment) {
|
317 |
+
if (bond.getFromAtom().getFrag() == frag){
|
318 |
+
fragToInterFragmentBond.get(bond.getToAtom().getFrag()).remove(bond);
|
319 |
+
}
|
320 |
+
else{
|
321 |
+
fragToInterFragmentBond.get(bond.getFromAtom().getFrag()).remove(bond);
|
322 |
+
}
|
323 |
+
}
|
324 |
+
fragToInterFragmentBond.remove(frag);
|
325 |
+
}
|
326 |
+
|
327 |
+
int getOverallCharge() {
|
328 |
+
int totalCharge = 0;
|
329 |
+
for (Fragment frag : fragments) {
|
330 |
+
totalCharge += frag.getCharge();
|
331 |
+
}
|
332 |
+
return totalCharge;
|
333 |
+
}
|
334 |
+
|
335 |
+
/**
|
336 |
+
* Creates a copy of a fragment by copying data
|
337 |
+
* labels the atoms using new ids from the idManager
|
338 |
+
* @param originalFragment
|
339 |
+
* @return the clone of the fragment
|
340 |
+
* @throws StructureBuildingException
|
341 |
+
*/
|
342 |
+
Fragment copyFragment(Fragment originalFragment) throws StructureBuildingException {
|
343 |
+
return copyAndRelabelFragment(originalFragment, 0);
|
344 |
+
}
|
345 |
+
|
346 |
+
/**
|
347 |
+
* Creates a copy of a fragment by copying data
|
348 |
+
* labels the atoms using new ids from the idManager
|
349 |
+
* @param originalFragment
|
350 |
+
* @param primesToAdd: The minimum number of primes to add to the cloned atoms. More primes will be added if necessary to keep the locants unique e.g. N in the presence of N' becomes N'' when this is 1
|
351 |
+
* @return the clone of the fragment
|
352 |
+
*/
|
353 |
+
Fragment copyAndRelabelFragment(Fragment originalFragment, int primesToAdd) {
|
354 |
+
Element tokenEl = new TokenEl("");
|
355 |
+
tokenEl.addAttribute(TYPE_ATR, originalFragment.getType());
|
356 |
+
tokenEl.addAttribute(SUBTYPE_ATR, originalFragment.getSubType());
|
357 |
+
Fragment newFragment = new Fragment(tokenEl);
|
358 |
+
HashMap<Atom, Atom> oldToNewAtomMap = new HashMap<>();//maps old Atom to new Atom
|
359 |
+
List<Atom> atomList =originalFragment.getAtomList();
|
360 |
+
for (Atom atom : atomList) {
|
361 |
+
int id = idManager.getNextID();
|
362 |
+
ArrayList<String> newLocants = new ArrayList<>(atom.getLocants());
|
363 |
+
if (primesToAdd !=0){
|
364 |
+
for (int i = 0; i < newLocants.size(); i++) {
|
365 |
+
String currentLocant = newLocants.get(i);
|
366 |
+
int currentPrimes = StringTools.countTerminalPrimes(currentLocant);
|
367 |
+
String locantSansPrimes = currentLocant.substring(0, currentLocant.length()-currentPrimes);
|
368 |
+
int highestNumberOfPrimesWithThisLocant = currentPrimes;
|
369 |
+
while (originalFragment.getAtomByLocant(locantSansPrimes + StringTools.multiplyString("'", highestNumberOfPrimesWithThisLocant +1 ))!=null){
|
370 |
+
highestNumberOfPrimesWithThisLocant++;
|
371 |
+
}
|
372 |
+
newLocants.set(i, locantSansPrimes + StringTools.multiplyString("'", ((highestNumberOfPrimesWithThisLocant +1)*primesToAdd) + currentPrimes));
|
373 |
+
}
|
374 |
+
}
|
375 |
+
Atom newAtom =new Atom(id, atom.getElement(), newFragment);
|
376 |
+
for (String newLocant : newLocants) {
|
377 |
+
newAtom.addLocant(newLocant);
|
378 |
+
}
|
379 |
+
newAtom.setCharge(atom.getCharge());
|
380 |
+
newAtom.setIsotope(atom.getIsotope());
|
381 |
+
newAtom.setSpareValency(atom.hasSpareValency());
|
382 |
+
newAtom.setProtonsExplicitlyAddedOrRemoved(atom.getProtonsExplicitlyAddedOrRemoved());
|
383 |
+
newAtom.setLambdaConventionValency(atom.getLambdaConventionValency());
|
384 |
+
//outValency is derived from the outAtoms so is automatically cloned
|
385 |
+
newAtom.setAtomIsInACycle(atom.getAtomIsInACycle());
|
386 |
+
newAtom.setType(atom.getType());//may be different from fragment type if the original atom was formerly in a suffix
|
387 |
+
newAtom.setMinimumValency(atom.getMinimumValency());
|
388 |
+
newAtom.setImplicitHydrogenAllowed(atom.getImplicitHydrogenAllowed());
|
389 |
+
newFragment.addAtom(newAtom);
|
390 |
+
oldToNewAtomMap.put(atom, newAtom);
|
391 |
+
}
|
392 |
+
for (Atom atom : atomList) {
|
393 |
+
if (atom.getAtomParity() != null){
|
394 |
+
Atom[] oldAtomRefs4 = atom.getAtomParity().getAtomRefs4();
|
395 |
+
Atom[] newAtomRefs4 = new Atom[4];
|
396 |
+
for (int i = 0; i < oldAtomRefs4.length; i++) {
|
397 |
+
Atom oldAtom = oldAtomRefs4[i];
|
398 |
+
if (oldAtom.equals(AtomParity.hydrogen)){
|
399 |
+
newAtomRefs4[i] = AtomParity.hydrogen;
|
400 |
+
}
|
401 |
+
else if (oldAtom.equals(AtomParity.deoxyHydrogen)){
|
402 |
+
newAtomRefs4[i] = AtomParity.deoxyHydrogen;
|
403 |
+
}
|
404 |
+
else{
|
405 |
+
newAtomRefs4[i] = oldToNewAtomMap.get(oldAtom);
|
406 |
+
}
|
407 |
+
}
|
408 |
+
AtomParity newAtomParity =new AtomParity(newAtomRefs4, atom.getAtomParity().getParity());
|
409 |
+
newAtomParity.setStereoGroup(atom.getAtomParity().getStereoGroup());
|
410 |
+
oldToNewAtomMap.get(atom).setAtomParity(newAtomParity);
|
411 |
+
}
|
412 |
+
Set<Atom> oldAmbiguousElementAssignmentAtoms = atom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT);
|
413 |
+
if (oldAmbiguousElementAssignmentAtoms!=null){
|
414 |
+
Set<Atom> newAtoms = new LinkedHashSet<>();
|
415 |
+
for (Atom oldAtom : oldAmbiguousElementAssignmentAtoms) {
|
416 |
+
newAtoms.add(oldToNewAtomMap.get(oldAtom));
|
417 |
+
}
|
418 |
+
oldToNewAtomMap.get(atom).setProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT, newAtoms);
|
419 |
+
}
|
420 |
+
Integer smilesHydrogenCount = atom.getProperty(Atom.SMILES_HYDROGEN_COUNT);
|
421 |
+
if (smilesHydrogenCount!=null){
|
422 |
+
oldToNewAtomMap.get(atom).setProperty(Atom.SMILES_HYDROGEN_COUNT, smilesHydrogenCount);
|
423 |
+
}
|
424 |
+
Integer oxidationNumber = atom.getProperty(Atom.OXIDATION_NUMBER);
|
425 |
+
if (oxidationNumber!=null){
|
426 |
+
oldToNewAtomMap.get(atom).setProperty(Atom.OXIDATION_NUMBER, oxidationNumber);
|
427 |
+
}
|
428 |
+
Boolean isAldehyde = atom.getProperty(Atom.ISALDEHYDE);
|
429 |
+
if (isAldehyde!=null){
|
430 |
+
oldToNewAtomMap.get(atom).setProperty(Atom.ISALDEHYDE, isAldehyde);
|
431 |
+
}
|
432 |
+
Boolean isAnomeric = atom.getProperty(Atom.ISANOMERIC);
|
433 |
+
if (isAnomeric!=null){
|
434 |
+
oldToNewAtomMap.get(atom).setProperty(Atom.ISANOMERIC, isAnomeric);
|
435 |
+
}
|
436 |
+
Integer atomClass = atom.getProperty(Atom.ATOM_CLASS);
|
437 |
+
if (atomClass!=null){
|
438 |
+
oldToNewAtomMap.get(atom).setProperty(Atom.ATOM_CLASS, atomClass);
|
439 |
+
}
|
440 |
+
String homologyGroup = atom.getProperty(Atom.HOMOLOGY_GROUP);
|
441 |
+
if (homologyGroup != null) {
|
442 |
+
oldToNewAtomMap.get(atom).setProperty(Atom.HOMOLOGY_GROUP, homologyGroup);
|
443 |
+
}
|
444 |
+
List<Atom> oldPositionVariationAtoms = atom.getProperty(Atom.POSITION_VARIATION_BOND);
|
445 |
+
if (oldPositionVariationAtoms != null) {
|
446 |
+
List<Atom> newAtoms = new ArrayList<>();
|
447 |
+
for (Atom oldAtom : oldPositionVariationAtoms) {
|
448 |
+
newAtoms.add(oldToNewAtomMap.get(oldAtom));
|
449 |
+
}
|
450 |
+
oldToNewAtomMap.get(atom).setProperty(Atom.POSITION_VARIATION_BOND, newAtoms);
|
451 |
+
}
|
452 |
+
}
|
453 |
+
for (int i = 0, l = originalFragment.getOutAtomCount(); i < l; i++) {
|
454 |
+
OutAtom outAtom = originalFragment.getOutAtom(i);
|
455 |
+
newFragment.addOutAtom(oldToNewAtomMap.get(outAtom.getAtom()), outAtom.getValency(), outAtom.isSetExplicitly());
|
456 |
+
if (outAtom.getLocant() !=null){
|
457 |
+
newFragment.getOutAtom(newFragment.getOutAtomCount() -1).setLocant(outAtom.getLocant() + StringTools.multiplyString("'", primesToAdd) );
|
458 |
+
}
|
459 |
+
}
|
460 |
+
for (int i = 0, l = originalFragment.getFunctionalAtomCount(); i < l; i++) {
|
461 |
+
FunctionalAtom functionalAtom = originalFragment.getFunctionalAtom(i);
|
462 |
+
newFragment.addFunctionalAtom(oldToNewAtomMap.get(functionalAtom.getAtom()));
|
463 |
+
}
|
464 |
+
if (originalFragment.getDefaultInAtom() != null) {
|
465 |
+
newFragment.setDefaultInAtom(oldToNewAtomMap.get(originalFragment.getDefaultInAtom()));
|
466 |
+
}
|
467 |
+
Set<Bond> bondSet =originalFragment.getBondSet();
|
468 |
+
for (Bond bond : bondSet) {
|
469 |
+
Bond newBond = createBond(oldToNewAtomMap.get(bond.getFromAtom()), oldToNewAtomMap.get(bond.getToAtom()), bond.getOrder());
|
470 |
+
newBond.setSmilesStereochemistry(bond.getSmilesStereochemistry());
|
471 |
+
if (bond.getBondStereo() != null){
|
472 |
+
Atom[] oldAtomRefs4 = bond.getBondStereo().getAtomRefs4();
|
473 |
+
Atom[] newAtomRefs4 = new Atom[4];
|
474 |
+
for (int i = 0; i < oldAtomRefs4.length; i++) {
|
475 |
+
newAtomRefs4[i] = oldToNewAtomMap.get(oldAtomRefs4[i]);
|
476 |
+
}
|
477 |
+
newBond.setBondStereoElement(newAtomRefs4, bond.getBondStereo().getBondStereoValue());
|
478 |
+
}
|
479 |
+
}
|
480 |
+
List<Atom> indicatedHydrogenAtoms = originalFragment.getIndicatedHydrogen();
|
481 |
+
for (Atom atom : indicatedHydrogenAtoms) {
|
482 |
+
newFragment.addIndicatedHydrogen(oldToNewAtomMap.get(atom));
|
483 |
+
}
|
484 |
+
addFragment(newFragment);
|
485 |
+
return newFragment;
|
486 |
+
}
|
487 |
+
|
488 |
+
/**
|
489 |
+
* Takes an element and produces a copy of it. Groups and suffixes are copied so that the new element
|
490 |
+
* has its own group and suffix fragments
|
491 |
+
* @param elementToBeCloned
|
492 |
+
* @param state The current buildstate
|
493 |
+
* @return
|
494 |
+
* @throws StructureBuildingException
|
495 |
+
*/
|
496 |
+
Element cloneElement(BuildState state, Element elementToBeCloned) throws StructureBuildingException {
|
497 |
+
return cloneElement(state, elementToBeCloned, 0);
|
498 |
+
}
|
499 |
+
|
500 |
+
/**
|
501 |
+
* Takes an element and produces a copy of it. Groups and suffixes are copied so that the new element
|
502 |
+
* has its own group and suffix fragments
|
503 |
+
* @param elementToBeCloned
|
504 |
+
* @param state The current buildstate
|
505 |
+
* @param primesToAdd: The minimum number of primes to add to the cloned atoms. More primes will be added if necessary to keep the locants unique e.g. N in the presence of N' becomes N'' when this is 1
|
506 |
+
* @return
|
507 |
+
* @throws StructureBuildingException
|
508 |
+
*/
|
509 |
+
Element cloneElement(BuildState state, Element elementToBeCloned, int primesToAdd) throws StructureBuildingException {
|
510 |
+
Element clone = elementToBeCloned.copy();
|
511 |
+
List<Element> originalGroups = OpsinTools.getDescendantElementsWithTagName(elementToBeCloned, XmlDeclarations.GROUP_EL);
|
512 |
+
List<Element> clonedGroups = OpsinTools.getDescendantElementsWithTagName(clone, XmlDeclarations.GROUP_EL);
|
513 |
+
HashMap<Fragment,Fragment> oldNewFragmentMapping =new LinkedHashMap<>();
|
514 |
+
for (int i = 0; i < originalGroups.size(); i++) {
|
515 |
+
Fragment originalFragment = originalGroups.get(i).getFrag();
|
516 |
+
Fragment newFragment = copyAndRelabelFragment(originalFragment, primesToAdd);
|
517 |
+
oldNewFragmentMapping.put(originalFragment, newFragment);
|
518 |
+
newFragment.setTokenEl(clonedGroups.get(i));
|
519 |
+
clonedGroups.get(i).setFrag(newFragment);
|
520 |
+
List<Fragment> originalSuffixes =state.xmlSuffixMap.get(originalGroups.get(i));
|
521 |
+
List<Fragment> newSuffixFragments =new ArrayList<>();
|
522 |
+
for (Fragment suffix : originalSuffixes) {
|
523 |
+
newSuffixFragments.add(copyFragment(suffix));
|
524 |
+
}
|
525 |
+
state.xmlSuffixMap.put(clonedGroups.get(i), newSuffixFragments);
|
526 |
+
}
|
527 |
+
Set<Bond> interFragmentBondsToClone = new LinkedHashSet<>();
|
528 |
+
for (Fragment originalFragment : oldNewFragmentMapping.keySet()) {//add inter fragment bonds to cloned fragments
|
529 |
+
for (Bond bond : fragToInterFragmentBond.get(originalFragment)) {
|
530 |
+
interFragmentBondsToClone.add(bond);
|
531 |
+
}
|
532 |
+
}
|
533 |
+
for (Bond bond : interFragmentBondsToClone) {
|
534 |
+
Atom originalFromAtom = bond.getFromAtom();
|
535 |
+
Atom originalToAtom = bond.getToAtom();
|
536 |
+
Fragment originalFragment1 = originalFromAtom.getFrag();
|
537 |
+
Fragment originalFragment2 = originalToAtom.getFrag();
|
538 |
+
if (!oldNewFragmentMapping.containsKey(originalFragment1) || (!oldNewFragmentMapping.containsKey(originalFragment2))){
|
539 |
+
throw new StructureBuildingException("An element that was a clone contained a bond that went outside the scope of the cloning");
|
540 |
+
}
|
541 |
+
Fragment newFragment1 = oldNewFragmentMapping.get(originalFragment1);
|
542 |
+
Fragment newFragment2 = oldNewFragmentMapping.get(originalFragment2);
|
543 |
+
Atom fromAtom = newFragment1.getAtomList().get(originalFragment1.getAtomList().indexOf(originalFromAtom));
|
544 |
+
Atom toAtom = newFragment2.getAtomList().get(originalFragment2.getAtomList().indexOf(originalToAtom));
|
545 |
+
createBond(fromAtom, toAtom, bond.getOrder());
|
546 |
+
}
|
547 |
+
return clone;
|
548 |
+
}
|
549 |
+
|
550 |
+
/**
|
551 |
+
* Takes an atom, removes it and bonds everything that was bonded to it to the replacementAtom with the original bond orders.
|
552 |
+
* Non element symbol locants are copied to the replacement atom
|
553 |
+
* @param atomToBeReplaced
|
554 |
+
* @param replacementAtom
|
555 |
+
*/
|
556 |
+
void replaceAtomWithAnotherAtomPreservingConnectivity(Atom atomToBeReplaced, Atom replacementAtom) {
|
557 |
+
atomToBeReplaced.removeElementSymbolLocants();
|
558 |
+
List<String> locants = new ArrayList<>(atomToBeReplaced.getLocants());
|
559 |
+
for (String locant : locants) {
|
560 |
+
atomToBeReplaced.removeLocant(locant);
|
561 |
+
replacementAtom.addLocant(locant);
|
562 |
+
}
|
563 |
+
List<Bond> bonds = atomToBeReplaced.getBonds();
|
564 |
+
for (Bond bond : bonds) {
|
565 |
+
Atom connectedAtom = bond.getOtherAtom(atomToBeReplaced);
|
566 |
+
if (connectedAtom.getAtomParity() != null){
|
567 |
+
Atom[] atomRefs4 = connectedAtom.getAtomParity().getAtomRefs4();
|
568 |
+
for (int i = 0 ; i < 4; i++) {
|
569 |
+
if (atomRefs4[i] == atomToBeReplaced){
|
570 |
+
atomRefs4[i] = replacementAtom;
|
571 |
+
break;
|
572 |
+
}
|
573 |
+
}
|
574 |
+
}
|
575 |
+
if (bond.getBondStereo() != null){
|
576 |
+
Atom[] atomRefs4 = bond.getBondStereo().getAtomRefs4();
|
577 |
+
for (int i = 0 ; i < 4; i++) {
|
578 |
+
if (atomRefs4[i] == atomToBeReplaced){
|
579 |
+
atomRefs4[i] = replacementAtom;
|
580 |
+
break;
|
581 |
+
}
|
582 |
+
}
|
583 |
+
}
|
584 |
+
createBond(replacementAtom, bond.getOtherAtom(atomToBeReplaced), bond.getOrder());
|
585 |
+
}
|
586 |
+
removeAtomAndAssociatedBonds(atomToBeReplaced);
|
587 |
+
}
|
588 |
+
|
589 |
+
/**
|
590 |
+
* Removes a bond from the inter-fragment bond mappings if it was present
|
591 |
+
* @param bond
|
592 |
+
*/
|
593 |
+
private void removeInterFragmentBondIfPresent(Bond bond) {
|
594 |
+
fragToInterFragmentBond.get(bond.getFromAtom().getFrag()).remove(bond);
|
595 |
+
fragToInterFragmentBond.get(bond.getToAtom().getFrag()).remove(bond);
|
596 |
+
}
|
597 |
+
|
598 |
+
/**
|
599 |
+
* Adds a bond to the fragment to inter-fragment bond mappings
|
600 |
+
* @param bond
|
601 |
+
*/
|
602 |
+
private void addInterFragmentBond(Bond bond) {
|
603 |
+
fragToInterFragmentBond.get(bond.getFromAtom().getFrag()).add(bond);
|
604 |
+
fragToInterFragmentBond.get(bond.getToAtom().getFrag()).add(bond);
|
605 |
+
}
|
606 |
+
|
607 |
+
/**
|
608 |
+
* Gets an unmodifiable view of the set of the inter-fragment bonds a fragment is involved in
|
609 |
+
* @param frag
|
610 |
+
* @return set of inter fragment bonds
|
611 |
+
*/
|
612 |
+
Set<Bond> getInterFragmentBonds(Fragment frag) {
|
613 |
+
Set<Bond> interFragmentBonds = fragToInterFragmentBond.get(frag);
|
614 |
+
if (interFragmentBonds == null) {
|
615 |
+
throw new IllegalArgumentException("Fragment not registered with this FragmentManager!");
|
616 |
+
}
|
617 |
+
return Collections.unmodifiableSet(interFragmentBonds);
|
618 |
+
}
|
619 |
+
|
620 |
+
/**
|
621 |
+
* Create a new Atom of the given element belonging to the given fragment
|
622 |
+
* @param chemEl
|
623 |
+
* @param frag
|
624 |
+
* @return Atom
|
625 |
+
*/
|
626 |
+
Atom createAtom(ChemEl chemEl, Fragment frag) {
|
627 |
+
Atom a = new Atom(idManager.getNextID(), chemEl, frag);
|
628 |
+
frag.addAtom(a);
|
629 |
+
return a;
|
630 |
+
}
|
631 |
+
|
632 |
+
/**
|
633 |
+
* Create a new bond between two atoms.
|
634 |
+
* The bond is associated with these atoms.
|
635 |
+
* It is also listed as an inter-fragment bond or associated with a fragment
|
636 |
+
* @param fromAtom
|
637 |
+
* @param toAtom
|
638 |
+
* @param bondOrder
|
639 |
+
* @return Bond
|
640 |
+
*/
|
641 |
+
Bond createBond(Atom fromAtom, Atom toAtom, int bondOrder) {
|
642 |
+
Bond b = new Bond(fromAtom, toAtom, bondOrder);
|
643 |
+
fromAtom.addBond(b);
|
644 |
+
toAtom.addBond(b);
|
645 |
+
if (fromAtom.getFrag() == toAtom.getFrag()){
|
646 |
+
fromAtom.getFrag().addBond(b);
|
647 |
+
}
|
648 |
+
else{
|
649 |
+
addInterFragmentBond(b);
|
650 |
+
}
|
651 |
+
return b;
|
652 |
+
}
|
653 |
+
|
654 |
+
void removeAtomAndAssociatedBonds(Atom atom){
|
655 |
+
List<Bond> bondsToBeRemoved = new ArrayList<>(atom.getBonds());
|
656 |
+
for (Bond bond : bondsToBeRemoved) {
|
657 |
+
removeBond(bond);
|
658 |
+
}
|
659 |
+
atom.getFrag().removeAtom(atom);
|
660 |
+
Set<Atom> ambiguousElementAssignment = atom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT);
|
661 |
+
if (ambiguousElementAssignment != null){
|
662 |
+
ambiguousElementAssignment.remove(atom);
|
663 |
+
if (ambiguousElementAssignment.size() == 1){
|
664 |
+
ambiguousElementAssignment.iterator().next().setProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT, null);
|
665 |
+
}
|
666 |
+
}
|
667 |
+
}
|
668 |
+
|
669 |
+
void removeBond(Bond bond){
|
670 |
+
bond.getFromAtom().getFrag().removeBond(bond);
|
671 |
+
bond.getFromAtom().removeBond(bond);
|
672 |
+
bond.getToAtom().removeBond(bond);
|
673 |
+
removeInterFragmentBondIfPresent(bond);
|
674 |
+
}
|
675 |
+
|
676 |
+
/**
|
677 |
+
* Valency is used to determine the expected number of hydrogen
|
678 |
+
* Hydrogens are then added to bring the number of connections up to the minimum required to satisfy the atom's valency
|
679 |
+
* This allows the valency of the atom to be encoded e.g. phopshane-3 hydrogen, phosphorane-5 hydrogen.
|
680 |
+
* It is also necessary when considering stereochemistry as a hydrogen beats nothing in the CIP rules
|
681 |
+
* @throws StructureBuildingException
|
682 |
+
*/
|
683 |
+
void makeHydrogensExplicit() throws StructureBuildingException {
|
684 |
+
for (Fragment fragment : fragments) {
|
685 |
+
List<Atom> atomList = fragment.getAtomList();
|
686 |
+
for (Atom parentAtom : atomList) {
|
687 |
+
int explicitHydrogensToAdd = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(parentAtom);
|
688 |
+
for (int i = 0; i < explicitHydrogensToAdd; i++) {
|
689 |
+
Atom hydrogen = createAtom(ChemEl.H, fragment);
|
690 |
+
createBond(parentAtom, hydrogen, 1);
|
691 |
+
}
|
692 |
+
if (parentAtom.getAtomParity() != null){
|
693 |
+
if (explicitHydrogensToAdd > 1) {
|
694 |
+
//Cannot have tetrahedral chirality and more than 2 hydrogens
|
695 |
+
parentAtom.setAtomParity(null);//probably caused by deoxy
|
696 |
+
}
|
697 |
+
else {
|
698 |
+
modifyAtomParityToTakeIntoAccountExplicitHydrogen(parentAtom);
|
699 |
+
}
|
700 |
+
}
|
701 |
+
}
|
702 |
+
}
|
703 |
+
}
|
704 |
+
|
705 |
+
private void modifyAtomParityToTakeIntoAccountExplicitHydrogen(Atom atom) throws StructureBuildingException {
|
706 |
+
AtomParity atomParity = atom.getAtomParity();
|
707 |
+
if (!StereoAnalyser.isPossiblyStereogenic(atom)){
|
708 |
+
//no longer a stereoCentre e.g. due to unsaturation
|
709 |
+
atom.setAtomParity(null);
|
710 |
+
}
|
711 |
+
else{
|
712 |
+
Atom[] atomRefs4 = atomParity.getAtomRefs4();
|
713 |
+
Integer positionOfImplicitHydrogen = null;
|
714 |
+
Integer positionOfDeoxyHydrogen = null;
|
715 |
+
for (int i = 0; i < atomRefs4.length; i++) {
|
716 |
+
Atom a = atomRefs4[i];
|
717 |
+
if (a.equals(AtomParity.hydrogen)){
|
718 |
+
positionOfImplicitHydrogen = i;
|
719 |
+
}
|
720 |
+
else if (a.equals(AtomParity.deoxyHydrogen)){
|
721 |
+
positionOfDeoxyHydrogen = i;
|
722 |
+
}
|
723 |
+
}
|
724 |
+
if (positionOfImplicitHydrogen != null || positionOfDeoxyHydrogen != null) {
|
725 |
+
//atom parity was set in SMILES, the dummy hydrogen atom has now been substituted
|
726 |
+
List<Atom> neighbours = atom.getAtomNeighbours();
|
727 |
+
for (Atom atomRef : atomRefs4) {
|
728 |
+
neighbours.remove(atomRef);
|
729 |
+
}
|
730 |
+
if (neighbours.size() == 0) {
|
731 |
+
throw new StructureBuildingException("OPSIN Bug: Unable to determine which atom has substituted a hydrogen at stereocentre");
|
732 |
+
}
|
733 |
+
else if (neighbours.size() == 1 && positionOfDeoxyHydrogen != null) {
|
734 |
+
atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(0);
|
735 |
+
if (positionOfImplicitHydrogen != null){
|
736 |
+
throw new StructureBuildingException("OPSIN Bug: Unable to determine which atom has substituted a hydrogen at stereocentre");
|
737 |
+
}
|
738 |
+
}
|
739 |
+
else if (neighbours.size() == 1 && positionOfImplicitHydrogen != null) {
|
740 |
+
atomRefs4[positionOfImplicitHydrogen] = neighbours.get(0);
|
741 |
+
}
|
742 |
+
else if (neighbours.size() == 2 && positionOfDeoxyHydrogen != null && positionOfImplicitHydrogen != null) {
|
743 |
+
try{
|
744 |
+
List<Atom> cipOrderedAtoms = new CipSequenceRules(atom).getNeighbouringAtomsInCipOrder();
|
745 |
+
//higher priority group replaces the former hydroxy groups (deoxyHydrogen)
|
746 |
+
if (cipOrderedAtoms.indexOf(neighbours.get(0)) > cipOrderedAtoms.indexOf(neighbours.get(1))) {
|
747 |
+
atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(0);
|
748 |
+
atomRefs4[positionOfImplicitHydrogen] = neighbours.get(1);
|
749 |
+
}
|
750 |
+
else{
|
751 |
+
atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(1);
|
752 |
+
atomRefs4[positionOfImplicitHydrogen] = neighbours.get(0);
|
753 |
+
}
|
754 |
+
}
|
755 |
+
catch (CipOrderingException e){
|
756 |
+
//assume ligands equivalent so it makes no difference which is which
|
757 |
+
atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(0);
|
758 |
+
atomRefs4[positionOfImplicitHydrogen] = neighbours.get(1);
|
759 |
+
}
|
760 |
+
}
|
761 |
+
else{
|
762 |
+
throw new StructureBuildingException("OPSIN Bug: Unable to determine which atom has substituted a hydrogen at stereocentre");
|
763 |
+
}
|
764 |
+
}
|
765 |
+
}
|
766 |
+
}
|
767 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentTools.java
ADDED
@@ -0,0 +1,1242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.util.ArrayDeque;
|
4 |
+
import java.util.ArrayList;
|
5 |
+
import java.util.Collections;
|
6 |
+
import java.util.Comparator;
|
7 |
+
import java.util.Deque;
|
8 |
+
import java.util.HashMap;
|
9 |
+
import java.util.HashSet;
|
10 |
+
import java.util.List;
|
11 |
+
import java.util.Map;
|
12 |
+
import java.util.Set;
|
13 |
+
import java.util.regex.Matcher;
|
14 |
+
import java.util.regex.Pattern;
|
15 |
+
|
16 |
+
import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*;
|
17 |
+
import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*;
|
18 |
+
|
19 |
+
/**
|
20 |
+
* Sorts a list of atoms such that their order agrees with the order symbolic locants are typically assigned
|
21 |
+
*
|
22 |
+
* Preferred atoms are sorted to the START of the list
|
23 |
+
* @author dl387
|
24 |
+
*
|
25 |
+
*/
|
26 |
+
class SortAtomsForElementSymbols implements Comparator<Atom> {
|
27 |
+
|
28 |
+
public int compare(Atom a, Atom b){
|
29 |
+
int bondOrderA = a.getProperty(Atom.VISITED);
|
30 |
+
int bondOrderB = b.getProperty(Atom.VISITED);
|
31 |
+
if (bondOrderA > bondOrderB) {//lower order bond is preferred
|
32 |
+
return 1;
|
33 |
+
}
|
34 |
+
if (bondOrderA < bondOrderB) {
|
35 |
+
return -1;
|
36 |
+
}
|
37 |
+
|
38 |
+
if (a.getOutValency() > b.getOutValency()) {//prefer atoms with outValency
|
39 |
+
return -1;
|
40 |
+
}
|
41 |
+
if (a.getOutValency() < b.getOutValency()) {
|
42 |
+
return 1;
|
43 |
+
}
|
44 |
+
|
45 |
+
int expectedHydrogenA = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(a);
|
46 |
+
int expectedHydrogenB = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(b);
|
47 |
+
|
48 |
+
if (expectedHydrogenA > expectedHydrogenB) {//prefer atoms with more hydrogen
|
49 |
+
return -1;
|
50 |
+
}
|
51 |
+
if (expectedHydrogenA < expectedHydrogenB) {
|
52 |
+
return 1;
|
53 |
+
}
|
54 |
+
return 0;
|
55 |
+
}
|
56 |
+
}
|
57 |
+
|
58 |
+
/**
|
59 |
+
* Performs a very crude sort of atoms such that those that are more likely to be substitued are preferred for low locants
|
60 |
+
* Preferred atoms are sorted to the START of the list
|
61 |
+
* @author dl387
|
62 |
+
*
|
63 |
+
*/
|
64 |
+
class SortAtomsForMainGroupElementSymbols implements Comparator<Atom> {
|
65 |
+
|
66 |
+
public int compare(Atom a, Atom b){
|
67 |
+
int compare = a.getElement().compareTo(b.getElement());
|
68 |
+
if (compare != 0) {//only bother comparing properly if elements are the same
|
69 |
+
return compare;
|
70 |
+
}
|
71 |
+
|
72 |
+
int aExpectedHydrogen = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(a);
|
73 |
+
int bExpectedHydrogen = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(b);
|
74 |
+
if (aExpectedHydrogen > 0 && bExpectedHydrogen == 0) {//having substitutable hydrogen preferred
|
75 |
+
return -1;
|
76 |
+
}
|
77 |
+
if (aExpectedHydrogen == 0 && bExpectedHydrogen > 0) {
|
78 |
+
return 1;
|
79 |
+
}
|
80 |
+
List<String> locantsA = a.getLocants();
|
81 |
+
List<String> locantsB = b.getLocants();
|
82 |
+
if (locantsA.size() == 0 && locantsB.size() > 0) {//having no locants preferred
|
83 |
+
return -1;
|
84 |
+
}
|
85 |
+
if (locantsA.size() > 0 && locantsB.size() == 0) {
|
86 |
+
return 1;
|
87 |
+
}
|
88 |
+
return 0;
|
89 |
+
}
|
90 |
+
}
|
91 |
+
|
92 |
+
class FragmentTools {
|
93 |
+
/**
|
94 |
+
* Sorts by number, then by letter e.g. 4,3,3b,5,3a,2 -->2,3,3a,3b,4,5
|
95 |
+
* @author dl387
|
96 |
+
*
|
97 |
+
*/
|
98 |
+
static class SortByLocants implements Comparator<Atom> {
|
99 |
+
static final Pattern locantSegmenter =Pattern.compile("(\\d+)([a-z]?)('*)");
|
100 |
+
|
101 |
+
public int compare(Atom atoma, Atom atomb){
|
102 |
+
if (atoma.getType().equals(SUFFIX_TYPE_VAL) && !atomb.getType().equals(SUFFIX_TYPE_VAL)){//suffix atoms go to the back
|
103 |
+
return 1;
|
104 |
+
}
|
105 |
+
if (atomb.getType().equals(SUFFIX_TYPE_VAL) && !atoma.getType().equals(SUFFIX_TYPE_VAL)){
|
106 |
+
return -1;
|
107 |
+
}
|
108 |
+
|
109 |
+
String locanta =atoma.getFirstLocant();
|
110 |
+
String locantb =atomb.getFirstLocant();
|
111 |
+
if (locanta==null|| locantb==null){
|
112 |
+
return 0;
|
113 |
+
}
|
114 |
+
|
115 |
+
Matcher m1 =locantSegmenter.matcher(locanta);
|
116 |
+
Matcher m2 =locantSegmenter.matcher(locantb);
|
117 |
+
if (!m1.matches()|| !m2.matches()){//inappropriate locant
|
118 |
+
return 0;
|
119 |
+
}
|
120 |
+
String locantaPrimes = m1.group(3);
|
121 |
+
String locantbPrimes = m2.group(3);
|
122 |
+
if (locantaPrimes.compareTo(locantbPrimes)>=1) {
|
123 |
+
return 1;//e.g. 1'' vs 1'
|
124 |
+
} else if (locantbPrimes.compareTo(locantaPrimes)>=1) {
|
125 |
+
return -1;//e.g. 1' vs 1''
|
126 |
+
}
|
127 |
+
else{
|
128 |
+
int locantaNumber = Integer.parseInt(m1.group(1));
|
129 |
+
int locantbNumber = Integer.parseInt(m2.group(1));
|
130 |
+
|
131 |
+
if (locantaNumber >locantbNumber) {
|
132 |
+
return 1;//e.g. 3 vs 2 or 3a vs 2
|
133 |
+
} else if (locantbNumber >locantaNumber) {
|
134 |
+
return -1;//e.g. 2 vs 3 or 2 vs 3a
|
135 |
+
}
|
136 |
+
else{
|
137 |
+
String locantaLetter = m1.group(2);
|
138 |
+
String locantbLetter = m2.group(2);
|
139 |
+
if (locantaLetter.compareTo(locantbLetter)>=1) {
|
140 |
+
return 1;//e.g. 1b vs 1a
|
141 |
+
} else if (locantbLetter.compareTo(locantaLetter)>=1) {
|
142 |
+
return -1;//e.g. 1a vs 1b
|
143 |
+
}
|
144 |
+
return 0;
|
145 |
+
}
|
146 |
+
}
|
147 |
+
}
|
148 |
+
}
|
149 |
+
|
150 |
+
/**
|
151 |
+
* Assign element locants to groups/suffixes. These are in addition to any numerical locants that are present.
|
152 |
+
* Adds primes to make each locant unique.
|
153 |
+
* For groups a locant is not given to carbon atoms
|
154 |
+
* If an element appears in a suffix then element locants are not assigned to occurrences of that element in the parent group
|
155 |
+
* HeteroAtoms in acidStems connected to the first Atom of the fragment are treated as if they were suffix atoms
|
156 |
+
* @param suffixableFragment
|
157 |
+
* @param suffixFragments
|
158 |
+
* @throws StructureBuildingException
|
159 |
+
*/
|
160 |
+
static void assignElementLocants(Fragment suffixableFragment, List<Fragment> suffixFragments) throws StructureBuildingException {
|
161 |
+
|
162 |
+
Map<String,Integer> elementCount = new HashMap<>();//keeps track of how many times each element has been seen
|
163 |
+
Set<Atom> atomsToIgnore = new HashSet<>();//atoms which already have a symbolic locant
|
164 |
+
|
165 |
+
List<Fragment> allFragments = new ArrayList<>(suffixFragments);
|
166 |
+
allFragments.add(suffixableFragment);
|
167 |
+
/*
|
168 |
+
* First check whether any element locants have already been assigned, these will take precedence
|
169 |
+
*/
|
170 |
+
for (Fragment fragment : allFragments) {
|
171 |
+
List<Atom> atomList = fragment.getAtomList();
|
172 |
+
for (Atom atom : atomList) {
|
173 |
+
List<String> elementSymbolLocants = atom.getElementSymbolLocants();
|
174 |
+
for (String locant : elementSymbolLocants) {
|
175 |
+
int primeCount = StringTools.countTerminalPrimes(locant);
|
176 |
+
String element = locant.substring(0, locant.length() - primeCount);
|
177 |
+
Integer seenCount = elementCount.get(element);
|
178 |
+
if (seenCount == null || (seenCount < primeCount + 1)){
|
179 |
+
elementCount.put(element, primeCount + 1);
|
180 |
+
}
|
181 |
+
atomsToIgnore.add(atom);
|
182 |
+
}
|
183 |
+
}
|
184 |
+
}
|
185 |
+
|
186 |
+
{
|
187 |
+
Set<String> elementsToIgnore = elementCount.keySet();
|
188 |
+
|
189 |
+
for (Fragment fragment : allFragments) {
|
190 |
+
List<Atom> atomList = fragment.getAtomList();
|
191 |
+
for (Atom atom : atomList) {
|
192 |
+
if (elementsToIgnore.contains(atom.getElement().toString())){
|
193 |
+
atomsToIgnore.add(atom);
|
194 |
+
}
|
195 |
+
}
|
196 |
+
}
|
197 |
+
}
|
198 |
+
|
199 |
+
String fragType = suffixableFragment.getType();
|
200 |
+
if (fragType.equals(NONCARBOXYLICACID_TYPE_VAL) || fragType.equals(CHALCOGENACIDSTEM_TYPE_VAL)){
|
201 |
+
if (suffixFragments.size() != 0){
|
202 |
+
throw new StructureBuildingException("No suffix fragments were expected to be present on non carboxylic acid");
|
203 |
+
}
|
204 |
+
processNonCarboxylicAcidLabelling(suffixableFragment, elementCount, atomsToIgnore);
|
205 |
+
}
|
206 |
+
else{
|
207 |
+
if (suffixFragments.size() > 0){
|
208 |
+
processSuffixLabelling(suffixFragments, elementCount, atomsToIgnore);
|
209 |
+
Integer seenCount = elementCount.get("N");
|
210 |
+
if (seenCount != null && seenCount > 1){//look for special case violation of IUPAC rule, =(N)=(NN) is N//N' in practice rather than N/N'/N''
|
211 |
+
//this method will put both locants on the N with substituable hydrogen
|
212 |
+
detectAndCorrectHydrazoneDerivativeViolation(suffixFragments);
|
213 |
+
}
|
214 |
+
}
|
215 |
+
processMainGroupLabelling(suffixableFragment, elementCount, atomsToIgnore);
|
216 |
+
}
|
217 |
+
}
|
218 |
+
|
219 |
+
private static void detectAndCorrectHydrazoneDerivativeViolation(List<Fragment> suffixFragments) {
|
220 |
+
fragmentLoop: for (Fragment suffixFrag : suffixFragments) {
|
221 |
+
List<Atom> atomList = suffixFrag.getAtomList();
|
222 |
+
for (Atom atom : atomList) {
|
223 |
+
if (atom.getElement() == ChemEl.N && atom.getIncomingValency() ==3 ){
|
224 |
+
List<String> locants =atom.getLocants();
|
225 |
+
if (locants.size()==1 && MATCH_ELEMENT_SYMBOL_LOCANT.matcher(locants.get(0)).matches()){
|
226 |
+
List<Atom> neighbours = atom.getAtomNeighbours();
|
227 |
+
for (Atom neighbour : neighbours) {
|
228 |
+
if (neighbour.getElement() == ChemEl.N && neighbour.getIncomingValency()==1){
|
229 |
+
String locantToAdd = locants.get(0);
|
230 |
+
atom.clearLocants();
|
231 |
+
neighbour.addLocant(locantToAdd);
|
232 |
+
continue fragmentLoop;
|
233 |
+
}
|
234 |
+
}
|
235 |
+
}
|
236 |
+
}
|
237 |
+
}
|
238 |
+
}
|
239 |
+
}
|
240 |
+
|
241 |
+
private static void processMainGroupLabelling(Fragment suffixableFragment, Map<String, Integer> elementCount, Set<Atom> atomsToIgnore) {
|
242 |
+
Set<String> elementToIgnore = new HashSet<>(elementCount.keySet());
|
243 |
+
List<Atom> atomList = suffixableFragment.getAtomList();
|
244 |
+
Collections.sort(atomList, new SortAtomsForMainGroupElementSymbols());
|
245 |
+
Atom atomToAddCLabelTo = null;//only add a C label if there is only one C in the main group
|
246 |
+
boolean seenMoreThanOneC = false;
|
247 |
+
for (Atom atom : atomList) {
|
248 |
+
if (atomsToIgnore.contains(atom)){
|
249 |
+
continue;
|
250 |
+
}
|
251 |
+
ChemEl chemEl = atom.getElement();
|
252 |
+
if (elementToIgnore.contains(chemEl.toString())){
|
253 |
+
continue;
|
254 |
+
}
|
255 |
+
if (chemEl == ChemEl.C) {
|
256 |
+
if (seenMoreThanOneC) {
|
257 |
+
continue;
|
258 |
+
}
|
259 |
+
if (atomToAddCLabelTo != null){
|
260 |
+
atomToAddCLabelTo = null;
|
261 |
+
seenMoreThanOneC = true;
|
262 |
+
}
|
263 |
+
else{
|
264 |
+
atomToAddCLabelTo = atom;
|
265 |
+
}
|
266 |
+
}
|
267 |
+
else{
|
268 |
+
assignLocant(atom, elementCount);
|
269 |
+
}
|
270 |
+
}
|
271 |
+
if (atomToAddCLabelTo != null){
|
272 |
+
atomToAddCLabelTo.addLocant("C");
|
273 |
+
}
|
274 |
+
}
|
275 |
+
|
276 |
+
private static void processSuffixLabelling(List<Fragment> suffixFragments, Map<String, Integer> elementCount, Set<Atom> atomsToIgnore) {
|
277 |
+
List<Atom> startingAtoms = new ArrayList<>();
|
278 |
+
Set<Atom> atomsVisited = new HashSet<>();
|
279 |
+
for (Fragment fragment : suffixFragments) {
|
280 |
+
Atom rAtom = fragment.getFirstAtom();
|
281 |
+
List<Atom> nextAtoms = getIntraFragmentNeighboursAndSetVisitedBondOrder(rAtom);
|
282 |
+
atomsVisited.addAll(nextAtoms);
|
283 |
+
startingAtoms.addAll(nextAtoms);
|
284 |
+
}
|
285 |
+
Collections.sort(startingAtoms, new SortAtomsForElementSymbols());
|
286 |
+
|
287 |
+
Deque<Atom> atomsToConsider = new ArrayDeque<>(startingAtoms);
|
288 |
+
while (atomsToConsider.size() > 0){
|
289 |
+
assignLocantsAndExploreNeighbours(elementCount, atomsToIgnore, atomsVisited, atomsToConsider);
|
290 |
+
}
|
291 |
+
}
|
292 |
+
|
293 |
+
private static void processNonCarboxylicAcidLabelling(Fragment suffixableFragment, Map<String, Integer> elementCount, Set<Atom> atomsToIgnore) {
|
294 |
+
Set<Atom> atomsVisited = new HashSet<>();
|
295 |
+
Atom firstAtom = suffixableFragment.getFirstAtom();
|
296 |
+
List<Atom> startingAtoms = getIntraFragmentNeighboursAndSetVisitedBondOrder(firstAtom);
|
297 |
+
|
298 |
+
Collections.sort(startingAtoms, new SortAtomsForElementSymbols());
|
299 |
+
atomsVisited.add(firstAtom);
|
300 |
+
Deque<Atom> atomsToConsider = new ArrayDeque<>(startingAtoms);
|
301 |
+
while (atomsToConsider.size() > 0){
|
302 |
+
assignLocantsAndExploreNeighbours(elementCount, atomsToIgnore, atomsVisited, atomsToConsider);
|
303 |
+
}
|
304 |
+
if (!atomsToIgnore.contains(firstAtom) && firstAtom.determineValency(true) > firstAtom.getIncomingValency()) {
|
305 |
+
//e.g. carbonimidoyl the carbon has locant C
|
306 |
+
assignLocant(firstAtom, elementCount);
|
307 |
+
}
|
308 |
+
}
|
309 |
+
|
310 |
+
private static void assignLocantsAndExploreNeighbours(Map<String, Integer> elementCount, Set<Atom> atomsToIgnore, Set<Atom> atomsVisited, Deque<Atom> atomsToConsider) {
|
311 |
+
Atom atom = atomsToConsider.removeFirst();
|
312 |
+
atomsVisited.add(atom);
|
313 |
+
if (!atomsToIgnore.contains(atom)) {//assign locant
|
314 |
+
assignLocant(atom, elementCount);
|
315 |
+
}
|
316 |
+
List<Atom> atomsToExplore = getIntraFragmentNeighboursAndSetVisitedBondOrder(atom);
|
317 |
+
atomsToExplore.removeAll(atomsVisited);
|
318 |
+
Collections.sort(atomsToExplore, new SortAtomsForElementSymbols());
|
319 |
+
for (int i = atomsToExplore.size() - 1; i >= 0; i--) {
|
320 |
+
atomsToConsider.addFirst(atomsToExplore.get(i));
|
321 |
+
}
|
322 |
+
}
|
323 |
+
|
324 |
+
/**
|
325 |
+
* Gets the neighbours of an atom that claim to be within the same frag
|
326 |
+
* The order of bond taken to get to the neighbour is set on the neighbours Atom.VISITED property
|
327 |
+
* @param atom
|
328 |
+
* @return
|
329 |
+
*/
|
330 |
+
private static List<Atom> getIntraFragmentNeighboursAndSetVisitedBondOrder(Atom atom) {
|
331 |
+
List<Atom> atomsToExplore = new ArrayList<>();
|
332 |
+
List<Bond> bonds = atom.getBonds();
|
333 |
+
for (Bond bond : bonds) {
|
334 |
+
Atom neighbour = bond.getOtherAtom(atom);
|
335 |
+
if (neighbour.getFrag().equals(atom.getFrag())) {
|
336 |
+
atomsToExplore.add(neighbour);
|
337 |
+
neighbour.setProperty(Atom.VISITED, bond.getOrder());
|
338 |
+
}
|
339 |
+
}
|
340 |
+
return atomsToExplore;
|
341 |
+
}
|
342 |
+
|
343 |
+
private static void assignLocant(Atom atom, Map<String, Integer> elementCount) {
|
344 |
+
String element = atom.getElement().toString();
|
345 |
+
Integer count = elementCount.get(element);
|
346 |
+
if (count == null){
|
347 |
+
atom.addLocant(element);
|
348 |
+
elementCount.put(element, 1);
|
349 |
+
}
|
350 |
+
else{
|
351 |
+
atom.addLocant(element + StringTools.multiplyString("'", count));
|
352 |
+
elementCount.put(element, count + 1);
|
353 |
+
}
|
354 |
+
}
|
355 |
+
|
356 |
+
/** Adjusts the order of a bond in a fragment.
|
357 |
+
*
|
358 |
+
* @param fromAtom The lower-numbered atom in the bond
|
359 |
+
* @param bondOrder The new bond order
|
360 |
+
* @param fragment The fragment
|
361 |
+
* @return The bond that was unsaturated
|
362 |
+
* @throws StructureBuildingException
|
363 |
+
*/
|
364 |
+
static Bond unsaturate(Atom fromAtom, int bondOrder, Fragment fragment) throws StructureBuildingException {
|
365 |
+
Atom toAtom = null;
|
366 |
+
Integer locant = null;
|
367 |
+
try{
|
368 |
+
String primes ="";
|
369 |
+
String locantStr = fromAtom.getFirstLocant();
|
370 |
+
int numberOfPrimes = StringTools.countTerminalPrimes(locantStr);
|
371 |
+
locant = Integer.parseInt(locantStr.substring(0, locantStr.length()-numberOfPrimes));
|
372 |
+
primes = StringTools.multiplyString("'", numberOfPrimes);
|
373 |
+
Atom possibleToAtom = fragment.getAtomByLocant(String.valueOf(locant +1)+primes);
|
374 |
+
if (possibleToAtom !=null && fromAtom.getBondToAtom(possibleToAtom)!=null){
|
375 |
+
toAtom = possibleToAtom;
|
376 |
+
}
|
377 |
+
else if (possibleToAtom ==null && fromAtom.getAtomIsInACycle()){//allow something like cyclohexan-6-ene, something like butan-4-ene will still fail
|
378 |
+
possibleToAtom = fragment.getAtomByLocant("1" + primes);
|
379 |
+
if (possibleToAtom !=null && fromAtom.getBondToAtom(possibleToAtom)!=null){
|
380 |
+
toAtom =possibleToAtom;
|
381 |
+
}
|
382 |
+
}
|
383 |
+
}
|
384 |
+
catch (Exception e) {
|
385 |
+
List<Atom> atomList = fragment.getAtomList();
|
386 |
+
int initialIndice = atomList.indexOf(fromAtom);
|
387 |
+
if (initialIndice +1 < atomList.size() && fromAtom.getBondToAtom(atomList.get(initialIndice +1))!=null){
|
388 |
+
toAtom = atomList.get(initialIndice +1);
|
389 |
+
}
|
390 |
+
}
|
391 |
+
if (toAtom==null){
|
392 |
+
if (locant!=null){
|
393 |
+
throw new StructureBuildingException("Could not find bond to unsaturate starting from the atom with locant: " +locant);
|
394 |
+
}
|
395 |
+
else{
|
396 |
+
throw new StructureBuildingException("Could not find bond to unsaturate");
|
397 |
+
}
|
398 |
+
}
|
399 |
+
Bond b = fromAtom.getBondToAtomOrThrow(toAtom);
|
400 |
+
if (b.getOrder() != 1) {
|
401 |
+
throw new StructureBuildingException("Bond indicated to be unsaturated was already unsaturated");
|
402 |
+
}
|
403 |
+
b.setOrder(bondOrder);
|
404 |
+
return b;
|
405 |
+
}
|
406 |
+
|
407 |
+
/** Adjusts the order of a bond in a fragment.
|
408 |
+
*
|
409 |
+
* @param fromAtom The first atom in the bond
|
410 |
+
* @param locantTo The locant of the other atom in the bond
|
411 |
+
* @param bondOrder The new bond order
|
412 |
+
* @param fragment The fragment
|
413 |
+
* @throws StructureBuildingException
|
414 |
+
*/
|
415 |
+
static void unsaturate(Atom fromAtom, String locantTo, int bondOrder, Fragment fragment) throws StructureBuildingException {
|
416 |
+
Atom toAtom = fragment.getAtomByLocantOrThrow(locantTo);
|
417 |
+
Bond b = fromAtom.getBondToAtomOrThrow(toAtom);
|
418 |
+
if (b.getOrder() != 1) {
|
419 |
+
throw new StructureBuildingException("Bond indicated to be unsaturated was already unsaturated");
|
420 |
+
}
|
421 |
+
b.setOrder(bondOrder);
|
422 |
+
}
|
423 |
+
|
424 |
+
/**Adjusts the labeling on a fused ring system, such that bridgehead atoms
|
425 |
+
* have locants endings in 'a' or 'b' etc. Example: naphthalene
|
426 |
+
* 1,2,3,4,5,6,7,8,9,10->1,2,3,4,4a,5,6,7,8,8a
|
427 |
+
* @param atomList
|
428 |
+
*/
|
429 |
+
static void relabelLocantsAsFusedRingSystem(List<Atom> atomList) {
|
430 |
+
int locantVal = 0;
|
431 |
+
char locantLetter = 'a';
|
432 |
+
for (Atom atom : atomList) {
|
433 |
+
atom.clearLocants();
|
434 |
+
}
|
435 |
+
for (Atom atom : atomList) {
|
436 |
+
if(atom.getElement() != ChemEl.C || atom.getBondCount() < 3) {
|
437 |
+
locantVal++;
|
438 |
+
locantLetter = 'a';
|
439 |
+
atom.addLocant(Integer.toString(locantVal));
|
440 |
+
} else {
|
441 |
+
atom.addLocant(Integer.toString(locantVal) + locantLetter);
|
442 |
+
locantLetter++;
|
443 |
+
}
|
444 |
+
}
|
445 |
+
}
|
446 |
+
|
447 |
+
/**
|
448 |
+
* Adds the given string to all the locants of the atoms.
|
449 |
+
* @param atomList
|
450 |
+
* @param stringToAdd
|
451 |
+
*/
|
452 |
+
static void relabelLocants(List<Atom> atomList, String stringToAdd) {
|
453 |
+
for (Atom atom : atomList) {
|
454 |
+
List<String> locants = new ArrayList<>(atom.getLocants());
|
455 |
+
atom.clearLocants();
|
456 |
+
for (String locant : locants) {
|
457 |
+
atom.addLocant(locant + stringToAdd);
|
458 |
+
}
|
459 |
+
}
|
460 |
+
}
|
461 |
+
|
462 |
+
/**
|
463 |
+
* Adds the given string to all the numeric locants of the atoms.
|
464 |
+
* @param atomList
|
465 |
+
* @param stringToAdd
|
466 |
+
*/
|
467 |
+
static void relabelNumericLocants(List<Atom> atomList, String stringToAdd) {
|
468 |
+
for (Atom atom : atomList) {
|
469 |
+
List<String> locants = new ArrayList<>(atom.getLocants());
|
470 |
+
for (String locant : locants) {
|
471 |
+
if (MATCH_NUMERIC_LOCANT.matcher(locant).matches()){
|
472 |
+
atom.removeLocant(locant);
|
473 |
+
atom.addLocant(locant + stringToAdd);
|
474 |
+
}
|
475 |
+
}
|
476 |
+
}
|
477 |
+
}
|
478 |
+
|
479 |
+
|
480 |
+
static void splitOutAtomIntoValency1OutAtoms(OutAtom outAtom) {
|
481 |
+
Fragment frag =outAtom.getAtom().getFrag();
|
482 |
+
for (int i = 1; i < outAtom.getValency(); i++) {
|
483 |
+
frag.addOutAtom(outAtom.getAtom(), 1, outAtom.isSetExplicitly());
|
484 |
+
}
|
485 |
+
outAtom.setValency(1);
|
486 |
+
}
|
487 |
+
|
488 |
+
/**
|
489 |
+
* Checks if the specified Nitrogen is potentially involved in [NH]C=N <-> N=C[NH] tautomerism
|
490 |
+
* Given the starting nitrogen returns the other nitrogen or null if that nitrogen does not appear to be involved in such tautomerism
|
491 |
+
* @param nitrogen
|
492 |
+
* @return null or the other nitrogen
|
493 |
+
*/
|
494 |
+
static Atom detectSimpleNitrogenTautomer(Atom nitrogen) {
|
495 |
+
if (nitrogen.getElement() == ChemEl.N && nitrogen.getAtomIsInACycle()){
|
496 |
+
for (Atom neighbour : nitrogen.getAtomNeighbours()) {
|
497 |
+
if (neighbour.hasSpareValency() && neighbour.getElement() == ChemEl.C && neighbour.getAtomIsInACycle()){
|
498 |
+
List<Atom> distance2Neighbours = neighbour.getAtomNeighbours();
|
499 |
+
distance2Neighbours.remove(nitrogen);
|
500 |
+
for (Atom distance2Neighbour : distance2Neighbours) {
|
501 |
+
if (distance2Neighbour.hasSpareValency() && distance2Neighbour.getElement() == ChemEl.N && distance2Neighbour.getAtomIsInACycle() && distance2Neighbour.getCharge()==0){
|
502 |
+
return distance2Neighbour;
|
503 |
+
}
|
504 |
+
}
|
505 |
+
}
|
506 |
+
}
|
507 |
+
}
|
508 |
+
return null;
|
509 |
+
}
|
510 |
+
|
511 |
+
/**Increases the order of bonds joining atoms with spareValencies,
|
512 |
+
* and uses up said spareValencies.
|
513 |
+
* [spare valency is an indication of the atom's desire to form the maximum number of non-cumulative double bonds]
|
514 |
+
* @param frag
|
515 |
+
* @throws StructureBuildingException If the algorithm can't work out where to put the bonds
|
516 |
+
*/
|
517 |
+
static void convertSpareValenciesToDoubleBonds(Fragment frag) throws StructureBuildingException {
|
518 |
+
List<Atom> atomCollection = frag.getAtomList();
|
519 |
+
/* pick atom, getAtomNeighbours, decideIfTerminal, resolve */
|
520 |
+
|
521 |
+
/*
|
522 |
+
* Remove spare valency on atoms with valency precluding creation of double bonds
|
523 |
+
*/
|
524 |
+
for(Atom a : atomCollection) {
|
525 |
+
a.ensureSVIsConsistantWithValency(true);
|
526 |
+
}
|
527 |
+
|
528 |
+
/*
|
529 |
+
* Remove spare valency on atoms that are not adjacent to another atom with spare valency
|
530 |
+
*/
|
531 |
+
atomLoop: for(Atom a : atomCollection) {
|
532 |
+
if(a.hasSpareValency()) {
|
533 |
+
for(Atom aa : frag.getIntraFragmentAtomNeighbours(a)) {
|
534 |
+
if(aa.hasSpareValency()) {
|
535 |
+
continue atomLoop;
|
536 |
+
}
|
537 |
+
}
|
538 |
+
a.setSpareValency(false);
|
539 |
+
}
|
540 |
+
}
|
541 |
+
|
542 |
+
/*
|
543 |
+
* The indicated hydrogen from the original SMILES definition of the fragment e.g. [nH] are used to disambiguate if there are
|
544 |
+
* an odd number of atoms with spare valency. Hence pyrrole is unambiguously 1H-pyrrole unless specified otherwise
|
545 |
+
* Things gets more complicated if the input contained multiple indicated hydrogen as it is unclear whether these still apply to the final molecule
|
546 |
+
*/
|
547 |
+
Atom atomToReduceValencyAt = null;
|
548 |
+
List<Atom> originalIndicatedHydrogen = frag.getIndicatedHydrogen();
|
549 |
+
List<Atom> indicatedHydrogen = new ArrayList<>(originalIndicatedHydrogen.size());
|
550 |
+
for (Atom atom : frag.getIndicatedHydrogen()) {
|
551 |
+
if (atom.hasSpareValency() && atom.getCharge() == 0) {
|
552 |
+
indicatedHydrogen.add(atom);
|
553 |
+
}
|
554 |
+
}
|
555 |
+
if (indicatedHydrogen.size() > 0) {
|
556 |
+
//typically there will be only one indicated hydrogen
|
557 |
+
if (indicatedHydrogen.size() > 1) {
|
558 |
+
for (Atom indicatedAtom : indicatedHydrogen) {
|
559 |
+
boolean couldBeInvolvedInSimpleNitrogenTautomerism = false;//fix for guanine like purine derivatives
|
560 |
+
if (indicatedAtom.getElement() == ChemEl.N && indicatedAtom.getAtomIsInACycle()) {
|
561 |
+
atomloop : for (Atom neighbour : indicatedAtom.getAtomNeighbours()) {
|
562 |
+
if (neighbour.getElement() == ChemEl.C && neighbour.getAtomIsInACycle()) {
|
563 |
+
List<Atom> distance2Neighbours = neighbour.getAtomNeighbours();
|
564 |
+
distance2Neighbours.remove(indicatedAtom);
|
565 |
+
for (Atom distance2Neighbour : distance2Neighbours) {
|
566 |
+
if (distance2Neighbour.getElement() == ChemEl.N && distance2Neighbour.getAtomIsInACycle() && !originalIndicatedHydrogen.contains(distance2Neighbour)){
|
567 |
+
couldBeInvolvedInSimpleNitrogenTautomerism = true;
|
568 |
+
break atomloop;
|
569 |
+
}
|
570 |
+
}
|
571 |
+
}
|
572 |
+
}
|
573 |
+
}
|
574 |
+
//retain spare valency if has the cyclic [NH]C=N moiety but substitution has meant that this tautomerism doesn't actually occur cf. 8-oxoguanine
|
575 |
+
if (!couldBeInvolvedInSimpleNitrogenTautomerism || detectSimpleNitrogenTautomer(indicatedAtom) != null) {
|
576 |
+
indicatedAtom.setSpareValency(false);
|
577 |
+
}
|
578 |
+
}
|
579 |
+
}
|
580 |
+
else{
|
581 |
+
atomToReduceValencyAt = indicatedHydrogen.get(0);
|
582 |
+
}
|
583 |
+
}
|
584 |
+
|
585 |
+
int svCount = 0;
|
586 |
+
for(Atom a : atomCollection) {
|
587 |
+
svCount += a.hasSpareValency() ? 1 :0;
|
588 |
+
}
|
589 |
+
|
590 |
+
/*
|
591 |
+
* Double-bonds go between pairs of atoms so if there are an off number of candidate atoms (e.g. pyrrole) an atom must be chosen
|
592 |
+
* The atom with indicated hydrogen (see above) is used in preference else heuristics are used to chose a candidate
|
593 |
+
*/
|
594 |
+
if((svCount & 1) == 1) {
|
595 |
+
if (atomToReduceValencyAt == null) {
|
596 |
+
atomToReduceValencyAt = findBestAtomToRemoveSpareValencyFrom(frag, atomCollection);
|
597 |
+
}
|
598 |
+
atomToReduceValencyAt.setSpareValency(false);
|
599 |
+
svCount--;
|
600 |
+
}
|
601 |
+
|
602 |
+
while(svCount > 0) {
|
603 |
+
boolean foundTerminalFlag = false;
|
604 |
+
boolean foundNonBridgeHeadFlag = false;
|
605 |
+
boolean foundBridgeHeadFlag = false;
|
606 |
+
//First handle cases where double bond placement is completely unambiguous i.e. an atom where only one neighbour has spare valency
|
607 |
+
for(Atom a : atomCollection) {
|
608 |
+
if(a.hasSpareValency()) {
|
609 |
+
int count = 0;
|
610 |
+
for(Atom aa : frag.getIntraFragmentAtomNeighbours(a)) {
|
611 |
+
if(aa.hasSpareValency()) {
|
612 |
+
count++;
|
613 |
+
}
|
614 |
+
}
|
615 |
+
if(count == 1) {
|
616 |
+
for(Atom aa : frag.getIntraFragmentAtomNeighbours(a)) {
|
617 |
+
if(aa.hasSpareValency()) {
|
618 |
+
foundTerminalFlag = true;
|
619 |
+
a.setSpareValency(false);
|
620 |
+
aa.setSpareValency(false);
|
621 |
+
a.getBondToAtomOrThrow(aa).addOrder(1);
|
622 |
+
svCount -= 2;//Two atoms where for one of them this bond is the only double bond it can possible form
|
623 |
+
break;
|
624 |
+
}
|
625 |
+
}
|
626 |
+
}
|
627 |
+
}
|
628 |
+
}
|
629 |
+
if(foundTerminalFlag) {
|
630 |
+
continue;
|
631 |
+
}
|
632 |
+
|
633 |
+
//Find two atoms where one, or both, of them are not bridgeheads
|
634 |
+
for(Atom a : atomCollection) {
|
635 |
+
List<Atom> neighbours = frag.getIntraFragmentAtomNeighbours(a);
|
636 |
+
if(a.hasSpareValency() && neighbours.size() < 3) {
|
637 |
+
for(Atom aa : neighbours) {
|
638 |
+
if(aa.hasSpareValency()) {
|
639 |
+
foundNonBridgeHeadFlag = true;
|
640 |
+
a.setSpareValency(false);
|
641 |
+
aa.setSpareValency(false);
|
642 |
+
a.getBondToAtomOrThrow(aa).addOrder(1);
|
643 |
+
svCount -= 2;//Two atoms where one of them is not a bridge head
|
644 |
+
break;
|
645 |
+
}
|
646 |
+
}
|
647 |
+
}
|
648 |
+
if(foundNonBridgeHeadFlag) {
|
649 |
+
break;
|
650 |
+
}
|
651 |
+
}
|
652 |
+
if(foundNonBridgeHeadFlag) {
|
653 |
+
continue;
|
654 |
+
}
|
655 |
+
|
656 |
+
//Find two atoms where both of them are bridgheads
|
657 |
+
for(Atom a : atomCollection) {
|
658 |
+
List<Atom> neighbours = frag.getIntraFragmentAtomNeighbours(a);
|
659 |
+
if(a.hasSpareValency()) {
|
660 |
+
for(Atom aa : neighbours) {
|
661 |
+
if(aa.hasSpareValency()) {
|
662 |
+
foundBridgeHeadFlag = true;
|
663 |
+
a.setSpareValency(false);
|
664 |
+
aa.setSpareValency(false);
|
665 |
+
a.getBondToAtomOrThrow(aa).addOrder(1);
|
666 |
+
svCount -= 2;//Two atoms where both of them are a bridge head e.g. necessary for something like coronene
|
667 |
+
break;
|
668 |
+
}
|
669 |
+
}
|
670 |
+
}
|
671 |
+
if(foundBridgeHeadFlag) {
|
672 |
+
break;
|
673 |
+
}
|
674 |
+
}
|
675 |
+
if(!foundBridgeHeadFlag) {
|
676 |
+
throw new StructureBuildingException("Failed to assign all double bonds! (Check that indicated hydrogens have been appropriately specified)");
|
677 |
+
}
|
678 |
+
}
|
679 |
+
}
|
680 |
+
|
681 |
+
private static Atom findBestAtomToRemoveSpareValencyFrom(Fragment frag, List<Atom> atomCollection) {
|
682 |
+
for(Atom a : atomCollection) {//try and find an atom with SV that neighbours only one atom with SV
|
683 |
+
if(a.hasSpareValency()) {
|
684 |
+
int atomsWithSV = 0;
|
685 |
+
for(Atom aa : frag.getIntraFragmentAtomNeighbours(a)) {
|
686 |
+
if(aa.hasSpareValency()) {
|
687 |
+
atomsWithSV++;
|
688 |
+
}
|
689 |
+
}
|
690 |
+
if (atomsWithSV == 1) {
|
691 |
+
return a;
|
692 |
+
}
|
693 |
+
}
|
694 |
+
}
|
695 |
+
atomLoop: for(Atom a : atomCollection) {//try and find an atom with bridgehead atoms with SV on both sides c.f. phenoxastibinine == 10H-phenoxastibinine
|
696 |
+
if(a.hasSpareValency()) {
|
697 |
+
List<Atom> neighbours = frag.getIntraFragmentAtomNeighbours(a);
|
698 |
+
if (neighbours.size() == 2) {
|
699 |
+
for(Atom aa : neighbours) {
|
700 |
+
if(frag.getIntraFragmentAtomNeighbours(aa).size() < 3){
|
701 |
+
continue atomLoop;
|
702 |
+
}
|
703 |
+
}
|
704 |
+
return a;
|
705 |
+
}
|
706 |
+
}
|
707 |
+
}
|
708 |
+
//Prefer nitrogen to carbon e.g. get NHC=C rather than N=CCH
|
709 |
+
Atom firstAtomWithSpareValency = null;
|
710 |
+
Atom firstHeteroAtomWithSpareValency = null;
|
711 |
+
for(Atom a : atomCollection) {
|
712 |
+
if(a.hasSpareValency()) {
|
713 |
+
if (a.getElement() != ChemEl.C) {
|
714 |
+
if (a.getCharge() == 0) {
|
715 |
+
return a;
|
716 |
+
}
|
717 |
+
if(firstHeteroAtomWithSpareValency == null) {
|
718 |
+
firstHeteroAtomWithSpareValency = a;
|
719 |
+
}
|
720 |
+
}
|
721 |
+
if(firstAtomWithSpareValency == null) {
|
722 |
+
firstAtomWithSpareValency = a;
|
723 |
+
}
|
724 |
+
}
|
725 |
+
}
|
726 |
+
if (firstAtomWithSpareValency == null) {
|
727 |
+
throw new IllegalArgumentException("OPSIN Bug: No atom had spare valency!");
|
728 |
+
}
|
729 |
+
return firstHeteroAtomWithSpareValency != null ? firstHeteroAtomWithSpareValency : firstAtomWithSpareValency;
|
730 |
+
}
|
731 |
+
|
732 |
+
|
733 |
+
static Atom getAtomByAminoAcidStyleLocant(Atom backboneAtom, String elementSymbol, String primes) {
|
734 |
+
//Search for appropriate atom by using the same algorithm as is used to assign locants initially
|
735 |
+
|
736 |
+
List<Atom> startingAtoms = new ArrayList<>();
|
737 |
+
Set<Atom> atomsVisited = new HashSet<>();
|
738 |
+
List<Atom> neighbours = getIntraFragmentNeighboursAndSetVisitedBondOrder(backboneAtom);
|
739 |
+
mainLoop: for (Atom neighbour : neighbours) {
|
740 |
+
atomsVisited.add(neighbour);
|
741 |
+
if (!neighbour.getType().equals(SUFFIX_TYPE_VAL)){
|
742 |
+
for (String neighbourLocant : neighbour.getLocants()) {
|
743 |
+
if (MATCH_NUMERIC_LOCANT.matcher(neighbourLocant).matches()){//gone to an inappropriate atom
|
744 |
+
continue mainLoop;
|
745 |
+
}
|
746 |
+
}
|
747 |
+
}
|
748 |
+
startingAtoms.add(neighbour);
|
749 |
+
}
|
750 |
+
|
751 |
+
Collections.sort(startingAtoms, new SortAtomsForElementSymbols());
|
752 |
+
Map<String,Integer> elementCount = new HashMap<>();//keeps track of how many times each element has been seen
|
753 |
+
|
754 |
+
Deque<Atom> atomsToConsider = new ArrayDeque<>(startingAtoms);
|
755 |
+
boolean hydrazoneSpecialCase =false;//look for special case violation of IUPAC rule where the locant of the =N- atom is skipped. This flag is set when =N- is encountered
|
756 |
+
while (atomsToConsider.size() > 0){
|
757 |
+
Atom atom = atomsToConsider.removeFirst();
|
758 |
+
atomsVisited.add(atom);
|
759 |
+
int primesOnPossibleAtom =0;
|
760 |
+
String element =atom.getElement().toString();
|
761 |
+
if (elementCount.get(element)==null){
|
762 |
+
elementCount.put(element,1);
|
763 |
+
}
|
764 |
+
else{
|
765 |
+
int count =elementCount.get(element);
|
766 |
+
primesOnPossibleAtom =count;
|
767 |
+
elementCount.put(element, count +1);
|
768 |
+
}
|
769 |
+
if (hydrazoneSpecialCase){
|
770 |
+
if (element.equals(elementSymbol) && primes.length() == primesOnPossibleAtom -1){
|
771 |
+
return atom;
|
772 |
+
}
|
773 |
+
hydrazoneSpecialCase =false;
|
774 |
+
}
|
775 |
+
|
776 |
+
List<Atom> atomNeighbours = getIntraFragmentNeighboursAndSetVisitedBondOrder(atom);
|
777 |
+
atomNeighbours.removeAll(atomsVisited);
|
778 |
+
for (int i = atomNeighbours.size() -1; i >=0; i--) {
|
779 |
+
Atom neighbour = atomNeighbours.get(i);
|
780 |
+
if (!neighbour.getType().equals(SUFFIX_TYPE_VAL)){
|
781 |
+
for (String neighbourLocant : neighbour.getLocants()) {
|
782 |
+
if (MATCH_NUMERIC_LOCANT.matcher(neighbourLocant).matches()){//gone to an inappropriate atom
|
783 |
+
atomNeighbours.remove(i);
|
784 |
+
break;
|
785 |
+
}
|
786 |
+
}
|
787 |
+
}
|
788 |
+
}
|
789 |
+
if (atom.getElement() == ChemEl.N && atom.getIncomingValency() ==3 && atom.getCharge()==0
|
790 |
+
&& atomNeighbours.size()==1 && atomNeighbours.get(0).getElement() == ChemEl.N){
|
791 |
+
hydrazoneSpecialCase =true;
|
792 |
+
}
|
793 |
+
else{
|
794 |
+
if (element.equals(elementSymbol)){
|
795 |
+
if (primes.length() == primesOnPossibleAtom){
|
796 |
+
return atom;
|
797 |
+
}
|
798 |
+
}
|
799 |
+
}
|
800 |
+
|
801 |
+
Collections.sort(atomNeighbours, new SortAtomsForElementSymbols());
|
802 |
+
for (int i = atomNeighbours.size() - 1; i >= 0; i--) {
|
803 |
+
atomsToConsider.addFirst(atomNeighbours.get(i));
|
804 |
+
}
|
805 |
+
}
|
806 |
+
|
807 |
+
if (primes.equals("") && backboneAtom.getElement().toString().equals(elementSymbol)){//maybe it meant the starting atom
|
808 |
+
return backboneAtom;
|
809 |
+
}
|
810 |
+
return null;
|
811 |
+
}
|
812 |
+
|
813 |
+
|
814 |
+
/**
|
815 |
+
* Determines whether the bond between two elements is likely to be covalent
|
816 |
+
* This is crudely determined based on whether the combination of elements fall outside the ionic and
|
817 |
+
* metallic sections of a van Arkel diagram
|
818 |
+
* @param chemEl1
|
819 |
+
* @param chemEl2
|
820 |
+
* @return
|
821 |
+
*/
|
822 |
+
static boolean isCovalent(ChemEl chemEl1, ChemEl chemEl2) {
|
823 |
+
Double atom1Electrongegativity = AtomProperties.getPaulingElectronegativity(chemEl1);
|
824 |
+
Double atom2Electrongegativity = AtomProperties.getPaulingElectronegativity(chemEl2);
|
825 |
+
if (atom1Electrongegativity!=null && atom2Electrongegativity !=null){
|
826 |
+
double halfSum = (atom1Electrongegativity + atom2Electrongegativity)/2;
|
827 |
+
double difference = Math.abs(atom1Electrongegativity - atom2Electrongegativity);
|
828 |
+
if (halfSum < 1.6){
|
829 |
+
return false;//probably metallic
|
830 |
+
}
|
831 |
+
if (difference < 1.76 * halfSum - 3.03){
|
832 |
+
return true;
|
833 |
+
}
|
834 |
+
}
|
835 |
+
return false;
|
836 |
+
}
|
837 |
+
|
838 |
+
/**
|
839 |
+
* Is the atom a suffix atom/carbon of an aldehyde atom/chalcogen functional atom/hydroxy (or chalcogen equivalent)
|
840 |
+
* (by special step heterostems are not considered hydroxy e.g. disulfane)
|
841 |
+
* @param atom
|
842 |
+
* @return
|
843 |
+
*/
|
844 |
+
static boolean isCharacteristicAtom(Atom atom) {
|
845 |
+
if (atom.getType().equals(SUFFIX_TYPE_VAL) ||
|
846 |
+
(atom.getElement().isChalcogen() && !HETEROSTEM_SUBTYPE_VAL.equals(atom.getFrag().getSubType()) &&
|
847 |
+
atom.getIncomingValency() == 1 &&
|
848 |
+
atom.getOutValency() == 0 && atom.getCharge() == 0)) {
|
849 |
+
return true;
|
850 |
+
}
|
851 |
+
return isFunctionalAtomOrAldehyde(atom);
|
852 |
+
}
|
853 |
+
|
854 |
+
/**
|
855 |
+
* Is the atom an aldehyde atom or a chalcogen functional atom
|
856 |
+
* @param atom
|
857 |
+
* @return
|
858 |
+
*/
|
859 |
+
static boolean isFunctionalAtomOrAldehyde(Atom atom) {
|
860 |
+
if (Boolean.TRUE.equals(atom.getProperty(Atom.ISALDEHYDE))){//substituting an aldehyde would make it no longer an aldehyde
|
861 |
+
return true;
|
862 |
+
}
|
863 |
+
return isFunctionalAtom(atom);
|
864 |
+
}
|
865 |
+
|
866 |
+
/**
|
867 |
+
* Is the atom a chalcogen functional atom
|
868 |
+
* @param atom
|
869 |
+
* @return
|
870 |
+
*/
|
871 |
+
static boolean isFunctionalAtom(Atom atom) {
|
872 |
+
ChemEl chemEl = atom.getElement();
|
873 |
+
if (chemEl.isChalcogen()) {//potential chalcogen functional atom
|
874 |
+
Fragment frag = atom.getFrag();
|
875 |
+
for (int i = 0, l = frag.getFunctionalAtomCount(); i < l; i++) {
|
876 |
+
if (atom.equals(frag.getFunctionalAtom(i).getAtom())){
|
877 |
+
return true;
|
878 |
+
}
|
879 |
+
}
|
880 |
+
}
|
881 |
+
return false;
|
882 |
+
}
|
883 |
+
|
884 |
+
|
885 |
+
/**
|
886 |
+
* Checks that all atoms in a ring appear to be equivalent
|
887 |
+
* @param ring
|
888 |
+
* @return true if all equivalent, else false
|
889 |
+
*/
|
890 |
+
static boolean allAtomsInRingAreIdentical(Fragment ring){
|
891 |
+
List<Atom> atomList = ring.getAtomList();
|
892 |
+
Atom firstAtom = atomList.get(0);
|
893 |
+
ChemEl chemEl = firstAtom.getElement();
|
894 |
+
int valency = firstAtom.getIncomingValency();
|
895 |
+
boolean spareValency = firstAtom.hasSpareValency();
|
896 |
+
for (Atom atom : atomList) {
|
897 |
+
if (atom.getElement() != chemEl){
|
898 |
+
return false;
|
899 |
+
}
|
900 |
+
if (atom.getIncomingValency() != valency){
|
901 |
+
return false;
|
902 |
+
}
|
903 |
+
if (atom.hasSpareValency() != spareValency){
|
904 |
+
return false;
|
905 |
+
}
|
906 |
+
}
|
907 |
+
return true;
|
908 |
+
}
|
909 |
+
|
910 |
+
static void removeTerminalAtom(BuildState state, Atom atomToRemove) {
|
911 |
+
AtomParity atomParity = atomToRemove.getAtomNeighbours().get(0).getAtomParity();
|
912 |
+
if (atomParity!=null){//replace reference to atom with reference to implicit hydrogen
|
913 |
+
Atom[] atomRefs4= atomParity.getAtomRefs4();
|
914 |
+
for (int i = 0; i < atomRefs4.length; i++) {
|
915 |
+
if (atomRefs4[i]==atomToRemove){
|
916 |
+
atomRefs4[i] = AtomParity.deoxyHydrogen;
|
917 |
+
break;
|
918 |
+
}
|
919 |
+
}
|
920 |
+
}
|
921 |
+
state.fragManager.removeAtomAndAssociatedBonds(atomToRemove);
|
922 |
+
}
|
923 |
+
|
924 |
+
/**
|
925 |
+
* Removes a terminal oxygen from the atom
|
926 |
+
* An exception is thrown if no suitable oxygen could be found connected to the atom
|
927 |
+
* Note that [N+][O-] is treated as N=O
|
928 |
+
* @param state
|
929 |
+
* @param atom
|
930 |
+
* @param desiredBondOrder
|
931 |
+
* @throws StructureBuildingException
|
932 |
+
*/
|
933 |
+
static void removeTerminalOxygen(BuildState state, Atom atom, int desiredBondOrder) throws StructureBuildingException {
|
934 |
+
//TODO prioritise [N+][O-]
|
935 |
+
List<Atom> neighbours = atom.getAtomNeighbours();
|
936 |
+
for (Atom neighbour : neighbours) {
|
937 |
+
if (neighbour.getElement() == ChemEl.O && neighbour.getBondCount()==1){
|
938 |
+
Bond b = atom.getBondToAtomOrThrow(neighbour);
|
939 |
+
if (b.getOrder()==desiredBondOrder && neighbour.getCharge()==0){
|
940 |
+
FragmentTools.removeTerminalAtom(state, neighbour);
|
941 |
+
if (atom.getLambdaConventionValency()!=null){//corrects valency for phosphin/arsin/stibin
|
942 |
+
atom.setLambdaConventionValency(atom.getLambdaConventionValency()-desiredBondOrder);
|
943 |
+
}
|
944 |
+
if (atom.getMinimumValency()!=null){//corrects valency for phosphin/arsin/stibin
|
945 |
+
atom.setMinimumValency(atom.getMinimumValency()-desiredBondOrder);
|
946 |
+
}
|
947 |
+
return;
|
948 |
+
}
|
949 |
+
else if (neighbour.getCharge() ==-1 && b.getOrder()==1 && desiredBondOrder == 2){
|
950 |
+
if (atom.getCharge() ==1 && atom.getElement() == ChemEl.N){
|
951 |
+
FragmentTools.removeTerminalAtom(state, neighbour);
|
952 |
+
atom.neutraliseCharge();
|
953 |
+
return;
|
954 |
+
}
|
955 |
+
}
|
956 |
+
}
|
957 |
+
}
|
958 |
+
if (desiredBondOrder ==2){
|
959 |
+
throw new StructureBuildingException("Double bonded oxygen not found at suffix attachment position. Perhaps a suffix has been used inappropriately");
|
960 |
+
}
|
961 |
+
else if (desiredBondOrder ==1){
|
962 |
+
throw new StructureBuildingException("Hydroxy oxygen not found at suffix attachment position. Perhaps a suffix has been used inappropriately");
|
963 |
+
}
|
964 |
+
else {
|
965 |
+
throw new StructureBuildingException("Suitable oxygen not found at suffix attachment position Perhaps a suffix has been used inappropriately");
|
966 |
+
}
|
967 |
+
|
968 |
+
}
|
969 |
+
|
970 |
+
|
971 |
+
/**
|
972 |
+
* Finds terminal atoms of the given element type from the list given
|
973 |
+
* The terminal atoms be single bonded, not radicals and uncharged
|
974 |
+
* @param atoms
|
975 |
+
* @param chemEl
|
976 |
+
* @return
|
977 |
+
*/
|
978 |
+
static List<Atom> findHydroxyLikeTerminalAtoms(List<Atom> atoms, ChemEl chemEl) {
|
979 |
+
List<Atom> matches =new ArrayList<>();
|
980 |
+
for (Atom atom : atoms) {
|
981 |
+
if (atom.getElement() == chemEl && atom.getIncomingValency() == 1 &&
|
982 |
+
atom.getOutValency() == 0 && atom.getCharge() == 0){
|
983 |
+
matches.add(atom);
|
984 |
+
}
|
985 |
+
}
|
986 |
+
return matches;
|
987 |
+
}
|
988 |
+
|
989 |
+
/**
|
990 |
+
* Checks whether a bond is part of a 6 member or smaller ring.
|
991 |
+
* This is necessary as such double bonds are assumed to not be capable of having E/Z stereochemistry
|
992 |
+
* @param bond
|
993 |
+
* @return true unless in a 6 member or smaller rings
|
994 |
+
*/
|
995 |
+
static boolean notIn6MemberOrSmallerRing(Bond bond) {
|
996 |
+
Atom fromAtom =bond.getFromAtom();
|
997 |
+
Atom toAtom = bond.getToAtom();
|
998 |
+
if (fromAtom.getAtomIsInACycle() && toAtom.getAtomIsInACycle()){//obviously both must be in rings
|
999 |
+
//attempt to get from the fromAtom to the toAtom in 6 or fewer steps.
|
1000 |
+
List<Atom> visitedAtoms = new ArrayList<>();
|
1001 |
+
Deque<Atom> atomsToInvestigate = new ArrayDeque<>();//A queue is not used as I need to make sure that only up to depth 6 is investigated
|
1002 |
+
List<Atom> neighbours =fromAtom.getAtomNeighbours();
|
1003 |
+
neighbours.remove(toAtom);
|
1004 |
+
for (Atom neighbour : neighbours) {
|
1005 |
+
atomsToInvestigate.add(neighbour);
|
1006 |
+
}
|
1007 |
+
visitedAtoms.add(fromAtom);
|
1008 |
+
for (int i = 0; i < 5; i++) {//up to 5 bonds from the neighbours of the fromAtom i.e. up to ring size 6
|
1009 |
+
if (atomsToInvestigate.isEmpty()){
|
1010 |
+
break;
|
1011 |
+
}
|
1012 |
+
Deque<Atom> atomsToInvestigateNext = new ArrayDeque<>();
|
1013 |
+
while (!atomsToInvestigate.isEmpty()) {
|
1014 |
+
Atom currentAtom =atomsToInvestigate.removeFirst();
|
1015 |
+
if (currentAtom == toAtom){
|
1016 |
+
return false;
|
1017 |
+
}
|
1018 |
+
visitedAtoms.add(currentAtom);
|
1019 |
+
neighbours =currentAtom.getAtomNeighbours();
|
1020 |
+
for (Atom neighbour : neighbours) {
|
1021 |
+
if (!visitedAtoms.contains(neighbour) && neighbour.getAtomIsInACycle()){
|
1022 |
+
atomsToInvestigateNext.add(neighbour);
|
1023 |
+
}
|
1024 |
+
}
|
1025 |
+
}
|
1026 |
+
atomsToInvestigate = atomsToInvestigateNext;
|
1027 |
+
}
|
1028 |
+
}
|
1029 |
+
return true;
|
1030 |
+
}
|
1031 |
+
|
1032 |
+
/**
|
1033 |
+
* Finds the hydroxy atom of all hydroxy functional groups in a fragment
|
1034 |
+
* i.e. not in carboxylic acid or oxime
|
1035 |
+
* @param frag
|
1036 |
+
* @return
|
1037 |
+
* @throws StructureBuildingException
|
1038 |
+
*/
|
1039 |
+
static List<Atom> findHydroxyGroups(Fragment frag) throws StructureBuildingException {
|
1040 |
+
List<Atom> hydroxyAtoms = new ArrayList<>();
|
1041 |
+
List<Atom> atoms = frag.getAtomList();
|
1042 |
+
for (Atom atom : atoms) {
|
1043 |
+
if (atom.getElement() == ChemEl.O && atom.getIncomingValency() == 1 && atom.getOutValency() == 0 && atom.getCharge() == 0){
|
1044 |
+
Atom adjacentAtom = atom.getAtomNeighbours().get(0);
|
1045 |
+
List<Atom> neighbours = adjacentAtom.getAtomNeighbours();
|
1046 |
+
if (adjacentAtom.getElement() == ChemEl.C){
|
1047 |
+
neighbours.remove(atom);
|
1048 |
+
if (neighbours.size() >= 1 && neighbours.get(0).getElement() == ChemEl.O && adjacentAtom.getBondToAtomOrThrow(neighbours.get(0)).getOrder()==2){
|
1049 |
+
continue;
|
1050 |
+
}
|
1051 |
+
if (neighbours.size() >= 2 && neighbours.get(1).getElement() == ChemEl.O && adjacentAtom.getBondToAtomOrThrow(neighbours.get(1)).getOrder()==2){
|
1052 |
+
continue;
|
1053 |
+
}
|
1054 |
+
hydroxyAtoms.add(atom);
|
1055 |
+
}
|
1056 |
+
}
|
1057 |
+
}
|
1058 |
+
return hydroxyAtoms;
|
1059 |
+
}
|
1060 |
+
|
1061 |
+
static List<Atom> findnAtomsForSubstitution(List<Atom> atomList, Atom preferredAtom, int numberOfSubstitutionsRequired, int bondOrder, boolean takeIntoAccountOutValency, boolean preserveValency) {
|
1062 |
+
int atomCount = atomList.size();
|
1063 |
+
int startingIndex = preferredAtom != null ? atomList.indexOf(preferredAtom) : 0;
|
1064 |
+
if (startingIndex < 0){
|
1065 |
+
throw new IllegalArgumentException("OPSIN Bug: preferredAtom should be part of the list of atoms to search through");
|
1066 |
+
}
|
1067 |
+
CyclicAtomList atoms = new CyclicAtomList(atomList, startingIndex - 1);//next() will retrieve the atom at the startingIndex
|
1068 |
+
List<Atom> substitutableAtoms = new ArrayList<>();
|
1069 |
+
if (atomCount == 1 && ELEMENTARYATOM_TYPE_VAL.equals(atomList.get(0).getFrag().getType())) {
|
1070 |
+
Atom atom = atomList.get(0);
|
1071 |
+
int timesAtomCanBeSubstituted = getTimesElementaryAtomCanBeSubstituted(atom);
|
1072 |
+
for (int j = 1; j <= timesAtomCanBeSubstituted; j++) {
|
1073 |
+
substitutableAtoms.add(atom);
|
1074 |
+
}
|
1075 |
+
}
|
1076 |
+
else {
|
1077 |
+
for (int i = 0; i < atomCount; i++) {//aromaticity preserved, standard valency assumed, characteristic atoms ignored
|
1078 |
+
Atom atom = atoms.next();
|
1079 |
+
if (!FragmentTools.isCharacteristicAtom(atom) || (numberOfSubstitutionsRequired == 1 && atom == preferredAtom)) {
|
1080 |
+
int currentExpectedValency = atom.determineValency(takeIntoAccountOutValency);
|
1081 |
+
int usedValency = atom.getIncomingValency() + (atom.hasSpareValency() ? 1 : 0) + (takeIntoAccountOutValency ? atom.getOutValency() : 0);
|
1082 |
+
int timesAtomCanBeSubstituted = ((currentExpectedValency - usedValency)/ bondOrder);
|
1083 |
+
for (int j = 1; j <= timesAtomCanBeSubstituted; j++) {
|
1084 |
+
substitutableAtoms.add(atom);
|
1085 |
+
}
|
1086 |
+
}
|
1087 |
+
}
|
1088 |
+
}
|
1089 |
+
if (substitutableAtoms.size() >= numberOfSubstitutionsRequired){
|
1090 |
+
return substitutableAtoms;
|
1091 |
+
}
|
1092 |
+
substitutableAtoms.clear();
|
1093 |
+
for (int i = 0; i < atomCount; i++) {//aromaticity preserved, standard valency assumed, functional suffixes ignored
|
1094 |
+
Atom atom = atoms.next();
|
1095 |
+
if (!FragmentTools.isFunctionalAtomOrAldehyde(atom) || (numberOfSubstitutionsRequired == 1 && atom == preferredAtom)) {
|
1096 |
+
int currentExpectedValency = atom.determineValency(takeIntoAccountOutValency);
|
1097 |
+
int usedValency = atom.getIncomingValency() + (atom.hasSpareValency() ? 1 : 0) + (takeIntoAccountOutValency ? atom.getOutValency() : 0);
|
1098 |
+
int timesAtomCanBeSubstituted = ((currentExpectedValency - usedValency)/ bondOrder);
|
1099 |
+
for (int j = 1; j <= timesAtomCanBeSubstituted; j++) {
|
1100 |
+
substitutableAtoms.add(atom);
|
1101 |
+
}
|
1102 |
+
}
|
1103 |
+
}
|
1104 |
+
if (substitutableAtoms.size() >= numberOfSubstitutionsRequired){
|
1105 |
+
return substitutableAtoms;
|
1106 |
+
}
|
1107 |
+
if (preserveValency) {
|
1108 |
+
return null;
|
1109 |
+
}
|
1110 |
+
substitutableAtoms.clear();
|
1111 |
+
|
1112 |
+
for (int i = 0; i < atomCount; i++) {//aromaticity preserved, any sensible valency allowed, anything substitutable
|
1113 |
+
Atom atom = atoms.next();
|
1114 |
+
Integer maximumValency = ValencyChecker.getMaximumValency(atom);
|
1115 |
+
if (maximumValency != null) {
|
1116 |
+
int usedValency = atom.getIncomingValency() + (atom.hasSpareValency() ? 1 : 0) + (takeIntoAccountOutValency ? atom.getOutValency() : 0);
|
1117 |
+
int timesAtomCanBeSubstituted = ((maximumValency - usedValency)/ bondOrder);
|
1118 |
+
for (int j = 1; j <= timesAtomCanBeSubstituted; j++) {
|
1119 |
+
substitutableAtoms.add(atom);
|
1120 |
+
}
|
1121 |
+
}
|
1122 |
+
else{
|
1123 |
+
for (int j = 0; j < numberOfSubstitutionsRequired; j++) {
|
1124 |
+
substitutableAtoms.add(atom);
|
1125 |
+
}
|
1126 |
+
}
|
1127 |
+
}
|
1128 |
+
if (substitutableAtoms.size() >= numberOfSubstitutionsRequired){
|
1129 |
+
return substitutableAtoms;
|
1130 |
+
}
|
1131 |
+
substitutableAtoms.clear();
|
1132 |
+
|
1133 |
+
for (int i = 0; i < atomCount; i++) {//aromaticity dropped, any sensible valency allowed, anything substitutable
|
1134 |
+
Atom atom = atoms.next();
|
1135 |
+
Integer maximumValency = ValencyChecker.getMaximumValency(atom);
|
1136 |
+
if (maximumValency != null) {
|
1137 |
+
int usedValency = atom.getIncomingValency() + (takeIntoAccountOutValency ? atom.getOutValency() : 0);
|
1138 |
+
int timesAtomCanBeSubstituted = ((maximumValency - usedValency)/ bondOrder);
|
1139 |
+
for (int j = 1; j <= timesAtomCanBeSubstituted; j++) {
|
1140 |
+
substitutableAtoms.add(atom);
|
1141 |
+
}
|
1142 |
+
}
|
1143 |
+
else {
|
1144 |
+
for (int j = 0; j < numberOfSubstitutionsRequired; j++) {
|
1145 |
+
substitutableAtoms.add(atom);
|
1146 |
+
}
|
1147 |
+
}
|
1148 |
+
}
|
1149 |
+
if (substitutableAtoms.size() >= numberOfSubstitutionsRequired){
|
1150 |
+
return substitutableAtoms;
|
1151 |
+
}
|
1152 |
+
return null;
|
1153 |
+
}
|
1154 |
+
|
1155 |
+
private static int getTimesElementaryAtomCanBeSubstituted(Atom atom) {
|
1156 |
+
Integer oxidationNumber = atom.getProperty(Atom.OXIDATION_NUMBER);//explicitly set oxidation state
|
1157 |
+
if (oxidationNumber == null) {
|
1158 |
+
String oxidationStates = atom.getFrag().getTokenEl().getAttributeValue(COMMONOXIDATIONSTATESANDMAX_ATR);//properties of this element
|
1159 |
+
if (oxidationStates != null) {
|
1160 |
+
String[] commonOxidationStates = oxidationStates.split(":")[0].split(",");
|
1161 |
+
//highest common oxidation state
|
1162 |
+
oxidationNumber = Integer.parseInt(commonOxidationStates[commonOxidationStates.length - 1]);
|
1163 |
+
}
|
1164 |
+
else {
|
1165 |
+
oxidationNumber = 0;
|
1166 |
+
}
|
1167 |
+
}
|
1168 |
+
|
1169 |
+
int usedValency = atom.getIncomingValency();
|
1170 |
+
return (oxidationNumber > usedValency) ? oxidationNumber - usedValency : 0;
|
1171 |
+
}
|
1172 |
+
|
1173 |
+
static List<Atom> findnAtomsForSubstitution(List<Atom> atomList, Atom preferredAtom, int numberOfSubstitutionsRequired, int bondOrder, boolean takeIntoAccountOutValency) {
|
1174 |
+
return findnAtomsForSubstitution(atomList, preferredAtom, numberOfSubstitutionsRequired, bondOrder, takeIntoAccountOutValency, false);
|
1175 |
+
}
|
1176 |
+
|
1177 |
+
static List<Atom> findnAtomsForSubstitution(Fragment frag, Atom preferredAtom, int numberOfSubstitutionsRequired, int bondOrder, boolean takeIntoAccountOutValency) {
|
1178 |
+
return findnAtomsForSubstitution(frag.getAtomList(), preferredAtom, numberOfSubstitutionsRequired, bondOrder, takeIntoAccountOutValency);
|
1179 |
+
}
|
1180 |
+
|
1181 |
+
/**
|
1182 |
+
* Returns a list of atoms of size >= numberOfSubstitutionsDesired (or null if this not possible)
|
1183 |
+
* An atom must have have sufficient valency to support a substituent requiring a bond of order bondOrder
|
1184 |
+
* If an atom can support multiple substituents it will appear in the list multiple times
|
1185 |
+
* This method iterates over the the fragment atoms attempting to fulfil these requirements with incrementally more lenient constraints:
|
1186 |
+
* aromaticity preserved, standard valency assumed, characteristic atoms ignored
|
1187 |
+
* aromaticity preserved, standard valency assumed, functional suffixes ignored
|
1188 |
+
* aromaticity preserved, any sensible valency allowed, anything substitutable
|
1189 |
+
* aromaticity dropped, any sensible valency allowed, anything substitutable
|
1190 |
+
*
|
1191 |
+
* Iteration starts from the defaultInAtom (if applicable, else the first atom) i.e. the defaultInAtom if substitutable will be the first atom in the list
|
1192 |
+
* @param frag
|
1193 |
+
* @param numberOfSubstitutionsRequired
|
1194 |
+
* @param bondOrder
|
1195 |
+
* @return
|
1196 |
+
*/
|
1197 |
+
static List<Atom> findnAtomsForSubstitution(Fragment frag, int numberOfSubstitutionsRequired, int bondOrder) {
|
1198 |
+
return findnAtomsForSubstitution(frag.getAtomList(), frag.getDefaultInAtom(), numberOfSubstitutionsRequired, bondOrder, true);
|
1199 |
+
}
|
1200 |
+
|
1201 |
+
/**
|
1202 |
+
* Returns a list of the most preferable atoms for substitution (empty list if none are)
|
1203 |
+
* An atom must have have sufficient valency to support a substituent requiring a bond of order bondOrder
|
1204 |
+
* If an atom can support multiple substituents it will appear in the list multiple times
|
1205 |
+
* This method iterates over the the fragment atoms attempting to fulfil these requirements with incrementally more lenient constraints:
|
1206 |
+
* aromaticity preserved, standard valency assumed, characteristic atoms ignored
|
1207 |
+
* aromaticity preserved, standard valency assumed, functional suffixes ignored
|
1208 |
+
* aromaticity preserved, any sensible valency allowed, anything substitutable
|
1209 |
+
* aromaticity dropped, any sensible valency allowed, anything substitutable
|
1210 |
+
*
|
1211 |
+
* Iteration starts from the defaultInAtom (if applicable, else the first atom) i.e. the defaultInAtom if substitutable will be the first atom in the list
|
1212 |
+
* @param frag
|
1213 |
+
* @param bondOrder
|
1214 |
+
* @return
|
1215 |
+
*/
|
1216 |
+
static List<Atom> findSubstituableAtoms(Fragment frag, int bondOrder) {
|
1217 |
+
List<Atom> potentialAtoms = findnAtomsForSubstitution(frag, 1, bondOrder);
|
1218 |
+
if (potentialAtoms == null) {
|
1219 |
+
return Collections.emptyList();
|
1220 |
+
}
|
1221 |
+
return potentialAtoms;
|
1222 |
+
}
|
1223 |
+
|
1224 |
+
static Atom lastNonSuffixCarbonWithSufficientValency(Fragment conjunctiveFragment) {
|
1225 |
+
List<Atom> atomList = conjunctiveFragment.getAtomList();
|
1226 |
+
for (int i = atomList.size()-1; i >=0; i--) {
|
1227 |
+
Atom a = atomList.get(i);
|
1228 |
+
if (a.getType().equals(SUFFIX_TYPE_VAL)){
|
1229 |
+
continue;
|
1230 |
+
}
|
1231 |
+
if (a.getElement() != ChemEl.C){
|
1232 |
+
continue;
|
1233 |
+
}
|
1234 |
+
if (ValencyChecker.checkValencyAvailableForBond(a, 1)){
|
1235 |
+
return a;
|
1236 |
+
}
|
1237 |
+
}
|
1238 |
+
return null;
|
1239 |
+
}
|
1240 |
+
|
1241 |
+
|
1242 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FunctionalAtom.java
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
/**
|
4 |
+
* Struct for a FunctionalAtom. As expected holds the atom.
|
5 |
+
* This is used to indicate, for example, that this atom may form an ester
|
6 |
+
*
|
7 |
+
* @author dl387
|
8 |
+
*
|
9 |
+
*/
|
10 |
+
class FunctionalAtom {
|
11 |
+
private final Atom atom;
|
12 |
+
|
13 |
+
FunctionalAtom(Atom atom) {
|
14 |
+
this.atom = atom;
|
15 |
+
}
|
16 |
+
|
17 |
+
Atom getAtom() {
|
18 |
+
return atom;
|
19 |
+
}
|
20 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FunctionalReplacement.java
ADDED
@@ -0,0 +1,1176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*;
|
4 |
+
|
5 |
+
import java.util.ArrayList;
|
6 |
+
import java.util.Collections;
|
7 |
+
import java.util.Comparator;
|
8 |
+
import java.util.Iterator;
|
9 |
+
import java.util.LinkedHashSet;
|
10 |
+
import java.util.LinkedList;
|
11 |
+
import java.util.List;
|
12 |
+
import java.util.Set;
|
13 |
+
import java.util.regex.Pattern;
|
14 |
+
|
15 |
+
/**
|
16 |
+
* Methods for performing functional replacement
|
17 |
+
* @author dl387
|
18 |
+
*
|
19 |
+
*/
|
20 |
+
class FunctionalReplacement {
|
21 |
+
|
22 |
+
/**
|
23 |
+
* Sorts infix transformations by the number of acceptable inputs for the transformation.
|
24 |
+
* e.g. thio ends up towards the end of the list as it accepts both -O or =O whilst say imido only accepts =O
|
25 |
+
* @author dl387
|
26 |
+
*
|
27 |
+
*/
|
28 |
+
private static class SortInfixTransformations implements Comparator<String> {
|
29 |
+
public int compare(String infixTransformation1, String infixTransformation2) {
|
30 |
+
int allowedInputs1 = infixTransformation1.split(",").length;
|
31 |
+
int allowedInputs2 = infixTransformation2.split(",").length;
|
32 |
+
if (allowedInputs1 < allowedInputs2){//infixTransformation1 preferred
|
33 |
+
return -1;
|
34 |
+
}
|
35 |
+
if (allowedInputs1 > allowedInputs2){//infixTransformation2 preferred
|
36 |
+
return 1;
|
37 |
+
}
|
38 |
+
else{
|
39 |
+
return 0;
|
40 |
+
}
|
41 |
+
}
|
42 |
+
}
|
43 |
+
private static enum PREFIX_REPLACEMENT_TYPE{
|
44 |
+
chalcogen,//ambiguous
|
45 |
+
halideOrPseudoHalide,//only mean functional replacement when applied to non carboxylic acids
|
46 |
+
dedicatedFunctionalReplacementPrefix,//no ambiguity exists
|
47 |
+
hydrazono,//ambiguous, only applies to non carboxylic acid
|
48 |
+
peroxy//ambiguous, also applies to etheric oxygen
|
49 |
+
}
|
50 |
+
|
51 |
+
static final Pattern matchChalcogenReplacement= Pattern.compile("thio|seleno|telluro");
|
52 |
+
|
53 |
+
private final BuildState state;
|
54 |
+
|
55 |
+
/**
 * @param state the build state this instance will operate on
 */
FunctionalReplacement(BuildState state) {
    this.state = state;
}
|
58 |
+
|
59 |
+
/**
|
60 |
+
* Applies the effects of acid replacing functional class nomenclature
|
61 |
+
* This must be performed early so that prefix/infix functional replacement is performed correctly
|
62 |
+
* and so that element symbol locants are assigned appropriately
|
63 |
+
* @param finalSubOrRootInWord
|
64 |
+
* @param word
|
65 |
+
* @throws ComponentGenerationException
|
66 |
+
* @throws StructureBuildingException
|
67 |
+
*/
|
68 |
+
void processAcidReplacingFunctionalClassNomenclature(Element finalSubOrRootInWord, Element word) throws ComponentGenerationException, StructureBuildingException {
|
69 |
+
Element wordRule = OpsinTools.getParentWordRule(word);
|
70 |
+
if (WordRule.valueOf(wordRule.getAttributeValue(WORDRULE_ATR)) == WordRule.acidReplacingFunctionalGroup){
|
71 |
+
Element parentWordRule = word.getParent();
|
72 |
+
if (parentWordRule.indexOf(word)==0){
|
73 |
+
for (int i = 1, l = parentWordRule.getChildCount(); i < l ; i++) {
|
74 |
+
Element acidReplacingWord = parentWordRule.getChild(i);
|
75 |
+
if (!acidReplacingWord.getName().equals(WORD_EL)) {
|
76 |
+
throw new RuntimeException("OPSIN bug: problem with acidReplacingFunctionalGroup word rule");
|
77 |
+
}
|
78 |
+
String type = acidReplacingWord.getAttributeValue(TYPE_ATR);
|
79 |
+
if (type.equals(WordType.full.toString())) {
|
80 |
+
//case where functionalTerm is substituted
|
81 |
+
//as words are processed from right to left in cases like phosphoric acid tri(ethylamide) this will be phosphoric acid ethylamide ethylamide ethylamide
|
82 |
+
processAcidReplacingFunctionalClassNomenclatureFullWord(finalSubOrRootInWord, acidReplacingWord);
|
83 |
+
}
|
84 |
+
else if (type.equals(WordType.functionalTerm.toString())) {
|
85 |
+
processAcidReplacingFunctionalClassNomenclatureFunctionalWord(finalSubOrRootInWord, acidReplacingWord);
|
86 |
+
}
|
87 |
+
else {
|
88 |
+
throw new RuntimeException("OPSIN bug: problem with acidReplacingFunctionalGroup word rule");
|
89 |
+
}
|
90 |
+
}
|
91 |
+
}
|
92 |
+
}
|
93 |
+
}
|
94 |
+
|
95 |
+
/**
|
96 |
+
* Performs prefix functional replacement e.g. thio in thioacetic acid replaces an O with S
|
97 |
+
* Prefixes will present themselves as substituents. There is potential ambiguity between usage as a substituent
|
98 |
+
* and as a functional replacement term in some cases. If the substituent is deemed to indicate functional replacement
|
99 |
+
* it will be detached and its effects applied to the subsequent group
|
100 |
+
*
|
101 |
+
* The list of groups and substituents given to this method will be mutated in the process.
|
102 |
+
*
|
103 |
+
* For heterocyclic rings functional replacement should technically be limited to :
|
104 |
+
* pyran, morpholine, chromene, isochromene and xanthene, chromane and isochromane.
|
105 |
+
* but this is not currently enforced
|
106 |
+
* @param groups
|
107 |
+
* @param substituents
|
108 |
+
* @return boolean: has any functional replacement occurred
|
109 |
+
* @throws StructureBuildingException
|
110 |
+
* @throws ComponentGenerationException
|
111 |
+
*/
|
112 |
+
boolean processPrefixFunctionalReplacementNomenclature(List<Element> groups, List<Element> substituents) throws StructureBuildingException, ComponentGenerationException {
|
113 |
+
int originalNumberOfGroups = groups.size();
|
114 |
+
for (int i = originalNumberOfGroups-1; i >=0; i--) {
|
115 |
+
Element group =groups.get(i);
|
116 |
+
String groupValue = group.getValue();
|
117 |
+
PREFIX_REPLACEMENT_TYPE replacementType = null;
|
118 |
+
if (matchChalcogenReplacement.matcher(groupValue).matches() && !isChalcogenSubstituent(group) || groupValue.equals("thiono")){
|
119 |
+
replacementType =PREFIX_REPLACEMENT_TYPE.chalcogen;
|
120 |
+
}
|
121 |
+
else if (HALIDEORPSEUDOHALIDE_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){
|
122 |
+
replacementType =PREFIX_REPLACEMENT_TYPE.halideOrPseudoHalide;
|
123 |
+
}
|
124 |
+
else if (DEDICATEDFUNCTIONALREPLACEMENTPREFIX_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){
|
125 |
+
replacementType =PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix;
|
126 |
+
}
|
127 |
+
else if (groupValue.equals("hydrazono")){
|
128 |
+
replacementType =PREFIX_REPLACEMENT_TYPE.hydrazono;
|
129 |
+
}
|
130 |
+
else if (groupValue.equals("peroxy")){
|
131 |
+
replacementType =PREFIX_REPLACEMENT_TYPE.peroxy;
|
132 |
+
}
|
133 |
+
if (replacementType != null) {
|
134 |
+
//need to check whether this is an instance of functional replacement by checking the substituent/root it is applying to
|
135 |
+
Element substituent = group.getParent();
|
136 |
+
Element nextSubOrBracket = OpsinTools.getNextSibling(substituent);
|
137 |
+
if (nextSubOrBracket!=null && (nextSubOrBracket.getName().equals(ROOT_EL) || nextSubOrBracket.getName().equals(SUBSTITUENT_EL))){
|
138 |
+
Element groupToBeModified = nextSubOrBracket.getFirstChildElement(GROUP_EL);
|
139 |
+
if (groupPrecededByElementThatBlocksPrefixReplacementInterpetation(groupToBeModified)) {
|
140 |
+
if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){
|
141 |
+
throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue);
|
142 |
+
}
|
143 |
+
continue;//not 2,2'-thiodipyran
|
144 |
+
}
|
145 |
+
Element locantEl = null;//null unless a locant that agrees with the multiplier is present
|
146 |
+
Element multiplierEl = null;
|
147 |
+
int numberOfAtomsToReplace = 1;//the number of atoms to be functionally replaced, modified by a multiplier e.g. dithio
|
148 |
+
Element possibleMultiplier = OpsinTools.getPreviousSibling(group);
|
149 |
+
if (possibleMultiplier != null) {
|
150 |
+
Element possibleLocant;
|
151 |
+
if (possibleMultiplier.getName().equals(MULTIPLIER_EL)) {
|
152 |
+
numberOfAtomsToReplace = Integer.valueOf(possibleMultiplier.getAttributeValue(VALUE_ATR));
|
153 |
+
possibleLocant = OpsinTools.getPreviousSibling(possibleMultiplier);
|
154 |
+
multiplierEl = possibleMultiplier;
|
155 |
+
}
|
156 |
+
else{
|
157 |
+
possibleLocant = possibleMultiplier;
|
158 |
+
}
|
159 |
+
if (possibleLocant !=null && possibleLocant.getName().equals(LOCANT_EL) && possibleLocant.getAttribute(TYPE_ATR) == null) {
|
160 |
+
int numberOfLocants = possibleLocant.getValue().split(",").length;
|
161 |
+
if (numberOfLocants == numberOfAtomsToReplace){//locants and number of replacements agree
|
162 |
+
locantEl = possibleLocant;
|
163 |
+
}
|
164 |
+
else if (numberOfAtomsToReplace > 1) {//doesn't look like prefix functional replacement
|
165 |
+
if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){
|
166 |
+
throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue);
|
167 |
+
}
|
168 |
+
continue;
|
169 |
+
}
|
170 |
+
}
|
171 |
+
}
|
172 |
+
|
173 |
+
int oxygenReplaced;
|
174 |
+
if (replacementType == PREFIX_REPLACEMENT_TYPE.chalcogen) {
|
175 |
+
oxygenReplaced = performChalcogenFunctionalReplacement(groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR));
|
176 |
+
}
|
177 |
+
else if (replacementType == PREFIX_REPLACEMENT_TYPE.peroxy) {
|
178 |
+
if (nextSubOrBracket.getName().equals(SUBSTITUENT_EL)) {
|
179 |
+
continue;
|
180 |
+
}
|
181 |
+
oxygenReplaced = performPeroxyFunctionalReplacement(groupToBeModified, locantEl, numberOfAtomsToReplace);
|
182 |
+
}
|
183 |
+
else if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){
|
184 |
+
if (!groupToBeModified.getAttributeValue(TYPE_ATR).equals(NONCARBOXYLICACID_TYPE_VAL)
|
185 |
+
&& !(groupToBeModified.getValue().equals("form") && groupValue.equals("imido"))){
|
186 |
+
throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue);
|
187 |
+
}
|
188 |
+
oxygenReplaced = performFunctionalReplacementOnAcid(groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR));
|
189 |
+
if (oxygenReplaced==0){
|
190 |
+
throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue);
|
191 |
+
}
|
192 |
+
}
|
193 |
+
else if (replacementType == PREFIX_REPLACEMENT_TYPE.hydrazono || replacementType == PREFIX_REPLACEMENT_TYPE.halideOrPseudoHalide){
|
194 |
+
Fragment acidFrag = groupToBeModified.getFrag();
|
195 |
+
if (!groupToBeModified.getAttributeValue(TYPE_ATR).equals(NONCARBOXYLICACID_TYPE_VAL) ||
|
196 |
+
acidHasSufficientHydrogenForSubstitutionInterpretation(acidFrag, group.getFrag().getOutAtom(0).getValency(), locantEl)){
|
197 |
+
//hydrazono replacement only applies to non carboxylic acids e.g. hydrazonooxalic acid
|
198 |
+
//need to be careful to note that something like chlorophosphonic acid isn't functional replacement
|
199 |
+
continue;
|
200 |
+
}
|
201 |
+
oxygenReplaced = performFunctionalReplacementOnAcid(groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR));
|
202 |
+
}
|
203 |
+
else{
|
204 |
+
throw new StructureBuildingException("OPSIN bug: Unexpected prefix replacement type");
|
205 |
+
}
|
206 |
+
if (oxygenReplaced>0){
|
207 |
+
state.fragManager.removeFragment(group.getFrag());
|
208 |
+
substituent.removeChild(group);
|
209 |
+
groups.remove(group);
|
210 |
+
List<Element> remainingChildren =substituent.getChildElements();//there may be a locant that should be moved
|
211 |
+
for (int j = remainingChildren.size()-1; j>=0; j--){
|
212 |
+
Element child =substituent.getChild(j);
|
213 |
+
child.detach();
|
214 |
+
nextSubOrBracket.insertChild(child, 0);
|
215 |
+
}
|
216 |
+
substituents.remove(substituent);
|
217 |
+
substituent.detach();
|
218 |
+
if (oxygenReplaced>1){
|
219 |
+
multiplierEl.detach();
|
220 |
+
}
|
221 |
+
}
|
222 |
+
}
|
223 |
+
else if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){
|
224 |
+
throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue);
|
225 |
+
}
|
226 |
+
}
|
227 |
+
}
|
228 |
+
return groups.size() != originalNumberOfGroups;
|
229 |
+
}
|
230 |
+
|
231 |
+
private boolean isChalcogenSubstituent(Element group) {
|
232 |
+
//Is this group followed by a hyphen and directly preceded by a substituent i.e. no multiplier/locant
|
233 |
+
//e.g. methylthio-
|
234 |
+
Element next = OpsinTools.getNextSibling(group);
|
235 |
+
if (next != null && next.getName().equals(HYPHEN_EL) &&
|
236 |
+
OpsinTools.getPreviousSibling(group) == null) {
|
237 |
+
Element previousGroup = OpsinTools.getPreviousGroup(group);
|
238 |
+
if (previousGroup != null) {
|
239 |
+
//TODO We actually want to know if a carbon atom is the attachment point... but we don't know the attachment point locations at this point
|
240 |
+
Element suffix = OpsinTools.getNextSibling(previousGroup, SUFFIX_EL);
|
241 |
+
if (suffix == null || suffix.getFrag() == null) {
|
242 |
+
for (Atom a : previousGroup.getFrag()) {
|
243 |
+
if (a.getElement() == ChemEl.C) {
|
244 |
+
return true;
|
245 |
+
}
|
246 |
+
}
|
247 |
+
}
|
248 |
+
}
|
249 |
+
}
|
250 |
+
return false;
|
251 |
+
}
|
252 |
+
|
253 |
+
/**
|
254 |
+
* Currently prefix replacement terms must be directly adjacent to the groupToBeModified with an exception made
|
255 |
+
* for carbohydrate stereochemistry prefixes e.g. 'gluco' and for substractive prefixes e.g. 'deoxy'
|
256 |
+
* @param groupToBeModified
|
257 |
+
* @return
|
258 |
+
*/
|
259 |
+
private boolean groupPrecededByElementThatBlocksPrefixReplacementInterpetation(Element groupToBeModified) {
|
260 |
+
Element previous = OpsinTools.getPreviousSibling(groupToBeModified);
|
261 |
+
while (previous !=null && (previous.getName().equals(SUBTRACTIVEPREFIX_EL)
|
262 |
+
|| (previous.getName().equals(STEREOCHEMISTRY_EL) && previous.getAttributeValue(TYPE_ATR).equals(CARBOHYDRATECONFIGURATIONPREFIX_TYPE_VAL)))){
|
263 |
+
previous = OpsinTools.getPreviousSibling(previous);
|
264 |
+
}
|
265 |
+
return previous != null;
|
266 |
+
}
|
267 |
+
|
268 |
+
|
269 |
+
/*
|
270 |
+
*
|
271 |
+
*/
|
272 |
+
|
273 |
+
/**
|
274 |
+
* Performs functional replacement using infixes e.g. thio in ethanthioic acid replaces an O with S
|
275 |
+
* @param suffixFragments May be modified if a multiplier is determined to mean multiplication of a suffix, usually untouched
|
276 |
+
* @param suffixes The suffix elements May be modified if a multiplier is determined to mean multiplication of a suffix, usually untouched
|
277 |
+
* @throws StructureBuildingException
|
278 |
+
* @throws ComponentGenerationException
|
279 |
+
*/
|
280 |
+
void processInfixFunctionalReplacementNomenclature(List<Element> suffixes, List<Fragment> suffixFragments) throws StructureBuildingException, ComponentGenerationException {
|
281 |
+
for (int i = 0; i < suffixes.size(); i++) {
|
282 |
+
Element suffix = suffixes.get(i);
|
283 |
+
if (suffix.getAttribute(INFIX_ATR) != null){
|
284 |
+
Fragment fragToApplyInfixTo = suffix.getFrag();
|
285 |
+
Element possibleAcidGroup = OpsinTools.getPreviousSiblingIgnoringCertainElements(suffix, new String[]{MULTIPLIER_EL, INFIX_EL, SUFFIX_EL});
|
286 |
+
if (possibleAcidGroup !=null && possibleAcidGroup.getName().equals(GROUP_EL) &&
|
287 |
+
(possibleAcidGroup.getAttributeValue(TYPE_ATR).equals(NONCARBOXYLICACID_TYPE_VAL)|| possibleAcidGroup.getAttributeValue(TYPE_ATR).equals(CHALCOGENACIDSTEM_TYPE_VAL))){
|
288 |
+
fragToApplyInfixTo = possibleAcidGroup.getFrag();
|
289 |
+
}
|
290 |
+
if (fragToApplyInfixTo ==null){
|
291 |
+
throw new ComponentGenerationException("infix has erroneously been assigned to a suffix which does not correspond to a suffix fragment. suffix: " + suffix.getValue());
|
292 |
+
}
|
293 |
+
//e.g. =O:S,-O:S (which indicates replacing either a double or single bonded oxygen with S)
|
294 |
+
//This is semicolon delimited for each infix
|
295 |
+
List<String> infixTransformations = StringTools.arrayToList(suffix.getAttributeValue(INFIX_ATR).split(";"));
|
296 |
+
|
297 |
+
List<Atom> atomList =fragToApplyInfixTo.getAtomList();
|
298 |
+
LinkedList<Atom> singleBondedOxygen = new LinkedList<>();
|
299 |
+
LinkedList<Atom> doubleBondedOxygen = new LinkedList<>();
|
300 |
+
populateTerminalSingleAndDoubleBondedOxygen(atomList, singleBondedOxygen, doubleBondedOxygen);
|
301 |
+
int oxygenAvailable = singleBondedOxygen.size() +doubleBondedOxygen.size();
|
302 |
+
|
303 |
+
/*
|
304 |
+
* Modifies suffixes, suffixFragments, suffix and infixTransformations as appropriate
|
305 |
+
*/
|
306 |
+
disambiguateMultipliedInfixMeaning(suffixes, suffixFragments, suffix, infixTransformations, oxygenAvailable);
|
307 |
+
|
308 |
+
/*
|
309 |
+
* Sort infixTransformations so more specific transformations are performed first
|
310 |
+
* e.g. ethanthioimidic acid-->ethanimidthioic acid as imid can only apply to the double bonded oxygen
|
311 |
+
*/
|
312 |
+
Collections.sort(infixTransformations, new SortInfixTransformations());
|
313 |
+
|
314 |
+
for (String infixTransformation : infixTransformations) {
|
315 |
+
String[] transformationArray = infixTransformation.split(":");
|
316 |
+
if (transformationArray.length !=2){
|
317 |
+
throw new StructureBuildingException("Atom to be replaced and replacement not specified correctly in infix: " + infixTransformation);
|
318 |
+
}
|
319 |
+
String[] transformations = transformationArray[0].split(",");
|
320 |
+
String replacementSMILES = transformationArray[1];
|
321 |
+
boolean acceptDoubleBondedOxygen = false;
|
322 |
+
boolean acceptSingleBondedOxygen = false;
|
323 |
+
boolean nitrido =false;
|
324 |
+
for (String transformation : transformations) {
|
325 |
+
if (transformation.startsWith("=")){
|
326 |
+
acceptDoubleBondedOxygen = true;
|
327 |
+
}
|
328 |
+
else if (transformation.startsWith("-")){
|
329 |
+
acceptSingleBondedOxygen = true;
|
330 |
+
}
|
331 |
+
else if (transformation.startsWith("#")){
|
332 |
+
nitrido =true;
|
333 |
+
}
|
334 |
+
else{
|
335 |
+
throw new StructureBuildingException("Malformed infix transformation. Expected to start with either - or =. Transformation was: " +transformation);
|
336 |
+
}
|
337 |
+
if (transformation.length()<2 || transformation.charAt(1)!='O'){
|
338 |
+
throw new StructureBuildingException("Only replacement by oxygen is supported. Check infix defintions");
|
339 |
+
}
|
340 |
+
}
|
341 |
+
boolean infixAssignmentAmbiguous =false;
|
342 |
+
if ((acceptSingleBondedOxygen ||nitrido) && !acceptDoubleBondedOxygen){
|
343 |
+
if (singleBondedOxygen.size() ==0){
|
344 |
+
throw new StructureBuildingException("Cannot find single bonded oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!");
|
345 |
+
}
|
346 |
+
if (singleBondedOxygen.size() !=1){
|
347 |
+
infixAssignmentAmbiguous=true;
|
348 |
+
}
|
349 |
+
}
|
350 |
+
if (!acceptSingleBondedOxygen && (acceptDoubleBondedOxygen || nitrido)){
|
351 |
+
if (doubleBondedOxygen.size()==0){
|
352 |
+
throw new StructureBuildingException("Cannot find double bonded oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!");
|
353 |
+
}
|
354 |
+
if (doubleBondedOxygen.size() != 1){
|
355 |
+
infixAssignmentAmbiguous=true;
|
356 |
+
}
|
357 |
+
}
|
358 |
+
if (acceptSingleBondedOxygen && acceptDoubleBondedOxygen){
|
359 |
+
if (oxygenAvailable ==0){
|
360 |
+
throw new StructureBuildingException("Cannot find oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!");
|
361 |
+
}
|
362 |
+
if (oxygenAvailable !=1){
|
363 |
+
infixAssignmentAmbiguous=true;
|
364 |
+
}
|
365 |
+
}
|
366 |
+
|
367 |
+
Set<Atom> ambiguousElementAtoms = new LinkedHashSet<>();
|
368 |
+
Atom atomToUse = null;
|
369 |
+
if ((acceptDoubleBondedOxygen || nitrido) && doubleBondedOxygen.size()>0 ){
|
370 |
+
atomToUse = doubleBondedOxygen.removeFirst();
|
371 |
+
}
|
372 |
+
else if (acceptSingleBondedOxygen && singleBondedOxygen.size()>0 ){
|
373 |
+
atomToUse = singleBondedOxygen.removeFirst();
|
374 |
+
}
|
375 |
+
else{
|
376 |
+
throw new StructureBuildingException("Cannot find oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!");//this would be a bug
|
377 |
+
}
|
378 |
+
Fragment replacementFrag = state.fragManager.buildSMILES(replacementSMILES, SUFFIX_TYPE_VAL, NONE_LABELS_VAL);
|
379 |
+
if (replacementFrag.getOutAtomCount()>0){//SMILES include an indication of the bond order the replacement fragment will have, this is not intended to be an outatom
|
380 |
+
replacementFrag.removeOutAtom(0);
|
381 |
+
}
|
382 |
+
Atom atomThatWillReplaceOxygen =replacementFrag.getFirstAtom();
|
383 |
+
if (replacementFrag.getAtomCount()==1 && atomThatWillReplaceOxygen.getElement().isChalcogen()){
|
384 |
+
atomThatWillReplaceOxygen.setCharge(atomToUse.getCharge());
|
385 |
+
atomThatWillReplaceOxygen.setProtonsExplicitlyAddedOrRemoved(atomToUse.getProtonsExplicitlyAddedOrRemoved());
|
386 |
+
}
|
387 |
+
removeOrMoveObsoleteFunctionalAtoms(atomToUse, replacementFrag);//also will move charge if necessary
|
388 |
+
moveObsoleteOutAtoms(atomToUse, replacementFrag);//if the replaced atom was an outatom the fragments outatom list need to be corrected
|
389 |
+
if (nitrido){
|
390 |
+
atomToUse.getFirstBond().setOrder(3);
|
391 |
+
Atom removedHydroxy = singleBondedOxygen.removeFirst();
|
392 |
+
state.fragManager.removeAtomAndAssociatedBonds(removedHydroxy);
|
393 |
+
removeAssociatedFunctionalAtom(removedHydroxy);
|
394 |
+
}
|
395 |
+
state.fragManager.incorporateFragment(replacementFrag, atomToUse.getFrag());
|
396 |
+
state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(atomToUse, atomThatWillReplaceOxygen);
|
397 |
+
if (infixAssignmentAmbiguous){
|
398 |
+
ambiguousElementAtoms.add(atomThatWillReplaceOxygen);
|
399 |
+
if (atomThatWillReplaceOxygen.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)!=null){
|
400 |
+
ambiguousElementAtoms.addAll(atomThatWillReplaceOxygen.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT));
|
401 |
+
}
|
402 |
+
}
|
403 |
+
if (infixAssignmentAmbiguous){//record what atoms could have been replaced. Often this ambiguity is resolved later e.g. S-methyl ethanthioate
|
404 |
+
for (Atom a : doubleBondedOxygen) {
|
405 |
+
ambiguousElementAtoms.add(a);
|
406 |
+
if (a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)!=null){
|
407 |
+
ambiguousElementAtoms.addAll(a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT));
|
408 |
+
}
|
409 |
+
}
|
410 |
+
for (Atom a : singleBondedOxygen) {
|
411 |
+
ambiguousElementAtoms.add(a);
|
412 |
+
if (a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)!=null){
|
413 |
+
ambiguousElementAtoms.addAll(a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT));
|
414 |
+
}
|
415 |
+
}
|
416 |
+
for (Atom atom : ambiguousElementAtoms) {
|
417 |
+
atom.setProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT, ambiguousElementAtoms);
|
418 |
+
}
|
419 |
+
}
|
420 |
+
}
|
421 |
+
}
|
422 |
+
}
|
423 |
+
}
|
424 |
+
|
425 |
+
/*
|
426 |
+
* Functional class nomenclature
|
427 |
+
*/
|
428 |
+
|
429 |
+
/**
|
430 |
+
* Replaces the appropriate number of functional oxygen atoms with the corresponding fragment
|
431 |
+
* @param acidContainingRoot
|
432 |
+
* @param acidReplacingWord
|
433 |
+
* @throws ComponentGenerationException
|
434 |
+
* @throws StructureBuildingException
|
435 |
+
*/
|
436 |
+
private void processAcidReplacingFunctionalClassNomenclatureFullWord(Element acidContainingRoot, Element acidReplacingWord) throws ComponentGenerationException, StructureBuildingException {
|
437 |
+
String locant = acidReplacingWord.getAttributeValue(LOCANT_ATR);
|
438 |
+
Element acidReplacingGroup = StructureBuildingMethods.findRightMostGroupInBracket(acidReplacingWord);
|
439 |
+
if (acidReplacingGroup ==null){
|
440 |
+
throw new ComponentGenerationException("OPSIN bug: acid replacing group not found where one was expected for acidReplacingFunctionalGroup wordRule");
|
441 |
+
}
|
442 |
+
String functionalGroupName = acidReplacingGroup.getValue();
|
443 |
+
Fragment acidReplacingFrag = acidReplacingGroup.getFrag();
|
444 |
+
if (acidReplacingGroup.getParent().getChildCount() != 1){
|
445 |
+
throw new ComponentGenerationException("Unexpected qualifier to: " + functionalGroupName);
|
446 |
+
}
|
447 |
+
|
448 |
+
Element groupToBeModified = acidContainingRoot.getFirstChildElement(GROUP_EL);
|
449 |
+
List<Atom> oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(groupToBeModified);
|
450 |
+
if (oxygenAtoms.size() == 0){
|
451 |
+
oxygenAtoms = findFunctionalOxygenAtomsInGroup(groupToBeModified);
|
452 |
+
}
|
453 |
+
if (oxygenAtoms.size() == 0){
|
454 |
+
List<Element> conjunctiveSuffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, CONJUNCTIVESUFFIXGROUP_EL);
|
455 |
+
for (Element conjunctiveSuffixElement : conjunctiveSuffixElements) {
|
456 |
+
oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(conjunctiveSuffixElement));
|
457 |
+
}
|
458 |
+
}
|
459 |
+
if (oxygenAtoms.size() < 1){
|
460 |
+
throw new ComponentGenerationException("Insufficient oxygen to replace with " + functionalGroupName +"s in " + acidContainingRoot.getFirstChildElement(GROUP_EL).getValue());
|
461 |
+
}
|
462 |
+
|
463 |
+
boolean isAmide = functionalGroupName.equals("amide") || functionalGroupName.equals("amid");
|
464 |
+
if (isAmide) {
|
465 |
+
if (acidReplacingFrag.getAtomCount()!=1){
|
466 |
+
throw new ComponentGenerationException("OPSIN bug: " + functionalGroupName + " not found where expected");
|
467 |
+
}
|
468 |
+
Atom amideNitrogen = acidReplacingFrag.getFirstAtom();
|
469 |
+
amideNitrogen.neutraliseCharge();
|
470 |
+
amideNitrogen.clearLocants();
|
471 |
+
acidReplacingFrag.addMappingToAtomLocantMap("N", amideNitrogen);
|
472 |
+
}
|
473 |
+
Atom chosenOxygen = locant != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locant) : oxygenAtoms.get(0);
|
474 |
+
state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(chosenOxygen, acidReplacingFrag.getFirstAtom());
|
475 |
+
removeAssociatedFunctionalAtom(chosenOxygen);
|
476 |
+
}
|
477 |
+
|
478 |
+
|
479 |
+
/**
|
480 |
+
* Replaces the appropriate number of functional oxygen atoms with the corresponding fragment
|
481 |
+
* @param acidContainingRoot
|
482 |
+
* @param functionalWord
|
483 |
+
* @throws ComponentGenerationException
|
484 |
+
* @throws StructureBuildingException
|
485 |
+
*/
|
486 |
+
private void processAcidReplacingFunctionalClassNomenclatureFunctionalWord(Element acidContainingRoot, Element functionalWord) throws ComponentGenerationException, StructureBuildingException {
|
487 |
+
if (functionalWord !=null && functionalWord.getAttributeValue(TYPE_ATR).equals(WordType.functionalTerm.toString())){
|
488 |
+
Element functionalTerm = functionalWord.getFirstChildElement(FUNCTIONALTERM_EL);
|
489 |
+
if (functionalTerm ==null){
|
490 |
+
throw new ComponentGenerationException("OPSIN bug: functionalTerm word not found where one was expected for acidReplacingFunctionalGroup wordRule");
|
491 |
+
}
|
492 |
+
Element acidReplacingGroup = functionalTerm.getFirstChildElement(FUNCTIONALGROUP_EL);
|
493 |
+
String functionalGroupName = acidReplacingGroup.getValue();
|
494 |
+
Element possibleLocantOrMultiplier = OpsinTools.getPreviousSibling(acidReplacingGroup);
|
495 |
+
int numberOfAcidicHydroxysToReplace = 1;
|
496 |
+
String[] locants = null;
|
497 |
+
if (possibleLocantOrMultiplier != null){
|
498 |
+
if (possibleLocantOrMultiplier.getName().equals(MULTIPLIER_EL)){
|
499 |
+
numberOfAcidicHydroxysToReplace = Integer.parseInt(possibleLocantOrMultiplier.getAttributeValue(VALUE_ATR));
|
500 |
+
possibleLocantOrMultiplier.detach();
|
501 |
+
possibleLocantOrMultiplier = OpsinTools.getPreviousSibling(acidReplacingGroup);
|
502 |
+
}
|
503 |
+
if (possibleLocantOrMultiplier != null){
|
504 |
+
if (possibleLocantOrMultiplier.getName().equals(LOCANT_EL)){
|
505 |
+
locants = StringTools.removeDashIfPresent(possibleLocantOrMultiplier.getValue()).split(",");
|
506 |
+
possibleLocantOrMultiplier.detach();
|
507 |
+
}
|
508 |
+
else {
|
509 |
+
throw new ComponentGenerationException("Unexpected qualifier to acidReplacingFunctionalGroup functionalTerm");
|
510 |
+
}
|
511 |
+
}
|
512 |
+
}
|
513 |
+
if (functionalTerm.getChildCount() != 1){
|
514 |
+
throw new ComponentGenerationException("Unexpected qualifier to acidReplacingFunctionalGroup functionalTerm");
|
515 |
+
}
|
516 |
+
|
517 |
+
Element groupToBeModified = acidContainingRoot.getFirstChildElement(GROUP_EL);
|
518 |
+
List<Atom> oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(groupToBeModified);
|
519 |
+
if (oxygenAtoms.size()==0) {
|
520 |
+
oxygenAtoms = findFunctionalOxygenAtomsInGroup(groupToBeModified);
|
521 |
+
}
|
522 |
+
if (oxygenAtoms.size()==0) {
|
523 |
+
List<Element> conjunctiveSuffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, CONJUNCTIVESUFFIXGROUP_EL);
|
524 |
+
for (Element conjunctiveSuffixElement : conjunctiveSuffixElements) {
|
525 |
+
oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(conjunctiveSuffixElement));
|
526 |
+
}
|
527 |
+
}
|
528 |
+
if (numberOfAcidicHydroxysToReplace > oxygenAtoms.size()){
|
529 |
+
throw new ComponentGenerationException("Insufficient oxygen to replace with nitrogen in " + acidContainingRoot.getFirstChildElement(GROUP_EL).getValue());
|
530 |
+
}
|
531 |
+
boolean isAmide = functionalGroupName.equals("amide") || functionalGroupName.equals("amid");
|
532 |
+
if (isAmide) {
|
533 |
+
for (int i = 0; i < numberOfAcidicHydroxysToReplace; i++) {
|
534 |
+
Atom functionalOxygenToReplace = locants != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locants[i]) : oxygenAtoms.get(i);
|
535 |
+
removeAssociatedFunctionalAtom(functionalOxygenToReplace);
|
536 |
+
functionalOxygenToReplace.setElement(ChemEl.N);
|
537 |
+
}
|
538 |
+
}
|
539 |
+
else{
|
540 |
+
String groupValue = acidReplacingGroup.getAttributeValue(VALUE_ATR);
|
541 |
+
String labelsValue = acidReplacingGroup.getAttributeValue(LABELS_ATR);
|
542 |
+
Fragment acidReplacingFrag = state.fragManager.buildSMILES(groupValue, SUFFIX_TYPE_VAL, labelsValue != null ? labelsValue : NONE_LABELS_VAL);
|
543 |
+
Fragment acidFragment = groupToBeModified.getFrag();
|
544 |
+
if (acidFragment.hasLocant("2")){//prefer numeric locants on group to those of replacing group
|
545 |
+
for (Atom atom : acidReplacingFrag.getAtomList()) {
|
546 |
+
atom.clearLocants();
|
547 |
+
}
|
548 |
+
}
|
549 |
+
Atom firstFunctionalOxygenToReplace = locants != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locants[0]) : oxygenAtoms.get(0);
|
550 |
+
state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(firstFunctionalOxygenToReplace, acidReplacingFrag.getFirstAtom());
|
551 |
+
removeAssociatedFunctionalAtom(firstFunctionalOxygenToReplace);
|
552 |
+
for (int i = 1; i < numberOfAcidicHydroxysToReplace; i++) {
|
553 |
+
Fragment clonedHydrazide = state.fragManager.copyAndRelabelFragment(acidReplacingFrag, i);
|
554 |
+
Atom functionalOxygenToReplace = locants != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locants[i]) : oxygenAtoms.get(i);
|
555 |
+
state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(functionalOxygenToReplace, clonedHydrazide.getFirstAtom());
|
556 |
+
state.fragManager.incorporateFragment(clonedHydrazide, functionalOxygenToReplace.getFrag());
|
557 |
+
removeAssociatedFunctionalAtom(functionalOxygenToReplace);
|
558 |
+
}
|
559 |
+
state.fragManager.incorporateFragment(acidReplacingFrag, firstFunctionalOxygenToReplace.getFrag());
|
560 |
+
}
|
561 |
+
}
|
562 |
+
else{
|
563 |
+
throw new ComponentGenerationException("amide word not found where expected, bug?");
|
564 |
+
}
|
565 |
+
}
|
566 |
+
|
567 |
+
private Atom removeOxygenWithAppropriateLocant(List<Atom> oxygenAtoms, String locant) throws ComponentGenerationException {
|
568 |
+
for (Iterator<Atom> iterator = oxygenAtoms.iterator(); iterator.hasNext();) {
|
569 |
+
Atom atom = iterator.next();
|
570 |
+
if (atom.hasLocant(locant)) {
|
571 |
+
iterator.remove();
|
572 |
+
return atom;
|
573 |
+
}
|
574 |
+
}
|
575 |
+
//Look for the case whether the locant refers to the backbone
|
576 |
+
for (Iterator<Atom> iterator = oxygenAtoms.iterator(); iterator.hasNext();) {
|
577 |
+
Atom atom = iterator.next();
|
578 |
+
if (OpsinTools.depthFirstSearchForNonSuffixAtomWithLocant(atom, locant) != null){
|
579 |
+
iterator.remove();
|
580 |
+
return atom;
|
581 |
+
}
|
582 |
+
}
|
583 |
+
throw new ComponentGenerationException("Failed to find acid group at locant: " + locant);
|
584 |
+
}
|
585 |
+
|
586 |
+
|
587 |
+
/*
|
588 |
+
* Prefix functional replacement nomenclature
|
589 |
+
*/
|
590 |
+
|
591 |
+
|
592 |
+
private boolean acidHasSufficientHydrogenForSubstitutionInterpretation(Fragment acidFrag, int hydrogenRequiredForSubstitutionInterpretation, Element locantEl) {
|
593 |
+
List<Atom> atomsThatWouldBeSubstituted = new ArrayList<>();
|
594 |
+
if (locantEl !=null){
|
595 |
+
String[] possibleLocants = locantEl.getValue().split(",");
|
596 |
+
for (String locant : possibleLocants) {
|
597 |
+
Atom atomToBeSubstituted = acidFrag.getAtomByLocant(locant);
|
598 |
+
if (atomToBeSubstituted !=null){
|
599 |
+
atomsThatWouldBeSubstituted.add(atomToBeSubstituted);
|
600 |
+
}
|
601 |
+
else{
|
602 |
+
atomsThatWouldBeSubstituted.clear();
|
603 |
+
atomsThatWouldBeSubstituted.add(acidFrag.getDefaultInAtomOrFirstAtom());
|
604 |
+
break;
|
605 |
+
}
|
606 |
+
}
|
607 |
+
}
|
608 |
+
else{
|
609 |
+
atomsThatWouldBeSubstituted.add(acidFrag.getDefaultInAtomOrFirstAtom());
|
610 |
+
}
|
611 |
+
for (Atom atom : atomsThatWouldBeSubstituted) {
|
612 |
+
if (StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(atom) < hydrogenRequiredForSubstitutionInterpretation){
|
613 |
+
return false;//insufficient hydrogens for substitution interpretation
|
614 |
+
}
|
615 |
+
}
|
616 |
+
return true;
|
617 |
+
}
|
618 |
+
|
619 |
+
/**
|
620 |
+
* Performs replacement of oxygen atoms by chalogen atoms
|
621 |
+
* If this is ambiguous e.g. thioacetate then Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT is populated
|
622 |
+
* @param groupToBeModified
|
623 |
+
* @param locantEl
|
624 |
+
* @param numberOfAtomsToReplace
|
625 |
+
* @param replacementSmiles
|
626 |
+
* @return
|
627 |
+
* @throws StructureBuildingException
|
628 |
+
*/
|
629 |
+
private int performChalcogenFunctionalReplacement(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles) throws StructureBuildingException {
|
630 |
+
List<Atom> oxygenAtoms = findOxygenAtomsInApplicableSuffixes(groupToBeModified);
|
631 |
+
if (oxygenAtoms.size() == 0) {
|
632 |
+
oxygenAtoms = findOxygenAtomsInGroup(groupToBeModified);
|
633 |
+
}
|
634 |
+
if (locantEl != null) {//locants are used to indicate replacement on trivial groups
|
635 |
+
List<Atom> oxygenWithAppropriateLocants = pickOxygensWithAppropriateLocants(locantEl, oxygenAtoms);
|
636 |
+
if(oxygenWithAppropriateLocants.size() < numberOfAtomsToReplace) {
|
637 |
+
numberOfAtomsToReplace = 1;
|
638 |
+
//e.g. -1-thioureidomethyl
|
639 |
+
}
|
640 |
+
else{
|
641 |
+
locantEl.detach();
|
642 |
+
oxygenAtoms = oxygenWithAppropriateLocants;
|
643 |
+
}
|
644 |
+
}
|
645 |
+
List<Atom> replaceableAtoms = new ArrayList<>();
|
646 |
+
if (replacementSmiles.startsWith("=")) {
|
647 |
+
//e.g. thiono
|
648 |
+
replacementSmiles = replacementSmiles.substring(1);
|
649 |
+
for (Atom oxygen : oxygenAtoms) {
|
650 |
+
int incomingValency = oxygen.getIncomingValency();
|
651 |
+
int bondCount = oxygen.getBondCount();
|
652 |
+
if (bondCount == 1 && incomingValency == 2) {
|
653 |
+
replaceableAtoms.add(oxygen);
|
654 |
+
}
|
655 |
+
}
|
656 |
+
}
|
657 |
+
else {
|
658 |
+
List<Atom> doubleBondedOxygen = new ArrayList<>();
|
659 |
+
List<Atom> singleBondedOxygen = new ArrayList<>();
|
660 |
+
List<Atom> ethericOxygen = new ArrayList<>();
|
661 |
+
for (Atom oxygen : oxygenAtoms) {
|
662 |
+
int incomingValency = oxygen.getIncomingValency();
|
663 |
+
int bondCount = oxygen.getBondCount();
|
664 |
+
if (bondCount == 1 && incomingValency ==2 ) {
|
665 |
+
doubleBondedOxygen.add(oxygen);
|
666 |
+
}
|
667 |
+
else if (bondCount == 1 && incomingValency == 1) {
|
668 |
+
singleBondedOxygen.add(oxygen);
|
669 |
+
}
|
670 |
+
else if (bondCount == 2 && incomingValency == 2) {
|
671 |
+
ethericOxygen.add(oxygen);
|
672 |
+
}
|
673 |
+
}
|
674 |
+
replaceableAtoms.addAll(doubleBondedOxygen);
|
675 |
+
replaceableAtoms.addAll(singleBondedOxygen);
|
676 |
+
replaceableAtoms.addAll(ethericOxygen);
|
677 |
+
}
|
678 |
+
|
679 |
+
int totalOxygen = replaceableAtoms.size();
|
680 |
+
if (numberOfAtomsToReplace >1){
|
681 |
+
if (totalOxygen < numberOfAtomsToReplace){
|
682 |
+
numberOfAtomsToReplace=1;
|
683 |
+
}
|
684 |
+
}
|
685 |
+
|
686 |
+
int atomsReplaced =0;
|
687 |
+
if (totalOxygen >=numberOfAtomsToReplace){//check that there atleast as many oxygens as requested replacements
|
688 |
+
boolean prefixAssignmentAmbiguous =false;
|
689 |
+
Set<Atom> ambiguousElementAtoms = new LinkedHashSet<>();
|
690 |
+
if (totalOxygen != numberOfAtomsToReplace){
|
691 |
+
prefixAssignmentAmbiguous=true;
|
692 |
+
}
|
693 |
+
|
694 |
+
for (Atom atomToReplace : replaceableAtoms) {
|
695 |
+
if (atomsReplaced == numberOfAtomsToReplace){
|
696 |
+
ambiguousElementAtoms.add(atomToReplace);
|
697 |
+
continue;
|
698 |
+
}
|
699 |
+
else{
|
700 |
+
state.fragManager.replaceAtomWithSmiles(atomToReplace, replacementSmiles);
|
701 |
+
if (prefixAssignmentAmbiguous){
|
702 |
+
ambiguousElementAtoms.add(atomToReplace);
|
703 |
+
}
|
704 |
+
}
|
705 |
+
atomsReplaced++;
|
706 |
+
}
|
707 |
+
|
708 |
+
if (prefixAssignmentAmbiguous){//record what atoms could have been replaced. Often this ambiguity is resolved later e.g. S-methyl thioacetate
|
709 |
+
for (Atom atom : ambiguousElementAtoms) {
|
710 |
+
atom.setProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT, ambiguousElementAtoms);
|
711 |
+
}
|
712 |
+
}
|
713 |
+
}
|
714 |
+
return atomsReplaced;
|
715 |
+
}
|
716 |
+
|
717 |
+
|
718 |
+
/**
|
719 |
+
* Converts functional oxygen to peroxy e.g. peroxybenzoic acid
|
720 |
+
* Returns the number of oxygen replaced
|
721 |
+
* @param groupToBeModified
|
722 |
+
* @param locantEl
|
723 |
+
* @param numberOfAtomsToReplace
|
724 |
+
* @return
|
725 |
+
* @throws StructureBuildingException
|
726 |
+
*/
|
727 |
+
private int performPeroxyFunctionalReplacement(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace) throws StructureBuildingException {
|
728 |
+
List<Atom> oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(groupToBeModified);
|
729 |
+
if (oxygenAtoms.size()==0){
|
730 |
+
oxygenAtoms = findEthericOxygenAtomsInGroup(groupToBeModified);
|
731 |
+
oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(groupToBeModified));
|
732 |
+
}
|
733 |
+
if (locantEl !=null){
|
734 |
+
List<Atom> oxygenWithAppropriateLocants = pickOxygensWithAppropriateLocants(locantEl, oxygenAtoms);
|
735 |
+
if(oxygenWithAppropriateLocants.size() < numberOfAtomsToReplace){
|
736 |
+
numberOfAtomsToReplace =1;
|
737 |
+
}
|
738 |
+
else{
|
739 |
+
locantEl.detach();
|
740 |
+
oxygenAtoms = oxygenWithAppropriateLocants;
|
741 |
+
}
|
742 |
+
}
|
743 |
+
if (numberOfAtomsToReplace >1 && oxygenAtoms.size() < numberOfAtomsToReplace){
|
744 |
+
numberOfAtomsToReplace=1;
|
745 |
+
}
|
746 |
+
int atomsReplaced = 0;
|
747 |
+
if (oxygenAtoms.size() >=numberOfAtomsToReplace){//check that there atleast as many oxygens as requested replacements
|
748 |
+
atomsReplaced = numberOfAtomsToReplace;
|
749 |
+
for (int j = 0; j < numberOfAtomsToReplace; j++) {
|
750 |
+
Atom oxygenToReplace = oxygenAtoms.get(j);
|
751 |
+
if (oxygenToReplace.getBondCount()==2){//etheric oxygen
|
752 |
+
Fragment newOxygen = state.fragManager.buildSMILES("O", SUFFIX_TYPE_VAL, NONE_LABELS_VAL);
|
753 |
+
Bond bondToRemove = oxygenToReplace.getFirstBond();
|
754 |
+
Atom atomToAttachTo = bondToRemove.getFromAtom() == oxygenToReplace ? bondToRemove.getToAtom() : bondToRemove.getFromAtom();
|
755 |
+
state.fragManager.createBond(atomToAttachTo, newOxygen.getFirstAtom(), 1);
|
756 |
+
state.fragManager.createBond(newOxygen.getFirstAtom(), oxygenToReplace, 1);
|
757 |
+
state.fragManager.removeBond(bondToRemove);
|
758 |
+
state.fragManager.incorporateFragment(newOxygen, groupToBeModified.getFrag());
|
759 |
+
}
|
760 |
+
else{
|
761 |
+
Fragment replacementFrag = state.fragManager.buildSMILES("OO", SUFFIX_TYPE_VAL, NONE_LABELS_VAL);
|
762 |
+
removeOrMoveObsoleteFunctionalAtoms(oxygenToReplace, replacementFrag);
|
763 |
+
state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(oxygenToReplace, replacementFrag.getFirstAtom());
|
764 |
+
state.fragManager.incorporateFragment(replacementFrag, groupToBeModified.getFrag());
|
765 |
+
}
|
766 |
+
}
|
767 |
+
}
|
768 |
+
return atomsReplaced;
|
769 |
+
}
|
770 |
+
|
771 |
+
/**
|
772 |
+
* Replaces double bonded oxygen and/or single bonded oxygen depending on the input SMILES
|
773 |
+
* SMILES with a valency 1 outAtom replace -O, SMILES with a valency 2 outAtom replace =O
|
774 |
+
* SMILES with a valency 3 outAtom replace -O and =O (nitrido)
|
775 |
+
* Returns the number of oxygen replaced
|
776 |
+
* @param groupToBeModified
|
777 |
+
* @param locantEl
|
778 |
+
* @param numberOfAtomsToReplace
|
779 |
+
* @param replacementSmiles
|
780 |
+
* @return
|
781 |
+
* @throws StructureBuildingException
|
782 |
+
*/
|
783 |
+
private int performFunctionalReplacementOnAcid(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles) throws StructureBuildingException {
|
784 |
+
int outValency;
|
785 |
+
if (replacementSmiles.startsWith("-")){
|
786 |
+
outValency =1;
|
787 |
+
}
|
788 |
+
else if (replacementSmiles.startsWith("=")){
|
789 |
+
outValency =2;
|
790 |
+
}
|
791 |
+
else if (replacementSmiles.startsWith("#")){
|
792 |
+
outValency =3;
|
793 |
+
}
|
794 |
+
else{
|
795 |
+
throw new StructureBuildingException("OPSIN bug: Unexpected valency on fragment for prefix functional replacement");
|
796 |
+
}
|
797 |
+
replacementSmiles = replacementSmiles.substring(1);
|
798 |
+
List<Atom> oxygenAtoms = findOxygenAtomsInApplicableSuffixes(groupToBeModified);
|
799 |
+
if (oxygenAtoms.size()==0){
|
800 |
+
oxygenAtoms = findOxygenAtomsInGroup(groupToBeModified);
|
801 |
+
}
|
802 |
+
if (locantEl !=null){//locants are used to indicate replacement on trivial groups
|
803 |
+
List<Atom> oxygenWithAppropriateLocants = pickOxygensWithAppropriateLocants(locantEl, oxygenAtoms);
|
804 |
+
List<Atom> singleBondedOxygen = new ArrayList<>();
|
805 |
+
List<Atom> terminalDoubleBondedOxygen = new ArrayList<>();
|
806 |
+
populateTerminalSingleAndDoubleBondedOxygen(oxygenWithAppropriateLocants, singleBondedOxygen, terminalDoubleBondedOxygen);
|
807 |
+
if (outValency ==1){
|
808 |
+
oxygenWithAppropriateLocants.removeAll(terminalDoubleBondedOxygen);
|
809 |
+
}
|
810 |
+
else if (outValency ==2){
|
811 |
+
oxygenWithAppropriateLocants.removeAll(singleBondedOxygen);
|
812 |
+
}
|
813 |
+
if(oxygenWithAppropriateLocants.size() < numberOfAtomsToReplace){
|
814 |
+
numberOfAtomsToReplace =1;
|
815 |
+
//e.g. -1-thioureidomethyl
|
816 |
+
}
|
817 |
+
else{
|
818 |
+
locantEl.detach();
|
819 |
+
oxygenAtoms = oxygenWithAppropriateLocants;
|
820 |
+
}
|
821 |
+
}
|
822 |
+
List<Atom> singleBondedOxygen = new ArrayList<>();
|
823 |
+
List<Atom> terminalDoubleBondedOxygen = new ArrayList<>();
|
824 |
+
populateTerminalSingleAndDoubleBondedOxygen(oxygenAtoms, singleBondedOxygen, terminalDoubleBondedOxygen);
|
825 |
+
if (outValency ==1){
|
826 |
+
oxygenAtoms.removeAll(terminalDoubleBondedOxygen);
|
827 |
+
}
|
828 |
+
else if (outValency ==2){
|
829 |
+
oxygenAtoms.removeAll(singleBondedOxygen);
|
830 |
+
//favour bridging oxygen over double bonded oxygen c.f. imidodicarbonate
|
831 |
+
oxygenAtoms.removeAll(terminalDoubleBondedOxygen);
|
832 |
+
oxygenAtoms.addAll(terminalDoubleBondedOxygen);
|
833 |
+
}
|
834 |
+
else {
|
835 |
+
if (singleBondedOxygen.size()==0 || terminalDoubleBondedOxygen.size()==0){
|
836 |
+
throw new StructureBuildingException("Both a -OH and =O are required for nitrido prefix functional replacement");
|
837 |
+
}
|
838 |
+
oxygenAtoms.removeAll(singleBondedOxygen);
|
839 |
+
}
|
840 |
+
if (numberOfAtomsToReplace >1 && oxygenAtoms.size() < numberOfAtomsToReplace){
|
841 |
+
numberOfAtomsToReplace=1;
|
842 |
+
}
|
843 |
+
|
844 |
+
int atomsReplaced =0;
|
845 |
+
if (oxygenAtoms.size() >=numberOfAtomsToReplace){//check that there atleast as many oxygens as requested replacements
|
846 |
+
for (Atom atomToReplace : oxygenAtoms) {
|
847 |
+
if (atomsReplaced == numberOfAtomsToReplace){
|
848 |
+
continue;
|
849 |
+
}
|
850 |
+
else{
|
851 |
+
Fragment replacementFrag = state.fragManager.buildSMILES(replacementSmiles, atomToReplace.getFrag().getTokenEl(), NONE_LABELS_VAL);
|
852 |
+
if (outValency ==3){//special case for nitrido
|
853 |
+
atomToReplace.getFirstBond().setOrder(3);
|
854 |
+
Atom removedHydroxy = singleBondedOxygen.remove(0);
|
855 |
+
state.fragManager.removeAtomAndAssociatedBonds(removedHydroxy);
|
856 |
+
removeAssociatedFunctionalAtom(removedHydroxy);
|
857 |
+
}
|
858 |
+
state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(atomToReplace, replacementFrag.getFirstAtom());
|
859 |
+
if (outValency ==1){
|
860 |
+
removeOrMoveObsoleteFunctionalAtoms(atomToReplace, replacementFrag);
|
861 |
+
}
|
862 |
+
moveObsoleteOutAtoms(atomToReplace, replacementFrag);
|
863 |
+
state.fragManager.incorporateFragment(replacementFrag, atomToReplace.getFrag());
|
864 |
+
}
|
865 |
+
atomsReplaced++;
|
866 |
+
}
|
867 |
+
}
|
868 |
+
return atomsReplaced;
|
869 |
+
}
|
870 |
+
|
871 |
+
/*
|
872 |
+
* Infix functional replacement nomenclature
|
873 |
+
*/
|
874 |
+
|
875 |
+
/**
|
876 |
+
* This block handles infix multiplication. Unless brackets are provided this is ambiguous without knowledge of the suffix that is being modified
|
877 |
+
* For example butandithione could be intepreted as butandi(thione) or butan(dithi)one.
|
878 |
+
* Obviously the latter is wrong in this case but it is the correct interpretation for butandithiate
|
879 |
+
* @param suffixes
|
880 |
+
* @param suffixFragments
|
881 |
+
* @param suffix
|
882 |
+
* @param infixTransformations
|
883 |
+
* @param oxygenAvailable
|
884 |
+
* @throws ComponentGenerationException
|
885 |
+
* @throws StructureBuildingException
|
886 |
+
*/
|
887 |
+
private void disambiguateMultipliedInfixMeaning(List<Element> suffixes,
|
888 |
+
List<Fragment> suffixFragments,Element suffix, List<String> infixTransformations, int oxygenAvailable)
|
889 |
+
throws ComponentGenerationException, StructureBuildingException {
|
890 |
+
Element possibleInfix = OpsinTools.getPreviousSibling(suffix);
|
891 |
+
if (possibleInfix.getName().equals(INFIX_EL)){//the infix is only left when there was ambiguity
|
892 |
+
Element possibleMultiplier = OpsinTools.getPreviousSibling(possibleInfix);
|
893 |
+
if (possibleMultiplier.getName().equals(MULTIPLIER_EL)){
|
894 |
+
int multiplierValue =Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR));
|
895 |
+
if (infixTransformations.size() + multiplierValue-1 <=oxygenAvailable){//multiplier means multiply the infix e.g. butandithiate
|
896 |
+
for (int j = 1; j < multiplierValue; j++) {
|
897 |
+
infixTransformations.add(0, infixTransformations.get(0));
|
898 |
+
}
|
899 |
+
}
|
900 |
+
else{
|
901 |
+
Element possibleLocant = OpsinTools.getPreviousSibling(possibleMultiplier);
|
902 |
+
String[] locants = null;
|
903 |
+
if (possibleLocant.getName().equals(LOCANT_EL)) {
|
904 |
+
locants = possibleLocant.getValue().split(",");
|
905 |
+
}
|
906 |
+
if (locants !=null){
|
907 |
+
if (locants.length!=multiplierValue){
|
908 |
+
throw new ComponentGenerationException("Multiplier/locant disagreement when multiplying infixed suffix");
|
909 |
+
}
|
910 |
+
suffix.addAttribute(new Attribute(LOCANT_ATR, locants[0]));
|
911 |
+
}
|
912 |
+
suffix.addAttribute(new Attribute(MULTIPLIED_ATR, "multiplied"));
|
913 |
+
for (int j = 1; j < multiplierValue; j++) {//multiplier means multiply the infixed suffix e.g. butandithione
|
914 |
+
Element newSuffix = suffix.copy();
|
915 |
+
Fragment newSuffixFrag = state.fragManager.copyFragment(suffix.getFrag());
|
916 |
+
newSuffix.setFrag(newSuffixFrag);
|
917 |
+
suffixFragments.add(newSuffixFrag);
|
918 |
+
OpsinTools.insertAfter(suffix, newSuffix);
|
919 |
+
suffixes.add(newSuffix);
|
920 |
+
if (locants !=null){//assign locants if available
|
921 |
+
newSuffix.getAttribute(LOCANT_ATR).setValue(locants[j]);
|
922 |
+
}
|
923 |
+
}
|
924 |
+
if (locants!=null){
|
925 |
+
possibleLocant.detach();
|
926 |
+
}
|
927 |
+
}
|
928 |
+
possibleMultiplier.detach();
|
929 |
+
possibleInfix.detach();
|
930 |
+
}
|
931 |
+
else{
|
932 |
+
throw new ComponentGenerationException("Multiplier expected in front of ambiguous infix");
|
933 |
+
}
|
934 |
+
}
|
935 |
+
}
|
936 |
+
|
937 |
+
/*
|
938 |
+
* Convenience Methods
|
939 |
+
*/
|
940 |
+
|
941 |
+
/**
|
942 |
+
* Given an atom that is to be replaced by a functional replacement fragment
|
943 |
+
* determines whether this atom is a functional atom and, if it is, performs the following processes:
|
944 |
+
* The functionalAtom is removed. If the the replacement fragment is an atom of O/S/Se/Te or the
|
945 |
+
* the terminal atom of the fragment is a single bonded O/S/Se/Te a functionAom is added to this atom.
|
946 |
+
* @param atomToBeReplaced
|
947 |
+
* @param replacementFrag
|
948 |
+
*/
|
949 |
+
private void removeOrMoveObsoleteFunctionalAtoms(Atom atomToBeReplaced, Fragment replacementFrag){
|
950 |
+
List<Atom> replacementAtomList = replacementFrag.getAtomList();
|
951 |
+
Fragment origFrag = atomToBeReplaced.getFrag();
|
952 |
+
for (int i = origFrag.getFunctionalAtomCount() - 1; i >=0; i--) {
|
953 |
+
FunctionalAtom functionalAtom = origFrag.getFunctionalAtom(i);
|
954 |
+
if (atomToBeReplaced.equals(functionalAtom.getAtom())){
|
955 |
+
atomToBeReplaced.getFrag().removeFunctionalAtom(i);
|
956 |
+
Atom terminalAtomOfReplacementFrag = replacementAtomList.get(replacementAtomList.size()-1);
|
957 |
+
if ((terminalAtomOfReplacementFrag.getIncomingValency() ==1 || replacementAtomList.size()==1)&& terminalAtomOfReplacementFrag.getElement().isChalcogen()){
|
958 |
+
replacementFrag.addFunctionalAtom(terminalAtomOfReplacementFrag);
|
959 |
+
terminalAtomOfReplacementFrag.setCharge(atomToBeReplaced.getCharge());
|
960 |
+
terminalAtomOfReplacementFrag.setProtonsExplicitlyAddedOrRemoved(atomToBeReplaced.getProtonsExplicitlyAddedOrRemoved());
|
961 |
+
}
|
962 |
+
atomToBeReplaced.neutraliseCharge();
|
963 |
+
}
|
964 |
+
}
|
965 |
+
}
|
966 |
+
|
967 |
+
/**
|
968 |
+
* Given an atom that is to be replaced by a functional replacement fragment
|
969 |
+
* determines whether this atom has outvalency and if it does removes the outatom from the atom's fragment
|
970 |
+
* and adds an outatom to the replacementFrag
|
971 |
+
* @param atomToBeReplaced
|
972 |
+
* @param replacementFrag
|
973 |
+
*/
|
974 |
+
private void moveObsoleteOutAtoms(Atom atomToBeReplaced, Fragment replacementFrag){
|
975 |
+
if (atomToBeReplaced.getOutValency() >0){//this is not known to occur in well formed IUPAC names but would occur in thioxy (as a suffix)
|
976 |
+
List<Atom> replacementAtomList = replacementFrag.getAtomList();
|
977 |
+
Fragment origFrag = atomToBeReplaced.getFrag();
|
978 |
+
for (int i = origFrag.getOutAtomCount() - 1; i >=0; i--) {
|
979 |
+
OutAtom outAtom = origFrag.getOutAtom(i);
|
980 |
+
if (atomToBeReplaced.equals(outAtom.getAtom())){
|
981 |
+
atomToBeReplaced.getFrag().removeOutAtom(i);
|
982 |
+
Atom terminalAtomOfReplacementFrag = replacementAtomList.get(replacementAtomList.size()-1);
|
983 |
+
replacementFrag.addOutAtom(terminalAtomOfReplacementFrag, outAtom.getValency(), outAtom.isSetExplicitly());
|
984 |
+
}
|
985 |
+
}
|
986 |
+
}
|
987 |
+
}
|
988 |
+
|
989 |
+
private void removeAssociatedFunctionalAtom(Atom atomWithFunctionalAtom) throws StructureBuildingException {
|
990 |
+
Fragment frag = atomWithFunctionalAtom.getFrag();
|
991 |
+
for (int i = frag.getFunctionalAtomCount() - 1; i >=0; i--) {
|
992 |
+
FunctionalAtom functionalAtom = frag.getFunctionalAtom(i);
|
993 |
+
if (atomWithFunctionalAtom.equals(functionalAtom.getAtom())){
|
994 |
+
atomWithFunctionalAtom.getFrag().removeFunctionalAtom(i);
|
995 |
+
return;
|
996 |
+
}
|
997 |
+
}
|
998 |
+
throw new StructureBuildingException("OPSIN bug: Unable to find associated functionalAtom");
|
999 |
+
}
|
1000 |
+
|
1001 |
+
|
1002 |
+
/**
|
1003 |
+
* Returns the subset of oxygenAtoms that possess one of the locants in locantEl
|
1004 |
+
* Searches for locant on nearest non suffix atom in case of suffixes
|
1005 |
+
* @param locantEl
|
1006 |
+
* @param oxygenAtoms
|
1007 |
+
* @return
|
1008 |
+
*/
|
1009 |
+
private List<Atom> pickOxygensWithAppropriateLocants(Element locantEl, List<Atom> oxygenAtoms) {
|
1010 |
+
String[] possibleLocants = locantEl.getValue().split(",");
|
1011 |
+
boolean pLocantSpecialCase = allLocantsP(possibleLocants);
|
1012 |
+
List<Atom> oxygenWithAppropriateLocants = new ArrayList<>();
|
1013 |
+
for (Atom atom : oxygenAtoms) {
|
1014 |
+
List<String> atomlocants = atom.getLocants();
|
1015 |
+
if (atomlocants.size() > 0) {
|
1016 |
+
for (String locantVal : possibleLocants) {
|
1017 |
+
if (atomlocants.contains(locantVal)) {
|
1018 |
+
oxygenWithAppropriateLocants.add(atom);
|
1019 |
+
break;
|
1020 |
+
}
|
1021 |
+
}
|
1022 |
+
}
|
1023 |
+
else if (pLocantSpecialCase) {
|
1024 |
+
for (Atom neighbour : atom.getAtomNeighbours()) {
|
1025 |
+
if (neighbour.getElement() == ChemEl.P) {
|
1026 |
+
oxygenWithAppropriateLocants.add(atom);
|
1027 |
+
break;
|
1028 |
+
}
|
1029 |
+
}
|
1030 |
+
}
|
1031 |
+
else {
|
1032 |
+
Atom atomWithNumericLocant = OpsinTools.depthFirstSearchForAtomWithNumericLocant(atom);
|
1033 |
+
if (atomWithNumericLocant != null) {
|
1034 |
+
List<String> atomWithNumericLocantLocants = atomWithNumericLocant.getLocants();
|
1035 |
+
for (String locantVal : possibleLocants) {
|
1036 |
+
if (atomWithNumericLocantLocants.contains(locantVal)) {
|
1037 |
+
oxygenWithAppropriateLocants.add(atom);
|
1038 |
+
break;
|
1039 |
+
}
|
1040 |
+
}
|
1041 |
+
}
|
1042 |
+
}
|
1043 |
+
}
|
1044 |
+
return oxygenWithAppropriateLocants;
|
1045 |
+
}
|
1046 |
+
|
1047 |
+
private boolean allLocantsP(String[] locants) {
|
1048 |
+
if (locants.length == 0) {
|
1049 |
+
return false;
|
1050 |
+
}
|
1051 |
+
for (String locant : locants) {
|
1052 |
+
if (!locant.equals("P")) {
|
1053 |
+
return false;
|
1054 |
+
}
|
1055 |
+
}
|
1056 |
+
return true;
|
1057 |
+
}
|
1058 |
+
|
1059 |
+
/**
|
1060 |
+
* Returns oxygen atoms in suffixes with functionalAtoms
|
1061 |
+
* @param groupToBeModified
|
1062 |
+
* @return
|
1063 |
+
*/
|
1064 |
+
private List<Atom> findFunctionalOxygenAtomsInApplicableSuffixes(Element groupToBeModified) {
|
1065 |
+
List<Element> suffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, SUFFIX_EL);
|
1066 |
+
List<Atom> oxygenAtoms = new ArrayList<>();
|
1067 |
+
for (Element suffix : suffixElements) {
|
1068 |
+
Fragment suffixFrag = suffix.getFrag();
|
1069 |
+
if (suffixFrag != null) {//null for non carboxylic acids
|
1070 |
+
for (int i = 0, l = suffixFrag.getFunctionalAtomCount(); i < l; i++) {
|
1071 |
+
Atom a = suffixFrag.getFunctionalAtom(i).getAtom();
|
1072 |
+
if (a.getElement() == ChemEl.O) {
|
1073 |
+
oxygenAtoms.add(a);
|
1074 |
+
}
|
1075 |
+
}
|
1076 |
+
}
|
1077 |
+
}
|
1078 |
+
return oxygenAtoms;
|
1079 |
+
}
|
1080 |
+
|
1081 |
+
/**
|
1082 |
+
* Returns functional oxygen atoms in groupToBeModified
|
1083 |
+
* @param groupToBeModified
|
1084 |
+
* @return
|
1085 |
+
*/
|
1086 |
+
private List<Atom> findFunctionalOxygenAtomsInGroup(Element groupToBeModified) {
|
1087 |
+
List<Atom> oxygenAtoms = new ArrayList<>();
|
1088 |
+
Fragment frag = groupToBeModified.getFrag();
|
1089 |
+
for (int i = 0, l = frag.getFunctionalAtomCount(); i < l; i++) {
|
1090 |
+
Atom a = frag.getFunctionalAtom(i).getAtom();
|
1091 |
+
if (a.getElement() == ChemEl.O){
|
1092 |
+
oxygenAtoms.add(a);
|
1093 |
+
}
|
1094 |
+
}
|
1095 |
+
return oxygenAtoms;
|
1096 |
+
}
|
1097 |
+
|
1098 |
+
|
1099 |
+
/**
|
1100 |
+
* Returns etheric oxygen atoms in groupToBeModified
|
1101 |
+
* @param groupToBeModified
|
1102 |
+
* @return
|
1103 |
+
*/
|
1104 |
+
private List<Atom> findEthericOxygenAtomsInGroup(Element groupToBeModified) {
|
1105 |
+
List<Atom> oxygenAtoms = new ArrayList<>();
|
1106 |
+
List<Atom> atomList = groupToBeModified.getFrag().getAtomList();
|
1107 |
+
for (Atom a: atomList) {
|
1108 |
+
if (a.getElement() == ChemEl.O && a.getBondCount()==2 && a.getCharge()==0 && a.getIncomingValency()==2){
|
1109 |
+
oxygenAtoms.add(a);
|
1110 |
+
}
|
1111 |
+
}
|
1112 |
+
return oxygenAtoms;
|
1113 |
+
}
|
1114 |
+
|
1115 |
+
|
1116 |
+
/**
|
1117 |
+
* Returns oxygen atoms in suffixes with functionalAtoms or acidStem suffixes or aldehyde suffixes (1979 C-531)
|
1118 |
+
* @param groupToBeModified
|
1119 |
+
* @return
|
1120 |
+
*/
|
1121 |
+
private List<Atom> findOxygenAtomsInApplicableSuffixes(Element groupToBeModified) {
|
1122 |
+
List<Element> suffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, SUFFIX_EL);
|
1123 |
+
List<Atom> oxygenAtoms = new ArrayList<>();
|
1124 |
+
for (Element suffix : suffixElements) {
|
1125 |
+
Fragment suffixFrag = suffix.getFrag();
|
1126 |
+
if (suffixFrag != null) {//null for non carboxylic acids
|
1127 |
+
if (suffixFrag.getFunctionalAtomCount() > 0 || groupToBeModified.getAttributeValue(TYPE_ATR).equals(ACIDSTEM_TYPE_VAL) || suffix.getAttributeValue(VALUE_ATR).equals("aldehyde")) {
|
1128 |
+
List<Atom> atomList = suffixFrag.getAtomList();
|
1129 |
+
for (Atom a : atomList) {
|
1130 |
+
if (a.getElement() == ChemEl.O) {
|
1131 |
+
oxygenAtoms.add(a);
|
1132 |
+
}
|
1133 |
+
}
|
1134 |
+
}
|
1135 |
+
}
|
1136 |
+
}
|
1137 |
+
return oxygenAtoms;
|
1138 |
+
}
|
1139 |
+
|
1140 |
+
/**
|
1141 |
+
* Returns oxygen atoms in groupToBeModified
|
1142 |
+
* @param groupToBeModified
|
1143 |
+
* @return
|
1144 |
+
*/
|
1145 |
+
private List<Atom> findOxygenAtomsInGroup(Element groupToBeModified) {
|
1146 |
+
List<Atom> oxygenAtoms = new ArrayList<>();
|
1147 |
+
List<Atom> atomList = groupToBeModified.getFrag().getAtomList();
|
1148 |
+
for (Atom a : atomList) {
|
1149 |
+
if (a.getElement() == ChemEl.O){
|
1150 |
+
oxygenAtoms.add(a);
|
1151 |
+
}
|
1152 |
+
}
|
1153 |
+
return oxygenAtoms;
|
1154 |
+
}
|
1155 |
+
|
1156 |
+
|
1157 |
+
private void populateTerminalSingleAndDoubleBondedOxygen(List<Atom> atomList, List<Atom> singleBondedOxygen, List<Atom> doubleBondedOxygen) throws StructureBuildingException {
|
1158 |
+
for (Atom a : atomList) {
|
1159 |
+
if (a.getElement() == ChemEl.O){//find terminal oxygens
|
1160 |
+
if (a.getBondCount()==1){
|
1161 |
+
int incomingValency = a.getIncomingValency();
|
1162 |
+
if (incomingValency ==2){
|
1163 |
+
doubleBondedOxygen.add(a);
|
1164 |
+
}
|
1165 |
+
else if (incomingValency ==1){
|
1166 |
+
singleBondedOxygen.add(a);
|
1167 |
+
}
|
1168 |
+
else{
|
1169 |
+
throw new StructureBuildingException("Unexpected bond order to oxygen; excepted 1 or 2 found: " +incomingValency);
|
1170 |
+
}
|
1171 |
+
|
1172 |
+
}
|
1173 |
+
}
|
1174 |
+
}
|
1175 |
+
}
|
1176 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingBuilder.java
ADDED
@@ -0,0 +1,1030 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.util.ArrayList;
|
4 |
+
import java.util.Arrays;
|
5 |
+
import java.util.Collections;
|
6 |
+
import java.util.HashMap;
|
7 |
+
import java.util.HashSet;
|
8 |
+
import java.util.LinkedHashSet;
|
9 |
+
import java.util.List;
|
10 |
+
import java.util.Locale;
|
11 |
+
import java.util.Map;
|
12 |
+
import java.util.Set;
|
13 |
+
|
14 |
+
import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*;
|
15 |
+
import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*;
|
16 |
+
|
17 |
+
/**
|
18 |
+
* Assembles fused rings named using fusion nomenclature
|
19 |
+
* @author dl387
|
20 |
+
*
|
21 |
+
*/
|
22 |
+
class FusedRingBuilder {
|
23 |
+
private final BuildState state; // build state; its fragManager is used to copy, merge and prune fragments
|
24 |
+
private final List<Element> groupsInFusedRing; // the group elements that make up this fused ring system; the last one is the parent
|
25 |
+
private final Element lastGroup; // last entry of groupsInFusedRing; reused as the element representing the finished fused ring
|
26 |
+
private final Fragment parentRing; // fragment of lastGroup; every other component fragment is incorporated into it
|
27 |
+
private final Map<Integer,Fragment> fragmentInScopeForEachFusionLevel = new HashMap<>(); // fusion level -> fragment that components of that level fuse onto (level 0 is parentRing)
|
28 |
+
private final Map<Atom, Atom> atomsToRemoveToReplacementAtom = new HashMap<>(); // keys are deleted by removeMergedAtoms(); presumably each maps to the atom that replaces it (populated outside this view - confirm)
|
29 |
+
|
30 |
+
private FusedRingBuilder(BuildState state, List<Element> groupsInFusedRing) {
|
31 |
+
this.state = state;
|
32 |
+
this.groupsInFusedRing = groupsInFusedRing;
|
33 |
+
lastGroup = groupsInFusedRing.get(groupsInFusedRing.size()-1);
|
34 |
+
parentRing = lastGroup.getFrag();
|
35 |
+
fragmentInScopeForEachFusionLevel.put(0, parentRing);
|
36 |
+
}
|
37 |
+
|
38 |
+
/**
|
39 |
+
* Master method for processing fused rings. Fuses groups together
|
40 |
+
* @param state: contains the current id and fragment manager
|
41 |
+
* @param subOrRoot Element (substituent or root)
|
42 |
+
* @throws StructureBuildingException
|
43 |
+
*/
|
44 |
+
static void processFusedRings(BuildState state, Element subOrRoot) throws StructureBuildingException {
|
45 |
+
List<Element> groups = subOrRoot.getChildElements(GROUP_EL);
|
46 |
+
if (groups.size() < 2){
|
47 |
+
return;//nothing to fuse
|
48 |
+
}
|
49 |
+
List<Element> groupsInFusedRing =new ArrayList<>();
|
50 |
+
for (int i = groups.size()-1; i >=0; i--) {//group groups into fused rings
|
51 |
+
Element group =groups.get(i);
|
52 |
+
groupsInFusedRing.add(0, group);
|
53 |
+
if (i!=0){
|
54 |
+
Element startingEl = group;
|
55 |
+
if ((group.getValue().equals("benz") || group.getValue().equals("benzo")) && FUSIONRING_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){
|
56 |
+
Element beforeBenzo = OpsinTools.getPreviousSibling(group);
|
57 |
+
if (beforeBenzo !=null && beforeBenzo.getName().equals(LOCANT_EL)){
|
58 |
+
startingEl = beforeBenzo;
|
59 |
+
}
|
60 |
+
}
|
61 |
+
Element possibleGroup = OpsinTools.getPreviousSiblingIgnoringCertainElements(startingEl, new String[]{MULTIPLIER_EL, FUSION_EL});
|
62 |
+
if (!groups.get(i-1).equals(possibleGroup)){//end of fused ring system
|
63 |
+
if (groupsInFusedRing.size()>=2){
|
64 |
+
//This will be invoked in cases where there are multiple fused ring systems in the same subOrRoot such as some spiro systems
|
65 |
+
new FusedRingBuilder(state, groupsInFusedRing).buildFusedRing();
|
66 |
+
}
|
67 |
+
groupsInFusedRing.clear();
|
68 |
+
}
|
69 |
+
}
|
70 |
+
}
|
71 |
+
if (groupsInFusedRing.size()>=2){
|
72 |
+
new FusedRingBuilder(state, groupsInFusedRing).buildFusedRing();
|
73 |
+
}
|
74 |
+
}
|
75 |
+
|
76 |
+
/**
|
77 |
+
* Combines the groups given in the {@link FusedRingBuilder} constructor to destructively create the fused ring system
|
78 |
+
* This fused ring is then numbered
|
79 |
+
* @throws StructureBuildingException
|
80 |
+
*/
|
81 |
+
void buildFusedRing() throws StructureBuildingException{
|
82 |
+
/*
|
83 |
+
* Apply any nonstandard ring numbering, sorts atomOrder by locant
|
84 |
+
* Aromatises appropriate cycloalkane rings, Rejects groups with acyclic atoms
|
85 |
+
*/
|
86 |
+
processRingNumberingAndIrregularities();
|
87 |
+
processBenzoFusions();//FR-2.2.8 e.g. in 2H-[1,3]benzodioxino[6',5',4':10,5,6]anthra[2,3-b]azepine benzodioxino is one component
|
88 |
+
List<Element> nameComponents = formNameComponentList();
|
89 |
+
nameComponents.remove(lastGroup);
|
90 |
+
|
91 |
+
List<Fragment> componentFragments = new ArrayList<>();//all the ring fragments (other than the parentRing). These will later be merged into the parentRing
|
92 |
+
List<Fragment> parentFragments = new ArrayList<>();
|
93 |
+
parentFragments.add(parentRing);
|
94 |
+
|
95 |
+
int numberOfParents = 1;
|
96 |
+
Element possibleMultiplier = OpsinTools.getPreviousSibling(lastGroup);
|
97 |
+
if (nameComponents.size()>0 && possibleMultiplier !=null && possibleMultiplier.getName().equals(MULTIPLIER_EL)){
|
98 |
+
numberOfParents = Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR));
|
99 |
+
possibleMultiplier.detach();
|
100 |
+
for (int j = 1; j < numberOfParents; j++) {
|
101 |
+
Fragment copyOfParentRing =state.fragManager.copyFragment(parentRing);
|
102 |
+
parentFragments.add(copyOfParentRing);
|
103 |
+
componentFragments.add(copyOfParentRing);
|
104 |
+
}
|
105 |
+
}
|
106 |
+
|
107 |
+
/*The indice from nameComponents to use next. Work from right to left i.e. starts at nameComponents.size()-1*/
|
108 |
+
int ncIndice = processMultiParentSystem(parentFragments, nameComponents, componentFragments);//handle multiparent systems
|
109 |
+
/*
|
110 |
+
* The number of primes on the component to be connected.
|
111 |
+
* This is initially 0 indicating fusion of unprimed locants with the letter locants of the parentRing
|
112 |
+
* Subsequently it will switch to 1 indicating fusion of a second order component (primed locants) with a
|
113 |
+
* first order component (unprimed locants)
|
114 |
+
* Next would be double primed fusing to single primed locants etc.
|
115 |
+
*
|
116 |
+
*/
|
117 |
+
int fusionLevel = (nameComponents.size()-1 -ncIndice)/2;
|
118 |
+
for (; ncIndice>=0; ncIndice--) {
|
119 |
+
Element fusion = null;
|
120 |
+
if (nameComponents.get(ncIndice).getName().equals(FUSION_EL)){
|
121 |
+
fusion = nameComponents.get(ncIndice--);
|
122 |
+
}
|
123 |
+
if (ncIndice <0 || !nameComponents.get(ncIndice).getName().equals(GROUP_EL)){
|
124 |
+
throw new StructureBuildingException("Group not found where group expected. This is probably a bug");
|
125 |
+
}
|
126 |
+
Fragment nextComponent = nameComponents.get(ncIndice).getFrag();
|
127 |
+
int multiplier = 1;
|
128 |
+
Element possibleMultiplierEl = OpsinTools.getPreviousSibling(nameComponents.get(ncIndice));//e.g. the di of difuro
|
129 |
+
if (possibleMultiplierEl != null && possibleMultiplierEl.getName().equals(MULTIPLIER_EL)){
|
130 |
+
multiplier = Integer.parseInt(possibleMultiplierEl.getAttributeValue(VALUE_ATR));
|
131 |
+
}
|
132 |
+
String[] fusionDescriptors =null;
|
133 |
+
if (fusion !=null){
|
134 |
+
String fusionDescriptorString = fusion.getValue().toLowerCase(Locale.ROOT).substring(1, fusion.getValue().length()-1);
|
135 |
+
if (multiplier ==1){
|
136 |
+
fusionDescriptors = new String[]{fusionDescriptorString};
|
137 |
+
}
|
138 |
+
else{
|
139 |
+
if (fusionDescriptorString.split(";").length >1){
|
140 |
+
fusionDescriptors = fusionDescriptorString.split(";");
|
141 |
+
}
|
142 |
+
else if (fusionDescriptorString.split(":").length >1){
|
143 |
+
fusionDescriptors = fusionDescriptorString.split(":");
|
144 |
+
}
|
145 |
+
else if (fusionDescriptorString.split(",").length >1){
|
146 |
+
fusionDescriptors = fusionDescriptorString.split(",");
|
147 |
+
}
|
148 |
+
else{//multiplier does not appear to mean multiplied component. Could be indicating multiplication of the whole fused ring system
|
149 |
+
if (ncIndice!=0){
|
150 |
+
throw new StructureBuildingException("Unexpected multiplier: " + possibleMultiplierEl.getValue() +" or incorrect fusion descriptor: " + fusionDescriptorString);
|
151 |
+
}
|
152 |
+
multiplier =1;
|
153 |
+
fusionDescriptors = new String[]{fusionDescriptorString};
|
154 |
+
}
|
155 |
+
}
|
156 |
+
}
|
157 |
+
if (multiplier >1){
|
158 |
+
possibleMultiplierEl.detach();
|
159 |
+
}
|
160 |
+
Fragment[] fusionComponents = new Fragment[multiplier];
|
161 |
+
for (int j = 0; j < multiplier; j++) {
|
162 |
+
if (j>0){
|
163 |
+
fusionComponents[j] = state.fragManager.copyAndRelabelFragment(nextComponent, j);
|
164 |
+
}
|
165 |
+
else{
|
166 |
+
fusionComponents[j] = nextComponent;
|
167 |
+
}
|
168 |
+
}
|
169 |
+
|
170 |
+
for (int j = 0; j < multiplier; j++) {
|
171 |
+
Fragment component = fusionComponents[j];
|
172 |
+
componentFragments.add(component);
|
173 |
+
if (fusion !=null){
|
174 |
+
if (fusionDescriptors[j].split(":").length==1){//A fusion bracket without a colon is used when applying to the parent component (except in a special case where locants are ommitted)
|
175 |
+
//check for case of omitted locant from a higher order fusion bracket e.g. cyclopenta[4,5]pyrrolo[2,3-c]pyridine
|
176 |
+
if (fusionDescriptors[j].split("-").length==1 &&
|
177 |
+
fusionDescriptors[j].split(",").length >1 &&
|
178 |
+
FragmentTools.allAtomsInRingAreIdentical(component)
|
179 |
+
&& ((StringTools.countTerminalPrimes(fusionDescriptors[j].split(",")[0])) != fusionLevel) ){//Could be like cyclopenta[3,4]cyclobuta[1,2]benzene where the first fusion to occur has parent locants omitted not child locants
|
180 |
+
int numberOfPrimes = StringTools.countTerminalPrimes(fusionDescriptors[j].split(",")[0]);
|
181 |
+
//note that this is the number of primes on the parent ring. So would expect the child ring and hence the fusionLevel to be 1 higher
|
182 |
+
if (numberOfPrimes + 1 != fusionLevel){
|
183 |
+
if (numberOfPrimes + 2 == fusionLevel){//ring could be in previous fusion level e.g. the benzo in benzo[10,11]phenanthro[2',3',4',5',6':4,5,6,7]chryseno[1,2,3-bc]coronene
|
184 |
+
fusionLevel--;
|
185 |
+
}
|
186 |
+
else{
|
187 |
+
throw new StructureBuildingException("Incorrect number of primes in fusion bracket: " +fusionDescriptors[j]);
|
188 |
+
}
|
189 |
+
}
|
190 |
+
relabelAccordingToFusionLevel(component, fusionLevel);
|
191 |
+
List<String> numericalLocantsOfParent = Arrays.asList(fusionDescriptors[j].split(","));
|
192 |
+
List<String> numericalLocantsOfChild = findPossibleNumericalLocants(component, determineAtomsToFuse(fragmentInScopeForEachFusionLevel.get(fusionLevel), numericalLocantsOfParent, null).size()-1);
|
193 |
+
processHigherOrderFusionDescriptors(component, fragmentInScopeForEachFusionLevel.get(fusionLevel), numericalLocantsOfChild, numericalLocantsOfParent);
|
194 |
+
}
|
195 |
+
else{
|
196 |
+
fusionLevel = 0;
|
197 |
+
relabelAccordingToFusionLevel(component, fusionLevel);
|
198 |
+
String fusionDescriptor = fusionDescriptors[j];
|
199 |
+
String[] fusionArray = determineNumericalAndLetterComponents(fusionDescriptor);
|
200 |
+
int numberOfPrimes =0;
|
201 |
+
if (!fusionArray[1].equals("")){
|
202 |
+
numberOfPrimes =StringTools.countTerminalPrimes(fusionArray[1]);
|
203 |
+
if (fusionArray[0].equals("")){
|
204 |
+
fusionDescriptor = fusionArray[1].replaceAll("'", "");
|
205 |
+
}
|
206 |
+
else{
|
207 |
+
fusionDescriptor = fusionArray[0]+ "-" +fusionArray[1].replaceAll("'", "");
|
208 |
+
}
|
209 |
+
if (numberOfPrimes >= parentFragments.size()){
|
210 |
+
throw new StructureBuildingException("Unexpected prime in fusion descriptor");
|
211 |
+
}
|
212 |
+
}
|
213 |
+
performSimpleFusion(fusionDescriptor, component, parentFragments.get(numberOfPrimes));//e.g. pyrano[3,2-b]imidazo[4,5-e]pyridine where both are level 0 fusions
|
214 |
+
}
|
215 |
+
}
|
216 |
+
else{
|
217 |
+
//determine number of primes in fusor and hence determine fusion level
|
218 |
+
int numberOfPrimes = -j + StringTools.countTerminalPrimes(fusionDescriptors[j].split(",")[0]);
|
219 |
+
if (numberOfPrimes != fusionLevel){
|
220 |
+
if (fusionLevel == numberOfPrimes +1){
|
221 |
+
fusionLevel--;
|
222 |
+
}
|
223 |
+
else{
|
224 |
+
throw new StructureBuildingException("Incorrect number of primes in fusion bracket: " +fusionDescriptors[j]);
|
225 |
+
}
|
226 |
+
}
|
227 |
+
relabelAccordingToFusionLevel(component, fusionLevel);
|
228 |
+
performHigherOrderFusion(fusionDescriptors[j], component, fragmentInScopeForEachFusionLevel.get(fusionLevel));
|
229 |
+
}
|
230 |
+
}
|
231 |
+
else{
|
232 |
+
relabelAccordingToFusionLevel(component, fusionLevel);
|
233 |
+
performSimpleFusion(null, component, fragmentInScopeForEachFusionLevel.get(fusionLevel));
|
234 |
+
}
|
235 |
+
}
|
236 |
+
fusionLevel++;
|
237 |
+
if (multiplier ==1){//multiplied components may not be substituted onto
|
238 |
+
fragmentInScopeForEachFusionLevel.put(fusionLevel, fusionComponents[0]);
|
239 |
+
}
|
240 |
+
}
|
241 |
+
for (Fragment ring : componentFragments) {
|
242 |
+
state.fragManager.incorporateFragment(ring, parentRing);
|
243 |
+
}
|
244 |
+
removeMergedAtoms();
|
245 |
+
|
246 |
+
FusedRingNumberer.numberFusedRing(parentRing);//numbers the fused ring;
|
247 |
+
|
248 |
+
StringBuilder fusedRingName = new StringBuilder();
|
249 |
+
for (Element element : nameComponents) {
|
250 |
+
fusedRingName.append(element.getValue());
|
251 |
+
}
|
252 |
+
fusedRingName.append(lastGroup.getValue());
|
253 |
+
|
254 |
+
Element fusedRingEl =lastGroup;//reuse this element to save having to remap suffixes...
|
255 |
+
fusedRingEl.getAttribute(VALUE_ATR).setValue(fusedRingName.toString());
|
256 |
+
fusedRingEl.getAttribute(TYPE_ATR).setValue(RING_TYPE_VAL);
|
257 |
+
fusedRingEl.setValue(fusedRingName.toString());
|
258 |
+
|
259 |
+
for (Element element : nameComponents) {
|
260 |
+
element.detach();
|
261 |
+
}
|
262 |
+
}
|
263 |
+
|
264 |
+
private void removeMergedAtoms() {
|
265 |
+
for (Atom a : atomsToRemoveToReplacementAtom.keySet()) {
|
266 |
+
state.fragManager.removeAtomAndAssociatedBonds(a);
|
267 |
+
}
|
268 |
+
atomsToRemoveToReplacementAtom.clear();
|
269 |
+
}
|
270 |
+
|
271 |
+
/**
|
272 |
+
* Forms a list a list of all group and fusion elements between the first and last group in the fused ring
|
273 |
+
* @return
|
274 |
+
*/
|
275 |
+
private List<Element> formNameComponentList() {
|
276 |
+
List<Element> nameComponents = new ArrayList<>();
|
277 |
+
Element currentEl = groupsInFusedRing.get(0);
|
278 |
+
while(currentEl != lastGroup){
|
279 |
+
if (currentEl.getName().equals(GROUP_EL) || currentEl.getName().equals(FUSION_EL)){
|
280 |
+
nameComponents.add(currentEl);
|
281 |
+
}
|
282 |
+
currentEl = OpsinTools.getNextSibling(currentEl);
|
283 |
+
}
|
284 |
+
return nameComponents;
|
285 |
+
}
|
286 |
+
|
287 |
+
private void processRingNumberingAndIrregularities() throws StructureBuildingException {
|
288 |
+
for (Element group : groupsInFusedRing) {
|
289 |
+
Fragment ring = group.getFrag();
|
290 |
+
if (ALKANESTEM_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){
|
291 |
+
aromatiseCyclicAlkane(group);
|
292 |
+
}
|
293 |
+
processPartiallyUnsaturatedHWSystems(group, ring);
|
294 |
+
if (group == lastGroup) {
|
295 |
+
//perform a quick check that every atom in this group is infact cyclic. Fusion components are enumerated and hence all guaranteed to be purely cyclic
|
296 |
+
List<Atom> atomList = ring.getAtomList();
|
297 |
+
for (Atom atom : atomList) {
|
298 |
+
if (!atom.getAtomIsInACycle()) {
|
299 |
+
throw new StructureBuildingException("Inappropriate group used in fusion nomenclature. Only groups composed entirely of atoms in cycles may be used. i.e. not: " + group.getValue());
|
300 |
+
}
|
301 |
+
}
|
302 |
+
if (group.getAttribute(FUSEDRINGNUMBERING_ATR) != null) {
|
303 |
+
String[] standardNumbering = group.getAttributeValue(FUSEDRINGNUMBERING_ATR).split("/", -1);
|
304 |
+
for (int j = 0; j < standardNumbering.length; j++) {
|
305 |
+
atomList.get(j).replaceLocants(standardNumbering[j]);
|
306 |
+
}
|
307 |
+
} else {
|
308 |
+
ring.sortAtomListByLocant();//for those where the order the locants are in is sensible }
|
309 |
+
}
|
310 |
+
for (Atom atom : atomList) {
|
311 |
+
atom.clearLocants();//the parentRing does not have locants, letters are used to indicate the edges
|
312 |
+
}
|
313 |
+
} else if (group.getAttribute(FUSEDRINGNUMBERING_ATR) == null) {
|
314 |
+
ring.sortAtomListByLocant();//for those where the order the locants are in is sensible
|
315 |
+
}
|
316 |
+
}
|
317 |
+
}
|
318 |
+
|
319 |
+
/**
|
320 |
+
* Interprets the unlocanted unsaturator after a partially unsaturated HW Rings as indication of spare valency and detaches it
|
321 |
+
* This is necessary as this unsaturator can only refer to the HW ring and for names like 2-Benzoxazolinone to avoid confusion as to what the 2 refers to.
|
322 |
+
* @param group
|
323 |
+
* @param ring
|
324 |
+
*/
|
325 |
+
private void processPartiallyUnsaturatedHWSystems(Element group, Fragment ring) {
|
326 |
+
if (HANTZSCHWIDMAN_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) && group.getAttribute(ADDBOND_ATR)!=null){
|
327 |
+
List<Element> unsaturators = OpsinTools.getNextAdjacentSiblingsOfType(group, UNSATURATOR_EL);
|
328 |
+
if (unsaturators.size()>0){
|
329 |
+
Element unsaturator = unsaturators.get(0);
|
330 |
+
if (unsaturator.getAttribute(LOCANT_ATR)==null && unsaturator.getAttributeValue(VALUE_ATR).equals("2")){
|
331 |
+
unsaturator.detach();
|
332 |
+
List<Bond> bondsToUnsaturate = StructureBuildingMethods.findBondsToUnSaturate(ring, 2, true);
|
333 |
+
if (bondsToUnsaturate.size() == 0) {
|
334 |
+
throw new RuntimeException("Failed to find bond to unsaturate on partially saturated HW ring");
|
335 |
+
}
|
336 |
+
Bond b = bondsToUnsaturate.get(0);
|
337 |
+
b.getFromAtom().setSpareValency(true);
|
338 |
+
b.getToAtom().setSpareValency(true);
|
339 |
+
}
|
340 |
+
}
|
341 |
+
}
|
342 |
+
}
|
343 |
+
|
344 |
+
/**
|
345 |
+
* Given a cyclicAlkaneGroup determines whether or not it should be aromatised. Unlocanted ene will be detached if it is an aromatisation hint
|
346 |
+
* No unsaturators -->aromatise
|
347 |
+
* Just ane -->don't
|
348 |
+
* More than 1 ene or locants on ene -->don't
|
349 |
+
* yne --> don't
|
350 |
+
* @param cyclicAlkaneGroup
|
351 |
+
*/
|
352 |
+
private void aromatiseCyclicAlkane(Element cyclicAlkaneGroup) {
|
353 |
+
Element next = OpsinTools.getNextSibling(cyclicAlkaneGroup);
|
354 |
+
List<Element> unsaturators = new ArrayList<>();
|
355 |
+
while (next!=null && next.getName().equals(UNSATURATOR_EL)){
|
356 |
+
unsaturators.add(next);
|
357 |
+
next = OpsinTools.getNextSibling(next);
|
358 |
+
}
|
359 |
+
boolean conjugate =true;
|
360 |
+
if (unsaturators.size()==1){
|
361 |
+
int value = Integer.parseInt(unsaturators.get(0).getAttributeValue(VALUE_ATR));
|
362 |
+
if (value !=2){
|
363 |
+
conjugate =false;
|
364 |
+
}
|
365 |
+
else if (unsaturators.get(0).getAttribute(LOCANT_ATR)!=null){
|
366 |
+
conjugate =false;
|
367 |
+
}
|
368 |
+
}
|
369 |
+
else if (unsaturators.size()==2){
|
370 |
+
int value1 = Integer.parseInt(unsaturators.get(0).getAttributeValue(VALUE_ATR));
|
371 |
+
if (value1 !=1){
|
372 |
+
conjugate =false;
|
373 |
+
}
|
374 |
+
else{
|
375 |
+
int value2 = Integer.parseInt(unsaturators.get(1).getAttributeValue(VALUE_ATR));
|
376 |
+
if (value2 !=2 || unsaturators.get(1).getAttribute(LOCANT_ATR)!=null){
|
377 |
+
conjugate =false;
|
378 |
+
}
|
379 |
+
}
|
380 |
+
}
|
381 |
+
else if (unsaturators.size() >2){
|
382 |
+
conjugate =false;
|
383 |
+
}
|
384 |
+
if (conjugate){
|
385 |
+
for (Element unsaturator : unsaturators) {
|
386 |
+
unsaturator.detach();
|
387 |
+
}
|
388 |
+
List<Atom> atomList = cyclicAlkaneGroup.getFrag().getAtomList();
|
389 |
+
for (Atom atom : atomList) {
|
390 |
+
atom.setSpareValency(true);
|
391 |
+
}
|
392 |
+
}
|
393 |
+
}
|
394 |
+
|
395 |
+
private int processMultiParentSystem(List<Fragment> parentFragments, List<Element> nameComponents, List<Fragment> componentFragments) throws StructureBuildingException {
|
396 |
+
int i = nameComponents.size()-1;
|
397 |
+
int fusionLevel =0;
|
398 |
+
if (i>=0 && parentFragments.size()>1){
|
399 |
+
List<Fragment> previousFusionLevelFragments = parentFragments;
|
400 |
+
for (; i>=0; i--) {
|
401 |
+
if (previousFusionLevelFragments.size()==1){//completed multi parent system
|
402 |
+
fragmentInScopeForEachFusionLevel.put(fusionLevel, previousFusionLevelFragments.get(0));
|
403 |
+
break;
|
404 |
+
}
|
405 |
+
Element fusion = null;
|
406 |
+
if (nameComponents.get(i).getName().equals(FUSION_EL)){
|
407 |
+
fusion = nameComponents.get(i--);
|
408 |
+
}
|
409 |
+
else{
|
410 |
+
throw new StructureBuildingException("Fusion bracket not found where fusion bracket expected");
|
411 |
+
}
|
412 |
+
if (i <0 || !nameComponents.get(i).getName().equals(GROUP_EL)){
|
413 |
+
throw new StructureBuildingException("Group not found where group expected. This is probably a bug");
|
414 |
+
}
|
415 |
+
Fragment nextComponent = nameComponents.get(i).getFrag();
|
416 |
+
relabelAccordingToFusionLevel(nextComponent, fusionLevel);
|
417 |
+
int multiplier = 1;
|
418 |
+
Element possibleMultiplierEl = OpsinTools.getPreviousSibling(nameComponents.get(i));
|
419 |
+
if (possibleMultiplierEl != null && possibleMultiplierEl.getName().equals(MULTIPLIER_EL)){
|
420 |
+
multiplier = Integer.parseInt(possibleMultiplierEl.getAttributeValue(VALUE_ATR));
|
421 |
+
possibleMultiplierEl.detach();
|
422 |
+
}
|
423 |
+
List<Fragment> fusionComponents = new ArrayList<>();
|
424 |
+
for (int j = 0; j < multiplier; j++) {
|
425 |
+
if (j>0){
|
426 |
+
Fragment clonedFrag = state.fragManager.copyFragment(nextComponent);
|
427 |
+
relabelAccordingToFusionLevel(clonedFrag, j);//fusionLevels worth of primes already added
|
428 |
+
fusionComponents.add(clonedFrag);
|
429 |
+
}
|
430 |
+
else{
|
431 |
+
fusionComponents.add(nextComponent);
|
432 |
+
}
|
433 |
+
}
|
434 |
+
fusionLevel+=multiplier;
|
435 |
+
if (multiplier>1 && multiplier != previousFusionLevelFragments.size()){
|
436 |
+
throw new StructureBuildingException("Mismatch between number of components and number of parents in fused ring system");
|
437 |
+
}
|
438 |
+
String fusionDescriptorString = fusion.getValue().toLowerCase(Locale.ROOT).substring(1, fusion.getValue().length()-1);
|
439 |
+
String[] fusionDescriptors =null;
|
440 |
+
if (fusionDescriptorString.split(";").length >1){
|
441 |
+
fusionDescriptors = fusionDescriptorString.split(";");
|
442 |
+
}
|
443 |
+
else if (fusionDescriptorString.split(":").length >1){
|
444 |
+
fusionDescriptors = fusionDescriptorString.split(":");
|
445 |
+
}
|
446 |
+
else if (fusionDescriptorString.split(",").length >1){
|
447 |
+
fusionDescriptors = fusionDescriptorString.split(",");
|
448 |
+
}
|
449 |
+
else{
|
450 |
+
throw new StructureBuildingException("Invalid fusion descriptor: " + fusionDescriptorString);
|
451 |
+
}
|
452 |
+
if (fusionDescriptors.length != previousFusionLevelFragments.size()){
|
453 |
+
throw new StructureBuildingException("Invalid fusion descriptor: "+fusionDescriptorString +"(Number of locants disagrees with number of parents)");
|
454 |
+
}
|
455 |
+
for (int j = 0; j < fusionDescriptors.length; j++) {
|
456 |
+
String fusionDescriptor = fusionDescriptors[j];
|
457 |
+
Fragment component = multiplier>1 ? fusionComponents.get(j) : nextComponent;
|
458 |
+
Fragment parentToUse = previousFusionLevelFragments.get(j);
|
459 |
+
boolean simpleFusion = fusionDescriptor.split(":").length <= 1;
|
460 |
+
if (simpleFusion){
|
461 |
+
String[] fusionArray = determineNumericalAndLetterComponents(fusionDescriptor);
|
462 |
+
if (fusionArray[1].length() != 0){
|
463 |
+
int numberOfPrimes =StringTools.countTerminalPrimes(fusionArray[1]);
|
464 |
+
if (fusionArray[0].length() == 0){
|
465 |
+
fusionDescriptor = fusionArray[1].replaceAll("'", "");
|
466 |
+
}
|
467 |
+
else{
|
468 |
+
fusionDescriptor = fusionArray[0]+ "-" +fusionArray[1].replaceAll("'", "");
|
469 |
+
}
|
470 |
+
if (numberOfPrimes !=j){//check the number of primes on the letter part agree with the parent to use e.g.[4,5-bcd:1,2-c']difuran
|
471 |
+
throw new StructureBuildingException("Incorrect number of primes in fusion descriptor: " + fusionDescriptor);
|
472 |
+
}
|
473 |
+
}
|
474 |
+
performSimpleFusion(fusionDescriptor, component, parentToUse);
|
475 |
+
}
|
476 |
+
else{
|
477 |
+
performHigherOrderFusion(fusionDescriptor, component, parentToUse);
|
478 |
+
}
|
479 |
+
}
|
480 |
+
previousFusionLevelFragments = fusionComponents;
|
481 |
+
componentFragments.addAll(fusionComponents);
|
482 |
+
}
|
483 |
+
if (previousFusionLevelFragments.size()!=1){
|
484 |
+
throw new StructureBuildingException("Invalid fused ring system. Incomplete multiparent system");
|
485 |
+
}
|
486 |
+
}
|
487 |
+
return i;
|
488 |
+
}
|
489 |
+
|
490 |
+
/**
|
491 |
+
* Splits a first order fusion component into it's numerical and letter parts
|
492 |
+
* Either one of these can be the blank string as they may have been omitted
|
493 |
+
* The first entry in the array is the numbers and the second the letters
|
494 |
+
* @param fusionDescriptor
|
495 |
+
* @return
|
496 |
+
*/
|
497 |
+
private String[] determineNumericalAndLetterComponents(String fusionDescriptor) {
|
498 |
+
String[] fusionArray = fusionDescriptor.split("-");
|
499 |
+
if (fusionArray.length ==2){
|
500 |
+
return fusionArray;
|
501 |
+
}
|
502 |
+
else{
|
503 |
+
String[] components = new String[2];
|
504 |
+
if (fusionArray[0].contains(",")){//the digit section
|
505 |
+
components[0]=fusionArray[0];
|
506 |
+
components[1]="";
|
507 |
+
}
|
508 |
+
else{
|
509 |
+
components[0]="";
|
510 |
+
components[1]=fusionArray[0];
|
511 |
+
}
|
512 |
+
return components;
|
513 |
+
}
|
514 |
+
}
|
515 |
+
|
516 |
+
/**
|
517 |
+
* Searches groups for benz(o) components and fuses them in accordance with
|
518 |
+
* FR-2.2.8 Heterobicyclic components with a benzene ring
|
519 |
+
* @throws StructureBuildingException
|
520 |
+
*/
|
521 |
+
private void processBenzoFusions() throws StructureBuildingException {
|
522 |
+
for(int i = groupsInFusedRing.size() - 2; i >= 0; i--) {
|
523 |
+
Element group = groupsInFusedRing.get(i);
|
524 |
+
if (group.getValue().equals("benz") || group.getValue().equals("benzo")) {
|
525 |
+
Element possibleFusionbracket = OpsinTools.getNextSibling(group);
|
526 |
+
if (!possibleFusionbracket.getName().equals(FUSION_EL)) {
|
527 |
+
Element possibleMultiplier = OpsinTools.getPreviousSibling(group);
|
528 |
+
if (possibleMultiplier == null || !possibleMultiplier.getName().equals(MULTIPLIER_EL) || possibleMultiplier.getAttributeValue(TYPE_ATR).equals(GROUP_TYPE_VAL)) {
|
529 |
+
//e.g. 2-benzofuran. Fused rings of this type are a special case treated as being a single component
|
530 |
+
//and have a special convention for indicating the position of heteroatoms
|
531 |
+
benzoSpecificFusion(group, groupsInFusedRing.get(i + 1));
|
532 |
+
group.detach();
|
533 |
+
groupsInFusedRing.remove(i);
|
534 |
+
}
|
535 |
+
}
|
536 |
+
}
|
537 |
+
}
|
538 |
+
}
|
539 |
+
|
540 |
+
/**
|
541 |
+
* Modifies nextComponent's locants according to the fusionLevel.
|
542 |
+
* @param component
|
543 |
+
* @param fusionLevel
|
544 |
+
*/
|
545 |
+
private void relabelAccordingToFusionLevel(Fragment component, int fusionLevel) {
|
546 |
+
if (fusionLevel > 0){
|
547 |
+
FragmentTools.relabelNumericLocants(component.getAtomList(), StringTools.multiplyString("'", fusionLevel));
|
548 |
+
}
|
549 |
+
}
|
550 |
+
|
551 |
+
/**
|
552 |
+
* Handles fusion between components where the fusion descriptor is of the form:
|
553 |
+
* comma separated locants dash letters
|
554 |
+
* e.g imidazo[4,5-d]pyridine
|
555 |
+
* The fusionDescriptor may be given as null or the letter/numerical part omitted.
|
556 |
+
* Sensible defaults will be found instead
|
557 |
+
* @param fusionDescriptor
|
558 |
+
* @param childRing
|
559 |
+
* @param parentRing
|
560 |
+
* @throws StructureBuildingException
|
561 |
+
*/
|
562 |
+
private void performSimpleFusion(String fusionDescriptor, Fragment childRing, Fragment parentRing) throws StructureBuildingException {
|
563 |
+
List<String> numericalLocantsOfChild = null;
|
564 |
+
List<String> letterLocantsOfParent = null;
|
565 |
+
if (fusionDescriptor != null){
|
566 |
+
String[] fusionArray = fusionDescriptor.split("-");
|
567 |
+
if (fusionArray.length ==2){
|
568 |
+
numericalLocantsOfChild = Arrays.asList(fusionArray[0].split(","));
|
569 |
+
char[] tempLetterLocantsOfParent = fusionArray[1].toCharArray();
|
570 |
+
letterLocantsOfParent = new ArrayList<>();
|
571 |
+
for (char letterLocantOfParent : tempLetterLocantsOfParent) {
|
572 |
+
letterLocantsOfParent.add(String.valueOf(letterLocantOfParent));
|
573 |
+
}
|
574 |
+
}
|
575 |
+
else{
|
576 |
+
if (fusionArray[0].contains(",")){//only has digits
|
577 |
+
String[] numericalLocantsOfChildTemp = fusionArray[0].split(",");
|
578 |
+
numericalLocantsOfChild = Arrays.asList(numericalLocantsOfChildTemp);
|
579 |
+
}
|
580 |
+
else{//only has letters
|
581 |
+
char[] tempLetterLocantsOfParentCharArray = fusionArray[0].toCharArray();
|
582 |
+
letterLocantsOfParent = new ArrayList<>();
|
583 |
+
for (char letterLocantOfParentCharArray : tempLetterLocantsOfParentCharArray) {
|
584 |
+
letterLocantsOfParent.add(String.valueOf(letterLocantOfParentCharArray));
|
585 |
+
}
|
586 |
+
}
|
587 |
+
}
|
588 |
+
}
|
589 |
+
|
590 |
+
int edgeLength =1;
|
591 |
+
if (numericalLocantsOfChild != null){
|
592 |
+
if (numericalLocantsOfChild.size() <=1){
|
593 |
+
throw new StructureBuildingException("At least two numerical locants must be provided to perform fusion!");
|
594 |
+
}
|
595 |
+
edgeLength = numericalLocantsOfChild.size()-1;
|
596 |
+
}
|
597 |
+
else if (letterLocantsOfParent != null){
|
598 |
+
edgeLength = letterLocantsOfParent.size();
|
599 |
+
}
|
600 |
+
|
601 |
+
if (numericalLocantsOfChild == null){
|
602 |
+
numericalLocantsOfChild = findPossibleNumericalLocants(childRing, edgeLength);
|
603 |
+
}
|
604 |
+
|
605 |
+
if (letterLocantsOfParent == null){
|
606 |
+
letterLocantsOfParent = findPossibleLetterLocants(parentRing, edgeLength);
|
607 |
+
}
|
608 |
+
if (numericalLocantsOfChild == null || letterLocantsOfParent ==null){
|
609 |
+
throw new StructureBuildingException("Unable to find bond to form fused ring system. Some information for forming fused ring system was only supplyed implicitly");
|
610 |
+
}
|
611 |
+
|
612 |
+
processFirstOrderFusionDescriptors(childRing, parentRing, numericalLocantsOfChild, letterLocantsOfParent);//fuse the rings
|
613 |
+
}
|
614 |
+
|
615 |
+
/**
|
616 |
+
* Takes a ring an returns and array with one letter corresponding to a side/s
|
617 |
+
* that contains two adjacent non bridgehead carbons
|
618 |
+
* The number of sides is specified by edgeLength
|
619 |
+
* @param ring
|
620 |
+
* @param edgeLength The number of bonds to be fused along
|
621 |
+
* @return
|
622 |
+
*/
|
623 |
+
private List<String> findPossibleLetterLocants(Fragment ring, int edgeLength) {
|
624 |
+
List<Integer> carbonAtomIndexes = new ArrayList<>();
|
625 |
+
int numberOfAtoms = ring.getAtomCount();
|
626 |
+
CyclicAtomList cyclicAtomList = new CyclicAtomList(ring.getAtomList());
|
627 |
+
for (int i = 0; i <= numberOfAtoms; i++) {
|
628 |
+
//iterate backwards in list to use highest locanted edge in preference.
|
629 |
+
//this retains what is currently locant 1 on the parent ring as locant 1 if the first two atoms found match
|
630 |
+
//the last atom in the list is potentially tested twice e.g. on a 6 membered ring, 6-5 and 1-6 are both possible
|
631 |
+
Atom atom = cyclicAtomList.previous();
|
632 |
+
//want non-bridgehead carbon atoms. Double-check that these carbon atoms are actually bonded (e.g. von baeyer systems have non-consecutive atom numbering!)
|
633 |
+
if (atom.getElement() == ChemEl.C && atom.getBondCount() == 2
|
634 |
+
&& (carbonAtomIndexes.size() == 0 || atom.getAtomNeighbours().contains(cyclicAtomList.peekNext()))){
|
635 |
+
carbonAtomIndexes.add(cyclicAtomList.getIndex());
|
636 |
+
if (carbonAtomIndexes.size() == edgeLength + 1){//as many carbons in a row as to give that edgelength ->use these side/s
|
637 |
+
Collections.reverse(carbonAtomIndexes);
|
638 |
+
List<String> letterLocantsOfParent = new ArrayList<>();
|
639 |
+
for (int j = 0; j < edgeLength; j++) {
|
640 |
+
letterLocantsOfParent.add(String.valueOf((char)(97 + carbonAtomIndexes.get(j))));//97 is ascii for a
|
641 |
+
}
|
642 |
+
return letterLocantsOfParent;
|
643 |
+
}
|
644 |
+
}
|
645 |
+
else{
|
646 |
+
carbonAtomIndexes.clear();
|
647 |
+
}
|
648 |
+
}
|
649 |
+
return null;
|
650 |
+
}
|
651 |
+
|
652 |
+
/**
|
653 |
+
* Takes a ring and returns an array of numbers corresponding to a side/s
|
654 |
+
* that contains two adjacent non bridgehead carbons
|
655 |
+
* The number of sides is specified by edgeLength
|
656 |
+
* @param ring
|
657 |
+
* @param edgeLength The number of bonds to be fused along
|
658 |
+
* @return
|
659 |
+
*/
|
660 |
+
private List<String> findPossibleNumericalLocants(Fragment ring, int edgeLength) {
|
661 |
+
List<String> carbonLocants = new ArrayList<>();
|
662 |
+
int numberOfAtoms = ring.getAtomCount();
|
663 |
+
CyclicAtomList cyclicAtomList = new CyclicAtomList(ring.getAtomList());
|
664 |
+
for (int i = 0; i <= numberOfAtoms; i++) {
|
665 |
+
//the last atom in the list is potentially tested twice e.g. on a 6 membered ring, 1-2 and 6-1 are both possible
|
666 |
+
Atom atom = cyclicAtomList.next();
|
667 |
+
//want non-bridgehead carbon atoms. Double-check that these carbon atoms are actually bonded (e.g. von baeyer systems have non-consecutive atom numbering!)
|
668 |
+
if (atom.getElement() == ChemEl.C && atom.getBondCount() == 2
|
669 |
+
&& (carbonLocants.size() == 0 || atom.getAtomNeighbours().contains(cyclicAtomList.peekPrevious()))){
|
670 |
+
carbonLocants.add(atom.getFirstLocant());
|
671 |
+
if (carbonLocants.size() == edgeLength + 1){//as many carbons in a row as to give that edgelength ->use these side/s
|
672 |
+
List<String> numericalLocantsOfChild = new ArrayList<>();
|
673 |
+
for (String locant : carbonLocants) {
|
674 |
+
numericalLocantsOfChild.add(locant);
|
675 |
+
}
|
676 |
+
return numericalLocantsOfChild;
|
677 |
+
}
|
678 |
+
}
|
679 |
+
else{
|
680 |
+
carbonLocants.clear();
|
681 |
+
}
|
682 |
+
}
|
683 |
+
return null;
|
684 |
+
}
|
685 |
+
|
686 |
+
/**
|
687 |
+
* Performs a single ring fusion using the values in numericalLocantsOfChild/letterLocantsOfParent
|
688 |
+
* @param childRing
|
689 |
+
* @param parentRing
|
690 |
+
* @param numericalLocantsOfChild
|
691 |
+
* @param letterLocantsOfParent
|
692 |
+
* @throws StructureBuildingException
|
693 |
+
*/
|
694 |
+
private void processFirstOrderFusionDescriptors(Fragment childRing, Fragment parentRing, List<String> numericalLocantsOfChild, List<String> letterLocantsOfParent) throws StructureBuildingException {
|
695 |
+
List<Atom> childAtoms = determineAtomsToFuse(childRing, numericalLocantsOfChild, letterLocantsOfParent.size() +1);
|
696 |
+
if (childAtoms ==null){
|
697 |
+
throw new StructureBuildingException("Malformed fusion bracket!");
|
698 |
+
}
|
699 |
+
|
700 |
+
List<Atom> parentAtoms = new ArrayList<>();
|
701 |
+
List<Atom> parentPeripheralAtomList = getPeripheralAtoms(parentRing.getAtomList());
|
702 |
+
CyclicAtomList cyclicListAtomsOnSurfaceOfParent = new CyclicAtomList(parentPeripheralAtomList, (int)letterLocantsOfParent.get(0).charAt(0) -97);//convert from lower case character through ascii to 0-23
|
703 |
+
parentAtoms.add(cyclicListAtomsOnSurfaceOfParent.getCurrent());
|
704 |
+
for (int i = 0; i < letterLocantsOfParent.size(); i++) {
|
705 |
+
parentAtoms.add(cyclicListAtomsOnSurfaceOfParent.next());
|
706 |
+
}
|
707 |
+
fuseRings(childAtoms, parentAtoms);
|
708 |
+
}
|
709 |
+
|
710 |
+
/**
|
711 |
+
* Returns the sublist of the given atoms that are peripheral atoms given that the list is ordered such that the interior atoms are at the end of the list
|
712 |
+
* @param atomList
|
713 |
+
* @return
|
714 |
+
*/
|
715 |
+
private List<Atom> getPeripheralAtoms(List<Atom> atomList) {
|
716 |
+
//find the indice of the last atom on the surface of the ring. This obviously connects to the first atom. The objective is to exclude any interior atoms.
|
717 |
+
List<Atom> neighbours = atomList.get(0).getAtomNeighbours();
|
718 |
+
int indice = Integer.MAX_VALUE;
|
719 |
+
for (Atom atom : neighbours) {
|
720 |
+
int indexOfAtom =atomList.indexOf(atom);
|
721 |
+
if (indexOfAtom ==1){//not the next atom
|
722 |
+
continue;
|
723 |
+
}
|
724 |
+
else if (indexOfAtom ==-1){//not in parentRing
|
725 |
+
continue;
|
726 |
+
}
|
727 |
+
if (atomList.indexOf(atom)< indice){
|
728 |
+
indice = indexOfAtom;
|
729 |
+
}
|
730 |
+
}
|
731 |
+
return atomList.subList(0, indice +1);
|
732 |
+
}
|
733 |
+
|
734 |
+
/**
|
735 |
+
* Handles fusion between components where the fusion descriptor is of the form:
|
736 |
+
* comma separated locants colon comma separated locants
|
737 |
+
* e.g pyrido[1'',2'':1',2']imidazo
|
738 |
+
* @param fusionDescriptor
|
739 |
+
* @param nextComponent
|
740 |
+
* @param fusedRing
|
741 |
+
* @throws StructureBuildingException
|
742 |
+
*/
|
743 |
+
private void performHigherOrderFusion(String fusionDescriptor, Fragment nextComponent, Fragment fusedRing) throws StructureBuildingException {
|
744 |
+
List<String> numericalLocantsOfChild = null;
|
745 |
+
List<String> numericalLocantsOfParent = null;
|
746 |
+
String[] fusionArray = fusionDescriptor.split(":");
|
747 |
+
if (fusionArray.length ==2){
|
748 |
+
numericalLocantsOfChild = Arrays.asList(fusionArray[0].split(","));
|
749 |
+
numericalLocantsOfParent = Arrays.asList(fusionArray[1].split(","));
|
750 |
+
}
|
751 |
+
else{
|
752 |
+
throw new StructureBuildingException("Malformed fusion bracket: This is an OPSIN bug, check regexTokens.xml");
|
753 |
+
}
|
754 |
+
processHigherOrderFusionDescriptors(nextComponent, fusedRing, numericalLocantsOfChild, numericalLocantsOfParent);//fuse the rings
|
755 |
+
}
|
756 |
+
|
757 |
+
/**
|
758 |
+
* Performs a single ring fusion using the values in numericalLocantsOfChild/numericalLocantsOfParent
|
759 |
+
* @param childRing
|
760 |
+
* @param parentRing
|
761 |
+
* @param numericalLocantsOfChild
|
762 |
+
* @param numericalLocantsOfParent
|
763 |
+
* @throws StructureBuildingException
|
764 |
+
*/
|
765 |
+
private void processHigherOrderFusionDescriptors(Fragment childRing, Fragment parentRing, List<String> numericalLocantsOfChild, List<String> numericalLocantsOfParent) throws StructureBuildingException {
|
766 |
+
List<Atom> childAtoms =determineAtomsToFuse(childRing, numericalLocantsOfChild, null);
|
767 |
+
if (childAtoms ==null){
|
768 |
+
throw new StructureBuildingException("Malformed fusion bracket!");
|
769 |
+
}
|
770 |
+
|
771 |
+
List<Atom> parentAtoms = determineAtomsToFuse(parentRing, numericalLocantsOfParent, childAtoms.size());
|
772 |
+
if (parentAtoms ==null){
|
773 |
+
throw new StructureBuildingException("Malformed fusion bracket!");
|
774 |
+
}
|
775 |
+
fuseRings(childAtoms, parentAtoms);
|
776 |
+
}
|
777 |
+
|
778 |
+
/**
|
779 |
+
* Determines which atoms on a ring should be used for fusion given a set of numerical locants.
|
780 |
+
* If from the other ring involved in the fusion it is known how many atoms are expected to be found this should be provided
|
781 |
+
* If this is not known it should be set to null and the smallest number of fusion atoms will be returned.
|
782 |
+
* @param ring
|
783 |
+
* @param numericalLocantsOnRing
|
784 |
+
* @param expectedNumberOfAtomsToBeUsedForFusion
|
785 |
+
* @return
|
786 |
+
* @throws StructureBuildingException
|
787 |
+
*/
|
788 |
+
private List<Atom> determineAtomsToFuse(Fragment ring, List<String> numericalLocantsOnRing, Integer expectedNumberOfAtomsToBeUsedForFusion) throws StructureBuildingException {
|
789 |
+
List<Atom> parentPeripheralAtomList = getPeripheralAtoms(ring.getAtomList());
|
790 |
+
String firstLocant = numericalLocantsOnRing.get(0);
|
791 |
+
String lastLocant = numericalLocantsOnRing.get(numericalLocantsOnRing.size() - 1);
|
792 |
+
int indexfirst = parentPeripheralAtomList.indexOf(ring.getAtomByLocantOrThrow(firstLocant));
|
793 |
+
if (indexfirst == -1) {
|
794 |
+
throw new StructureBuildingException(firstLocant + " refers to an atom that is not a peripheral atom!");
|
795 |
+
}
|
796 |
+
int indexfinal = parentPeripheralAtomList.indexOf(ring.getAtomByLocantOrThrow(lastLocant));
|
797 |
+
if (indexfinal == -1) {
|
798 |
+
throw new StructureBuildingException(lastLocant + " refers to an atom that is not a peripheral atom!");
|
799 |
+
}
|
800 |
+
CyclicAtomList cyclicRingAtomList = new CyclicAtomList(parentPeripheralAtomList, indexfirst);
|
801 |
+
List<Atom> fusionAtoms = null;
|
802 |
+
|
803 |
+
List<Atom> potentialFusionAtomsAscending = new ArrayList<>();
|
804 |
+
potentialFusionAtomsAscending.add(cyclicRingAtomList.getCurrent());
|
805 |
+
while (cyclicRingAtomList.getIndex() != indexfinal){//assume numbers are ascending
|
806 |
+
potentialFusionAtomsAscending.add(cyclicRingAtomList.next());
|
807 |
+
}
|
808 |
+
if (expectedNumberOfAtomsToBeUsedForFusion ==null ||expectedNumberOfAtomsToBeUsedForFusion == potentialFusionAtomsAscending.size()){
|
809 |
+
boolean notInPotentialParentAtoms =false;
|
810 |
+
for (int i =1; i < numericalLocantsOnRing.size()-1 ; i ++){
|
811 |
+
if (!potentialFusionAtomsAscending.contains(ring.getAtomByLocantOrThrow(numericalLocantsOnRing.get(i)))){
|
812 |
+
notInPotentialParentAtoms =true;
|
813 |
+
}
|
814 |
+
}
|
815 |
+
if (!notInPotentialParentAtoms){
|
816 |
+
fusionAtoms = potentialFusionAtomsAscending;
|
817 |
+
}
|
818 |
+
}
|
819 |
+
|
820 |
+
if (fusionAtoms ==null || expectedNumberOfAtomsToBeUsedForFusion ==null){//that didn't work, so try assuming the numbers are descending
|
821 |
+
cyclicRingAtomList.setIndex(indexfirst);
|
822 |
+
List<Atom> potentialFusionAtomsDescending = new ArrayList<>();
|
823 |
+
potentialFusionAtomsDescending.add(cyclicRingAtomList.getCurrent());
|
824 |
+
while (cyclicRingAtomList.getIndex() != indexfinal){//assume numbers are descending
|
825 |
+
potentialFusionAtomsDescending.add(cyclicRingAtomList.previous());
|
826 |
+
}
|
827 |
+
if (expectedNumberOfAtomsToBeUsedForFusion ==null || expectedNumberOfAtomsToBeUsedForFusion == potentialFusionAtomsDescending.size()){
|
828 |
+
boolean notInPotentialParentAtoms =false;
|
829 |
+
for (int i =1; i < numericalLocantsOnRing.size()-1 ; i ++){
|
830 |
+
if (!potentialFusionAtomsDescending.contains(ring.getAtomByLocantOrThrow(numericalLocantsOnRing.get(i)))){
|
831 |
+
notInPotentialParentAtoms =true;
|
832 |
+
}
|
833 |
+
}
|
834 |
+
if (!notInPotentialParentAtoms){
|
835 |
+
if (fusionAtoms!=null && expectedNumberOfAtomsToBeUsedForFusion ==null){
|
836 |
+
//prefer less fusion atoms
|
837 |
+
if (potentialFusionAtomsDescending.size()< fusionAtoms.size()){
|
838 |
+
fusionAtoms = potentialFusionAtomsDescending;
|
839 |
+
}
|
840 |
+
}
|
841 |
+
else{
|
842 |
+
fusionAtoms = potentialFusionAtomsDescending;
|
843 |
+
}
|
844 |
+
}
|
845 |
+
}
|
846 |
+
}
|
847 |
+
return fusionAtoms;
|
848 |
+
}
|
849 |
+
|
850 |
+
/**
|
851 |
+
* Creates the bonds required to fuse two rings together.
|
852 |
+
* The child atoms are recorded as atoms that should be removed later
|
853 |
+
* @param childAtoms
|
854 |
+
* @param parentAtoms
|
855 |
+
* @throws StructureBuildingException
|
856 |
+
*/
|
857 |
+
private void fuseRings(List<Atom> childAtoms, List<Atom> parentAtoms) throws StructureBuildingException {
|
858 |
+
if (parentAtoms.size()!=childAtoms.size()){
|
859 |
+
throw new StructureBuildingException("Problem with fusion descriptors: Parent atoms specified: " + parentAtoms.size() +" Child atoms specified: " + childAtoms.size() + " These should have been identical!");
|
860 |
+
}
|
861 |
+
//replace parent atoms if the atom has already been used in fusion with the original atom
|
862 |
+
//This will occur if fusion has resulted in something resembling a spiro centre e.g. cyclopenta[1,2-b:5,1-b']bis[1,4]oxathiine
|
863 |
+
for (int i = parentAtoms.size() -1; i >=0; i--) {
|
864 |
+
if (atomsToRemoveToReplacementAtom.get(parentAtoms.get(i))!=null){
|
865 |
+
parentAtoms.set(i, atomsToRemoveToReplacementAtom.get(parentAtoms.get(i)));
|
866 |
+
}
|
867 |
+
if (atomsToRemoveToReplacementAtom.get(childAtoms.get(i))!=null){
|
868 |
+
childAtoms.set(i, atomsToRemoveToReplacementAtom.get(childAtoms.get(i)));
|
869 |
+
}
|
870 |
+
}
|
871 |
+
|
872 |
+
//sync spareValency and check that element type matches
|
873 |
+
for (int i = 0; i < childAtoms.size(); i++) {
|
874 |
+
Atom parentAtom = parentAtoms.get(i);
|
875 |
+
Atom childAtom = childAtoms.get(i);
|
876 |
+
if (childAtom.hasSpareValency()){
|
877 |
+
parentAtom.setSpareValency(true);
|
878 |
+
}
|
879 |
+
if (parentAtom.getElement() != childAtom.getElement()){
|
880 |
+
throw new StructureBuildingException("Invalid fusion descriptor: Heteroatom placement is ambiguous as it is not present in both components of the fusion");
|
881 |
+
}
|
882 |
+
atomsToRemoveToReplacementAtom.put(childAtom, parentAtom);
|
883 |
+
}
|
884 |
+
|
885 |
+
Set<Bond> fusionEdgeBonds = new HashSet<>();//these bonds already exist in both the child and parent atoms
|
886 |
+
for (int i = 0; i < childAtoms.size() -1; i++) {
|
887 |
+
fusionEdgeBonds.add(childAtoms.get(i).getBondToAtomOrThrow(childAtoms.get(i+1)));
|
888 |
+
fusionEdgeBonds.add(parentAtoms.get(i).getBondToAtomOrThrow(parentAtoms.get(i+1)));
|
889 |
+
}
|
890 |
+
|
891 |
+
Set<Bond> bondsToAddToParentAtoms = new LinkedHashSet<>();
|
892 |
+
for (Atom childAtom : childAtoms) {
|
893 |
+
for (Bond b : childAtom.getBonds()) {
|
894 |
+
if (!fusionEdgeBonds.contains(b)){
|
895 |
+
bondsToAddToParentAtoms.add(b);
|
896 |
+
}
|
897 |
+
}
|
898 |
+
}
|
899 |
+
|
900 |
+
Set<Bond> bondsToAddToChildAtoms = new LinkedHashSet<>();
|
901 |
+
for (Atom parentAtom : parentAtoms) {
|
902 |
+
for (Bond b : parentAtom.getBonds()) {
|
903 |
+
if (!fusionEdgeBonds.contains(b)){
|
904 |
+
bondsToAddToChildAtoms.add(b);
|
905 |
+
}
|
906 |
+
}
|
907 |
+
}
|
908 |
+
|
909 |
+
for (Bond bond : bondsToAddToParentAtoms) {
|
910 |
+
Atom from = bond.getFromAtom();
|
911 |
+
int indiceInChildAtoms = childAtoms.indexOf(from);
|
912 |
+
if (indiceInChildAtoms !=-1){
|
913 |
+
from = parentAtoms.get(indiceInChildAtoms);
|
914 |
+
}
|
915 |
+
Atom to = bond.getToAtom();
|
916 |
+
indiceInChildAtoms = childAtoms.indexOf(to);
|
917 |
+
if (indiceInChildAtoms !=-1){
|
918 |
+
to = parentAtoms.get(indiceInChildAtoms);
|
919 |
+
}
|
920 |
+
state.fragManager.createBond(from, to, 1);
|
921 |
+
}
|
922 |
+
|
923 |
+
for (Bond bond : bondsToAddToChildAtoms) {
|
924 |
+
Atom from = bond.getFromAtom();
|
925 |
+
int indiceInParentAtoms = parentAtoms.indexOf(from);
|
926 |
+
if (indiceInParentAtoms !=-1){
|
927 |
+
from = childAtoms.get(indiceInParentAtoms);
|
928 |
+
}
|
929 |
+
Atom to = bond.getToAtom();
|
930 |
+
indiceInParentAtoms = parentAtoms.indexOf(to);
|
931 |
+
if (indiceInParentAtoms !=-1){
|
932 |
+
to = childAtoms.get(indiceInParentAtoms);
|
933 |
+
}
|
934 |
+
Bond newBond = new Bond(from, to, 1);
|
935 |
+
if (childAtoms.contains(from)){
|
936 |
+
from.addBond(newBond);
|
937 |
+
}
|
938 |
+
else{
|
939 |
+
to.addBond(newBond);
|
940 |
+
}
|
941 |
+
}
|
942 |
+
}
|
943 |
+
|
944 |
+
/**
|
945 |
+
* Fuse the benzo with the subsequent ring
|
946 |
+
* Uses locants in front of the benz/benzo group to assign heteroatoms on the now numbered fused ring system
|
947 |
+
* @param benzoEl
|
948 |
+
* @param parentEl
|
949 |
+
* @throws StructureBuildingException
|
950 |
+
*/
|
951 |
+
private void benzoSpecificFusion(Element benzoEl, Element parentEl) throws StructureBuildingException {
|
952 |
+
/*
|
953 |
+
* Perform the fusion, number it and associate it with the parentEl
|
954 |
+
*/
|
955 |
+
Fragment benzoRing = benzoEl.getFrag();
|
956 |
+
Fragment parentRing = parentEl.getFrag();
|
957 |
+
performSimpleFusion(null, benzoRing , parentRing);
|
958 |
+
state.fragManager.incorporateFragment(benzoRing, parentRing);
|
959 |
+
removeMergedAtoms();
|
960 |
+
FusedRingNumberer.numberFusedRing(parentRing);//numbers the fused ring;
|
961 |
+
Fragment fusedRing =parentRing;
|
962 |
+
setBenzoHeteroatomPositioning(benzoEl, fusedRing);
|
963 |
+
}
|
964 |
+
|
965 |
+
/**
|
966 |
+
* Checks for locant(s) before benzo and uses these to set
|
967 |
+
* @param benzoEl
|
968 |
+
* @param fusedRing
|
969 |
+
* @throws StructureBuildingException
|
970 |
+
*/
|
971 |
+
private void setBenzoHeteroatomPositioning(Element benzoEl, Fragment fusedRing) throws StructureBuildingException {
|
972 |
+
Element locantEl = OpsinTools.getPreviousSibling(benzoEl);
|
973 |
+
if (locantEl != null && locantEl.getName().equals(LOCANT_EL)) {
|
974 |
+
String[] locants = locantEl.getValue().split(",");
|
975 |
+
if (locantsCouldApplyToHeteroatomPositions(locants, benzoEl)) {
|
976 |
+
List<Atom> atomList =fusedRing.getAtomList();
|
977 |
+
List<Atom> heteroatoms = new ArrayList<>();
|
978 |
+
List<ChemEl> elementOfHeteroAtom = new ArrayList<>();
|
979 |
+
for (Atom atom : atomList) {//this iterates in the same order as the numbering system
|
980 |
+
if (atom.getElement() != ChemEl.C){
|
981 |
+
heteroatoms.add(atom);
|
982 |
+
elementOfHeteroAtom.add(atom.getElement());
|
983 |
+
}
|
984 |
+
}
|
985 |
+
if (locants.length == heteroatoms.size()){//as many locants as there are heteroatoms to assign
|
986 |
+
//check for special case of a single locant indicating where the group substitutes e.g. 4-benzofuran-2-yl
|
987 |
+
if (!(locants.length == 1 && OpsinTools.getPreviousSibling(locantEl) == null
|
988 |
+
&& ComponentProcessor.checkLocantPresentOnPotentialRoot(state, benzoEl.getParent(), locants[0]))) {
|
989 |
+
for (Atom atom : heteroatoms) {
|
990 |
+
atom.setElement(ChemEl.C);
|
991 |
+
}
|
992 |
+
for (int i=0; i< heteroatoms.size(); i++) {
|
993 |
+
fusedRing.getAtomByLocantOrThrow(locants[i]).setElement(elementOfHeteroAtom.get(i));
|
994 |
+
}
|
995 |
+
locantEl.detach();
|
996 |
+
}
|
997 |
+
}
|
998 |
+
else if (locants.length > 1){
|
999 |
+
throw new StructureBuildingException("Unable to assign all locants to benzo-fused ring or multiplier was mising");
|
1000 |
+
}
|
1001 |
+
}
|
1002 |
+
}
|
1003 |
+
}
|
1004 |
+
|
1005 |
+
private boolean locantsCouldApplyToHeteroatomPositions(String[] locants, Element benzoEl) {
|
1006 |
+
if (!locantsAreAllNumeric(locants)) {
|
1007 |
+
return false;
|
1008 |
+
}
|
1009 |
+
List<Element> suffixes = benzoEl.getParent().getChildElements(SUFFIX_EL);
|
1010 |
+
int suffixesWithoutLocants = 0;
|
1011 |
+
for (Element suffix : suffixes) {
|
1012 |
+
if (suffix.getAttribute(LOCANT_ATR)==null){
|
1013 |
+
suffixesWithoutLocants++;
|
1014 |
+
}
|
1015 |
+
}
|
1016 |
+
if (locants.length == suffixesWithoutLocants){//In preference locants will be assigned to suffixes rather than to this nomenclature
|
1017 |
+
return false;
|
1018 |
+
}
|
1019 |
+
return true;
|
1020 |
+
}
|
1021 |
+
|
1022 |
+
private boolean locantsAreAllNumeric(String[] locants) {
|
1023 |
+
for (String locant : locants) {
|
1024 |
+
if (!MATCH_NUMERIC_LOCANT.matcher(locant).matches()){
|
1025 |
+
return false;
|
1026 |
+
}
|
1027 |
+
}
|
1028 |
+
return true;
|
1029 |
+
}
|
1030 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumberer.java
ADDED
@@ -0,0 +1,1849 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.util.ArrayList;
|
4 |
+
import java.util.Collections;
|
5 |
+
import java.util.Comparator;
|
6 |
+
import java.util.EnumMap;
|
7 |
+
import java.util.HashMap;
|
8 |
+
import java.util.LinkedHashMap;
|
9 |
+
import java.util.List;
|
10 |
+
import java.util.Map;
|
11 |
+
import java.util.Map.Entry;
|
12 |
+
|
13 |
+
import org.apache.logging.log4j.LogManager;
|
14 |
+
import org.apache.logging.log4j.Logger;
|
15 |
+
|
16 |
+
/**
|
17 |
+
* Numbers fusedRings
|
18 |
+
* @author aa593
|
19 |
+
* @author dl387
|
20 |
+
*
|
21 |
+
*/
|
22 |
+
class FusedRingNumberer {
|
23 |
+
|
24 |
+
private static final Logger LOG = LogManager.getLogger(FusedRingNumberer.class);
|
25 |
+
private static class RingConnectivityTable {
|
26 |
+
final List<RingShape> ringShapes = new ArrayList<>();
|
27 |
+
final List<Ring> neighbouringRings = new ArrayList<>();
|
28 |
+
final List<Integer> directionFromRingToNeighbouringRing = new ArrayList<>();
|
29 |
+
final List<Ring> usedRings = new ArrayList<>();
|
30 |
+
|
31 |
+
RingConnectivityTable copy(){
|
32 |
+
RingConnectivityTable copy = new RingConnectivityTable();
|
33 |
+
copy.ringShapes.addAll(ringShapes);
|
34 |
+
copy.neighbouringRings.addAll(neighbouringRings);
|
35 |
+
copy.directionFromRingToNeighbouringRing.addAll(directionFromRingToNeighbouringRing);
|
36 |
+
copy.usedRings.addAll(usedRings);
|
37 |
+
return copy;
|
38 |
+
}
|
39 |
+
}
|
40 |
+
|
41 |
+
/**
|
42 |
+
* Wrapper for a ring of a fused ring system with the shape that ring is currently being treated as having
|
43 |
+
* @author dl387
|
44 |
+
*
|
45 |
+
*/
|
46 |
+
private static class RingShape{
|
47 |
+
private final Ring ring;
|
48 |
+
private final FusionRingShape shape;
|
49 |
+
public RingShape(Ring ring, FusionRingShape shape) {
|
50 |
+
this.ring = ring;
|
51 |
+
this.shape = shape;
|
52 |
+
}
|
53 |
+
Ring getRing() {
|
54 |
+
return ring;
|
55 |
+
}
|
56 |
+
FusionRingShape getShape() {
|
57 |
+
return shape;
|
58 |
+
}
|
59 |
+
}
|
60 |
+
|
61 |
+
/**
 * The shapes/orientations a ring may be treated as having when laying out a fused ring system.
 * The "House" shapes apply to 5 membered rings, the "SevenMembered" shapes to 7 membered rings.
 */
enum FusionRingShape {
	/** 5 membered ring */
	enterFromLeftHouse,
	/** 5 membered ring */
	enterFromTopLeftHouse,
	/** 5 membered ring */
	enterFromTopRightHouse,
	/** 5 membered ring */
	enterFromRightHouse,
	/** 7 membered ring */
	enterFromLeftSevenMembered,
	/** 7 membered ring */
	enterFromTopSevenMembered,
	/** 7 membered ring */
	enterFromRightSevenMembered,
	/** 7 membered ring */
	enterFromBottomRightSevenMembered,
	/** 7 membered ring */
	enterFromBottomLeftSevenMembered,
	/** the single shape used for rings that are not treated in multiple orientations */
	standard
}
|
73 |
+
|
74 |
+
private static class Chain {
|
75 |
+
private final int length;
|
76 |
+
private final int startingX;
|
77 |
+
private final int y;
|
78 |
+
|
79 |
+
Chain(int length, int startingX, int y) {
|
80 |
+
this.length = length;
|
81 |
+
this.startingX = startingX;
|
82 |
+
this.y = y;
|
83 |
+
}
|
84 |
+
|
85 |
+
int getLength() {
|
86 |
+
return length;
|
87 |
+
}
|
88 |
+
int getStartingX() {
|
89 |
+
return startingX;
|
90 |
+
}
|
91 |
+
int getY() {
|
92 |
+
return y;
|
93 |
+
}
|
94 |
+
}
|
95 |
+
|
96 |
+
/**
|
97 |
+
* Sorts by atomSequences by the IUPAC rules for determining the preferred labelling
|
98 |
+
* The most preferred will be sorted to the back (0th position)
|
99 |
+
* @author dl387
|
100 |
+
*
|
101 |
+
*/
|
102 |
+
private static class SortAtomSequences implements Comparator<List<Atom>> {
|
103 |
+
|
104 |
+
public int compare(List<Atom> sequenceA, List<Atom> sequenceB){
|
105 |
+
if (sequenceA.size() != sequenceB.size()){
|
106 |
+
//Error in fused ring building. Identified ring sequences not the same lengths!
|
107 |
+
return 0;
|
108 |
+
}
|
109 |
+
|
110 |
+
int i=0;
|
111 |
+
int j=0;
|
112 |
+
//Give low numbers for the heteroatoms as a set.
|
113 |
+
while(i < sequenceA.size()){
|
114 |
+
Atom atomA=sequenceA.get(i);
|
115 |
+
boolean isAaHeteroatom = atomA.getElement() != ChemEl.C;
|
116 |
+
|
117 |
+
|
118 |
+
//bridgehead carbon do not increment numbering
|
119 |
+
if (!isAaHeteroatom && atomA.getBondCount()>=3){
|
120 |
+
i++;
|
121 |
+
continue;
|
122 |
+
}
|
123 |
+
|
124 |
+
Atom atomB=sequenceB.get(j);
|
125 |
+
boolean isBaHeteroatom =atomB.getElement() != ChemEl.C;
|
126 |
+
if (!isBaHeteroatom && atomB.getBondCount()>=3){
|
127 |
+
j++;
|
128 |
+
continue;
|
129 |
+
}
|
130 |
+
|
131 |
+
if (isAaHeteroatom && !isBaHeteroatom){
|
132 |
+
return -1;
|
133 |
+
}
|
134 |
+
if (isBaHeteroatom && !isAaHeteroatom){
|
135 |
+
return 1;
|
136 |
+
}
|
137 |
+
i++;j++;
|
138 |
+
}
|
139 |
+
|
140 |
+
i=0;
|
141 |
+
j=0;
|
142 |
+
//Give low numbers for heteroatoms when considered in the order: O, S, Se, Te, N, P, As, Sb, Bi, Si, Ge, Sn, Pb, B, Hg
|
143 |
+
while(i < sequenceA.size()){
|
144 |
+
Atom atomA=sequenceA.get(i);
|
145 |
+
|
146 |
+
//bridgehead carbon do not increment numbering
|
147 |
+
if (atomA.getElement() == ChemEl.C && atomA.getBondCount()>=3){
|
148 |
+
i++;
|
149 |
+
continue;
|
150 |
+
}
|
151 |
+
|
152 |
+
Atom atomB=sequenceB.get(j);
|
153 |
+
if (atomB.getElement() == ChemEl.C && atomB.getBondCount()>=3){
|
154 |
+
j++;
|
155 |
+
continue;
|
156 |
+
}
|
157 |
+
|
158 |
+
Integer heteroAtomPriorityA = heteroAtomValues.get(atomA.getElement());
|
159 |
+
int atomAElementValue = heteroAtomPriorityA != null ? heteroAtomPriorityA : 0;
|
160 |
+
|
161 |
+
Integer heteroAtomPriorityB = heteroAtomValues.get(atomB.getElement());
|
162 |
+
int atomBElementValue = heteroAtomPriorityB != null ? heteroAtomPriorityB : 0;
|
163 |
+
|
164 |
+
if (atomAElementValue > atomBElementValue){
|
165 |
+
return -1;
|
166 |
+
}
|
167 |
+
if (atomAElementValue < atomBElementValue){
|
168 |
+
return 1;
|
169 |
+
}
|
170 |
+
i++;j++;
|
171 |
+
}
|
172 |
+
|
173 |
+
//Give low numbers to fusion carbon atoms.
|
174 |
+
for ( i = 0; i < sequenceA.size(); i++) {
|
175 |
+
Atom atomA=sequenceA.get(i);
|
176 |
+
Atom atomB=sequenceB.get(i);
|
177 |
+
if (atomA.getBondCount()>=3 && atomA.getElement() == ChemEl.C){
|
178 |
+
if (!(atomB.getBondCount()>=3 && atomB.getElement() == ChemEl.C)){
|
179 |
+
return -1;
|
180 |
+
}
|
181 |
+
}
|
182 |
+
if (atomB.getBondCount()>=3 && atomB.getElement() == ChemEl.C){
|
183 |
+
if (!(atomA.getBondCount()>=3 && atomA.getElement() == ChemEl.C)){
|
184 |
+
return 1;
|
185 |
+
}
|
186 |
+
}
|
187 |
+
}
|
188 |
+
//Note that any sequences still unsorted at this step will have fusion carbon atoms in the same places
|
189 |
+
//which means you can go through both sequences without constantly looking for fusion carbons i.e. the variable j is no longer needed
|
190 |
+
|
191 |
+
//Give low numbers to fusion rather than non-fusion atoms of the same heteroelement.
|
192 |
+
for (i = 0; i < sequenceA.size(); i++) {
|
193 |
+
Atom atomA=sequenceA.get(i);
|
194 |
+
Atom atomB=sequenceB.get(i);
|
195 |
+
if (atomA.getBondCount()>=3){
|
196 |
+
if (!(atomB.getBondCount()>=3)){
|
197 |
+
return -1;
|
198 |
+
}
|
199 |
+
}
|
200 |
+
if (atomB.getBondCount()>=3){
|
201 |
+
if (!(atomA.getBondCount()>=3)){
|
202 |
+
return 1;
|
203 |
+
}
|
204 |
+
}
|
205 |
+
}
|
206 |
+
//TODO consider heteroatoms FR5.4d
|
207 |
+
return 0;
|
208 |
+
}
|
209 |
+
}
|
210 |
+
|
211 |
+
private static final Map<ChemEl, Integer> heteroAtomValues = new EnumMap<>(ChemEl.class);
static{
	//Elements are listed from the lowest value (Hg = 2) to the highest value (F = 24), each element's value is one more than its predecessor's
	//unknown heteroatoms or carbon have no entry and are given a value of 0
	ChemEl[] elementsInAscendingValue = {
		ChemEl.Hg, ChemEl.Tl, ChemEl.In, ChemEl.Ga, ChemEl.Al, ChemEl.B,
		ChemEl.Pb, ChemEl.Sn, ChemEl.Ge, ChemEl.Si,
		ChemEl.Bi, ChemEl.Sb, ChemEl.As, ChemEl.P, ChemEl.N,
		ChemEl.Te, ChemEl.Se, ChemEl.S, ChemEl.O,
		ChemEl.I, ChemEl.Br, ChemEl.Cl, ChemEl.F
	};
	for (int idx = 0; idx < elementsInAscendingValue.length; idx++) {
		heteroAtomValues.put(elementsInAscendingValue[idx], idx + 2);
	}
}
|
238 |
+
/*
 * The meaning of the integers used is as follows:
 *
 *            2
 *       3    ^    1
 *         \  |  /
 *  +-4 <-         -> 0
 *         /  |  \
 *      -3    v    -1
 *           -2
 *
 * They indicate the relative directions between rings
 * Possibly enums should be used...
 */
|
251 |
+
|
252 |
+
/**
|
253 |
+
* Numbers the fused ring
|
254 |
+
* Works reliably for all common ring systems.
|
255 |
+
* Some complex fused ring systems involving multiple connections to rings with an odd number of edges may still be wrong
|
256 |
+
* @param fusedRing
|
257 |
+
* @throws StructureBuildingException
|
258 |
+
*/
|
259 |
+
static void numberFusedRing(Fragment fusedRing) throws StructureBuildingException {
|
260 |
+
List<Ring> rings = SSSRFinder.getSetOfSmallestRings(fusedRing);
|
261 |
+
if (rings.size() <2) {
|
262 |
+
throw new StructureBuildingException("Ring perception system found less than 2 rings within input fragment!");
|
263 |
+
}
|
264 |
+
List<Atom> atomList = fusedRing.getAtomList();
|
265 |
+
setupAdjacentFusedRingProperties(rings);
|
266 |
+
if (!checkRingApplicability(rings)) {
|
267 |
+
for (Atom atom : atomList) {
|
268 |
+
atom.clearLocants();
|
269 |
+
}
|
270 |
+
return;
|
271 |
+
}
|
272 |
+
List<List<Atom>> atomSequences = determinePossiblePeripheryAtomOrders(rings, atomList.size());
|
273 |
+
if (atomSequences.size()==0){
|
274 |
+
for (Atom atom : atomList) {
|
275 |
+
atom.clearLocants();
|
276 |
+
}
|
277 |
+
return;
|
278 |
+
}
|
279 |
+
|
280 |
+
// add missing atoms to each path
|
281 |
+
for (List<Atom> path : atomSequences) {//TODO properly support interior atom labelling
|
282 |
+
for(Atom atom : atomList) {
|
283 |
+
if(!path.contains(atom)) {
|
284 |
+
path.add(atom);
|
285 |
+
}
|
286 |
+
}
|
287 |
+
}
|
288 |
+
// find the preferred numbering scheme then relabel with this scheme
|
289 |
+
Collections.sort(atomSequences, new SortAtomSequences());
|
290 |
+
FragmentTools.relabelLocantsAsFusedRingSystem(atomSequences.get(0));
|
291 |
+
fusedRing.reorderAtomCollection(atomSequences.get(0));
|
292 |
+
}
|
293 |
+
|
294 |
+
/**
|
295 |
+
* Populates rings with their neighbouring fused rings and the bonds involved
|
296 |
+
* @param rings
|
297 |
+
*/
|
298 |
+
static void setupAdjacentFusedRingProperties(List<Ring> rings){
|
299 |
+
for (int i = 0, l = rings.size(); i < l; i++) {
|
300 |
+
Ring curRing = rings.get(i);
|
301 |
+
bondLoop : for (Bond bond : curRing.getBondList()) { // go through all the bonds for the current ring
|
302 |
+
for (int j = i + 1; j < l; j++) {
|
303 |
+
Ring otherRing = rings.get(j);
|
304 |
+
if (otherRing.getBondList().contains(bond)) { // check if this bond belongs to any other ring
|
305 |
+
otherRing.addNeighbour(bond, curRing);
|
306 |
+
curRing.addNeighbour(bond, otherRing); // if so, then associate the bond with the adjacent ring
|
307 |
+
continue bondLoop;
|
308 |
+
}
|
309 |
+
}
|
310 |
+
}
|
311 |
+
}
|
312 |
+
}
|
313 |
+
|
314 |
+
/**
|
315 |
+
* Checks that all the rings are of sizes 3-8 or if larger than 8 are involved in 2 or fewer fused bonds
|
316 |
+
* @param rings
|
317 |
+
* @return
|
318 |
+
*/
|
319 |
+
private static boolean checkRingApplicability(List<Ring> rings) {
|
320 |
+
for (Ring ring : rings) {
|
321 |
+
if (ring.size() <=2){
|
322 |
+
throw new RuntimeException("Invalid ring size: " +ring.size());
|
323 |
+
}
|
324 |
+
if (ring.size() >8 && ring.getNumberOfFusedBonds() > 2){
|
325 |
+
return false;
|
326 |
+
}
|
327 |
+
}
|
328 |
+
return true;
|
329 |
+
}
|
330 |
+
|
331 |
+
/**
|
332 |
+
* Returns possible enumerations of atoms. Currently Interior atoms are not considered.
|
333 |
+
* These enumerations will be compliant with rules FR5.1-FR5.3 of the fused ring nomenclature guidelines
|
334 |
+
* http://www.chem.qmul.ac.uk/iupac/fusedring/FR51.html
|
335 |
+
* @param rings
|
336 |
+
* @param atomCountOfFusedRingSystem
|
337 |
+
* @return
|
338 |
+
* @throws StructureBuildingException
|
339 |
+
*/
|
340 |
+
private static List<List<Atom>> determinePossiblePeripheryAtomOrders(List<Ring> rings, int atomCountOfFusedRingSystem) throws StructureBuildingException {
|
341 |
+
List<Ring> tRings = findTerminalRings(rings);
|
342 |
+
if (tRings.size()<1) {
|
343 |
+
throw new RuntimeException("OPSIN bug: Unable to find a terminal ring in fused ring system");
|
344 |
+
}
|
345 |
+
Ring tRing = tRings.get(0);
|
346 |
+
Bond b1 = getStartingNonFusedBond(tRing);
|
347 |
+
if(b1 == null) {
|
348 |
+
throw new RuntimeException("OPSIN Bug: Non-fused bond from terminal ring not found");
|
349 |
+
}
|
350 |
+
|
351 |
+
List<RingConnectivityTable> cts = new ArrayList<>();
|
352 |
+
RingConnectivityTable startingCT = new RingConnectivityTable();
|
353 |
+
cts.add(startingCT);
|
354 |
+
buildRingConnectionTables(tRing, null, 0, b1, b1.getFromAtom(), startingCT, cts);
|
355 |
+
//The preference against fusion to elongated edges is built into the construction of the ring table
|
356 |
+
|
357 |
+
/* FR 5.1.1/FR 5.1.2 Preferred shapes preferred to distorted shapes */
|
358 |
+
removeCTsWithDistortedRingShapes(cts);
|
359 |
+
//TODO better implement the corner cases of FR 5.1.3-5.1.5
|
360 |
+
|
361 |
+
/* FR-5.2a. Maximum number of rings in a horizontal row */
|
362 |
+
Map<RingConnectivityTable, List<Integer>> horizonalRowDirections = findLongestChainDirections(cts);
|
363 |
+
List<Ring[][]> ringMaps = createRingMapsAlignedAlongGivenhorizonalRowDirections(horizonalRowDirections);
|
364 |
+
/* FR-5.2b-d */
|
365 |
+
return findPossiblePaths(ringMaps, atomCountOfFusedRingSystem);
|
366 |
+
}
|
367 |
+
|
368 |
+
/**
|
369 |
+
* Finds the rings with the minimum number of fused bonds
|
370 |
+
* @param rings
|
371 |
+
* @return
|
372 |
+
*/
|
373 |
+
private static List<Ring> findTerminalRings(List<Ring> rings) {
|
374 |
+
List<Ring> tRings = new ArrayList<>();
|
375 |
+
|
376 |
+
int minFusedBonds = Integer.MAX_VALUE;
|
377 |
+
for (Ring ring : rings){
|
378 |
+
if (ring.getNumberOfFusedBonds() < minFusedBonds) {
|
379 |
+
minFusedBonds = ring.getNumberOfFusedBonds();
|
380 |
+
}
|
381 |
+
}
|
382 |
+
|
383 |
+
for (Ring ring : rings){
|
384 |
+
if (ring.getNumberOfFusedBonds() == minFusedBonds) {
|
385 |
+
tRings.add(ring);
|
386 |
+
}
|
387 |
+
}
|
388 |
+
return tRings;
|
389 |
+
}
|
390 |
+
|
391 |
+
/**
|
392 |
+
* Recursive function to create the connectivity table of the rings, for each connection includes both directions
|
393 |
+
* @param currentRing
|
394 |
+
* @param previousRing
|
395 |
+
* @param previousDir
|
396 |
+
* @param previousBond
|
397 |
+
* @param atom
|
398 |
+
* @param ct
|
399 |
+
* @param cts
|
400 |
+
* @return
|
401 |
+
*/
|
402 |
+
private static List<RingConnectivityTable> buildRingConnectionTables(Ring currentRing, Ring previousRing, int previousDir, Bond previousBond, Atom atom, RingConnectivityTable ct, List<RingConnectivityTable> cts) {
|
403 |
+
// order atoms and bonds in the ring
|
404 |
+
currentRing.makeCyclicLists(previousBond, atom);
|
405 |
+
List<RingConnectivityTable> generatedCts = new ArrayList<>();
|
406 |
+
List<FusionRingShape> allowedShapes = getAllowedShapesForRing(currentRing, previousBond);
|
407 |
+
if (allowedShapes.size() == 0) {
|
408 |
+
throw new RuntimeException("OPSIN limitation, unsupported ring size in fused ring numbering");
|
409 |
+
}
|
410 |
+
ct.usedRings.add(currentRing);
|
411 |
+
for (int i = allowedShapes.size() - 1; i >=0; i--) {
|
412 |
+
FusionRingShape fusionRingShape = allowedShapes.get(i);
|
413 |
+
RingConnectivityTable currentCT;
|
414 |
+
if (i==0) {
|
415 |
+
currentCT = ct;
|
416 |
+
}
|
417 |
+
else{
|
418 |
+
currentCT = ct.copy();
|
419 |
+
cts.add(currentCT);
|
420 |
+
generatedCts.add(currentCT);
|
421 |
+
}
|
422 |
+
RingShape ringShape = new RingShape(currentRing, fusionRingShape);
|
423 |
+
List<RingConnectivityTable> ctsToExpand = new ArrayList<>();
|
424 |
+
ctsToExpand.add(currentCT);//all the cts to consider, the currentCT and generated clones
|
425 |
+
for (Ring neighbourRing : currentRing.getNeighbours()) {
|
426 |
+
//find the directions between the current ring and all neighbouring rings including the previous ring
|
427 |
+
// this means that the direction to the previous ring will then be known in both directions
|
428 |
+
|
429 |
+
// find direction
|
430 |
+
Bond currentBond = findFusionBond(currentRing, neighbourRing);
|
431 |
+
|
432 |
+
int dir = 0;
|
433 |
+
if (neighbourRing == previousRing) {
|
434 |
+
dir = getOppositeDirection(previousDir);
|
435 |
+
}
|
436 |
+
else {
|
437 |
+
dir = calculateRingDirection(ringShape, previousBond, currentBond, previousDir);
|
438 |
+
}
|
439 |
+
//System.out.println(currentRing +"|" +neighbourRing +"|" +dir +"|" +(neighbourRing==previousRing));
|
440 |
+
|
441 |
+
// place into connectivity table, like graph, rings and their connection
|
442 |
+
for (RingConnectivityTable ctToExpand : ctsToExpand) {
|
443 |
+
ctToExpand.ringShapes.add(ringShape);
|
444 |
+
ctToExpand.neighbouringRings.add(neighbourRing);
|
445 |
+
ctToExpand.directionFromRingToNeighbouringRing.add(dir);
|
446 |
+
}
|
447 |
+
if (!currentCT.usedRings.contains(neighbourRing)) {
|
448 |
+
List<RingConnectivityTable> newCts = new ArrayList<>();
|
449 |
+
for (RingConnectivityTable ctToExpand : ctsToExpand) {
|
450 |
+
Atom a = getAtomFromBond(currentRing, currentBond);
|
451 |
+
List<RingConnectivityTable> generatedDownStreamCts = buildRingConnectionTables(neighbourRing, currentRing, dir, currentBond, a, ctToExpand, cts);
|
452 |
+
newCts.addAll(generatedDownStreamCts);
|
453 |
+
}
|
454 |
+
ctsToExpand.addAll(newCts);
|
455 |
+
generatedCts.addAll(newCts);
|
456 |
+
}
|
457 |
+
}
|
458 |
+
}
|
459 |
+
return generatedCts;
|
460 |
+
}
|
461 |
+
|
462 |
+
/**
|
463 |
+
* Returns the allowed shapes for the given ring.
|
464 |
+
* The starting bond is required to assured that elongated bonds do not unnecesarily correspond to fusions
|
465 |
+
* Currently only 5 membered rings are considered in multiple orientations but the same
|
466 |
+
* is probably required for 7+ member rings
|
467 |
+
* @param ring
|
468 |
+
* @param startingBond
|
469 |
+
* @return
|
470 |
+
*/
|
471 |
+
private static List<FusionRingShape> getAllowedShapesForRing(Ring ring, Bond startingBond) {
|
472 |
+
List<FusionRingShape> allowedRingShapes = new ArrayList<>();
|
473 |
+
int size = ring.size();
|
474 |
+
if (size==5){
|
475 |
+
List<Bond> fusedBonds = ring.getFusedBonds();
|
476 |
+
int fusedBondCount = fusedBonds.size();
|
477 |
+
if (fusedBondCount==1){
|
478 |
+
allowedRingShapes.add(FusionRingShape.enterFromLeftHouse);
|
479 |
+
}
|
480 |
+
else if (fusedBondCount==2 || fusedBondCount==3 || fusedBondCount==4){
|
481 |
+
List<Integer> distances = new ArrayList<>();//one distance is likely to be 0
|
482 |
+
for (Bond fusedBond : fusedBonds) {
|
483 |
+
distances.add(calculateDistanceBetweenBonds(startingBond, fusedBond, ring));
|
484 |
+
}
|
485 |
+
if (!distances.contains(1)){
|
486 |
+
allowedRingShapes.add(FusionRingShape.enterFromLeftHouse);
|
487 |
+
}
|
488 |
+
if (!distances.contains(4)){
|
489 |
+
allowedRingShapes.add(FusionRingShape.enterFromRightHouse);
|
490 |
+
}
|
491 |
+
|
492 |
+
if (!distances.contains(2)){
|
493 |
+
allowedRingShapes.add(FusionRingShape.enterFromTopLeftHouse);
|
494 |
+
}
|
495 |
+
else if (!distances.contains(3)){
|
496 |
+
allowedRingShapes.add(FusionRingShape.enterFromTopRightHouse);
|
497 |
+
}
|
498 |
+
allowedRingShapes = removeDegenerateRingShapes(allowedRingShapes, distances, 5);
|
499 |
+
}
|
500 |
+
else if (fusedBondCount==5){
|
501 |
+
allowedRingShapes.add(FusionRingShape.enterFromLeftHouse);
|
502 |
+
allowedRingShapes.add(FusionRingShape.enterFromRightHouse);
|
503 |
+
//top left and top right are the same other than position of the elongated bond which will invariably be used anyway
|
504 |
+
allowedRingShapes.add(FusionRingShape.enterFromTopLeftHouse);
|
505 |
+
}
|
506 |
+
}
|
507 |
+
else if (size==7){
|
508 |
+
List<Bond> fusedBonds = ring.getFusedBonds();
|
509 |
+
int fusedBondCount = fusedBonds.size();
|
510 |
+
if (fusedBondCount==1){
|
511 |
+
allowedRingShapes.add(FusionRingShape.enterFromLeftSevenMembered);
|
512 |
+
}
|
513 |
+
else{
|
514 |
+
List<Integer> distances = new ArrayList<>();//one distance is likely to be 0
|
515 |
+
for (Bond fusedBond : fusedBonds) {
|
516 |
+
distances.add(calculateDistanceBetweenBonds(startingBond, fusedBond, ring));
|
517 |
+
}
|
518 |
+
if (!distances.contains(4) && !distances.contains(6)){
|
519 |
+
allowedRingShapes.add(FusionRingShape.enterFromLeftSevenMembered);
|
520 |
+
}
|
521 |
+
if (!distances.contains(1) && !distances.contains(6)){
|
522 |
+
allowedRingShapes.add(FusionRingShape.enterFromTopSevenMembered);
|
523 |
+
}
|
524 |
+
if (!distances.contains(1) && !distances.contains(3)){
|
525 |
+
allowedRingShapes.add(FusionRingShape.enterFromRightSevenMembered);
|
526 |
+
}
|
527 |
+
if (!distances.contains(2) && !distances.contains(4)){
|
528 |
+
allowedRingShapes.add(FusionRingShape.enterFromBottomRightSevenMembered);
|
529 |
+
}
|
530 |
+
if (!distances.contains(3) && !distances.contains(5)){
|
531 |
+
allowedRingShapes.add(FusionRingShape.enterFromBottomLeftSevenMembered);
|
532 |
+
}
|
533 |
+
allowedRingShapes = removeDegenerateRingShapes(allowedRingShapes, distances, 7);
|
534 |
+
}
|
535 |
+
}
|
536 |
+
else{
|
537 |
+
allowedRingShapes.add(FusionRingShape.standard);
|
538 |
+
}
|
539 |
+
return allowedRingShapes;
|
540 |
+
}
|
541 |
+
|
542 |
+
/**
|
543 |
+
* Removes the ring shapes that for given distances have identical properties
|
544 |
+
* @param allowedRingShapes
|
545 |
+
* @param distances
|
546 |
+
* @param ringSize
|
547 |
+
*/
|
548 |
+
private static List<FusionRingShape> removeDegenerateRingShapes(List<FusionRingShape> allowedRingShapes, List<Integer> distances, int ringSize) {
|
549 |
+
distances = new ArrayList<>(distances);
|
550 |
+
distances.remove((Integer)0);//remove distance 0 if present, this invariably comes from the starting bond and is not of interest (and breaks getDirectionFromDist)
|
551 |
+
for (int i = allowedRingShapes.size() - 1; i >=0; i--) {
|
552 |
+
FusionRingShape shapeToConsiderRemoving = allowedRingShapes.get(i);
|
553 |
+
for (int j = i - 1; j >=0; j--) {
|
554 |
+
FusionRingShape shapeToCompareWith = allowedRingShapes.get(j);
|
555 |
+
boolean foundDifference = false;
|
556 |
+
for (Integer distance : distances) {
|
557 |
+
if (getDirectionFromDist(shapeToConsiderRemoving, ringSize, distance) != getDirectionFromDist(shapeToCompareWith, ringSize, distance)){
|
558 |
+
foundDifference = true;
|
559 |
+
break;
|
560 |
+
}
|
561 |
+
}
|
562 |
+
if (!foundDifference){
|
563 |
+
allowedRingShapes.remove(i);
|
564 |
+
break;
|
565 |
+
}
|
566 |
+
}
|
567 |
+
}
|
568 |
+
|
569 |
+
return allowedRingShapes;
|
570 |
+
}
|
571 |
+
|
572 |
+
/**
|
573 |
+
* Calculates the direction of the next ring according to the distance between fusion bonds and the previous direction
|
574 |
+
* @param ringShape
|
575 |
+
* @param previousBond
|
576 |
+
* @param currentBond
|
577 |
+
* @param previousDir
|
578 |
+
* @return
|
579 |
+
*/
|
580 |
+
private static int calculateRingDirection(RingShape ringShape, Bond previousBond, Bond currentBond, int previousDir) {
|
581 |
+
// take the ring fused to one from the previous loop step
|
582 |
+
Ring ring = ringShape.getRing();
|
583 |
+
if (ring.getCyclicBondList() == null ) {
|
584 |
+
throw new RuntimeException("OPSIN bug: cyclic bond set should have already been populated");
|
585 |
+
}
|
586 |
+
|
587 |
+
int dist = calculateDistanceBetweenBonds(previousBond, currentBond, ring);
|
588 |
+
|
589 |
+
if (dist == 0) {
|
590 |
+
throw new RuntimeException("OPSIN bug: Distance between bonds is equal to 0");
|
591 |
+
}
|
592 |
+
|
593 |
+
int relativeDir = getDirectionFromDist(ringShape.getShape(), ring.size(), dist);
|
594 |
+
return determineAbsoluteDirectionUsingPreviousDirection(ringShape.getShape(), ring.size(), relativeDir, previousDir);
|
595 |
+
}
|
596 |
+
|
597 |
+
/**
|
598 |
+
* Given two bonds on a ring returns the distance (in bonds) between them
|
599 |
+
* @param bond1
|
600 |
+
* @param bond2
|
601 |
+
* @param ring
|
602 |
+
* @return
|
603 |
+
*/
|
604 |
+
private static int calculateDistanceBetweenBonds(Bond bond1, Bond bond2, Ring ring) {
|
605 |
+
List<Bond> cyclicBondList =ring.getCyclicBondList();
|
606 |
+
int previousBondIndice = cyclicBondList.indexOf(bond1);
|
607 |
+
int currentBondIndice = cyclicBondList.indexOf(bond2);
|
608 |
+
if (previousBondIndice==-1 || currentBondIndice==-1){
|
609 |
+
throw new RuntimeException("OPSIN bug: previous and current bond were not present in the cyclic bond list of the current ring");
|
610 |
+
}
|
611 |
+
int ringSize =ring.size();
|
612 |
+
int dist = (ringSize + currentBondIndice - previousBondIndice) % ringSize;
|
613 |
+
return dist;
|
614 |
+
}
|
615 |
+
|
616 |
+
/**
|
617 |
+
* Uses the ring shape, the ring size and distance between the incoming and outgoing fused bond to determine
|
618 |
+
* the relative direction between the entry point on the ring and the exit point
|
619 |
+
* @param fusionRingShape
|
620 |
+
* @param ringSize
|
621 |
+
* @param dist
|
622 |
+
* @return
|
623 |
+
*/
|
624 |
+
private static int getDirectionFromDist(FusionRingShape fusionRingShape, int ringSize, int dist) {
|
625 |
+
int dir=0;
|
626 |
+
if (ringSize == 3) { // 3 member ring
|
627 |
+
if (dist == 1) {
|
628 |
+
dir = -1;
|
629 |
+
}
|
630 |
+
else if (dist == 2) {
|
631 |
+
dir = 1;
|
632 |
+
}
|
633 |
+
else throw new RuntimeException("Impossible distance between bonds for a 3 membered ring");
|
634 |
+
}
|
635 |
+
else if (ringSize == 4) { // 4 member ring
|
636 |
+
if (dist ==1) {
|
637 |
+
dir = -2;
|
638 |
+
}
|
639 |
+
else if (dist == 2) {
|
640 |
+
dir = 0;
|
641 |
+
}
|
642 |
+
else if (dist ==3) {
|
643 |
+
dir = 2;
|
644 |
+
}
|
645 |
+
else throw new RuntimeException("Impossible distance between bonds for a 4 membered ring");
|
646 |
+
}
|
647 |
+
else if (ringSize == 5) { // 5 member ring
|
648 |
+
switch (fusionRingShape) {
|
649 |
+
case enterFromLeftHouse:
|
650 |
+
if (dist ==1){
|
651 |
+
dir = -2;//fusion to an elongated bond
|
652 |
+
}
|
653 |
+
else if (dist ==2){
|
654 |
+
dir = 0;
|
655 |
+
}
|
656 |
+
else if (dist ==3){
|
657 |
+
dir = 1;
|
658 |
+
}
|
659 |
+
else if (dist ==4){
|
660 |
+
dir = 3;
|
661 |
+
}
|
662 |
+
else {
|
663 |
+
throw new RuntimeException("Impossible distance between bonds for a 5 membered ring");
|
664 |
+
}
|
665 |
+
break;
|
666 |
+
case enterFromTopLeftHouse:
|
667 |
+
if (dist ==1){
|
668 |
+
dir = -3;
|
669 |
+
}
|
670 |
+
else if (dist ==2){
|
671 |
+
dir = -1;//fusion to an elongated bond
|
672 |
+
}
|
673 |
+
else if (dist ==3){
|
674 |
+
dir = 1;
|
675 |
+
}
|
676 |
+
else if (dist ==4){
|
677 |
+
dir = 3;
|
678 |
+
}
|
679 |
+
else {
|
680 |
+
throw new RuntimeException("Impossible distance between bonds for a 5 membered ring");
|
681 |
+
}
|
682 |
+
break;
|
683 |
+
case enterFromTopRightHouse:
|
684 |
+
if (dist ==1){
|
685 |
+
dir = -3;
|
686 |
+
}
|
687 |
+
else if (dist ==2){
|
688 |
+
dir = -1;
|
689 |
+
}
|
690 |
+
else if (dist ==3){
|
691 |
+
dir = 1;//fusion to an elongated bond
|
692 |
+
}
|
693 |
+
else if (dist ==4){
|
694 |
+
dir = 3;
|
695 |
+
}
|
696 |
+
else {
|
697 |
+
throw new RuntimeException("Impossible distance between bonds for a 5 membered ring");
|
698 |
+
}
|
699 |
+
break;
|
700 |
+
case enterFromRightHouse:
|
701 |
+
if (dist ==1){
|
702 |
+
dir = -3;
|
703 |
+
}
|
704 |
+
else if (dist ==2){
|
705 |
+
dir = -1;
|
706 |
+
}
|
707 |
+
else if (dist ==3){
|
708 |
+
dir = 0;
|
709 |
+
}
|
710 |
+
else if (dist ==4){
|
711 |
+
dir = 2;//fusion to an elongated bond
|
712 |
+
}
|
713 |
+
else {
|
714 |
+
throw new RuntimeException("Impossible distance between bonds for a 5 membered ring");
|
715 |
+
}
|
716 |
+
break;
|
717 |
+
default :
|
718 |
+
throw new RuntimeException("OPSIN Bug: Unrecognised fusion ring shape for 5 membered ring");
|
719 |
+
}
|
720 |
+
}
|
721 |
+
else if (ringSize == 7) { // 7 member ring
|
722 |
+
switch (fusionRingShape) {
|
723 |
+
case enterFromLeftSevenMembered:
|
724 |
+
if (dist ==1){
|
725 |
+
dir = -3;
|
726 |
+
}
|
727 |
+
else if (dist ==2){
|
728 |
+
dir = -1;
|
729 |
+
}
|
730 |
+
else if (dist ==3){
|
731 |
+
dir = 0;
|
732 |
+
}
|
733 |
+
else if (dist ==4){
|
734 |
+
dir = 1;//fusion to an abnormally angled bond
|
735 |
+
}
|
736 |
+
else if (dist ==5){
|
737 |
+
dir = 2;
|
738 |
+
}
|
739 |
+
else if (dist ==6){
|
740 |
+
dir = 3;//fusion to an abnormally angled bond
|
741 |
+
}
|
742 |
+
else {
|
743 |
+
throw new RuntimeException("Impossible distance between bonds for a 7 membered ring");
|
744 |
+
}
|
745 |
+
break;
|
746 |
+
case enterFromTopSevenMembered:
|
747 |
+
if (dist ==1){
|
748 |
+
dir = -3;//fusion to an abnormally angled bond
|
749 |
+
}
|
750 |
+
else if (dist ==2){
|
751 |
+
dir = -2;
|
752 |
+
}
|
753 |
+
else if (dist ==3){
|
754 |
+
dir = -1;
|
755 |
+
}
|
756 |
+
else if (dist ==4){
|
757 |
+
dir = 1;
|
758 |
+
}
|
759 |
+
else if (dist ==5){
|
760 |
+
dir = 2;
|
761 |
+
}
|
762 |
+
else if (dist ==6){
|
763 |
+
dir = 3;//fusion to an abnormally angled bond
|
764 |
+
}
|
765 |
+
else {
|
766 |
+
throw new RuntimeException("Impossible distance between bonds for a 7 membered ring");
|
767 |
+
}
|
768 |
+
break;
|
769 |
+
case enterFromRightSevenMembered:
|
770 |
+
if (dist ==1){
|
771 |
+
dir = -3;//fusion to an abnormally angled bond
|
772 |
+
}
|
773 |
+
else if (dist ==2){
|
774 |
+
dir = -2;
|
775 |
+
}
|
776 |
+
else if (dist ==3){
|
777 |
+
dir = -1;//fusion to an abnormally angled bond
|
778 |
+
}
|
779 |
+
else if (dist ==4){
|
780 |
+
dir = 0;
|
781 |
+
}
|
782 |
+
else if (dist ==5){
|
783 |
+
dir = 1;
|
784 |
+
}
|
785 |
+
else if (dist ==6){
|
786 |
+
dir = 3;
|
787 |
+
}
|
788 |
+
else {
|
789 |
+
throw new RuntimeException("Impossible distance between bonds for a 7 membered ring");
|
790 |
+
}
|
791 |
+
break;
|
792 |
+
case enterFromBottomRightSevenMembered:
|
793 |
+
if (dist ==1){
|
794 |
+
dir = -3;
|
795 |
+
}
|
796 |
+
else if (dist ==2){
|
797 |
+
dir = -2;//fusion to an abnormally angled bond
|
798 |
+
}
|
799 |
+
else if (dist ==3){
|
800 |
+
dir = -1;
|
801 |
+
}
|
802 |
+
else if (dist ==4){
|
803 |
+
dir = 0;//fusion to an abnormally angled bond
|
804 |
+
}
|
805 |
+
else if (dist ==5){
|
806 |
+
dir = 1;
|
807 |
+
}
|
808 |
+
else if (dist ==6){
|
809 |
+
dir = 3;
|
810 |
+
}
|
811 |
+
else {
|
812 |
+
throw new RuntimeException("Impossible distance between bonds for a 7 membered ring");
|
813 |
+
}
|
814 |
+
break;
|
815 |
+
case enterFromBottomLeftSevenMembered:
|
816 |
+
if (dist ==1){
|
817 |
+
dir = -3;
|
818 |
+
}
|
819 |
+
else if (dist ==2){
|
820 |
+
dir = -1;
|
821 |
+
}
|
822 |
+
else if (dist ==3){
|
823 |
+
dir = 0;//fusion to an abnormally angled bond
|
824 |
+
}
|
825 |
+
else if (dist ==4){
|
826 |
+
dir = 1;
|
827 |
+
}
|
828 |
+
else if (dist ==5){
|
829 |
+
dir = 2;//fusion to an abnormally angled bond
|
830 |
+
}
|
831 |
+
else if (dist ==6){
|
832 |
+
dir = 3;
|
833 |
+
}
|
834 |
+
else {
|
835 |
+
throw new RuntimeException("Impossible distance between bonds for a 7 membered ring");
|
836 |
+
}
|
837 |
+
break;
|
838 |
+
default:
|
839 |
+
throw new RuntimeException("OPSIN Bug: Unrecognised fusion ring shape for 7 membered ring");
|
840 |
+
}
|
841 |
+
}
|
842 |
+
else if (ringSize % 2 == 0) {//general case even number of atoms ring (a 6 membered ring or distortion of)
|
843 |
+
if (dist == 1) {
|
844 |
+
dir = -3;
|
845 |
+
}
|
846 |
+
else if (dist == ringSize-1) {
|
847 |
+
dir = 3;
|
848 |
+
}
|
849 |
+
else {
|
850 |
+
dir = dist - ringSize/2;
|
851 |
+
if (Math.abs(dir) > 2 && ringSize >= 8){// 8 and more neighbours
|
852 |
+
dir = -2 * Integer.signum(dir);
|
853 |
+
}
|
854 |
+
}
|
855 |
+
}
|
856 |
+
else {// general case odd number of atoms ring (distortion of an even numbered ring by insertion of one atom).
|
857 |
+
if (dist == 1) {
|
858 |
+
dir = -3;
|
859 |
+
}
|
860 |
+
else if (dist == ringSize/2 || dist == ringSize/2 + 1) {//0 in both cases as effectively we are using a different depiction of the ring system. See FR-5.1.1 (this is done to give the longest horizontal row)
|
861 |
+
dir = 0;
|
862 |
+
}
|
863 |
+
else if (dist == ringSize-1) {
|
864 |
+
dir = 3;
|
865 |
+
}
|
866 |
+
else if(dist < ringSize/2) {
|
867 |
+
dir = -2;
|
868 |
+
}
|
869 |
+
else if(dist > ringSize/2+1) {
|
870 |
+
dir = 2;
|
871 |
+
}
|
872 |
+
else{
|
873 |
+
throw new RuntimeException("OPSIN Bug: Unable to determine direction between odd number of atoms ring and next ring");
|
874 |
+
}
|
875 |
+
}
|
876 |
+
return dir;
|
877 |
+
}
|
878 |
+
|
879 |
+
private static void removeCTsWithDistortedRingShapes(List<RingConnectivityTable> cts) {
|
880 |
+
Map<RingConnectivityTable, List<Integer>> ctToDistortedRings = new HashMap<>();
|
881 |
+
for (RingConnectivityTable ct : cts) {
|
882 |
+
List<Integer> distortedRingSizes = new ArrayList<>();
|
883 |
+
ctToDistortedRings.put(ct, distortedRingSizes);
|
884 |
+
List<RingShape> ringShapes = ct.ringShapes;
|
885 |
+
for (int i = 0; i < ringShapes.size(); i++) {
|
886 |
+
Ring r1 = ringShapes.get(i).getRing();
|
887 |
+
Ring r2 = ct.neighbouringRings.get(i);
|
888 |
+
for (int j = i +1; j < ringShapes.size(); j++) {
|
889 |
+
if (ringShapes.get(j).getRing().equals(r2) && ct.neighbouringRings.get(j).equals(r1)){//look for the reverse entry in the ring connection table
|
890 |
+
int expectedDir = getOppositeDirection(ct.directionFromRingToNeighbouringRing.get(i));
|
891 |
+
if (expectedDir != ct.directionFromRingToNeighbouringRing.get(j)){
|
892 |
+
distortedRingSizes.add(r2.size());
|
893 |
+
}
|
894 |
+
}
|
895 |
+
}
|
896 |
+
}
|
897 |
+
}
|
898 |
+
int minDistortedRings = Integer.MAX_VALUE;//find the minimum number of distorted rings
|
899 |
+
for (List<Integer> distortedRingSizes : ctToDistortedRings.values()) {
|
900 |
+
if (distortedRingSizes.size() < minDistortedRings){
|
901 |
+
minDistortedRings = distortedRingSizes.size();
|
902 |
+
}
|
903 |
+
}
|
904 |
+
for (int i = cts.size()-1; i>=0; i--) {
|
905 |
+
if (ctToDistortedRings.get(cts.get(i)).size()>minDistortedRings){
|
906 |
+
cts.remove(i);
|
907 |
+
}
|
908 |
+
}
|
909 |
+
}
|
910 |
+
|
911 |
+
/**
|
912 |
+
* Given a list of cts find the longest chain of rings in a line. This can be used to find a possible horizontal row
|
913 |
+
* The output is a map between the connection tables and the directions which give the longest chains
|
914 |
+
* Some cts may have no directions that give a chain of rings of this length
|
915 |
+
*
|
916 |
+
* @param cts
|
917 |
+
* @return
|
918 |
+
*/
|
919 |
+
private static Map<RingConnectivityTable, List<Integer>> findLongestChainDirections(List<RingConnectivityTable> cts){
|
920 |
+
Map<RingConnectivityTable, List<Integer>> horizonalRowDirections = new LinkedHashMap<>();
|
921 |
+
int maxChain = 0;
|
922 |
+
for (RingConnectivityTable ct : cts) {
|
923 |
+
if (ct.ringShapes.size() != ct.neighbouringRings.size() || ct.neighbouringRings.size() != ct.directionFromRingToNeighbouringRing.size()) {
|
924 |
+
throw new RuntimeException("OPSIN Bug: Sizes of arrays in fused ring numbering connection table are not equal");
|
925 |
+
}
|
926 |
+
int ctEntriesSize = ct.ringShapes.size();
|
927 |
+
List<Integer> directions = new ArrayList<>();
|
928 |
+
horizonalRowDirections.put(ct, directions);
|
929 |
+
|
930 |
+
for (int i = 0; i < ctEntriesSize; i++) {
|
931 |
+
Ring neighbour = ct.neighbouringRings.get(i);
|
932 |
+
int curChain = 1;
|
933 |
+
int curDir = ct.directionFromRingToNeighbouringRing.get(i);
|
934 |
+
|
935 |
+
nextRingInChainLoop: for (int k = 0; k <= ct.usedRings.size(); k++) {//<= rather than < so buggy behaviour can be caught
|
936 |
+
int indexOfNeighbour = indexOfCorrespondingRingshape(ct.ringShapes, neighbour);
|
937 |
+
|
938 |
+
if (indexOfNeighbour >= 0) {
|
939 |
+
for (int j = indexOfNeighbour; j < ctEntriesSize; j++) {
|
940 |
+
if (ct.ringShapes.get(j).getRing() == neighbour && ct.directionFromRingToNeighbouringRing.get(j) == curDir) {
|
941 |
+
curChain++;
|
942 |
+
neighbour = ct.neighbouringRings.get(j);
|
943 |
+
continue nextRingInChainLoop;
|
944 |
+
}
|
945 |
+
}
|
946 |
+
}
|
947 |
+
else{
|
948 |
+
throw new RuntimeException("OPSIN bug: fused ring numbering: Ring missing from connection table");
|
949 |
+
}
|
950 |
+
if (curChain >= maxChain ) {
|
951 |
+
int oDir = getOppositeDirection(curDir);
|
952 |
+
if(curChain > maxChain){//new longest chain found
|
953 |
+
for (List<Integer> previousDirections: horizonalRowDirections.values()) {
|
954 |
+
previousDirections.clear();
|
955 |
+
}
|
956 |
+
}
|
957 |
+
// if we has this direction before or its opposite, it is the same orientation
|
958 |
+
if(curChain > maxChain || (!directions.contains(curDir) && !directions.contains(oDir))) {
|
959 |
+
directions.add(curDir);
|
960 |
+
}
|
961 |
+
maxChain = curChain;
|
962 |
+
}
|
963 |
+
break;
|
964 |
+
}
|
965 |
+
if (maxChain > ct.usedRings.size()){
|
966 |
+
throw new RuntimeException("OPSIN bug: fused ring layout contained a loop: more rings in a chain than there were rings!");
|
967 |
+
}
|
968 |
+
}
|
969 |
+
}
|
970 |
+
return horizonalRowDirections;
|
971 |
+
}
|
972 |
+
|
973 |
+
/**
|
974 |
+
* Given a list of ringShapes finds the indice of the ringShape corresponding to the given ring
|
975 |
+
* returns -1 if this is not possible
|
976 |
+
* @param ringShapes
|
977 |
+
* @param ring
|
978 |
+
* @return
|
979 |
+
*/
|
980 |
+
private static int indexOfCorrespondingRingshape(List<RingShape> ringShapes, Ring ring) {
|
981 |
+
for (int i = 0; i < ringShapes.size(); i++) {
|
982 |
+
if (ringShapes.get(i).getRing().equals(ring)){
|
983 |
+
return i;
|
984 |
+
}
|
985 |
+
}
|
986 |
+
return -1;
|
987 |
+
}
|
988 |
+
|
989 |
+
|
990 |
+
/**
|
991 |
+
* For each RingConnectivityTable and for each horizontal row direction creates a ringMap aligned along the given horizontal row direction
|
992 |
+
* @param horizonalRowDirectionsMap
|
993 |
+
* @return
|
994 |
+
* @throws StructureBuildingException
|
995 |
+
*/
|
996 |
+
private static List<Ring[][]> createRingMapsAlignedAlongGivenhorizonalRowDirections(Map<RingConnectivityTable, List<Integer>> horizonalRowDirectionsMap) throws StructureBuildingException {
|
997 |
+
List<Ring[][]> ringMaps = new ArrayList<>();
|
998 |
+
for (Entry<RingConnectivityTable, List<Integer>> entry : horizonalRowDirectionsMap.entrySet()) {
|
999 |
+
RingConnectivityTable ct = entry.getKey();
|
1000 |
+
if ( ct.ringShapes.size() != ct.neighbouringRings.size() || ct.neighbouringRings.size() != ct.directionFromRingToNeighbouringRing.size() || ct.ringShapes.size() <= 0) {
|
1001 |
+
throw new RuntimeException("OPSIN Bug: Sizes of arrays in fused ring numbering connection table are not equal");
|
1002 |
+
}
|
1003 |
+
int ctEntriesSize = ct.ringShapes.size();
|
1004 |
+
for (Integer horizonalRowDirection : entry.getValue()) {
|
1005 |
+
int[] directionFromRingToNeighbouringRing = new int[ctEntriesSize];
|
1006 |
+
// turn the ring system such as to be aligned along the horizonalRowDirection
|
1007 |
+
for(int i=0; i<ctEntriesSize; i++){
|
1008 |
+
RingShape ringShape = ct.ringShapes.get(i);
|
1009 |
+
directionFromRingToNeighbouringRing[i] = determineAbsoluteDirectionUsingPreviousDirection(ringShape.getShape(), ringShape.getRing().size(), ct.directionFromRingToNeighbouringRing.get(i), -horizonalRowDirection);
|
1010 |
+
}
|
1011 |
+
Ring[][] ringMap = generateRingMap(ct, directionFromRingToNeighbouringRing);
|
1012 |
+
if (ringMap !=null){//null if overlapping bonds rings present
|
1013 |
+
ringMaps.add(ringMap);
|
1014 |
+
}
|
1015 |
+
}
|
1016 |
+
}
|
1017 |
+
if (ringMaps.size()==0){
|
1018 |
+
throw new StructureBuildingException("Fused ring systems with overlapping rings such as in helices cannot currently be numbered");
|
1019 |
+
}
|
1020 |
+
return ringMaps;
|
1021 |
+
}
|
1022 |
+
|
1023 |
+
/**
|
1024 |
+
* Applies FR5.2 B, C and D to determine the preferred orientation and returns lists of potential peripheral atom orderings
|
1025 |
+
* @param ringMaps
|
1026 |
+
* @param atomCountOfFusedRingSystem
|
1027 |
+
* @return
|
1028 |
+
*/
|
1029 |
+
private static List<List<Atom>> findPossiblePaths(List<Ring[][]> ringMaps, int atomCountOfFusedRingSystem){
|
1030 |
+
List<Double[]> chainQs = new ArrayList<>();
|
1031 |
+
List<Ring[][]> correspondingRingMap = new ArrayList<>();
|
1032 |
+
for (Ring[][] ringMap : ringMaps) {
|
1033 |
+
List<Chain> chains = findChainsOfMaximumLengthInHorizontalDir(ringMap);
|
1034 |
+
// For each chain count the number of rings in each quadrant
|
1035 |
+
for (Chain chain : chains) {
|
1036 |
+
int midChainXcoord = chain.getLength() + chain.getStartingX() - 1;//Remember the X axis is measured in 1/2s so don't need to 1/2 length
|
1037 |
+
|
1038 |
+
Double[] qs = countQuadrants(ringMap, midChainXcoord, chain.getY());
|
1039 |
+
chainQs.add(qs);
|
1040 |
+
correspondingRingMap.add(ringMap);
|
1041 |
+
}
|
1042 |
+
}
|
1043 |
+
|
1044 |
+
/*
|
1045 |
+
* The quadrant numbers are as follows:
|
1046 |
+
*
|
1047 |
+
* 1 | 0
|
1048 |
+
* ----+----
|
1049 |
+
* 2 | 3
|
1050 |
+
*
|
1051 |
+
* But at this stage it is not known what the mapping between these numbers and the/a preferred orientation of the structure is
|
1052 |
+
*/
|
1053 |
+
// order for each right corner candidates for each chain
|
1054 |
+
List<List<Integer>> allowedUpperRightQuadrantsForEachChain =rulesBCD(chainQs);
|
1055 |
+
|
1056 |
+
List<List<Atom>> paths = new ArrayList<> ();
|
1057 |
+
for (int c=0; c < chainQs.size(); c++) {
|
1058 |
+
Ring[][] ringMap = correspondingRingMap.get(c);
|
1059 |
+
List<Integer> allowedUpperRightQuadrants = allowedUpperRightQuadrantsForEachChain.get(c);
|
1060 |
+
|
1061 |
+
for (Integer upperRightQuadrant : allowedUpperRightQuadrants) {
|
1062 |
+
Ring[][] qRingMap = transformQuadrantToUpperRightOfRingMap(ringMap, upperRightQuadrant);
|
1063 |
+
if (LOG.isTraceEnabled()){
|
1064 |
+
debugRingMap(qRingMap);
|
1065 |
+
}
|
1066 |
+
boolean inverseAtoms = (upperRightQuadrant == 2 || upperRightQuadrant == 0);
|
1067 |
+
List<Atom> peripheralAtomPath = orderAtoms(qRingMap, inverseAtoms, atomCountOfFusedRingSystem);
|
1068 |
+
paths.add(peripheralAtomPath);
|
1069 |
+
}
|
1070 |
+
}
|
1071 |
+
|
1072 |
+
return paths;
|
1073 |
+
}
|
1074 |
+
|
1075 |
+
private static Ring[][] generateRingMap(RingConnectivityTable ct, int[] directionFromRingToNeighbouringRing) {
|
1076 |
+
int ctEntriesSize = ct.ringShapes.size();
|
1077 |
+
// Find max and min coordinates for ringMap
|
1078 |
+
// we put the first ring into takenRings to start with it in the connection table
|
1079 |
+
int nRings = ct.usedRings.size();
|
1080 |
+
int[][] coordinates = new int[nRings][]; // correspondent to usedRings
|
1081 |
+
Ring[] takenRings = new Ring[nRings];
|
1082 |
+
int takenRingsCnt = 0;
|
1083 |
+
int maxX = 0;
|
1084 |
+
int minX = 0;
|
1085 |
+
int maxY = 0;
|
1086 |
+
int minY = 0;
|
1087 |
+
|
1088 |
+
takenRings[takenRingsCnt++] = ct.ringShapes.get(0).getRing();
|
1089 |
+
coordinates[0] = new int[]{0,0};
|
1090 |
+
|
1091 |
+
// Go through the rings in a system
|
1092 |
+
// Find the rings connected to them and assign coordinates according to the direction
|
1093 |
+
// Each time we go to the ring, whose coordinates were already identified.
|
1094 |
+
for(int tr=0; tr<nRings-1; tr++) {
|
1095 |
+
Ring currentRing = takenRings[tr];
|
1096 |
+
if (currentRing == null){
|
1097 |
+
throw new RuntimeException("OPSIN bug: Unexpected null ring in fused ring numbering");
|
1098 |
+
}
|
1099 |
+
|
1100 |
+
int indexOfCurrentRing = indexOfCorrespondingRingshape(ct.ringShapes, currentRing);
|
1101 |
+
|
1102 |
+
int xy[] = coordinates[tr]; // find the correspondent coordinates for the ring
|
1103 |
+
|
1104 |
+
if (indexOfCurrentRing >= 0) {
|
1105 |
+
for (int j=indexOfCurrentRing; j< ctEntriesSize; j++) {
|
1106 |
+
if (ct.ringShapes.get(j).getRing() == currentRing) {
|
1107 |
+
Ring neighbour = ct.neighbouringRings.get(j);
|
1108 |
+
if (arrayContains(takenRings, neighbour)) {
|
1109 |
+
continue;
|
1110 |
+
}
|
1111 |
+
|
1112 |
+
int[] newXY = new int[2];
|
1113 |
+
newXY[0] = xy[0] + Math.round(2 * countDX(directionFromRingToNeighbouringRing[j]));
|
1114 |
+
newXY[1] = xy[1] + countDY(directionFromRingToNeighbouringRing[j]);
|
1115 |
+
|
1116 |
+
if(takenRingsCnt > takenRings.length) {
|
1117 |
+
throw new RuntimeException("OPSIN Bug: Fused ring numbering bug");
|
1118 |
+
}
|
1119 |
+
takenRings[takenRingsCnt] = neighbour;
|
1120 |
+
coordinates[takenRingsCnt] = newXY;
|
1121 |
+
takenRingsCnt++;
|
1122 |
+
|
1123 |
+
if (newXY[0] > maxX){
|
1124 |
+
maxX = newXY[0];
|
1125 |
+
}
|
1126 |
+
else if (newXY[0] < minX) {
|
1127 |
+
minX = newXY[0];
|
1128 |
+
}
|
1129 |
+
|
1130 |
+
if (newXY[1] > maxY){
|
1131 |
+
maxY = newXY[1];
|
1132 |
+
}
|
1133 |
+
else if (newXY[1] < minY) {
|
1134 |
+
minY = newXY[1];
|
1135 |
+
}
|
1136 |
+
}
|
1137 |
+
}
|
1138 |
+
}
|
1139 |
+
else{
|
1140 |
+
throw new RuntimeException("OPSIN bug: fused ring numbering: Ring missing from connection table");
|
1141 |
+
}
|
1142 |
+
}
|
1143 |
+
// the height and the width of the map
|
1144 |
+
int h = maxY - minY + 1;
|
1145 |
+
int w = maxX - minX + 1;
|
1146 |
+
|
1147 |
+
Ring[][] ringMap = new Ring[w][h];
|
1148 |
+
|
1149 |
+
// Map rings using coordinates calculated in the previous step, and transform them according to found minX and minY
|
1150 |
+
|
1151 |
+
int ix = -minX;
|
1152 |
+
int iy = -minY;
|
1153 |
+
if (ix >= w || iy >= h) {
|
1154 |
+
throw new RuntimeException("OPSIN Bug: Fused ring numbering bug, Coordinates have been calculated wrongly");
|
1155 |
+
}
|
1156 |
+
|
1157 |
+
int curX = 0;
|
1158 |
+
int curY = 0;
|
1159 |
+
for (int ti = 0; ti < takenRings.length; ti++){
|
1160 |
+
int[] xy = coordinates[ti];
|
1161 |
+
curX = xy[0] - minX;
|
1162 |
+
curY = xy[1] - minY;
|
1163 |
+
if(curX <0 || curX > w || curY < 0 || curY > h) {
|
1164 |
+
throw new RuntimeException("OPSIN Bug: Fused ring numbering bug, Coordinates have been calculated wrongly");
|
1165 |
+
}
|
1166 |
+
if (ringMap[curX][curY] != null){
|
1167 |
+
return null;
|
1168 |
+
}
|
1169 |
+
ringMap[curX][curY] = takenRings[ti];
|
1170 |
+
}
|
1171 |
+
return ringMap;
|
1172 |
+
}
|
1173 |
+
|
1174 |
+
/**
|
1175 |
+
* Finds all the chains of maximum length for the current direction
|
1176 |
+
* @param ringMap
|
1177 |
+
* @return
|
1178 |
+
*/
|
1179 |
+
private static List<Chain> findChainsOfMaximumLengthInHorizontalDir(Ring[][] ringMap){
|
1180 |
+
int w = ringMap.length;
|
1181 |
+
int h = ringMap[0].length;
|
1182 |
+
|
1183 |
+
List<Chain> chains = new ArrayList<>();
|
1184 |
+
|
1185 |
+
int maxChain = 0;
|
1186 |
+
int chain = 0;
|
1187 |
+
|
1188 |
+
// Find the longest chain
|
1189 |
+
for (int j=0; j<h; j++) {
|
1190 |
+
for (int i=0; i<w; i++) {
|
1191 |
+
if(ringMap[i][j] != null) {
|
1192 |
+
chain = 1;
|
1193 |
+
while(i + 2*chain < w && ringMap[i + 2*chain][j] != null ) {
|
1194 |
+
chain++; // *2 because along the x axis the step is 2
|
1195 |
+
}
|
1196 |
+
if (chain > maxChain){
|
1197 |
+
chains.clear();
|
1198 |
+
maxChain = chain;
|
1199 |
+
}
|
1200 |
+
if(chain >= maxChain) {
|
1201 |
+
chains.add(new Chain(chain, i, j));
|
1202 |
+
}
|
1203 |
+
i += 2*chain;
|
1204 |
+
}
|
1205 |
+
}
|
1206 |
+
}
|
1207 |
+
return chains;
|
1208 |
+
}
|
1209 |
+
|
1210 |
+
/**
|
1211 |
+
* Counts number of rings in each quadrant
|
1212 |
+
* @param ringMap
|
1213 |
+
* @param midChainXcoord
|
1214 |
+
* @param yChain
|
1215 |
+
* @return
|
1216 |
+
*/
|
1217 |
+
private static Double[] countQuadrants(Ring[][] ringMap, int midChainXcoord, int yChain){
|
1218 |
+
Double[] qs = new Double[4];
|
1219 |
+
qs[0] = 0d;
|
1220 |
+
qs[1] = 0d;
|
1221 |
+
qs[2] = 0d;
|
1222 |
+
qs[3] = 0d;
|
1223 |
+
int w = ringMap.length;
|
1224 |
+
int h = ringMap[0].length;
|
1225 |
+
|
1226 |
+
// Count rings in each quadrants
|
1227 |
+
for (int x=0; x<w; x++) {
|
1228 |
+
for (int y=0; y<h; y++) {
|
1229 |
+
if (ringMap[x][y] == null) {
|
1230 |
+
continue;
|
1231 |
+
}
|
1232 |
+
|
1233 |
+
if (x == midChainXcoord || y == yChain ) {// if the ring is on the axis
|
1234 |
+
if( x == midChainXcoord && y > yChain ) {
|
1235 |
+
qs[0]+=0.5;
|
1236 |
+
qs[1]+=0.5;
|
1237 |
+
}
|
1238 |
+
else if( x == midChainXcoord && y < yChain ) {
|
1239 |
+
qs[2]+=0.5;
|
1240 |
+
qs[3]+=0.5;
|
1241 |
+
}
|
1242 |
+
else if( x < midChainXcoord && y == yChain ) {
|
1243 |
+
qs[1]+=0.5;
|
1244 |
+
qs[2]+=0.5;
|
1245 |
+
}
|
1246 |
+
else if( x > midChainXcoord && y == yChain ) {
|
1247 |
+
qs[0]+=0.5;
|
1248 |
+
qs[3]+=0.5;
|
1249 |
+
}
|
1250 |
+
if (x==midChainXcoord && y==yChain ){
|
1251 |
+
qs[0]+=0.25;
|
1252 |
+
qs[1]+=0.25;
|
1253 |
+
qs[2]+=0.25;
|
1254 |
+
qs[3]+=0.25;
|
1255 |
+
}
|
1256 |
+
}
|
1257 |
+
else if(x > midChainXcoord && y > yChain) {
|
1258 |
+
qs[0]++;
|
1259 |
+
}
|
1260 |
+
else if(x < midChainXcoord && y > yChain) {
|
1261 |
+
qs[1]++;
|
1262 |
+
}
|
1263 |
+
else if(x < midChainXcoord && y < yChain) {
|
1264 |
+
qs[2]++;
|
1265 |
+
}
|
1266 |
+
else if(x > midChainXcoord && y < yChain) {
|
1267 |
+
qs[3]++;
|
1268 |
+
}
|
1269 |
+
}
|
1270 |
+
}
|
1271 |
+
|
1272 |
+
return qs;
|
1273 |
+
}
|
1274 |
+
|
1275 |
+
/**
|
1276 |
+
* Applying rules FR5.2 B, C and D to the ring system.
|
1277 |
+
* Return a list of possible upper right quadrants for each chain given. A chain may have multiple possible upper right quadrants (due to symmetry)
|
1278 |
+
* or none if other chains can be shown to be preferable by application of the rules
|
1279 |
+
* @param chainQs - array with number of ring in each quadrant for each chain.
|
1280 |
+
*/
|
1281 |
+
private static List<List<Integer>> rulesBCD(List<Double[]> chainQs) {
|
1282 |
+
List<List<Integer>> possibleUpperRightQuadrantsForEachChain = new ArrayList<>();
|
1283 |
+
int nChains = chainQs.size();
|
1284 |
+
if (nChains==0){
|
1285 |
+
throw new RuntimeException("OPSIN Bug: Fused ring numbering, no chains found?");
|
1286 |
+
}
|
1287 |
+
|
1288 |
+
// Rule B: Maximum number of rings in upper right quadrant. Upper right corner candidates (it is not at this stage known which quadrant is the upper right one)
|
1289 |
+
double qmax = 0;
|
1290 |
+
|
1291 |
+
for (Double[] chainQ : chainQs) {
|
1292 |
+
for (int j = 0; j < 4; j++) {
|
1293 |
+
Double q = chainQ[j];
|
1294 |
+
if(q > qmax) {
|
1295 |
+
qmax = q;
|
1296 |
+
}
|
1297 |
+
}
|
1298 |
+
}
|
1299 |
+
|
1300 |
+
for (Double[] chainQ : chainQs) {
|
1301 |
+
List<Integer> allowedUpperRightQuadrants = new ArrayList<>();
|
1302 |
+
for (int j = 0; j < 4; j++){
|
1303 |
+
if (chainQ[j] == qmax) {
|
1304 |
+
allowedUpperRightQuadrants.add(j);
|
1305 |
+
}
|
1306 |
+
}
|
1307 |
+
possibleUpperRightQuadrantsForEachChain.add(allowedUpperRightQuadrants);
|
1308 |
+
}
|
1309 |
+
|
1310 |
+
// Rule C: Minimum number of rings in lower left quadrant
|
1311 |
+
double qmin = Double.MAX_VALUE;
|
1312 |
+
|
1313 |
+
for (int c = 0; c < nChains; c++) {
|
1314 |
+
List<Integer> possibleUpperRightQuadrant = possibleUpperRightQuadrantsForEachChain.get(c);
|
1315 |
+
for (Integer upperRightQuad : possibleUpperRightQuadrant) {
|
1316 |
+
int qdiagonal = (upperRightQuad + 2) % 4;
|
1317 |
+
if (chainQs.get(c)[qdiagonal] < qmin){
|
1318 |
+
qmin = chainQs.get(c)[qdiagonal];
|
1319 |
+
}
|
1320 |
+
}
|
1321 |
+
}
|
1322 |
+
for (int c = 0; c < nChains; c++) {
|
1323 |
+
List<Integer> possibleUpperRightQuadrant = possibleUpperRightQuadrantsForEachChain.get(c);
|
1324 |
+
List<Integer> allowedUpperRightQuadrants = new ArrayList<>();
|
1325 |
+
for (Integer upperRightQuad : possibleUpperRightQuadrant) {
|
1326 |
+
int qdiagonal = (upperRightQuad + 2) % 4;
|
1327 |
+
if (chainQs.get(c)[qdiagonal]==qmin) {
|
1328 |
+
allowedUpperRightQuadrants.add(upperRightQuad);
|
1329 |
+
}
|
1330 |
+
}
|
1331 |
+
possibleUpperRightQuadrantsForEachChain.set(c, allowedUpperRightQuadrants);
|
1332 |
+
}
|
1333 |
+
|
1334 |
+
// Rule D: Maximum number of rings above the horizontal row
|
1335 |
+
double rMax = 0;
|
1336 |
+
for (int c = 0; c < nChains; c++) {
|
1337 |
+
List<Integer> possibleUpperRightQuadrant = possibleUpperRightQuadrantsForEachChain.get(c);
|
1338 |
+
for (Integer upperRightQuad : possibleUpperRightQuadrant) {
|
1339 |
+
int upperLeftQuad;
|
1340 |
+
if (upperRightQuad % 2 == 0) {
|
1341 |
+
upperLeftQuad = upperRightQuad + 1;
|
1342 |
+
}
|
1343 |
+
else {
|
1344 |
+
upperLeftQuad = upperRightQuad - 1;
|
1345 |
+
}
|
1346 |
+
|
1347 |
+
if (chainQs.get(c)[upperLeftQuad] + chainQs.get(c)[upperRightQuad] > rMax) {
|
1348 |
+
rMax = chainQs.get(c)[upperLeftQuad] + chainQs.get(c)[upperRightQuad];
|
1349 |
+
}
|
1350 |
+
}
|
1351 |
+
}
|
1352 |
+
for (int c = 0; c < nChains; c++) {
|
1353 |
+
List<Integer> possibleUpperRightQuadrant = possibleUpperRightQuadrantsForEachChain.get(c);
|
1354 |
+
List<Integer> allowedUpperRightQuadrants = new ArrayList<>();
|
1355 |
+
for (Integer upperRightQuad : possibleUpperRightQuadrant) {
|
1356 |
+
int upperLeftQuad;
|
1357 |
+
if (upperRightQuad % 2 == 0) {
|
1358 |
+
upperLeftQuad = upperRightQuad + 1;
|
1359 |
+
}
|
1360 |
+
else {
|
1361 |
+
upperLeftQuad = upperRightQuad - 1;
|
1362 |
+
}
|
1363 |
+
|
1364 |
+
if (chainQs.get(c)[upperLeftQuad] + chainQs.get(c)[upperRightQuad] == rMax) {
|
1365 |
+
allowedUpperRightQuadrants.add(upperRightQuad);
|
1366 |
+
}
|
1367 |
+
}
|
1368 |
+
possibleUpperRightQuadrantsForEachChain.set(c, allowedUpperRightQuadrants);
|
1369 |
+
}
|
1370 |
+
return possibleUpperRightQuadrantsForEachChain;
|
1371 |
+
}
|
1372 |
+
|
1373 |
+
/**
|
1374 |
+
* Enumerates the peripheral atoms in a system in accordance with FR-5.3:
|
1375 |
+
* First finds the uppermost right ring, takes the next neighbour in the clockwise direction, and so on until the starting atom is reached
|
1376 |
+
* @param ringMap
|
1377 |
+
* @param inverseAtoms The direction in which the periphery atoms should be enumerated. Anticlockwise by default
|
1378 |
+
* @param atomCountOfFusedRingSystem
|
1379 |
+
* @return
|
1380 |
+
*/
|
1381 |
+
private static List<Atom> orderAtoms(Ring[][] ringMap, boolean inverseAtoms, int atomCountOfFusedRingSystem){
|
1382 |
+
int w = ringMap.length;
|
1383 |
+
int h = ringMap[0].length;
|
1384 |
+
|
1385 |
+
// find upper right ring
|
1386 |
+
Ring upperRightRing = null;
|
1387 |
+
for (int i=w-1; i>=0; i--) {
|
1388 |
+
if (ringMap[i][h-1] != null) {
|
1389 |
+
upperRightRing = ringMap[i][h-1];
|
1390 |
+
break;
|
1391 |
+
}
|
1392 |
+
}
|
1393 |
+
if (upperRightRing == null) {
|
1394 |
+
throw new RuntimeException("OPSIN Bug: Upper right ring not found when performing fused ring numbering");
|
1395 |
+
}
|
1396 |
+
List<Ring> visitedRings = new ArrayList<>();
|
1397 |
+
visitedRings.add(upperRightRing);
|
1398 |
+
while (isEntirelyFusionAtoms(upperRightRing)){//c.f cyclopropa[de]anthracene
|
1399 |
+
upperRightRing = findClockwiseRingFromUpperRightRing(ringMap, upperRightRing, visitedRings);
|
1400 |
+
if (upperRightRing==null){
|
1401 |
+
throw new RuntimeException("OPSIN Bug: Unabled to find clockwise ring without fusion atoms");
|
1402 |
+
}
|
1403 |
+
visitedRings.add(upperRightRing);
|
1404 |
+
}
|
1405 |
+
|
1406 |
+
Ring prevRing = findUpperLeftNeighbourOfUpperRightRing(ringMap, upperRightRing);
|
1407 |
+
Bond prevBond = findFusionBond(upperRightRing, prevRing);
|
1408 |
+
Bond nextBond = null;
|
1409 |
+
|
1410 |
+
Ring currentRing = upperRightRing;
|
1411 |
+
Ring nextRing = null;
|
1412 |
+
List<Atom> atomPath = new ArrayList<>();
|
1413 |
+
int count = 0;
|
1414 |
+
mainLoop: for (; count <= atomCountOfFusedRingSystem; count++) {
|
1415 |
+
int ringSize = currentRing.size();
|
1416 |
+
|
1417 |
+
int startingBondIndex = currentRing.getBondIndex(prevBond) ;
|
1418 |
+
|
1419 |
+
List<Bond> cyclicBonds = currentRing.getCyclicBondList();
|
1420 |
+
List<Bond> fusedBonds = currentRing.getFusedBonds();
|
1421 |
+
if (!inverseAtoms) {
|
1422 |
+
for(int bondIndex = 0; bondIndex < ringSize; bondIndex++) {
|
1423 |
+
int i = (startingBondIndex + bondIndex + 1) % ringSize; // +1 because we start from the bond next to stBond and end with it
|
1424 |
+
// if this bond is fused then it indicates the next ring to move to
|
1425 |
+
Bond bond = cyclicBonds.get(i);
|
1426 |
+
if(fusedBonds.contains(bond)) {
|
1427 |
+
nextBond = bond;
|
1428 |
+
break;
|
1429 |
+
}
|
1430 |
+
}
|
1431 |
+
}
|
1432 |
+
else {
|
1433 |
+
for(int bondIndex = 0; bondIndex < ringSize; bondIndex++) {
|
1434 |
+
int i = (startingBondIndex - bondIndex -1 + ringSize) % ringSize; // -1 because we start from the bond next to stBond and end with it
|
1435 |
+
// if this bond is fused then it indicates the next ring to move to
|
1436 |
+
Bond bond = cyclicBonds.get(i);
|
1437 |
+
if(fusedBonds.contains(bond)) {
|
1438 |
+
nextBond = bond;
|
1439 |
+
break;
|
1440 |
+
}
|
1441 |
+
}
|
1442 |
+
}
|
1443 |
+
if (nextBond == null) {
|
1444 |
+
throw new RuntimeException("OPSIN Bug: None of the bonds from this ring were fused, but this is not possible ");
|
1445 |
+
}
|
1446 |
+
|
1447 |
+
// next ring
|
1448 |
+
nextRing = currentRing.getNeighbourOfFusedBond(nextBond);
|
1449 |
+
|
1450 |
+
int endNumber = currentRing.getBondIndex(nextBond) ;
|
1451 |
+
|
1452 |
+
// Add atoms in order, considering inverse or not inverse
|
1453 |
+
if (!inverseAtoms) {
|
1454 |
+
// if distance between prev bond and cur bond = 1 (it means that fused bonds are next to each other) i.e. come under interior atom numbering
|
1455 |
+
// we don't add that atom, cause it was added already
|
1456 |
+
if ( (endNumber - startingBondIndex + ringSize) % ringSize != 1) {
|
1457 |
+
startingBondIndex = (startingBondIndex + 1) % ringSize;
|
1458 |
+
endNumber = (endNumber - 1 + ringSize ) % ringSize;
|
1459 |
+
if (startingBondIndex > endNumber) {
|
1460 |
+
endNumber += ringSize;
|
1461 |
+
}
|
1462 |
+
|
1463 |
+
// start from the atom next to fusion
|
1464 |
+
for (int j = startingBondIndex; j <= endNumber; j++) {
|
1465 |
+
Atom atom = currentRing.getCyclicAtomList().get(j % ringSize);
|
1466 |
+
if (atomPath.contains(atom)) {
|
1467 |
+
break mainLoop;
|
1468 |
+
}
|
1469 |
+
atomPath.add(atom);
|
1470 |
+
}
|
1471 |
+
}
|
1472 |
+
}
|
1473 |
+
else {
|
1474 |
+
if ( ( startingBondIndex - endNumber + ringSize) % ringSize != 1) {
|
1475 |
+
startingBondIndex = (startingBondIndex - 2 + ringSize ) % ringSize;
|
1476 |
+
endNumber = endNumber % ringSize;
|
1477 |
+
if (startingBondIndex < endNumber) {
|
1478 |
+
startingBondIndex += ringSize;
|
1479 |
+
}
|
1480 |
+
|
1481 |
+
for (int j = startingBondIndex; j >= endNumber; j-- ) {
|
1482 |
+
Atom atom = currentRing.getCyclicAtomList().get(j % ringSize);
|
1483 |
+
if (atomPath.contains(atom)) {
|
1484 |
+
break mainLoop;
|
1485 |
+
}
|
1486 |
+
atomPath.add(atom);
|
1487 |
+
}
|
1488 |
+
}
|
1489 |
+
}
|
1490 |
+
prevBond = nextBond;
|
1491 |
+
prevRing = currentRing;
|
1492 |
+
currentRing = nextRing;
|
1493 |
+
}
|
1494 |
+
if (count ==atomCountOfFusedRingSystem){
|
1495 |
+
throw new RuntimeException("OPSIN Bug: Fused ring numbering may have been stuck in an infinite loop while enumerating peripheral numbering");
|
1496 |
+
}
|
1497 |
+
return atomPath;
|
1498 |
+
}
|
1499 |
+
|
1500 |
+
private static boolean isEntirelyFusionAtoms(Ring upperRightRing) {
|
1501 |
+
List<Atom> atomList = upperRightRing.getAtomList();
|
1502 |
+
for (Atom atom : atomList) {
|
1503 |
+
if (atom.getBondCount() < 3){
|
1504 |
+
return false;
|
1505 |
+
}
|
1506 |
+
}
|
1507 |
+
return true;
|
1508 |
+
}
|
1509 |
+
|
1510 |
+
/**
|
1511 |
+
* Finds the neighbour ring, which is the clockwise of the given ring.
|
1512 |
+
* @param ringMap
|
1513 |
+
* @param upperRightRing
|
1514 |
+
* @param visitedRings
|
1515 |
+
* @return
|
1516 |
+
*/
|
1517 |
+
private static Ring findClockwiseRingFromUpperRightRing (Ring[][] ringMap, Ring upperRightRing, List<Ring> visitedRings){
|
1518 |
+
Ring clockwiseRing = null;
|
1519 |
+
int maxX = 0;
|
1520 |
+
int maxY = 0;
|
1521 |
+
|
1522 |
+
for (Ring ring : upperRightRing.getNeighbours()) {
|
1523 |
+
if (visitedRings.contains(ring)){
|
1524 |
+
continue;
|
1525 |
+
}
|
1526 |
+
int xy[] = findRingPosition(ringMap, ring);
|
1527 |
+
if (xy==null) {
|
1528 |
+
throw new RuntimeException("OPSIN Bug: Ring not found in ringMap when performing fused ring numbering");
|
1529 |
+
}
|
1530 |
+
|
1531 |
+
if (xy[0] > maxX || xy[0] == maxX && xy[1] > maxY ) {
|
1532 |
+
maxX = xy[0];
|
1533 |
+
maxY = xy[1];
|
1534 |
+
clockwiseRing = ring;
|
1535 |
+
}
|
1536 |
+
}
|
1537 |
+
return clockwiseRing;
|
1538 |
+
}
|
1539 |
+
|
1540 |
+
/**
|
1541 |
+
* Finds the neighbour ring, which is the uppermost and on the left side from the given ring. Used to find previous bond for the uppermost right ring, from which we start to enumerate
|
1542 |
+
* @param ringMap
|
1543 |
+
* @param upperRightRing
|
1544 |
+
* @return
|
1545 |
+
*/
|
1546 |
+
private static Ring findUpperLeftNeighbourOfUpperRightRing (Ring[][] ringMap, Ring upperRightRing){
|
1547 |
+
Ring nRing = null;
|
1548 |
+
int minX = Integer.MAX_VALUE;
|
1549 |
+
int maxY = 0;
|
1550 |
+
|
1551 |
+
for (Ring ring : upperRightRing.getNeighbours()) {
|
1552 |
+
// upper left would be previous ring
|
1553 |
+
int xy[] = findRingPosition(ringMap, ring);
|
1554 |
+
if (xy==null) {
|
1555 |
+
throw new RuntimeException("OPSIN Bug: Ring not found in ringMap when performing fused ring numbering");
|
1556 |
+
}
|
1557 |
+
|
1558 |
+
if (xy[1] > maxY || xy[1] == maxY && xy[0] < minX ) {
|
1559 |
+
minX = xy[0];
|
1560 |
+
maxY = xy[1];
|
1561 |
+
nRing = ring;
|
1562 |
+
}
|
1563 |
+
}
|
1564 |
+
return nRing;
|
1565 |
+
}
|
1566 |
+
|
1567 |
+
/**
|
1568 |
+
* Finds the position(i,j) of the ring in the map
|
1569 |
+
* @param ringMap
|
1570 |
+
* @param ring
|
1571 |
+
* @return
|
1572 |
+
*/
|
1573 |
+
private static int[] findRingPosition(Ring[][] ringMap, Ring ring) {
|
1574 |
+
int w = ringMap.length;
|
1575 |
+
int h = ringMap[0].length;
|
1576 |
+
|
1577 |
+
for(int i=0; i<w; i++) {
|
1578 |
+
for(int j=0; j<h; j++) {
|
1579 |
+
if (ringMap[i][j] == ring) {
|
1580 |
+
return new int[]{i,j};
|
1581 |
+
}
|
1582 |
+
}
|
1583 |
+
}
|
1584 |
+
return null;
|
1585 |
+
}
|
1586 |
+
|
1587 |
+
/**
|
1588 |
+
* Transform the map such that the candidate upper right quadrant actually is in the upper right corner
|
1589 |
+
* @param ringMap
|
1590 |
+
* @param upperRightQuadrant
|
1591 |
+
* @return
|
1592 |
+
*/
|
1593 |
+
private static Ring[][] transformQuadrantToUpperRightOfRingMap(Ring[][] ringMap, int upperRightQuadrant){
|
1594 |
+
int w = ringMap.length;
|
1595 |
+
int h = ringMap[0].length;
|
1596 |
+
|
1597 |
+
Ring[][] rearrangedMap = new Ring[w][h];
|
1598 |
+
for (int i=0; i < w; i++) {
|
1599 |
+
for (int j=0; j < h; j++) {
|
1600 |
+
if (upperRightQuadrant == 0) {//already is in the upper right
|
1601 |
+
rearrangedMap[i][j] = ringMap[i][j];
|
1602 |
+
}
|
1603 |
+
if(upperRightQuadrant == 1) {//flip in y axis
|
1604 |
+
rearrangedMap[w-i-1][j] = ringMap[i][j];
|
1605 |
+
}
|
1606 |
+
else if(upperRightQuadrant == 2) {//flip in x and y axes
|
1607 |
+
rearrangedMap[w-i-1][h-j-1] = ringMap[i][j];
|
1608 |
+
}
|
1609 |
+
else if(upperRightQuadrant == 3) {//flip in x axis
|
1610 |
+
rearrangedMap[i][h-j-1] = ringMap[i][j];
|
1611 |
+
}
|
1612 |
+
}
|
1613 |
+
}
|
1614 |
+
|
1615 |
+
return rearrangedMap;
|
1616 |
+
}
|
1617 |
+
|
1618 |
+
/**
|
1619 |
+
* Checks if array contains an object
|
1620 |
+
* @param array
|
1621 |
+
* @param obj
|
1622 |
+
* @return
|
1623 |
+
*/
|
1624 |
+
private static boolean arrayContains(Object[] array, Object obj) {
|
1625 |
+
for (Object arrObj : array) {
|
1626 |
+
if (arrObj == obj) {
|
1627 |
+
return true;
|
1628 |
+
}
|
1629 |
+
}
|
1630 |
+
return false;
|
1631 |
+
}
|
1632 |
+
|
1633 |
+
/**
|
1634 |
+
* Returns a bond which is not a bond that is in two rings
|
1635 |
+
* Preference is given to a bond that is at least a bond away from a fused bond to avoid problems with 5 member rings starting in bad orientations
|
1636 |
+
* @param tRing
|
1637 |
+
* @return
|
1638 |
+
*/
|
1639 |
+
private static Bond getStartingNonFusedBond(Ring tRing){
|
1640 |
+
List<Bond> allBonds = new ArrayList<>(tRing.getBondList());
|
1641 |
+
for (Bond fusedBond : tRing.getFusedBonds()) {
|
1642 |
+
List<Bond> neighbouringBonds = fusedBond.getFromAtom().getBonds();
|
1643 |
+
for (Bond bond : neighbouringBonds) {
|
1644 |
+
allBonds.remove(bond);
|
1645 |
+
}
|
1646 |
+
neighbouringBonds = fusedBond.getToAtom().getBonds();
|
1647 |
+
for (Bond bond : neighbouringBonds) {
|
1648 |
+
allBonds.remove(bond);
|
1649 |
+
}
|
1650 |
+
}
|
1651 |
+
if (allBonds.size() > 0){
|
1652 |
+
return allBonds.get(0);
|
1653 |
+
}
|
1654 |
+
for (Bond bond : tRing.getBondList()) {
|
1655 |
+
if(tRing.getNeighbourOfFusedBond(bond) == null){
|
1656 |
+
// return a non-fused bond
|
1657 |
+
return bond;
|
1658 |
+
}
|
1659 |
+
}
|
1660 |
+
return null;
|
1661 |
+
}
|
1662 |
+
|
1663 |
+
/**
|
1664 |
+
* Given the direction of the bond from ring1 to ring2, returns the opposite direction: from ring2 to ring1
|
1665 |
+
* @param prevDir
|
1666 |
+
* @return
|
1667 |
+
*/
|
1668 |
+
static int getOppositeDirection(int prevDir) {
|
1669 |
+
int dir;
|
1670 |
+
if (prevDir == 0) {
|
1671 |
+
dir = 4;
|
1672 |
+
}
|
1673 |
+
else if (Math.abs(prevDir) == 4){
|
1674 |
+
dir =0;
|
1675 |
+
}
|
1676 |
+
else if (Math.abs(prevDir) == 2){
|
1677 |
+
dir = 2 * -1 * Integer.signum(prevDir);
|
1678 |
+
}
|
1679 |
+
else if (Math.abs(prevDir) == 1){
|
1680 |
+
dir = 3 * -1 * Integer.signum(prevDir);
|
1681 |
+
}
|
1682 |
+
else {//prevDir will be +-3
|
1683 |
+
dir = 1 * -1 * Integer.signum(prevDir);
|
1684 |
+
}
|
1685 |
+
return dir;
|
1686 |
+
}
|
1687 |
+
|
1688 |
+
/**
|
1689 |
+
* Finds the atom connected to the bond, takes into account the order of the bonds and atoms in the ring
|
1690 |
+
* @param ring
|
1691 |
+
* @param curBond
|
1692 |
+
* @return
|
1693 |
+
*/
|
1694 |
+
private static Atom getAtomFromBond(Ring ring, Bond curBond) {
|
1695 |
+
if (ring.getCyclicBondList() == null) {
|
1696 |
+
throw new RuntimeException("The cyclic bond list should already have been generated");
|
1697 |
+
}
|
1698 |
+
int bondIndice= ring.getCyclicBondList().indexOf(curBond);
|
1699 |
+
int atomIndice = ( bondIndice - 1 + ring.size() ) % ring.size();
|
1700 |
+
return ring.getCyclicAtomList().get(atomIndice);
|
1701 |
+
}
|
1702 |
+
|
1703 |
+
/**
|
1704 |
+
* Finds the fusion bond between 2 rings
|
1705 |
+
* @param r1
|
1706 |
+
* @param r2
|
1707 |
+
* @return
|
1708 |
+
*/
|
1709 |
+
private static Bond findFusionBond (Ring r1, Ring r2) {
|
1710 |
+
List<Bond> b2 = r2.getBondList();
|
1711 |
+
for(Bond bond : r1.getBondList()){
|
1712 |
+
if (b2.contains(bond)) {
|
1713 |
+
return bond;
|
1714 |
+
}
|
1715 |
+
}
|
1716 |
+
return null;
|
1717 |
+
}
|
1718 |
+
|
1719 |
+
/**
|
1720 |
+
* Counts delta x distance between previous and next rings
|
1721 |
+
* @param val
|
1722 |
+
* @return
|
1723 |
+
*/
|
1724 |
+
private static float countDX (int val) {
|
1725 |
+
float dX = 0;
|
1726 |
+
if (Math.abs(val) == 1) {
|
1727 |
+
dX += 0.5f;
|
1728 |
+
}
|
1729 |
+
else if (Math.abs(val) == 3) {
|
1730 |
+
dX -= 0.5f;
|
1731 |
+
}
|
1732 |
+
else if (Math.abs(val) == 0) {
|
1733 |
+
dX += 1f;
|
1734 |
+
}
|
1735 |
+
else if (Math.abs(val) == 4) {
|
1736 |
+
dX -= 1f;
|
1737 |
+
}
|
1738 |
+
return dX;
|
1739 |
+
}
|
1740 |
+
|
1741 |
+
/**
|
1742 |
+
* Counts delta y distance (height) between previous and next rings
|
1743 |
+
* @param val
|
1744 |
+
* @return
|
1745 |
+
*/
|
1746 |
+
|
1747 |
+
private static int countDY (int val) {
|
1748 |
+
int dY = 0;
|
1749 |
+
if (Math.abs(val) != 4) {
|
1750 |
+
if (val > 0) {
|
1751 |
+
dY = 1;
|
1752 |
+
}
|
1753 |
+
if (val < 0) {
|
1754 |
+
dY = -1;
|
1755 |
+
}
|
1756 |
+
}
|
1757 |
+
return dY;
|
1758 |
+
}
|
1759 |
+
|
1760 |
+
/**
|
1761 |
+
* Take into account the previous direction to convert the given relative direction into a direction that is absolute for the fused ring system
|
1762 |
+
* @param fusionRingShape
|
1763 |
+
* @param ringSize
|
1764 |
+
* @param relativeDirection
|
1765 |
+
* @param previousDir
|
1766 |
+
* @return
|
1767 |
+
*/
|
1768 |
+
static int determineAbsoluteDirectionUsingPreviousDirection(FusionRingShape fusionRingShape, int ringSize, int relativeDirection, int previousDir){
|
1769 |
+
int interimDirection;
|
1770 |
+
if (Math.abs(previousDir) == 4) {
|
1771 |
+
if (relativeDirection == 0) {
|
1772 |
+
interimDirection = 4;
|
1773 |
+
}
|
1774 |
+
else {
|
1775 |
+
interimDirection = relativeDirection + 4 * -1 * Integer.signum(relativeDirection); // if dir<0 we add 4, if dir>0 we add -4
|
1776 |
+
}
|
1777 |
+
}
|
1778 |
+
else {
|
1779 |
+
interimDirection = relativeDirection + previousDir;
|
1780 |
+
}
|
1781 |
+
if (Math.abs(interimDirection) > 4) {// Added
|
1782 |
+
interimDirection = (8 - Math.abs(interimDirection)) * Integer.signum(interimDirection) * -1;
|
1783 |
+
}
|
1784 |
+
//TODO investigate this function and unit test
|
1785 |
+
/* Even numbered rings when angled do not have direction 2.
|
1786 |
+
* Almost true for 5 member except for corner case where fusion to elongated bond occurs
|
1787 |
+
*/
|
1788 |
+
if (Math.abs(interimDirection) == 2 && ((ringSize % 2 ==0) || ringSize==5 || ringSize==7)) {
|
1789 |
+
// if (one of them equal to 1 and another is equal to 3, we decrease absolute value and conserve the sign)
|
1790 |
+
if (Math.abs(relativeDirection)==1 && Math.abs(previousDir)==3 || Math.abs(relativeDirection)==3 && Math.abs(previousDir)==1) {
|
1791 |
+
interimDirection = 1 * Integer.signum(interimDirection);
|
1792 |
+
}
|
1793 |
+
// if both are equal to 1
|
1794 |
+
else if(Math.abs(relativeDirection)==1 && Math.abs(previousDir)==1 ) {
|
1795 |
+
interimDirection = 3 * Integer.signum(interimDirection);
|
1796 |
+
}
|
1797 |
+
// if both are equal to 3
|
1798 |
+
else if(Math.abs(relativeDirection)==3 && Math.abs(previousDir)==3 ) {
|
1799 |
+
interimDirection = 3 * Integer.signum(interimDirection);
|
1800 |
+
}
|
1801 |
+
// else it is correctly 2
|
1802 |
+
}
|
1803 |
+
|
1804 |
+
if (interimDirection == -4) {
|
1805 |
+
interimDirection = 4;
|
1806 |
+
}
|
1807 |
+
|
1808 |
+
return interimDirection;
|
1809 |
+
}
|
1810 |
+
|
1811 |
+
private static void debugRingMap(Ring[][] ringMap) {
|
1812 |
+
Ring[][] yxOrdered = new Ring[ringMap[0].length][ringMap.length];
|
1813 |
+
for (int x = 0; x < ringMap.length; x++) {
|
1814 |
+
Ring[] yRings = ringMap[x];
|
1815 |
+
for (int y = 0; y < yRings.length; y++) {
|
1816 |
+
yxOrdered[y][x] =yRings[y];
|
1817 |
+
}
|
1818 |
+
}
|
1819 |
+
for (int y = yxOrdered.length-1; y >=0 ; y--) {
|
1820 |
+
Ring[] xRings = yxOrdered[y];
|
1821 |
+
StringBuilder sb = new StringBuilder();
|
1822 |
+
for (Ring ring : xRings) {
|
1823 |
+
if (ring!=null){
|
1824 |
+
int size = ring.size();
|
1825 |
+
if (size>9){
|
1826 |
+
if (size==10){
|
1827 |
+
sb.append("0");
|
1828 |
+
}
|
1829 |
+
else if (size % 2 ==0){
|
1830 |
+
sb.append("2");
|
1831 |
+
}
|
1832 |
+
else{
|
1833 |
+
sb.append("1");
|
1834 |
+
}
|
1835 |
+
}
|
1836 |
+
else{
|
1837 |
+
sb.append(size);
|
1838 |
+
}
|
1839 |
+
}
|
1840 |
+
else{
|
1841 |
+
sb.append(" ");
|
1842 |
+
}
|
1843 |
+
}
|
1844 |
+
LOG.trace(sb.toString());
|
1845 |
+
}
|
1846 |
+
LOG.trace("#########");
|
1847 |
+
|
1848 |
+
}
|
1849 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/GroupingEl.java
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import java.util.ArrayList;
|
4 |
+
import java.util.List;
|
5 |
+
|
6 |
+
class GroupingEl extends Element{
|
7 |
+
|
8 |
+
private final List<Element> children = new ArrayList<>();
|
9 |
+
|
10 |
+
GroupingEl(String name) {
|
11 |
+
super(name);
|
12 |
+
}
|
13 |
+
|
14 |
+
@Override
|
15 |
+
void addChild(Element child) {
|
16 |
+
child.setParent(this);
|
17 |
+
children.add(child);
|
18 |
+
}
|
19 |
+
|
20 |
+
@Override
|
21 |
+
Element copy() {
|
22 |
+
GroupingEl copy = new GroupingEl(this.name);
|
23 |
+
for (Element childEl : this.children) {
|
24 |
+
Element newChild = childEl.copy();
|
25 |
+
newChild.setParent(copy);
|
26 |
+
copy.addChild(newChild);
|
27 |
+
}
|
28 |
+
for (int i = 0, len = this.attributes.size(); i < len; i++) {
|
29 |
+
Attribute atr = this.attributes.get(i);
|
30 |
+
copy.addAttribute(new Attribute(atr));
|
31 |
+
}
|
32 |
+
return copy;
|
33 |
+
}
|
34 |
+
|
35 |
+
@Override
|
36 |
+
Element getChild(int index) {
|
37 |
+
return children.get(index);
|
38 |
+
}
|
39 |
+
|
40 |
+
@Override
|
41 |
+
int getChildCount() {
|
42 |
+
return children.size();
|
43 |
+
}
|
44 |
+
|
45 |
+
@Override
|
46 |
+
List<Element> getChildElements() {
|
47 |
+
return new ArrayList<>(children);
|
48 |
+
}
|
49 |
+
|
50 |
+
@Override
|
51 |
+
List<Element> getChildElements(String name) {
|
52 |
+
List<Element> elements = new ArrayList<>(1);
|
53 |
+
for (Element element : children) {
|
54 |
+
if (element.name.equals(name)) {
|
55 |
+
elements.add(element);
|
56 |
+
}
|
57 |
+
}
|
58 |
+
return elements;
|
59 |
+
}
|
60 |
+
|
61 |
+
@Override
|
62 |
+
Element getFirstChildElement(String name) {
|
63 |
+
for (Element child : children) {
|
64 |
+
if (child.getName().equals(name)) {
|
65 |
+
return child;
|
66 |
+
}
|
67 |
+
}
|
68 |
+
return null;
|
69 |
+
}
|
70 |
+
|
71 |
+
String getValue() {
|
72 |
+
int childCount = getChildCount();
|
73 |
+
if (childCount == 0) {
|
74 |
+
return "";
|
75 |
+
}
|
76 |
+
StringBuilder result = new StringBuilder();
|
77 |
+
for (int i = 0; i < childCount; i++) {
|
78 |
+
result.append(children.get(i).getValue());
|
79 |
+
}
|
80 |
+
return result.toString();
|
81 |
+
}
|
82 |
+
|
83 |
+
@Override
|
84 |
+
int indexOf(Element child) {
|
85 |
+
return children.indexOf(child);
|
86 |
+
}
|
87 |
+
|
88 |
+
@Override
|
89 |
+
void insertChild(Element child, int index) {
|
90 |
+
child.setParent(this);
|
91 |
+
children.add(index, child);
|
92 |
+
}
|
93 |
+
|
94 |
+
@Override
|
95 |
+
boolean removeChild(Element child) {
|
96 |
+
child.setParent(null);
|
97 |
+
return children.remove(child);
|
98 |
+
}
|
99 |
+
|
100 |
+
@Override
|
101 |
+
Element removeChild(int index) {
|
102 |
+
Element removed = children.remove(index);
|
103 |
+
removed.setParent(null);
|
104 |
+
return removed;
|
105 |
+
}
|
106 |
+
|
107 |
+
@Override
|
108 |
+
void replaceChild(Element oldChild, Element newChild) {
|
109 |
+
int index = indexOf(oldChild);
|
110 |
+
if (index == -1) {
|
111 |
+
throw new RuntimeException("oldChild is not a child of this element.");
|
112 |
+
}
|
113 |
+
removeChild(index);
|
114 |
+
insertChild(newChild, index);
|
115 |
+
}
|
116 |
+
|
117 |
+
void setValue(String text) {
|
118 |
+
throw new UnsupportedOperationException("Token groups do not have a value");
|
119 |
+
}
|
120 |
+
|
121 |
+
}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IDManager.java
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
/**
 * Hands out consecutive integers, beginning with 1.
 *
 * @author ptc24
 *
 */
class IDManager {
	/**The most recently issued integer; 0 while nothing has been issued*/
	private int currentID;

	/**Starts the counter at zero, so that the first generated ID is 1 */
	IDManager() {
		this.currentID = 0;
	}

	/**
	 * Returns the most recently issued integer without generating a new one.
	 * @return the last generated integer, or 0 if none has been generated yet
	 */
	int getCurrentID() {
		return this.currentID;
	}

	/**
	 * Issues the next integer, which is one higher than the previously issued one
	 * (1 on the first call).
	 * @return The generated integer.
	 */
	int getNextID() {
		return ++this.currentID;
	}

}
|
TransAntivirus/download_pubchem/opsin-master/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IndentingXMLStreamWriter.java
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package uk.ac.cam.ch.wwmm.opsin;
|
2 |
+
|
3 |
+
import javax.xml.stream.XMLStreamException;
|
4 |
+
import javax.xml.stream.XMLStreamWriter;
|
5 |
+
|
6 |
+
import org.codehaus.stax2.util.StreamWriterDelegate;
|
7 |
+
|
8 |
+
/**
|
9 |
+
* This only overrides the commands actually used by the CmlWriter i.e. it isn't general
|
10 |
+
*/
|
11 |
+
class IndentingXMLStreamWriter extends StreamWriterDelegate {
|
12 |
+
|
13 |
+
private final int indentSize;
|
14 |
+
private int depth = 0;
|
15 |
+
private boolean atStartOfNewline = false;
|
16 |
+
|
17 |
+
IndentingXMLStreamWriter(XMLStreamWriter writer, int indentSize) {
|
18 |
+
super(writer);
|
19 |
+
this.indentSize = indentSize;
|
20 |
+
}
|
21 |
+
|
22 |
+
@Override
|
23 |
+
public void writeStartElement(String arg0) throws XMLStreamException {
|
24 |
+
if (!atStartOfNewline){
|
25 |
+
super.writeCharacters(OpsinTools.NEWLINE);
|
26 |
+
}
|
27 |
+
super.writeCharacters(StringTools.multiplyString(" ", depth * indentSize));
|
28 |
+
super.writeStartElement(arg0);
|
29 |
+
atStartOfNewline = false;
|
30 |
+
depth++;
|
31 |
+
}
|
32 |
+
|
33 |
+
@Override
|
34 |
+
public void writeEndElement() throws XMLStreamException {
|
35 |
+
depth--;
|
36 |
+
if (atStartOfNewline) {
|
37 |
+
super.writeCharacters(StringTools.multiplyString(" ", depth * indentSize));
|
38 |
+
}
|
39 |
+
super.writeEndElement();
|
40 |
+
super.writeCharacters(OpsinTools.NEWLINE);
|
41 |
+
atStartOfNewline = true;
|
42 |
+
}
|
43 |
+
|
44 |
+
@Override
|
45 |
+
public void writeCharacters(String arg0) throws XMLStreamException {
|
46 |
+
super.writeCharacters(arg0);
|
47 |
+
atStartOfNewline = false;
|
48 |
+
}
|
49 |
+
|
50 |
+
}
|