saicharan2804 committed
Commit: a843a07
Parent(s): 9089432

Added tokens_to_mer

Files changed (1):
  KmerTokenizer.py  +7 -1
KmerTokenizer.py CHANGED
@@ -19,6 +19,9 @@ def atomwise_tokenizer(smi, exclusive_tokens = None):
                     tokens[i] = '[UNK]'
     return tokens

+def tokens_to_mer(toks):
+    return ''.join(toks)
+
 def kmer_tokenizer(smiles, ngram=4, stride=1, remove_last = False, exclusive_tokens = None):
     units = atomwise_tokenizer(smiles, exclusive_tokens = exclusive_tokens) #collect all the atom-wise tokens from the SMILES
     if ngram == 1:
@@ -29,4 +32,7 @@ def kmer_tokenizer(smiles, ngram=4, stride=1, remove_last = False, exclusive_tok
     if remove_last:
         if len(tokens[-1]) < ngram: #truncate last whole k-mer if the length of the last k-mers is less than ngram.
             tokens = tokens[:-1]
-    return tokens
+    return tokens
+
+
+print(kmer_tokenizer('CC[C@H](C)[C@H](NC(=O)[C@H](CC(C)C)NC(=O)[C@@H](NC(=O)[C@@H](N)CCSC)[C@@H](C)O)C(=O)NCC(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](Cc1c[nH]cn1)C(=O)N[C@@H](CC(N)=O)C(=O)NCC(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](CCC(N)=O)C(=O)N[C@@H](CC(C)C)C(=O)N[C@@H](CC(C)C)C(=O)N[C@@H](CCCN=C(N)N)C(=O)N[C@@H](CCC(N)=O)C(=O)N[C@@H](CC(C)C)C(=O)N[C@@H](CCCN=C(N)N)C(=O)NCC(=O)N[C@@H](CCC(N)=O)C(=O)N[C@@H](CC(C)C)C(=O)NCC(=O)N1CCC[C@H]1C(=O)N1CCC[C@H]1C(=O)NCC(=O)N[C@@H](CO)C(=O)N[C@@H](CCCN=C(N)N)C(N)=O'))
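
For reference, a minimal usage sketch of the new helper. This assumes KmerTokenizer.py is importable from the working directory; the short SMILES, the 4-token slice, and the example token list in the comments are illustrative and not taken from the commit:

from KmerTokenizer import atomwise_tokenizer, kmer_tokenizer, tokens_to_mer

smi = 'CC(=O)Oc1ccccc1C(=O)O'                  # aspirin, used here only as a short example
units = atomwise_tokenizer(smi)                # atom-wise tokens, e.g. ['C', 'C', '(', '=', 'O', ')', ...]
print(tokens_to_mer(units[0:4]))               # joins a window of atom-wise tokens into one k-mer string, e.g. 'CC(='
print(kmer_tokenizer(smi, ngram=4, stride=1))  # expected: list of overlapping 4-mers over the whole SMILES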