jannisborn committed on
Commit
30c86cf
1 Parent(s): 78993d6

fix: Bugfix in SMILES canonicalization

Browse files
Files changed (4) hide show
  1. .gitignore +2 -1
  2. artifacts/model.json +70 -1
  3. configuration.py +2 -1
  4. requirements.txt +1 -1
.gitignore CHANGED
@@ -1 +1,2 @@
1
- __pycache__/
 
 
1
+ __pycache__/
2
+ .DS_Store
artifacts/model.json CHANGED
@@ -1 +1,70 @@
1
- {"drug_sensitivity_min_max": true, "gene_expression_min_max": false, "gene_expression_standardize": false, "augment_smiles": false, "canonical": false, "kekulize": false, "all_bonds_explicit": false, "all_hs_explicit": false, "randomize": false, "remove_bonddir": false, "remove_chirality": false, "selfies": false, "smiles_start_stop_token": true, "number_of_genes": 2128, "smiles_padding_length": 465, "stacked_dense_hidden_sizes": [512], "activation_fn": "relu", "dropout": 0.4, "batch_norm": true, "filters": [64, 64, 64], "multiheads": [4, 4, 4, 4], "smiles_embedding_size": 16, "kernel_sizes": [[3, 16], [5, 16], [11, 16]], "smiles_attention_size": 64, "embed_scale_grad": false, "final_activation": true, "gene_to_dense": false, "batch_size": 2048, "dataset_device": "cuda", "lr": 0.001, "optimizer": "adam", "loss_fn": "mse", "epochs": 200, "save_model": 25, "smiles_vocabulary_size": 108, "drug_sensitivity_processing_parameters": {"processing": "min_max", "parameters": {"min": -11.998083341987641, "max": 12.359055999999999}}, "gene_expression_processing_parameters": {}, "number_of_parameters": 7217361}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "drug_sensitivity_min_max": true,
3
+ "gene_expression_min_max": false,
4
+ "gene_expression_standardize": false,
5
+ "augment_smiles": false,
6
+ "canonical": false,
7
+ "kekulize": false,
8
+ "all_bonds_explicit": false,
9
+ "all_hs_explicit": false,
10
+ "randomize": false,
11
+ "remove_bonddir": false,
12
+ "remove_chirality": false,
13
+ "selfies": false,
14
+ "smiles_start_stop_token": true,
15
+ "number_of_genes": 2128,
16
+ "smiles_padding_length": 465,
17
+ "stacked_dense_hidden_sizes": [
18
+ 512
19
+ ],
20
+ "activation_fn": "relu",
21
+ "dropout": 0.4,
22
+ "batch_norm": true,
23
+ "filters": [
24
+ 64,
25
+ 64,
26
+ 64
27
+ ],
28
+ "multiheads": [
29
+ 4,
30
+ 4,
31
+ 4,
32
+ 4
33
+ ],
34
+ "smiles_embedding_size": 16,
35
+ "kernel_sizes": [
36
+ [
37
+ 3,
38
+ 16
39
+ ],
40
+ [
41
+ 5,
42
+ 16
43
+ ],
44
+ [
45
+ 11,
46
+ 16
47
+ ]
48
+ ],
49
+ "smiles_attention_size": 64,
50
+ "embed_scale_grad": false,
51
+ "final_activation": true,
52
+ "gene_to_dense": false,
53
+ "batch_size": 2048,
54
+ "dataset_device": "cuda",
55
+ "lr": 0.001,
56
+ "optimizer": "adam",
57
+ "loss_fn": "mse",
58
+ "epochs": 200,
59
+ "save_model": 25,
60
+ "smiles_vocabulary_size": 108,
61
+ "drug_sensitivity_processing_parameters": {
62
+ "processing": "min_max",
63
+ "parameters": {
64
+ "min": -11.998083341987641,
65
+ "max": 12.359055999999999
66
+ }
67
+ },
68
+ "gene_expression_processing_parameters": {},
69
+ "number_of_parameters": 7217361
70
+ }
configuration.py CHANGED
@@ -4,7 +4,7 @@ import json
4
  import dill
5
  import pandas as pd
6
  from pytoda.transforms import Compose
7
- from pytoda.smiles.transforms import SMILESToTokenIndexes, LeftPadding
8
  from cos import ensure_filepath_from_uri, COS_BUCKET_URI
9
 
10
  # model files
@@ -41,6 +41,7 @@ with open(GENE_EXPRESSION_STANDARDIZATION_URI, "rb") as fp:
41
  GENE_STANDARDIZATION_PARAMETERS = dill.load(fp)
42
  # smiles transformations
43
  SMILES_TRANSFORMS = [
 
44
  SMILESToTokenIndexes(smiles_language=SMILES_LANGUAGE),
45
  LeftPadding(padding_length=MAX_LENGTH, padding_index=SMILES_LANGUAGE.padding_index),
46
  ]
 
4
  import dill
5
  import pandas as pd
6
  from pytoda.transforms import Compose
7
+ from pytoda.smiles.transforms import SMILESToTokenIndexes, LeftPadding, Canonicalization
8
  from cos import ensure_filepath_from_uri, COS_BUCKET_URI
9
 
10
  # model files
 
41
  GENE_STANDARDIZATION_PARAMETERS = dill.load(fp)
42
  # smiles transformations
43
  SMILES_TRANSFORMS = [
44
+ Canonicalization(),
45
  SMILESToTokenIndexes(smiles_language=SMILES_LANGUAGE),
46
  LeftPadding(padding_length=MAX_LENGTH, padding_index=SMILES_LANGUAGE.padding_index),
47
  ]
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  rdkit-pypi
2
- pytoda @git+https://git@github.com/PaccMann/paccmann_datasets@0.0.3
3
  paccmann_predictor @ git+https://github.com/PaccMann/paccmann_predictor@0.0.1.1
4
  tqdm
5
  connexion==2.6.0
 
1
  rdkit-pypi
2
+ pytoda @ git+https://git@github.com/PaccMann/paccmann_datasets@0.0.3
3
  paccmann_predictor @ git+https://github.com/PaccMann/paccmann_predictor@0.0.1.1
4
  tqdm
5
  connexion==2.6.0