jannisborn committed on
Commit
bb77ad9
1 Parent(s): 5adf220

refactor loading

Browse files
Files changed (2) hide show
  1. configuration.py +4 -7
  2. utils.py +25 -0
configuration.py CHANGED
@@ -1,11 +1,11 @@
1
  """Configuration utils."""
2
  import os
3
  import json
4
- import dill
5
  import pandas as pd
6
  from pytoda.transforms import Compose
7
  from pytoda.smiles.transforms import SMILESToTokenIndexes, LeftPadding, Canonicalization
8
  from cos import ensure_filepath_from_uri, COS_BUCKET_URI
 
9
 
10
  # model files
11
  MODEL_WEIGHTS_URI = ensure_filepath_from_uri(os.path.join(COS_BUCKET_URI, "model.pt"))
@@ -29,16 +29,13 @@ with open(MODEL_PARAMS_URI) as fp:
29
  MODEL_PARAMS = json.load(fp)
30
  MAX_LENGTH = MODEL_PARAMS["smiles_padding_length"]
31
  # load SMILES language
32
- with open(SMILES_LANGUAGE_URI, "rb") as fp:
33
- SMILES_LANGUAGE = dill.load(fp)
34
  # load gene expression
35
  GENE_EXPRESSION = pd.read_csv(GENE_EXPRESSION_URI, compression="zip", low_memory=False)
36
  # load genes
37
- with open(GENES_URI, "rb") as fp:
38
- GENES = dill.load(fp)
39
  # load gene standardization parameters
40
- with open(GENE_EXPRESSION_STANDARDIZATION_URI, "rb") as fp:
41
- GENE_STANDARDIZATION_PARAMETERS = dill.load(fp)
42
  # smiles transformations
43
  SMILES_TRANSFORMS = [
44
  Canonicalization(),
 
1
  """Configuration utils."""
2
  import os
3
  import json
 
4
  import pandas as pd
5
  from pytoda.transforms import Compose
6
  from pytoda.smiles.transforms import SMILESToTokenIndexes, LeftPadding, Canonicalization
7
  from cos import ensure_filepath_from_uri, COS_BUCKET_URI
8
+ from .utils import load
9
 
10
  # model files
11
  MODEL_WEIGHTS_URI = ensure_filepath_from_uri(os.path.join(COS_BUCKET_URI, "model.pt"))
 
29
  MODEL_PARAMS = json.load(fp)
30
  MAX_LENGTH = MODEL_PARAMS["smiles_padding_length"]
31
  # load SMILES language
32
+ SMILES_LANGUAGE = load(SMILES_LANGUAGE_URI)
 
33
  # load gene expression
34
  GENE_EXPRESSION = pd.read_csv(GENE_EXPRESSION_URI, compression="zip", low_memory=False)
35
  # load genes
36
+ GENES = load(GENES_URI)
 
37
  # load gene standardization parameters
38
+ GENE_STANDARDIZATION_PARAMETERS = load(GENE_EXPRESSION_STANDARDIZATION_URI)
 
39
  # smiles transformations
40
  SMILES_TRANSFORMS = [
41
  Canonicalization(),
utils.py CHANGED
@@ -10,6 +10,31 @@ from datetime import datetime # noqa: F401
10
  from typing import Dict, List # noqa: F401
11
 
12
  import six
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  if sys.version_info < (3, 7):
15
  import typing
 
10
  from typing import Dict, List # noqa: F401
11
 
12
  import six
13
+ import warnings
14
+ import dill
15
+
16
def load(filepath: str):
    """
    Load a dill-pickled object, staying compatible with older Python pickles.

    A plain ``dill.load`` is attempted first. If it raises ``TypeError``, the
    load is retried with dill's ``CodeType`` reconstruction hook temporarily
    replaced by ``dill._dill._create_code``. The original hook is always
    restored afterwards, even if the retry fails.

    Args:
        filepath (str): path to the file.

    Returns:
        The object deserialized from ``filepath``.

    Raises:
        OSError: if the file cannot be opened.
        Exception: whatever ``dill.load`` raises on the retry is propagated.
    """
    # NOTE(review): unpickling can execute arbitrary code; only call this on
    # files from a trusted source.
    warnings.warn(
        "Loading languages will use a text files in the future", FutureWarning
    )
    try:
        with open(filepath, 'rb') as f:
            obj = dill.load(f)
    except TypeError:
        # Necessary to load python3.7 pickled objects with >=3.8:
        # For details see: https://github.com/uqfoundation/dill/pull/406
        reverse_typemap = dill._dill._reverse_typemap
        original_code_type = reverse_typemap['CodeType']
        reverse_typemap['CodeType'] = dill._dill._create_code
        try:
            with open(filepath, 'rb') as f:
                obj = dill.load(f)
        finally:
            # The typemap is process-wide dill state: restore it even when
            # the retry raises, so later loads do not see the patched entry.
            reverse_typemap['CodeType'] = original_code_type
    return obj
38
 
39
  if sys.version_info < (3, 7):
40
  import typing