|
""" |
|
This file includes all tests for the data_preprocessing module. |
|
""" |
|
|
|
import pickle
from pathlib import Path

import numpy as np
import pytest

from data_preprocessing.create_descriptors import (handle_inputs,
                                                   create_ecfp_fps,
                                                   create_rdkit_descriptors,
                                                   create_quantils,
                                                   preprocess_molecules)
|
|
|
class TestPreprocessMolecules:
    """
    Tests for the functions imported from
    ``data_preprocessing.create_descriptors``.

    Every argument besides ``self`` is expected to be supplied by a pytest
    fixture; those fixtures are not defined in this class.
    """

    def test_handle_inputs(self, input_molecule_formats):
        """
        Check that ``handle_inputs`` converts each supported input into a
        list.

        The ``smiles``, ``smiles_coma``, ``smiles_list`` and ``smiles_df``
        attributes of the fixture must all come back as a ``list``, and
        ``smiles_df_wrong_key`` must be rejected with a ``ValueError``.
        """
        output_smiles = handle_inputs(input_molecule_formats.smiles)
        assert isinstance(output_smiles, list)

        # Besides the type, this input must reproduce the fixture's
        # ``smiles_list`` exactly.
        output_smiles_coma = handle_inputs(input_molecule_formats.smiles_coma)
        assert isinstance(output_smiles_coma, list)
        assert output_smiles_coma == input_molecule_formats.smiles_list

        output_smiles_list = handle_inputs(input_molecule_formats.smiles_list)
        assert isinstance(output_smiles_list, list)

        output_smiles_df = handle_inputs(input_molecule_formats.smiles_df)
        assert isinstance(output_smiles_df, list)

        with pytest.raises(ValueError):
            handle_inputs(input_molecule_formats.smiles_df_wrong_key)

    def test_create_ecfps_fps(self, input_mols_from_smiles, ecfps_from_smiles):
        """
        Check that ``create_ecfp_fps`` reproduces the reference fingerprints.

        The result must be a NumPy array with the same shape and exactly the
        same values as ``ecfps_from_smiles``.
        """
        output_ecfps = create_ecfp_fps(input_mols_from_smiles)
        assert isinstance(output_ecfps, np.ndarray)

        assert output_ecfps.shape == ecfps_from_smiles.shape

        # Both tolerances are zero, so only an exact match passes.
        assert np.allclose(output_ecfps, ecfps_from_smiles, rtol=0, atol=0)

    def test_create_rdkit_descriptors(self, input_mols_from_smiles,
                                      rdkit_descrs_from_smiles):
        """
        Check that ``create_rdkit_descriptors`` reproduces the reference
        descriptors.

        The result must be a NumPy array with the same shape as
        ``rdkit_descrs_from_smiles`` and values equal within the default
        ``np.allclose`` tolerances.
        """
        output_rdkit_descrs = create_rdkit_descriptors(input_mols_from_smiles)
        assert isinstance(output_rdkit_descrs, np.ndarray)

        assert output_rdkit_descrs.shape == rdkit_descrs_from_smiles.shape

        assert np.allclose(output_rdkit_descrs, rdkit_descrs_from_smiles)

    def test_create_quantils(self, input_mols_from_smiles, rdkit_descr_quantils):
        """
        Check that ``create_quantils`` reproduces the reference quantiles.

        The ECDF objects are unpickled from
        ``assets/data_preprocessing_objects/ecdfs.pkl`` and applied to the
        descriptors computed from ``input_mols_from_smiles``. The result
        must be a NumPy array with the same shape as
        ``rdkit_descr_quantils`` and values equal within the default
        ``np.allclose`` tolerances.
        """
        # ``parents[2]`` is two levels above the directory holding this
        # file. pathlib keeps the lookup independent of the OS path
        # separator, unlike splitting ``__file__`` on "/".
        base_dir = Path(__file__).resolve().parents[2]
        ecdfs_path = (base_dir / "assets" / "data_preprocessing_objects"
                      / "ecdfs.pkl")

        # NOTE: pickle is only acceptable here because the file is an asset
        # shipped with the project; never unpickle untrusted data.
        with open(ecdfs_path, "rb") as fl:
            ecdfs = pickle.load(fl)

        rdkit_descrs = create_rdkit_descriptors(input_mols_from_smiles)
        output_quantils = create_quantils(rdkit_descrs, ecdfs)

        assert isinstance(output_quantils, np.ndarray)

        assert output_quantils.shape == rdkit_descr_quantils.shape

        assert np.allclose(output_quantils, rdkit_descr_quantils)

    def test_preprocess_molecules(self, input_smiles,
                                  preprocessed_features):
        """
        Check that ``preprocess_molecules`` reproduces the reference
        features.

        The result must be a NumPy array with the same shape as
        ``preprocessed_features`` and values equal within the default
        ``np.allclose`` tolerances.
        """
        output_preprocessed_features = preprocess_molecules(input_smiles)
        assert isinstance(output_preprocessed_features, np.ndarray)

        assert output_preprocessed_features.shape == preprocessed_features.shape

        assert np.allclose(output_preprocessed_features, preprocessed_features)
|
|
|
|
|
|