mhnfs / src /tests /test_data_preprocessing.py
Tschoui's picture
move project from private to public space
cf004a6
raw
history blame contribute delete
No virus
4.42 kB
"""
This file includes all tests for the data_preprocessing module.
"""
import pickle
from pathlib import Path

import numpy as np
import pytest

from data_preprocessing.create_descriptors import (
    create_ecfp_fps,
    create_quantils,
    create_rdkit_descriptors,
    handle_inputs,
    preprocess_molecules,
)


class TestPreprocessMolecules:
    """
    Tests for the functions imported from
    ``data_preprocessing.create_descriptors``.

    All inputs and reference values are supplied by pytest fixtures that are
    defined outside this file (presumably in a ``conftest.py``).
    """

    def test_handle_inputs(self, input_molecule_formats):
        """
        Check that ``handle_inputs`` converts each supported input format into
        a list and raises ``ValueError`` for the wrong-key DataFrame.

        The fixture ``input_molecule_formats`` exposes the attributes
        ``smiles``, ``smiles_coma``, ``smiles_list``, ``smiles_df`` and
        ``smiles_df_wrong_key``.
        """
        # Check 1: Smiles
        output_smiles = handle_inputs(input_molecule_formats.smiles)
        assert isinstance(output_smiles, list)

        # Check 2: Comma-separated smiles; the result must equal the
        # reference list provided by the fixture.
        output_smiles_coma = handle_inputs(input_molecule_formats.smiles_coma)
        assert isinstance(output_smiles_coma, list)
        assert output_smiles_coma == input_molecule_formats.smiles_list

        # Check 3: Smiles list
        output_smiles_list = handle_inputs(input_molecule_formats.smiles_list)
        assert isinstance(output_smiles_list, list)

        # Check 4.1: Correct DataFrame
        output_smiles_df = handle_inputs(input_molecule_formats.smiles_df)
        assert isinstance(output_smiles_df, list)

        # Check 4.2: Wrong DataFrame
        with pytest.raises(ValueError):
            handle_inputs(input_molecule_formats.smiles_df_wrong_key)

    def test_create_ecfps_fps(self, input_mols_from_smiles, ecfps_from_smiles):
        """
        Check that ``create_ecfp_fps`` returns a NumPy array whose shape and
        values match the reference fixture ``ecfps_from_smiles``.
        """
        # Check 1: Correct output type
        output_ecfps = create_ecfp_fps(input_mols_from_smiles)
        assert isinstance(output_ecfps, np.ndarray)

        # Check 2: Correct output shape
        assert output_ecfps.shape == ecfps_from_smiles.shape

        # Check 3: Correct output values. Both tolerances are zero, so every
        # element has to match the reference exactly.
        assert np.allclose(output_ecfps, ecfps_from_smiles, rtol=0, atol=0)

    def test_create_rdkit_descriptors(self, input_mols_from_smiles,
                                      rdkit_descrs_from_smiles):
        """
        Check that ``create_rdkit_descriptors`` returns a NumPy array whose
        shape and values match the fixture ``rdkit_descrs_from_smiles``.
        """
        # Check 1: Correct output type
        output_rdkit_descrs = create_rdkit_descriptors(input_mols_from_smiles)
        assert isinstance(output_rdkit_descrs, np.ndarray)

        # Check 2: Correct output shape
        assert output_rdkit_descrs.shape == rdkit_descrs_from_smiles.shape

        # Check 3: Correct output values (default ``np.allclose`` tolerances)
        assert np.allclose(output_rdkit_descrs, rdkit_descrs_from_smiles)

    def test_create_quantils(self, input_mols_from_smiles, rdkit_descr_quantils):
        """
        Check that ``create_quantils`` returns a NumPy array whose shape and
        values match the fixture ``rdkit_descr_quantils``.

        The ECDF objects passed to ``create_quantils`` are unpickled from
        ``assets/data_preprocessing_objects/ecdfs.pkl``, located three
        directory levels above this file.
        """
        # Resolve the asset relative to this file with pathlib instead of
        # splitting the path string on "/", so the lookup does not depend on
        # the platform's path separator or on ``__file__`` being absolute.
        base_dir = Path(__file__).resolve().parents[2]
        ecdfs_path = (
            base_dir / "assets" / "data_preprocessing_objects" / "ecdfs.pkl"
        )

        # NOTE: unpickling executes arbitrary code; this is acceptable only
        # because the file is an asset shipped with the repository.
        with open(ecdfs_path, "rb") as fl:
            ecdfs = pickle.load(fl)

        rdkit_descrs = create_rdkit_descriptors(input_mols_from_smiles)
        output_quantils = create_quantils(rdkit_descrs, ecdfs)

        # Check 1: Correct output type
        assert isinstance(output_quantils, np.ndarray)

        # Check 2: Correct output shape
        assert output_quantils.shape == rdkit_descr_quantils.shape

        # Check 3: Correct output values
        assert np.allclose(output_quantils, rdkit_descr_quantils)

    def test_preprocess_molecules(self, input_smiles,
                                  preprocessed_features):
        """
        Check that ``preprocess_molecules`` returns a NumPy array whose shape
        and values match the fixture ``preprocessed_features``.
        """
        # Check 1: Correct output type
        output_preprocessed_features = preprocess_molecules(input_smiles)
        assert isinstance(output_preprocessed_features, np.ndarray)

        # Check 2: Correct output shape
        assert output_preprocessed_features.shape == preprocessed_features.shape

        # Check 3: Correct output values
        assert np.allclose(output_preprocessed_features, preprocessed_features)