""" This file includes all tests for the data_preprocessing module. """ import pytest import numpy as np import pickle from data_preprocessing.create_descriptors import (handle_inputs, create_ecfp_fps, create_rdkit_descriptors, create_quantils, preprocess_molecules) class TestPreprocessMolecules: def test_handle_inputs(self, input_molecule_formats): """ This functions check whether all 3 possible input formats are correctly transformed into list. """ # Check 1: Smiles output_smiles = handle_inputs(input_molecule_formats.smiles) assert isinstance(output_smiles, list) # Check 2: Smiles coma output_smiles_coma = handle_inputs(input_molecule_formats.smiles_coma) assert isinstance(output_smiles_coma, list) assert output_smiles_coma == input_molecule_formats.smiles_list # Check 3: Smiles list output_smiles_list = handle_inputs(input_molecule_formats.smiles_list) assert isinstance(output_smiles_list, list) # Check 4.1: Correct DataFrame output_smiles_df = handle_inputs(input_molecule_formats.smiles_df) assert isinstance(output_smiles_df, list) # Check 4.2: Wrong DataFrame with pytest.raises(ValueError): handle_inputs(input_molecule_formats.smiles_df_wrong_key) def test_create_ecfps_fps(self, input_mols_from_smiles, ecfps_from_smiles): """ This function tests whether the ECFP fingerprints are correctly created. """ # Check 1: Correct output type output_ecfps = create_ecfp_fps(input_mols_from_smiles) assert isinstance(output_ecfps, np.ndarray) # Check 2: Correct output shape assert output_ecfps.shape == ecfps_from_smiles.shape # Check 3: Correct output values assert np.allclose(output_ecfps, ecfps_from_smiles, 0, 0) def test_create_rdkit_descriptors(self, input_mols_from_smiles, rdkit_descrs_from_smiles): """ This function tests whether the RDKit descriptors are correctly created. """ # Check 1: Correct output type output_rdkit_descrs = create_rdkit_descriptors(input_mols_from_smiles) assert isinstance(output_rdkit_descrs, np.ndarray) # Check 2: Correct output shape assert output_rdkit_descrs.shape == rdkit_descrs_from_smiles.shape # Check 3: Correct output values assert np.allclose(output_rdkit_descrs, rdkit_descrs_from_smiles) def test_create_quantils(self, input_mols_from_smiles, rdkit_descr_quantils): """ This function tests whether the quantils are correctly created. """ current_loc = __file__.rsplit("/",3)[0] with open(current_loc + "/assets/data_preprocessing_objects/ecdfs.pkl", "rb") as fl: ecdfs = pickle.load(fl) rdkit_descrs = create_rdkit_descriptors(input_mols_from_smiles) output_quantils = create_quantils(rdkit_descrs, ecdfs) # Check 1: Correct output type assert isinstance(output_quantils, np.ndarray) # Check 2: Correct output shape assert output_quantils.shape == rdkit_descr_quantils.shape # Check 3: Correct output values assert np.allclose(output_quantils, rdkit_descr_quantils) def test_preprocess_molecules(self, input_smiles, preprocessed_features): """ This function tests whether the preprocessing of molecules is correctly done. """ # Check 1: Correct output type output_preprocessed_features = preprocess_molecules(input_smiles) assert isinstance(output_preprocessed_features, np.ndarray) # Check 2: Correct output shape assert output_preprocessed_features.shape == preprocessed_features.shape # Check 3: Correct output values assert np.allclose(output_preprocessed_features, preprocessed_features)