my_metric / my_metric.py
saicharan2804
Datatype change
9d55a3f
raw
history blame
No virus
3.38 kB
import evaluate
import datasets
import moses
from moses import metrics
import pandas as pd
def _compute(self, list_of_generated_smiles):
    """Score generated SMILES strings against the MOSES 'test' dataset.

    Args:
        self: Unused by this function; present only in the signature.
        list_of_generated_smiles: Iterable of SMILES strings to evaluate.

    Returns:
        Whatever ``metrics.get_all_metrics`` returns for the retained
        strings and the reference set, passed through unchanged.
    """
    reference_set = moses.get_dataset('test')

    # Keep the *original* string whenever the canonicalizer yields a truthy
    # value; the canonical form itself is discarded, so this is a filter
    # only, not a normalization step.
    # NOTE(review): confirm that `moses.utils` really exposes
    # `canonicalize_smiles` -- upstream MOSES appears to ship
    # `canonic_smiles` under `moses.metrics.utils` instead.
    retained = []
    for candidate in list_of_generated_smiles:
        if moses.utils.canonicalize_smiles(candidate):
            retained.append(candidate)

    return metrics.get_all_metrics(retained, reference_set)
# Short summary of the metric; used as the MetricInfo description and as the
# first part of the docstring attached to the metric class.
_DESCRIPTION = """
Moses and PyTDC metrics
"""

# Input/output help text; used as MetricInfo.inputs_description and as the
# second part of the docstring attached to the metric class. It documents the
# single input this metric actually accepts (the previous text described the
# arguments of an unrelated accuracy metric).
_KWARGS_DESCRIPTION = """
Args:
    list_of_generated_smiles (`list` of `str`): Generated SMILES strings to score. Entries for which `moses.utils.canonicalize_smiles` returns a falsy value are discarded before scoring.
Returns:
    results: All moses metrics, as returned by `moses.metrics.get_all_metrics` when the retained SMILES strings are compared against the MOSES `test` dataset.
"""
_CITATION = """
@article{DBLP:journals/corr/abs-1811-12823,
author = {Daniil Polykovskiy and
Alexander Zhebrak and
Benjam{\'{\i}}n S{\'{a}}nchez{-}Lengeling and
Sergey Golovanov and
Oktai Tatanov and
Stanislav Belyaev and
Rauf Kurbanov and
Aleksey Artamonov and
Vladimir Aladinskiy and
Mark Veselov and
Artur Kadurin and
Sergey I. Nikolenko and
Al{\'{a}}n Aspuru{-}Guzik and
Alex Zhavoronkov},
title = {Molecular Sets {(MOSES):} {A} Benchmarking Platform for Molecular
Generation Models},
journal = {CoRR},
volume = {abs/1811.12823},
year = {2018},
url = {http://arxiv.org/abs/1811.12823},
eprinttype = {arXiv},
eprint = {1811.12823},
timestamp = {Fri, 26 Nov 2021 15:34:30 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-1811-12823.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
"""
# Evaluation module that scores generated SMILES strings with the MOSES
# metric suite. Documented with comments rather than a class docstring so the
# docstring assembled by the decorator below is left exactly as before.
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class my_metric(evaluate.Metric):
    def _info(self):
        """Build the metric's metadata: texts, input schema and reference URL.

        Returns:
            An ``evaluate.MetricInfo`` whose ``features`` declare the single
            input column ``list_of_generated_smiles``.
        """
        # The "multilabel" configuration declares each example as a sequence
        # of strings; every other configuration declares one string per
        # example.
        # NOTE(review): the "multilabel" branch looks inherited from a
        # classification template -- confirm it is meaningful for this metric.
        if self.config_name == "multilabel":
            schema = {
                "list_of_generated_smiles": datasets.Sequence(datasets.Value("string")),
            }
        else:
            schema = {
                "list_of_generated_smiles": datasets.Value("string"),
            }

        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(schema),
            reference_urls=["https://github.com/molecularsets/moses"],
        )

    def _compute(self, list_of_generated_smiles):
        """Score generated SMILES strings against the MOSES 'test' dataset.

        Args:
            list_of_generated_smiles: Iterable of SMILES strings to evaluate.

        Returns:
            A dict with the single key ``"results"`` holding whatever
            ``metrics.get_all_metrics`` returns for the retained strings.
        """
        reference_set = moses.get_dataset('test')

        # Only filters: the original string is kept whenever the
        # canonicalizer returns a truthy value; the canonical form is not
        # substituted for it.
        retained = []
        for candidate in list_of_generated_smiles:
            if moses.utils.canonicalize_smiles(candidate):
                retained.append(candidate)

        scores = metrics.get_all_metrics(retained, reference_set)
        return {"results": scores}
# def _compute(self, predictions, references, normalize=True, sample_weight=None):
# return {
# "accuracy": float(
# accuracy_score(references, predictions, normalize=normalize, sample_weight=sample_weight)
# )
# }