File size: 3,375 Bytes
38ae458
a10815e
09e358a
 
 
 
 
 
 
 
 
 
 
45e7e53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import evaluate
import datasets
import moses
from moses import metrics
import pandas as pd

def _compute(self, list_of_generated_smiles):
    """Compute the MOSES metrics for generated SMILES against the MOSES test set.

    Module-level variant of ``my_metric._compute``; unlike the method it
    returns the metrics object directly instead of wrapping it in a dict.

    Args:
        self: Unused; kept so the signature mirrors the method form.
        list_of_generated_smiles: Iterable of generated SMILES entries.

    Returns:
        Whatever ``metrics.get_all_metrics`` returns for the kept entries.
    """
    reference_smiles = moses.get_dataset('test')

    # Keep an entry only when canonicalization gives a truthy result. The
    # original entry (not its canonical form) is what gets scored.
    kept_smiles = []
    for candidate in list_of_generated_smiles:
        if moses.utils.canonicalize_smiles(candidate):
            kept_smiles.append(candidate)

    return metrics.get_all_metrics(kept_smiles, reference_smiles)


# Short human-readable summary of the metric; passed to MetricInfo and
# prepended to the metric class docstring.
_DESCRIPTION = "\nMoses and PyTDC metrics\n"


# Input/output documentation shown to users of the metric. It describes the
# single input the metric's `_compute` actually accepts.
_KWARGS_DESCRIPTION = """
Args:
    list_of_generated_smiles (`list` of `str`): Generated molecules as SMILES strings. Entries for which canonicalization yields an empty result are skipped before scoring.

Returns:
    results: All moses metrics, computed against the MOSES test set.
"""


# BibTeX entry for the MOSES paper. This must be a raw string: the accent
# commands in the author names (\' and \i) are BibTeX markup, not Python
# escape sequences, and would otherwise be altered or trigger warnings.
_CITATION = r"""
@article{DBLP:journals/corr/abs-1811-12823,
  author       = {Daniil Polykovskiy and
                  Alexander Zhebrak and
                  Benjam{\'{\i}}n S{\'{a}}nchez{-}Lengeling and
                  Sergey Golovanov and
                  Oktai Tatanov and
                  Stanislav Belyaev and
                  Rauf Kurbanov and
                  Aleksey Artamonov and
                  Vladimir Aladinskiy and
                  Mark Veselov and
                  Artur Kadurin and
                  Sergey I. Nikolenko and
                  Al{\'{a}}n Aspuru{-}Guzik and
                  Alex Zhavoronkov},
  title        = {Molecular Sets {(MOSES):} {A} Benchmarking Platform for Molecular
                  Generation Models},
  journal      = {CoRR},
  volume       = {abs/1811.12823},
  year         = {2018},
  url          = {http://arxiv.org/abs/1811.12823},
  eprinttype    = {arXiv},
  eprint       = {1811.12823},
  timestamp    = {Fri, 26 Nov 2021 15:34:30 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1811-12823.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class my_metric(evaluate.Metric):
    """`evaluate` metric that scores generated SMILES with the MOSES metrics."""

    def _info(self):
        """Return the metric metadata: descriptions, citation and input schema."""
        # SMILES are text, so the input column must be a string feature; an
        # integer dtype cannot encode values such as "CCO".
        smiles_feature = datasets.Value("string")
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                # The "multilabel" configuration declares one sequence of
                # SMILES per example instead of a single SMILES string.
                {
                    "list_of_generated_smiles": datasets.Sequence(smiles_feature),
                }
                if self.config_name == "multilabel"
                else {
                    "list_of_generated_smiles": smiles_feature,
                }
            ),
            reference_urls=["https://github.com/molecularsets/moses"],
        )

    def _compute(self, list_of_generated_smiles):
        """Score the generated SMILES against the MOSES test set.

        Args:
            list_of_generated_smiles: Iterable of generated SMILES entries.

        Returns:
            dict: A single key ``"results"`` holding whatever
            ``metrics.get_all_metrics`` returns for the kept entries.
        """
        test_set = moses.get_dataset('test')

        # Keep an entry only when canonicalization gives a truthy result. The
        # original entry (not its canonical form) is what gets scored.
        # NOTE(review): confirm that the installed moses version exposes
        # `moses.utils.canonicalize_smiles` - TODO verify against the package.
        preprocessed_smiles = [
            smile
            for smile in list_of_generated_smiles
            if moses.utils.canonicalize_smiles(smile)
        ]

        results = metrics.get_all_metrics(preprocessed_smiles, test_set)

        return {"results": results}