File size: 2,831 Bytes
3129d49
 
 
3e28aad
3129d49
3e28aad
 
 
 
 
 
3129d49
3e28aad
 
 
3129d49
3e28aad
 
 
 
 
 
 
 
3129d49
3e28aad
 
3129d49
3e28aad
 
 
 
3129d49
 
3e28aad
 
3a0a584
 
f414f88
 
3a0a584
 
3e28aad
3129d49
3e28aad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4e068f
3e28aad
 
 
 
 
 
 
3129d49
3e28aad
3a0a584
 
 
 
 
 
255a1f0
3e28aad
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import datasets

from .artifact import Artifact, UnitxtArtifactNotFoundError
from .artifact import __file__ as _
from .artifact import fetch_artifact
from .blocks import __file__ as _
from .card import __file__ as _
from .catalog import __file__ as _
from .collections import __file__ as _
from .common import __file__ as _
from .file_utils import __file__ as _
from .fusion import __file__ as _
from .generator_utils import __file__ as _
from .instructions import __file__ as _
from .load import __file__ as _
from .loaders import __file__ as _
from .metric import __file__ as _
from .metrics import __file__ as _
from .normalizers import __file__ as _
from .operator import __file__ as _
from .operators import __file__ as _
from .processors import __file__ as _
from .recipe import __file__ as _
from .register import __file__ as _
from .register import register_all_artifacts
from .schema import __file__ as _
from .split_utils import __file__ as _
from .splitters import __file__ as _
from .stream import __file__ as _
from .task import __file__ as _
from .templates import __file__ as _
from .text_utils import __file__ as _
from .utils import __file__ as _
from .validate import __file__ as _


def fetch(artifact_name):
    """Look up an artifact by name, returning ``None`` when it is not found.

    Delegates to ``fetch_artifact`` and keeps only the first element of the
    pair it returns. A ``UnitxtArtifactNotFoundError`` is translated into a
    ``None`` result; any other exception propagates.
    """
    try:
        found, _origin = fetch_artifact(artifact_name)
    except UnitxtArtifactNotFoundError:
        return None
    return found


def parse(query: str) -> dict:
    """Parse a query of the form 'key1=value1,key2=value2,...' into a dictionary.

    Values are converted as follows:
      * only decimal digits            -> ``int``
      * decimal digits with one ``.``  -> ``float``
      * anything else                  -> left as ``str``

    Each entry is split on its first ``=`` only, so a value may itself
    contain ``=`` characters.

    Args:
        query: Comma-separated ``key=value`` assignments.

    Returns:
        A dict mapping every key to its (possibly converted) value. When a key
        appears more than once, the last assignment wins.

    Raises:
        ValueError: If an entry contains no ``=`` separator.
    """
    result = {}
    for kv in query.split(","):
        key, separator, value = kv.partition("=")
        if not separator:
            raise ValueError(
                f'Illegal query: "{query}" with wrong assignment "{kv}" '
                "should be of the form 'key=value'."
            )
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which int()/float() refuse to convert.
        if value.isdecimal():
            result[key] = int(value)
        elif value.replace(".", "", 1).isdecimal():
            result[key] = float(value)
        else:
            # Only fall back to the raw string when no numeric form matched,
            # so the converted value above is not overwritten.
            result[key] = value

    return result


class Dataset(datasets.GeneratorBasedBuilder):
    """Dataset builder whose splits are produced by a recipe artifact.

    The recipe is selected through ``self.config.name``: it is first looked up
    as an artifact name via ``fetch``; if nothing is found, the name is parsed
    as a ``key=value,...`` query and built with ``Artifact.from_dict``.
    """

    VERSION = datasets.Version("1.1.1")
    builder_configs = {}

    @property
    def generators(self):
        """Return the result of calling the recipe, building it on first use.

        ``register_all_artifacts()`` is invoked on every access; the recipe
        itself is resolved and called only while ``_generators`` is missing or
        ``None``, and the result is cached on the instance.
        """
        register_all_artifacts()
        if getattr(self, "_generators", None) is None:
            recipe = fetch(self.config.name)
            if recipe is None:
                # Not a known artifact name: treat the config name as a query.
                recipe_args = parse(self.config.name)
                recipe_args.setdefault("type", "common_recipe")
                recipe = Artifact.from_dict(recipe_args)
            self._generators = recipe()
        return self._generators

    def _info(self):
        """Return a ``datasets.DatasetInfo`` created with no arguments."""
        return datasets.DatasetInfo()

    def _split_generators(self, _):
        """Return one ``SplitGenerator`` per key of ``self.generators``.

        Each split is named after its key, and the same key is forwarded to
        ``_generate_examples`` as ``split_name``. The positional argument is
        ignored.
        """
        split_generators = []
        for split in self.generators.keys():
            split_generators.append(
                datasets.SplitGenerator(name=split, gen_kwargs={"split_name": split})
            )
        return split_generators

    def _generate_examples(self, split_name):
        """Yield ``(index, row)`` pairs for the rows of the requested split."""
        for index, example in enumerate(self.generators[split_name]):
            yield index, example