Elron committed on
Commit
646d90e
·
verified ·
1 Parent(s): 975173b

Upload dataset_utils.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. dataset_utils.py +5 -31
dataset_utils.py CHANGED
@@ -1,12 +1,11 @@
1
- import os
2
-
3
  from .artifact import Artifact, UnitxtArtifactNotFoundError, fetch_artifact
4
  from .logging_utils import get_logger
 
5
  from .register import _reset_env_local_catalogs, register_all_artifacts
 
6
 
7
  logger = get_logger()
8
-
9
- __default_recipe__ = "standard_recipe"
10
 
11
 
12
  def fetch(artifact_name):
@@ -18,32 +17,7 @@ def fetch(artifact_name):
18
 
19
 
20
  def parse(query: str):
21
- """Parses a query of the form 'key1=value1,key2=value2,...' into a dictionary."""
22
- result = {}
23
- kvs = query.split(",")
24
- if len(kvs) == 0:
25
- raise ValueError(
26
- 'Illegal query: "{query}" should contain at least one assignment of the form: key1=value1,key2=value2'
27
- )
28
- for kv in kvs:
29
- key_val = kv.split("=")
30
- if (
31
- len(key_val) != 2
32
- or len(key_val[0].strip()) == 0
33
- or len(key_val[1].strip()) == 0
34
- ):
35
- raise ValueError(
36
- f'Illegal query: "{query}" with wrong assignment "{kv}" should be of the form: key=value.'
37
- )
38
- key, val = key_val
39
- if val.isdigit():
40
- result[key] = int(val)
41
- elif val.replace(".", "", 1).isdigit():
42
- result[key] = float(val)
43
- else:
44
- result[key] = val
45
-
46
- return result
47
 
48
 
49
  def get_dataset_artifact(dataset_str):
@@ -53,6 +27,6 @@ def get_dataset_artifact(dataset_str):
53
  if recipe is None:
54
  args = parse(dataset_str)
55
  if "type" not in args:
56
- args["type"] = os.environ.get("UNITXT_DEFAULT_RECIPE", __default_recipe__)
57
  recipe = Artifact.from_dict(args)
58
  return recipe
 
 
 
1
  from .artifact import Artifact, UnitxtArtifactNotFoundError, fetch_artifact
2
  from .logging_utils import get_logger
3
+ from .parsing_utils import parse_key_equals_value_string_to_dict
4
  from .register import _reset_env_local_catalogs, register_all_artifacts
5
+ from .settings_utils import get_settings
6
 
7
  logger = get_logger()
8
+ settings = get_settings()
 
9
 
10
 
11
  def fetch(artifact_name):
 
17
 
18
 
19
  def parse(query: str):
20
+ return parse_key_equals_value_string_to_dict(query)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
 
23
  def get_dataset_artifact(dataset_str):
 
27
  if recipe is None:
28
  args = parse(dataset_str)
29
  if "type" not in args:
30
+ args["type"] = settings.default_recipe
31
  recipe = Artifact.from_dict(args)
32
  return recipe