marcenacp committed on
Commit
5b216e9
1 Parent(s): 993d03b

Deploy (see actual commits on https://github.com/mlcommons/croissant).

Browse files
Files changed (3) hide show
  1. core/state.py +8 -0
  2. requirements.txt +1 -1
  3. views/overview.py +4 -3
core/state.py CHANGED
@@ -33,6 +33,9 @@ def create_class(mlc_class: type, instance: Any, **kwargs) -> Any:
33
  name = field.name
34
  if hasattr(instance, name) and name not in kwargs:
35
  params[name] = getattr(instance, name)
 
 
 
36
  return mlc_class(**params, **kwargs)
37
 
38
 
@@ -137,6 +140,7 @@ class FileObject:
137
  sha256: str | None = None
138
  df: pd.DataFrame | None = None
139
  folder: epath.PathLike | None = None
 
140
 
141
 
142
  @dataclasses.dataclass
@@ -149,6 +153,7 @@ class FileSet:
149
  encoding_format: str | None = ""
150
  includes: str | None = ""
151
  name: str = ""
 
152
 
153
 
154
  @dataclasses.dataclass
@@ -161,6 +166,7 @@ class Field:
161
  data_types: str | list[str] | None = None
162
  source: mlc.Source | None = None
163
  references: mlc.Source | None = None
 
164
 
165
 
166
  @dataclasses.dataclass
@@ -174,6 +180,7 @@ class RecordSet:
174
  is_enumeration: bool | None = None
175
  key: str | list[str] | None = None
176
  fields: list[Field] = dataclasses.field(default_factory=list)
 
177
 
178
 
179
  @dataclasses.dataclass
@@ -191,6 +198,7 @@ class Metadata:
191
  date_published: datetime.datetime | None = None
192
  license: str | None = ""
193
  personal_sensitive_information: str | None = None
 
194
  url: str = ""
195
  distribution: list[FileObject | FileSet] = dataclasses.field(default_factory=list)
196
  record_sets: list[RecordSet] = dataclasses.field(default_factory=list)
 
33
  name = field.name
34
  if hasattr(instance, name) and name not in kwargs:
35
  params[name] = getattr(instance, name)
36
+ if "uuid" in params and params.get("uuid") is None:
37
+ # Let mlcroissant handle the default value
38
+ del params["uuid"]
39
  return mlc_class(**params, **kwargs)
40
 
41
 
 
140
  sha256: str | None = None
141
  df: pd.DataFrame | None = None
142
  folder: epath.PathLike | None = None
143
+ uuid: str | None = None
144
 
145
 
146
  @dataclasses.dataclass
 
153
  encoding_format: str | None = ""
154
  includes: str | None = ""
155
  name: str = ""
156
+ uuid: str | None = None
157
 
158
 
159
  @dataclasses.dataclass
 
166
  data_types: str | list[str] | None = None
167
  source: mlc.Source | None = None
168
  references: mlc.Source | None = None
169
+ uuid: str | None = None
170
 
171
 
172
  @dataclasses.dataclass
 
180
  is_enumeration: bool | None = None
181
  key: str | list[str] | None = None
182
  fields: list[Field] = dataclasses.field(default_factory=list)
183
+ uuid: str | None = None
184
 
185
 
186
  @dataclasses.dataclass
 
198
  date_published: datetime.datetime | None = None
199
  license: str | None = ""
200
  personal_sensitive_information: str | None = None
201
+ uuid: str | None = None
202
  url: str = ""
203
  distribution: list[FileObject | FileSet] = dataclasses.field(default_factory=list)
204
  record_sets: list[RecordSet] = dataclasses.field(default_factory=list)
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  etils[epath]
2
- mlcroissant
3
  numpy
4
  pandas
5
  pytest
 
1
  etils[epath]
2
+ mlcroissant>=1.0.1
3
  numpy
4
  pandas
5
  pytest
views/overview.py CHANGED
@@ -10,7 +10,7 @@ from utils import needed_field
10
  from views.metadata import handle_metadata_change
11
  from views.metadata import MetadataEvent
12
 
13
- _NON_RELEVANT_METADATA = ["ctx", "name", "distribution", "record_sets"]
14
 
15
  _INFO_TEXT = """Croissant files are composed of three layers:
16
 
@@ -38,8 +38,9 @@ def _relevant_fields(class_or_instance: type):
38
  else:
39
  return [
40
  field
41
- for field, value in dataclasses.asdict(class_or_instance).items()
42
- if value and field not in _NON_RELEVANT_METADATA
 
43
  ]
44
 
45
 
 
10
  from views.metadata import handle_metadata_change
11
  from views.metadata import MetadataEvent
12
 
13
+ _NON_RELEVANT_METADATA = ["ctx", "name", "distribution", "record_sets", "uuid"]
14
 
15
  _INFO_TEXT = """Croissant files are composed of three layers:
16
 
 
38
  else:
39
  return [
40
  field
41
+ for field in dataclasses.fields(Metadata)
42
+ if hasattr(class_or_instance, field.name)
43
+ and field.name not in _NON_RELEVANT_METADATA
44
  ]
45
 
46