Spaces:
Runtime error
Runtime error
Deploy (see actual commits on https://github.com/mlcommons/croissant).
Browse files
- core/state.py +8 -0
- requirements.txt +1 -1
- views/overview.py +4 -3
core/state.py
CHANGED
@@ -33,6 +33,9 @@ def create_class(mlc_class: type, instance: Any, **kwargs) -> Any:
|
|
33 |
name = field.name
|
34 |
if hasattr(instance, name) and name not in kwargs:
|
35 |
params[name] = getattr(instance, name)
|
|
|
|
|
|
|
36 |
return mlc_class(**params, **kwargs)
|
37 |
|
38 |
|
@@ -137,6 +140,7 @@ class FileObject:
|
|
137 |
sha256: str | None = None
|
138 |
df: pd.DataFrame | None = None
|
139 |
folder: epath.PathLike | None = None
|
|
|
140 |
|
141 |
|
142 |
@dataclasses.dataclass
|
@@ -149,6 +153,7 @@ class FileSet:
|
|
149 |
encoding_format: str | None = ""
|
150 |
includes: str | None = ""
|
151 |
name: str = ""
|
|
|
152 |
|
153 |
|
154 |
@dataclasses.dataclass
|
@@ -161,6 +166,7 @@ class Field:
|
|
161 |
data_types: str | list[str] | None = None
|
162 |
source: mlc.Source | None = None
|
163 |
references: mlc.Source | None = None
|
|
|
164 |
|
165 |
|
166 |
@dataclasses.dataclass
|
@@ -174,6 +180,7 @@ class RecordSet:
|
|
174 |
is_enumeration: bool | None = None
|
175 |
key: str | list[str] | None = None
|
176 |
fields: list[Field] = dataclasses.field(default_factory=list)
|
|
|
177 |
|
178 |
|
179 |
@dataclasses.dataclass
|
@@ -191,6 +198,7 @@ class Metadata:
|
|
191 |
date_published: datetime.datetime | None = None
|
192 |
license: str | None = ""
|
193 |
personal_sensitive_information: str | None = None
|
|
|
194 |
url: str = ""
|
195 |
distribution: list[FileObject | FileSet] = dataclasses.field(default_factory=list)
|
196 |
record_sets: list[RecordSet] = dataclasses.field(default_factory=list)
|
|
|
33 |
name = field.name
|
34 |
if hasattr(instance, name) and name not in kwargs:
|
35 |
params[name] = getattr(instance, name)
|
36 |
+
if "uuid" in params and params.get("uuid") is None:
|
37 |
+
# Let mlcroissant handle the default value
|
38 |
+
del params["uuid"]
|
39 |
return mlc_class(**params, **kwargs)
|
40 |
|
41 |
|
|
|
140 |
sha256: str | None = None
|
141 |
df: pd.DataFrame | None = None
|
142 |
folder: epath.PathLike | None = None
|
143 |
+
uuid: str | None = None
|
144 |
|
145 |
|
146 |
@dataclasses.dataclass
|
|
|
153 |
encoding_format: str | None = ""
|
154 |
includes: str | None = ""
|
155 |
name: str = ""
|
156 |
+
uuid: str | None = None
|
157 |
|
158 |
|
159 |
@dataclasses.dataclass
|
|
|
166 |
data_types: str | list[str] | None = None
|
167 |
source: mlc.Source | None = None
|
168 |
references: mlc.Source | None = None
|
169 |
+
uuid: str | None = None
|
170 |
|
171 |
|
172 |
@dataclasses.dataclass
|
|
|
180 |
is_enumeration: bool | None = None
|
181 |
key: str | list[str] | None = None
|
182 |
fields: list[Field] = dataclasses.field(default_factory=list)
|
183 |
+
uuid: str | None = None
|
184 |
|
185 |
|
186 |
@dataclasses.dataclass
|
|
|
198 |
date_published: datetime.datetime | None = None
|
199 |
license: str | None = ""
|
200 |
personal_sensitive_information: str | None = None
|
201 |
+
uuid: str | None = None
|
202 |
url: str = ""
|
203 |
distribution: list[FileObject | FileSet] = dataclasses.field(default_factory=list)
|
204 |
record_sets: list[RecordSet] = dataclasses.field(default_factory=list)
|
requirements.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
etils[epath]
|
2 |
-
mlcroissant
|
3 |
numpy
|
4 |
pandas
|
5 |
pytest
|
|
|
1 |
etils[epath]
|
2 |
+
mlcroissant>=1.0.1
|
3 |
numpy
|
4 |
pandas
|
5 |
pytest
|
views/overview.py
CHANGED
@@ -10,7 +10,7 @@ from utils import needed_field
|
|
10 |
from views.metadata import handle_metadata_change
|
11 |
from views.metadata import MetadataEvent
|
12 |
|
13 |
-
_NON_RELEVANT_METADATA = ["ctx", "name", "distribution", "record_sets"]
|
14 |
|
15 |
_INFO_TEXT = """Croissant files are composed of three layers:
|
16 |
|
@@ -38,8 +38,9 @@ def _relevant_fields(class_or_instance: type):
|
|
38 |
else:
|
39 |
return [
|
40 |
field
|
41 |
-
for field
|
42 |
-
if
|
|
|
43 |
]
|
44 |
|
45 |
|
|
|
10 |
from views.metadata import handle_metadata_change
|
11 |
from views.metadata import MetadataEvent
|
12 |
|
13 |
+
_NON_RELEVANT_METADATA = ["ctx", "name", "distribution", "record_sets", "uuid"]
|
14 |
|
15 |
_INFO_TEXT = """Croissant files are composed of three layers:
|
16 |
|
|
|
38 |
else:
|
39 |
return [
|
40 |
field
|
41 |
+
for field in dataclasses.fields(Metadata)
|
42 |
+
if hasattr(class_or_instance, field.name)
|
43 |
+
and field.name not in _NON_RELEVANT_METADATA
|
44 |
]
|
45 |
|
46 |
|