Spaces:
Running
Running
Deploy (see actual commits on https://github.com/mlcommons/croissant).
Browse files
- app.py +5 -5
- core/query_params.py +9 -14
- core/state.py +14 -11
- cypress.config.js +1 -1
- events/metadata.py +3 -6
- events/resources.py +3 -3
- views/jsonld.py +1 -1
- views/metadata.py +3 -3
- views/overview.py +2 -2
- views/record_sets.py +7 -7
- views/record_sets_test.py +9 -7
- views/splash.py +2 -2
app.py
CHANGED
@@ -21,10 +21,10 @@ init_state()
|
|
21 |
user = get_user()
|
22 |
|
23 |
if OAUTH_CLIENT_ID and not user:
|
24 |
-
query_params = st.
|
25 |
-
state = query_params.
|
26 |
if state and state[0] == OAUTH_STATE:
|
27 |
-
code = query_params
|
28 |
if not code:
|
29 |
st.stop()
|
30 |
try:
|
@@ -34,7 +34,7 @@ if OAUTH_CLIENT_ID and not user:
|
|
34 |
except:
|
35 |
raise
|
36 |
finally:
|
37 |
-
st.
|
38 |
else:
|
39 |
redirect_uri = urllib.parse.quote(REDIRECT_URI, safe="")
|
40 |
client_id = urllib.parse.quote(OAUTH_CLIENT_ID, safe="")
|
@@ -48,7 +48,7 @@ if OAUTH_CLIENT_ID and not user:
|
|
48 |
|
49 |
def _back_to_menu():
|
50 |
"""Sends the user back to the menu."""
|
51 |
-
st.
|
52 |
init_state(force=True)
|
53 |
|
54 |
|
|
|
21 |
user = get_user()
|
22 |
|
23 |
if OAUTH_CLIENT_ID and not user:
|
24 |
+
query_params = st.query_params
|
25 |
+
state = query_params.get_all("state")
|
26 |
if state and state[0] == OAUTH_STATE:
|
27 |
+
code = query_params["code"]
|
28 |
if not code:
|
29 |
st.stop()
|
30 |
try:
|
|
|
34 |
except:
|
35 |
raise
|
36 |
finally:
|
37 |
+
st.query_params.clear()
|
38 |
else:
|
39 |
redirect_uri = urllib.parse.quote(REDIRECT_URI, safe="")
|
40 |
client_id = urllib.parse.quote(OAUTH_CLIENT_ID, safe="")
|
|
|
48 |
|
49 |
def _back_to_menu():
|
50 |
"""Sends the user back to the menu."""
|
51 |
+
st.query_params.clear()
|
52 |
init_state(force=True)
|
53 |
|
54 |
|
core/query_params.py
CHANGED
@@ -15,28 +15,24 @@ class QueryParams:
|
|
15 |
OPEN_RECORD_SET = "recordSet"
|
16 |
|
17 |
|
18 |
-
def _get_query_param(
|
19 |
"""Gets query param with the name `name`."""
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
return param[0]
|
24 |
return None
|
25 |
|
26 |
|
27 |
def _set_query_param(param: str, new_value: str) -> str | None:
|
28 |
-
params = st.
|
29 |
-
if params.
|
30 |
# The value already exists in the query params.
|
31 |
return
|
32 |
-
|
33 |
-
new_params[param] = new_value
|
34 |
-
st.experimental_set_query_params(**new_params)
|
35 |
|
36 |
|
37 |
def is_record_set_expanded(record_set: RecordSet) -> bool:
|
38 |
-
|
39 |
-
open_record_set_name = _get_query_param(params, QueryParams.OPEN_RECORD_SET)
|
40 |
if open_record_set_name:
|
41 |
return open_record_set_name == record_set.name
|
42 |
return False
|
@@ -47,8 +43,7 @@ def expand_record_set(record_set: RecordSet) -> None:
|
|
47 |
|
48 |
|
49 |
def get_project_timestamp() -> str | None:
|
50 |
-
|
51 |
-
return _get_query_param(params, QueryParams.OPEN_PROJECT)
|
52 |
|
53 |
|
54 |
def set_project(project: CurrentProject):
|
|
|
15 |
OPEN_RECORD_SET = "recordSet"
|
16 |
|
17 |
|
18 |
+
def _get_query_param(name: str) -> str | None:
|
19 |
"""Gets query param with the name `name`."""
|
20 |
+
param = st.query_params.get_all(name)
|
21 |
+
if isinstance(param, list) and len(param) > 0:
|
22 |
+
return param[0]
|
|
|
23 |
return None
|
24 |
|
25 |
|
26 |
def _set_query_param(param: str, new_value: str) -> str | None:
|
27 |
+
params = st.query_params
|
28 |
+
if params.get_all(param) == [new_value]:
|
29 |
# The value already exists in the query params.
|
30 |
return
|
31 |
+
params[param] = new_value
|
|
|
|
|
32 |
|
33 |
|
34 |
def is_record_set_expanded(record_set: RecordSet) -> bool:
|
35 |
+
open_record_set_name = _get_query_param(QueryParams.OPEN_RECORD_SET)
|
|
|
36 |
if open_record_set_name:
|
37 |
return open_record_set_name == record_set.name
|
38 |
return False
|
|
|
43 |
|
44 |
|
45 |
def get_project_timestamp() -> str | None:
|
46 |
+
return _get_query_param(QueryParams.OPEN_PROJECT)
|
|
|
47 |
|
48 |
|
49 |
def set_project(project: CurrentProject):
|
core/state.py
CHANGED
@@ -127,6 +127,7 @@ class SelectedRecordSet:
|
|
127 |
class FileObject:
|
128 |
"""FileObject analogue for editor"""
|
129 |
|
|
|
130 |
name: str | None = None
|
131 |
description: str | None = None
|
132 |
contained_in: list[str] | None = dataclasses.field(default_factory=list)
|
@@ -135,7 +136,6 @@ class FileObject:
|
|
135 |
encoding_format: str | None = None
|
136 |
sha256: str | None = None
|
137 |
df: pd.DataFrame | None = None
|
138 |
-
rdf: mlc.Rdf = dataclasses.field(default_factory=mlc.Rdf)
|
139 |
folder: epath.PathLike | None = None
|
140 |
|
141 |
|
@@ -143,23 +143,23 @@ class FileObject:
|
|
143 |
class FileSet:
|
144 |
"""FileSet analogue for editor"""
|
145 |
|
|
|
146 |
contained_in: list[str] = dataclasses.field(default_factory=list)
|
147 |
description: str | None = None
|
148 |
encoding_format: str | None = ""
|
149 |
includes: str | None = ""
|
150 |
name: str = ""
|
151 |
-
rdf: mlc.Rdf = dataclasses.field(default_factory=mlc.Rdf)
|
152 |
|
153 |
|
154 |
@dataclasses.dataclass
|
155 |
class Field:
|
156 |
"""Field analogue for editor"""
|
157 |
|
|
|
158 |
name: str | None = None
|
159 |
description: str | None = None
|
160 |
data_types: str | list[str] | None = None
|
161 |
source: mlc.Source | None = None
|
162 |
-
rdf: mlc.Rdf = dataclasses.field(default_factory=mlc.Rdf)
|
163 |
references: mlc.Source | None = None
|
164 |
|
165 |
|
@@ -167,13 +167,13 @@ class Field:
|
|
167 |
class RecordSet:
|
168 |
"""Record Set analogue for editor"""
|
169 |
|
|
|
170 |
name: str = ""
|
171 |
data: list[Any] | None = None
|
172 |
description: str | None = None
|
173 |
is_enumeration: bool | None = None
|
174 |
key: str | list[str] | None = None
|
175 |
fields: list[Field] = dataclasses.field(default_factory=list)
|
176 |
-
rdf: mlc.Rdf = dataclasses.field(default_factory=mlc.Rdf)
|
177 |
|
178 |
|
179 |
@dataclasses.dataclass
|
@@ -182,9 +182,10 @@ class Metadata:
|
|
182 |
|
183 |
name: str = ""
|
184 |
description: str | None = None
|
185 |
-
|
186 |
-
|
187 |
creators: list[mlc.PersonOrOrganization] = dataclasses.field(default_factory=list)
|
|
|
188 |
data_biases: str | None = None
|
189 |
data_collection: str | None = None
|
190 |
date_published: datetime.datetime | None = None
|
@@ -193,7 +194,6 @@ class Metadata:
|
|
193 |
url: str = ""
|
194 |
distribution: list[FileObject | FileSet] = dataclasses.field(default_factory=list)
|
195 |
record_sets: list[RecordSet] = dataclasses.field(default_factory=list)
|
196 |
-
rdf: mlc.Rdf = dataclasses.field(default_factory=mlc.Rdf)
|
197 |
version: str | None = None
|
198 |
|
199 |
def __bool__(self):
|
@@ -295,17 +295,20 @@ class Metadata:
|
|
295 |
|
296 |
def to_canonical(self) -> mlc.Metadata:
|
297 |
distribution = []
|
|
|
298 |
for file in self.distribution:
|
299 |
if isinstance(file, FileObject):
|
300 |
-
distribution.append(create_class(mlc.FileObject, file))
|
301 |
elif isinstance(file, FileSet):
|
302 |
-
distribution.append(create_class(mlc.FileSet, file))
|
303 |
record_sets = []
|
304 |
for record_set in self.record_sets:
|
305 |
fields = []
|
306 |
for field in record_set.fields:
|
307 |
-
fields.append(create_class(mlc.Field, field))
|
308 |
-
record_sets.append(
|
|
|
|
|
309 |
return create_class(
|
310 |
mlc.Metadata,
|
311 |
self,
|
|
|
127 |
class FileObject:
|
128 |
"""FileObject analogue for editor"""
|
129 |
|
130 |
+
ctx: mlc.Context = dataclasses.field(default_factory=mlc.Context)
|
131 |
name: str | None = None
|
132 |
description: str | None = None
|
133 |
contained_in: list[str] | None = dataclasses.field(default_factory=list)
|
|
|
136 |
encoding_format: str | None = None
|
137 |
sha256: str | None = None
|
138 |
df: pd.DataFrame | None = None
|
|
|
139 |
folder: epath.PathLike | None = None
|
140 |
|
141 |
|
|
|
143 |
class FileSet:
|
144 |
"""FileSet analogue for editor"""
|
145 |
|
146 |
+
ctx: mlc.Context = dataclasses.field(default_factory=mlc.Context)
|
147 |
contained_in: list[str] = dataclasses.field(default_factory=list)
|
148 |
description: str | None = None
|
149 |
encoding_format: str | None = ""
|
150 |
includes: str | None = ""
|
151 |
name: str = ""
|
|
|
152 |
|
153 |
|
154 |
@dataclasses.dataclass
|
155 |
class Field:
|
156 |
"""Field analogue for editor"""
|
157 |
|
158 |
+
ctx: mlc.Context = dataclasses.field(default_factory=mlc.Context)
|
159 |
name: str | None = None
|
160 |
description: str | None = None
|
161 |
data_types: str | list[str] | None = None
|
162 |
source: mlc.Source | None = None
|
|
|
163 |
references: mlc.Source | None = None
|
164 |
|
165 |
|
|
|
167 |
class RecordSet:
|
168 |
"""Record Set analogue for editor"""
|
169 |
|
170 |
+
ctx: mlc.Context = dataclasses.field(default_factory=mlc.Context)
|
171 |
name: str = ""
|
172 |
data: list[Any] | None = None
|
173 |
description: str | None = None
|
174 |
is_enumeration: bool | None = None
|
175 |
key: str | list[str] | None = None
|
176 |
fields: list[Field] = dataclasses.field(default_factory=list)
|
|
|
177 |
|
178 |
|
179 |
@dataclasses.dataclass
|
|
|
182 |
|
183 |
name: str = ""
|
184 |
description: str | None = None
|
185 |
+
cite_as: str | None = None
|
186 |
+
context: dict = dataclasses.field(default_factory=dict)
|
187 |
creators: list[mlc.PersonOrOrganization] = dataclasses.field(default_factory=list)
|
188 |
+
ctx: mlc.Context = dataclasses.field(default_factory=mlc.Context)
|
189 |
data_biases: str | None = None
|
190 |
data_collection: str | None = None
|
191 |
date_published: datetime.datetime | None = None
|
|
|
194 |
url: str = ""
|
195 |
distribution: list[FileObject | FileSet] = dataclasses.field(default_factory=list)
|
196 |
record_sets: list[RecordSet] = dataclasses.field(default_factory=list)
|
|
|
197 |
version: str | None = None
|
198 |
|
199 |
def __bool__(self):
|
|
|
295 |
|
296 |
def to_canonical(self) -> mlc.Metadata:
|
297 |
distribution = []
|
298 |
+
ctx = self.ctx
|
299 |
for file in self.distribution:
|
300 |
if isinstance(file, FileObject):
|
301 |
+
distribution.append(create_class(mlc.FileObject, file, ctx=ctx))
|
302 |
elif isinstance(file, FileSet):
|
303 |
+
distribution.append(create_class(mlc.FileSet, file, ctx=ctx))
|
304 |
record_sets = []
|
305 |
for record_set in self.record_sets:
|
306 |
fields = []
|
307 |
for field in record_set.fields:
|
308 |
+
fields.append(create_class(mlc.Field, field, ctx=ctx))
|
309 |
+
record_sets.append(
|
310 |
+
create_class(mlc.RecordSet, record_set, ctx=ctx, fields=fields)
|
311 |
+
)
|
312 |
return create_class(
|
313 |
mlc.Metadata,
|
314 |
self,
|
cypress.config.js
CHANGED
@@ -3,6 +3,6 @@ const { defineConfig } = require("cypress");
|
|
3 |
module.exports = defineConfig({
|
4 |
// To access content within Streamlit iframes for custom components:
|
5 |
chromeWebSecurity: false,
|
6 |
-
defaultCommandTimeout:
|
7 |
e2e: {},
|
8 |
});
|
|
|
3 |
module.exports = defineConfig({
|
4 |
// To access content within Streamlit iframes for custom components:
|
5 |
chromeWebSecurity: false,
|
6 |
+
defaultCommandTimeout: 20000,
|
7 |
e2e: {},
|
8 |
});
|
events/metadata.py
CHANGED
@@ -92,12 +92,11 @@ class MetadataEvent(enum.Enum):
|
|
92 |
"""Event that triggers a metadata change."""
|
93 |
|
94 |
NAME = "NAME"
|
95 |
-
CONFORMS_TO = "CONFORMS_TO"
|
96 |
DESCRIPTION = "DESCRIPTION"
|
97 |
DATE_PUBLISHED = "DATE_PUBLISHED"
|
98 |
URL = "URL"
|
99 |
LICENSE = "LICENSE"
|
100 |
-
|
101 |
VERSION = "VERSION"
|
102 |
DATA_BIASES = "DATA_BIASES"
|
103 |
DATA_COLLECTION = "DATA_COLLECTION"
|
@@ -111,14 +110,12 @@ class MetadataEvent(enum.Enum):
|
|
111 |
def handle_metadata_change(event: MetadataEvent, metadata: Metadata, key: str):
|
112 |
if event == MetadataEvent.NAME:
|
113 |
metadata.name = find_unique_name(set(), st.session_state[key])
|
114 |
-
if event == MetadataEvent.CONFORMS_TO:
|
115 |
-
metadata.conforms_to = st.session_state[key]
|
116 |
elif event == MetadataEvent.DESCRIPTION:
|
117 |
metadata.description = st.session_state[key]
|
118 |
elif event == MetadataEvent.LICENSE:
|
119 |
metadata.license = LICENSES.get(st.session_state[key])
|
120 |
-
elif event == MetadataEvent.
|
121 |
-
metadata.
|
122 |
elif event == MetadataEvent.URL:
|
123 |
metadata.url = st.session_state[key]
|
124 |
elif event == MetadataEvent.VERSION:
|
|
|
92 |
"""Event that triggers a metadata change."""
|
93 |
|
94 |
NAME = "NAME"
|
|
|
95 |
DESCRIPTION = "DESCRIPTION"
|
96 |
DATE_PUBLISHED = "DATE_PUBLISHED"
|
97 |
URL = "URL"
|
98 |
LICENSE = "LICENSE"
|
99 |
+
CITE_AS = "CITE_AS"
|
100 |
VERSION = "VERSION"
|
101 |
DATA_BIASES = "DATA_BIASES"
|
102 |
DATA_COLLECTION = "DATA_COLLECTION"
|
|
|
110 |
def handle_metadata_change(event: MetadataEvent, metadata: Metadata, key: str):
|
111 |
if event == MetadataEvent.NAME:
|
112 |
metadata.name = find_unique_name(set(), st.session_state[key])
|
|
|
|
|
113 |
elif event == MetadataEvent.DESCRIPTION:
|
114 |
metadata.description = st.session_state[key]
|
115 |
elif event == MetadataEvent.LICENSE:
|
116 |
metadata.license = LICENSES.get(st.session_state[key])
|
117 |
+
elif event == MetadataEvent.CITE_AS:
|
118 |
+
metadata.cite_as = st.session_state[key]
|
119 |
elif event == MetadataEvent.URL:
|
120 |
metadata.url = st.session_state[key]
|
121 |
elif event == MetadataEvent.VERSION:
|
events/resources.py
CHANGED
@@ -72,6 +72,6 @@ def _create_instance1_from_instance2(instance1: Resource, instance2: type):
|
|
72 |
attributes1 = set((field.name for field in dataclasses.fields(instance1)))
|
73 |
attributes2 = set((field.name for field in dataclasses.fields(instance2)))
|
74 |
common_attributes = attributes2.intersection(attributes1)
|
75 |
-
return instance2(
|
76 |
-
attribute: getattr(instance1, attribute) for attribute in common_attributes
|
77 |
-
|
|
|
72 |
attributes1 = set((field.name for field in dataclasses.fields(instance1)))
|
73 |
attributes2 = set((field.name for field in dataclasses.fields(instance2)))
|
74 |
common_attributes = attributes2.intersection(attributes1)
|
75 |
+
return instance2(
|
76 |
+
**{attribute: getattr(instance1, attribute) for attribute in common_attributes}
|
77 |
+
)
|
views/jsonld.py
CHANGED
@@ -47,7 +47,7 @@ def render_jsonld():
|
|
47 |
if croissant.metadata:
|
48 |
metadata = mlc.Metadata(
|
49 |
name=croissant.metadata.name,
|
50 |
-
|
51 |
license=croissant.metadata.license,
|
52 |
description=croissant.metadata.description,
|
53 |
url=croissant.metadata.url,
|
|
|
47 |
if croissant.metadata:
|
48 |
metadata = mlc.Metadata(
|
49 |
name=croissant.metadata.name,
|
50 |
+
cite_as=croissant.metadata.cite_as,
|
51 |
license=croissant.metadata.license,
|
52 |
description=croissant.metadata.description,
|
53 |
url=croissant.metadata.url,
|
views/metadata.py
CHANGED
@@ -97,14 +97,14 @@ def _render_generic_metadata(metadata: Metadata):
|
|
97 |
on_change=handle_metadata_change,
|
98 |
args=(MetadataEvent.LICENSE, metadata, key),
|
99 |
)
|
100 |
-
key = "metadata-
|
101 |
st.text_area(
|
102 |
label="Citation",
|
103 |
key=key,
|
104 |
-
value=metadata.
|
105 |
placeholder="@book{\n title={Title}\n}",
|
106 |
on_change=handle_metadata_change,
|
107 |
-
args=(MetadataEvent.
|
108 |
)
|
109 |
key = "metadata-date-published"
|
110 |
st.date_input(
|
|
|
97 |
on_change=handle_metadata_change,
|
98 |
args=(MetadataEvent.LICENSE, metadata, key),
|
99 |
)
|
100 |
+
key = "metadata-cite-as"
|
101 |
st.text_area(
|
102 |
label="Citation",
|
103 |
key=key,
|
104 |
+
value=metadata.cite_as,
|
105 |
placeholder="@book{\n title={Title}\n}",
|
106 |
on_change=handle_metadata_change,
|
107 |
+
args=(MetadataEvent.CITE_AS, metadata, key),
|
108 |
)
|
109 |
key = "metadata-date-published"
|
110 |
st.date_input(
|
views/overview.py
CHANGED
@@ -10,7 +10,7 @@ from utils import needed_field
|
|
10 |
from views.metadata import handle_metadata_change
|
11 |
from views.metadata import MetadataEvent
|
12 |
|
13 |
-
_NON_RELEVANT_METADATA = ["
|
14 |
|
15 |
_INFO_TEXT = """Croissant files are composed of three layers:
|
16 |
|
@@ -98,7 +98,7 @@ def render_overview():
|
|
98 |
if user_started_editing:
|
99 |
warning = ""
|
100 |
try:
|
101 |
-
issues = metadata.to_canonical().issues
|
102 |
if issues.errors:
|
103 |
warning += "**Errors**\n"
|
104 |
for error in issues.errors:
|
|
|
10 |
from views.metadata import handle_metadata_change
|
11 |
from views.metadata import MetadataEvent
|
12 |
|
13 |
+
_NON_RELEVANT_METADATA = ["ctx", "name", "distribution", "record_sets"]
|
14 |
|
15 |
_INFO_TEXT = """Croissant files are composed of three layers:
|
16 |
|
|
|
98 |
if user_started_editing:
|
99 |
warning = ""
|
100 |
try:
|
101 |
+
issues = metadata.to_canonical().ctx.issues
|
102 |
if issues.errors:
|
103 |
warning += "**Errors**\n"
|
104 |
for error in issues.errors:
|
views/record_sets.py
CHANGED
@@ -44,11 +44,9 @@ class _Result(TypedDict):
|
|
44 |
@st.cache_data(
|
45 |
show_spinner="Generating the dataset...",
|
46 |
hash_funcs={
|
47 |
-
"
|
48 |
-
|
49 |
-
|
50 |
-
"mlcroissant.FileSet": hash,
|
51 |
-
"mlcroissant.RecordSet": hash,
|
52 |
},
|
53 |
)
|
54 |
def _generate_data_with_timeout(record_set: RecordSet) -> _Result:
|
@@ -385,11 +383,13 @@ def _render_left_panel():
|
|
385 |
"⚠️",
|
386 |
key=f"idea-{prefix}",
|
387 |
on_click=lambda: _generate_data_with_timeout.clear(),
|
388 |
-
help=textwrap.dedent(
|
|
|
389 |
```
|
390 |
{exception}
|
391 |
```
|
392 |
-
"""
|
|
|
393 |
)
|
394 |
right.markdown("No preview is possible.")
|
395 |
|
|
|
44 |
@st.cache_data(
|
45 |
show_spinner="Generating the dataset...",
|
46 |
hash_funcs={
|
47 |
+
"core.state.RecordSet": lambda record_set: hash(
|
48 |
+
(record_set.name, record_set.description)
|
49 |
+
),
|
|
|
|
|
50 |
},
|
51 |
)
|
52 |
def _generate_data_with_timeout(record_set: RecordSet) -> _Result:
|
|
|
383 |
"⚠️",
|
384 |
key=f"idea-{prefix}",
|
385 |
on_click=lambda: _generate_data_with_timeout.clear(),
|
386 |
+
help=textwrap.dedent(
|
387 |
+
f"""**Error**:
|
388 |
```
|
389 |
{exception}
|
390 |
```
|
391 |
+
"""
|
392 |
+
),
|
393 |
)
|
394 |
right.markdown("No preview is possible.")
|
395 |
|
views/record_sets_test.py
CHANGED
@@ -19,10 +19,12 @@ def test_find_joins():
|
|
19 |
references=mlc.Source(uid="some_other_record_set/some_other_field"),
|
20 |
),
|
21 |
]
|
22 |
-
assert _find_joins(fields) == set(
|
23 |
-
|
24 |
-
|
25 |
-
(
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
19 |
references=mlc.Source(uid="some_other_record_set/some_other_field"),
|
20 |
),
|
21 |
]
|
22 |
+
assert _find_joins(fields) == set(
|
23 |
+
[
|
24 |
+
(("some_csv", "some_column"), ("some_record_set", "some_field")),
|
25 |
+
(
|
26 |
+
("some_record_set", "some_field"),
|
27 |
+
("some_other_record_set", "some_other_field"),
|
28 |
+
),
|
29 |
+
]
|
30 |
+
)
|
views/splash.py
CHANGED
@@ -65,11 +65,11 @@ def render_splash():
|
|
65 |
with st.expander("**Load an existing dataset**", expanded=True):
|
66 |
|
67 |
def create_example(dataset: str):
|
68 |
-
base = f"https://raw.githubusercontent.com/mlcommons/croissant/main/datasets/{dataset.lower()}"
|
69 |
url = f"{base}/metadata.json"
|
70 |
try:
|
71 |
json = requests.get(url).json()
|
72 |
-
metadata = mlc.Metadata.from_json(mlc.
|
73 |
st.session_state[Metadata] = Metadata.from_canonical(metadata)
|
74 |
save_current_project()
|
75 |
# Write supplementary files.
|
|
|
65 |
with st.expander("**Load an existing dataset**", expanded=True):
|
66 |
|
67 |
def create_example(dataset: str):
|
68 |
+
base = f"https://raw.githubusercontent.com/mlcommons/croissant/main/datasets/1.0/{dataset.lower()}"
|
69 |
url = f"{base}/metadata.json"
|
70 |
try:
|
71 |
json = requests.get(url).json()
|
72 |
+
metadata = mlc.Metadata.from_json(mlc.Context(), json)
|
73 |
st.session_state[Metadata] = Metadata.from_canonical(metadata)
|
74 |
save_current_project()
|
75 |
# Write supplementary files.
|