Spaces:
Running
Running
Deploy (see actual commits on https://github.com/mlcommons/croissant).
Browse files
- app.py +26 -9
- components/flex/__init__.py +64 -0
- core/state.py +0 -2
- events/metadata.py +0 -22
- requirements.txt +1 -1
- views/load.py +12 -0
- views/metadata.py +0 -44
- views/overview.py +1 -3
- views/record_sets.py +1 -1
- views/splash.py +1 -23
app.py
CHANGED
@@ -2,6 +2,7 @@ import urllib.parse
|
|
2 |
|
3 |
import streamlit as st
|
4 |
|
|
|
5 |
from core.constants import OAUTH_CLIENT_ID
|
6 |
from core.constants import OAUTH_STATE
|
7 |
from core.constants import REDIRECT_URI
|
@@ -14,8 +15,6 @@ from views.splash import render_splash
|
|
14 |
from views.wizard import render_editor
|
15 |
|
16 |
st.set_page_config(page_title="Croissant Editor", page_icon="🥐", layout="wide")
|
17 |
-
col1, col2, col3 = st.columns([10, 1, 1])
|
18 |
-
col1.header("Croissant Editor")
|
19 |
|
20 |
init_state()
|
21 |
|
@@ -42,6 +41,7 @@ if OAUTH_CLIENT_ID and not user:
|
|
42 |
state = urllib.parse.quote(OAUTH_STATE, safe="")
|
43 |
scope = urllib.parse.quote("openid profile", safe="")
|
44 |
url = f"https://huggingface.co/oauth/authorize?response_type=code&redirect_uri={redirect_uri}&scope={scope}&client_id={client_id}&state={state}"
|
|
|
45 |
st.link_button("🤗 Login with Hugging Face", url)
|
46 |
st.stop()
|
47 |
|
@@ -59,16 +59,33 @@ def _logout():
|
|
59 |
_back_to_menu()
|
60 |
|
61 |
|
62 |
-
if OAUTH_CLIENT_ID:
|
63 |
-
col2.write("\n") # Vertical box to shift the lgout menu
|
64 |
-
col2.button("Log out", on_click=_logout)
|
65 |
-
|
66 |
timestamp = get_project_timestamp()
|
67 |
|
|
|
|
|
|
|
|
|
68 |
if timestamp:
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
should_display_editor = bool(st.session_state.get(CurrentProject))
|
74 |
|
|
|
2 |
|
3 |
import streamlit as st
|
4 |
|
5 |
+
from components.flex import st_flex
|
6 |
from core.constants import OAUTH_CLIENT_ID
|
7 |
from core.constants import OAUTH_STATE
|
8 |
from core.constants import REDIRECT_URI
|
|
|
15 |
from views.wizard import render_editor
|
16 |
|
17 |
st.set_page_config(page_title="Croissant Editor", page_icon="🥐", layout="wide")
|
|
|
|
|
18 |
|
19 |
init_state()
|
20 |
|
|
|
41 |
state = urllib.parse.quote(OAUTH_STATE, safe="")
|
42 |
scope = urllib.parse.quote("openid profile", safe="")
|
43 |
url = f"https://huggingface.co/oauth/authorize?response_type=code&redirect_uri={redirect_uri}&scope={scope}&client_id={client_id}&state={state}"
|
44 |
+
st.header("Croissant Editor")
|
45 |
st.link_button("🤗 Login with Hugging Face", url)
|
46 |
st.stop()
|
47 |
|
|
|
59 |
_back_to_menu()
|
60 |
|
61 |
|
|
|
|
|
|
|
|
|
62 |
timestamp = get_project_timestamp()
|
63 |
|
64 |
+
button_width = 73 # This is the best value for the current content of the buttons.
|
65 |
+
buttons_widths = []
|
66 |
+
if OAUTH_CLIENT_ID:
|
67 |
+
buttons_widths.append(button_width)
|
68 |
if timestamp:
|
69 |
+
buttons_widths.append(button_width)
|
70 |
+
widths = [200, sum(buttons_widths) + 10] # 10 being the space between elements.
|
71 |
+
|
72 |
+
with st_flex(
|
73 |
+
flex_direction="row",
|
74 |
+
justify_content="space-between",
|
75 |
+
align_items="center",
|
76 |
+
widths=widths,
|
77 |
+
):
|
78 |
+
st.header("Croissant Editor")
|
79 |
+
if OAUTH_CLIENT_ID or timestamp:
|
80 |
+
with st_flex(
|
81 |
+
flex_direction="row",
|
82 |
+
justify_content="space-between",
|
83 |
+
widths=buttons_widths,
|
84 |
+
):
|
85 |
+
if OAUTH_CLIENT_ID:
|
86 |
+
st.button("Log out", on_click=_logout)
|
87 |
+
if timestamp:
|
88 |
+
st.button("Home", on_click=_back_to_menu)
|
89 |
|
90 |
should_display_editor = bool(st.session_state.get(CurrentProject))
|
91 |
|
components/flex/__init__.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import contextlib
|
2 |
+
|
3 |
+
import streamlit as st
|
4 |
+
import streamlit.components.v1 as components
|
5 |
+
|
6 |
+
|
7 |
+
@contextlib.contextmanager
def st_flex(
    flex_direction="null",
    justify_content="null",
    align_items="null",
    flex="null",
    widths=None,
):
    """[flex](https://developer.mozilla.org/en-US/docs/Web/CSS/flex) for Streamlit.

    Context manager: every Streamlit element created inside the `with` body is
    rendered into a container that a small injected script then restyles as a
    CSS flexbox.

    Warning: This custom component uses a lot of heuristics. But styling flex is
    important in CSS and missing from Streamlit. st.columns does a poor job at
    horizontally aligning elements.

    Args:
        flex_direction: https://developer.mozilla.org/en-US/docs/Web/CSS/flex-direction
        justify_content:
          https://developer.mozilla.org/en-US/docs/Web/CSS/justify-content
        align_items: https://developer.mozilla.org/en-US/docs/Web/CSS/align-items
        flex: https://developer.mozilla.org/en-US/docs/Web/CSS/flex
        widths: A sequence containing the minimal widths (in pixels) of all
          elements. This somewhat defeats the purpose of flex, but Streamlit forces
          the width of elements, which is why forcing this parameter is
          unfortunately needed. Children without an entry (or when `widths` is
          None) fall back to 60px.

    Yields:
        None. Elements created while the context is active become the flex items.
    """
    # Local import so this block does not depend on a file-level import change.
    import json

    # Serialise to a JavaScript literal. Interpolating the Python repr directly
    # would emit `None` (an undefined identifier in JS -> ReferenceError) for the
    # default value, and `(a, b)` (a comma expression) for tuples.
    js_widths = json.dumps(widths)

    outer = st.empty()
    with outer.container():
        inner = st.empty()
        with inner.container():
            yield
        # Rendered as a sibling of `inner`, so the script can reach the user's
        # elements through its own iframe's ancestors.
        components.html(
            f"""
<script>
  window.frameElement.style.display = 'none';
  // Get the element wrapping the current iframe
  const frameElement = window.frameElement.parentElement;

  // Get the parent element
  const parentElement = frameElement.parentElement;

  // Change container
  const container = parentElement.firstChild.firstChild;
  container.style.display = 'flex';
  container.style.flexDirection = '{flex_direction}';
  container.style.justifyContent = '{justify_content}';
  container.style.flex = '{flex}';
  container.style.alignItems = '{align_items}';
  // NOTE(review): this sets a plain `width` property, not `style.width`;
  // presumably `container.style.width` was intended - confirm before changing.
  container.width = '';
  container.className = '';

  // Change children
  let i = 0;
  for (const child of container.children) {{
      child.style.width = `${{{js_widths}?.[i] || 60}}px`;
      child.className = '';
      i += 1;
  }}
</script>""",
        )
|
core/state.py
CHANGED
@@ -183,10 +183,8 @@ class Metadata:
|
|
183 |
name: str = ""
|
184 |
description: str | None = None
|
185 |
citation: str | None = None
|
186 |
-
creator: mlc.PersonOrOrganization | None = None
|
187 |
data_biases: str | None = None
|
188 |
data_collection: str | None = None
|
189 |
-
date_published: datetime.datetime | None = None
|
190 |
license: str | None = ""
|
191 |
personal_sensitive_information: str | None = None
|
192 |
url: str = ""
|
|
|
183 |
name: str = ""
|
184 |
description: str | None = None
|
185 |
citation: str | None = None
|
|
|
186 |
data_biases: str | None = None
|
187 |
data_collection: str | None = None
|
|
|
188 |
license: str | None = ""
|
189 |
personal_sensitive_information: str | None = None
|
190 |
url: str = ""
|
events/metadata.py
CHANGED
@@ -1,11 +1,9 @@
|
|
1 |
-
import datetime
|
2 |
import enum
|
3 |
|
4 |
import streamlit as st
|
5 |
|
6 |
from core.names import find_unique_name
|
7 |
from core.state import Metadata
|
8 |
-
import mlcroissant as mlc
|
9 |
|
10 |
# List from:
|
11 |
LICENSES_URL = "https://huggingface.co/docs/hub/repositories-licenses"
|
@@ -93,7 +91,6 @@ class MetadataEvent(enum.Enum):
|
|
93 |
|
94 |
NAME = "NAME"
|
95 |
DESCRIPTION = "DESCRIPTION"
|
96 |
-
DATE_PUBLISHED = "DATE_PUBLISHED"
|
97 |
URL = "URL"
|
98 |
LICENSE = "LICENSE"
|
99 |
CITATION = "CITATION"
|
@@ -101,10 +98,6 @@ class MetadataEvent(enum.Enum):
|
|
101 |
DATA_BIASES = "DATA_BIASES"
|
102 |
DATA_COLLECTION = "DATA_COLLECTION"
|
103 |
PERSONAL_SENSITIVE_INFORMATION = "PERSONAL_SENSITIVE_INFORMATION"
|
104 |
-
CREATOR_ADD = "CREATOR_ADD"
|
105 |
-
CREATOR_NAME = "CREATOR_NAME"
|
106 |
-
CREATOR_URL = "CREATOR_URL"
|
107 |
-
CREATOR_REMOVE = "CREATOR_REMOVE"
|
108 |
|
109 |
|
110 |
def handle_metadata_change(event: MetadataEvent, metadata: Metadata, key: str):
|
@@ -126,18 +119,3 @@ def handle_metadata_change(event: MetadataEvent, metadata: Metadata, key: str):
|
|
126 |
metadata.data_collection = st.session_state[key]
|
127 |
elif event == MetadataEvent.PERSONAL_SENSITIVE_INFORMATION:
|
128 |
metadata.personal_sensitive_information = st.session_state[key]
|
129 |
-
elif event == MetadataEvent.DATE_PUBLISHED:
|
130 |
-
date = st.session_state[key]
|
131 |
-
metadata.date_published = datetime.datetime(date.year, date.month, date.day)
|
132 |
-
elif event == MetadataEvent.CREATOR_ADD:
|
133 |
-
metadata.creator = mlc.PersonOrOrganization()
|
134 |
-
elif event == MetadataEvent.CREATOR_REMOVE:
|
135 |
-
metadata.creator = None
|
136 |
-
elif event == MetadataEvent.CREATOR_NAME:
|
137 |
-
if not metadata.creator:
|
138 |
-
metadata.creator = mlc.PersonOrOrganization()
|
139 |
-
metadata.creator.name = st.session_state[key]
|
140 |
-
elif event == MetadataEvent.CREATOR_URL:
|
141 |
-
if not metadata.creator:
|
142 |
-
metadata.creator = mlc.PersonOrOrganization()
|
143 |
-
metadata.creator.url = st.session_state[key]
|
|
|
|
|
1 |
import enum
|
2 |
|
3 |
import streamlit as st
|
4 |
|
5 |
from core.names import find_unique_name
|
6 |
from core.state import Metadata
|
|
|
7 |
|
8 |
# List from:
|
9 |
LICENSES_URL = "https://huggingface.co/docs/hub/repositories-licenses"
|
|
|
91 |
|
92 |
NAME = "NAME"
|
93 |
DESCRIPTION = "DESCRIPTION"
|
|
|
94 |
URL = "URL"
|
95 |
LICENSE = "LICENSE"
|
96 |
CITATION = "CITATION"
|
|
|
98 |
DATA_BIASES = "DATA_BIASES"
|
99 |
DATA_COLLECTION = "DATA_COLLECTION"
|
100 |
PERSONAL_SENSITIVE_INFORMATION = "PERSONAL_SENSITIVE_INFORMATION"
|
|
|
|
|
|
|
|
|
101 |
|
102 |
|
103 |
def handle_metadata_change(event: MetadataEvent, metadata: Metadata, key: str):
|
|
|
119 |
metadata.data_collection = st.session_state[key]
|
120 |
elif event == MetadataEvent.PERSONAL_SENSITIVE_INFORMATION:
|
121 |
metadata.personal_sensitive_information = st.session_state[key]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
etils[epath]
|
2 |
-
mlcroissant
|
3 |
numpy
|
4 |
pandas
|
5 |
pytest
|
|
|
1 |
etils[epath]
|
2 |
+
mlcroissant
|
3 |
numpy
|
4 |
pandas
|
5 |
pytest
|
views/load.py
CHANGED
@@ -8,6 +8,17 @@ from core.past_projects import save_current_project
|
|
8 |
from core.state import Metadata
|
9 |
import mlcroissant as mlc
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
def _on_file_upload(key):
|
13 |
"""Triggers when a new file gets uploaded to load the Croissant metadata."""
|
@@ -29,6 +40,7 @@ def _on_file_upload(key):
|
|
29 |
|
30 |
def render_load():
|
31 |
key = "json-ld-file-upload"
|
|
|
32 |
st.file_uploader(
|
33 |
"Drop a JSON-LD", type="json", key=key, on_change=_on_file_upload, args=(key,)
|
34 |
)
|
|
|
8 |
from core.state import Metadata
|
9 |
import mlcroissant as mlc
|
10 |
|
11 |
+
# Markdown help text shown above the JSON-LD uploader; lists where Croissant
# JSON-LD files can be obtained. `${dataset_id}` is a literal placeholder the
# reader is expected to substitute, not a Python format field.
_JSON_LD_INFO = """You can download JSON-LD Croissant files from major dataset
providers:

- [Kaggle](https://www.kaggle.com/datasets) embeds Croissant JSON-LD directly in their
HTML.
- [OpenML](https://www.openml.org/search?type=data) offers a 🥐 button on all of their
datasets.
- [Hugging Face](https://huggingface.co/) offers an
[API endpoint](https://datasets-server.huggingface.co/croissant?dataset=${dataset_id}) to
build a Croissant JSON-LD."""
|
21 |
+
|
22 |
|
23 |
def _on_file_upload(key):
|
24 |
"""Triggers when a new file gets uploaded to load the Croissant metadata."""
|
|
|
40 |
|
41 |
def render_load():
|
42 |
key = "json-ld-file-upload"
|
43 |
+
st.info(_JSON_LD_INFO)
|
44 |
st.file_uploader(
|
45 |
"Drop a JSON-LD", type="json", key=key, on_change=_on_file_upload, args=(key,)
|
46 |
)
|
views/metadata.py
CHANGED
@@ -106,47 +106,3 @@ def _render_generic_metadata(metadata: Metadata):
|
|
106 |
on_change=handle_metadata_change,
|
107 |
args=(MetadataEvent.CITATION, metadata, key),
|
108 |
)
|
109 |
-
key = "metadata-date-published"
|
110 |
-
st.date_input(
|
111 |
-
label="Date of first broadcast/publication.",
|
112 |
-
key=key,
|
113 |
-
value=metadata.date_published,
|
114 |
-
on_change=handle_metadata_change,
|
115 |
-
args=(MetadataEvent.DATE_PUBLISHED, metadata, key),
|
116 |
-
)
|
117 |
-
if metadata.creator:
|
118 |
-
col1, col2, col3 = st.columns([1, 1, 1])
|
119 |
-
key = "metadata-creator-name"
|
120 |
-
col1.text_input(
|
121 |
-
label="Creator name",
|
122 |
-
key=key,
|
123 |
-
value=metadata.creator.name,
|
124 |
-
on_change=handle_metadata_change,
|
125 |
-
placeholder="A person or an organization",
|
126 |
-
args=(MetadataEvent.CREATOR_NAME, metadata, key),
|
127 |
-
)
|
128 |
-
key = "metadata-creator-url"
|
129 |
-
col2.text_input(
|
130 |
-
label="Creator URL",
|
131 |
-
key=key,
|
132 |
-
value=metadata.creator.url,
|
133 |
-
placeholder="https://mlcommons.org",
|
134 |
-
on_change=handle_metadata_change,
|
135 |
-
args=(MetadataEvent.CREATOR_URL, metadata, key),
|
136 |
-
)
|
137 |
-
key = "metadata-creator-remove"
|
138 |
-
col3.button(
|
139 |
-
"✖️",
|
140 |
-
key=key,
|
141 |
-
help="Remove the creator",
|
142 |
-
on_click=handle_metadata_change,
|
143 |
-
args=(MetadataEvent.CREATOR_REMOVE, metadata, key),
|
144 |
-
)
|
145 |
-
else:
|
146 |
-
key = "metadata-add-creator"
|
147 |
-
st.button(
|
148 |
-
label="✚ Add a creator",
|
149 |
-
key=key,
|
150 |
-
on_click=handle_metadata_change,
|
151 |
-
args=(MetadataEvent.CREATOR_ADD, metadata, key),
|
152 |
-
)
|
|
|
106 |
on_change=handle_metadata_change,
|
107 |
args=(MetadataEvent.CITATION, metadata, key),
|
108 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
views/overview.py
CHANGED
@@ -48,7 +48,7 @@ def render_overview():
|
|
48 |
col1, col2 = st.columns([1, 1], gap="medium")
|
49 |
with col1:
|
50 |
key = "metadata-name"
|
51 |
-
|
52 |
label=needed_field("Name"),
|
53 |
key=key,
|
54 |
value=metadata.name,
|
@@ -57,8 +57,6 @@ def render_overview():
|
|
57 |
on_change=handle_metadata_change,
|
58 |
args=(MetadataEvent.NAME, metadata, key),
|
59 |
)
|
60 |
-
if not name:
|
61 |
-
st.stop()
|
62 |
key = "metadata-description"
|
63 |
st.text_area(
|
64 |
label="Description",
|
|
|
48 |
col1, col2 = st.columns([1, 1], gap="medium")
|
49 |
with col1:
|
50 |
key = "metadata-name"
|
51 |
+
st.text_input(
|
52 |
label=needed_field("Name"),
|
53 |
key=key,
|
54 |
value=metadata.name,
|
|
|
57 |
on_change=handle_metadata_change,
|
58 |
args=(MetadataEvent.NAME, metadata, key),
|
59 |
)
|
|
|
|
|
60 |
key = "metadata-description"
|
61 |
st.text_area(
|
62 |
label="Description",
|
views/record_sets.py
CHANGED
@@ -375,7 +375,7 @@ def _render_left_panel():
|
|
375 |
result: _Result = _generate_data_with_timeout(record_set)
|
376 |
df, exception = result.get("df"), result.get("exception")
|
377 |
if exception is None and df is not None and not df.empty:
|
378 |
-
st.markdown("
|
379 |
st.dataframe(df, use_container_width=True)
|
380 |
# The generation is not triggered if record_set has in-line `data`.
|
381 |
elif not record_set.data:
|
|
|
375 |
result: _Result = _generate_data_with_timeout(record_set)
|
376 |
df, exception = result.get("df"), result.get("exception")
|
377 |
if exception is None and df is not None and not df.empty:
|
378 |
+
st.markdown("Preview the data:")
|
379 |
st.dataframe(df, use_container_width=True)
|
380 |
# The generation is not triggered if record_set has in-line `data`.
|
381 |
elif not record_set.data:
|
views/splash.py
CHANGED
@@ -37,7 +37,7 @@ an existing Croissant JSON-MD file. Finally, you can also select any of your
|
|
37 |
past projects from the list.
|
38 |
|
39 |
You can change the project you are currently editing at any time by clicking
|
40 |
-
the
|
41 |
|
42 |
|
43 |
def render_splash():
|
@@ -96,28 +96,6 @@ def render_splash():
|
|
96 |
type="primary",
|
97 |
args=(dataset,),
|
98 |
)
|
99 |
-
url = st.text_input(
|
100 |
-
label="Hugging Face dataset",
|
101 |
-
placeholder="Example: https://huggingface.co/datasets/mnist",
|
102 |
-
)
|
103 |
-
if url.startswith(_HUGGING_FACE_URL):
|
104 |
-
name = url.replace(_HUGGING_FACE_URL, "")
|
105 |
-
api_url = (
|
106 |
-
f"https://datasets-server.huggingface.co/croissant?dataset={name}"
|
107 |
-
)
|
108 |
-
json = requests.get(api_url, headers=None).json()
|
109 |
-
try:
|
110 |
-
metadata = mlc.Metadata.from_json(mlc.Issues(), json, None)
|
111 |
-
st.session_state[Metadata] = Metadata.from_canonical(metadata)
|
112 |
-
save_current_project()
|
113 |
-
st.rerun()
|
114 |
-
except Exception:
|
115 |
-
st.error(f"Malformed JSON: {json}")
|
116 |
-
elif url:
|
117 |
-
st.error(
|
118 |
-
f"Unknown URL {url}. Hugging Face URLS should look like"
|
119 |
-
f" {_HUGGING_FACE_URL}somedataset."
|
120 |
-
)
|
121 |
render_load()
|
122 |
with col2:
|
123 |
with st.expander("**Recent projects**", expanded=True):
|
|
|
37 |
past projects from the list.
|
38 |
|
39 |
You can change the project you are currently editing at any time by clicking
|
40 |
+
the Home button and then choosing one of the options on this page."""
|
41 |
|
42 |
|
43 |
def render_splash():
|
|
|
96 |
type="primary",
|
97 |
args=(dataset,),
|
98 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
render_load()
|
100 |
with col2:
|
101 |
with st.expander("**Recent projects**", expanded=True):
|