Spaces:
Runtime error
Runtime error
Commit
·
27df543
1
Parent(s):
eb37af6
Push to HF space
Browse files- app.py +281 -0
- requirements.txt +0 -0
app.py
ADDED
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Streamlit UI."""
|
2 |
+
|
3 |
+
from typing import Literal, Optional, Union
|
4 |
+
|
5 |
+
import lilac as ll
|
6 |
+
import streamlit as st
|
7 |
+
from datasets import load_dataset_builder
|
8 |
+
|
9 |
+
# Widen the main form and the expanded sidebar a little via injected CSS.
_PAGE_STYLE = """
<style>
.block-container {
max-width: 54rem;
}
[data-testid="stSidebar"][aria-expanded="true"]{
min-width: 600px;
}
</style>
"""
st.markdown(_PAGE_STYLE, unsafe_allow_html=True)

# The two pages the user fills in: choosing a dataset, then choosing the space.
# (A third page name, 'success', is set after a deploy completes.)
PAGES = ['dataset', 'space']

# First run of a session: start on the dataset page.
if 'current_page' not in st.session_state:
  st.session_state['current_page'] = 'dataset'


# Whether the dataset entered on the dataset page resolved; starts False on every run.
is_valid_dataset = False
|
31 |
+
|
32 |
+
|
33 |
+
def _get_page():
  """Return the name of the page currently stored in the session state."""
  page = st.session_state.current_page
  return page
|
35 |
+
|
36 |
+
|
37 |
+
def _dataset_page():
  """Render step 1: collect the dataset settings and validate the dataset.

  Draws the dataset name, config, split, sample size and read-token inputs,
  tries to resolve the dataset with `load_dataset_builder`, and enables the
  'Next' button only when that succeeds.

  Side effects on `st.session_state`:
    - `hf_dataset_name` is updated on every run.
    - `ds_loaded`, `ds_error`, `ds_dataset_name`, `ds_description`,
      `ds_features` and `ds_splits` describe the outcome of the lookup and are
      read by the sidebar code at the bottom of the file.
    - `current_page`, `hf_config_name`, `hf_split` and `sample_size` are written
      when 'Next' is clicked.
  """
  # Only the validity flag is shared with module scope; everything else is local.
  global is_valid_dataset

  st.header('Deploy a HuggingFace dataset to a space in Lilac 🌸', anchor=False)
  st.subheader(
    'Step 1: Choose a dataset',
    divider='violet',
    anchor=False,
    help='For a list of datasets see: https://huggingface.co/datasets',
  )

  hf_dataset_name = st.text_input(
    'HuggingFace dataset',
    help='Either in the format `user/dataset` or `dataset`, for example: `Open-Orca/OpenOrca`',
    placeholder='dataset or user/dataset',
    value=st.session_state.get('hf_dataset_name', None),
  )
  hf_config_name = st.text_input(
    'Config',
    help='Some datasets required this field.',
    placeholder='(optional)',
    value=st.session_state.get('hf_config_name', None),
  )
  hf_split = st.text_input(
    'Split',
    help='Loads all splits by default.',
    placeholder='(optional)',
    value=st.session_state.get('hf_split', None),
  )
  sample_size = st.number_input(
    'Sample size',
    help='Number of rows to sample from the dataset, for each split.',
    placeholder='(optional)',
    min_value=1,
    step=1,
    key='sample_size',
    value=st.session_state.get('sample_size', None),
  )
  hf_read_token = st.text_input(
    'HuggingFace read access token',
    type='password',
    help='The access token is used to authenticate you with HuggingFace to read the dataset. '
    'https://huggingface.co/docs/hub/security-tokens',
    placeholder='(optional if dataset is public)',
  )

  def _next():
    """Persist the form values and switch to the space page."""
    st.session_state.current_page = 'space'
    st.session_state.hf_dataset_name = hf_dataset_name
    st.session_state.hf_config_name = hf_config_name
    st.session_state.hf_split = hf_split
    st.session_state.sample_size = sample_size

  is_valid_dataset = False
  ds_builder = None
  if hf_dataset_name:
    try:
      # Blank inputs are normalised to None so an empty string is never sent as a
      # config name or token.
      ds_builder = load_dataset_builder(
        hf_dataset_name,
        name=hf_config_name or None,
        token=hf_read_token or None,
      )
    except Exception as e:
      # Any failure to resolve the dataset is reported in the sidebar, not raised.
      st.session_state.ds_error = e
    else:
      is_valid_dataset = True
  else:
    # No dataset entered: drop any error left over from a previous attempt.
    st.session_state.ds_error = None

  st.session_state.hf_dataset_name = hf_dataset_name

  st.button('Next', disabled=not is_valid_dataset, type='primary', on_click=_next)

  if ds_builder is not None:
    st.session_state.ds_loaded = True
    st.session_state.ds_error = None
    st.session_state.ds_dataset_name = ds_builder.info.dataset_name
    st.session_state.ds_description = ds_builder.info.description
    st.session_state.ds_features = ds_builder.info.features
    st.session_state.ds_splits = ds_builder.info.splits
  else:
    st.session_state.ds_loaded = False
    # Forget the name of a previously loaded dataset so the sidebar falls back to
    # the name that was just entered instead of showing a stale one.
    st.session_state.ds_dataset_name = None
|
116 |
+
|
117 |
+
|
118 |
+
def _space_page():
  """Render step 2: collect the HuggingFace Space settings and deploy.

  Shows a summary of the optional dataset settings chosen in step 1, then the
  space name, write token and persistent-storage inputs. 'Back' stores the
  current inputs in `st.session_state` and returns to the dataset page.
  'Deploy' calls `ll.deploy_config`; on success it stores `space_link` and
  switches `current_page` to 'success', on failure it reports the error.
  """
  storage_options = ['None', 'small', 'medium', 'large']

  hf_space_name = st.session_state.get('hf_space_name', None)
  hf_storage = st.session_state.get('hf_storage', None)
  hf_access_token = st.session_state.get('hf_access_token', None)

  def _back():
    """Remember the space settings entered so far and go back to step 1."""
    st.session_state.hf_space_name = hf_space_name
    st.session_state.hf_storage = hf_storage
    st.session_state.hf_access_token = hf_access_token
    st.session_state.current_page = 'dataset'

  st.button('⬅ Back', on_click=_back)
  st.subheader(
    'Step 2: Choose HuggingFace Space settings',
    divider='violet',
    anchor=False,
    help='See HuggingFace Spaces [documentation](https://huggingface.co/docs/hub/spaces-overview)',
  )

  # Summarise the optional step-1 settings; `.get` keeps this safe when a key
  # was never stored.
  for label, key in (
    ('Config', 'hf_config_name'),
    ('Split', 'hf_split'),
    ('Sample size', 'sample_size'),
  ):
    chosen = st.session_state.get(key, None)
    if chosen:
      st.write(f'{label}: {chosen}')

  st.write('##### HuggingFace space to create')
  hf_space_name = st.text_input(
    'HuggingFace space name',
    help='This space will be created if it does not exist',
    placeholder='org/name',
    value=hf_space_name,
  )
  hf_access_token = st.text_input(
    'HuggingFace write access token',
    type='password',
    help='The access token is used to authenticate you with HuggingFace to create the space. '
    'https://huggingface.co/docs/hub/security-tokens',
    value=hf_access_token,
  )
  hf_storage = st.selectbox(
    'Persistent storage',
    storage_options,
    help='You will get charged for persistent storage. See https://huggingface.co/docs/hub/spaces-storage',
    # Fall back to 'None' when nothing (or an unknown value) was stored.
    index=storage_options.index(hf_storage) if hf_storage in storage_options else 0,
  )
  # NOTE: no public/private toggle is offered here; the success page points the
  # user to the Space settings instead.

  def _deploy():
    """Create the space and deploy the dataset config to it."""
    hf_dataset_name = st.session_state.get('hf_dataset_name', None)
    if not (hf_space_name and hf_access_token and hf_dataset_name):
      st.error('A dataset, a space name and a write access token are required to deploy.')
      return

    hf_config_name = st.session_state.get('hf_config_name', None)
    hf_split = st.session_state.get('hf_split', None)
    sample_size = st.session_state.get('sample_size', None)

    # The 'None' option (or anything unexpected) means no persistent storage.
    hf_space_storage: Optional[Literal['small', 'medium', 'large']] = (
      hf_storage if hf_storage in ('small', 'medium', 'large') else None
    )

    try:
      space_link = ll.deploy_config(
        hf_space=hf_space_name,
        create_space=True,
        hf_space_storage=hf_space_storage,
        config=ll.Config(
          datasets=[
            ll.DatasetConfig(
              namespace='local',
              name=hf_dataset_name.replace('/', '_'),
              source=ll.HuggingFaceSource(
                dataset_name=hf_dataset_name,
                config_name=hf_config_name,
                split=hf_split,
                sample_size=int(sample_size) if sample_size else None,
                # NOTE(review): the write token is used to read the dataset; the
                # read token entered in step 1 is not kept in session state.
                # Confirm this is intended.
                token=hf_access_token,
              ),
            )
          ]
        ),
        hf_token=hf_access_token,
      )
      st.session_state.space_link = space_link
      st.session_state.current_page = 'success'
    except Exception as e:
      # Top-level UI boundary: show the failure to the user instead of crashing.
      st.subheader('Deployment failed!', divider='red')
      st.error(e)

  can_deploy = bool(hf_access_token and hf_space_name)
  st.button('Deploy', disabled=not can_deploy, on_click=_deploy)
|
231 |
+
|
232 |
+
|
233 |
+
def _success_page():
  """Render the confirmation page with links to the deployed space.

  Reads `space_link` from `st.session_state`, which the deploy callback stores
  before switching to this page.
  """
  space_link = st.session_state.space_link

  st.subheader('Success!', divider='green')
  st.subheader(f'[Visit your HuggingFace space ↗]({space_link})')
  st.write(
    'Spaces are private by default. '
    f'To make them public, visit the [Space settings]({space_link}/settings). '
  )
|
243 |
+
|
244 |
+
|
245 |
+
# Route to the renderer for the active page; an unknown page name renders nothing.
_page_renderers = {
  'dataset': _dataset_page,
  'space': _space_page,
  'success': _success_page,
}
_render_page = _page_renderers.get(_get_page())
if _render_page is not None:
  _render_page()
|
251 |
+
|
252 |
+
|
253 |
+
# Sidebar: show details of the loaded dataset, the load error, or a hint.
_state = st.session_state
_sidebar = st.sidebar

# Prefer the name reported by the dataset builder, else the name the user typed.
dataset_name = _state.get('ds_dataset_name', None) or _state.get('hf_dataset_name', None)

if _state.get('ds_loaded', False):
  _sidebar.write('# HuggingFace dataset')

  _sidebar.header(
    f'[{dataset_name}](https://huggingface.co/datasets/{dataset_name})',
    divider='rainbow',
    anchor=False,
  )

  _sidebar.write(_state.get('ds_description', None))

  _sidebar.write('##### Features')
  _sidebar.table(_state.get('ds_features', {}))

  _sidebar.write('##### Splits')
  _sidebar.table(_state.get('ds_splits', {}))
elif _state.get('ds_error', None):
  _sidebar.subheader(f'Error loading `{dataset_name}`', divider='red', anchor=False)
  _sidebar.error(_state.get('ds_error', None))
  _sidebar.write(
    'If the dataset is private, make sure to enter a HuggingFace '
    'token that has access to the dataset.'
  )
else:
  _sidebar.write('Choose a dataset to see more info..')
|
requirements.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|