nsthorat-lilac committed on
Commit
27df543
·
1 Parent(s): eb37af6

Push to HF space

Browse files
Files changed (2) hide show
  1. app.py +281 -0
  2. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Streamlit UI."""
2
+
3
+ from typing import Literal, Optional, Union
4
+
5
+ import lilac as ll
6
+ import streamlit as st
7
+ from datasets import load_dataset_builder
8
+
9
# Custom CSS that gives the main form and the expanded sidebar a little more horizontal room.
_LAYOUT_CSS = """
<style>
.block-container {
max-width: 54rem;
}
[data-testid="stSidebar"][aria-expanded="true"]{
min-width: 600px;
}
</style>
"""
# `unsafe_allow_html` is required so the <style> tag is emitted as HTML instead of being escaped.
st.markdown(_LAYOUT_CSS, unsafe_allow_html=True)
23
+
24
# The app is a three-step wizard: choose a dataset, choose the space settings, and a success
# screen that is shown once the deployment call has returned a space link.
PAGES = ['dataset', 'space', 'success']
if 'current_page' not in st.session_state:
    # Nothing stored yet for this session: start at the dataset step.
    st.session_state.current_page = 'dataset'


# Module-level default; `_dataset_page` flips it to True once the dataset builder loads.
is_valid_dataset = False
31
+
32
+
33
def _get_page() -> str:
    """Return the name of the wizard page stored in the Streamlit session state."""
    page = st.session_state.current_page
    return page
35
+
36
+
37
def _dataset_page():
    """Render step 1 of the wizard: choose the HuggingFace dataset to deploy.

    Draws the dataset form, tries to resolve the dataset with `load_dataset_builder`, and records
    the outcome in `st.session_state` (`ds_loaded`, `ds_error`, `ds_dataset_name`,
    `ds_description`, `ds_features`, `ds_splits`) for the sidebar to display. The "Next" button
    is enabled only when the dataset builder loaded without raising.
    """
    # The form values are published as module globals; the nested callbacks below read them.
    global hf_dataset_name, hf_config_name, hf_split, sample_size, is_valid_dataset
    st.header('Deploy a HuggingFace dataset to a space in Lilac 🌸', anchor=False)
    st.subheader(
        'Step 1: Choose a dataset',
        divider='violet',
        anchor=False,
        help='For a list of datasets see: https://huggingface.co/datasets',
    )

    hf_dataset_name = st.text_input(
        'HuggingFace dataset',
        help='Either in the format `user/dataset` or `dataset`, for example: `Open-Orca/OpenOrca`',
        placeholder='dataset or user/dataset',
        value=st.session_state.get('hf_dataset_name', None),
    )
    hf_config_name = st.text_input(
        'Config',
        help='Some datasets required this field.',
        placeholder='(optional)',
        value=st.session_state.get('hf_config_name', None),
    )
    hf_split = st.text_input(
        'Split',
        help='Loads all splits by default.',
        placeholder='(optional)',
        value=st.session_state.get('hf_split', None),
    )
    sample_size = st.number_input(
        'Sample size',
        help='Number of rows to sample from the dataset, for each split.',
        placeholder='(optional)',
        min_value=1,
        step=1,
        key='sample_size',
        value=st.session_state.get('sample_size', None),
    )
    hf_read_token = st.text_input(
        'HuggingFace read access token',
        type='password',
        help='The access token is used to authenticate you with HuggingFace to read the dataset. '
        'https://huggingface.co/docs/hub/security-tokens',
        placeholder='(optional if dataset is public)',
    )

    def _next():
        # Button callback: persist the form values and advance to the space step.
        st.session_state.current_page = 'space'
        st.session_state.hf_dataset_name = hf_dataset_name
        st.session_state.hf_config_name = hf_config_name
        st.session_state.hf_split = hf_split
        st.session_state.sample_size = sample_size

    def _next_button():
        # "Next" stays disabled until the dataset has been resolved successfully.
        return st.button('Next', disabled=not is_valid_dataset, type='primary', on_click=_next)

    ds_builder = None
    if hf_dataset_name:
        is_valid_dataset = False
        try:
            ds_builder = load_dataset_builder(
                hf_dataset_name,
                # An untouched or cleared optional field may yield '' rather than None; pass None
                # so the library falls back to its defaults instead of an empty config/token.
                name=hf_config_name or None,
                token=hf_read_token or None,
            )
            is_valid_dataset = True
        except Exception as e:
            # Any failure (unknown dataset, missing config, auth error, ...) is kept for the
            # sidebar to display instead of crashing the page.
            st.session_state.ds_error = e
            st.session_state.ds_loaded = False

        st.session_state.hf_dataset_name = hf_dataset_name
    else:
        # No dataset entered: drop any error left over from a previous attempt so a stale
        # failure is not reported for an empty field.
        st.session_state.ds_error = None

    _next_button()

    if ds_builder:
        st.session_state.ds_loaded = True
        st.session_state.ds_error = None
        st.session_state.ds_dataset_name = ds_builder.info.dataset_name
        st.session_state.ds_description = ds_builder.info.description
        st.session_state.ds_features = ds_builder.info.features
        st.session_state.ds_splits = ds_builder.info.splits
    else:
        st.session_state.ds_loaded = False
116
+
117
+
118
def _space_page():
    """Render step 2 of the wizard: HuggingFace Space settings and the deploy action.

    Shows the optional dataset settings chosen in step 1, collects the space name, the write
    access token and the persistent-storage tier, and calls `ll.deploy_config` when "Deploy" is
    clicked. On success the returned link is stored in `st.session_state.space_link` and the
    wizard moves to the 'success' page; on failure the exception is shown on the page.
    """

    def _back():
        # Button callback: remember the space form values, then return to the dataset step.
        st.session_state.hf_space_name = hf_space_name
        st.session_state.hf_storage = hf_storage
        st.session_state.hf_access_token = hf_access_token
        st.session_state.current_page = 'dataset'

    # Values saved by an earlier visit to this page (if any) pre-fill the form below.
    hf_space_name = st.session_state.get('hf_space_name', None)
    hf_storage = st.session_state.get('hf_storage', None)
    hf_access_token = st.session_state.get('hf_access_token', None)

    st.button('⬅ Back', on_click=_back)
    st.subheader(
        'Step 2: Choose HuggingFace Space settings',
        divider='violet',
        anchor=False,
        help='See HuggingFace Spaces [documentation](https://huggingface.co/docs/hub/spaces-overview)',
    )

    # Echo the optional dataset settings from step 1. `.get` keeps the page rendering even if a
    # key was never stored in the session.
    chosen_config = st.session_state.get('hf_config_name', None)
    if chosen_config:
        st.write(f'Config: {chosen_config}')
    chosen_split = st.session_state.get('hf_split', None)
    if chosen_split:
        st.write(f'Split: {chosen_split}')
    chosen_sample_size = st.session_state.get('sample_size', None)
    if chosen_sample_size:
        st.write(f'Sample size: {chosen_sample_size}')

    st.write('##### HuggingFace space to create')
    hf_space_name = st.text_input(
        'HuggingFace space name',
        help='This space will be created if it does not exist',
        placeholder='org/name',
        value=hf_space_name,
    )
    hf_access_token = st.text_input(
        'HuggingFace write access token',
        type='password',
        help='The access token is used to authenticate you with HuggingFace to create the space. '
        'https://huggingface.co/docs/hub/security-tokens',
        value=hf_access_token,
    )
    # Single source of truth for both the displayed options and the pre-selected index.
    storage_options = ['None', 'small', 'medium', 'large']
    hf_storage = st.selectbox(
        'Persistent storage',
        storage_options,
        help='You will get charged for persistent storage. See https://huggingface.co/docs/hub/spaces-storage',
        index=storage_options.index(hf_storage if hf_storage else 'None'),
    )
    # public_space = st.checkbox(
    #   'Make space public',
    #   help='If checked, your space will be made publicly visible.',
    #   value=public_space,
    # )

    def _deploy():
        # Button callback: build the Lilac config from the session values and deploy it.
        hf_dataset_name = st.session_state.get('hf_dataset_name', None)
        if not (hf_space_name and hf_access_token and hf_dataset_name):
            # Explicit check rather than `assert`, which is stripped when Python runs with -O.
            st.error('A dataset, a space name and a write access token are required to deploy.')
            return

        hf_config_name = st.session_state.get('hf_config_name', None)
        hf_split = st.session_state.get('hf_split', None)
        sample_size = st.session_state.get('sample_size', None)

        # The select box uses the string 'None' for "no persistent storage"; the deploy call
        # takes a real None in that case.
        hf_space_storage: Optional[Union[Literal['small'], Literal['medium'], Literal['large']]]
        if hf_storage == 'None':
            hf_space_storage = None
        else:
            # Narrowing only: the select box cannot return anything outside `storage_options`.
            assert hf_storage == 'small' or hf_storage == 'medium' or hf_storage == 'large'
            hf_space_storage = hf_storage

        try:
            space_link = ll.deploy_config(
                hf_space=hf_space_name,
                create_space=True,
                hf_space_storage=hf_space_storage,
                config=ll.Config(
                    datasets=[
                        ll.DatasetConfig(
                            namespace='local',
                            # '/' in `user/dataset` is replaced to form the Lilac dataset name.
                            name=hf_dataset_name.replace('/', '_'),
                            source=ll.HuggingFaceSource(
                                dataset_name=hf_dataset_name,
                                config_name=hf_config_name,
                                split=hf_split,
                                sample_size=int(sample_size) if sample_size else None,
                                token=hf_access_token,
                            ),
                        )
                    ]
                ),
                hf_token=hf_access_token,
            )
            st.session_state.space_link = space_link
            st.session_state.current_page = 'success'
        except Exception as e:
            # Surface any deployment failure to the user instead of crashing the app.
            st.subheader('Deployment failed!', divider='red')
            st.error(e)

    # "Deploy" is only clickable once both a token and a space name have been entered.
    st.button('Deploy', disabled=not (hf_access_token and hf_space_name), on_click=_deploy)
231
+
232
+
233
def _success_page():
    """Render the final page, linking to the deployed HuggingFace space and its settings."""
    # Link stored by the deploy callback after `ll.deploy_config` returned.
    space_link = st.session_state.space_link

    st.subheader('Success!', divider='green')
    st.subheader(f'[Visit your HuggingFace space ↗]({space_link})')
    st.write(
        'Spaces are private by default. '
        f'To make them public, visit the [Space settings]({space_link}/settings). '
    )
243
+
244
+
245
# Render the wizard page the session is currently on; an unrecognised page name renders nothing.
_page_renderers = {
    'dataset': _dataset_page,
    'space': _space_page,
    'success': _success_page,
}
_render_current_page = _page_renderers.get(_get_page())
if _render_current_page is not None:
    _render_current_page()
251
+
252
+
253
# Sidebar: dataset details when the builder loaded, the load error when it failed, and a hint
# otherwise. Prefer the name reported by the builder, falling back to what the user typed.
dataset_name = st.session_state.get('ds_dataset_name', None)
if not dataset_name:
    dataset_name = st.session_state.get('hf_dataset_name', None)

_sidebar = st.sidebar
_load_error = st.session_state.get('ds_error', None)

if st.session_state.get('ds_loaded', False):
    _sidebar.write('# HuggingFace dataset')

    _sidebar.header(
        f'[{dataset_name}](https://huggingface.co/datasets/{dataset_name})',
        divider='rainbow',
        anchor=False,
    )

    _sidebar.write(st.session_state.get('ds_description', None))

    _sidebar.write('##### Features')
    _sidebar.table(st.session_state.get('ds_features', {}))

    _sidebar.write('##### Splits')
    _sidebar.table(st.session_state.get('ds_splits', {}))
elif _load_error:
    _sidebar.subheader(f'Error loading `{dataset_name}`', divider='red', anchor=False)
    _sidebar.error(_load_error)
    _sidebar.write(
        'If the dataset is private, make sure to enter a HuggingFace '
        'token that has access to the dataset.'
    )
else:
    _sidebar.write('Choose a dataset to see more info..')
requirements.txt ADDED
The diff for this file is too large to render. See raw diff