# Spaces: Runtime error (page-status text captured along with this file; not part of the program)
"""Lilac deployer streamlit UI. | |
This powers: https://huggingface.co/spaces/lilacai/lilac_deployer | |
""" | |
from typing import Literal, Optional, Union | |
import lilac as ll | |
import streamlit as st | |
from datasets import load_dataset_builder | |
# Session bootstrap. These statements run on every execution of the script, so
# anything that must survive between runs is kept in `st.session_state`.
if 'current_page' not in st.session_state:
    # Nothing stored yet: start on the dataset-selection page.
    st.session_state.current_page = 'dataset'

# Collect URL query parameters as a mapping of key -> list of values.
# `st.experimental_get_query_params` is deprecated in favour of
# `st.query_params`; use the newer API when this Streamlit version has it.
if hasattr(st, 'query_params'):
    query_params = {key: st.query_params.get_all(key) for key in st.query_params}
else:
    query_params = st.experimental_get_query_params()

# A `?dataset=<id>` deep link pre-fills the dataset id. It must only seed the
# value: overwriting it on every run would discard the dataset id that the
# `_next` callback stores, and `_deploy` would then deploy the dataset from the
# URL instead of the one the user selected.
if 'hf_dataset_name' not in st.session_state and query_params.get('dataset'):
    st.session_state.hf_dataset_name = query_params['dataset'][0]
def _dataset_page():
    """Render step 1: pick a HuggingFace dataset and validate it.

    Draws the dataset id input plus the advanced options (config, split, sample
    size, read token), tries to build a dataset builder for the entered id, and
    records the outcome in `st.session_state`:

    - on success: `ds_loaded`, `ds_dataset_name`, `ds_description`,
      `ds_features`, `ds_splits` are set and `ds_error` is cleared;
    - on failure: `ds_error` holds the exception and `ds_loaded` is False;
    - with no dataset id entered: `ds_loaded` is False and `ds_error` is cleared.

    The 'Next' button is enabled only when the dataset builder was created.
    """
    st.header('Deploy Lilac for a HuggingFace dataset to a space', anchor=False)
    st.subheader(
        'Step 1: select a dataset',
        divider='violet',
        anchor=False,
        help='For a list of datasets see: https://huggingface.co/datasets',
    )
    hf_dataset_name = st.text_input(
        'dataset id',
        help='Either in the format `user/dataset` or `dataset`, for example: `Open-Orca/OpenOrca`',
        placeholder='dataset or user/dataset',
        value=st.session_state.get('hf_dataset_name', None),
    )
    with st.expander('advanced options'):
        hf_config_name = st.text_input(
            'config',
            help='Some datasets required this field.',
            placeholder='(optional)',
            value=st.session_state.get('hf_config_name', None),
        )
        hf_split = st.text_input(
            'split',
            help='Loads all splits by default.',
            placeholder='(optional)',
            value=st.session_state.get('hf_split', None),
        )
        sample_size = st.number_input(
            'sample size',
            help='Number of rows to sample from the dataset, for each split.',
            placeholder='(optional)',
            min_value=1,
            step=1,
            key='sample_size',
            value=st.session_state.get('sample_size', None),
        )
        hf_read_token = st.text_input(
            'huggingface [read token](https://huggingface.co/settings/tokens)',
            type='password',
            help='The access token is used to authenticate you with HuggingFace to read the dataset. '
            'https://huggingface.co/docs/hub/security-tokens',
            placeholder='(optional if dataset is public)',
        )

    def _next():
        # 'Next' click handler: persist the choices and switch to the space page.
        st.session_state.current_page = 'space'
        st.session_state.hf_dataset_name = hf_dataset_name
        st.session_state.hf_config_name = hf_config_name
        st.session_state.hf_split = hf_split
        st.session_state.sample_size = sample_size

    ds_builder = None
    if hf_dataset_name:
        try:
            # Empty text inputs mean "not provided": pass None rather than ''.
            ds_builder = load_dataset_builder(
                hf_dataset_name,
                name=hf_config_name or None,
                token=hf_read_token or None,
            )
        except Exception as e:
            # Deliberately broad: any failure to resolve the dataset is shown to
            # the user in the sidebar instead of crashing the page.
            st.session_state.ds_error = e
            st.session_state.ds_loaded = False
            st.session_state.hf_dataset_name = hf_dataset_name
    else:
        # No dataset entered: drop any error left over from an earlier attempt so
        # the sidebar does not keep reporting it.
        st.session_state.ds_error = None

    is_valid_dataset = ds_builder is not None
    st.button('Next', disabled=not is_valid_dataset, type='primary', on_click=_next)

    if is_valid_dataset:
        st.session_state.ds_loaded = True
        st.session_state.ds_error = None
        st.session_state.ds_dataset_name = hf_dataset_name
        st.session_state.ds_description = ds_builder.info.description
        st.session_state.ds_features = ds_builder.info.features
        st.session_state.ds_splits = ds_builder.info.splits
    else:
        st.session_state.ds_loaded = False
def _space_page():
    """Render step 2: collect the HuggingFace space settings and deploy.

    Shows the options chosen on the dataset page, asks for the space id, a
    write token and the persistent storage tier, and offers two actions:

    - 'Back' stores the current inputs in `st.session_state` and returns to the
      dataset page;
    - 'Deploy' (enabled once both space id and token are filled in) calls
      `ll.deploy_config`; on success it stores `space_link` and switches to the
      success page, on failure it renders the error.
    """
    # Snapshot of the session taken before any widget on this page is drawn.
    session = dict(st.session_state)

    # Previously entered values (saved by `_back`), used as widget defaults.
    hf_space_name = session.get('hf_space_name', None)
    hf_storage = session.get('hf_storage', None)
    hf_access_token = session.get('hf_access_token', None)

    def _back():
        # Keep what was typed so it is restored when the user returns here.
        st.session_state.hf_space_name = hf_space_name
        st.session_state.hf_storage = hf_storage
        st.session_state.hf_access_token = hf_access_token
        st.session_state.current_page = 'dataset'

    st.button('⬅ Back', on_click=_back)

    st.subheader(
        'Step 2: create huggingface space',
        divider='violet',
        anchor=False,
        help='See HuggingFace Spaces [documentation](https://huggingface.co/docs/hub/spaces-overview)',
    )

    # Echo the optional dataset settings carried over from step 1.
    if session.get('hf_config_name', None):
        st.write(f'Config: {session["hf_config_name"]}')
    if session.get('hf_split', None):
        st.write(f'Split: {session["hf_split"]}')
    if session.get('sample_size', None):
        st.write(f'Sample size: {session["sample_size"]}')

    hf_space_name = st.text_input(
        'space id',
        help='This space will be created if it does not exist',
        placeholder='org/name',
        value=hf_space_name,
    )
    hf_access_token = st.text_input(
        'huggingface [write token](https://huggingface.co/settings/tokens)',
        type='password',
        help='The access token is used to authenticate you with HuggingFace to create the space. '
        'https://huggingface.co/docs/hub/security-tokens',
        value=hf_access_token,
    )

    # Single source of truth for the storage choices; 'None' means no storage.
    storage_options = ['None', 'small', 'medium', 'large']
    # Fall back to 'None' if nothing (or something unexpected) was stored.
    storage_index = storage_options.index(hf_storage) if hf_storage in storage_options else 0
    hf_storage = st.selectbox(
        'persistent storage',
        storage_options,
        help='Persistent storage is required if you want data to persist past the lifetime of the '
        'space docker image. This is recommended when running computations like signals or '
        'embeddings, or if you want labels to persist. You will get charged for persistent '
        'storage. See https://huggingface.co/docs/hub/spaces-storage',
        index=storage_index,
    )

    def _deploy():
        # 'Deploy' click handler: build the Lilac config and push it to the space.
        hf_dataset_name = st.session_state['hf_dataset_name']
        # The button is disabled until space id and token are set; these asserts
        # document that invariant.
        assert hf_space_name and hf_access_token and hf_dataset_name

        hf_config_name = st.session_state.get('hf_config_name', None)
        hf_split = st.session_state.get('hf_split', None)
        sample_size = st.session_state.get('sample_size', None)

        hf_space_storage: Optional[Literal['small', 'medium', 'large']]
        if hf_storage == 'None':
            hf_space_storage = None
        else:
            assert hf_storage == 'small' or hf_storage == 'medium' or hf_storage == 'large'
            hf_space_storage = hf_storage

        try:
            space_link = ll.deploy_config(
                hf_space=hf_space_name,
                create_space=True,
                hf_space_storage=hf_space_storage,
                config=ll.Config(
                    datasets=[
                        ll.DatasetConfig(
                            namespace='local',
                            name=hf_dataset_name.replace('/', '_'),
                            source=ll.HuggingFaceSource(
                                dataset_name=hf_dataset_name,
                                # Empty inputs mean "not provided": send None, not ''.
                                config_name=hf_config_name or None,
                                split=hf_split or None,
                                sample_size=int(sample_size) if sample_size else None,
                                token=hf_access_token,
                            ),
                        )
                    ]
                ),
                hf_token=hf_access_token,
            )
        except Exception as e:
            # Deliberately broad: show any deployment failure to the user.
            st.subheader('Deployment failed!', divider='red')
            st.error(e)
        else:
            st.session_state.space_link = space_link
            st.session_state.current_page = 'success'

    can_deploy = bool(hf_access_token and hf_space_name)
    st.button('Deploy', disabled=not can_deploy, on_click=_deploy)
def _success_page():
    """Render the final page: a link to the deployed space and a visibility note."""
    link = st.session_state.space_link

    st.subheader('Success!', divider='green')
    st.subheader(f'[Visit your HuggingFace space ↗]({link})')

    # Built separately so the two-part message reads as one sentence pair.
    visibility_note = (
        'Spaces are private by default. '
        f'To make them public, visit the [Space settings]({link}/settings). '
    )
    st.write(visibility_note)
# Page routing: look up the renderer for the page stored in the session and
# call it; an unrecognised page name renders nothing.
_page_renderers = {
    'dataset': _dataset_page,
    'space': _space_page,
    'success': _success_page,
}
_render_current_page = _page_renderers.get(st.session_state.current_page)
if _render_current_page is not None:
    _render_current_page()
# Sidebar content.
# `ds_dataset_name` is the last dataset that loaded successfully, while
# `hf_dataset_name` is the id most recently stored for the input (it is set to
# the attempted id when loading fails).
_loaded_name = st.session_state.get('ds_dataset_name', None)
_requested_name = st.session_state.get('hf_dataset_name', None)

if st.session_state.get('ds_loaded', False):
    dataset_name = _loaded_name or _requested_name
    st.sidebar.header(
        f'[{dataset_name}](https://huggingface.co/datasets/{dataset_name})',
        divider='rainbow',
        anchor=False,
        help='Dataset information from HuggingFace datasets.',
    )
    st.sidebar.write(st.session_state.get('ds_description', None))
    st.sidebar.write('##### Features')
    st.sidebar.table(st.session_state.get('ds_features', {}))
    st.sidebar.write('##### Splits')
    st.sidebar.table(st.session_state.get('ds_splits', {}))
else:
    # Name the dataset that was actually attempted. Preferring the last
    # successfully loaded name here would blame the wrong dataset after a
    # successful load is followed by a failed one.
    dataset_name = _requested_name or _loaded_name
    _ds_error = st.session_state.get('ds_error', None)
    if _ds_error is not None:
        st.sidebar.subheader(f'Error loading `{dataset_name}`', divider='red', anchor=False)
        st.sidebar.error(_ds_error)
        st.sidebar.write(
            'If the dataset is private, make sure to enter a HuggingFace '
            'token that has access to the dataset.'
        )
    else:
        st.sidebar.write('Choose a dataset to see more info..')