# lilac_deployer / app.py
# Last commit: "Push to HF space" (7a5c0ef), by nsthorat-lilac.
"""Lilac deployer streamlit UI.
This powers: https://huggingface.co/spaces/lilacai/lilac_deployer
"""
from typing import Literal, Optional, Union
import lilac as ll
import streamlit as st
from datasets import load_dataset_builder
# Streamlit re-executes this script on every interaction, so the statements below run on
# each rerun, not just once per session.
if 'current_page' not in st.session_state:
    # First run of the session: start on the dataset-selection page.
    st.session_state.current_page = 'dataset'

# Allow links such as `?dataset=user/dataset` to pre-fill the dataset id.
query_params = st.experimental_get_query_params()
# Seed from the URL only while nothing has been stored yet. Re-applying the query parameter
# on every rerun would overwrite a dataset id the user has since chosen, and `_deploy` reads
# `hf_dataset_name` back from session state.
if 'dataset' in query_params and 'hf_dataset_name' not in st.session_state:
    st.session_state.hf_dataset_name = query_params['dataset'][0]
def _dataset_page():
    """Render step 1: pick a HuggingFace dataset and check that it can be loaded.

    Draws the dataset id input and the advanced options (config, split, sample size, read
    token), then tries to construct the dataset builder with `load_dataset_builder`. The
    outcome is recorded in `st.session_state` (`ds_loaded`, `ds_error`, `ds_dataset_name`,
    `ds_description`, `ds_features`, `ds_splits`), which the sidebar code reads. The "Next"
    button is enabled only when the builder was created successfully.
    """
    st.header('Deploy Lilac for a HuggingFace dataset to a space', anchor=False)
    st.subheader(
        'Step 1: select a dataset',
        divider='violet',
        anchor=False,
        help='For a list of datasets see: https://huggingface.co/datasets',
    )
    hf_dataset_name = st.text_input(
        'dataset id',
        help='Either in the format `user/dataset` or `dataset`, for example: `Open-Orca/OpenOrca`',
        placeholder='dataset or user/dataset',
        value=st.session_state.get('hf_dataset_name', None),
    )
    with st.expander('advanced options'):
        hf_config_name = st.text_input(
            'config',
            help='Some datasets required this field.',
            placeholder='(optional)',
            value=st.session_state.get('hf_config_name', None),
        )
        hf_split = st.text_input(
            'split',
            help='Loads all splits by default.',
            placeholder='(optional)',
            value=st.session_state.get('hf_split', None),
        )
        sample_size = st.number_input(
            'sample size',
            help='Number of rows to sample from the dataset, for each split.',
            placeholder='(optional)',
            min_value=1,
            step=1,
            key='sample_size',
            value=st.session_state.get('sample_size', None),
        )
        hf_read_token = st.text_input(
            'huggingface [read token](https://huggingface.co/settings/tokens)',
            type='password',
            help='The access token is used to authenticate you with HuggingFace to read the dataset. '
            'https://huggingface.co/docs/hub/security-tokens',
            placeholder='(optional if dataset is public)',
        )

    # A text input that was edited and then cleared may come back as '' rather than None.
    # Treat both as "not provided" so the loader and later pages never see an empty string.
    hf_config_name = hf_config_name or None
    hf_split = hf_split or None
    hf_read_token = hf_read_token or None

    def _next():
        """Button callback: persist the current inputs and move on to the space page."""
        st.session_state.current_page = 'space'
        st.session_state.hf_dataset_name = hf_dataset_name
        st.session_state.hf_config_name = hf_config_name
        st.session_state.hf_split = hf_split
        st.session_state.sample_size = sample_size

    ds_builder = None
    if hf_dataset_name:
        try:
            ds_builder = load_dataset_builder(
                hf_dataset_name, name=hf_config_name, token=hf_read_token
            )
        except Exception as e:
            # Deliberately broad: whatever went wrong is kept so the sidebar can show it
            # instead of the page crashing.
            st.session_state.ds_error = e
            st.session_state.ds_loaded = False
        # NOTE(review): the source's original nesting of this line was lost; it is assumed
        # to run for every non-empty dataset id, whether or not loading succeeded.
        st.session_state.hf_dataset_name = hf_dataset_name

    # The button is drawn after the load attempt so its enabled state reflects the result.
    is_valid_dataset = ds_builder is not None
    st.button('Next', disabled=not is_valid_dataset, type='primary', on_click=_next)

    if ds_builder is not None:
        # Publish the dataset metadata for the sidebar and clear any earlier error.
        st.session_state.ds_loaded = True
        st.session_state.ds_error = None
        st.session_state.ds_dataset_name = hf_dataset_name
        st.session_state.ds_description = ds_builder.info.description
        st.session_state.ds_features = ds_builder.info.features
        st.session_state.ds_splits = ds_builder.info.splits
    else:
        st.session_state.ds_loaded = False
def _space_page():
    """Render step 2: collect the target space settings and deploy to HuggingFace.

    Shows the advanced options chosen on the dataset page, then inputs for the space id, a
    write token and the persistent-storage tier. "Back" stores the current inputs in
    `st.session_state` and returns to the dataset page. "Deploy" calls `ll.deploy_config`;
    on success it stores the returned link in `st.session_state.space_link` and switches to
    the success page, on failure it shows the error.
    """
    # Seed from session state before any widget is drawn so the `_back` callback always has
    # values to store, and so the inputs are restored after returning to this page.
    hf_space_name = st.session_state.get('hf_space_name', None)
    hf_storage = st.session_state.get('hf_storage', None)
    hf_access_token = st.session_state.get('hf_access_token', None)

    def _back():
        """Button callback: remember the current inputs and return to the dataset page."""
        st.session_state.hf_space_name = hf_space_name
        st.session_state.hf_storage = hf_storage
        st.session_state.hf_access_token = hf_access_token
        st.session_state.current_page = 'dataset'

    st.button('⬅ Back', on_click=_back)

    st.subheader(
        'Step 2: create huggingface space',
        divider='violet',
        anchor=False,
        help='See HuggingFace Spaces [documentation](https://huggingface.co/docs/hub/spaces-overview)',
    )

    # Echo the optional settings from the dataset page; unset ones are skipped.
    for label, state_key in (
        ('Config', 'hf_config_name'),
        ('Split', 'hf_split'),
        ('Sample size', 'sample_size'),
    ):
        chosen = st.session_state.get(state_key, None)
        if chosen:
            st.write(f'{label}: {chosen}')

    hf_space_name = st.text_input(
        'space id',
        help='This space will be created if it does not exist',
        placeholder='org/name',
        value=hf_space_name,
    )
    hf_access_token = st.text_input(
        'huggingface [write token](https://huggingface.co/settings/tokens)',
        type='password',
        help='The access token is used to authenticate you with HuggingFace to create the space. '
        'https://huggingface.co/docs/hub/security-tokens',
        value=hf_access_token,
    )
    # 'None' is the display label for "no persistent storage"; it is mapped to a real None
    # in `_deploy`.
    storage_options = ['None', 'small', 'medium', 'large']
    hf_storage = st.selectbox(
        'persistent storage',
        storage_options,
        help='Persistent storage is required if you want data to persist past the lifetime of the '
        'space docker image. This is recommended when running computations like signals or '
        'embeddings, or if you want labels to persist. You will get charged for persistent '
        'storage. See https://huggingface.co/docs/hub/spaces-storage',
        index=storage_options.index(hf_storage or 'None'),
    )

    def _deploy():
        """Button callback: build the Lilac config for the dataset and deploy it."""
        hf_dataset_name = st.session_state['hf_dataset_name']
        # The Deploy button stays disabled until the space id and token are filled in, so
        # this is an internal invariant rather than user-input validation.
        assert hf_space_name and hf_access_token and hf_dataset_name

        # Empty strings mean "not provided"; forward None rather than ''.
        hf_config_name = st.session_state.get('hf_config_name', None) or None
        hf_split = st.session_state.get('hf_split', None) or None
        sample_size = st.session_state.get('sample_size', None)

        hf_space_storage: Optional[Literal['small', 'medium', 'large']]
        if hf_storage == 'None':
            hf_space_storage = None
        else:
            # The selectbox can only yield one of `storage_options`; the assert narrows the
            # type for the annotation above.
            assert hf_storage == 'small' or hf_storage == 'medium' or hf_storage == 'large'
            hf_space_storage = hf_storage

        try:
            space_link = ll.deploy_config(
                hf_space=hf_space_name,
                create_space=True,
                hf_space_storage=hf_space_storage,
                config=ll.Config(
                    datasets=[
                        ll.DatasetConfig(
                            namespace='local',
                            # Dataset ids may contain '/', which is replaced for the name.
                            name=hf_dataset_name.replace('/', '_'),
                            source=ll.HuggingFaceSource(
                                dataset_name=hf_dataset_name,
                                config_name=hf_config_name,
                                split=hf_split,
                                sample_size=int(sample_size) if sample_size else None,
                                # NOTE(review): the write token doubles as the dataset read
                                # token here; confirm whether the deployed config keeps it.
                                token=hf_access_token,
                            ),
                        )
                    ]
                ),
                hf_token=hf_access_token,
            )
        except Exception as e:
            # Deliberately broad: any deployment failure is reported to the user.
            st.subheader('Deployment failed!', divider='red')
            st.error(e)
        else:
            st.session_state.space_link = space_link
            st.session_state.current_page = 'success'

    can_deploy = bool(hf_access_token and hf_space_name)
    st.button('Deploy', disabled=not can_deploy, on_click=_deploy)
def _success_page():
    """Render the final page: a link to the deployed space and a note on its visibility."""
    link = st.session_state.space_link

    st.subheader('Success!', divider='green')
    st.subheader(f'[Visit your HuggingFace space ↗]({link})')

    visibility_note = (
        'Spaces are private by default. '
        f'To make them public, visit the [Space settings]({link}/settings). '
    )
    st.write(visibility_note)
# Route to the renderer for the active page; an unrecognised page name renders nothing.
_page_renderers = {
    'dataset': _dataset_page,
    'space': _space_page,
    'success': _success_page,
}
_render_page = _page_renderers.get(st.session_state.current_page)
if _render_page is not None:
    _render_page()
# Sidebar content.
# Shows one of three states: metadata of the loaded dataset, the load error, or a hint.
_state = st.session_state
_sidebar = st.sidebar

# Prefer the name of the dataset that was last loaded; fall back to what was typed.
dataset_name = _state.get('ds_dataset_name', None) or _state.get('hf_dataset_name', None)
_load_error = _state.get('ds_error', None)

if _state.get('ds_loaded', False):
    _sidebar.header(
        f'[{dataset_name}](https://huggingface.co/datasets/{dataset_name})',
        divider='rainbow',
        anchor=False,
        help='Dataset information from HuggingFace datasets.',
    )
    _sidebar.write(_state.get('ds_description', None))
    _sidebar.write('##### Features')
    _sidebar.table(_state.get('ds_features', {}))
    _sidebar.write('##### Splits')
    _sidebar.table(_state.get('ds_splits', {}))
elif _load_error:
    _sidebar.subheader(f'Error loading `{dataset_name}`', divider='red', anchor=False)
    _sidebar.error(_load_error)
    _sidebar.write(
        'If the dataset is private, make sure to enter a HuggingFace '
        'token that has access to the dataset.'
    )
else:
    _sidebar.write('Choose a dataset to see more info..')