File size: 8,496 Bytes
ddf7523
 
 
 
27df543
 
 
 
 
 
 
 
 
 
acec199
 
 
 
27df543
 
ddf7523
 
a3f4230
27df543
a3f4230
27df543
 
 
 
 
a3f4230
27df543
 
 
 
a3f4230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27df543
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a5c0ef
27df543
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a3f4230
27df543
 
 
 
ddf7523
27df543
ddf7523
27df543
 
 
 
 
a3f4230
27df543
 
 
 
 
a3f4230
27df543
 
 
 
 
 
 
a3f4230
27df543
a3f4230
 
 
 
27df543
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ddf7523
27df543
 
 
 
 
 
 
 
 
 
 
 
 
ddf7523
27df543
ddf7523
27df543
ddf7523
27df543
 
ddf7523
27df543
 
 
 
 
 
 
 
a3f4230
27df543
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
"""Lilac deployer streamlit UI.

This powers: https://huggingface.co/spaces/lilacai/lilac_deployer
"""

from typing import Literal, Optional, Union

import lilac as ll
import streamlit as st
from datasets import load_dataset_builder

# The app is a 3-page wizard ('dataset' -> 'space' -> 'success') driven by
# st.session_state.current_page; default to the first page on a fresh session.
if 'current_page' not in st.session_state:
  st.session_state.current_page = 'dataset'

# Support deep-linking a dataset via `?dataset=user/name` in the URL.
# NOTE(review): st.experimental_get_query_params is deprecated in newer
# Streamlit releases in favor of st.query_params — confirm the pinned version.
query_params = st.experimental_get_query_params()
if 'dataset' in query_params:
  st.session_state.hf_dataset_name = query_params['dataset'][0]


def _dataset_page():
  """Render step 1 of the wizard: pick and validate a HuggingFace dataset.

  Widget values are mirrored into st.session_state so they survive navigating
  to the space page and back. Validation loads the dataset builder (metadata
  only) on every rerun in which a dataset name is present.
  """
  is_valid_dataset = False

  st.header('Deploy Lilac for a HuggingFace dataset to a space', anchor=False)
  st.subheader(
    'Step 1: select a dataset',
    divider='violet',
    anchor=False,
    help='For a list of datasets see: https://huggingface.co/datasets',
  )
  hf_dataset_name = st.text_input(
    'dataset id',
    help='Either in the format `user/dataset` or `dataset`, for example: `Open-Orca/OpenOrca`',
    placeholder='dataset or user/dataset',
    value=st.session_state.get('hf_dataset_name', None),
  )
  with st.expander('advanced options'):
    hf_config_name = st.text_input(
      'config',
      help='Some datasets required this field.',
      placeholder='(optional)',
      value=st.session_state.get('hf_config_name', None),
    )
    hf_split = st.text_input(
      'split',
      help='Loads all splits by default.',
      placeholder='(optional)',
      value=st.session_state.get('hf_split', None),
    )
    # NOTE(review): this widget passes both key='sample_size' and a value read
    # from st.session_state['sample_size'] — Streamlit warns when a keyed
    # widget also gets an explicit default from session state; confirm intent.
    sample_size = st.number_input(
      'sample size',
      help='Number of rows to sample from the dataset, for each split.',
      placeholder='(optional)',
      min_value=1,
      step=1,
      key='sample_size',
      value=st.session_state.get('sample_size', None),
    )
    # The read token is used only for validation below; it is deliberately not
    # persisted to session state (and has no value= restore), so it must be
    # re-entered after navigating away.
    hf_read_token = st.text_input(
      'huggingface [read token](https://huggingface.co/settings/tokens)',
      type='password',
      help='The access token is used to authenticate you with HuggingFace to read the dataset. '
      'https://huggingface.co/docs/hub/security-tokens',
      placeholder='(optional if dataset is public)',
    )

  def _next():
    # on_click callback: persist the form and advance to the space page.
    # Runs before the next rerun, so the closure-captured widget values here
    # are the ones from the click's script run.
    st.session_state.current_page = 'space'
    st.session_state.hf_dataset_name = hf_dataset_name
    st.session_state.hf_config_name = hf_config_name
    st.session_state.hf_split = hf_split
    st.session_state.sample_size = sample_size

  def _next_button():
    # `is_valid_dataset` is read at call time (after validation below), not at
    # definition time, so the button's disabled state reflects the check.
    enabled = is_valid_dataset
    return st.button('Next', disabled=not enabled, type='primary', on_click=_next)

  ds_builder = None
  if hf_dataset_name:
    is_valid_dataset = False
    try:
      # Loads dataset metadata only; raises if the id/config is unknown or the
      # dataset is private and the token lacks access.
      ds_builder = load_dataset_builder(hf_dataset_name, name=hf_config_name, token=hf_read_token)
      is_valid_dataset = True
    except Exception as e:
      st.session_state.ds_error = e
      st.session_state.ds_loaded = False

  st.session_state.hf_dataset_name = hf_dataset_name

  _next_button()

  # Mirror the builder's metadata into session state for the sidebar display.
  if ds_builder:
    st.session_state.ds_loaded = True
    st.session_state.ds_error = None
    st.session_state.ds_dataset_name = hf_dataset_name
    st.session_state.ds_description = ds_builder.info.description
    st.session_state.ds_features = ds_builder.info.features
    st.session_state.ds_splits = ds_builder.info.splits
  else:
    st.session_state.ds_loaded = False


def _space_page():
  """Render step 2 of the wizard: collect space settings and deploy.

  Reads the dataset selection saved by the dataset page from
  st.session_state and, on deploy, calls ll.deploy_config to create (or
  update) the HuggingFace space. On success, navigates to the success page.
  """
  # Read-only snapshot of session state at render time, used for display.
  session = dict(st.session_state)

  def _back():
    # on_click callback: persist the space form so it is restored when the
    # user returns to this page, then navigate back.
    st.session_state.hf_space_name = hf_space_name
    st.session_state.hf_storage = hf_storage
    st.session_state.hf_access_token = hf_access_token
    st.session_state.current_page = 'dataset'

  # Restore previously-entered values (None on first visit).
  hf_space_name = st.session_state.get('hf_space_name', None)
  hf_storage = st.session_state.get('hf_storage', None)
  hf_access_token = st.session_state.get('hf_access_token', None)

  def _back_button():
    return st.button('⬅ Back', on_click=_back)

  _back_button()
  st.subheader(
    'Step 2: create huggingface space',
    divider='violet',
    anchor=False,
    help='See HuggingFace Spaces [documentation](https://huggingface.co/docs/hub/spaces-overview)',
  )
  # Echo the advanced options chosen on the previous page. Read them all from
  # the `session` snapshot (the original mixed `session` and st.session_state
  # for sibling values; the snapshot is taken before any widget below runs,
  # so the two sources are equivalent here).
  if session.get('hf_config_name', None):
    st.write(f'Config: {session["hf_config_name"]}')
  if session.get('hf_split', None):
    st.write(f'Split: {session["hf_split"]}')
  if session.get('sample_size', None):
    st.write(f'Sample size: {session["sample_size"]}')

  hf_space_name = st.text_input(
    'space id',
    help='This space will be created if it does not exist',
    placeholder='org/name',
    value=hf_space_name,
  )
  hf_access_token = st.text_input(
    'huggingface [write token](https://huggingface.co/settings/tokens)',
    type='password',
    help='The access token is used to authenticate you with HuggingFace to create the space. '
    'https://huggingface.co/docs/hub/security-tokens',
    value=hf_access_token,
  )
  # Single source of truth for the storage tiers: used both as the selectbox
  # options and to restore the previously-selected index. (The original
  # duplicated this list as a second inline literal.)
  storage_options = ['None', 'small', 'medium', 'large']
  hf_storage = st.selectbox(
    'persistent storage',
    storage_options,
    help='Persistent storage is required if you want data to persist past the lifetime of the '
    'space docker image. This is recommended when running computations like signals or embeddings,'
    'or if you want labels to persist. You will get charged for persistent storage. See '
    'https://huggingface.co/docs/hub/spaces-storage',
    index=storage_options.index(hf_storage if hf_storage else 'None'),
  )

  def _deploy_button():
    # Deploy requires both a space id and a write token.
    enabled = hf_access_token and hf_space_name
    return st.button('Deploy', disabled=not enabled, on_click=_deploy)

  def _deploy():
    # on_click callback: build the Lilac config and deploy it to the space.
    hf_dataset_name = st.session_state['hf_dataset_name']
    assert hf_space_name and hf_access_token and hf_dataset_name

    hf_config_name = st.session_state.get('hf_config_name', None)
    hf_split = st.session_state.get('hf_split', None)
    sample_size = st.session_state.get('sample_size', None)

    # Map the UI's 'None' sentinel string to a real None for the API.
    hf_space_storage: Optional[Union[Literal['small'], Literal['medium'], Literal['large']]]
    if hf_storage == 'None':
      hf_space_storage = None
    else:
      assert hf_storage == 'small' or hf_storage == 'medium' or hf_storage == 'large'
      hf_space_storage = hf_storage

    try:
      space_link = ll.deploy_config(
        hf_space=hf_space_name,
        create_space=True,
        hf_space_storage=hf_space_storage,
        config=ll.Config(
          datasets=[
            ll.DatasetConfig(
              namespace='local',
              # Dataset names cannot contain '/', so flatten `user/dataset`.
              name=hf_dataset_name.replace('/', '_'),
              source=ll.HuggingFaceSource(
                dataset_name=hf_dataset_name,
                config_name=hf_config_name,
                split=hf_split,
                sample_size=int(sample_size) if sample_size else None,
                token=hf_access_token,
              ),
            )
          ]
        ),
        hf_token=hf_access_token,
      )
      st.session_state.space_link = space_link
      st.session_state.current_page = 'success'
    except Exception as e:
      st.subheader('Deployment failed!', divider='red')
      st.error(e)

  _deploy_button()


def _success_page():
  """Render the final page: link to the newly deployed HuggingFace space."""
  link = st.session_state.space_link

  st.subheader('Success!', divider='green')
  st.subheader(f'[Visit your HuggingFace space ↗]({link})')
  st.write(
    f'Spaces are private by default. To make them public, visit the [Space settings]({link}/settings). '
  )


# Route to the current wizard page via a dispatch table.
_PAGES = {
  'dataset': _dataset_page,
  'space': _space_page,
  'success': _success_page,
}
_page = _PAGES.get(st.session_state.current_page)
if _page is not None:
  _page()

# Sidebar content: dataset info (or load error) for the selected dataset.
dataset_name = st.session_state.get('ds_dataset_name', None) or st.session_state.get(
  'hf_dataset_name', None
)
_ds_error = st.session_state.get('ds_error', None)
if st.session_state.get('ds_loaded', False):
  st.sidebar.header(
    f'[{dataset_name}](https://huggingface.co/datasets/{dataset_name})',
    divider='rainbow',
    anchor=False,
    help='Dataset information from HuggingFace datasets.',
  )
  st.sidebar.write(st.session_state.get('ds_description', None))
  st.sidebar.write('##### Features')
  st.sidebar.table(st.session_state.get('ds_features', {}))
  st.sidebar.write('##### Splits')
  st.sidebar.table(st.session_state.get('ds_splits', {}))
elif _ds_error:
  st.sidebar.subheader(f'Error loading `{dataset_name}`', divider='red', anchor=False)
  st.sidebar.error(_ds_error)
  st.sidebar.write(
    'If the dataset is private, make sure to enter a HuggingFace '
    'token that has access to the dataset.'
  )
else:
  st.sidebar.write('Choose a dataset to see more info..')