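# NOTE: the lines below are Hugging Face Hub file-view page chrome that was
# captured together with the source; they are kept as comments so that the
# module remains valid Python.
# burtenshaw's picture
# burtenshaw HF staff
# Upload 4 files
# 839621c verified
# raw
# history blame
# 3.1 kB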
import time
from regex import F
from defaults import (
DEFAULT_DOMAIN,
)
from hub import (
setup_dataset_on_hub,
duplicate_space_on_hub,
add_project_config_to_space_repo,
)
import streamlit as st
# Set the page title and icon, then render the top-level header.
# The icon/header emoji were mis-encoded (UTF-8 bytes decoded with a
# single-byte codepage); they are restored to the intended farmer emoji.
st.set_page_config("Domain Data Grower", page_icon="🧑‍🌾")

st.header("🧑‍🌾 Domain Data Grower")

st.divider()
################################################################################
# APP MARKDOWN
################################################################################

st.header("🌱 Create a domain specific dataset")

# Blank lines around the bullet list keep the closing sentence from being
# absorbed into the last list item when the markdown is rendered.
st.markdown(
    """This space will set up your domain specific dataset project. It will
create the resources that you need to build a dataset. Those resources include:

- A dataset repository on the Hub
- Another space to define expert domain and run generation pipelines

For a complete overview of the project. Check out the README
"""
)

# Link to the multipage-app page; the path must match the file name under
# `pages/` exactly, so the emoji in it is restored from its mis-encoded form.
st.page_link(
    "pages/🧑‍🌾 Domain Data Grower.py",
    label="Domain Data Grower",
    icon="🧑‍🌾",
)
################################################################################
# CONFIGURATION
################################################################################

st.subheader("🌾 Project Configuration")

# User-supplied project settings; the token field is rendered masked.
project_name = st.text_input("Project Name", DEFAULT_DOMAIN)
hub_username = st.text_input("Hub Username", "argilla")
hub_token = st.text_input("Hub Token", type="password")
private_selector = st.checkbox("Private Space", value=False)

if st.button("🤗 Setup Project Resources"):
    # Fail fast on blank inputs instead of sending empty repo ids or an empty
    # token to the Hub helpers.
    missing_fields = [
        label
        for label, value in (
            ("Project Name", project_name),
            ("Hub Username", hub_username),
            ("Hub Token", hub_token),
        )
        if not value.strip()
    ]
    if missing_fields:
        st.error(f"Please fill in: {', '.join(missing_fields)}.")
        st.stop()

    # 1) Dataset repository.
    repo_id = f"{hub_username}/{project_name}"

    setup_dataset_on_hub(
        repo_id=repo_id,
        hub_token=hub_token,
    )

    st.success(
        f"Dataset seed created and pushed to the Hub. Check it out [here](https://huggingface.co/datasets/{hub_username}/{project_name}). Hold on the repo_id: {repo_id}, we will need it in the next steps."
    )

    # 2) Configuration Space, duplicated from the template.
    # The target is passed with its namespace so the Space is created at the
    # same `{hub_username}/...` location that the link below and the final
    # configuration step refer to.
    # NOTE(review): assumes duplicate_space_on_hub accepts an "owner/name"
    # target — confirm against hub.py.
    space_name = f"{project_name}_config_space"
    space_repo_id = f"{hub_username}/{space_name}"

    duplicate_space_on_hub(
        source_repo="argilla/domain-specific-datasets-template",
        target_repo=space_repo_id,
        hub_token=hub_token,
        private=private_selector,
    )

    st.success(
        f"Configuration Space created. Check it out [here](https://huggingface.co/spaces/{hub_username}/{space_name})."
    )

    # 3) Argilla Space, duplicated from the Argilla template (same namespacing
    # rationale as above).
    argilla_name = f"{project_name}_argilla_space"
    argilla_repo_id = f"{hub_username}/{argilla_name}"

    duplicate_space_on_hub(
        source_repo="argilla/argilla-template-space",
        target_repo=argilla_repo_id,
        hub_token=hub_token,
        private=private_selector,
    )

    st.success(
        f"Argilla Space created. Check it out [here](https://huggingface.co/spaces/{hub_username}/{argilla_name})."
    )

    # 4) Push the project configuration to the Spaces after a short pause.
    # Presumably the pause gives the Hub time to finish creating the
    # duplicated Spaces — TODO confirm.
    seconds = 5

    with st.spinner(f"Adding project configuration to spaces in {seconds} seconds"):
        time.sleep(seconds)
        add_project_config_to_space_repo(
            dataset_repo_id=repo_id,
            hub_token=hub_token,
            project_name=project_name,
            argilla_space_repo_id=argilla_repo_id,
            project_space_repo_id=space_repo_id,
        )