File size: 3,100 Bytes
839621c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import time
from regex import F
from defaults import (
DEFAULT_DOMAIN,
)
from hub import (
setup_dataset_on_hub,
duplicate_space_on_hub,
add_project_config_to_space_repo,
)
import streamlit as st
# Page-level setup: set the browser tab title and icon, then render the
# visible page title followed by a horizontal divider.
st.set_page_config("Domain Data Grower", page_icon="🧑‍🌾")
st.header("🧑‍🌾 Domain Data Grower")
st.divider()
################################################################################
# APP MARKDOWN
################################################################################

# Introductory copy describing what this page sets up.
st.header("🌱 Create a domain specific dataset")

# The blank lines around the bullet list are significant: without them the
# Markdown renderer folds the closing sentence into the last list item.
st.markdown(
    """This space will set up your domain specific dataset project. It will
create the resources that you need to build a dataset. Those resources include:

- A dataset repository on the Hub
- Another space to define expert domain and run generation pipelines

For a complete overview of the project, check out the README.
"""
)

# Navigation link to the page script stored under "pages/". The icon argument
# has to be a real emoji, so the literal must stay correctly encoded.
st.page_link(
    "pages/🧑‍🌾 Domain Data Grower.py",
    label="Domain Data Grower",
    icon="🧑‍🌾",
)
################################################################################
# CONFIGURATION
################################################################################

# Collect the project settings. These module-level names are read by the
# "Setup Project Resources" button handler further down the script.
st.subheader("🌾 Project Configuration")

# Project name is pre-filled with DEFAULT_DOMAIN and username with "argilla".
project_name = st.text_input("Project Name", DEFAULT_DOMAIN)
hub_username = st.text_input("Hub Username", "argilla")

# type="password" masks the token in the input widget.
hub_token = st.text_input("Hub Token", type="password")

# Forwarded as the `private` flag when the template Spaces are duplicated.
private_selector = st.checkbox("Private Space", value=False)
# Runs on the rerun triggered by clicking the button: creates the dataset
# repository, duplicates the two template Spaces, then writes the project
# configuration into the Space repositories.
if st.button("🤗 Setup Project Resources"):
    # Fail fast with a readable message instead of handing blank values to
    # the Hub helpers.
    required_inputs = {
        "Project Name": project_name,
        "Hub Username": hub_username,
        "Hub Token": hub_token,
    }
    missing_inputs = [
        label for label, value in required_inputs.items() if not value.strip()
    ]
    if missing_inputs:
        st.error(f"Please fill in: {', '.join(missing_inputs)}.")
        st.stop()

    # 1. Dataset repository.
    repo_id = f"{hub_username}/{project_name}"
    setup_dataset_on_hub(
        repo_id=repo_id,
        hub_token=hub_token,
    )
    st.success(
        "Dataset seed created and pushed to the Hub. Check it out "
        f"[here](https://huggingface.co/datasets/{repo_id}). "
        f"Hold on to the repo_id: {repo_id}, we will need it in the next steps."
    )

    # NOTE(review): both Spaces are duplicated under a bare name (no
    # "<username>/" prefix) while the links and the configuration call below
    # assume they live under hub_username. Confirm duplicate_space_on_hub
    # resolves the namespace the same way.

    # 2. Configuration Space, duplicated from the project template.
    space_name = f"{project_name}_config_space"
    duplicate_space_on_hub(
        source_repo="argilla/domain-specific-datasets-template",
        target_repo=space_name,
        hub_token=hub_token,
        private=private_selector,
    )
    st.success(
        f"Configuration Space created. Check it out [here](https://huggingface.co/spaces/{hub_username}/{space_name})."
    )

    # 3. Argilla Space, duplicated from the Argilla template.
    argilla_name = f"{project_name}_argilla_space"
    duplicate_space_on_hub(
        source_repo="argilla/argilla-template-space",
        target_repo=argilla_name,
        hub_token=hub_token,
        private=private_selector,
    )
    st.success(
        f"Argilla Space created. Check it out [here](https://huggingface.co/spaces/{hub_username}/{argilla_name})."
    )

    # 4. Pause briefly, then push the project configuration. Presumably the
    # delay gives the Hub time to finish creating the duplicated Spaces
    # (TODO confirm).
    seconds = 5
    with st.spinner(f"Adding project configuration to spaces in {seconds} seconds"):
        time.sleep(seconds)
        add_project_config_to_space_repo(
            dataset_repo_id=repo_id,
            hub_token=hub_token,
            project_name=project_name,
            argilla_space_repo_id=f"{hub_username}/{argilla_name}",
            project_space_repo_id=f"{hub_username}/{space_name}",
        )
|