|
import time |
|
|
|
from regex import F |
|
from defaults import ( |
|
DEFAULT_DOMAIN, |
|
) |
|
from hub import ( |
|
setup_dataset_on_hub, |
|
duplicate_space_on_hub, |
|
add_project_config_to_space_repo, |
|
) |
|
|
|
import streamlit as st |
|
|
|
st.set_page_config("Domain Data Grower", page_icon="π§βπΎ") |
|
st.header("π§βπΎ Domain Data Grower") |
|
st.divider() |
|
|
|
|
|
|
|
|
|
|
|
st.header("π± Create a domain specific dataset") |
|
|
|
st.markdown( |
|
"""This space will set up your domain specific dataset project. It will |
|
create the resources that you need to build a dataset. Those resources include: |
|
|
|
- A dataset repository on the Hub |
|
- Another space to define expert domain and run generation pipelines |
|
|
|
For a complete overview of the project. Check out the README |
|
""" |
|
) |
|
|
|
st.page_link( |
|
"pages/π§βπΎ Domain Data Grower.py", |
|
label="Domain Data Grower", |
|
icon="π§βπΎ", |
|
) |
|
|
|
|
|
|
|
|
|
|
|
st.subheader("πΎ Project Configuration") |
|
|
|
project_name = st.text_input("Project Name", DEFAULT_DOMAIN) |
|
hub_username = st.text_input("Hub Username", "argilla") |
|
hub_token = st.text_input("Hub Token", type="password") |
|
private_selector = st.checkbox("Private Space", value=False) |
|
|
|
if st.button("π€ Setup Project Resources"): |
|
repo_id = f"{hub_username}/{project_name}" |
|
|
|
setup_dataset_on_hub( |
|
repo_id=repo_id, |
|
hub_token=hub_token, |
|
) |
|
|
|
st.success( |
|
f"Dataset seed created and pushed to the Hub. Check it out [here](https://huggingface.co/datasets/{hub_username}/{project_name}). Hold on the repo_id: {repo_id}, we will need it in the next steps." |
|
) |
|
|
|
space_name = f"{project_name}_config_space" |
|
|
|
duplicate_space_on_hub( |
|
source_repo="argilla/domain-specific-datasets-template", |
|
target_repo=space_name, |
|
hub_token=hub_token, |
|
private=private_selector, |
|
) |
|
|
|
st.success( |
|
f"Configuration Space created. Check it out [here](https://huggingface.co/spaces/{hub_username}/{space_name})." |
|
) |
|
|
|
argilla_name = f"{project_name}_argilla_space" |
|
|
|
duplicate_space_on_hub( |
|
source_repo="argilla/argilla-template-space", |
|
target_repo=argilla_name, |
|
hub_token=hub_token, |
|
private=private_selector, |
|
) |
|
|
|
st.success( |
|
f"Argilla Space created. Check it out [here](https://huggingface.co/spaces/{hub_username}/{argilla_name})." |
|
) |
|
|
|
seconds = 5 |
|
|
|
with st.spinner(f"Adding project configuration to spaces in {seconds} seconds"): |
|
time.sleep(seconds) |
|
add_project_config_to_space_repo( |
|
dataset_repo_id=repo_id, |
|
hub_token=hub_token, |
|
project_name=project_name, |
|
argilla_space_repo_id=f"{hub_username}/{argilla_name}", |
|
project_space_repo_id=f"{hub_username}/{space_name}", |
|
) |
|
|