# Streamlit app: check whether a GitHub username appears in the
# "bigcode/the-stack-usernames" dataset.

from datasets import load_dataset
import streamlit as st


@st.cache()
def load_all_usernames():
    """Return the set of usernames listed in the usernames dataset.

    Loads the ``train`` split of ``bigcode/the-stack-usernames`` and reads its
    ``usernames`` column. The result is converted to a ``set`` so that the
    membership test done on every button click does not scan a list.

    Returns:
        set: The distinct values of the ``usernames`` column.
    """
    list_of_usernames = load_dataset(
        "bigcode/the-stack-usernames", split="train"
    )["usernames"]
    # The original returned ``set(list_of_repo_names)``, an undefined name that
    # raised NameError on the first call; return the column loaded above.
    return set(list_of_usernames)


st.title("Am I in The Stack?")
st.markdown("This tool lets you check if a repository under a given username is part of [The Stack dataset](https://huggingface.co/datasets/bigcode/the-stack).")

# Streamlit re-runs this script on each interaction; the loader above is
# decorated with st.cache() so the dataset is not reloaded every time.
usernames = load_all_usernames()

username = st.text_input("GitHub Username:")

if st.button("Check!"):
    if username in usernames:
        st.markdown("**Yes**, your data is in The Stack.")
    else:
        st.markdown("**No**, your data is not in The Stack.")