Spaces:
Sleeping
Sleeping
File size: 996 Bytes
a431d31 fcb283e a431d31 a6c2f56 b0c1665 fcb283e a431d31 abfea56 b0c1665 a431d31 fcb283e a431d31 b0c1665 fcb283e b0c1665 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
from datasets import load_dataset
import streamlit as st
from huggingface_hub import hf_hub_download
import gzip
import json
@st.cache(allow_output_mutation=True)
def load_all_usernames():
    """Download and parse the username-to-repository index for The Stack.

    Fetches ``username_to_repo.json.gz`` from the
    ``bigcode/the-stack-username-to-repo`` dataset repository on the
    Hugging Face Hub, decompresses it, and parses the JSON payload.

    The function is wrapped in ``st.cache`` so Streamlit can reuse the
    parsed result instead of repeating the download and parse.

    Returns:
        The decoded JSON object. The rest of this script treats it as a
        mapping from a GitHub username to an iterable of repository names.
    """
    archive_path = hf_hub_download(
        repo_id="bigcode/the-stack-username-to-repo",
        filename="username_to_repo.json.gz",
        repo_type="dataset",
    )
    # Read the whole gzip member as bytes, then decode explicitly as UTF-8
    # before handing the text to the JSON parser.
    with gzip.open(archive_path, "rb") as archive:
        raw_bytes = archive.read()
    json_text = raw_bytes.decode("utf-8")
    return json.loads(json_text)
# --- Page header -----------------------------------------------------------
st.title("Am I in The Stack?")
st.markdown("This tool lets you check if a repository under a given username is part of [The Stack dataset](https://huggingface.co/datasets/bigcode/the-stack).")

# Username -> repositories index, loaded through the cached loader above.
usernames = load_all_usernames()

# Strip surrounding whitespace so a copy-pasted " name " still matches the
# index key instead of producing a false "No".
username = st.text_input("Your GitHub Username:").strip()

# The lookup only runs when the user presses the button.
if st.button("Check!"):
    if username in usernames:
        st.markdown("**Yes**, your data is in The Stack:")
        # Markdown folds a single newline into a space, so joining plain
        # lines would render every repository in one run-on paragraph.
        # Emitting a bullet list puts each repository on its own line.
        st.markdown("\n".join(f"- `{repo_name}`" for repo_name in usernames[username]))
    else:
        st.markdown("**No**, your data is not in The Stack.")