lvwerra HF staff committed on
Commit
fcb283e
•
1 Parent(s): fc6a73f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -1,20 +1,27 @@
1
  from datasets import load_dataset
2
  import streamlit as st
 
 
 
3
 
4
 
5
  @st.cache(allow_output_mutation=True)
6
  def load_all_usernames():
7
- list_of_usernames = load_dataset("bigcode/the-stack-usernames", split="train")["usernames"]
8
- return set(list_of_usernames)
 
 
 
9
 
10
  st.title("Am I in The Stack?")
11
  st.markdown("This tool lets you check if a repository under a given username is part of [The Stack dataset](https://huggingface.co/datasets/bigcode/the-stack).")
12
  usernames = load_all_usernames()
13
 
14
- username = st.text_input("GitHub Username:")
15
 
16
  if st.button("Check!"):
17
  if username in usernames:
18
- st.markdown("**Yes**, your data is in The Stack.")
 
19
  else:
20
  st.markdown("**No**, your data is not in The Stack.")
 
1
  from datasets import load_dataset
2
  import streamlit as st
3
+ from huggingface_hub import hf_hub_download
4
+ import gzip
5
+ import json
6
 
7
 
8
  @st.cache(allow_output_mutation=True)
9
  def load_all_usernames():
10
+ filepath = hf_hub_download(repo_id="bigcode/the-stack-username-to-repo", filename="username_to_repo.json.gz", repo_type="dataset")
11
+
12
+ with gzip.open(filepath, 'r') as f:
13
+ usernames = json.loads(f.read().decode('utf-8'))
14
+ return usernames
15
 
16
  st.title("Am I in The Stack?")
17
  st.markdown("This tool lets you check if a repository under a given username is part of [The Stack dataset](https://huggingface.co/datasets/bigcode/the-stack).")
18
  usernames = load_all_usernames()
19
 
20
+ username = st.text_input("Your GitHub Username:")
21
 
22
  if st.button("Check!"):
23
  if username in usernames:
24
+ st.markdown("**Yes**, your data is in The Stack:")
25
+ st.markdown("\n".join([f"`{repo_name}`" for repo_name in usernames[username]]))
26
  else:
27
  st.markdown("**No**, your data is not in The Stack.")