"""Gradio app that searches The Stack's username-to-repository index.

At import time the app downloads two snapshots of the
``bigcode/the-stack-username-to-repo`` dataset, then serves a small UI that
lists every repository whose name contains a user-supplied keyword.
"""

import gzip
import json

import gradio as gr
from huggingface_hub import hf_hub_download

_DATASET_REPO_ID = "bigcode/the-stack-username-to-repo"
_DATASET_FILENAME = "username_to_repo.json.gz"

# Upper bound on the number of matches rendered for one search.
MAX_HITS = 1000
# Upper bound on the number of repositories scanned for one search
# (roughly 6M user entries * up to 18 repositories per user).
MAX_REPOS_SCANNED = 70_000_000


def _load_usernames(revision=None):
    """Download one snapshot of the index and return the decoded JSON object.

    Args:
        revision: Dataset revision (tag/branch/commit) to fetch. ``None``
            lets ``hf_hub_download`` use its default revision.

    Returns:
        The parsed JSON content; the rest of this module treats it as a
        mapping of username -> list of repository names.
    """
    path = hf_hub_download(
        repo_id=_DATASET_REPO_ID,
        filename=_DATASET_FILENAME,
        repo_type="dataset",
        revision=revision,
    )
    # Text mode lets gzip handle the UTF-8 decoding while json streams from
    # the file object, instead of read() + decode() + loads().
    with gzip.open(path, "rt", encoding="utf-8") as f:
        return json.load(f)


# Index snapshots keyed by the label shown in the version dropdown.
# NOTE(review): the "v1.0" entry is fetched from the dataset's *default*
# revision rather than a pinned tag -- confirm that this is still v1.0 data.
usernames = {
    "v1.1": _load_usernames("v1.1"),
    "v1.0": _load_usernames(),
}

# Markdown shown at the top of the page.
# NOTE(review): the blurb promises "up to 100" results while MAX_HITS is
# 1000 -- confirm which number is intended and align the other.
text = """\
🔍🌟AICodeFly⚡️ is a program to search github repos.
The goal is to make repos easy to search, review, clone and download.
AI to make your coding experience fast and easy.
This search will match your term to find up to 100 github repositories that match your search.
Use the link to shell the repository as html.
"""


def check_username(username: str, version: str) -> str:
    """Report which repositories of ``username`` are present in The Stack.

    Args:
        username: Exact key to look up in the index.
        version: Key into ``usernames`` ("v1.1" or "v1.0").

    Returns:
        Markdown listing the user's repositories, or a "No" message when the
        user is unknown or has no repositories.

    Raises:
        KeyError: If ``version`` is not a key of ``usernames``.
    """
    repos = usernames[version].get(username)
    if not repos:
        return "**No**, your code is not in The Stack."

    repo_word = "repository" if len(repos) == 1 else "repositories"
    parts = [
        f"**Yes**, there is code from **{len(repos)} {repo_word}** in The Stack:"
    ]
    parts.extend(f"_{repo}_" for repo in repos)
    return "\n\n".join(parts)


def check_keyword(username: str, version: str) -> str:
    """List repositories whose name contains a keyword (case-sensitive).

    Args:
        username: The search keyword. The parameter keeps its historical
            name because the UI wires the same textbox to this handler.
        version: Key into ``usernames`` ("v1.1" or "v1.0").

    Returns:
        Markdown with one line per matching repository (including the
        running repository and user counters at the time of the match),
        followed by a status line saying why the search stopped.

    Raises:
        KeyError: If ``version`` is not a key of ``usernames``.
    """
    keyword = (username or "").strip()
    if not keyword:
        # An empty string is a substring of every repository name, so a
        # blank search would otherwise dump the first MAX_HITS entries.
        return "**No keyword given.** Enter a term to match against repository names."

    parts = []
    hits = 0
    scanned = 0
    for user_index, repos in enumerate(usernames[version].values(), start=1):
        for repo in repos:
            if scanned >= MAX_REPOS_SCANNED:
                parts.append(
                    f"**Found maximum repos**, Count: **{scanned}** in The Stack:"
                )
                return "\n\n".join(parts)
            scanned += 1
            if keyword not in repo:
                continue
            hits += 1
            parts.append(
                f"_{repo} repocounter: {scanned} usercounter: {user_index}_"
            )
            # Stop exactly at the cap; the old `>` test let one extra hit in.
            if hits >= MAX_HITS:
                parts.append(
                    f"**Found maximum hits**, Count: **{hits}** in The Stack:"
                )
                return "\n\n".join(parts)

    parts.append("**Searched All Repos**, Above found in The Stack.")
    return "\n\n".join(parts)


with gr.Blocks() as demo:
    with gr.Row():
        # Two empty side columns keep the content column centred.
        _, colum_2, _ = gr.Column(scale=1), gr.Column(scale=6), gr.Column(scale=1)
        with colum_2:
            gr.Markdown(text)
            version = gr.Dropdown(
                ["v1.1", "v1.0"], label="The Stack version:", value="v1.1"
            )
            username = gr.Text(
                "", label="Keyword to match against repos e.g. BeatSaber"
            )
            check_button = gr.Button("Check!")
            repos = gr.Markdown()
            # The button runs the keyword search; check_username is kept as
            # an alternative handler with the same (username, version) inputs.
            check_button.click(check_keyword, [username, version], repos)

demo.launch()