Spaces:
Sleeping
Sleeping
Amy Roberts
committed on
Commit
•
b42fea9
1
Parent(s):
6b0b6fd
Ruff
Browse files
- app.py +33 -7
- defaults.py +1 -0
- fetch.py +8 -12
- find_similar_issues.py +8 -11
- get_topic.py +0 -57
- retrieval.py +0 -1
- update_stored_issues.py +8 -26
app.py
CHANGED
@@ -31,20 +31,39 @@ def get_query_issue_information(issue_no, token):
|
|
31 |
return request.json()
|
32 |
|
33 |
|
34 |
-
def run_find_similar_issues(token, n_issues, issue_no, query):
|
35 |
if issue_no == "":
|
36 |
issue_no = None
|
37 |
if query == "":
|
38 |
query = None
|
39 |
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
issues_html = [f"<a href='{issue['html_url']}' target='_blank'>#{issue['number']} - {issue['title']}</a>" for issue in similar_issues]
|
43 |
issues_html = "<br>".join(issues_html)
|
44 |
-
return issues_html
|
45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
-
|
|
|
48 |
# Archive the stored issues
|
49 |
if os.path.exists("issues.json"):
|
50 |
date_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
@@ -63,6 +82,12 @@ def update_issues():
|
|
63 |
model_id="all-mpnet-base-v2",
|
64 |
update=True
|
65 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
|
68 |
with gr.Blocks(title="Github Bot") as demo:
|
@@ -76,8 +101,9 @@ with gr.Blocks(title="Github Bot") as demo:
|
|
76 |
with gr.Column():
|
77 |
token = gr.Textbox(label="Github Token", placeholder="Your github token for authentication. This is not stored anywhere.")
|
78 |
n_issues = gr.Slider(1, 50, value=5, step=1, label="Number of similar issues", info="Choose between 1 and 50")
|
79 |
-
|
80 |
-
update_button.
|
|
|
81 |
|
82 |
with gr.Row():
|
83 |
submit_button = gr.Button(value="Submit")
|
@@ -85,7 +111,7 @@ with gr.Blocks(title="Github Bot") as demo:
|
|
85 |
with gr.Row():
|
86 |
with gr.Row():
|
87 |
issues_html = gr.HTML(label="Issue text", elem_id="issue_html")
|
88 |
-
submit_button.click(run_find_similar_issues, outputs=[issues_html], inputs=[token, n_issues, issue_no, query])
|
89 |
|
90 |
with gr.Tab("Find maintainers to ping"):
|
91 |
with gr.Row():
|
|
|
31 |
return request.json()
|
32 |
|
33 |
|
34 |
+
def run_find_similar_issues(token, n_issues, issue_no, query, issue_types):
|
35 |
if issue_no == "":
|
36 |
issue_no = None
|
37 |
if query == "":
|
38 |
query = None
|
39 |
|
40 |
+
if len(issue_types) == 0:
|
41 |
+
raise ValueError("At least one issue type must be selected")
|
42 |
+
|
43 |
+
similar_issues = []
|
44 |
+
similar_pulls = []
|
45 |
+
if "Issue" in issue_types:
|
46 |
+
similar_issues = get_similar_issues(issue_no=issue_no, query=query, token=token, top_k=n_issues, issue_type="issue")
|
47 |
+
if "Pull Request" in issue_types:
|
48 |
+
similar_pulls = get_similar_issues(issue_no=issue_no, query=query, token=token, top_k=n_issues, issue_type="pull")
|
49 |
|
50 |
issues_html = [f"<a href='{issue['html_url']}' target='_blank'>#{issue['number']} - {issue['title']}</a>" for issue in similar_issues]
|
51 |
issues_html = "<br>".join(issues_html)
|
|
|
52 |
|
53 |
+
pulls_html = [f"<a href='{issue['html_url']}' target='_blank'>#{issue['number']} - {issue['title']}</a>" for issue in similar_pulls]
|
54 |
+
pulls_html = "<br>".join(pulls_html)
|
55 |
+
|
56 |
+
final = ""
|
57 |
+
if len(issues_html) > 0:
|
58 |
+
final += f"<h2>Issues</h2>{issues_html}"
|
59 |
+
if len(pulls_html) > 0:
|
60 |
+
final += f"<h2>Pull Requests</h2>{pulls_html}"
|
61 |
+
|
62 |
+
# return issues_html
|
63 |
+
return final
|
64 |
|
65 |
+
|
66 |
+
def update():
|
67 |
# Archive the stored issues
|
68 |
if os.path.exists("issues.json"):
|
69 |
date_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
|
|
82 |
model_id="all-mpnet-base-v2",
|
83 |
update=True
|
84 |
)
|
85 |
+
build_embeddings.embed_issues(
|
86 |
+
input_filename="issues_dict.json",
|
87 |
+
issue_type="pull",
|
88 |
+
model_id="all-mpnet-base-v2",
|
89 |
+
update=True
|
90 |
+
)
|
91 |
|
92 |
|
93 |
with gr.Blocks(title="Github Bot") as demo:
|
|
|
101 |
with gr.Column():
|
102 |
token = gr.Textbox(label="Github Token", placeholder="Your github token for authentication. This is not stored anywhere.")
|
103 |
n_issues = gr.Slider(1, 50, value=5, step=1, label="Number of similar issues", info="Choose between 1 and 50")
|
104 |
+
issue_types = gr.CheckboxGroup(["Issue", "Pull Request"], label="Issue types")
|
105 |
+
update_button = gr.Button(value="Update issues", trigger_mode="once")
|
106 |
+
update_button.click(update, outputs=[], inputs=[])
|
107 |
|
108 |
with gr.Row():
|
109 |
submit_button = gr.Button(value="Submit")
|
|
|
111 |
with gr.Row():
|
112 |
with gr.Row():
|
113 |
issues_html = gr.HTML(label="Issue text", elem_id="issue_html")
|
114 |
+
submit_button.click(run_find_similar_issues, outputs=[issues_html], inputs=[token, n_issues, issue_no, query, issue_types])
|
115 |
|
116 |
with gr.Tab("Find maintainers to ping"):
|
117 |
with gr.Row():
|
defaults.py
CHANGED
@@ -3,3 +3,4 @@ import os
|
|
3 |
OWNER = "huggingface"
|
4 |
REPO = "transformers"
|
5 |
TOKEN = os.environ.get("GITHUB_TOKEN")
|
|
|
|
3 |
OWNER = "huggingface"
|
4 |
REPO = "transformers"
|
5 |
TOKEN = os.environ.get("GITHUB_TOKEN")
|
6 |
+
GITHUB_API_VERSION = "2022-11-28"
|
fetch.py
CHANGED
@@ -2,16 +2,16 @@
|
|
2 |
Script to fetch issues from the transformers repo and save them to a json file
|
3 |
"""
|
4 |
|
5 |
-
import json
|
6 |
-
|
7 |
import argparse
|
|
|
|
|
|
|
|
|
8 |
|
9 |
import requests
|
10 |
-
import os
|
11 |
import numpy as np
|
12 |
-
|
13 |
-
import
|
14 |
-
import logging
|
15 |
|
16 |
logging.basicConfig(level=logging.INFO)
|
17 |
|
@@ -19,11 +19,7 @@ logger = logging.getLogger(__name__)
|
|
19 |
|
20 |
today = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
|
21 |
|
22 |
-
|
23 |
-
REPO = "transformers"
|
24 |
-
GITHUB_API_VERSION = "2022-11-28"
|
25 |
-
TOKEN = os.environ.get("GITHUB_TOKEN")
|
26 |
-
JSON_FILE = f"issues.json"
|
27 |
UPDATE_FILE = False
|
28 |
OVERWRITE_FILE = True
|
29 |
|
@@ -65,7 +61,7 @@ def get_issues(
|
|
65 |
url = f"https://api.github.com/repos/{owner}/{repo}/issues"
|
66 |
headers = {
|
67 |
"Accept": "application/vnd.github+json",
|
68 |
-
|
69 |
"X-GitHub-Api-Version": f"{github_api_version}",
|
70 |
"User-Agent": "amyeroberts",
|
71 |
}
|
|
|
2 |
Script to fetch issues from the transformers repo and save them to a json file
|
3 |
"""
|
4 |
|
|
|
|
|
5 |
import argparse
|
6 |
+
import datetime
|
7 |
+
import logging
|
8 |
+
import json
|
9 |
+
import os
|
10 |
|
11 |
import requests
|
|
|
12 |
import numpy as np
|
13 |
+
|
14 |
+
from defaults import OWNER, REPO, GITHUB_API_VERSION, TOKEN
|
|
|
15 |
|
16 |
logging.basicConfig(level=logging.INFO)
|
17 |
|
|
|
19 |
|
20 |
today = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
|
21 |
|
22 |
+
JSON_FILE = "issues.json"
|
|
|
|
|
|
|
|
|
23 |
UPDATE_FILE = False
|
24 |
OVERWRITE_FILE = True
|
25 |
|
|
|
61 |
url = f"https://api.github.com/repos/{owner}/{repo}/issues"
|
62 |
headers = {
|
63 |
"Accept": "application/vnd.github+json",
|
64 |
+
"Authorization": f"{token}",
|
65 |
"X-GitHub-Api-Version": f"{github_api_version}",
|
66 |
"User-Agent": "amyeroberts",
|
67 |
}
|
find_similar_issues.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
import pprint
|
2 |
import json
|
3 |
import argparse
|
4 |
import requests
|
@@ -17,11 +16,12 @@ def load_embeddings():
|
|
17 |
embeddings = np.load("issue_embeddings.npy")
|
18 |
return embeddings
|
19 |
|
20 |
-
|
|
|
21 |
"""
|
22 |
Function to load issue information from file
|
23 |
"""
|
24 |
-
with open("
|
25 |
embedding_index_to_issue = json.load(f)
|
26 |
|
27 |
with open("issues_dict.json", "r") as f:
|
@@ -45,23 +45,20 @@ def get_issue(issue_no, token=TOKEN, owner=OWNER, repo=REPO):
|
|
45 |
"""
|
46 |
Function to get issue from GitHub
|
47 |
"""
|
48 |
-
url = f"https://api.github.com/repos/{owner}/{repo}/issues"
|
49 |
headers = {
|
50 |
"Accept": "application/vnd.github+json",
|
51 |
-
|
52 |
"X-GitHub-Api-Version": "2022-11-28",
|
53 |
"User-Agent": "amyeroberts",
|
54 |
}
|
55 |
-
request = requests.get(
|
56 |
-
f"https://api.github.com/repos/{OWNER}/{REPO}/issues/{issue_no}",
|
57 |
-
headers=headers,
|
58 |
-
)
|
59 |
if request.status_code != 200:
|
60 |
raise ValueError(f"Request failed with status code {request.status_code}")
|
61 |
return request.json()
|
62 |
|
63 |
|
64 |
-
def get_similar_issues(issue_no, query, top_k=5, token=TOKEN, owner=OWNER, repo=REPO):
|
65 |
"""
|
66 |
Function to find similar issues
|
67 |
"""
|
@@ -86,7 +83,7 @@ def get_similar_issues(issue_no, query, top_k=5, token=TOKEN, owner=OWNER, repo=
|
|
86 |
most_similar_indices = np.argsort(cosine_similarities)
|
87 |
most_similar_indices = most_similar_indices[0][::-1]
|
88 |
|
89 |
-
embedding_index_to_issue, issues = load_issue_information()
|
90 |
|
91 |
similar_issues = []
|
92 |
for i in most_similar_indices[:top_k]:
|
|
|
|
|
1 |
import json
|
2 |
import argparse
|
3 |
import requests
|
|
|
16 |
embeddings = np.load("issue_embeddings.npy")
|
17 |
return embeddings
|
18 |
|
19 |
+
|
20 |
+
def load_issue_information(issue_type="issue"):
|
21 |
"""
|
22 |
Function to load issue information from file
|
23 |
"""
|
24 |
+
with open(f"embedding_index_to_{issue_type}.json", "r") as f:
|
25 |
embedding_index_to_issue = json.load(f)
|
26 |
|
27 |
with open("issues_dict.json", "r") as f:
|
|
|
45 |
"""
|
46 |
Function to get issue from GitHub
|
47 |
"""
|
48 |
+
url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_no}"
|
49 |
headers = {
|
50 |
"Accept": "application/vnd.github+json",
|
51 |
+
"Authorization": f"{token}",
|
52 |
"X-GitHub-Api-Version": "2022-11-28",
|
53 |
"User-Agent": "amyeroberts",
|
54 |
}
|
55 |
+
request = requests.get(url, headers=headers)
|
|
|
|
|
|
|
56 |
if request.status_code != 200:
|
57 |
raise ValueError(f"Request failed with status code {request.status_code}")
|
58 |
return request.json()
|
59 |
|
60 |
|
61 |
+
def get_similar_issues(issue_no, query, top_k=5, token=TOKEN, owner=OWNER, repo=REPO, issue_type="issue"):
|
62 |
"""
|
63 |
Function to find similar issues
|
64 |
"""
|
|
|
83 |
most_similar_indices = np.argsort(cosine_similarities)
|
84 |
most_similar_indices = most_similar_indices[0][::-1]
|
85 |
|
86 |
+
embedding_index_to_issue, issues = load_issue_information(issue_type=issue_type)
|
87 |
|
88 |
similar_issues = []
|
89 |
for i in most_similar_indices[:top_k]:
|
get_topic.py
DELETED
@@ -1,57 +0,0 @@
|
|
1 |
-
|
2 |
-
import json
|
3 |
-
|
4 |
-
with open("issues_dict.json", "r") as f:
|
5 |
-
issues = json.load(f)
|
6 |
-
|
7 |
-
topic_maintainers_map ={
|
8 |
-
"text models": ["@ArthurZucker", "@younesbelkada"],
|
9 |
-
"vision models": "@amyeroberts",
|
10 |
-
"speech models": "@sanchit-gandhi",
|
11 |
-
"graph models": "@clefourrier",
|
12 |
-
"flax": "@sanchit-gandhi",
|
13 |
-
"generate": "@gante",
|
14 |
-
"pipelines": "@Narsil",
|
15 |
-
"tensorflow": ["@gante", "@Rocketknight1"],
|
16 |
-
"tokenizers": "@ArthurZucker",
|
17 |
-
"trainer": ["@muellerzr", "@pacman100"],
|
18 |
-
"deepspeed": "@pacman100",
|
19 |
-
"ray/raytune": ["@richardliaw", "@amogkam"],
|
20 |
-
"Big Model Inference": "@SunMarc",
|
21 |
-
"quantization (bitsandbytes, autogpt)": ["@SunMarc", "@younesbelkada"],
|
22 |
-
"Documentation": ["@stevhliu", "@MKhalusova"],
|
23 |
-
"accelerate": "different repo",
|
24 |
-
"datasets": "different repo",
|
25 |
-
"diffusers": "different repo",
|
26 |
-
"rust tokenizers": "different repo",
|
27 |
-
"Flax examples": "@sanchit-gandhi",
|
28 |
-
"PyTorch vision examples": "@amyeroberts",
|
29 |
-
"PyTorch text examples": "@ArthurZucker",
|
30 |
-
"PyTorch speech examples": "@sanchit-gandhi",
|
31 |
-
"PyTorch generate examples": "@gante",
|
32 |
-
"TensorFlow": "@Rocketknight1",
|
33 |
-
"Research projects and examples": "not maintained",
|
34 |
-
}
|
35 |
-
|
36 |
-
|
37 |
-
issue_no = 2781
|
38 |
-
issue = issues[str(issue_no)]
|
39 |
-
|
40 |
-
|
41 |
-
from transformers import AutoTokenizer, LlamaForCausalLM
|
42 |
-
|
43 |
-
model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
|
44 |
-
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
|
45 |
-
|
46 |
-
# prompt = f"Which of the following topics {list(topic_maintainers_map.keys())} is this issue about:\n{issue['body']}"
|
47 |
-
prompt = f"QUESTION: What is the provided issue about? Pick up to 3 topics from the following list: {list(topic_maintainers_map.keys())} \nISSUE START:\n{issue['body']} \n ISSUE END. \n ANSWER:"
|
48 |
-
inputs = tokenizer(prompt, return_tensors="pt")
|
49 |
-
|
50 |
-
prefix_len = inputs.input_ids.shape[1]
|
51 |
-
|
52 |
-
# Generate
|
53 |
-
generate_ids = model.generate(inputs.input_ids, max_length=30 + prefix_len)
|
54 |
-
outputs = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
|
55 |
-
print(outputs[prefix_len:])
|
56 |
-
|
57 |
-
print("TITLE", issue["number"] + " " + issue["title"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
retrieval.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import argparse
|
2 |
import json
|
3 |
-
import pprint
|
4 |
|
5 |
import numpy as np
|
6 |
from sentence_transformers import SentenceTransformer
|
|
|
1 |
import argparse
|
2 |
import json
|
|
|
3 |
|
4 |
import numpy as np
|
5 |
from sentence_transformers import SentenceTransformer
|
update_stored_issues.py
CHANGED
@@ -1,38 +1,20 @@
|
|
1 |
"""
|
2 |
Module which updates any of the issues to reflect changes in the issue state
|
3 |
"""
|
4 |
-
import json
|
5 |
-
import datetime
|
6 |
-
from defaults import TOKEN, OWNER, REPO
|
7 |
-
|
8 |
-
GITHUB_API_VERSION = "2022-11-28"
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
# Get the issues that have been updated since the last update
|
13 |
-
|
14 |
-
import json
|
15 |
-
|
16 |
import argparse
|
17 |
-
|
18 |
-
import requests
|
19 |
-
import os
|
20 |
-
import numpy as np
|
21 |
import json
|
22 |
-
import datetime
|
23 |
import logging
|
|
|
24 |
|
25 |
-
|
|
|
26 |
|
27 |
-
|
28 |
|
29 |
-
|
|
|
30 |
|
31 |
-
|
32 |
-
REPO = "transformers"
|
33 |
-
GITHUB_API_VERSION = "2022-11-28"
|
34 |
-
TOKEN = os.environ.get("GITHUB_TOKEN")
|
35 |
-
JSON_FILE = f"issues.json"
|
36 |
|
37 |
|
38 |
def update_issues(
|
@@ -62,7 +44,7 @@ def update_issues(
|
|
62 |
url = f"https://api.github.com/repos/{owner}/{repo}/issues"
|
63 |
headers = {
|
64 |
"Accept": "application/vnd.github+json",
|
65 |
-
|
66 |
"X-GitHub-Api-Version": f"{github_api_version}",
|
67 |
"User-Agent": "amyeroberts",
|
68 |
}
|
|
|
1 |
"""
|
2 |
Module which updates any of the issues to reflect changes in the issue state
|
3 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import argparse
|
|
|
|
|
|
|
|
|
5 |
import json
|
|
|
6 |
import logging
|
7 |
+
import os
|
8 |
|
9 |
+
import numpy as np
|
10 |
+
import requests
|
11 |
|
12 |
+
from defaults import TOKEN, OWNER, REPO, GITHUB_API_VERSION
|
13 |
|
14 |
+
logging.basicConfig(level=logging.INFO)
|
15 |
+
logger = logging.getLogger(__name__)
|
16 |
|
17 |
+
JSON_FILE = "issues.json"
|
|
|
|
|
|
|
|
|
18 |
|
19 |
|
20 |
def update_issues(
|
|
|
44 |
url = f"https://api.github.com/repos/{owner}/{repo}/issues"
|
45 |
headers = {
|
46 |
"Accept": "application/vnd.github+json",
|
47 |
+
"Authorization": f"{token}",
|
48 |
"X-GitHub-Api-Version": f"{github_api_version}",
|
49 |
"User-Agent": "amyeroberts",
|
50 |
}
|