Amy Roberts committed on
Commit
b42fea9
1 Parent(s): 6b0b6fd
Files changed (7) hide show
  1. app.py +33 -7
  2. defaults.py +1 -0
  3. fetch.py +8 -12
  4. find_similar_issues.py +8 -11
  5. get_topic.py +0 -57
  6. retrieval.py +0 -1
  7. update_stored_issues.py +8 -26
app.py CHANGED
@@ -31,20 +31,39 @@ def get_query_issue_information(issue_no, token):
31
  return request.json()
32
 
33
 
34
- def run_find_similar_issues(token, n_issues, issue_no, query):
35
  if issue_no == "":
36
  issue_no = None
37
  if query == "":
38
  query = None
39
 
40
- similar_issues = get_similar_issues(issue_no=issue_no, query=query, token=token, top_k=n_issues)
 
 
 
 
 
 
 
 
41
 
42
  issues_html = [f"<a href='{issue['html_url']}' target='_blank'>#{issue['number']} - {issue['title']}</a>" for issue in similar_issues]
43
  issues_html = "<br>".join(issues_html)
44
- return issues_html
45
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- def update_issues():
 
48
  # Archive the stored issues
49
  if os.path.exists("issues.json"):
50
  date_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
@@ -63,6 +82,12 @@ def update_issues():
63
  model_id="all-mpnet-base-v2",
64
  update=True
65
  )
 
 
 
 
 
 
66
 
67
 
68
  with gr.Blocks(title="Github Bot") as demo:
@@ -76,8 +101,9 @@ with gr.Blocks(title="Github Bot") as demo:
76
  with gr.Column():
77
  token = gr.Textbox(label="Github Token", placeholder="Your github token for authentication. This is not stored anywhere.")
78
  n_issues = gr.Slider(1, 50, value=5, step=1, label="Number of similar issues", info="Choose between 1 and 50")
79
- update_button = gr.Button(value="Update issues")
80
- update_button.click(update_issues)
 
81
 
82
  with gr.Row():
83
  submit_button = gr.Button(value="Submit")
@@ -85,7 +111,7 @@ with gr.Blocks(title="Github Bot") as demo:
85
  with gr.Row():
86
  with gr.Row():
87
  issues_html = gr.HTML(label="Issue text", elem_id="issue_html")
88
- submit_button.click(run_find_similar_issues, outputs=[issues_html], inputs=[token, n_issues, issue_no, query])
89
 
90
  with gr.Tab("Find maintainers to ping"):
91
  with gr.Row():
 
31
  return request.json()
32
 
33
 
34
+ def run_find_similar_issues(token, n_issues, issue_no, query, issue_types):
35
  if issue_no == "":
36
  issue_no = None
37
  if query == "":
38
  query = None
39
 
40
+ if len(issue_types) == 0:
41
+ raise ValueError("At least one issue type must be selected")
42
+
43
+ similar_issues = []
44
+ similar_pulls = []
45
+ if "Issue" in issue_types:
46
+ similar_issues = get_similar_issues(issue_no=issue_no, query=query, token=token, top_k=n_issues, issue_type="issue")
47
+ if "Pull Request" in issue_types:
48
+ similar_pulls = get_similar_issues(issue_no=issue_no, query=query, token=token, top_k=n_issues, issue_type="pull")
49
 
50
  issues_html = [f"<a href='{issue['html_url']}' target='_blank'>#{issue['number']} - {issue['title']}</a>" for issue in similar_issues]
51
  issues_html = "<br>".join(issues_html)
 
52
 
53
+ pulls_html = [f"<a href='{issue['html_url']}' target='_blank'>#{issue['number']} - {issue['title']}</a>" for issue in similar_pulls]
54
+ pulls_html = "<br>".join(pulls_html)
55
+
56
+ final = ""
57
+ if len(issues_html) > 0:
58
+ final += f"<h2>Issues</h2>{issues_html}"
59
+ if len(pulls_html) > 0:
60
+ final += f"<h2>Pull Requests</h2>{pulls_html}"
61
+
62
+ # return issues_html
63
+ return final
64
 
65
+
66
+ def update():
67
  # Archive the stored issues
68
  if os.path.exists("issues.json"):
69
  date_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
 
82
  model_id="all-mpnet-base-v2",
83
  update=True
84
  )
85
+ build_embeddings.embed_issues(
86
+ input_filename="issues_dict.json",
87
+ issue_type="pull",
88
+ model_id="all-mpnet-base-v2",
89
+ update=True
90
+ )
91
 
92
 
93
  with gr.Blocks(title="Github Bot") as demo:
 
101
  with gr.Column():
102
  token = gr.Textbox(label="Github Token", placeholder="Your github token for authentication. This is not stored anywhere.")
103
  n_issues = gr.Slider(1, 50, value=5, step=1, label="Number of similar issues", info="Choose between 1 and 50")
104
+ issue_types = gr.CheckboxGroup(["Issue", "Pull Request"], label="Issue types")
105
+ update_button = gr.Button(value="Update issues", trigger_mode="once")
106
+ update_button.click(update, outputs=[], inputs=[])
107
 
108
  with gr.Row():
109
  submit_button = gr.Button(value="Submit")
 
111
  with gr.Row():
112
  with gr.Row():
113
  issues_html = gr.HTML(label="Issue text", elem_id="issue_html")
114
+ submit_button.click(run_find_similar_issues, outputs=[issues_html], inputs=[token, n_issues, issue_no, query, issue_types])
115
 
116
  with gr.Tab("Find maintainers to ping"):
117
  with gr.Row():
defaults.py CHANGED
@@ -3,3 +3,4 @@ import os
3
  OWNER = "huggingface"
4
  REPO = "transformers"
5
  TOKEN = os.environ.get("GITHUB_TOKEN")
 
 
3
  OWNER = "huggingface"
4
  REPO = "transformers"
5
  TOKEN = os.environ.get("GITHUB_TOKEN")
6
+ GITHUB_API_VERSION = "2022-11-28"
fetch.py CHANGED
@@ -2,16 +2,16 @@
2
  Script to fetch issues from the transformers repo and save them to a json file
3
  """
4
 
5
- import json
6
-
7
  import argparse
 
 
 
 
8
 
9
  import requests
10
- import os
11
  import numpy as np
12
- import json
13
- import datetime
14
- import logging
15
 
16
  logging.basicConfig(level=logging.INFO)
17
 
@@ -19,11 +19,7 @@ logger = logging.getLogger(__name__)
19
 
20
  today = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
21
 
22
- OWNER = "huggingface"
23
- REPO = "transformers"
24
- GITHUB_API_VERSION = "2022-11-28"
25
- TOKEN = os.environ.get("GITHUB_TOKEN")
26
- JSON_FILE = f"issues.json"
27
  UPDATE_FILE = False
28
  OVERWRITE_FILE = True
29
 
@@ -65,7 +61,7 @@ def get_issues(
65
  url = f"https://api.github.com/repos/{owner}/{repo}/issues"
66
  headers = {
67
  "Accept": "application/vnd.github+json",
68
- f"Authorization": f"{token}",
69
  "X-GitHub-Api-Version": f"{github_api_version}",
70
  "User-Agent": "amyeroberts",
71
  }
 
2
  Script to fetch issues from the transformers repo and save them to a json file
3
  """
4
 
 
 
5
  import argparse
6
+ import datetime
7
+ import logging
8
+ import json
9
+ import os
10
 
11
  import requests
 
12
  import numpy as np
13
+
14
+ from defaults import OWNER, REPO, GITHUB_API_VERSION, TOKEN
 
15
 
16
  logging.basicConfig(level=logging.INFO)
17
 
 
19
 
20
  today = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
21
 
22
+ JSON_FILE = "issues.json"
 
 
 
 
23
  UPDATE_FILE = False
24
  OVERWRITE_FILE = True
25
 
 
61
  url = f"https://api.github.com/repos/{owner}/{repo}/issues"
62
  headers = {
63
  "Accept": "application/vnd.github+json",
64
+ "Authorization": f"{token}",
65
  "X-GitHub-Api-Version": f"{github_api_version}",
66
  "User-Agent": "amyeroberts",
67
  }
find_similar_issues.py CHANGED
@@ -1,4 +1,3 @@
1
- import pprint
2
  import json
3
  import argparse
4
  import requests
@@ -17,11 +16,12 @@ def load_embeddings():
17
  embeddings = np.load("issue_embeddings.npy")
18
  return embeddings
19
 
20
- def load_issue_information():
 
21
  """
22
  Function to load issue information from file
23
  """
24
- with open("embedding_index_to_issue.json", "r") as f:
25
  embedding_index_to_issue = json.load(f)
26
 
27
  with open("issues_dict.json", "r") as f:
@@ -45,23 +45,20 @@ def get_issue(issue_no, token=TOKEN, owner=OWNER, repo=REPO):
45
  """
46
  Function to get issue from GitHub
47
  """
48
- url = f"https://api.github.com/repos/{owner}/{repo}/issues"
49
  headers = {
50
  "Accept": "application/vnd.github+json",
51
- f"Authorization": "{token}",
52
  "X-GitHub-Api-Version": "2022-11-28",
53
  "User-Agent": "amyeroberts",
54
  }
55
- request = requests.get(
56
- f"https://api.github.com/repos/{OWNER}/{REPO}/issues/{issue_no}",
57
- headers=headers,
58
- )
59
  if request.status_code != 200:
60
  raise ValueError(f"Request failed with status code {request.status_code}")
61
  return request.json()
62
 
63
 
64
- def get_similar_issues(issue_no, query, top_k=5, token=TOKEN, owner=OWNER, repo=REPO):
65
  """
66
  Function to find similar issues
67
  """
@@ -86,7 +83,7 @@ def get_similar_issues(issue_no, query, top_k=5, token=TOKEN, owner=OWNER, repo=
86
  most_similar_indices = np.argsort(cosine_similarities)
87
  most_similar_indices = most_similar_indices[0][::-1]
88
 
89
- embedding_index_to_issue, issues = load_issue_information()
90
 
91
  similar_issues = []
92
  for i in most_similar_indices[:top_k]:
 
 
1
  import json
2
  import argparse
3
  import requests
 
16
  embeddings = np.load("issue_embeddings.npy")
17
  return embeddings
18
 
19
+
20
+ def load_issue_information(issue_type="issue"):
21
  """
22
  Function to load issue information from file
23
  """
24
+ with open(f"embedding_index_to_{issue_type}.json", "r") as f:
25
  embedding_index_to_issue = json.load(f)
26
 
27
  with open("issues_dict.json", "r") as f:
 
45
  """
46
  Function to get issue from GitHub
47
  """
48
+ url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_no}"
49
  headers = {
50
  "Accept": "application/vnd.github+json",
51
+ "Authorization": f"{token}",
52
  "X-GitHub-Api-Version": "2022-11-28",
53
  "User-Agent": "amyeroberts",
54
  }
55
+ request = requests.get(url, headers=headers)
 
 
 
56
  if request.status_code != 200:
57
  raise ValueError(f"Request failed with status code {request.status_code}")
58
  return request.json()
59
 
60
 
61
+ def get_similar_issues(issue_no, query, top_k=5, token=TOKEN, owner=OWNER, repo=REPO, issue_type="issue"):
62
  """
63
  Function to find similar issues
64
  """
 
83
  most_similar_indices = np.argsort(cosine_similarities)
84
  most_similar_indices = most_similar_indices[0][::-1]
85
 
86
+ embedding_index_to_issue, issues = load_issue_information(issue_type=issue_type)
87
 
88
  similar_issues = []
89
  for i in most_similar_indices[:top_k]:
get_topic.py DELETED
@@ -1,57 +0,0 @@
1
-
2
- import json
3
-
4
- with open("issues_dict.json", "r") as f:
5
- issues = json.load(f)
6
-
7
- topic_maintainers_map ={
8
- "text models": ["@ArthurZucker", "@younesbelkada"],
9
- "vision models": "@amyeroberts",
10
- "speech models": "@sanchit-gandhi",
11
- "graph models": "@clefourrier",
12
- "flax": "@sanchit-gandhi",
13
- "generate": "@gante",
14
- "pipelines": "@Narsil",
15
- "tensorflow": ["@gante", "@Rocketknight1"],
16
- "tokenizers": "@ArthurZucker",
17
- "trainer": ["@muellerzr", "@pacman100"],
18
- "deepspeed": "@pacman100",
19
- "ray/raytune": ["@richardliaw", "@amogkam"],
20
- "Big Model Inference": "@SunMarc",
21
- "quantization (bitsandbytes, autogpt)": ["@SunMarc", "@younesbelkada"],
22
- "Documentation": ["@stevhliu", "@MKhalusova"],
23
- "accelerate": "different repo",
24
- "datasets": "different repo",
25
- "diffusers": "different repo",
26
- "rust tokenizers": "different repo",
27
- "Flax examples": "@sanchit-gandhi",
28
- "PyTorch vision examples": "@amyeroberts",
29
- "PyTorch text examples": "@ArthurZucker",
30
- "PyTorch speech examples": "@sanchit-gandhi",
31
- "PyTorch generate examples": "@gante",
32
- "TensorFlow": "@Rocketknight1",
33
- "Research projects and examples": "not maintained",
34
- }
35
-
36
-
37
- issue_no = 2781
38
- issue = issues[str(issue_no)]
39
-
40
-
41
- from transformers import AutoTokenizer, LlamaForCausalLM
42
-
43
- model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
44
- tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
45
-
46
- # prompt = f"Which of the following topics {list(topic_maintainers_map.keys())} is this issue about:\n{issue['body']}"
47
- prompt = f"QUESTION: What is the provided issue about? Pick up to 3 topics from the following list: {list(topic_maintainers_map.keys())} \nISSUE START:\n{issue['body']} \n ISSUE END. \n ANSWER:"
48
- inputs = tokenizer(prompt, return_tensors="pt")
49
-
50
- prefix_len = inputs.input_ids.shape[1]
51
-
52
- # Generate
53
- generate_ids = model.generate(inputs.input_ids, max_length=30 + prefix_len)
54
- outputs = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
55
- print(outputs[prefix_len:])
56
-
57
- print("TITLE", issue["number"] + " " + issue["title"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
retrieval.py CHANGED
@@ -1,6 +1,5 @@
1
  import argparse
2
  import json
3
- import pprint
4
 
5
  import numpy as np
6
  from sentence_transformers import SentenceTransformer
 
1
  import argparse
2
  import json
 
3
 
4
  import numpy as np
5
  from sentence_transformers import SentenceTransformer
update_stored_issues.py CHANGED
@@ -1,38 +1,20 @@
1
  """
2
  Module which updates any of the issues to reflect changes in the issue state
3
  """
4
- import json
5
- import datetime
6
- from defaults import TOKEN, OWNER, REPO
7
-
8
- GITHUB_API_VERSION = "2022-11-28"
9
-
10
-
11
-
12
- # Get the issues that have been updated since the last update
13
-
14
- import json
15
-
16
  import argparse
17
-
18
- import requests
19
- import os
20
- import numpy as np
21
  import json
22
- import datetime
23
  import logging
 
24
 
25
- logging.basicConfig(level=logging.INFO)
 
26
 
27
- logger = logging.getLogger(__name__)
28
 
29
- today = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
 
30
 
31
- OWNER = "huggingface"
32
- REPO = "transformers"
33
- GITHUB_API_VERSION = "2022-11-28"
34
- TOKEN = os.environ.get("GITHUB_TOKEN")
35
- JSON_FILE = f"issues.json"
36
 
37
 
38
  def update_issues(
@@ -62,7 +44,7 @@ def update_issues(
62
  url = f"https://api.github.com/repos/{owner}/{repo}/issues"
63
  headers = {
64
  "Accept": "application/vnd.github+json",
65
- f"Authorization": f"{token}",
66
  "X-GitHub-Api-Version": f"{github_api_version}",
67
  "User-Agent": "amyeroberts",
68
  }
 
1
  """
2
  Module which updates any of the issues to reflect changes in the issue state
3
  """
 
 
 
 
 
 
 
 
 
 
 
 
4
  import argparse
 
 
 
 
5
  import json
 
6
  import logging
7
+ import os
8
 
9
+ import numpy as np
10
+ import requests
11
 
12
+ from defaults import TOKEN, OWNER, REPO, GITHUB_API_VERSION
13
 
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__name__)
16
 
17
+ JSON_FILE = "issues.json"
 
 
 
 
18
 
19
 
20
  def update_issues(
 
44
  url = f"https://api.github.com/repos/{owner}/{repo}/issues"
45
  headers = {
46
  "Accept": "application/vnd.github+json",
47
+ "Authorization": f"{token}",
48
  "X-GitHub-Api-Version": f"{github_api_version}",
49
  "User-Agent": "amyeroberts",
50
  }