Amy Roberts committed on
Commit
7d5704e
•
1 Parent(s): 12ae336

Move to utils

Browse files
app.py CHANGED
@@ -1,17 +1,16 @@
1
  import datetime
2
- import gradio as gr
3
  import os
4
- from find_similar_issues import get_similar_issues
5
- import requests
6
-
7
- from defaults import OWNER, REPO
8
-
9
- import build_issue_dict
10
- import build_embeddings
11
  import shutil
12
- from fetch import get_issues
13
- from update_stored_issues import update_issues
14
 
 
 
 
 
 
 
 
 
 
15
 
16
 
17
  def get_query_issue_information(issue_no, token):
 
1
  import datetime
 
2
  import os
 
 
 
 
 
 
 
3
  import shutil
 
 
4
 
5
+ import gradio as gr
6
+ import requests
7
+
8
+ from utils import build_issue_dict
9
+ from utils import build_embeddings
10
+ from utils.defaults import OWNER, REPO
11
+ from utils.fetch import get_issues
12
+ from utils.find_similar_issues import get_similar_issues
13
+ from utils.update_stored_issues import update_issues
14
 
15
 
16
  def get_query_issue_information(issue_no, token):
retrieval.py DELETED
@@ -1,80 +0,0 @@
1
- """
2
- Module which contains functionality to retrieve the most similar issues for a given query
3
- """
4
-
5
-
6
- import argparse
7
- import json
8
-
9
- import numpy as np
10
- from sentence_transformers import SentenceTransformer
11
-
12
- def cosine_similarity(a, b):
13
- if a.ndim == 1:
14
- a = a.reshape(1, -1)
15
-
16
- if b.ndim == 1:
17
- b = b.reshape(1, -1)
18
-
19
- return np.dot(a, b.T) / (np.linalg.norm(a, axis=1) * np.linalg.norm(b, axis=1))
20
-
21
-
22
- def retrieve_issue_rankings(
23
- query: str,
24
- model_id: str,
25
- input_embedding_filename: str,
26
- ):
27
- """
28
- Given a query returns the list of issues sorted by similarity to the query
29
- according to their embedding index
30
- """
31
- model = SentenceTransformer(model_id)
32
-
33
- embeddings = np.load(input_embedding_filename)
34
-
35
- query_embedding = model.encode(query)
36
-
37
- # Calculate the cosine similarity between the query and all the issues
38
- cosine_similarities = cosine_similarity(query_embedding, embeddings)
39
-
40
- # Get the index of the most similar issue
41
- most_similar_indices = np.argsort(cosine_similarities)
42
- most_similar_indices = most_similar_indices[0][::-1]
43
- return most_similar_indices
44
-
45
-
46
- def print_issue(issues, issue_id):
47
- # Get the issue id of the most similar issue
48
- issue_info = issues[issue_id]
49
-
50
- print(f"#{issue_id}", issue_info["title"])
51
- print(issue_info["body"])
52
-
53
-
54
- if __name__ == "__main__":
55
- parser = argparse.ArgumentParser()
56
- parser.add_argument("query", type=str)
57
- parser.add_argument("--model_id", type=str, default="all-mpnet-base-v2")
58
- parser.add_argument("--input_embedding_filename", type=str, default="issue_embeddings.npy")
59
- parser.add_argument("--input_index_filename", type=str, default="embedding_index_to_issue.json")
60
-
61
- args = parser.parse_args()
62
-
63
- issue_rankings = retrieve_issue_rankings(
64
- query=args.query,
65
- model_id=args.model_id,
66
- input_embedding_filename=args.input_embedding_filename,
67
- )
68
-
69
- with open("issues_dict.json", "r") as f:
70
- issues = json.load(f)
71
-
72
- with open(args.input_index_filename, "r") as f:
73
- embedding_index_to_issue = json.load(f)
74
-
75
- issue_ids = [embedding_index_to_issue[str(i)] for i in issue_rankings]
76
-
77
- for issue_id in issue_ids[:3]:
78
- print(issue_id)
79
- print_issue(issues, issue_id)
80
- print("\n\n\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/__init__.py ADDED
File without changes
build_embeddings.py → utils/build_embeddings.py RENAMED
File without changes
build_issue_dict.py → utils/build_issue_dict.py RENAMED
File without changes
defaults.py → utils/defaults.py RENAMED
File without changes
fetch.py → utils/fetch.py RENAMED
@@ -22,7 +22,7 @@ import os
22
  import requests
23
  import numpy as np
24
 
25
- from defaults import OWNER, REPO, GITHUB_API_VERSION, TOKEN, ISSUE_JSON_FILE
26
 
27
  logging.basicConfig(level=logging.INFO)
28
  logger = logging.getLogger(__name__)
 
22
  import requests
23
  import numpy as np
24
 
25
+ from .defaults import OWNER, REPO, GITHUB_API_VERSION, TOKEN, ISSUE_JSON_FILE
26
 
27
  logging.basicConfig(level=logging.INFO)
28
  logger = logging.getLogger(__name__)
find_similar_issues.py → utils/find_similar_issues.py RENAMED
File without changes
update_embeddings.py → utils/update_embeddings.py RENAMED
File without changes
update_stored_issues.py → utils/update_stored_issues.py RENAMED
@@ -18,7 +18,7 @@ import os
18
  import numpy as np
19
  import requests
20
 
21
- from defaults import TOKEN, OWNER, REPO, GITHUB_API_VERSION, ISSUE_JSON_FILE
22
 
23
  logging.basicConfig(level=logging.INFO)
24
  logger = logging.getLogger(__name__)
 
18
  import numpy as np
19
  import requests
20
 
21
+ from .defaults import TOKEN, OWNER, REPO, GITHUB_API_VERSION, ISSUE_JSON_FILE
22
 
23
  logging.basicConfig(level=logging.INFO)
24
  logger = logging.getLogger(__name__)