terapyon committed on
Commit
648f519
1 Parent(s): 5be1a02

added date filter and comment filter and show date, label refs #5

Browse files
Files changed (3) hide show
  1. app.py +83 -14
  2. gh_issue_loader.py +2 -13
  3. model.py +13 -0
app.py CHANGED
@@ -1,11 +1,13 @@
 
1
  from typing import Iterable
2
  import streamlit as st
3
  import torch
4
  from langchain.embeddings import HuggingFaceEmbeddings
5
  from langchain.vectorstores import Qdrant
6
  from qdrant_client import QdrantClient
7
- from qdrant_client.http.models import Filter, FieldCondition, MatchValue
8
  from config import DB_CONFIG
 
9
 
10
 
11
  @st.cache_resource
@@ -25,11 +27,28 @@ EMBEDDINGS = load_embeddings()
25
 
26
 
def make_filter_obj(options: list[dict[str]]):
    """Build a ``Filter`` in which every option must match exactly.

    Each option is a dict whose ``"key"`` entry is used as the condition key
    and whose ``"value"`` entry is the value to match.
    """
    conditions = [
        FieldCondition(key=opt["key"], match=MatchValue(value=opt["value"]))
        for opt in options
    ]
    return Filter(must=conditions)
35
 
@@ -52,22 +71,47 @@ def main(
52
  query: str,
53
  repo_name: str,
54
  query_options: str,
55
- ) -> Iterable[tuple[str, tuple[str, str]]]:
 
 
 
56
  options = [{"key": "metadata.repo_name", "value": repo_name}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  if query_options == "Empty":
58
  query_options = ""
59
  query_str = f"{query_options}{query}"
60
- filter = make_filter_obj(options=options)
61
  docs = get_similay(query_str, filter)
62
  for doc, score in docs:
63
  text = doc.page_content
64
  metadata = doc.metadata
65
  # print(metadata)
66
- title = metadata.get("title")
67
- url = metadata.get("url")
68
- id_ = metadata.get("id")
69
- is_comment = metadata.get("type_") == "comment"
70
- yield title, url, id_, text, score, is_comment
 
 
 
 
 
 
71
 
72
 
73
  with st.form("my_form"):
@@ -94,6 +138,20 @@ with st.form("my_form"):
94
  ],
95
  label="Query options",
96
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  submitted = st.form_submit_button("Submit")
99
  if submitted:
@@ -101,8 +159,18 @@ with st.form("my_form"):
101
  st.header("Search Results")
102
  st.divider()
103
  with st.spinner("Searching..."):
104
- results = main(query, repo_name, query_options)
105
- for title, url, id_, text, score, is_comment in results:
 
 
 
 
 
 
 
 
 
 
106
  with st.container():
107
  if not is_comment:
108
  st.subheader(f"#{id_} - {title}")
@@ -110,6 +178,7 @@ with st.form("my_form"):
110
  st.subheader(f"comment with {title}")
111
  st.write(url)
112
  st.write(text)
113
- st.write(score)
 
114
  # st.markdown(html, unsafe_allow_html=True)
115
  st.divider()
 
1
+ from datetime import datetime, date, timedelta
2
  from typing import Iterable
3
  import streamlit as st
4
  import torch
5
  from langchain.embeddings import HuggingFaceEmbeddings
6
  from langchain.vectorstores import Qdrant
7
  from qdrant_client import QdrantClient
8
+ from qdrant_client.http.models import Filter, FieldCondition, MatchValue, Range
9
  from config import DB_CONFIG
10
+ from model import Issue
11
 
12
 
13
  @st.cache_resource
 
27
 
28
 
def make_filter_obj(options: list[dict]) -> "Filter":
    """Build a ``Filter`` whose conditions must all hold.

    Args:
        options: One dict per condition. Each dict carries a ``"key"`` entry,
            used as the condition key, plus one of:

            * ``"value"`` - the condition matches that exact value;
            * ``"range"`` - a dict that may contain any of ``"gt"``,
              ``"gte"``, ``"lt"`` and ``"lte"``.

            ``"value"`` wins when both are present. An option with neither
            entry adds no condition.

    Returns:
        A ``Filter`` with one ``FieldCondition`` in ``must`` per usable
        option, in the order the options were given.
    """
    must = []
    for option in options:
        if "value" in option:
            condition = FieldCondition(
                key=option["key"], match=MatchValue(value=option["value"])
            )
        elif "range" in option:
            bounds = option["range"]
            # Bounds missing from the dict are handed to Range as None.
            condition = FieldCondition(
                key=option["key"],
                range=Range(
                    gt=bounds.get("gt"),
                    gte=bounds.get("gte"),
                    lt=bounds.get("lt"),
                    lte=bounds.get("lte"),
                ),
            )
        else:
            # Neither selector present: this option adds no condition.
            continue
        must.append(condition)
    return Filter(must=must)
54
 
 
71
  query: str,
72
  repo_name: str,
73
  query_options: str,
74
+ start_date: date,
75
+ end_date: date,
76
+ include_comments: bool,
77
+ ) -> Iterable[tuple[Issue, float, str]]:
78
  options = [{"key": "metadata.repo_name", "value": repo_name}]
79
+ if start_date is not None and end_date is not None:
80
+ options.append(
81
+ {
82
+ "key": "metadata.created_at",
83
+ "range": {
84
+ "gte": int(datetime.fromisoformat(str(start_date)).timestamp()),
85
+ "lte": int(
86
+ datetime.fromisoformat(
87
+ str(end_date + timedelta(days=1))
88
+ ).timestamp()
89
+ ),
90
+ },
91
+ }
92
+ )
93
+ if not include_comments:
94
+ options.append({"key": "metadata.type_", "value": "issue"})
95
+ filter = make_filter_obj(options=options)
96
  if query_options == "Empty":
97
  query_options = ""
98
  query_str = f"{query_options}{query}"
 
99
  docs = get_similay(query_str, filter)
100
  for doc, score in docs:
101
  text = doc.page_content
102
  metadata = doc.metadata
103
  # print(metadata)
104
+ issue = Issue(
105
+ repo_name=repo_name,
106
+ id=metadata.get("id"),
107
+ title=metadata.get("title"),
108
+ created_at=metadata.get("created_at"),
109
+ user=metadata.get("user"),
110
+ url=metadata.get("url"),
111
+ labels=metadata.get("labels"),
112
+ type_=metadata.get("type_"),
113
+ )
114
+ yield issue, score, text
115
 
116
 
117
  with st.form("my_form"):
 
138
  ],
139
  label="Query options",
140
  )
141
+ date_min = date(2022, 1, 1)
142
+ date_max = date.today()
143
+ date_col1, date_col2 = st.columns(2)
144
+ start_date = date_col1.date_input(
145
+ label="Select a start date",
146
+ value=date_min,
147
+ format="YYYY-MM-DD",
148
+ )
149
+ end_date = date_col2.date_input(
150
+ label="Select a end date",
151
+ value=date_max,
152
+ format="YYYY-MM-DD",
153
+ )
154
+ include_comments = st.checkbox(label="Include Issue comments", value=True)
155
 
156
  submitted = st.form_submit_button("Submit")
157
  if submitted:
 
159
  st.header("Search Results")
160
  st.divider()
161
  with st.spinner("Searching..."):
162
+ results = main(
163
+ query, repo_name, query_options, start_date, end_date, include_comments
164
+ )
165
+ for issue, score, text in results:
166
+ title = issue.title
167
+ url = issue.url
168
+ id_ = issue.id
169
+ score = round(score, 3)
170
+ created_at = datetime.fromtimestamp(issue.created_at)
171
+ user = issue.user
172
+ labels = issue.labels
173
+ is_comment = issue.type_ == "comment"
174
  with st.container():
175
  if not is_comment:
176
  st.subheader(f"#{id_} - {title}")
 
178
  st.subheader(f"comment with {title}")
179
  st.write(url)
180
  st.write(text)
181
+ st.write("score:", score, "Date:", created_at.date(), "User:", user)
182
+ st.write(f"{labels=}")
183
  # st.markdown(html, unsafe_allow_html=True)
184
  st.divider()
gh_issue_loader.py CHANGED
@@ -1,21 +1,10 @@
1
- from dataclasses import dataclass, asdict
2
  import json
3
  from typing import Iterator
4
  from dateutil.parser import parse
5
  from langchain.docstore.document import Document
6
  from langchain.document_loaders.base import BaseLoader
7
-
8
-
9
- @dataclass
10
- class Issue:
11
- repo_name: str
12
- id: int
13
- title: str
14
- created_at: int
15
- user: str
16
- url: str
17
- labels: list[str]
18
- type_: str
19
 
20
 
21
  def date_to_int(dt_str: str) -> int:
 
1
+ from dataclasses import asdict
2
  import json
3
  from typing import Iterator
4
  from dateutil.parser import parse
5
  from langchain.docstore.document import Document
6
  from langchain.document_loaders.base import BaseLoader
7
+ from gh_issue_loader import Issue
 
 
 
 
 
 
 
 
 
 
 
8
 
9
 
10
  def date_to_int(dt_str: str) -> int:
model.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+
3
+
@dataclass(frozen=True)
class Issue:
    """Immutable record describing one search hit (an issue or a comment)."""

    # Repository the entry belongs to.
    repo_name: str
    # Issue number; shown as "#<id>" in result headings.
    id: int
    title: str
    # Creation time as an integer timestamp (callers pass it to
    # datetime.fromtimestamp).
    created_at: int
    user: str
    url: str
    labels: list[str]
    # Entry kind; callers compare it against "issue" and "comment".
    type_: str