svummidi committed
Commit dddb40f
Parent: a31ba66

POC for passive monitoring

Files changed (4):
  1. README.md +1 -1
  2. app.py +293 -59
  3. app_passive.py +0 -306
  4. app_v1.py +72 -0
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: red
colorTo: yellow
sdk: gradio
sdk_version: 3.36.1
- app_file: app.py
+ app_file: app_v1.py
pinned: false
---
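In a Hugging Face Space, the app_file field in the README front matter selects which script the Space runs, so the hosted demo now launches app_v1.py (a copy of the previous app.py, added below) while app.py carries the new passive-monitoring POC.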
 
app.py CHANGED
@@ -1,72 +1,306 @@
+ import json
import logging
import os
- import json
- from llama_index.indices.document_summary import DocumentSummaryIndexEmbeddingRetriever
- from llama_index.indices.vector_store import VectorIndexRetriever
+ import re
+ from functools import lru_cache
+
from llama_index.llms import OpenAI
- from llama_index.query_engine import RetrieverQueryEngine
+ from whoosh.query import Or, Term
+
+ import csv
+
+ THREAD_ID = "thread_id"

logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s', level=os.environ.get("LOGLEVEL", "INFO"))
import gradio as gr
- from llama_index import VectorStoreIndex, StorageContext, download_loader, load_index_from_storage, ServiceContext, \
-     get_response_synthesizer
+ from llama_index import ServiceContext, \
+     Document, GPTListIndex, VectorStoreIndex
+ from whoosh import fields, index
+ from whoosh.qparser import QueryParser
+
+ thread_index = {}
+ comment_index = {}
+ llama_cache = {}

- cache = {}
- chatgpt = OpenAI(temperature=0, model="gpt-3.5-turbo")
+ chatgpt = OpenAI(temperature=0, model="gpt-4")
service_context = ServiceContext.from_defaults(llm=chatgpt, chunk_size=1024)


- def load_mapping_from_json(filepath):
-     if not os.path.exists(filepath):
-         return {}
-     with open(filepath, 'r') as file:
-         return json.load(file)
-
- userIdMapping = load_mapping_from_json('user_id_to_name_mapping.json')
-
- def loadData():
-     index_root = "./index"
-     directory_names = os.listdir(index_root)
-     for directory in directory_names:
-         if os.path.isdir(f"{index_root}/{directory}"):
-             print("Loading from existing index " + directory)
-             storage_context = StorageContext.from_defaults(persist_dir=f"{index_root}/{directory}")
-             index = load_index_from_storage(storage_context)
-             vector_retriever = VectorIndexRetriever(index=index, similarity_top_k=5)
-             response_synthesizer = get_response_synthesizer(service_context=service_context,
-                                                             response_mode="tree_summarize")
-             query_engine = RetrieverQueryEngine(
-                 retriever=vector_retriever,
-                 response_synthesizer=response_synthesizer,
-             )
-             cache[directory] = query_engine
-
-
- def chatbot(indexName, input_text):
-     """
-     Chatbot function that takes in a prompt and returns a response
-     """
-     query = "This data contains updates from multiple teams and sprints - " + input_text + " Ignore headers in the content."
-     response = cache[indexName].query(query)
-     answer = response.response.replace('Based on the given context information', 'Based on the available information')
-     for userId in userIdMapping:
-         answer = answer.replace(userId, userIdMapping[userId])
-     return answer
-
-
- def main():
-     loadData()
-     iface = gr.Interface(fn=chatbot, inputs=[
-         gr.Dropdown(cache.keys(),
-                     type="value", value="sos", label="Select Channel"),
-         gr.Textbox(lines=7, label="Ask any question", placeholder='What are the key topics?')], outputs=gr.Textbox(lines=11, label="Response"),
-         title="NLP Demo for Slack Data")
-     if 'LOGIN_PASS' in os.environ:
-         iface.launch(auth=('axiamatic', os.environ['LOGIN_PASS']),
-                      auth_message='For access, please check my Slack profile or contact me in Slack.',
-                      share=False)
+ def passive_topics(index_name, query, topic, summary_type):
+     resp = search_keyword_matches(index_name, query)
+     if resp is not None:
+         print(f"Found {len(resp)} matches for {query}")
+         docs_list = [Document(text=content) for content in resp]
+         last_llama_index = GPTListIndex.from_documents(documents=docs_list, service_context=service_context)
+         llama_cache[index_name] = last_llama_index
+         resp = last_llama_index.as_query_engine().query(
+             "What are the key negative topics from the discussion? Limit each topic to 30 characters")
+         dynamic_topics = resp.response.split('\n')
+         return dynamic_topics
+
+     return []
+
+
+ def load_data(data_sets):
+     for data_set in data_sets:
+         create_thread_index(data_set)
+         create_comment_index(data_set)
+
+
+ def create_thread_index(data_set):
+     # Define a schema for the index
+     schema = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT(stored=True))
+     index_path = f"./text_index/{data_set}"
+     # Create the index directory if it doesn't exist
+     if not os.path.exists(index_path):
+         os.mkdir(index_path)
+         build_index = True
+     else:
+         build_index = False
+         print("Loading from existing thread index " + data_set)
+     if build_index:
+         print("Building thread index for " + data_set)
+         # Create an index under "indexdir"
+         write_ix = index.create_in(index_path, schema)
+
+         # Create a writer object to add documents to the index
+         writer = write_ix.writer()
+
+         # Read the CSV file and add documents to the index
+         with open(f'csv/{data_set}.csv', 'r') as csvfile:
+             reader = csv.DictReader(csvfile)
+             for row in reader:
+                 writer.add_document(id=row['thread_ts'], content=row['messages_json'])
+         # Commit the writer and close it
+         writer.commit()
+         write_ix.close()
+     # Open the index
+     read_ix = index.open_dir(index_path)
+     thread_index[data_set] = read_ix
+
+
+ def create_comment_index(data_set):
+     # Define a schema for the index
+     schema = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT(stored=True))
+     index_path = f"./text_index/{data_set}_comments"
+     # Create the index directory if it doesn't exist
+     if not os.path.exists(index_path):
+         os.mkdir(index_path)
+         build_index = True
    else:
+         build_index = False
+         print("Loading from existing comments index " + data_set)
+     if build_index:
+         print("Building comments index for " + data_set)
+         # Create an index under "indexdir"
+         write_ix = index.create_in(index_path, schema)
+
+         # Create a writer object to add documents to the index
+         writer = write_ix.writer()
+
+         # Read the CSV file and add documents to the index
+         count = 0
+         with open(f'csv/{data_set}.csv', 'r') as csvfile:
+             reader = csv.DictReader(csvfile)
+             for row in reader:
+                 comments = json.loads(row['messages_json'])
+                 for comment in comments:
+                     writer.add_document(id=row['thread_ts'], content=comment["content"])
+                     count += 1
+         # Commit the writer and close it
+         writer.commit()
+         write_ix.close()
+     # Open the index
+     read_ix = index.open_dir(index_path)
+     comment_index[data_set] = read_ix
+
+
+ def search_keyword_matches(ix, input):
+     # Create a query parser
+     query_parser = QueryParser("content", ix.schema)
+     query = query_parser.parse(input)
+     return execute_text_search(ix, query)
+
+
+ def search_thread_id_matches(ix, thread_id_list):
+     # Create a query parser
+     query = Or([Term('id', id_) for id_ in thread_id_list])
+     return execute_text_search(ix, query)
+
+
+ def execute_text_search(ix, q):
+     # Search the index
+     with ix.searcher() as searcher:
+         results = searcher.search(q, limit=20)
+         if len(results) > 0:
+             matches = []
+             for result in results:
+                 matches.append([result['id'], result['content']])
+             return matches
+         else:
+             return None
+
+
+ def gen_insights(index_name, topic, summary_type):
+     if topic is not None and len(topic) > 0:
+         resp = generate_insights(index_name, topic, summary_type)
+         return resp.response
+
+
+ @lru_cache(maxsize=50)
+ def generate_insights(index_name, topic, summary_type):
+     if llama_cache[index_name] is None:
+         return None
+     query = f"What is the executive summary for the topic \"{topic}\"? Highlight negative aspects in 100 words"
+     if summary_type == "Actions":
+         query = f"What are the recommended action items for the topic \"{topic}\"? Limit response to 100 words using bullet points"
+     elif summary_type == "Followup":
+         query = f"What are the recommended questions to ask team for more clarity and latest status for the topic \"{topic}\"?"
+     return llama_cache[index_name].as_query_engine().query(query)
+
+
+ def generate_comment_insights(index_name, topic, summary_type):
+     if summary_type == "Show Comments":
+         return show_docs(index_name, topic)
+     if summary_type == "Show Threads":
+         return show_threads(index_name, topic)
+     if summary_type == "Show Summary":
+         return show_thread_summaries(index_name, topic)
+
+     return "Not yet implemented"
+
+
+ def retrieve_llama_nodes(index_name, topic):
+     llama = llama_cache[index_name]
+     if llama is None:
+         return None
+     retriever = llama.as_retriever()
+     return retriever.retrieve(topic)
+
+
+ def show_docs(index_name, topic):
+     nodes = retrieve_llama_nodes(index_name, topic)
+     if nodes is None:
+         return "No matching documents found for the topic " + topic
+     text_list = [node_with_score.node.text for node_with_score in nodes]
+     return f"Total Matched Comments {len(text_list)}\n" + "\n\n==============\n".join(text_list)
+
+
+ def find_matching_threads(index_name, topic):
+     nodes = retrieve_llama_nodes(index_name, topic)
+     if nodes is None:
+         return None
+     thread_ids_list = [node_with_score.node.metadata[THREAD_ID] for node_with_score in nodes]
+     matches = search_thread_id_matches(thread_index[index_name], thread_ids_list)
+     threads = []
+     for thread in matches:
+         comments = json.loads(thread[1])
+         thread_content = []
+         for comment in comments:
+             thread_content.append(comment["content"])
+         threads.append("\n ->->-> \n ".join(thread_content))
+     return threads
+
+
+ def show_threads(index_name, topic):
+     threads = find_matching_threads(index_name, topic)
+     if threads is None:
+         return "No matching documents found for the topic " + topic
+     return f"Total Threads {len(threads)}\n" + "\n\n==============\n".join(threads)
+
+
+ @lru_cache(maxsize=50)
+ def show_thread_summaries(index_name, topic):
+     threads = find_matching_threads(index_name, topic)
+     if threads is None:
+         return "No matching documents found for the topic " + topic
+     docs_list = []
+     for thread in threads:
+         docs_list.append(Document(text=thread))
+     llama_idx = VectorStoreIndex.from_documents(documents=docs_list, service_context=service_context)
+     query = f"What is the executive summary for the topic \"{topic}\"? Limit response to 100 words"
+     resp = llama_idx.as_query_engine().query(query)
+     return resp.response
+
+
+ def remove_leading_numbers(text):
+     # Use re.sub to replace any pattern of "<number>." at the beginning of a line.
+     return re.sub(r'^\d+[.)]\s*', '', text, flags=re.M)
+
+ def find_topics_with_llama(index_name, query, matches):
+     print(f"Found {len(matches)} matches for {query}")
+     docs_list = []
+     for match in matches:
+         metadata = {THREAD_ID: match[0]}
+         docs_list.append(Document(text=match[1], metadata=metadata))
+     last_llama_index = VectorStoreIndex.from_documents(documents=docs_list, service_context=service_context)
+     llama_cache[index_name] = last_llama_index
+     resp = last_llama_index.as_query_engine().query(
+         "What are the key negative topics from the discussion? Limit each topic to 30 characters")
+     # return resp.response.split('\n')
+     # return ["foo", "bar"]
+     result_topics = resp.response.split('\n')
+     clean_topics = [remove_leading_numbers(topic) for topic in result_topics]
+     return clean_topics
+
+
+ def find_topics_by_thread(index_name, query, topic, summary_type):
+     resp = search_keyword_matches(thread_index[index_name], query)
+     if resp is not None:
+         result_topics = find_topics_with_llama(index_name, query, resp)
+         return gr.Dropdown.update(choices=result_topics, value=result_topics[0])
+
+     return "No matches found" if resp is None else resp
+
+
+ def find_topics_by_comments(index_name, query, topic, summary_type):
+     resp = search_keyword_matches(comment_index[index_name], query)
+     if resp is not None:
+         result_topics = find_topics_with_llama(index_name, query, resp)
+         return gr.Dropdown.update(choices=result_topics, value=result_topics[0])
+
+     return "No matches found" if resp is None else resp
+
+
+ def main_demo():
+     demo = gr.Blocks()
+
+     with demo:
+         data_sets = ["platform-engg_messages", "apps-ui_messages", "ux-reviews_messages"]
+         load_data(data_sets)
+         with gr.Tab("Thread"):
+             data_sets_dd = gr.Dropdown(data_sets,
+                                        type="value", value=data_sets[0], label="Select Slack Channel")
+             keyword_txt = gr.Textbox(lines=2, label="Enter keywords to search", placeholder='CISO, auth0')
+             find_topics_button = gr.Button("Find Topics")
+             topics_dd = gr.Dropdown([],
+                                     type="value", label="Select Topic with Negative Sentiment", allow_custom_value=True)
+
+             show_details = gr.Radio(["Summary", "Actions", "Followup"], label="Show Details")
+
+             find_topics_button.click(find_topics_by_thread,
+                                      inputs=[data_sets_dd, keyword_txt, find_topics_button, topics_dd],
+                                      outputs=topics_dd)
+             show_details.change(gen_insights, inputs=[data_sets_dd, topics_dd, show_details],
+                                 outputs=gr.Textbox(lines=11, label="Response"))
+
+         with gr.Tab("Comment"):
+             data_sets_dd = gr.Dropdown(data_sets,
+                                        type="value", value=data_sets[0], label="Select Slack Channel")
+             keyword_txt = gr.Textbox(lines=2, label="Enter keywords to search", placeholder='CISO, auth0')
+             find_topics_button = gr.Button("Find Topics")
+             topics_dd = gr.Dropdown([],
+                                     type="value", label="Select Topic with Negative Sentiment", allow_custom_value=True)
+
+             show_details = gr.Radio(["Show Comments", "Show Threads", "Show Summary"], label="Show Details")
+
+             find_topics_button.click(find_topics_by_comments,
+                                      inputs=[data_sets_dd, keyword_txt, find_topics_button, topics_dd],
+                                      outputs=topics_dd)
+             show_details.change(generate_comment_insights, inputs=[data_sets_dd, topics_dd, show_details],
+                                 outputs=gr.Textbox(lines=11, label="Response"))
+
+     demo.launch()


- main()
+ main_demo()
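
The new app.py pairs a Whoosh keyword index with llama_index: keyword search first narrows a channel down to matching threads or comments, then find_topics_with_llama asks GPT-4 to name negative topics in just that slice. Below is a minimal, self-contained sketch of the Whoosh half, using the same schema and calls as create_thread_index and execute_text_search above; the sample rows and the ./text_index/demo path are hypothetical stand-ins for the csv/<data_set>.csv exports:

import os
from whoosh import fields, index
from whoosh.qparser import QueryParser

# Same schema as the diff: a stored thread id plus full-text searchable content
schema = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT(stored=True))
index_path = "./text_index/demo"  # hypothetical location
os.makedirs(index_path, exist_ok=True)

ix = index.create_in(index_path, schema)
writer = ix.writer()
# Hypothetical rows standing in for the Slack CSV export
writer.add_document(id="1690000000.000100", content="auth0 login fails on the CISO demo tenant")
writer.add_document(id="1690000001.000200", content="sprint review moved to Thursday")
writer.commit()

# Mirrors search_keyword_matches / execute_text_search: parse, search, collect
query = QueryParser("content", ix.schema).parse("auth0")
with ix.searcher() as searcher:
    for hit in searcher.search(query, limit=20):
        print(hit["id"], "->", hit["content"])

Matches found this way are wrapped in llama_index Documents, with the thread id carried as metadata, so the later summary, action, and follow-up queries run only against the matching conversations.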
app_passive.py DELETED
@@ -1,306 +0,0 @@
(file removed in full; its 306 lines are identical to the new app.py shown above)
app_v1.py ADDED
@@ -0,0 +1,72 @@
+ import logging
+ import os
+ import json
+ from llama_index.indices.document_summary import DocumentSummaryIndexEmbeddingRetriever
+ from llama_index.indices.vector_store import VectorIndexRetriever
+ from llama_index.llms import OpenAI
+ from llama_index.query_engine import RetrieverQueryEngine
+
+ logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s', level=os.environ.get("LOGLEVEL", "INFO"))
+ import gradio as gr
+ from llama_index import VectorStoreIndex, StorageContext, download_loader, load_index_from_storage, ServiceContext, \
+     get_response_synthesizer
+
+ cache = {}
+ chatgpt = OpenAI(temperature=0, model="gpt-3.5-turbo")
+ service_context = ServiceContext.from_defaults(llm=chatgpt, chunk_size=1024)
+
+
+ def load_mapping_from_json(filepath):
+     if not os.path.exists(filepath):
+         return {}
+     with open(filepath, 'r') as file:
+         return json.load(file)
+
+ userIdMapping = load_mapping_from_json('user_id_to_name_mapping.json')
+
+ def loadData():
+     index_root = "./index"
+     directory_names = os.listdir(index_root)
+     for directory in directory_names:
+         if os.path.isdir(f"{index_root}/{directory}"):
+             print("Loading from existing index " + directory)
+             storage_context = StorageContext.from_defaults(persist_dir=f"{index_root}/{directory}")
+             index = load_index_from_storage(storage_context)
+             vector_retriever = VectorIndexRetriever(index=index, similarity_top_k=5)
+             response_synthesizer = get_response_synthesizer(service_context=service_context,
+                                                             response_mode="tree_summarize")
+             query_engine = RetrieverQueryEngine(
+                 retriever=vector_retriever,
+                 response_synthesizer=response_synthesizer,
+             )
+             cache[directory] = query_engine
+
+
+ def chatbot(indexName, input_text):
+     """
+     Chatbot function that takes in a prompt and returns a response
+     """
+     query = "This data contains updates from multiple teams and sprints - " + input_text + " Ignore headers in the content."
+     response = cache[indexName].query(query)
+     answer = response.response.replace('Based on the given context information', 'Based on the available information')
+     for userId in userIdMapping:
+         answer = answer.replace(userId, userIdMapping[userId])
+     return answer
+
+
+ def main():
+     loadData()
+     iface = gr.Interface(fn=chatbot, inputs=[
+         gr.Dropdown(cache.keys(),
+                     type="value", value="sos", label="Select Channel"),
+         gr.Textbox(lines=7, label="Ask any question", placeholder='What are the key topics?')], outputs=gr.Textbox(lines=11, label="Response"),
+         title="NLP Demo for Slack Data")
+     if 'LOGIN_PASS' in os.environ:
+         iface.launch(auth=('axiamatic', os.environ['LOGIN_PASS']),
+                      auth_message='For access, please check my Slack profile or contact me in Slack.',
+                      share=False)
+     else:
+         iface.launch(share=False)
+
+
+ main()
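
app_v1.py is the previous question-answering demo restored under the name the README now points at. It only loads indexes that already exist under ./index/<channel>. A hedged sketch of how one such directory might be produced with the same generation of llama_index; the sample messages and the "sos" channel name are hypothetical, and embedding the documents calls the OpenAI API, so OPENAI_API_KEY must be set:

from llama_index import Document, ServiceContext, VectorStoreIndex
from llama_index.llms import OpenAI

# Hypothetical Slack messages for the "sos" channel shown as the dropdown default
docs = [
    Document(text="U123ABC: deploy blocked on auth0 config"),
    Document(text="U456DEF: sprint goals posted for review"),
]

service_context = ServiceContext.from_defaults(
    llm=OpenAI(temperature=0, model="gpt-3.5-turbo"), chunk_size=1024)

# Embed the documents and persist the index files that loadData() later
# reopens via StorageContext.from_defaults / load_index_from_storage
vector_index = VectorStoreIndex.from_documents(docs, service_context=service_context)
vector_index.storage_context.persist(persist_dir="./index/sos")

On the next start, loadData() discovers ./index/sos and lists it in the channel dropdown; user ids like U123ABC are mapped to display names through user_id_to_name_mapping.json when that file is present.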