svummidi committed
Commit 3c109e5
1 Parent(s): 35b7aeb

POC for passive monitoring

Files changed (2):
  1. app.py +7 -6
  2. requirements.txt +4 -1
app.py CHANGED
@@ -9,15 +9,15 @@ from whoosh.query import Or, Term
 
 import csv
 
-THREAD_ID = "thread_id"
-
-logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s', level=os.environ.get("LOGLEVEL", "INFO"))
 import gradio as gr
 from llama_index import ServiceContext, \
     Document, GPTListIndex, VectorStoreIndex
 from whoosh import fields, index
 from whoosh.qparser import QueryParser
 
+logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s', level=os.environ.get("LOGLEVEL", "INFO"))
+THREAD_ID = "thread_id"
+
 thread_index = {}
 comment_index = {}
 llama_cache = {}
@@ -99,7 +99,7 @@ def create_comment_index(data_set):
     writer = write_ix.writer()
 
     # Read the CSV file and add documents to the index
-    count= 0
+    count = 0
     with open(f'csv/{data_set}.csv', 'r') as csvfile:
         reader = csv.DictReader(csvfile)
         for row in reader:
@@ -115,10 +115,10 @@ def create_comment_index(data_set):
     comment_index[data_set] = read_ix
 
 
-def search_keyword_matches(ix, input):
+def search_keyword_matches(ix, keyword_query):
     # Create a query parser
     query_parser = QueryParser("content", ix.schema)
-    query = query_parser.parse(input)
+    query = query_parser.parse(keyword_query)
     return execute_text_search(ix, query)
 
 
@@ -227,6 +227,7 @@ def remove_leading_numbers(text):
     # Use re.sub to replace any pattern of "<number>." at the beginning of a line.
     return re.sub(r'^\d+[.)]\s*', '', text, flags=re.M)
 
+
 def find_topics_with_llama(index_name, query, matches):
     print(f"Found {len(matches)} matches for {query}")
     docs_list = []
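
The renamed search_keyword_matches helper follows Whoosh's usual parse-then-search flow over the "content" field. Below is a minimal sketch of that flow using an in-memory index; the schema field names mirror the "thread_id" and "content" fields referenced in app.py, while the sample documents and the inline search loop (standing in for execute_text_search, whose body is not shown in this diff) are hypothetical.

from whoosh.fields import Schema, TEXT, ID
from whoosh.filedb.filestore import RamStorage
from whoosh.qparser import QueryParser

# Hypothetical schema mirroring the "thread_id" and "content" fields used in app.py
schema = Schema(thread_id=ID(stored=True), content=TEXT(stored=True))
ix = RamStorage().create_index(schema)

# Index a couple of sample comments (hypothetical data)
writer = ix.writer()
writer.add_document(thread_id="t1", content="passive monitoring flagged a latency spike")
writer.add_document(thread_id="t2", content="deployment rollback resolved the error rate")
writer.commit()

# Same parse-then-search shape as search_keyword_matches / execute_text_search
query = QueryParser("content", ix.schema).parse("monitoring")
with ix.searcher() as searcher:
    for hit in searcher.search(query):
        print(hit["thread_id"], hit["content"])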
requirements.txt CHANGED
@@ -10,4 +10,7 @@ slack~=0.0.2
 fastapi~=0.100.0
 pydantic>=1.10.5,<2
 
-ratelimit~=2.2.1
+ratelimit~=2.2.1
+Whoosh~=2.7.4
+spacy~=3.6.1
+numpy~=1.25.1
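
After installing from the updated requirements.txt, the new pins can be sanity-checked against the environment; a small standard-library sketch, using the distribution names exactly as listed above:

from importlib.metadata import version

# Print the installed version of each package touched by this commit
for pkg in ("ratelimit", "Whoosh", "spacy", "numpy"):
    print(pkg, version(pkg))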