POC for passive monitoring
Browse files
- app.py +7 -6
- requirements.txt +4 -1
app.py
CHANGED
@@ -9,15 +9,15 @@ from whoosh.query import Or, Term
|
|
9 |
|
10 |
import csv
|
11 |
|
12 |
-
THREAD_ID = "thread_id"
|
13 |
-
|
14 |
-
logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s', level=os.environ.get("LOGLEVEL", "INFO"))
|
15 |
import gradio as gr
|
16 |
from llama_index import ServiceContext, \
|
17 |
Document, GPTListIndex, VectorStoreIndex
|
18 |
from whoosh import fields, index
|
19 |
from whoosh.qparser import QueryParser
|
20 |
|
|
|
|
|
|
|
21 |
thread_index = {}
|
22 |
comment_index = {}
|
23 |
llama_cache = {}
|
@@ -99,7 +99,7 @@ def create_comment_index(data_set):
|
|
99 |
writer = write_ix.writer()
|
100 |
|
101 |
# Read the CSV file and add documents to the index
|
102 |
-
count= 0
|
103 |
with open(f'csv/{data_set}.csv', 'r') as csvfile:
|
104 |
reader = csv.DictReader(csvfile)
|
105 |
for row in reader:
|
@@ -115,10 +115,10 @@ def create_comment_index(data_set):
|
|
115 |
comment_index[data_set] = read_ix
|
116 |
|
117 |
|
118 |
-
def search_keyword_matches(ix,
|
119 |
# Create a query parser
|
120 |
query_parser = QueryParser("content", ix.schema)
|
121 |
-
query = query_parser.parse(
|
122 |
return execute_text_search(ix, query)
|
123 |
|
124 |
|
@@ -227,6 +227,7 @@ def remove_leading_numbers(text):
|
|
227 |
# Use re.sub to strip a leading "<number>." or "<number>)" marker (and any whitespace after it) from the start of every line.
|
228 |
return re.sub(r'^\d+[.)]\s*', '', text, flags=re.M)
|
229 |
|
|
|
230 |
def find_topics_with_llama(index_name, query, matches):
|
231 |
print(f"Found {len(matches)} matches for {query}")
|
232 |
docs_list = []
|
|
|
9 |
|
10 |
import csv
|
11 |
|
|
|
|
|
|
|
12 |
import gradio as gr
|
13 |
from llama_index import ServiceContext, \
|
14 |
Document, GPTListIndex, VectorStoreIndex
|
15 |
from whoosh import fields, index
|
16 |
from whoosh.qparser import QueryParser
|
17 |
|
18 |
+
logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s', level=os.environ.get("LOGLEVEL", "INFO"))
|
19 |
+
THREAD_ID = "thread_id"
|
20 |
+
|
21 |
thread_index = {}
|
22 |
comment_index = {}
|
23 |
llama_cache = {}
|
|
|
99 |
writer = write_ix.writer()
|
100 |
|
101 |
# Read the CSV file and add documents to the index
|
102 |
+
count = 0
|
103 |
with open(f'csv/{data_set}.csv', 'r') as csvfile:
|
104 |
reader = csv.DictReader(csvfile)
|
105 |
for row in reader:
|
|
|
115 |
comment_index[data_set] = read_ix
|
116 |
|
117 |
|
118 |
+
def search_keyword_matches(ix, keyword_query):
|
119 |
# Create a query parser
|
120 |
query_parser = QueryParser("content", ix.schema)
|
121 |
+
query = query_parser.parse(keyword_query)
|
122 |
return execute_text_search(ix, query)
|
123 |
|
124 |
|
|
|
227 |
# Use re.sub to strip a leading "<number>." or "<number>)" marker (and any whitespace after it) from the start of every line.
|
228 |
return re.sub(r'^\d+[.)]\s*', '', text, flags=re.M)
|
229 |
|
230 |
+
|
231 |
def find_topics_with_llama(index_name, query, matches):
|
232 |
print(f"Found {len(matches)} matches for {query}")
|
233 |
docs_list = []
|
requirements.txt
CHANGED
@@ -10,4 +10,7 @@ slack~=0.0.2
|
|
10 |
fastapi~=0.100.0
|
11 |
pydantic>=1.10.5,<2
|
12 |
|
13 |
-
ratelimit~=2.2.1
|
|
|
|
|
|
|
|
10 |
fastapi~=0.100.0
|
11 |
pydantic>=1.10.5,<2
|
12 |
|
13 |
+
ratelimit~=2.2.1
|
14 |
+
Whoosh~=2.7.4
|
15 |
+
spacy~=3.6.1
|
16 |
+
numpy~=1.25.1
|