Spaces:
Runtime error
Runtime error
Jorge Henao
committed on
Commit
•
47753be
1
Parent(s):
fcc4f87
bug fixes
Browse files- __pycache__/config.cpython-38.pyc +0 -0
- __pycache__/document_quieries.cpython-38.pyc +0 -0
- app.py +1 -3
- document_quieries.py +25 -1
__pycache__/config.cpython-38.pyc
ADDED
Binary file (575 Bytes). View file
|
|
__pycache__/document_quieries.cpython-38.pyc
ADDED
Binary file (3.2 kB). View file
|
|
app.py
CHANGED
@@ -9,7 +9,6 @@ extractive_query = ExtractiveProposalQueries(es_host = Config.es_host, es_index
|
|
9 |
reader_name_or_path = Config.reader_model_name_or_path,
|
10 |
use_gpu = Config.use_gpu)
|
11 |
|
12 |
-
|
13 |
def fake_search(question, retriever_top_k, reader_top_k):
|
14 |
#p1_result = query.search_by_query(query = question, retriever_top_k = retriever_top_k, reader_top_k = reader_top_k, es_index = "informecomisionverdad")
|
15 |
result = []
|
@@ -30,7 +29,6 @@ def search(question, retriever_top_k, reader_top_k):
|
|
30 |
|
31 |
return result
|
32 |
|
33 |
-
|
34 |
if __name__ == "__main__":
|
35 |
# streamlit part starts here with title
|
36 |
title = """
|
@@ -58,7 +56,7 @@ if __name__ == "__main__":
|
|
58 |
# set start time
|
59 |
stt = time.time()
|
60 |
# retrieve top 5 documents
|
61 |
-
results =
|
62 |
# set endtime
|
63 |
ent = time.time()
|
64 |
# measure resulting time
|
|
|
9 |
reader_name_or_path = Config.reader_model_name_or_path,
|
10 |
use_gpu = Config.use_gpu)
|
11 |
|
|
|
12 |
def fake_search(question, retriever_top_k, reader_top_k):
|
13 |
#p1_result = query.search_by_query(query = question, retriever_top_k = retriever_top_k, reader_top_k = reader_top_k, es_index = "informecomisionverdad")
|
14 |
result = []
|
|
|
29 |
|
30 |
return result
|
31 |
|
|
|
32 |
if __name__ == "__main__":
|
33 |
# streamlit part starts here with title
|
34 |
title = """
|
|
|
56 |
# set start time
|
57 |
stt = time.time()
|
58 |
# retrieve top 5 documents
|
59 |
+
results = fake_search(query, retriever_top_k=5, reader_top_k=3)
|
60 |
# set endtime
|
61 |
ent = time.time()
|
62 |
# measure resulting time
|
document_quieries.py
CHANGED
@@ -3,6 +3,9 @@ from haystack.nodes import BM25Retriever, FARMReader
|
|
3 |
from haystack.document_stores import ElasticsearchDocumentStore
|
4 |
from haystack.pipelines import ExtractiveQAPipeline
|
5 |
import certifi
|
|
|
|
|
|
|
6 |
|
7 |
ca_certs=certifi.where()
|
8 |
|
@@ -17,7 +20,8 @@ class ExtractiveProposalQueries(DocumentQueries):
|
|
17 |
def __init__(self, es_host: str, es_index: str, es_user, es_password, reader_name_or_path: str, use_gpu = True) -> None:
|
18 |
reader = FARMReader(model_name_or_path = reader_name_or_path, use_gpu = use_gpu, num_processes=1, context_window_size=200)
|
19 |
self._initialize_pipeline(es_host, es_index, es_user, es_password, reader = reader)
|
20 |
-
|
|
|
21 |
def _initialize_pipeline(self, es_host, es_index, es_user, es_password, reader = None):
|
22 |
if reader is not None:
|
23 |
self.reader = reader
|
@@ -29,8 +33,28 @@ class ExtractiveProposalQueries(DocumentQueries):
|
|
29 |
self.pipe = ExtractiveQAPipeline(self.reader, self.retriever)
|
30 |
|
31 |
def search_by_query(self, query : str, retriever_top_k: int, reader_top_k: int, es_index: str = None) :
|
|
|
32 |
if es_index is not None:
|
33 |
self._initialize_pipeline(self.es_host, es_index, self.es_user, self.es_password)
|
34 |
params = {"Retriever": {"top_k": retriever_top_k}, "Reader": {"top_k": reader_top_k}}
|
35 |
prediction = self.pipe.run( query = query, params = params)
|
36 |
return prediction["answers"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from haystack.document_stores import ElasticsearchDocumentStore
|
4 |
from haystack.pipelines import ExtractiveQAPipeline
|
5 |
import certifi
|
6 |
+
import datetime
|
7 |
+
import requests
|
8 |
+
from base64 import b64encode
|
9 |
|
10 |
ca_certs=certifi.where()
|
11 |
|
|
|
20 |
def __init__(self, es_host: str, es_index: str, es_user, es_password, reader_name_or_path: str, use_gpu = True) -> None:
    """Set up the extractive QA pipeline and the query logger.

    Args:
        es_host: Elasticsearch host, forwarded to the pipeline setup and the logger.
        es_index: Index handed to ``_initialize_pipeline``.
        es_user: Elasticsearch user name.
        es_password: Elasticsearch password.
        reader_name_or_path: Model name or path given to ``FARMReader``.
        use_gpu: Forwarded to ``FARMReader``.
    """
    # The reader is created once here and passed to the pipeline setup.
    farm_reader = FARMReader(
        model_name_or_path=reader_name_or_path,
        use_gpu=use_gpu,
        num_processes=1,
        context_window_size=200,
    )
    self._initialize_pipeline(es_host, es_index, es_user, es_password, reader=farm_reader)

    # Queries are recorded in a separate index named "log" on the same host.
    self.log = Log(es_host=es_host, es_index="log", es_user=es_user, es_password=es_password)
|
24 |
+
|
25 |
def _initialize_pipeline(self, es_host, es_index, es_user, es_password, reader = None):
|
26 |
if reader is not None:
|
27 |
self.reader = reader
|
|
|
33 |
self.pipe = ExtractiveQAPipeline(self.reader, self.retriever)
|
34 |
|
35 |
def search_by_query(self, query : str, retriever_top_k: int, reader_top_k: int, es_index: str = None) :
    """Record the query, run it through the QA pipeline and return the answers.

    Args:
        query: Question text; also written to the log.
        retriever_top_k: ``top_k`` passed to the "Retriever" node.
        reader_top_k: ``top_k`` passed to the "Reader" node.
        es_index: When not ``None``, the pipeline is re-initialised against
            this index before the query runs.

    Returns:
        The ``"answers"`` entry of the pipeline prediction.
    """
    # The query is logged first, before any pipeline work.
    self.log.write_log(query, "hfspace-informecomision")

    if es_index is not None:
        self._initialize_pipeline(self.es_host, es_index, self.es_user, self.es_password)

    node_params = {
        "Retriever": {"top_k": retriever_top_k},
        "Reader": {"top_k": reader_top_k},
    }
    return self.pipe.run(query=query, params=node_params)["answers"]
|
42 |
+
|
43 |
+
class Log():
    """Write small log records to an Elasticsearch index over HTTPS.

    Each record is POSTed as JSON to ``https://<es_host>:443/<es_index>/_doc``
    with an HTTP Basic ``Authorization`` header.
    """

    def __init__(self, es_host: str, es_index: str, es_user, es_password, timeout: float = 10.0) -> None:
        """Prepare the document endpoint and the authentication header.

        Args:
            es_host: Elasticsearch host name (no scheme, no port).
            es_index: Index that receives the log documents.
            es_user: User name for HTTP Basic authentication.
            es_password: Password for HTTP Basic authentication.
            timeout: Seconds to wait for the HTTP request before giving up.
        """
        self.elastic_endpoint = f"https://{es_host}:443/{es_index}/_doc"
        # Build the credentials from the arguments; a secret must never be
        # embedded in source code, and the caller's values must not be ignored.
        raw_credentials = f"{es_user}:{es_password}".encode("utf-8")
        self.credentials = b64encode(raw_credentials).decode("ascii")
        self.auth_header = {'Authorization': 'Basic %s' % self.credentials}
        self.timeout = timeout

    def write_log(self, message: str, source: str) -> None:
        """POST one log document; failures are reported but never raised.

        Args:
            message: Text to store in the ``message`` field.
            source: Value stored in the ``source`` field.
        """
        # The format string ends in "Z" (UTC), so the clock must be UTC too.
        created_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
        post_data = {
            "message": message,
            "createdDate": {
                "date": created_date
            },
            "source": source
        }
        try:
            r = requests.post(
                self.elastic_endpoint,
                json=post_data,
                headers=self.auth_header,
                timeout=self.timeout,
            )
        except requests.RequestException as err:
            # Logging is auxiliary: a network problem here must not abort the
            # search that triggered it.
            print(f"log write failed: {err}")
            return
        print(r.text)
|