Jorge Henao committed on
Commit
3af52d7
1 Parent(s): f41f271

updated index

Browse files
.vscode/launch.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ // Use IntelliSense to learn about possible attributes.
3
+ // Hover to view descriptions of existing attributes.
4
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5
+ "version": "0.2.0",
6
+ "configurations": [
7
+ {
8
+ "name": "Python: Current File",
9
+ "type": "python",
10
+ "request": "launch",
11
+ "program": "${file}",
12
+ "console": "integratedTerminal",
13
+ "justMyCode": true
14
+ }
15
+ ]
16
+ }
__pycache__/config.cpython-38.pyc CHANGED
Binary files a/__pycache__/config.cpython-38.pyc and b/__pycache__/config.cpython-38.pyc differ
 
__pycache__/pinecode_quieries.cpython-38.pyc CHANGED
Binary files a/__pycache__/pinecode_quieries.cpython-38.pyc and b/__pycache__/pinecode_quieries.cpython-38.pyc differ
 
app_pinecode.py CHANGED
@@ -25,8 +25,8 @@ def search(question, retriever_top_k, reader_top_k):
25
  result = []
26
  for i in range(0, len(query_result)):
27
  item = query_result[i]
28
- result.append([[i+1], item.answer, item.context[:200], item.meta['title'], item.meta['source'], int(item.meta['page'])])
29
- #result.append([[i+1], item.answer, item.context[:200], item.meta['title']])
30
 
31
  return result
32
 
@@ -50,6 +50,8 @@ if __name__ == "__main__":
50
  # Every form must have a submit button.
51
  submitted = st.form_submit_button("Buscar")
52
 
 
 
53
  # on submit we execute search
54
  if(submitted):
55
  # set start time
 
25
  result = []
26
  for i in range(0, len(query_result)):
27
  item = query_result[i]
28
+ #result.append([[i+1], item.answer, item.context[:200], item.meta['title'], item.meta['source'], int(item.meta['page'])])
29
+ result.append([[i+1], item.answer, item.context[:200], item.meta['title']])
30
 
31
  return result
32
 
 
50
  # Every form must have a submit button.
51
  submitted = st.form_submit_button("Buscar")
52
 
53
+ results = search("que es el adres", retriever_top_k=5, reader_top_k=3)
54
+
55
  # on submit we execute search
56
  if(submitted):
57
  # set start time
config.py CHANGED
@@ -2,7 +2,7 @@ class Config():
2
  es_host = "saimon-askwdemocracy.es.us-central1.gcp.cloud.es.io"
3
  es_user = "elastic"
4
  es_password = "53f2a7a9-ea9d-4fd2-a8bc-f471b67f0262"
5
- proposals_index = "props"
6
  reader_model_name_or_path = "deepset/xlm-roberta-base-squad2-distilled"
7
  #reader_model_name_or_path = "deepset/xlm-roberta-base-squad2"
8
  use_gpu = True
 
2
  es_host = "saimon-askwdemocracy.es.us-central1.gcp.cloud.es.io"
3
  es_user = "elastic"
4
  es_password = "53f2a7a9-ea9d-4fd2-a8bc-f471b67f0262"
5
+ proposals_index = "semantic-text-search"
6
  reader_model_name_or_path = "deepset/xlm-roberta-base-squad2-distilled"
7
  #reader_model_name_or_path = "deepset/xlm-roberta-base-squad2"
8
  use_gpu = True
pinecode_quieries.py CHANGED
@@ -34,20 +34,23 @@ class PinecodeProposalQueries(DocumentQueries):
34
  self.document_store = PineconeDocumentStore(
35
  api_key=es_password,
36
  environment = "us-east1-gcp",
37
- index="props",
38
  similarity="cosine",
39
- embedding_dim=768
40
  )
41
  #self.retriever = BM25Retriever(document_store = self.document_store)
42
  self.retriever = EmbeddingRetriever(
43
  document_store=self.document_store,
44
- embedding_model="multi-qa-distilbert-dot-v1",
 
45
  model_format="sentence_transformers"
46
  )
47
- #self.document_store.update_embeddings(self.retriever, batch_size=16)
48
  self.pipe = ExtractiveQAPipeline(self.reader, self.retriever)
49
 
50
  def search_by_query(self, query : str, retriever_top_k: int, reader_top_k: int, es_index: str = None) :
 
 
51
  #self.log.write_log(query, "hfspace-informecomision")
52
  #if es_index is not None:
53
  #self._initialize_pipeline(self.es_host, es_index, self.es_user, self.es_password)
 
34
  self.document_store = PineconeDocumentStore(
35
  api_key=es_password,
36
  environment = "us-east1-gcp",
37
+ index="semantic-text-search",
38
  similarity="cosine",
39
+ embedding_dim=384
40
  )
41
  #self.retriever = BM25Retriever(document_store = self.document_store)
42
  self.retriever = EmbeddingRetriever(
43
  document_store=self.document_store,
44
+ #embedding_model="multi-qa-distilbert-dot-v1",
45
+ embedding_model = "sentence-transformers/msmarco-MiniLM-L6-cos-v5",
46
  model_format="sentence_transformers"
47
  )
48
+ #self.document_store.update_embeddings(self.retriever, update_existing_embeddings=False)
49
  self.pipe = ExtractiveQAPipeline(self.reader, self.retriever)
50
 
51
  def search_by_query(self, query : str, retriever_top_k: int, reader_top_k: int, es_index: str = None) :
52
+ #self.document_store.update_embeddings(self.retriever, update_existing_embeddings=False)
53
+
54
  #self.log.write_log(query, "hfspace-informecomision")
55
  #if es_index is not None:
56
  #self._initialize_pipeline(self.es_host, es_index, self.es_user, self.es_password)
pinecone_document_store.db ADDED
Binary file (73.7 kB). View file