adrien.aribaut-gaudin committed on
Commit
8e58322
1 Parent(s): 3ca15d8

fix: gitignore for the database folder + prompt for requirements + 3 blocks max for best_sources

Browse files
Files changed (2) hide show
  1. .gitignore +2 -1
  2. src/control/controller.py +6 -4
.gitignore CHANGED
@@ -3,4 +3,5 @@ venv1
3
  test/files_to_test/*
4
  config_key.py
5
  test
6
- .env
 
 
3
  test/files_to_test/*
4
  config_key.py
5
  test
6
+ .env
7
+ database
src/control/controller.py CHANGED
@@ -283,8 +283,9 @@ class Controller:
283
  """
284
  coll_name = "collection_for_docs"
285
  collection = self.client_db.get_or_create_collection(coll_name)
286
- for doc in docs:
287
- self.fill_collection(doc, collection)
 
288
  self.retriever.collection = collection
289
 
290
  def fill_collection(self, doc: Doc, collection: str):
@@ -295,7 +296,7 @@ class Controller:
295
 
296
 
297
  @staticmethod
298
- def _select_best_sources(sources: [Block], delta_1_2=0.15, delta_1_n=0.3, absolute=1.2, alpha=0.9) -> [Block]:
299
  """
300
  Select the best sources: not far from the very best, not far from the last selected, and not too bad per se
301
  """
@@ -311,6 +312,7 @@ class Controller:
311
  absolute *= alpha
312
  else:
313
  break
 
314
  return best_sources
315
 
316
  def generate_response_to_requirements(self):
@@ -324,7 +326,7 @@ class Controller:
324
  while (len(context) > 15000) and i < len(sources_contents):
325
  context = "\n".join(sources_contents[:-i])
326
  i += 1
327
- reponse_exigence = generate_response_to_exigence(exigence = exigence["Exigence"], titre_exigence = exigence["Titre"], context = context)
328
  dict_of_excel_content[dict_of_excel_content.index(exigence)]["Conformité"] = reponse_exigence
329
  dict_of_excel_content[dict_of_excel_content.index(exigence)]["Document"] = best_sources[0].doc
330
  dict_of_excel_content[dict_of_excel_content.index(exigence)]["Paragraphes"] = "; ".join([block.index for block in best_sources])
 
283
  """
284
  coll_name = "collection_for_docs"
285
  collection = self.client_db.get_or_create_collection(coll_name)
286
+ if collection.count() == 0:
287
+ for doc in docs:
288
+ self.fill_collection(doc, collection)
289
  self.retriever.collection = collection
290
 
291
  def fill_collection(self, doc: Doc, collection: str):
 
296
 
297
 
298
  @staticmethod
299
+ def _select_best_sources(sources: [Block], delta_1_2=0.15, delta_1_n=0.3, absolute=1.2, alpha=0.9, max_blocks=3) -> [Block]:
300
  """
301
  Select the best sources: not far from the very best, not far from the last selected, and not too bad per se
302
  """
 
312
  absolute *= alpha
313
  else:
314
  break
315
+ best_sources = sorted(best_sources, key=lambda x: x.distance)[:max_blocks]
316
  return best_sources
317
 
318
  def generate_response_to_requirements(self):
 
326
  while (len(context) > 15000) and i < len(sources_contents):
327
  context = "\n".join(sources_contents[:-i])
328
  i += 1
329
+ reponse_exigence = generate_response_to_exigence(exigence = exigence["Exigence"], titre_exigence = exigence["Titre"], content = context)
330
  dict_of_excel_content[dict_of_excel_content.index(exigence)]["Conformité"] = reponse_exigence
331
  dict_of_excel_content[dict_of_excel_content.index(exigence)]["Document"] = best_sources[0].doc
332
  dict_of_excel_content[dict_of_excel_content.index(exigence)]["Paragraphes"] = "; ".join([block.index for block in best_sources])