adrien.aribaut-gaudin
committed on
Commit
•
8e58322
1
Parent(s):
3ca15d8
fix: gitignore for the database folder + prompt for requirements + 3 blocks max for best_sources
Browse files
- .gitignore +2 -1
- src/control/controller.py +6 -4
.gitignore
CHANGED
@@ -3,4 +3,5 @@ venv1
|
|
3 |
test/files_to_test/*
|
4 |
config_key.py
|
5 |
test
|
6 |
-
.env
|
|
|
|
3 |
test/files_to_test/*
|
4 |
config_key.py
|
5 |
test
|
6 |
+
.env
|
7 |
+
database
|
src/control/controller.py
CHANGED
@@ -283,8 +283,9 @@ class Controller:
|
|
283 |
"""
|
284 |
coll_name = "collection_for_docs"
|
285 |
collection = self.client_db.get_or_create_collection(coll_name)
|
286 |
-
|
287 |
-
|
|
|
288 |
self.retriever.collection = collection
|
289 |
|
290 |
def fill_collection(self, doc: Doc, collection: str):
|
@@ -295,7 +296,7 @@ class Controller:
|
|
295 |
|
296 |
|
297 |
@staticmethod
|
298 |
-
def _select_best_sources(sources: [Block], delta_1_2=0.15, delta_1_n=0.3, absolute=1.2, alpha=0.9) -> [Block]:
|
299 |
"""
|
300 |
Select the best sources: not far from the very best, not far from the last selected, and not too bad per se
|
301 |
"""
|
@@ -311,6 +312,7 @@ class Controller:
|
|
311 |
absolute *= alpha
|
312 |
else:
|
313 |
break
|
|
|
314 |
return best_sources
|
315 |
|
316 |
def generate_response_to_requirements(self):
|
@@ -324,7 +326,7 @@ class Controller:
|
|
324 |
while (len(context) > 15000) and i < len(sources_contents):
|
325 |
context = "\n".join(sources_contents[:-i])
|
326 |
i += 1
|
327 |
-
reponse_exigence = generate_response_to_exigence(exigence = exigence["Exigence"], titre_exigence = exigence["Titre"],
|
328 |
dict_of_excel_content[dict_of_excel_content.index(exigence)]["Conformité"] = reponse_exigence
|
329 |
dict_of_excel_content[dict_of_excel_content.index(exigence)]["Document"] = best_sources[0].doc
|
330 |
dict_of_excel_content[dict_of_excel_content.index(exigence)]["Paragraphes"] = "; ".join([block.index for block in best_sources])
|
|
|
283 |
"""
|
284 |
coll_name = "collection_for_docs"
|
285 |
collection = self.client_db.get_or_create_collection(coll_name)
|
286 |
+
if collection.count() == 0:
|
287 |
+
for doc in docs:
|
288 |
+
self.fill_collection(doc, collection)
|
289 |
self.retriever.collection = collection
|
290 |
|
291 |
def fill_collection(self, doc: Doc, collection: str):
|
|
|
296 |
|
297 |
|
298 |
@staticmethod
|
299 |
+
def _select_best_sources(sources: [Block], delta_1_2=0.15, delta_1_n=0.3, absolute=1.2, alpha=0.9, max_blocks=3) -> [Block]:
|
300 |
"""
|
301 |
Select the best sources: not far from the very best, not far from the last selected, and not too bad per se
|
302 |
"""
|
|
|
312 |
absolute *= alpha
|
313 |
else:
|
314 |
break
|
315 |
+
best_sources = sorted(best_sources, key=lambda x: x.distance)[:max_blocks]
|
316 |
return best_sources
|
317 |
|
318 |
def generate_response_to_requirements(self):
|
|
|
326 |
while (len(context) > 15000) and i < len(sources_contents):
|
327 |
context = "\n".join(sources_contents[:-i])
|
328 |
i += 1
|
329 |
+
reponse_exigence = generate_response_to_exigence(exigence = exigence["Exigence"], titre_exigence = exigence["Titre"], content = context)
|
330 |
dict_of_excel_content[dict_of_excel_content.index(exigence)]["Conformité"] = reponse_exigence
|
331 |
dict_of_excel_content[dict_of_excel_content.index(exigence)]["Document"] = best_sources[0].doc
|
332 |
dict_of_excel_content[dict_of_excel_content.index(exigence)]["Paragraphes"] = "; ".join([block.index for block in best_sources])
|