diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..a0239559b45f694363c1c3a68a2f25c564691276
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+__pycache__
+venv1
+temp/generated_files/*
+test/files_to_test/*
\ No newline at end of file
diff --git a/README.md b/README.md
index 601c06648e39c764fc69a56cf494c0b03bfea7a1..fea96e99bb2ff0d53030a03b57a567d4927fc970 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,13 @@
---
-title: Gdoc
-emoji: âĄ
-colorFrom: indigo
-colorTo: blue
+title: FormatDoc
+emoji: đ
+colorFrom: blue
+colorTo: red
sdk: gradio
-sdk_version: 3.45.2
+sdk_version: 3.34.0
app_file: app.py
pinned: false
+license: eupl-1.1
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..0e149118d0676e2a017d0c0078b18574f25dc643
--- /dev/null
+++ b/app.py
@@ -0,0 +1,24 @@
+import os
+from langchain.llms import OpenAI
+# from transformers import AutoTokenizer, AutoModelForCausalLM
+from config import config
+from src.control.controller import Controller
+import src.view.view as view
+
+os.environ["TOKENIZERS_PARALLELISM"] = "true"
+
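+# Fall back to the key stored in config_key.py when OPENAI_API_KEY is not set in the environment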
+if "OPENAI_API_KEY" not in os.environ:
+ from config_key import OPENAI_API_KEY
+
+ os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
+
+# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
+
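+# Use the OpenAI completion model by default; the Llama-2 lines are kept as a commented-out alternative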
+open_ai_model = OpenAI(temperature=0)
+# llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
+llm = open_ai_model
+
+ctrl = Controller(config)
+app = view.run(controller=ctrl, config=config)
+
+app.queue().launch()
diff --git a/config.py b/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c792060677e618ae2fc04f9a4d3132d2d046194
--- /dev/null
+++ b/config.py
@@ -0,0 +1,28 @@
+import os
+
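+# Paths, UI option labels and French log-message fragments shared across the app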
+config = {
+ 'templates_path': 'data/templates',
+ 'these_docs_path': 'data/examples/',
+ 'new_docs_path': 'data/examples/',
+ 'default_template_index': 0,
+ 'styled_docs_path': 'temp/styles_files',
+ 'generated_docs_path': 'temp/generated_files',
+ 'options': ["Recentrer les tableaux", "Recentrer les images (sauf les flottantes)", "Ajouter le template avant", "Justifier le texte"],
+ 'max_styles': 300,
+ 'log_msg': {
+ 'options_applied': 'Les options suivantes ont été appliquées : \n',
+ 'suppressed_styles': 'Les styles suivants ont été supprimés : \n',
+ 'modified_styles': 'Les styles suivants ont été modifiés : \n',
+ 'added_styles': 'Les styles suivants ont été ajoutés :\n',
+ 'modified_style': ' - ',
+ 'color': ' la couleur,',
+ 'font size': ' la taille de la fonte,',
+ 'font': ' la fonte,',
+ 'all_caps': ' les majuscules,',
+ 'bold': ' le caractère gras,',
+ 'document': '\n============================\n Sur le document : ',
+ },
+}
+
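+# Collect the available .docx templates from the templates folder at import time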
+templates = [t for t in os.listdir(config['templates_path']) if t.endswith((".docx",))]
+config.update({'templates': templates})
diff --git a/config_key.py b/config_key.py
new file mode 100644
index 0000000000000000000000000000000000000000..9066512c319cc8c527be017a94174cf494fd35fb
--- /dev/null
+++ b/config_key.py
@@ -0,0 +1 @@
+OPENAI_API_KEY = "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"
diff --git a/data/doc.xml b/data/doc.xml
new file mode 100644
index 0000000000000000000000000000000000000000..162764c458598c24e073f5292bb8d20657c12c04
--- /dev/null
+++ b/data/doc.xml
@@ -0,0 +1,46 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Réponse à
+
+
+
+
+
+
+
+
+
+ Nom du Client
+
+
+
+
+
+
+
+
+
+
+
+ pour leStyle pour cette page seulement (non recopié en en-tête)Nom du projetDate de remiseJJ/MM/AAAAStrictement confidentielrightbottom00clause de confidentialitéToute information contenue dans ce document strictement confidentiel est fournie à STYLEREF .CompanyName \\* MERGEFORMAT Nom du Client dans le seul but de répondre à ses demandes et ne peut être utilisée à d’autres fins. STYLEREF .CompanyName \\* MERGEFORMAT Nom du Client s’engage à ne pas publier ni faire connaître tout ou partie de ces informations à quelque tierce partie que ce soit sans l’autorisation préalable d’Orange.© copyright 2018Tous droits réservésvotre contactNom :Titre :Email :@orange.comTél :Mobile :Adresse :Site Web :http://www.orange-business.comTable des matières TOC \\o "1-3" \\h \\z \\u Aucune entrée de table des matières n\'a été trouvée.Liste des tableaux TOC \\h \\z \\c "Tableau" Aucune entrée de table d\'illustration n\'a été trouvée.Liste des figures TOC \\h \\z \\c "Figure" Aucune entrée de table d\'illustration n\'a été trouvée.CccQsddDsbvbvnFezjfzJzekkfjk Nf nvf z,v$
\ No newline at end of file
diff --git a/data/examples/AldoMoro.docx b/data/examples/AldoMoro.docx
new file mode 100644
index 0000000000000000000000000000000000000000..ceef84680b54bf33053892157530b58487042635
Binary files /dev/null and b/data/examples/AldoMoro.docx differ
diff --git a/data/examples/Aldo_Moro_simple.docx b/data/examples/Aldo_Moro_simple.docx
new file mode 100644
index 0000000000000000000000000000000000000000..fda7cf9bc2fb202cb1fa5d6b7b37540384f6f7eb
Binary files /dev/null and b/data/examples/Aldo_Moro_simple.docx differ
diff --git a/data/examples/Aldo_Moro_simple_rouge.docx b/data/examples/Aldo_Moro_simple_rouge.docx
new file mode 100644
index 0000000000000000000000000000000000000000..84d686410baefb07c465378c3dbb707d7d60f1d4
Binary files /dev/null and b/data/examples/Aldo_Moro_simple_rouge.docx differ
diff --git a/data/examples/Aldo_Moro_simple_style.docx b/data/examples/Aldo_Moro_simple_style.docx
new file mode 100644
index 0000000000000000000000000000000000000000..a763466ba6b53e84de1f076092f4ebdd66e6cc53
Binary files /dev/null and b/data/examples/Aldo_Moro_simple_style.docx differ
diff --git a/data/examples/Aldo_Moro_simple_vert.docx b/data/examples/Aldo_Moro_simple_vert.docx
new file mode 100644
index 0000000000000000000000000000000000000000..726e56b9a747efca12e490c1b0af329a35954085
Binary files /dev/null and b/data/examples/Aldo_Moro_simple_vert.docx differ
diff --git a/data/examples/CorpTemplate.docx b/data/examples/CorpTemplate.docx
new file mode 100644
index 0000000000000000000000000000000000000000..8dd942a03290cfa719726d6891a1cefe55554111
Binary files /dev/null and b/data/examples/CorpTemplate.docx differ
diff --git a/data/examples/CorpTemplate_.docx b/data/examples/CorpTemplate_.docx
new file mode 100644
index 0000000000000000000000000000000000000000..3506e2577b9a86f84e93c488f982a07abd4547e5
Binary files /dev/null and b/data/examples/CorpTemplate_.docx differ
diff --git a/data/examples/Tests/Center_image+table_front.docx b/data/examples/Tests/Center_image+table_front.docx
new file mode 100644
index 0000000000000000000000000000000000000000..1e30545e3b7427249e2cbce90c2b7b7ac9238d18
Binary files /dev/null and b/data/examples/Tests/Center_image+table_front.docx differ
diff --git a/data/examples/Tests/Center_image+table_no_front.docx b/data/examples/Tests/Center_image+table_no_front.docx
new file mode 100644
index 0000000000000000000000000000000000000000..984763c131c7ee707e6378fc09daaa85662280f8
Binary files /dev/null and b/data/examples/Tests/Center_image+table_no_front.docx differ
diff --git a/data/examples/Tests/Centering_no_front_pages.docx b/data/examples/Tests/Centering_no_front_pages.docx
new file mode 100644
index 0000000000000000000000000000000000000000..641cd71cd7a835c0b6b96e1213ae7877c60306ba
Binary files /dev/null and b/data/examples/Tests/Centering_no_front_pages.docx differ
diff --git a/data/examples/Tests/Hard_styles_to_modify_front.docx b/data/examples/Tests/Hard_styles_to_modify_front.docx
new file mode 100644
index 0000000000000000000000000000000000000000..0bc0ea9888f4b9f351b3dbdfff1a92fc853e3ff7
Binary files /dev/null and b/data/examples/Tests/Hard_styles_to_modify_front.docx differ
diff --git a/data/examples/Tests/MULTIPLE_IMAGES_ONE_RUN.docx b/data/examples/Tests/MULTIPLE_IMAGES_ONE_RUN.docx
new file mode 100644
index 0000000000000000000000000000000000000000..f148cd8ddb3bbec101629ed282c70ffffd6b9a50
Binary files /dev/null and b/data/examples/Tests/MULTIPLE_IMAGES_ONE_RUN.docx differ
diff --git a/data/examples/Tests/Real_possible_example_front_1.docx b/data/examples/Tests/Real_possible_example_front_1.docx
new file mode 100644
index 0000000000000000000000000000000000000000..4f05c189417301be069dbbd583f67fd280e61c34
Binary files /dev/null and b/data/examples/Tests/Real_possible_example_front_1.docx differ
diff --git a/data/examples/Tests/TEST FLOATING IMAGE.docx b/data/examples/Tests/TEST FLOATING IMAGE.docx
new file mode 100644
index 0000000000000000000000000000000000000000..5af4f2b649fc03e9f7d19975c7b4db1c04d2762d
Binary files /dev/null and b/data/examples/Tests/TEST FLOATING IMAGE.docx differ
diff --git a/data/examples/Tests/Test_generation_musique.docx b/data/examples/Tests/Test_generation_musique.docx
new file mode 100644
index 0000000000000000000000000000000000000000..0588dac90cf30ae1d230acc4ea29efd3fb1f27af
Binary files /dev/null and b/data/examples/Tests/Test_generation_musique.docx differ
diff --git a/data/examples/Tests/Test_image.docx b/data/examples/Tests/Test_image.docx
new file mode 100644
index 0000000000000000000000000000000000000000..30c865d19d0860f0dc322b64550c4580f14a5ef3
Binary files /dev/null and b/data/examples/Tests/Test_image.docx differ
diff --git a/data/examples/[Content_Types].xml b/data/examples/[Content_Types].xml
new file mode 100644
index 0000000000000000000000000000000000000000..4b35629217f39629a1d0880be54361e325debc0a
--- /dev/null
+++ b/data/examples/[Content_Types].xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/_rels/.rels b/data/examples/_rels/.rels
new file mode 100644
index 0000000000000000000000000000000000000000..33f70fb26e29067a20c2b4489c1da313dffcab67
--- /dev/null
+++ b/data/examples/_rels/.rels
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/[Content_Types].xml b/data/examples/corpd/[Content_Types].xml
new file mode 100644
index 0000000000000000000000000000000000000000..c8c6a1c8b13f60d272cd939912d30f59a5f84cb1
--- /dev/null
+++ b/data/examples/corpd/[Content_Types].xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/_rels/.rels b/data/examples/corpd/_rels/.rels
new file mode 100644
index 0000000000000000000000000000000000000000..57be32812b37eeedd2ff3322519e4fbf716a6003
--- /dev/null
+++ b/data/examples/corpd/_rels/.rels
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/customXml/_rels/item1.xml.rels b/data/examples/corpd/customXml/_rels/item1.xml.rels
new file mode 100644
index 0000000000000000000000000000000000000000..b4bc8d63c3905c71ab20bc61e63546740e2710a6
--- /dev/null
+++ b/data/examples/corpd/customXml/_rels/item1.xml.rels
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/customXml/_rels/item2.xml.rels b/data/examples/corpd/customXml/_rels/item2.xml.rels
new file mode 100644
index 0000000000000000000000000000000000000000..49b7c9474cb432ed52a61f222cb648ca93e85289
--- /dev/null
+++ b/data/examples/corpd/customXml/_rels/item2.xml.rels
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/customXml/_rels/item3.xml.rels b/data/examples/corpd/customXml/_rels/item3.xml.rels
new file mode 100644
index 0000000000000000000000000000000000000000..0bdc1ef975e759bdfe68409d4bb27734233257e4
--- /dev/null
+++ b/data/examples/corpd/customXml/_rels/item3.xml.rels
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/customXml/_rels/item4.xml.rels b/data/examples/corpd/customXml/_rels/item4.xml.rels
new file mode 100644
index 0000000000000000000000000000000000000000..cdb040627942f2f253904511b4ca7e3748e0ca9d
--- /dev/null
+++ b/data/examples/corpd/customXml/_rels/item4.xml.rels
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/customXml/item1.xml b/data/examples/corpd/customXml/item1.xml
new file mode 100644
index 0000000000000000000000000000000000000000..607faca2f583f4a566c4b899e71705ed41870c3e
--- /dev/null
+++ b/data/examples/corpd/customXml/item1.xml
@@ -0,0 +1 @@
+DocumentLibraryFormDocumentLibraryFormDocumentLibraryForm
\ No newline at end of file
diff --git a/data/examples/corpd/customXml/item2.xml b/data/examples/corpd/customXml/item2.xml
new file mode 100644
index 0000000000000000000000000000000000000000..8c4dde85efd7fc64dd45da7a8e0a5566af41ed58
--- /dev/null
+++ b/data/examples/corpd/customXml/item2.xml
@@ -0,0 +1,111 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This value indicates the number of saves or revisions. The application is responsible for updating this value after each revision.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/customXml/item3.xml b/data/examples/corpd/customXml/item3.xml
new file mode 100644
index 0000000000000000000000000000000000000000..c8543fbf7d8eba5d1a7a3bde3cca4349afb73687
--- /dev/null
+++ b/data/examples/corpd/customXml/item3.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/data/examples/corpd/customXml/item4.xml b/data/examples/corpd/customXml/item4.xml
new file mode 100644
index 0000000000000000000000000000000000000000..a2f9e90374eb0c0c98a6fe3a256800eb96fb3e43
--- /dev/null
+++ b/data/examples/corpd/customXml/item4.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/data/examples/corpd/customXml/itemProps1.xml b/data/examples/corpd/customXml/itemProps1.xml
new file mode 100644
index 0000000000000000000000000000000000000000..e9a990bc2360b07c29b63ab079470dec8529e82a
--- /dev/null
+++ b/data/examples/corpd/customXml/itemProps1.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/customXml/itemProps2.xml b/data/examples/corpd/customXml/itemProps2.xml
new file mode 100644
index 0000000000000000000000000000000000000000..b446d876dab26363da2a8e91c272bd0f0e3bcfb6
--- /dev/null
+++ b/data/examples/corpd/customXml/itemProps2.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/customXml/itemProps3.xml b/data/examples/corpd/customXml/itemProps3.xml
new file mode 100644
index 0000000000000000000000000000000000000000..c0201c270758a974dc5f8cdae2be3cf635d413ce
--- /dev/null
+++ b/data/examples/corpd/customXml/itemProps3.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/customXml/itemProps4.xml b/data/examples/corpd/customXml/itemProps4.xml
new file mode 100644
index 0000000000000000000000000000000000000000..b94b08d6cc5c54037c32433394a75534c20764d7
--- /dev/null
+++ b/data/examples/corpd/customXml/itemProps4.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/docProps/app.xml b/data/examples/corpd/docProps/app.xml
new file mode 100644
index 0000000000000000000000000000000000000000..c464eef06ecbd203a18cf34dcf59fb2ea79151b6
--- /dev/null
+++ b/data/examples/corpd/docProps/app.xml
@@ -0,0 +1,2 @@
+
+Corporate Template Fr.dotx15175967Microsoft Office Word082falseTitre1Title1ORANGE FT Groupfalse1140falsefalse16.0000
\ No newline at end of file
diff --git a/data/examples/corpd/docProps/core.xml b/data/examples/corpd/docProps/core.xml
new file mode 100644
index 0000000000000000000000000000000000000000..c0f78d86a0ab2522083d0884a8a954cdb6021325
--- /dev/null
+++ b/data/examples/corpd/docProps/core.xml
@@ -0,0 +1,2 @@
+
+Microsoft Office Userlaura peligry22023-07-07T10:21:00Z2023-07-07T10:21:00Z
\ No newline at end of file
diff --git a/data/examples/corpd/docProps/custom.xml b/data/examples/corpd/docProps/custom.xml
new file mode 100644
index 0000000000000000000000000000000000000000..62a06e8d2f7d68b0eb35cb9f6e591b1bb63c0708
--- /dev/null
+++ b/data/examples/corpd/docProps/custom.xml
@@ -0,0 +1,2 @@
+
+0x0101000D129E6A83B3234C936C6D85FE6AF210
\ No newline at end of file
diff --git a/data/examples/corpd/word/_rels/document.xml.rels b/data/examples/corpd/word/_rels/document.xml.rels
new file mode 100644
index 0000000000000000000000000000000000000000..e52e333ef4c1119b2c73745f09496bfb04912c66
--- /dev/null
+++ b/data/examples/corpd/word/_rels/document.xml.rels
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/_rels/header1.xml.rels b/data/examples/corpd/word/_rels/header1.xml.rels
new file mode 100644
index 0000000000000000000000000000000000000000..27811d643015603616c33c50d05f8237ae83426d
--- /dev/null
+++ b/data/examples/corpd/word/_rels/header1.xml.rels
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/_rels/header2.xml.rels b/data/examples/corpd/word/_rels/header2.xml.rels
new file mode 100644
index 0000000000000000000000000000000000000000..408fe050e0eb63404b52fb4734985f883167c30e
--- /dev/null
+++ b/data/examples/corpd/word/_rels/header2.xml.rels
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/_rels/header3.xml.rels b/data/examples/corpd/word/_rels/header3.xml.rels
new file mode 100644
index 0000000000000000000000000000000000000000..408fe050e0eb63404b52fb4734985f883167c30e
--- /dev/null
+++ b/data/examples/corpd/word/_rels/header3.xml.rels
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/_rels/header4.xml.rels b/data/examples/corpd/word/_rels/header4.xml.rels
new file mode 100644
index 0000000000000000000000000000000000000000..2c31216f150550a59258ca158a0e89b83a0925d4
--- /dev/null
+++ b/data/examples/corpd/word/_rels/header4.xml.rels
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/_rels/header5.xml.rels b/data/examples/corpd/word/_rels/header5.xml.rels
new file mode 100644
index 0000000000000000000000000000000000000000..408fe050e0eb63404b52fb4734985f883167c30e
--- /dev/null
+++ b/data/examples/corpd/word/_rels/header5.xml.rels
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/_rels/header6.xml.rels b/data/examples/corpd/word/_rels/header6.xml.rels
new file mode 100644
index 0000000000000000000000000000000000000000..5820cdd45108ec7557e5ded551273cbac0220891
--- /dev/null
+++ b/data/examples/corpd/word/_rels/header6.xml.rels
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/_rels/header7.xml.rels b/data/examples/corpd/word/_rels/header7.xml.rels
new file mode 100644
index 0000000000000000000000000000000000000000..408fe050e0eb63404b52fb4734985f883167c30e
--- /dev/null
+++ b/data/examples/corpd/word/_rels/header7.xml.rels
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/_rels/settings.xml.rels b/data/examples/corpd/word/_rels/settings.xml.rels
new file mode 100644
index 0000000000000000000000000000000000000000..93568428252d86b8394e0e6c22596ac2185dafd4
--- /dev/null
+++ b/data/examples/corpd/word/_rels/settings.xml.rels
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/document.xml b/data/examples/corpd/word/document.xml
new file mode 100644
index 0000000000000000000000000000000000000000..049d987d5919d53eabd0af6ba63059cc96ffd179
--- /dev/null
+++ b/data/examples/corpd/word/document.xml
@@ -0,0 +1,2 @@
+
+Réponse à Nom du Clientpour leStyle pour cette page seulement (non recopié en en-tête)Nom du projetDate de remiseJJ/MM/AAAAStrictement confidentielrightbottom00clause de confidentialitéToute information contenue dans ce document strictement confidentiel est fournie à STYLEREF .CompanyName \* MERGEFORMAT Nom du Client dans le seul but de répondre à ses demandes et ne peut être utilisée à d’autres fins. STYLEREF .CompanyName \* MERGEFORMAT Nom du Client s’engage à ne pas publier ni faire connaître tout ou partie de ces informations à quelque tierce partie que ce soit sans l’autorisation préalable d’Orange.© copyright 2018Tous droits réservésvotre contactNom :Titre :Email :@orange.comTél :Mobile :Adresse :Site Web :http://www.orange-business.comTable des matières TOC \o "1-3" \h \z \u Aucune entrée de table des matières n'a été trouvée.Liste des tableaux TOC \h \z \c "Tableau" Aucune entrée de table d'illustration n'a été trouvée.Liste des figures TOC \h \z \c "Figure" Aucune entrée de table d'illustration n'a été trouvée.CccQsddDsbvbvnFezjfzJzekkfjk Nf nvf z,v$
\ No newline at end of file
diff --git a/data/examples/corpd/word/endnotes.xml b/data/examples/corpd/word/endnotes.xml
new file mode 100644
index 0000000000000000000000000000000000000000..de29e959874fa3f20127ac60e15ea4da0c178d12
--- /dev/null
+++ b/data/examples/corpd/word/endnotes.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/fontTable.xml b/data/examples/corpd/word/fontTable.xml
new file mode 100644
index 0000000000000000000000000000000000000000..46f12af8b92908f815ce495477d16a7391c1fc04
--- /dev/null
+++ b/data/examples/corpd/word/fontTable.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/footer1.xml b/data/examples/corpd/word/footer1.xml
new file mode 100644
index 0000000000000000000000000000000000000000..a52f7aea4b378fc24a99a6b1730b451ccdd2c7e4
--- /dev/null
+++ b/data/examples/corpd/word/footer1.xml
@@ -0,0 +1,2 @@
+
+Clause de confidentialitéPage PAGE \* Arabic \* MERGEFORMAT 2 sur 4 STYLEREF .Classification \* MERGEFORMAT Strictement confidentiel STYLEREF .DateDue \* MERGEFORMAT JJ/MM/AAAA
\ No newline at end of file
diff --git a/data/examples/corpd/word/footer2.xml b/data/examples/corpd/word/footer2.xml
new file mode 100644
index 0000000000000000000000000000000000000000..f4d1bd44b49157c9f44a8ee2b021fe7755c8cc12
--- /dev/null
+++ b/data/examples/corpd/word/footer2.xml
@@ -0,0 +1,2 @@
+
+Clause de confidentialitéPage PAGE \* Arabic \* MERGEFORMAT 2 sur 5 STYLEREF .Classification \* MERGEFORMAT Strictement confidentielJJ/MM/AAAA
\ No newline at end of file
diff --git a/data/examples/corpd/word/footer3.xml b/data/examples/corpd/word/footer3.xml
new file mode 100644
index 0000000000000000000000000000000000000000..887fd4a417c5d5b02e3a9a9595998a468535552b
--- /dev/null
+++ b/data/examples/corpd/word/footer3.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/footer4.xml b/data/examples/corpd/word/footer4.xml
new file mode 100644
index 0000000000000000000000000000000000000000..b42804bf24934079b10fd432bffb3b77a6b1bbc2
--- /dev/null
+++ b/data/examples/corpd/word/footer4.xml
@@ -0,0 +1,2 @@
+
+Chapitre STYLEREF 1 \n Erreur ! Il n'y a pas de texte répondant à ce style dans ce document.: STYLEREF 1 Erreur ! Il n'y a pas de texte répondant à ce style dans ce document.Page PAGE \* Arabic \* MERGEFORMAT 4 sur NUMPAGES \* Arabic \* MERGEFORMAT 3 STYLEREF .Classification \* MERGEFORMAT Strictement confidentiel STYLEREF .DateDue \* MERGEFORMAT JJ/MM/AAAA
\ No newline at end of file
diff --git a/data/examples/corpd/word/footer5.xml b/data/examples/corpd/word/footer5.xml
new file mode 100644
index 0000000000000000000000000000000000000000..da20c06ed7aeb83a691f78691ae9204382f35b7b
--- /dev/null
+++ b/data/examples/corpd/word/footer5.xml
@@ -0,0 +1,2 @@
+
+Table des matièresPage PAGE \* Arabic \* MERGEFORMAT 3 sur 4 STYLEREF .Classification \* MERGEFORMAT Strictement confidentielJJ/MM/AAAA
\ No newline at end of file
diff --git a/data/examples/corpd/word/footer6.xml b/data/examples/corpd/word/footer6.xml
new file mode 100644
index 0000000000000000000000000000000000000000..d1a3bb3e0c254bbd0fbf24e0692526e7d90657bf
--- /dev/null
+++ b/data/examples/corpd/word/footer6.xml
@@ -0,0 +1,2 @@
+
+Table des matières STYLEREF .DateDue \* MERGEFORMAT JJ/MM/AAAA STYLEREF .Classification \* MERGEFORMAT Strictement confidentielpage PAGE \* Arabic \* MERGEFORMAT 3 sur NUMPAGES \* Arabic \* MERGEFORMAT 3
\ No newline at end of file
diff --git a/data/examples/corpd/word/footer7.xml b/data/examples/corpd/word/footer7.xml
new file mode 100644
index 0000000000000000000000000000000000000000..3d046deb495185a99d75224be926527984344a16
--- /dev/null
+++ b/data/examples/corpd/word/footer7.xml
@@ -0,0 +1,2 @@
+
+Chapitre STYLEREF .Titre1 \w 1. : STYLEREF .Titre1 CccPage PAGE \* Arabic \* MERGEFORMAT 4 sur 4 STYLEREF .Classification \* MERGEFORMAT Strictement confidentielJJ/MM/AAAA
\ No newline at end of file
diff --git a/data/examples/corpd/word/footnotes.xml b/data/examples/corpd/word/footnotes.xml
new file mode 100644
index 0000000000000000000000000000000000000000..e45e1072af4035763047e13be101f4d63aabd2bb
--- /dev/null
+++ b/data/examples/corpd/word/footnotes.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/header1.xml b/data/examples/corpd/word/header1.xml
new file mode 100644
index 0000000000000000000000000000000000000000..020554a46f1e20e5171820586ef94e8ba188a8cc
--- /dev/null
+++ b/data/examples/corpd/word/header1.xml
@@ -0,0 +1,2 @@
+
+ STYLEREF .CompanyName Nom du Client STYLEREF .ProjectName Nom du projet
\ No newline at end of file
diff --git a/data/examples/corpd/word/header2.xml b/data/examples/corpd/word/header2.xml
new file mode 100644
index 0000000000000000000000000000000000000000..84fe067f935e87297de9cfdcb6e6820516c9688d
--- /dev/null
+++ b/data/examples/corpd/word/header2.xml
@@ -0,0 +1,2 @@
+
+28575028575000 STYLEREF .CompanyName Nom du Client STYLEREF .ProjectName Nom du projet
\ No newline at end of file
diff --git a/data/examples/corpd/word/header3.xml b/data/examples/corpd/word/header3.xml
new file mode 100644
index 0000000000000000000000000000000000000000..e283ea0005600736360b74bf1bdc8c0f8f522962
--- /dev/null
+++ b/data/examples/corpd/word/header3.xml
@@ -0,0 +1,2 @@
+
+285750285750
\ No newline at end of file
diff --git a/data/examples/corpd/word/header4.xml b/data/examples/corpd/word/header4.xml
new file mode 100644
index 0000000000000000000000000000000000000000..329f518088ebd93e495940b23b3f780562bf44fa
--- /dev/null
+++ b/data/examples/corpd/word/header4.xml
@@ -0,0 +1,2 @@
+
+ STYLEREF .CompanyName Nom du Client STYLEREF .ProjectName Nom du projet
\ No newline at end of file
diff --git a/data/examples/corpd/word/header5.xml b/data/examples/corpd/word/header5.xml
new file mode 100644
index 0000000000000000000000000000000000000000..0483f3ed332a3f7ce9b5683e2c6b856059033e38
--- /dev/null
+++ b/data/examples/corpd/word/header5.xml
@@ -0,0 +1,2 @@
+
+28575028575000 STYLEREF .CompanyName Nom du Client STYLEREF .ProjectName Nom du projet
\ No newline at end of file
diff --git a/data/examples/corpd/word/header6.xml b/data/examples/corpd/word/header6.xml
new file mode 100644
index 0000000000000000000000000000000000000000..5e0e63b4d37d5f021eea10d6ef84ba682766ee67
--- /dev/null
+++ b/data/examples/corpd/word/header6.xml
@@ -0,0 +1,2 @@
+
+ STYLEREF .CompanyName Nom du Client STYLEREF .ProjectName Nom du projet
\ No newline at end of file
diff --git a/data/examples/corpd/word/header7.xml b/data/examples/corpd/word/header7.xml
new file mode 100644
index 0000000000000000000000000000000000000000..80ef2851a61aa928508cad05479088d9098ce6ea
--- /dev/null
+++ b/data/examples/corpd/word/header7.xml
@@ -0,0 +1,2 @@
+
+28575028575000 STYLEREF .CompanyName Nom du Client STYLEREF .ProjectName Nom du projet
\ No newline at end of file
diff --git a/data/examples/corpd/word/media/image1.jpg b/data/examples/corpd/word/media/image1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cdd57f3616f13b7f5a64995262ba99f99c178e70
Binary files /dev/null and b/data/examples/corpd/word/media/image1.jpg differ
diff --git a/data/examples/corpd/word/media/image2.png b/data/examples/corpd/word/media/image2.png
new file mode 100644
index 0000000000000000000000000000000000000000..8173ac6213b34cc908683901a428125d96b6fa81
Binary files /dev/null and b/data/examples/corpd/word/media/image2.png differ
diff --git a/data/examples/corpd/word/media/image3.png b/data/examples/corpd/word/media/image3.png
new file mode 100644
index 0000000000000000000000000000000000000000..89970e258792fb4107578076a63f9648791a5b04
Binary files /dev/null and b/data/examples/corpd/word/media/image3.png differ
diff --git a/data/examples/corpd/word/media/image4.png b/data/examples/corpd/word/media/image4.png
new file mode 100644
index 0000000000000000000000000000000000000000..16274e32eff500fd931f03b60f23b0d4e59f3a8a
Binary files /dev/null and b/data/examples/corpd/word/media/image4.png differ
diff --git a/data/examples/corpd/word/media/image5.jpeg b/data/examples/corpd/word/media/image5.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..68eae97b8f1dce5907d11dab74b37d4d8084286f
Binary files /dev/null and b/data/examples/corpd/word/media/image5.jpeg differ
diff --git a/data/examples/corpd/word/numbering.xml b/data/examples/corpd/word/numbering.xml
new file mode 100644
index 0000000000000000000000000000000000000000..a7962290212b1c9af955c439296cfe05d621b24c
--- /dev/null
+++ b/data/examples/corpd/word/numbering.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/settings.xml b/data/examples/corpd/word/settings.xml
new file mode 100644
index 0000000000000000000000000000000000000000..ec1ec079cab22fd4892dea29dec9b1d5ccd03261
--- /dev/null
+++ b/data/examples/corpd/word/settings.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/styles.xml b/data/examples/corpd/word/styles.xml
new file mode 100644
index 0000000000000000000000000000000000000000..e0abe63fdebd8f3410c4fc376b9170aa2f8f42c4
--- /dev/null
+++ b/data/examples/corpd/word/styles.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/theme/theme1.xml b/data/examples/corpd/word/theme/theme1.xml
new file mode 100644
index 0000000000000000000000000000000000000000..5d801b6c8939358854ad57e1f0e528826c58b89b
--- /dev/null
+++ b/data/examples/corpd/word/theme/theme1.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corpd/word/webSettings.xml b/data/examples/corpd/word/webSettings.xml
new file mode 100644
index 0000000000000000000000000000000000000000..92bceda3319c3d17490e271ec16d0a63b9f4b883
--- /dev/null
+++ b/data/examples/corpd/word/webSettings.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/corporate_simple.docx b/data/examples/corporate_simple.docx
new file mode 100644
index 0000000000000000000000000000000000000000..6cf9e77f3b942e2e4bbd936f5e03aa9cf37656f1
Binary files /dev/null and b/data/examples/corporate_simple.docx differ
diff --git a/data/examples/corporate_simple_newBody.docx b/data/examples/corporate_simple_newBody.docx
new file mode 100644
index 0000000000000000000000000000000000000000..d9c31ca18e34469deaab6c13a90066bbcd8f1d84
Binary files /dev/null and b/data/examples/corporate_simple_newBody.docx differ
diff --git a/data/examples/corporate_simple_titre1_modif.docx b/data/examples/corporate_simple_titre1_modif.docx
new file mode 100644
index 0000000000000000000000000000000000000000..d028c28f9e59579d08c01597c646950ca9112741
Binary files /dev/null and b/data/examples/corporate_simple_titre1_modif.docx differ
diff --git a/data/examples/docProps/app.xml b/data/examples/docProps/app.xml
new file mode 100644
index 0000000000000000000000000000000000000000..118dc89eff84ee478d2756570b5d6a43a7ad73be
--- /dev/null
+++ b/data/examples/docProps/app.xml
@@ -0,0 +1,2 @@
+
+Normal.dotm211480Microsoft Office Word011falsefalse93falsefalse16.0000
\ No newline at end of file
diff --git a/data/examples/docProps/core.xml b/data/examples/docProps/core.xml
new file mode 100644
index 0000000000000000000000000000000000000000..1c239f6c345e7e993453a1c296dff336ba2635cf
--- /dev/null
+++ b/data/examples/docProps/core.xml
@@ -0,0 +1,2 @@
+
+laura peligrylaura peligry22023-06-27T07:01:00Z2023-07-06T08:37:00Z
\ No newline at end of file
diff --git a/data/examples/word/_rels/document.xml.rels b/data/examples/word/_rels/document.xml.rels
new file mode 100644
index 0000000000000000000000000000000000000000..c4308925acfd5ae11927e3f6a033c9d6bf8121ea
--- /dev/null
+++ b/data/examples/word/_rels/document.xml.rels
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/word/document.xml b/data/examples/word/document.xml
new file mode 100644
index 0000000000000000000000000000000000000000..939734d238a3f2057720733adb6f27a93f00376e
--- /dev/null
+++ b/data/examples/word/document.xml
@@ -0,0 +1,2 @@
+
+Ceci est un titre 1Et ceci un titre 2Et enfin un titre 3Et là du normalEt là du newBody2
\ No newline at end of file
diff --git a/data/examples/word/fontTable.xml b/data/examples/word/fontTable.xml
new file mode 100644
index 0000000000000000000000000000000000000000..27aba94e9b6fc8884fc775c562ddd6ab261238d0
--- /dev/null
+++ b/data/examples/word/fontTable.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/word/numbering.xml b/data/examples/word/numbering.xml
new file mode 100644
index 0000000000000000000000000000000000000000..75809d495d4dc4de0d903c5be86d162d96c0da1b
--- /dev/null
+++ b/data/examples/word/numbering.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/word/settings.xml b/data/examples/word/settings.xml
new file mode 100644
index 0000000000000000000000000000000000000000..4873b406c40be17303f133d16edd8922d4ffbaf6
--- /dev/null
+++ b/data/examples/word/settings.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/word/styles.xml b/data/examples/word/styles.xml
new file mode 100644
index 0000000000000000000000000000000000000000..2e0eef42b73299cc1081509c824344cc411f14fa
--- /dev/null
+++ b/data/examples/word/styles.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/word/theme/theme1.xml b/data/examples/word/theme/theme1.xml
new file mode 100644
index 0000000000000000000000000000000000000000..27e7bdab38772c5dbbfc1a7dcbc03397953a3430
--- /dev/null
+++ b/data/examples/word/theme/theme1.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/word/webSettings.xml b/data/examples/word/webSettings.xml
new file mode 100644
index 0000000000000000000000000000000000000000..67b7983172279b25fca138d82da55825a377ae7b
--- /dev/null
+++ b/data/examples/word/webSettings.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/data/examples/word_simple.docx b/data/examples/word_simple.docx
new file mode 100644
index 0000000000000000000000000000000000000000..1395b80baf6ef9db48aad4bb22e68a6c23de7671
Binary files /dev/null and b/data/examples/word_simple.docx differ
diff --git a/data/examples/word_simple_.docx b/data/examples/word_simple_.docx
new file mode 100644
index 0000000000000000000000000000000000000000..914fb8d93b290422a6dd36dba1ebb25bcab00e7e
Binary files /dev/null and b/data/examples/word_simple_.docx differ
diff --git a/data/examples/~$doMoro_simple.docx b/data/examples/~$doMoro_simple.docx
new file mode 100644
index 0000000000000000000000000000000000000000..1215360c558324f168a348f977b46d5a160ee437
Binary files /dev/null and b/data/examples/~$doMoro_simple.docx differ
diff --git a/data/examples/~$rpTemplate .docx b/data/examples/~$rpTemplate .docx
new file mode 100644
index 0000000000000000000000000000000000000000..99076fb99e3f3f132cc4073259a7a7b38ad6581e
Binary files /dev/null and b/data/examples/~$rpTemplate .docx differ
diff --git a/data/images/Villa_anna.jpg b/data/images/Villa_anna.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e0d21f5dc81fad57a9afa529e6f12d4903ae5c65
Binary files /dev/null and b/data/images/Villa_anna.jpg differ
diff --git a/data/images/kid_meme.webp b/data/images/kid_meme.webp
new file mode 100644
index 0000000000000000000000000000000000000000..774e84eae0967d74c8cf4ce2973ff9e298ceef2e
Binary files /dev/null and b/data/images/kid_meme.webp differ
diff --git a/data/images/orange_logo.jpg b/data/images/orange_logo.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..48501f3cda85a7128174a19aa350a3cd2dfa1533
Binary files /dev/null and b/data/images/orange_logo.jpg differ
diff --git "a/data/templates/Compte rendu de r\303\251union.docx" "b/data/templates/Compte rendu de r\303\251union.docx"
new file mode 100644
index 0000000000000000000000000000000000000000..ee59797f3e9e4d8383ddf28cbf61eb083c31dbe2
Binary files /dev/null and "b/data/templates/Compte rendu de r\303\251union.docx" differ
diff --git a/data/templates/CorpTemplate .docx b/data/templates/CorpTemplate .docx
new file mode 100644
index 0000000000000000000000000000000000000000..d0bfc374b58fda89a817b0161d53516a150f0052
Binary files /dev/null and b/data/templates/CorpTemplate .docx differ
diff --git a/data/templates/Corporate Template Fr.docx b/data/templates/Corporate Template Fr.docx
new file mode 100644
index 0000000000000000000000000000000000000000..7af4fca2abc74ca0cded449cfce9a99a9c587cf4
Binary files /dev/null and b/data/templates/Corporate Template Fr.docx differ
diff --git a/data/templates/Corporate Template Fr.dotx b/data/templates/Corporate Template Fr.dotx
new file mode 100644
index 0000000000000000000000000000000000000000..32359e172915c46177c44d33a5b0a3f33a0201cb
Binary files /dev/null and b/data/templates/Corporate Template Fr.dotx differ
diff --git a/data/templates/Corporate Template Green Fr.docx b/data/templates/Corporate Template Green Fr.docx
new file mode 100644
index 0000000000000000000000000000000000000000..aa82fc7318de953712e3f2ec4bb3e0bdccb4a884
Binary files /dev/null and b/data/templates/Corporate Template Green Fr.docx differ
diff --git a/data/templates/Corporate Template Red Fr.docx b/data/templates/Corporate Template Red Fr.docx
new file mode 100644
index 0000000000000000000000000000000000000000..d9b465aec48f8158bc4c0e644893882981631ff5
Binary files /dev/null and b/data/templates/Corporate Template Red Fr.docx differ
diff --git a/data/templates/~$rporate Template Fr.docx b/data/templates/~$rporate Template Fr.docx
new file mode 100644
index 0000000000000000000000000000000000000000..1215360c558324f168a348f977b46d5a160ee437
Binary files /dev/null and b/data/templates/~$rporate Template Fr.docx differ
diff --git a/data/templates/~$rporate Template Fr.dotx b/data/templates/~$rporate Template Fr.dotx
new file mode 100644
index 0000000000000000000000000000000000000000..1215360c558324f168a348f977b46d5a160ee437
Binary files /dev/null and b/data/templates/~$rporate Template Fr.dotx differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e76586d79df5542c5a0a943599d0eb2937a43bdc
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,105 @@
+aiofiles==23.2.1
+aiohttp==3.8.5
+aiosignal==1.3.1
+altair==5.1.1
+annotated-types==0.5.0
+anyio==3.7.1
+async-timeout==4.0.3
+attrs==23.1.0
+backoff==2.2.1
+bcrypt==4.0.1
+beautifulsoup4==4.12.2
+certifi==2023.7.22
+charset-normalizer==3.2.0
+chroma-hnswlib==0.7.2
+chromadb==0.4.8
+click==8.1.7
+coloredlogs==15.0.1
+contourpy==1.1.0
+cycler==0.11.0
+dataclasses-json==0.5.14
+exceptiongroup==1.1.3
+fastapi==0.99.1
+ffmpy==0.3.1
+filelock==3.12.3
+flatbuffers==23.5.26
+fonttools==4.42.1
+frozenlist==1.4.0
+fsspec==2023.9.0
+gradio==3.33.1
+gradio_client==0.5.0
+h11==0.14.0
+httpcore==0.17.3
+httptools==0.6.0
+httpx==0.24.1
+huggingface-hub==0.16.4
+humanfriendly==10.0
+idna==3.4
+importlib-resources==6.0.1
+Jinja2==3.1.2
+jsonschema==4.19.0
+jsonschema-specifications==2023.7.1
+kiwisolver==1.4.5
+langchain==0.0.279
+langsmith==0.0.33
+linkify-it-py==2.0.2
+lxml==4.9.3
+markdown-it-py==2.2.0
+MarkupSafe==2.1.3
+marshmallow==3.20.1
+matplotlib==3.7.2
+mdit-py-plugins==0.3.3
+mdurl==0.1.2
+monotonic==1.6
+mpmath==1.3.0
+multidict==6.0.4
+mypy-extensions==1.0.0
+numexpr==2.8.5
+numpy==1.25.2
+onnxruntime==1.15.1
+openai==0.28.0
+orjson==3.9.5
+overrides==7.4.0
+packaging==23.1
+pandas==2.1.0
+Pillow==10.0.0
+posthog==3.0.2
+protobuf==4.24.2
+pulsar-client==3.3.0
+pydantic==1.10.12
+pydantic_core==2.6.3
+pydub==0.25.1
+Pygments==2.16.1
+pyparsing==3.0.9
+PyPika==0.48.9
+python-dateutil==2.8.2
+python-docx==0.8.11
+python-dotenv==1.0.0
+python-multipart==0.0.6
+pytz==2023.3
+PyYAML==6.0.1
+referencing==0.30.2
+requests==2.31.0
+rpds-py==0.10.0
+semantic-version==2.10.0
+six==1.16.0
+sniffio==1.3.0
+soupsieve==2.5
+SQLAlchemy==2.0.20
+starlette==0.27.0
+sympy==1.12
+tenacity==8.2.3
+tokenizers==0.13.3
+toolz==0.12.0
+tqdm==4.66.1
+typing-inspect==0.9.0
+typing_extensions==4.7.1
+tzdata==2023.3
+uc-micro-py==1.0.2
+urllib3==2.0.4
+uvicorn==0.23.2
+uvloop==0.17.0
+watchfiles==0.20.0
+websockets==11.0.3
+wikipedia==1.4.0
+yarl==1.9.2
diff --git a/src/control/controller.py b/src/control/controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..79f03b1d2d002a41613e758b29c59c0c02e1987c
--- /dev/null
+++ b/src/control/controller.py
@@ -0,0 +1,181 @@
+import asyncio
+import os
+from typing import Dict
+import random
+import datetime
+import string
+
+from src.domain.doc import Doc
+from src.domain.wikidoc import WikiPage
+from src.view.log_msg import create_msg_from
+import src.tools.semantic_db as semantic_db
+from src.tools.wiki import Wiki
+from src.tools.llm_tools import get_wikilist, get_public_paragraph, get_private_paragraph
+from src.tools.semantic_db import add_texts_to_collection, query_collection
+
+
+class Controller:
+
+ def __init__(self, config: Dict):
+ self.templates_path = config['templates_path']
+ self.generated_docs_path = config['generated_docs_path']
+ self.styled_docs_path = config['styled_docs_path']
+ self.new_docs = []
+ self.gen_docs = []
+
+ template_path = config['templates_path'] + '/' + config['templates'][config['default_template_index']]
+ self.default_template = Doc(template_path)
+ self.template = self.default_template
+ self.log = []
+ self.differences = []
+
+ def copy_docs(self, temp_docs: []):
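+ # Copy each uploaded file into the generated-docs folder as "<name>_.docx", keep a reference to it and run a structural check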
+ get_name = lambda doc: doc.name.split('/')[-1].split('.')[0]
+ doc_names = [get_name(doc) for doc in temp_docs]
+ docs = [Doc(path=doc.name) for doc in temp_docs]
+ style_paths = [f"{self.generated_docs_path}/{dn}_.docx" for dn in doc_names]
+ gen_paths = [f"{self.generated_docs_path}/{dn}_e.docx" for dn in doc_names]
+ for doc, style_path, gen_path in zip(docs, style_paths, gen_paths):
+ new_doc = doc.copy(style_path)
+ self.new_docs.append(new_doc)
+ new_doc.check_document()
+
+ def clear_docs(self):
+ for new_doc in self.new_docs:
+ if os.path.exists(new_doc.path):
+ new_doc.clear()
+ for gen_doc in self.gen_docs:
+ if os.path.exists(gen_doc.path):
+ gen_doc.clear()
+ self.new_docs = []
+ self.gen_docs = []
+ self.log = []
+ path_to_clear = os.path.abspath(self.generated_docs_path)
+ [os.remove(f"{path_to_clear}/{doc}") for doc in os.listdir(path_to_clear)]
+
+ def set_template(self, template_name: str = ""):
+ if not template_name:
+ self.template = self.default_template
+ else:
+ template_path = f"{self.templates_path}/{template_name}"
+ self.template = Doc(template_path)
+
+ def get_difference_with_template(self):
+ self.differences = []
+ for new_doc in self.new_docs:
+ diff_styles = new_doc.get_different_styles_with_template(template=self.template)
+ diff_dicts = [{'doc': new_doc, 'style': s} for s in diff_styles]
+ self.differences += diff_dicts
+ template_styles = [name for name in self.template.styles.names]
+ return self.differences, template_styles
+
+ def map_style(self, this_style_index: int, template_style_name: str):
+ """
+ maps a style from 'this' document into a style from the template
+ """
+ diff_dict = self.differences[this_style_index]
+ doc = diff_dict['doc']
+ this_style_name = diff_dict['style']
+ log = doc.copy_one_style(this_style_name, template_style_name, self.template)
+ self.log.append({doc.name: log})
+
+ def apply_template(self, options_list):
+ for new_doc in self.new_docs:
+ log = new_doc.apply_template(template=self.template, options_list=options_list)
+ if log:
+ self.log.append({new_doc.name: log})
+
+ def reset(self):
+ for new_doc in self.new_docs:
+ new_doc.delete()
+ for gen_doc in self.gen_docs:
+ gen_doc.delete()
+ self.new_docs = []
+ self.gen_docs = []
+
+
+ def get_log(self):
+ msg_log = create_msg_from(self.log, self.new_docs)
+ return msg_log
+
+ """
+ Source Control
+ """
+
+ def get_or_create_collection(self, id_: str) -> str:
+ """
+ generates a new id if needed
+ """
+ if id_ != '-1':
+ return id_
+ else:
+ now = datetime.datetime.now().strftime("%m%d%H%M")
+ letters = string.ascii_lowercase + string.digits
+ id_ = now + '-' + ''.join(random.choice(letters) for _ in range(10))
+ semantic_db.get_or_create_collection(id_)
+ return id_
+
+ async def wiki_fetch(self) -> [str]:
+ """
+ returns the titles of the wiki pages corresponding to the tasks described in the new documents
+ """
+ all_tasks = []
+ for new_doc in self.new_docs:
+ all_tasks += new_doc.tasks
+ async_tasks = [asyncio.create_task(get_wikilist(task)) for task in all_tasks]
+ wiki_lists = await asyncio.gather(*async_tasks)
+ flatten_wiki_list = list(set().union(*[set(w) for w in wiki_lists]))
+ return flatten_wiki_list
+
+ async def wiki_upload_and_store(self, wiki_title: str, collection_name: str):
+ """
+ fetches one wiki page and stores its paragraphs in the given collection
+ """
+ wikipage = Wiki().fetch(wiki_title)
+ if type(wikipage) != str:
+ texts = WikiPage(wikipage.page_content).get_paragraphs()
+ add_texts_to_collection(coll_name=collection_name, texts=texts, file=wiki_title, source='wiki')
+ else:
+ print(wikipage)
+
+ """
+ Generate Control
+ """
+
+
+ async def generate_doc_from_db(self, collection_name: str, from_files: [str]) -> [str]:
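+ # For each new document, resolve its tasks against the semantic DB collection and write a generated copy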
+
+ def query_from_task(task):
+ return get_public_paragraph(task)
+
+ async def retrieve_text_and_generate(t, collection_name: str, from_files: [str]):
+ """
+ retrieves the texts from the database and generates the documents
+ """
+ # retrieve the texts from the database
+ task_query = query_from_task(t)
+ texts = query_collection(coll_name=collection_name, query=task_query, from_files=from_files)
+ task_resolutions = get_private_paragraph(task=t, texts=texts)
+ return task_resolutions
+
+ async def real_doc_generation(new_doc):
+ async_task_resolutions = [asyncio.create_task(retrieve_text_and_generate(t=task, collection_name=collection_name, from_files=from_files))
+ for task in new_doc.tasks]
+ tasks_resolutions = await asyncio.gather(*async_task_resolutions) # to be reviewed
+ gen_path = f"{self.generated_docs_path}/{new_doc.name}e.docx"
+ gen_doc = new_doc.copy(gen_path)
+ gen_doc.replace_tasks(tasks_resolutions)
+ gen_doc.save_as_docx()
+ self.gen_docs.append(gen_doc)
+ # return only this document's generated path; the outer gather collects one list per document
+ return [gen_doc.path]
+
+ gen_paths = await asyncio.gather(*[asyncio.create_task(real_doc_generation(new_doc)) for new_doc in self.new_docs])
+ gen_paths = [path for sublist in gen_paths for path in sublist]
+ return gen_paths
+
+
+ def update_style(self, index, style_to_modify):
+ return self.map_style(index, style_to_modify) if style_to_modify else None
\ No newline at end of file
diff --git a/src/domain/block.py b/src/domain/block.py
new file mode 100644
index 0000000000000000000000000000000000000000..30e611ec389531f86b5e1143cb39382cb77f4a70
--- /dev/null
+++ b/src/domain/block.py
@@ -0,0 +1,49 @@
+class Block:
+ def __init__(self, doc: str = '', title: str = '', content: str = '', content_fr: str = '',
+ index: str = '', rank: int = 0, level: int = 0, distance: float = 99999):
+ self.doc = doc
+ self.title = title
+ self.title_fr = ""
+ self.content = content
+ self.content_fr = content_fr
+ self.specials = []
+ self.index = index
+ self.rank = rank
+ self.level = level
+ self.distance = distance
+
+ def to_dict(self) -> {}:
+ block_dict = {'doc': self.doc,
+ 'title': self.title,
+ 'title_fr': self.title_fr,
+ 'content': self.content,
+ 'content_fr': self.content_fr,
+ 'index': self.index,
+ 'rank': self.rank,
+ 'level': self.level,
+ 'distance': self.distance}
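+ # Flatten the specials list into numbered special_<i> keys so the block serialises to a flat dict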
+ for i, s in enumerate(self.specials):
+ special_key = 'special_'+str(i)
+ block_dict[special_key] = s
+ block_dict['specials_len'] = len(self.specials)
+ return block_dict
+
+ def from_dict(self, block_dict: {}):
+ self.doc = block_dict['doc']
+ self.title = block_dict['title']
+ self.title_fr = block_dict['title_fr']
+ self.content = block_dict['content']
+ self.content_fr = block_dict['content_fr']
+ self.index = block_dict['index']
+ self.rank = block_dict['rank']
+ self.level = block_dict['level']
+ self.distance = block_dict['distance']
+ self.specials = []
+ for i in range(block_dict['specials_len']):
+ special_key = 'special_' + str(i)
+ self.specials.append(block_dict[special_key])
+ return self
+
+ @property
+ def distance_str(self) -> str:
+ return format(self.distance, '.2f')
diff --git a/src/domain/container.py b/src/domain/container.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1ae1c1fb2e35d7b956f86216132564fa09dea05
--- /dev/null
+++ b/src/domain/container.py
@@ -0,0 +1,184 @@
+from src.domain.paragraph import Paragraph
+from src.domain.block import Block
+
+INFINITE = 10000
+
+
+class Container:
+
+ def __init__(self, paragraphs: [Paragraph], title: Paragraph = None, level: int = 0, index: [int] = None,
+ father=None, id_=0):
+ if index is None:
+ index = []
+ self.level = level
+ if not self.level:
+ pass
+ self.title = title
+ self.paragraphs = []
+ self.all_paragraphs = paragraphs
+ self.children = []
+ self.index = index
+ self.father = father # if not father, then the container is at the top of the hierarchy
+ self.id_ = int(str(1) + str(father.id_) + str(id_))
+ if paragraphs:
+ self.paragraphs, self.children = self.create_children(paragraphs.copy(), level, index)
+ self.containers = [self]
+ for child in self.children:
+ self.containers += child.containers
+ self.blocks = self.get_blocks()
+ self.normal, self.comment, self.task, _ = self.sort_paragraphs()
+
+ self.one_liner = (self.title.text if self.title else '') + ' ' + self.comment
+ self.root_text = self.one_liner + ' ' + self.normal
+
+
+ @property
+ def text(self):
+ text = ""
+ if self.title:
+ text = "Titre " + str(self.level) + " : " + self.title.text + '\n'
+ for p in self.paragraphs:
+ text += p.text + '\n'
+ for child in self.children:
+ text += child.text
+ return text
+
+ @property
+ def table_of_contents(self):
+ toc = []
+ if self.title:
+ toc += [{str(self.level): self.title.text}]
+ if self.children:
+ for child in self.children:
+ toc += child.table_of_contents
+ return toc
+
+ def move(self, position: int, new_father=None):
+ current_father = self.father # should be added in the domain
+ current_father.children.remove(self)
+
+ self.rank = new_father.rank + 1 if new_father else 0
+ self.father = new_father
+ if position < len(new_father.children):
+ new_father.children.insert(position, self)
+ else:
+ new_father.children.append(self)
+
+ def create_children(self, paragraphs, level, rank) -> ([], []):
+ """
+ creates children containers or directly attached content
+ and returns the list of containers and contents of level+1
+ :return:
+ [Content or Container]
+ """
+ attached_paragraphs = []
+ container_paragraphs = []
+ container_title = None
+ children = []
+ in_children = False
+ level = INFINITE
+ child_id = 0
+
+ while paragraphs:
+ p = paragraphs.pop(0)
+ if not in_children and not p.is_structure:
+ attached_paragraphs.append(p)
+ else:
+ in_children = True
+ if p.is_structure and p.level <= level: # if p is higher or equal in hierarchy
+ if container_paragraphs or container_title:
+ children.append(Container(container_paragraphs, container_title, level, rank, self, child_id))
+ child_id += 1
+ container_paragraphs = []
+ container_title = p
+ level = p.level
+
+ else: # p is strictly lower in hierarchy
+ container_paragraphs.append(p)
+
+ if container_paragraphs or container_title:
+ children.append(Container(container_paragraphs, container_title, level, rank, self, child_id))
+ child_id += 1
+
+ return attached_paragraphs, children
+
+ @property
+ def structure(self):
+
+ self_structure = {str(self.id_): {
+ 'index': str(self.id_),
+ 'canMove': True,
+ 'isFolder': True,
+ 'children': [p.id_ for p in self.paragraphs] + [child.id_ for child in self.children],
+ 'canRename': True,
+ 'data': {},
+ 'level': self.level,
+ 'title': self.title.text if self.title else 'root'
+ }}
+ paragraphs_structure = [p.structure for p in self.paragraphs]
+ structure = [self_structure] + paragraphs_structure
+ for child in self.children:
+ structure += child.structure
+ return structure
+
+ def get_lang(self):
+ """
+ returns the main language of the document
+ :return:
+ """
+
+ def get_structure(self, level=2):
+ """
+ returns the structure of the document
+ :return:
+ """
+
+ def create_embeddings(self):
+ """
+
+ :return:
+ """
+
+ def get_blocks(self):
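+ # Build a Block from this container's own paragraphs; lines starting with '##### ' are collected as special actions, then children's blocks are appended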
+ block = Block(level=self.level, index=self.index)
+ if self.title:
+ block.title = self.title.text
+ for p in self.paragraphs:
+ if not p.blank:
+ if p.text.startswith('##### '):
+ special_action = p.text.lstrip('##### ')
+ block.specials.append(special_action)
+ else:
+ block.content += p.text
+ blocks = [block] if block.content or block.specials else []
+ for child in self.children:
+ blocks += child.blocks
+ return blocks
+
+ def get_fulltask(self, doc_one_liner):
+ print(doc_one_liner)
+ siblings_ = self.father.children.copy()
+ index = siblings_.index(self)
+ siblings_before_context = [sibling.one_liner for idx, sibling in enumerate(siblings_) if idx < index]
+ siblings_after_context = [sibling.one_liner for idx, sibling in enumerate(siblings_) if index < idx]
+
+ fulltask = {'description': self.task,
+ 'about': self.one_liner,
+ 'doc_description': doc_one_liner,
+ 'above': self.father.one_liner,
+ 'before': siblings_before_context,
+ 'after': siblings_after_context}
+ return fulltask
+
+ def sort_paragraphs(self) -> (str, str, str, str):
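+ # Concatenate the parsed text of the paragraphs by type and return (normal, comment, task, title)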
+ mapping = {'normal': '', 'comment': '', 'task': '', 'title': ''}
+ for p in self.paragraphs:
+ mapping[p.type] += ' ' + p.parsed_text
+ return mapping['normal'], mapping['comment'], mapping['task'], mapping['title']
+
+ def get_all_styles_used_in_doc(self):
+ styles = []
+ for p in self.paragraphs:
+ styles.append(p.get_styles_in_paragraph())
+ res = list(set().union(*styles))
+ return res
diff --git a/src/domain/doc.py b/src/domain/doc.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4a427546f7751b97b8a7f74a3fec6bf85dbd569
--- /dev/null
+++ b/src/domain/doc.py
@@ -0,0 +1,216 @@
+from xml.dom.minidom import Element
+import docx
+import zipfile
+
+from src.tools.doc_tools import get_difference_with_template, get_positions, convert_to_png
+from PIL import Image
+from docxcompose.composer import Composer
+from docx import Document as Document_compose
+from docx.enum.table import WD_TABLE_ALIGNMENT
+from src.domain.container import Container
+from src.domain.paragraph import Paragraph
+from src.domain.styles import Styles
+import shutil
+import os
+
+
+class Doc:
+
+ def __init__(self, path='', id_=None):
+
+ self.xdoc = docx.Document(path)
+ self.title = path.split('/')[-1]
+ self.name = self.title.split('.')[0]
+ self.id_ = id(self)
+ self.path = path
+ paragraphs = [Paragraph(xp, self.id_, i) for (i, xp) in enumerate(self.xdoc.paragraphs)]
+ self.container = Container(paragraphs, father=self)
+ self.styles = Styles(self.xdoc.styles)
+ self.tasks = [c.get_fulltask(self.container.one_liner) for c in self.container.containers if c.task]
+
+ def copy(self, new_doc_path):
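+ # Duplicate the underlying .docx on disk, load the copy as a new Doc and save it under the new path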
+ shutil.copyfile(self.path, new_doc_path)
+ new_doc = Doc(new_doc_path)
+ new_doc.save_as_docx(new_doc_path)
+ return new_doc
+
+ def clear(self):
+ os.remove(self.path)
+
+ def apply_template(self, template, options_list):
+ center_tables = False
+ center_images = False
+ add_template_before = False
+ justify_content = False
+ log = []
+ i = 0
+ j = 0
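+ # Map the French option labels selected in the UI onto boolean flags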
+ if("Recentrer les tableaux" in options_list):
+ center_tables = True
+ if("Recentrer les images (sauf les flottantes)" in options_list):
+ center_images = True
+ if("Ajouter le template avant" in options_list):
+ add_template_before = True
+ if("Justifier le texte" in options_list):
+ justify_content = True
+
+ if justify_content:
+ log.append("Le contenu du document a été justifié")
+ self.justify_content()
+ if center_images:
+ self.center_images()
+ i = self.number_images_in_doc()
+ log.append(f"{i} image{'s' if i>1 else ''} centrée{'s' if i>1 else ''}")
+ if center_tables:
+ j = self.center_tables()
+ log.append(f"{j} table{'s' if j>1 else ''} centrée{'s' if j>1 else ''}")
+ if add_template_before:
+ self.save_as_docx()
+ log.append(f"Le template {template.name} a été ajouté avant le document")
+ log = self.styles.apply_from(template.styles, log)
+ master = Document_compose(template.path)
+ composer = Composer(master)
+ doc = Document_compose(self.path)
+ composer.append(doc)
+ composer.save(self.path)
+ else:
+ log = self.styles.apply_from(template.styles, log)
+ self.save_as_docx()
+ return log
+
+ def copy_one_style(self, src_style_name: str, dest_style_name: str, template):
+ style_dest = template.styles.get_style_from_name(dest_style_name)
+ src_style = self.styles.get_style_from_name(src_style_name)
+ log = self.styles.copy_one_style(src_style, style_dest)
+ return log
+
+ def get_different_styles_with_template(self, template):
+ styles_used_in_doc = self.get_all_styles_of_doc()
+ different_styles = get_difference_with_template(styles_used_in_doc, template)
+ return different_styles
+
+ def save_as_docx(self, path: str = ''):
+ path = path if path else self.path
+ self.path = path
+ self.xdoc.save(path)
+
+ # def add_back_pages_from(self, src_doc):
+ # with open (self.path, "rb") as f:
+ # zip = zipfile.ZipFile(f)
+ # images = [image for image in zip.namelist() if image.startswith('word/media/')]
+ # for image in images:
+ # zip.extract(image)
+ # zip.close()
+ # images = convert_to_png(images)
+ # #copy the entire self to the end of src_doc
+ # for p in self.get_paragraphs():
+ # p.insert_paragraphs(images,src_doc)
+ # return self
+
+ def get_blocks(self):
+
+ def from_list_to_str(index_list):
+ index_str = str(index_list[0])
+ for el in index_list[1:]:
+ index_str += '.' + str(el)
+ return index_str
+
+        # removing items from a list while iterating over it skips elements, so filter first
+        blocks = [b for b in self.container.blocks if b.level != 0]
+        for block in blocks:
+            block.doc = self.title
+            block.index = from_list_to_str(block.index)
+        return blocks
+
+
+ @property
+ def structure(self):
+
+ return self.container.structure
+
+ def replace_tasks(self, resolutions: [str]):
+ if len(resolutions) == len(self.tasks): # exception to be handled
+ p_tasks = [p for p in self.get_paragraphs() if p.type == 'task']
+ for p, r in zip(p_tasks, resolutions):
+ p.set_text(r)
+ else:
+ print(f"résolutions : {len(resolutions)} != {len(self.tasks)} tasks")
+ return self
+
+ def get_paragraphs(self):
+ return self.container.all_paragraphs
+
+ def get_text_from_paragraphs(self):
+ return [p.text for p in self.get_paragraphs()]
+
+ def check_document(self):
+ picCount = 0
+ tabCount = 0
+ for paragraph in self.xdoc.paragraphs:
+ if picCount < len(self.xdoc.inline_shapes):
+ print('\033[1mPicture \033[0m')
+ picCount += 1
+ elif paragraph.text:
+ print(paragraph.text)
+ elif tabCount < len(self.xdoc.tables):
+ table = self.xdoc.tables[tabCount]
+ data = []
+ keys = None
+ for i, row in enumerate(table.rows):
+ text = (cell.text for cell in row.cells)
+ if i == 0:
+ keys = tuple(text)
+ continue
+ row_data = dict(zip(keys, text))
+ data.append(row_data)
+ print('\033[1mTable:\033[0m', data)
+ tabCount += 1
+ else:
+ print('\033[1mEmpty paragraph\033[0m')
+
+
+
+ def center_tables(self):
+ j = 0
+ for table in self.xdoc.tables:
+ j += 1
+ table.alignment = WD_TABLE_ALIGNMENT.CENTER
+ return j
+
+
+ # def center_tables_with_template(self):
+ # j = 0
+ # for i,table in enumerate(self.xdoc.tables):
+ # if(i == 0):
+ # continue
+ # j += 1
+ # table.alignment = 1
+ # return j
+
+ def center_images(self):
+ for paragraph in self.get_paragraphs():
+ paragraph.center_paragraph()
+
+ def justify_content(self):
+ for paragraph in self.get_paragraphs():
+ paragraph.justify_paragraph()
+
+
+
+
+ # def add_paragraph(self,p:Paragraph):
+ # self.container.paragraphs.append(p)
+ # self.xdoc.add_paragraph(p.text,p.xparagraph.style)
+
+
+ def number_images_in_doc(self):
+ picCount = 0
+ for _ in self.xdoc.paragraphs:
+ if picCount < len(self.xdoc.inline_shapes):
+ print('\033[1mPicture \033[0m')
+ picCount += 1
+ return picCount
+
+ def get_all_styles_of_doc(self):
+ return self.container.get_all_styles_used_in_doc()
diff --git a/src/domain/paragraph.py b/src/domain/paragraph.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce97e19ab2a7b7f5c4ccd8c0fdeec7ba29ba75ee
--- /dev/null
+++ b/src/domain/paragraph.py
@@ -0,0 +1,149 @@
+import string
+from src.tools.doc_tools import get_positions, convert_to_png
+from docx.enum.text import WD_ALIGN_PARAGRAPH
+import xml.etree.ElementTree as ET
+from docx.oxml.ns import qn
+import zipfile
+import os
+import re
+
+
+INFINITE = 10000
+
+class Paragraph:
+
+ def __init__(self, xparagraph, doc_id: int, id_: int):
+
+ self.xparagraph = xparagraph
+ self.id_ = int(str(2) + str(doc_id) + str(id_))
+ style_name = self.xparagraph.style.name
+ self.level = self.get_level_from_name(style_name)
+ self.is_structure = self.level < INFINITE
+ self.text = self.xparagraph.text
+ self.type, self.parsed_text = self.parse_text()
+
+
+ @property
+ def structure(self):
+ structure = {str(self.id_): {
+ 'index': str(self.id_),
+ 'canMove': True,
+ 'isFolder': False,
+ 'children': [],
+ 'title': self.text,
+ 'canRename': True,
+ 'data': {},
+ 'level': self.level,
+ }}
+ return structure
+
+ @property
+ def blank(self):
+ """
+        checks whether the paragraph is blank, i.e. carries no letters and thus no signal (it may then be ignored)
+ """
+ text = self.text.replace('\n', '')
+ return set(text).isdisjoint(string.ascii_letters)
+
+ @staticmethod
+ def get_level_from_name(style_name: str) -> int:
+ level = INFINITE
+ if '.Titre' in style_name:
+ suffix = style_name[-1]
+ try:
+ level = int(suffix)
+            except ValueError:
+ pass
+ return level
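+
+    # Rough usage sketch (illustrative, not executed): with the style-naming convention assumed above,
+    # a style named '.Titre2' yields level 2, while 'Normal' (no '.Titre' marker) yields INFINITE,
+    # i.e. the paragraph is not treated as a structure heading:
+    #   get_level_from_name('.Titre2') -> 2
+    #   get_level_from_name('Normal')  -> 10000 (INFINITE)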
+
+ def parse_text(self) -> (str, str):
+
+ if self.is_structure:
+ return 'structure', self.text
+
+ startswith = {"?? ": "task", "++ ": "comment"}
+ for start in startswith.keys():
+ split = self.text.rsplit(start)
+ if 1 < len(split):
+ return startswith[start], split[1]
+
+ return "normal", self.text
+
+ def set_text(self, text: str):
+ self.text = text
+ self.xparagraph.text = text
+ return self
+
+ def contains_image(self) -> bool:
+ return any("pic:pic" in run.element.xml for run in self.xparagraph.runs)
+ # is_image = False
+ # for run in self.xparagraph.runs:
+ # if "pic:pic" in run.element.xml:
+ # xml = run.element.xml
+ # print(run.element.xml)
+ # #find the anchor element
+ # print(xml)
+ # root = ET.fromstring(xml)
+ # anch = ET.SubElement(root, "wp:anchor")
+ # item = ET.SubElement(anch, "wp:positionH")
+ # item2 = ET.SubElement(anch, "wp:positionV")
+ # # find the anchor element
+ # attri = root.findall(".//{http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing}anchor")
+ # # create a child to the positionH and positionV elements
+ # if attri:
+ # #print all the children of the anchor element
+ # for anchors in attri:
+ # childH = ET.SubElement(anchors, "wp:positionH")
+ # childV = ET.SubElement(anchors, "wp:positionV")
+ # ET.SubElement(childH, "wp:align").text = "center"
+ # ET.SubElement(childV, "wp:align").text = "center"
+ # xml = ET.tostring(root, encoding='unicode', method='xml')
+ # # add a child to the positionH and positionV using xml variable
+ # ET.SubElement(item, "wp:align").text = "center"
+ # ET.SubElement(item2, "wp:align").text = "center"
+ # print(ET.tostring(root))
+ # else:
+ # is_image = True
+ # return is_image
+
+
+
+
+
+
+ def center_paragraph(self):
+ if self.contains_image():
+ self.xparagraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
+
+ def justify_paragraph(self):
+ if(self.xparagraph.style.name == "Normal"):
+ self.xparagraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
+
+ # def insert_paragraphs(self,images,template_doc):
+ # empty_paragraph = Paragraph(template_doc.xdoc.add_paragraph(""),template_doc.id_,template_doc.container.paragraphs[-1].id_+1)
+ # template_doc.add_paragraph(empty_paragraph)
+ # template_xp = template_doc.xdoc.paragraphs[-1]
+ # for run in self.xparagraph.runs:
+ # new_run = template_xp.add_run(run.text)
+ # if "pic:pic" in run.element.xml:
+ # xml = run.element.xml
+ # print(xml)
+ # #check if there is the same image multiple times in the document
+ # image_name = xml.split("pic:pic")[1].split('name="')[1].split('"')[0]
+ # image_name = re.sub('[\s+]', '', image_name)
+ # image_to_put = image_name.lower() + '.png'
+ # #loop over all the cx and cy occurences and stop when both strings in between are numbers
+ # width,height = get_positions(xml)
+ # index_to_use = images.index("word/media/" + image_to_put)
+ # new_run.add_picture(images[index_to_use], width=width, height=height)
+ # # os.remove(images[0])
+ # # return images
+
+ def get_styles_in_paragraph(self):
+ styles = [self.xparagraph.style.name]
+ for run in self.xparagraph.runs:
+ if run.style.name != "Default Paragraph Font":
+ styles.append(run.style.name)
+ return styles
+
+
diff --git a/src/domain/styles.py b/src/domain/styles.py
new file mode 100644
index 0000000000000000000000000000000000000000..4eb0a90f0156f4d136cd912eedb82ffafaeb795f
--- /dev/null
+++ b/src/domain/styles.py
@@ -0,0 +1,134 @@
+from docx.enum.style import WD_STYLE_TYPE
+from docx.shared import RGBColor
+
+
+class Styles:
+
+ def __init__(self, xstyles, doc_id=0, id_=0):
+
+ self.id_ = int(str(doc_id)+str(id_))
+ self.xstyles = xstyles
+ self.names = [s.name for s in xstyles]
+
+ @staticmethod
+ def copy_style(src=None, dest=None) -> {}:
+ modified_style = set()
+ if src.type == WD_STYLE_TYPE.PARAGRAPH:
+ same_color = True
+ if src.font.color.rgb:
+                # the RGB value comes from the source (template) style, so name it accordingly
+                src_rgb = RGBColor(src.font.color.rgb[0], src.font.color.rgb[1], src.font.color.rgb[2])
+                if dest.font.color.rgb:
+                    for i in range(3):
+                        same_color *= dest.font.color.rgb[i] == src_rgb[i]
+                else:
+                    same_color = False
+                dest.font.color.rgb = src_rgb
+ else:
+ if dest.font.color.rgb:
+ same_color = False
+ if not same_color:
+ modified_style.add(('color', True))
+
+ if dest.font.size != src.font.size:
+ dest.font.size = src.font.size
+ modified_style.add(('font size', (src.font.size, dest.font.size)))
+
+ if dest.font.name != src.font.name:
+ dest.font.name = src.font.name
+ modified_style.add(('font', (src.font.name, dest.font.name)))
+
+ if dest.font.all_caps != src.font.all_caps:
+ dest.font.all_caps = src.font.all_caps
+ modified_style.add(('all_caps', (src.font.all_caps, dest.font.all_caps)))
+
+ if dest.font.bold != src.font.bold:
+ dest.font.bold = src.font.bold
+ modified_style.add(('bold', (src.font.bold, dest.font.bold)))
+
+ dest.font.complex_script = src.font.complex_script
+ dest.font.cs_bold = src.font.cs_bold
+ dest.font.cs_italic = src.font.cs_italic
+ dest.font.double_strike = src.font.double_strike
+ dest.font.emboss = src.font.emboss
+ dest.font.hidden = src.font.hidden
+ dest.font.highlight_color = src.font.highlight_color
+ dest.font.imprint = src.font.imprint
+ dest.font.italic = src.font.italic
+ dest.font.math = src.font.math
+ dest.font.no_proof = src.font.no_proof
+ dest.font.outline = src.font.outline
+ dest.font.rtl = src.font.rtl
+ dest.font.shadow = src.font.shadow
+ dest.font.small_caps = src.font.small_caps
+ dest.font.snap_to_grid = src.font.snap_to_grid
+ dest.font.spec_vanish = src.font.spec_vanish
+ dest.font.strike = src.font.strike
+ dest.font.subscript = src.font.subscript
+ dest.font.superscript = src.font.superscript
+ dest.font.underline = src.font.underline
+ dest.font.web_hidden = src.font.web_hidden
+ dest.base_style = src.base_style
+ dest.hidden = src.hidden
+ dest.locked = src.locked
+ dest.name = src.name
+ dest.priority = src.priority
+ dest.quick_style = src.quick_style
+ dest.unhide_when_used = src.unhide_when_used
+ return modified_style
+
+ def apply_from(self, template_styles, options_list):
+
+ if(options_list == []):
+ log = {'suppressed_styles': [], 'modified_styles': [], 'added_styles': []}
+ else:
+ log = {'options_applied': options_list,'suppressed_styles': [], 'modified_styles': [], 'added_styles': []}
+
+ for s in self.xstyles:
+ if s.name not in template_styles.names:
+ log['suppressed_styles'].append(s.name)
+ s.delete()
+ else:
+ src_style = template_styles.get_style_from_name(s.name)
+ log_s = self.copy_style(src=src_style, dest=s)
+ if log_s:
+ log['modified_styles'].append((s.name, log_s))
+
+ for s in template_styles.xstyles:
+ if not self.contains_style(s):
+ log['added_styles'].append(s.name)
+ self.xstyles.add_style(s.name, s.type)
+ self.copy_style(src=s, dest=self.xstyles[s.name])
+ return log
+
+
+ def copy_one_style(self, src_style, dest_style) -> {}:
+ log_msg = \
+ f"le style {src_style.name} a été mappé sur le style {dest_style.name} du template"
+ log_dict = {'style_mapping': log_msg}
+        # the template style (dest_style) is the source of the copy: its properties are applied to the document style (src_style)
+        self.copy_style(dest_style, src_style)
+ return log_dict
+
+ def get_style_from_name(self, name: str):
+ try:
+ s = self.xstyles[name]
+ except:
+ try:
+ s = self.xstyles[name[1:]]
+ except:
+ s = self.get_style_from_name(self.names[0])
+                print(f"style '{name}' not found, falling back to '{self.names[0]}'")
+ return s
+
+ def contains_style(self, style):
+ resp = True
+ try:
+ s = self.xstyles[style.name]
+ except:
+ try:
+ s = self.xstyles[style.name[1:]]
+ except:
+ resp = False
+ return resp
+
+
+
diff --git a/src/domain/wikidoc.py b/src/domain/wikidoc.py
new file mode 100644
index 0000000000000000000000000000000000000000..25903666909e776182ab45beebdb99e44d4fb8a7
--- /dev/null
+++ b/src/domain/wikidoc.py
@@ -0,0 +1,123 @@
+class Doc:
+ def __init__(self, fulltext: str = '', title: str = '', params: dict = {}):
+ self.params = params
+ self.lines = [Line(text.strip(), self.params) for text in fulltext.split("\n") if text.strip()]
+ self.title, self.lines = self._get_title(title)
+ self.container = Container(lines=self.lines, title=self.title, father=self, params=params)
+ self.fulltext = fulltext
+
+ def _get_title(self, title):
+ lines = self.lines
+ if self.params['type'] == 'input_text':
+ if self.lines and self.lines[0] and self.lines[0].type == 'title':
+ title = self.lines[0].text
+ lines = lines[1:]
+ else:
+ title = 'the title is missing'
+ return title, lines
+
+
+class WikiPage(Doc):
+
+ def __init__(self, fulltext='', title=''):
+ self.params = {
+ 'type': 'wiki',
+ 'startswith_':
+ {'== ': '1', '=== ': '2', '==== ': '3', '===== ': '4', '====== ': '5', '======= ': '6'},
+ 'endswith_':
+                [' ==', ' ===', ' ====', ' =====', ' ======', ' ======='],
+
+ 'discarded': ["See also", "Notes", "References", "Sources", "External links", "Bibliography",
+ "Cinematic adaptations", "Further reading", "Maps"]
+ }
+ super().__init__(fulltext=fulltext, title=title, params=self.params)
+
+ def get_paragraphs(self, chunk=500):
+ return self.container.get_paragraphs(chunk)
+
+
+class Container:
+
+ def __init__(self, lines=[], level=0, title='', father=None, params={}):
+
+ self.children = []
+ self.level = level
+ self.title = title
+ self.father = father
+ self.lines = []
+ self._expand(lines)
+ if params and 'discarded' in params.keys():
+ self.children = [child for child in self.children if child.title not in params['discarded']]
+ self.containers = [self]
+ for child in self.children:
+ self.containers += child.containers
+        # text directly attached to this container (get_paragraphs relies on it as root_text)
+        self.root_text = ' '.join(line.text for line in self.lines)
+        self.text = self.root_text
+        for child in self.children:
+            self.text += ' ' + child.text
+
+ def _expand(self, lines):
+ new_child = False
+ new_child_lines = []
+ new_child_title = []
+ for line in lines:
+ if not new_child:
+ if line.is_structure:
+ new_child = True
+ new_child_lines = []
+ new_child_title = line.text
+ line.level = self.level + 1
+ else:
+ self.lines.append(line)
+
+ else:
+ if self.level + 1 < line.level or not line.is_structure:
+ new_child_lines.append(line)
+ elif self.level + 1 == line.level:
+ self.children.append(Container(lines=new_child_lines,
+ level=self.level + 1,
+ title=new_child_title,
+ father=self))
+ new_child_lines = []
+ new_child_title = line.text
+ if new_child:
+ self.children.append(Container(lines=new_child_lines,
+ level=self.level + 1,
+ title=new_child_title,
+ father=self))
+
+ def get_paragraphs(self, chunk=500):
+ if len(self.text) < chunk:
+ paragraphs = [self.text]
+ else:
+ paragraphs = [self.root_text]
+ for child in self.children:
+ paragraphs += child.get_paragraphs(chunk)
+ return paragraphs
+
+
+class Line:
+
+ def __init__(self, text, params):
+ self.text = text
+ self.params = params
+ self.type, self.text = self._parse_text()
+ self.level = int(self.type) if self.type.isdigit() else -1
+ self.is_structure = 0 < self.level
+
+
+ def _parse_text(self):
+ def strip_text(text_, start, end):
+ text_ = text_.split(start)[1]
+ if end != "":
+ text_ = text_.split(end)[0]
+ # text += ". \n"
+ return text_.strip()
+
+ startswith_ = self.params['startswith_']
+
+ endswith_ = self.params['endswith_'] if 'endswith_' in self.params.keys() else [""] * len(startswith_)
+ types = [(strip_text(self.text, starter, endswith_[i]), startswith_[starter])
+ for i, starter in enumerate(startswith_.keys())
+ if self.text.startswith(starter)]
+ (text, type_) = types[0] if types else (self.text, 'normal')
+ return type_, text.strip()
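+
+
+# Illustrative sketch (not executed) of how a MediaWiki heading line is parsed with the
+# 'startswith_'/'endswith_' parameters defined in WikiPage above:
+#   Line("== History ==", params).type        -> '1'  (level-1 section, is_structure True)
+#   Line("=== Early years ===", params).type  -> '2'
+#   Line("Plain sentence.", params).type      -> 'normal' (level -1, is_structure False)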
diff --git a/src/model/block.py b/src/model/block.py
new file mode 100644
index 0000000000000000000000000000000000000000..30e611ec389531f86b5e1143cb39382cb77f4a70
--- /dev/null
+++ b/src/model/block.py
@@ -0,0 +1,49 @@
+class Block:
+ def __init__(self, doc: str = '', title: str = '', content: str = '', content_fr: str = '',
+ index: str = '', rank: int = 0, level: int = 0, distance: float = 99999):
+ self.doc = doc
+ self.title = title
+ self.title_fr = ""
+ self.content = content
+ self.content_fr = content_fr
+ self.specials = []
+ self.index = index
+ self.rank = rank
+ self.level = level
+ self.distance = distance
+
+ def to_dict(self) -> {}:
+ block_dict = {'doc': self.doc,
+ 'title': self.title,
+ 'title_fr': self.title_fr,
+ 'content': self.content,
+ 'content_fr': self.content_fr,
+ 'index': self.index,
+ 'rank': self.rank,
+ 'level': self.level,
+ 'distance': self.distance}
+ for i, s in enumerate(self.specials):
+ special_key = 'special_'+str(i)
+ block_dict[special_key] = s
+ block_dict['specials_len'] = len(self.specials)
+ return block_dict
+
+ def from_dict(self, block_dict: {}):
+ self.doc = block_dict['doc']
+ self.title = block_dict['title']
+ self.title_fr = block_dict['title_fr']
+ self.content = block_dict['content']
+ self.content_fr = block_dict['content_fr']
+ self.index = block_dict['index']
+ self.rank = block_dict['rank']
+ self.level = block_dict['level']
+ self.distance = block_dict['distance']
+ self.specials = []
+ for i in range(block_dict['specials_len']):
+ special_key = 'special_' + str(i)
+ self.specials.append(block_dict[special_key])
+ return self
+
+ @property
+ def distance_str(self) -> str:
+ return format(self.distance, '.2f')
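+
+
+# Rough round-trip sketch (illustrative values): to_dict() flattens the specials list into
+# 'special_0', 'special_1', ... plus 'specials_len', and from_dict() rebuilds it, e.g.
+#   b = Block(doc='offer.docx', title='Contexte', content='...', index='1.2', rank=3, level=2)
+#   b.specials.append('insert table of figures')
+#   assert Block().from_dict(b.to_dict()).specials == ['insert table of figures']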
diff --git a/src/model/container.py b/src/model/container.py
new file mode 100644
index 0000000000000000000000000000000000000000..29f64744b670da5977a09124a965da92812c2781
--- /dev/null
+++ b/src/model/container.py
@@ -0,0 +1,143 @@
+from src.model.paragraph import Paragraph
+from src.model.block import Block
+
+INFINITE = 99999
+
+
+class Container:
+
+ def __init__(self, paragraphs: [Paragraph], title: Paragraph = None, level: int = 0, index: [int] = None,
+ father=None, id_=0):
+ if index is None:
+ index = []
+ self.level = level
+ self.title = title
+ self.paragraphs = []
+ self.children = []
+ self.index = index
+ self.father = father # if not father, then the container is at the top of the hierarchy
+ self.id_ = int(str(1) + str(father.id_) + str(id_))
+ if paragraphs:
+ self.paragraphs, self.children = self.create_children(paragraphs, level, index)
+ self.blocks = self.get_blocks()
+ self.normals, self.comments, self.tasks = self.sort_paragraphs()
+
+
+ @property
+ def text(self):
+ text = ""
+ if self.title:
+ text = "Titre " + str(self.level) + " : " + self.title.text + '\n'
+ for p in self.paragraphs:
+ text += p.text + '\n'
+ for child in self.children:
+ text += child.text
+ return text
+
+ @property
+ def text_chunks(self, chunk=500):
+ text_chunks = []
+ text_chunk = ""
+ for p in self.paragraphs:
+ if chunk < len(text_chunk) + len(p.text):
+ text_chunks.append(text_chunk)
+ text_chunk = ""
+ else:
+ text_chunk += " " + p.text
+ if text_chunk and not text_chunk.isspace():
+ text_chunks.append(text_chunk)
+ for child in self.children:
+ text_chunks += child.text_chunks
+ return text_chunks
+
+ def get_blocks(self):
+ block = Block(level=self.level, index=self.index)
+ if self.title:
+ block.title = self.title.text
+ for p in self.paragraphs:
+ if not p.blank:
+ if p.text.startswith('##### '):
+                    special_action = p.text[len('##### '):]
+ block.specials.append(special_action)
+ else:
+ block.content += p.text
+ blocks = [block] if block.content or block.specials else []
+ for child in self.children:
+ blocks += child.blocks
+ return blocks
+
+    def create_children(self, paragraphs: [Paragraph], level: int, index: [int]) -> ([Paragraph], []):
+ """
+ creates children containers or directly attached content
+ and returns the list of containers and contents of level+1
+ :return:
+ [Content or Container]
+ """
+ attached_paragraphs = []
+ container_paragraphs = []
+ container_title = None
+ children = []
+ in_children = False
+ child_id = 0
+ level = INFINITE
+
+ while paragraphs:
+ p = paragraphs.pop(0)
+ if not in_children and not p.is_structure:
+ attached_paragraphs.append(p)
+ else:
+ in_children = True
+ if p.is_structure and p.level <= level: # if p is higher in hierarchy, then the child is completed
+ if container_paragraphs or container_title:
+ if level <= len(index):
+ index = index[:level]
+ index[-1] += 1
+ else:
+ for i in range(level-len(index)):
+ index.append(1)
+ children.append(Container(container_paragraphs, container_title, level, index, self, child_id))
+ child_id += 1
+ container_paragraphs = []
+ container_title = p
+ level = p.level
+
+ else: # p is normal text or strictly lower in hierarchy, then the child continues to grow
+ container_paragraphs.append(p)
+
+ if container_paragraphs or container_title:
+ if level <= len(index):
+ index = index[:level]
+ index[-1] += 1
+ else:
+ for i in range(level - len(index)):
+ index.append(1)
+ children.append(Container(container_paragraphs, container_title, level, index, self, child_id))
+ child_id += 1
+
+ return attached_paragraphs, children
+
+ @property
+ def structure(self):
+
+ self_structure = {str(self.id_): {
+ 'index': str(self.id_),
+ 'canMove': True,
+ 'isFolder': True,
+ 'children': [p.id_ for p in self.paragraphs] + [child.id_ for child in self.children],
+ 'canRename': True,
+ 'data': {},
+ 'level': self.level,
+ 'rank': self.rank,
+ 'title': self.title.text if self.title else 'root'
+ }}
+ paragraphs_structure = [p.structure for p in self.paragraphs]
+ structure = [self_structure] + paragraphs_structure
+ for child in self.children:
+ structure += child.structure
+ return structure
+
+ def sort_paragraphs(self) -> ([Paragraph], [Paragraph], [Paragraph]):
+ mapping = {'normal': [], 'comment': [], 'task': []}
+ for p in self.paragraphs:
+            mapping[p.type].append(p)
+ return mapping['normal'], mapping['comment'], mapping['task']
diff --git a/src/model/doc.py b/src/model/doc.py
new file mode 100644
index 0000000000000000000000000000000000000000..14a938eff9c5065a5a027bb1d6f55645a917d885
--- /dev/null
+++ b/src/model/doc.py
@@ -0,0 +1,54 @@
+import docx
+
+from src.model.container import Container
+from src.model.paragraph import Paragraph
+
+
+class Doc:
+
+ def __init__(self, path='', id_=None):
+
+ self.xdoc = docx.Document(path)
+ self.title = path.split('/')[-1]
+ self.id_ = id(self)
+ self.path = path
+ paragraphs = [Paragraph(xp, self.id_, i) for (i, xp) in enumerate(self.xdoc.paragraphs)]
+ self.container = Container(paragraphs, father=self, level=0)
+ self.blocks = self.get_blocks()
+ self.tasks = [c.get_task(self.container.one_liner) for c in self.container.containers if c.task]
+
+ @property
+ def structure(self):
+
+ return self.container.structure
+
+ def get_blocks(self):
+
+ def from_list_to_str(index_list):
+ index_str = str(index_list[0])
+ for el in index_list[1:]:
+ index_str += '.' + str(el)
+ return index_str
+
+        # removing items from a list while iterating over it skips elements, so filter first
+        blocks = [b for b in self.container.blocks if b.level != 0]
+        for block in blocks:
+            block.doc = self.title
+            block.index = from_list_to_str(block.index)
+        return blocks
+"""
+ current_level = len(current_index)
+ if 0 < block.level:
+ if block.level == current_level:
+ current_index[-1] += 1
+ elif current_level < block.level:
+ current_index.append(1)
+ elif block.level < current_level:
+ current_index = current_index[:block.level]
+ current_index[-1] += 1
+ block.index = from_list_to_str(current_index)
+ else:
+ block.index = "0"
+"""
+
diff --git a/src/model/paragraph.py b/src/model/paragraph.py
new file mode 100644
index 0000000000000000000000000000000000000000..cae2e67539f0d6542dc9c38b2244f080b4da5e07
--- /dev/null
+++ b/src/model/paragraph.py
@@ -0,0 +1,50 @@
+import string
+
+INFINITE = 10000
+
+
+class Paragraph:
+
+ def __init__(self, xparagraph, doc_id: int, id_: int):
+
+ self.xparagraph = xparagraph
+ self.id_ = int(str(2) + str(doc_id) + str(id_))
+ self.level = self.get_level_from_name()
+ self.is_structure = self.level < INFINITE
+ self.text = self.xparagraph.text
+ self.type = self.get_type()
+
+ @property
+ def structure(self):
+ structure = {str(self.id_): {
+ 'index': str(self.id_),
+ 'canMove': True,
+ 'isFolder': False,
+ 'children': [],
+ 'title': self.text,
+ 'canRename': True,
+ 'data': {},
+ 'level': self.level,
+ }}
+ return structure
+
+ @property
+ def blank(self):
+ """
+        checks whether the paragraph is blank, i.e. carries no letters and thus no signal (it may then be ignored)
+ """
+ text = self.text.replace('\n', '')
+ return set(text).isdisjoint(string.ascii_letters)
+
+ def get_level_from_name(self) -> int:
+ style_name = self.xparagraph.style.name
+ level = INFINITE
+ if '.Titre' in style_name:
+ suffix = style_name[-1]
+ try:
+ level = int(suffix)
+            except ValueError:
+ pass
+ return level
+
+
diff --git a/src/tools/doc_tools.py b/src/tools/doc_tools.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad8266e9de89f0d0a6893d11ca72cf6fb72d1104
--- /dev/null
+++ b/src/tools/doc_tools.py
@@ -0,0 +1,42 @@
+from PIL import Image
+import os
+
+def get_positions(xml_file):
+ i = 0
+ width = xml_file.split('cx="')
+ height = xml_file.split('cy="')
+ while(i < len(width)):
+ temp = width[i].split('"')[0]
+ if(temp.isnumeric()):
+ width = temp
+ break
+ else:
+ i+=1
+ i = 0
+ while(i < len(height)):
+ temp = height[i].split('"')[0]
+ if(temp.isnumeric()):
+ height = temp
+ break
+ else:
+ i+=1
+ return width, height
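+
+# Rough usage sketch (illustrative, the XML snippet is hypothetical): get_positions scans the run XML
+# of an inline picture and returns the first numeric cx / cy attribute values (EMUs) as strings, e.g.
+#   get_positions('<wp:extent cx="914400" cy="457200"/>')  ->  ('914400', '457200')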
+
+def convert_to_png(imageslist):
+ for image in imageslist:
+ if(image.endswith('.png')):
+ continue
+ im = Image.open(image)
+ im.save(image.split('.')[0]+'.png')
+ imageslist[imageslist.index(image)] = image.split('.')[0]+'.png'
+ os.remove(image)
+ return imageslist
+
+
+def get_difference_with_template(styles_used_in_doc, template):
+ styles_used_in_template = template.styles.names
+ different_styles = []
+ for style in styles_used_in_doc:
+ if style not in styles_used_in_template:
+ different_styles.append(style)
+ return different_styles
\ No newline at end of file
diff --git a/src/tools/list_tool.py b/src/tools/list_tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..98b4fc59005ac97c1c628e97540ec54d13306a79
--- /dev/null
+++ b/src/tools/list_tool.py
@@ -0,0 +1,13 @@
+def keep_last_occurrences(lst):
+ last_occurrences = {}
+ result = []
+
+ for index, string in lst:
+ last_occurrences[index] = string
+
+ for index, string in lst:
+ if last_occurrences[index] == string:
+ result.append((index, string))
+ last_occurrences[index] = None
+
+ return result
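+
+# Illustrative sketch (not executed): for duplicated indices only the last string wins,
+# while the ordering of the surviving pairs is preserved, e.g.
+#   keep_last_occurrences([(0, 'a'), (1, 'b'), (0, 'c')])  ->  [(1, 'b'), (0, 'c')]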
\ No newline at end of file
diff --git a/src/tools/llm_tools.py b/src/tools/llm_tools.py
new file mode 100644
index 0000000000000000000000000000000000000000..39138cbee2335e7af6de9119bb4fade3d7d5396a
--- /dev/null
+++ b/src/tools/llm_tools.py
@@ -0,0 +1,230 @@
+import json
+import string
+
+import wikipedia
+from langchain import PromptTemplate
+from langchain.vectorstores import Chroma
+from langchain.text_splitter import CharacterTextSplitter
+
+from src.tools.llms import openai_llm
+from src.tools.wiki import Wiki
+from src.domain.wikidoc import WikiPage
+
+
+
+
+async def get_wikilist(task: {}) -> [str]:
+ """
+ get the titles of wiki pages interesting for solving the given task
+ """
+
+ llm = openai_llm
+ # lama = llm_model
+ template = (f"\n"
+ f" Your task consists in finding the list of wikipedia page titles which provide useful content "
+ f" for a paragraph whose description is delimited by triple backticks: ```{task['description']}```\n"
+ f" \n"
+ f" The paragraph belongs at the top level of the hierarchy to a document"
+ f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
+ f" Make sure that the paragraph relates the top level of the document\n"
+ f" \n"
+ f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
+ f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
+ f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
+ f" \n"
+ f" The paragraphs comes after previous paragraphs \\n"
+ f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
+ f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
+ f" \n"
+ f" The paragraphs comes before next paragraphs \\n"
+ f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
+ f" \n"
+ f" Format your response as a JSON list of strings separated by commas.\n"
+ f" \n"
+ f"\n"
+ f" ")
+
+ prompt = PromptTemplate(
+ input_variables=[],
+ template=template
+ )
+
+ #wikilist = LLMChain(llm=openai_llm, prompt=prompt).run()
+ llm_list = llm(template)
+ wikilist = extract_list(llm_list)
+
+ expanded_wikilist = []
+
+ expand_factor = 2
+
+ for wikipage in wikilist:
+ expanded_wikilist += wikipedia.search(wikipage, expand_factor)
+
+ wikilist = list(set(expanded_wikilist))
+
+ return wikilist
+
+
+def extract_list(llm_list: str):
+ print(llm_list)
+
+ def filter_(el: str):
+ resp = 2 < len(el)
+ usable_length = len([c for c in el if c in string.ascii_letters])
+ resp = resp and len(el)*3/4 < usable_length
+ return resp
+
+ try:
+ wikilist = llm_list[1:-1].split('"')
+ wikilist = [el for el in wikilist if filter_(el)]
+ print(wikilist)
+ except:
+ wikilist = []
+ print('issues with the wikilist')
+ return wikilist
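+
+# Rough sketch of the expected input/output (illustrative): the LLM is asked for a JSON list of
+# strings, so a well-formed answer parses as follows, short or mostly non-alphabetic fragments
+# (quotes, commas, blank pieces) being filtered out:
+#   extract_list('["Battle of Waterloo", "Napoleon"]')  ->  ['Battle of Waterloo', 'Napoleon']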
+
+
+def get_public_paragraph(task: {}) -> str:
+ """returns the task directly performed by chat GPT"""
+ print(task)
+ llm = openai_llm
+ template = (f"\n"
+ f" Your task consists in generating a paragraph\\n"
+ f" whose description is delimited by triple backticks: ```{task['description']}```\n"
+ f"\n"
+ f" The paragraph belongs at the top level of the hierarchy to a document \\n"
+ f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
+ f" Make sure that the paragraph relates the top level of the document\n"
+ f" \n"
+ f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
+ f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
+ f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
+ f" \n"
+ f" The paragraphs comes after previous paragraphs \\n"
+ f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
+ f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
+ f" \n"
+ f" The paragraphs comes before next paragraphs \\n"
+ f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
+ f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
+ f" \n"
+ f" \n"
+ f"\n"
+ f" ")
+
+ p = llm(template)
+
+ return p
+
+
+def create_index(wikilist: [str]):
+ """
+ useful for creating the index of wikipages
+ """
+ fetch = Wiki().fetch
+
+    # fetch each page only once (fetch() hits the wikipedia API) and keep only the successful results
+    fetched = [(title, fetch(title)) for title in wikilist]
+    pages = [(title, page) for (title, page) in fetched if not isinstance(page, str)]
+ texts = []
+ chunk = 800
+ for title, page in pages:
+ texts.append(WikiPage(title=title, fulltext=page.page_content))
+
+ doc_splitter = CharacterTextSplitter(
+ separator=".",
+ chunk_size=chunk,
+ chunk_overlap=100,
+ length_function=len,
+ )
+
+    # split every fetched page (not only the first one) into chunks for indexing
+    split_texts = []
+    for wiki_page in texts:
+        for p in wiki_page.get_paragraphs(chunk=800):
+            split_texts += doc_splitter.split_text(p)
+
+ for split_text in split_texts:
+ assert type(split_text) == str
+ assert 0 < len(split_text) < 2 * 500
+
+ wiki_index = Chroma.from_texts(split_texts)
+
+ return wiki_index
+
+
+def get_wiki_paragraph(wiki_index, task: {}) -> str:
+ """useful to get a summary in one line from wiki index"""
+
+ task_description = get_public_paragraph(task)
+ wiki_paragraphs = semantic_search(wiki_index, task_description)
+ text_content = ""
+ for p in wiki_paragraphs:
+ text_content += p.page_content + "/n/n"
+
+ template = (f"\n"
+ f" Your task consists in generating a paragraph\\n"
+ f" whose description is delimited by triple backticks: ```{task['description']}```\n"
+ f"\n"
+ f" The text generation is based in the documents provided in these sections \n"
+ f" delimited by by triple backticks: ``` {text_content}``` \n"
+ f" The paragraph belongs at the top level of the hierarchy to a document \\n"
+ f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
+ f" Make sure that the paragraph relates the top level of the document\n"
+ f" \n"
+ f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
+ f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
+ f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
+ f" \n"
+ f" The paragraphs comes after previous paragraphs \\n"
+ f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
+ f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
+ f" \n"
+ f" The paragraphs comes before next paragraphs \\n"
+ f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
+ f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
+ f" \n"
+ f" \n"
+ f"\n"
+ f" ")
+
+ llm = openai_llm
+ p = llm(template)
+
+ return p
+
+
+def get_private_paragraph(texts, task: {}) -> str:
+ """useful to get a summary in one line from wiki index"""
+
+ text_content = ""
+ for t in texts:
+ text_content += t + "/n/n"
+
+ template = (f"\n"
+ f" Your task consists in generating a paragraph\\n"
+ f" whose description is delimited by triple backticks: ```{task['description']}```\n"
+ f"\n"
+ f" The text generation is based in the documents provided in these sections \n"
+ f" delimited by by triple backticks: ``` {text_content}``` \n"
+ f" The paragraph belongs at the top level of the hierarchy to a document \\n"
+ f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
+ f" Make sure that the paragraph relates the top level of the document\n"
+ f" \n"
+ f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
+ f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
+ f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
+ f" \n"
+ f" The paragraphs comes after previous paragraphs \\n"
+ f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
+ f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
+ f" \n"
+ f" The paragraphs comes before next paragraphs \\n"
+ f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
+ f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
+ f" \n"
+ f" \n"
+ f"\n"
+ f" ")
+
+ llm = openai_llm
+ p = llm(template)
+
+ return p
diff --git a/src/tools/llms.py b/src/tools/llms.py
new file mode 100644
index 0000000000000000000000000000000000000000..0b205a75ef919b4a3d0e487b80add075d58f21aa
--- /dev/null
+++ b/src/tools/llms.py
@@ -0,0 +1,22 @@
+from langchain.llms import OpenAI
+# from transformers import AutoTokenizer, AutoModelForCausalLM
+import os
+
+
+OpenAI_KEY = "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"
+os.environ["OPENAI_API_KEY"] = OpenAI_KEY
+
+openai_llm = OpenAI(temperature=0) #CHAT GPT MODEL
+
+# llm_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf") #LAMA MODEL
+
+SERPAPI_API_KEY = "dba90c4ecfa942f37e2b9eb2e7c6600ef7fb5c02ab8bbfacef426773df14c06b"
+os.environ["SERPAPI_API_KEY"] = SERPAPI_API_KEY
+
+
+"""
+HF_API_KEY = "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"
+hf_llm = HuggingFaceHub(repo_id="google/flan-t5-small",
+ model_kwargs={"temperature": 0, "max_length": 1000},
+ huggingfacehub_api_token=HF_API_KEY)
+"""
diff --git a/src/tools/semantic_db.py b/src/tools/semantic_db.py
new file mode 100644
index 0000000000000000000000000000000000000000..8354c653c42631b5cfb5301765c67c7053de0d3f
--- /dev/null
+++ b/src/tools/semantic_db.py
@@ -0,0 +1,70 @@
+import chromadb
+from datetime import datetime
+
+chroma_client = chromadb.Client()
+
+
+def get_or_create_collection(coll_name: str):
+ date = coll_name[:6]
+ coll = chroma_client.get_or_create_collection(name=coll_name, metadata={"date": date})
+ return coll
+
+
+def get_collection(coll_name: str):
+ coll = chroma_client.get_collection(name=coll_name)
+ return coll
+
+
+def reset_collection(coll_name: str):
+ coll = chroma_client.get_collection(name=coll_name)
+ coll.delete()
+ return coll
+
+
+def delete_old_collections(old=2):
+ collections = chroma_client.list_collections()
+ current_hour = int(datetime.now().strftime("%m%d%H"))
+
+ for coll in collections:
+ coll_hour = int(coll.metadata['date'])
+ if coll_hour < current_hour - old:
+ chroma_client.delete_collection(coll.name)
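+
+# Illustrative note (not executed): collection names are expected to start with a "%m%d%H" stamp,
+# which get_or_create_collection stores as the 'date' metadata; delete_old_collections then drops
+# every collection whose stamp is more than `old` hours behind the current "%m%d%H" value, e.g. a
+# collection named "091215-offer" (hypothetical) created at 15h on Sep 12 is deleted once the clock
+# reads "091218" or later with old=2 (the comparison is purely numeric, so it does not roll over
+# across month or year boundaries).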
+
+
+def add_texts_to_collection(coll_name: str, texts: [str], file: str, source: str):
+ """
+    add texts to a collection; the texts all originate from the same file
+ """
+ coll = chroma_client.get_collection(name=coll_name)
+ filenames = [{file: 1, 'source': source} for _ in texts]
+ ids = [file+'-'+str(i) for i in range(len(texts))]
+ try:
+ coll.delete(ids=ids)
+ coll.add(documents=texts, metadatas=filenames, ids=ids)
+ except:
+ print(f"exception raised for collection :{coll_name}, texts: {texts} from file {file} and source {source}")
+
+
+def delete_collection(coll_name: str):
+ chroma_client.delete_collection(name=coll_name)
+
+
+def list_collections():
+ return chroma_client.list_collections()
+
+
+def query_collection(coll_name: str, query: str, from_files: [str], n_results: int = 4):
+ assert 0 < len(from_files)
+ coll = chroma_client.get_collection(name=coll_name)
+ where_ = [{file: 1} for file in from_files]
+ where_ = where_[0] if len(where_) == 1 else {'$or': where_}
+ n_results_ = min(n_results, coll.count())
+
+ ans = ""
+ try:
+ ans = coll.query(query_texts=query, n_results=n_results_, where=where_)
+ except:
+ print(f"exception raised at query collection for collection {coll_name} and query {query} from files "
+ f"{from_files}")
+
+ return ans
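+
+
+# Rough usage sketch (illustrative, the collection and file names are hypothetical):
+#   get_or_create_collection("091215-offer")
+#   add_texts_to_collection("091215-offer", ["chunk one", "chunk two"], "context.docx", "wiki")
+#   query_collection("091215-offer", "what is the project scope?", from_files=["context.docx"])
+# builds a where filter of the form {'context.docx': 1} (or an '$or' of such dicts when several
+# files are given) and returns at most n_results matching chunks.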
diff --git a/src/tools/wiki.py b/src/tools/wiki.py
new file mode 100644
index 0000000000000000000000000000000000000000..6022dc5ab7a8a0381706af790159404592f7f183
--- /dev/null
+++ b/src/tools/wiki.py
@@ -0,0 +1,61 @@
+from typing import Union
+
+from langchain.docstore.base import Docstore
+from langchain.docstore.document import Document
+
+
+
+class Wiki(Docstore):
+ """
+ Wrapper around wikipedia API.
+ """
+
+ def __init__(self) -> None:
+ """Check that wikipedia package is installed."""
+ try:
+ import wikipedia # noqa: F401
+ except ImportError:
+ raise ValueError(
+ "Could not import wikipedia python package. "
+ "Please install it with `pip install wikipedia`."
+ )
+
+ @staticmethod
+ def fetch(searched_page: str) -> Union[str, Document]:
+ """
+        Try to fetch the wiki page.
+
+        If the page exists, return a Document holding its content and url.
+        If it does not exist or is ambiguous, return a message listing similar entries.
+ """
+ import wikipedia
+
+ try:
+ # wikipedia.set_lang("fr")
+ page_content = wikipedia.page(searched_page).content
+ url = wikipedia.page(searched_page).url
+ result: Union[str, Document] = Document(
+ page_content=page_content, metadata={"page": url}
+ )
+ except wikipedia.PageError:
+ result = f"Could not find [{searched_page}]. Similar: {wikipedia.search(searched_page)}"
+
+ except wikipedia.DisambiguationError:
+ result = f"Could not find [{searched_page}]. Similar: {wikipedia.search(searched_page)}"
+ return result
+
+    @staticmethod
+    def search(searched_context: str) -> [str]:
+        """
+        Finds wiki page titles related to the given context
+        """
+ import wikipedia
+
+ try:
+ # wikipedia.set_lang("fr")
+ page_title_list = wikipedia.search(searched_context)
+ result = page_title_list
+ except wikipedia.PageError:
+ result = f"Could not find [{searched_context}]."
+ return result
+
+
diff --git a/src/view/log_msg.py b/src/view/log_msg.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d96d8592ce1f689d0adebe42bb2a779600229bb
--- /dev/null
+++ b/src/view/log_msg.py
@@ -0,0 +1,45 @@
+from typing import Dict
+
+from config import config
+
+
+def create_msg_from(logs: [Dict], docs) -> str:
+ log_messages = []
+ log_msg = config['log_msg']
+ docs_seen = []
+ msg = ''
+ for doc in docs:
+ for log in logs:
+ if doc.name in log.keys():
+ log = log[doc.name]
+ if 'options_applied' in log.keys():
+ msg += log_msg['options_applied']
+ for option in log['options_applied']:
+ msg += " - " + option + "\n"
+ if 'suppressed_styles' in log.keys():
+ if log['suppressed_styles']:
+ msg += log_msg['suppressed_styles']
+ for style_name in log['suppressed_styles']:
+ msg += " - " + style_name + "\n"
+ if log['modified_styles']:
+ msg += log_msg['modified_styles']
+ for style, log_s in log['modified_styles']:
+ msg += log_msg['modified_style'] + style + "\n"
+ for modif, _ in log_s:
+ msg += log_msg[modif] + ' '
+ msg += '\n'
+ if log['added_styles']:
+ msg += log_msg['added_styles']
+ for style_name in log['added_styles']:
+ msg += " - " + style_name + "\n"
+ if 'style_mapping' in log.keys():
+ msg = log['style_mapping']
+ if msg:
+ if doc not in docs_seen:
+ msg = log_msg['document'] + doc.name + '\n' + msg
+ docs_seen.append(doc)
+ log_messages.append(msg)
+ msg = ''
+ log_messages_str = '\n'.join(log_messages)
+ return log_messages_str
+
diff --git a/src/view/style_components.py b/src/view/style_components.py
new file mode 100644
index 0000000000000000000000000000000000000000..e6225bb3206af3d3e27c42ee832d782f54492824
--- /dev/null
+++ b/src/view/style_components.py
@@ -0,0 +1,11 @@
+import gradio as gr
+
+
+import config
+
+
+def input_files_fn1(input_files_):
+ update_ = {
+ output_files_comp: gr.update(visible=True)
+ } if input_files_ else {}
+ return update_
\ No newline at end of file
diff --git a/src/view/test_view.py b/src/view/test_view.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb845ba4c6f6ef2bd66c3e988014808155e1ef45
--- /dev/null
+++ b/src/view/test_view.py
@@ -0,0 +1,35 @@
+import gradio as gr
+import random
+
+with gr.Blocks() as test:
+ list_2 = ["choix21", "choix 22", "et choix 23"]
+ with gr.Row():
+ with gr.Accordion("See Details") as grac:
+ gr.Markdown("lorem ipsum")
+ hide_btn = gr.Button("hide")
+ show_btn = gr.Button("show")
+
+ def hide_fn():
+ update_ = {
+ grac: gr.update(open=False)
+ }
+ return update_
+
+ def show_fn():
+ update_ = {
+ grac: gr.update(open=True)
+ }
+ return update_
+
+ hide_btn.click(hide_fn,
+ inputs=[],
+ outputs=[grac])
+ show_btn.click(show_fn,
+ inputs=[],
+ outputs=[grac])
+
+
+
+
+if __name__ == "__main__":
+ test.launch()
diff --git a/src/view/view.py b/src/view/view.py
new file mode 100644
index 0000000000000000000000000000000000000000..da9c4e53a839f2c3dd503253a1da6cd7115120a7
--- /dev/null
+++ b/src/view/view.py
@@ -0,0 +1,404 @@
+import gradio as gr
+from typing import Dict
+import asyncio
+import os
+from src.control.controller import Controller
+from Levenshtein import distance
+from src.tools.list_tool import keep_last_occurrences
+
+
+def run(config: Dict, controller: Controller):
+
+ """
+ =====================================================
+ Global variables
+ ================
+ """
+ controller.clear_docs()
+ title = "
Automatic Document Generation
"
+ with gr.Blocks() as formatdoc:
+ gr.Markdown(title)
+ gr.Markdown("_________________________________________
")
+ with gr.Row():
+ with gr.Column():
+ pass
+ with gr.Column(scale=10):
+ """
+ =====================================================
+ Input and style components
+ ==========================
+ """
+
+ input_files_comp = gr.File(file_count="multiple", file_types=[".docx"])
+
+ with gr.Accordion("Modifier automatiquement les styles", open=False) as style_acc:
+ templates_radio = gr.Radio(
+ label="Templates",
+ choices=config['templates'],
+ value=config['templates'][config['default_template_index']],
+ )
+ options_btn = gr.CheckboxGroup(choices=config['options'],
+ label="Options",
+ interactive=True,)
+
+ with gr.Accordion("Mapper les styles qui n'existent pas dans le template", open=False) \
+ as newstyles_acc:
+ with gr.Column(scale=2):
+ newstyle_comps = [gr.Dropdown(visible=False, interactive=True)
+ for _ in range(config['max_styles'])]
+
+ log_comp = gr.Textbox(label="Journal des modifications", visible=False)
+
+ output_styles_files_comp = gr.File(file_count="multiple", file_types=[".docx"], visible=False)
+
+ with gr.Row():
+ run_style_btn = gr.Button("Appliquer le template et les modifications de style", visible=False)
+ clear_style_btn = gr.Button("Annuler les modifications de style", visible=False)
+
+ """
+ ===============================================
+ Generation components
+ ======================
+ """
+ with gr.Accordion("Générer automatiquement une premiÚre version du document", open=False) as gen_acc:
+
+ generate_option_btn = gr.Radio(
+ label="Automatically generate a draft based on your own database",
+ choices=["Auto generation", "No generation"],
+ value="No generation",
+ interactive=True,
+ visible=False,
+ )
+
+ db_list_comp = gr.CheckboxGroup(
+ label="Base de connaissance",
+ info="Ces documents constituent la source de référence. Désélectionner pour qu'ils ne soient "
+                     "pas pris en compte lors de la génération automatique",
+ visible=True,
+ interactive=True,
+ )
+ db_reset_btn = gr.Button("Effacer la base de connaissance", visible=False) \
+ .style(full_width=False, size="sm")
+ with gr.Accordion("Ajouter des documents dans la base de connaissance", open=False):
+ with gr.Column(visible=True, variant="panel") as add_col:
+ with gr.Tab("Depuis Wikipedia"):
+ wiki_fetch_btn = gr.Button("Rechercher les pages Wikipedia", visible=True)
+ wiki_fetch_btn.style(full_width=False, size="sm")
+ wiki_list_comp = gr.CheckboxGroup(
+ label="Sélectionner les pages à ajouter dans la base de connaissance",
+ visible=False,
+ interactive=True,
+ )
+
+ with gr.Column():
+ wiki_add_to_db_btn = \
+ gr.Button("Ajouter les documents sélectionnés à la base de connaissance",
+ visible=False)
+ wiki_add_to_db_btn.style(full_width=False, size="sm")
+
+ wiki_clear_btn = gr.Button("Effacer les choix de documents", visible=False) \
+ .style(full_width=False, size="sm")
+
+ with gr.Tab("Depuis le disque local"):
+ my_files_list_comp = gr.Files(
+ label="Charger ses documents",
+ info="Les documents fournissent le contexte utilisé pour la génération de texte",
+ visible=True,
+ )
+ my_files_add_to_db_btn = gr.Button("Add files to sources", visible=False)
+ my_files_add_to_db_btn.style(full_width=False, size="sm")
+
+ add_close_btn = gr.Button("Close", visible=False).style(size='sm', full_width=False)
+ with gr.Row():
+ db_add_doc_btn = gr.Button("Ajouter de nouveaux documents", visible=False)\
+ .style(full_width=False, size="sm")
+
+ output_files_comp = gr.Files(file_count="multiple", visible=False)
+
+ generate_btn = gr.Button("Générer", interactive=True)
+
+ clear_btn = gr.Button('Nettoyer', visible=False)
+ rerun_btn = gr.Button('Relancer', visible=False)
+
+ with gr.Column():
+ pass
+
+ """
+ ===================================================
+ state variables
+ ===============
+ """
+ wiki_source_var: [str] = gr.State([]) # list of wikipage titles of interest for the input text tasks
+ wiki_db_var: [str] = gr.State([]) # list of wiki document titles in the db (as seen from the UI)
+ my_files_db_var: [str] = gr.State([]) # list of titles of the files uploaded in the db (as seen from the UI)
+        db_collection_var: str = gr.State("-1")  # name of the collection of document sources in the db
+
+ """
+ ===================================================
+ Input and styles functions and listeners
+ ========================================
+ """
+
+ def input_files_upload_fn(input_files_):
+ for files in input_files_:
+ if(not files.name.endswith('.docx')):
+ raise gr.Error(f'File {files.name} is not a docx file, please upload only docx files')
+ else:
+ continue
+ controller.copy_docs(input_files_)
+ update_ = {
+ newstyles_acc: gr.update(open=True),
+ style_acc: gr.update(open=False,visible=True),
+ run_style_btn: gr.update(visible=True),
+ clear_style_btn: gr.update(visible=True),
+ }
+ newstyles_update = newstyles_fn()
+ update_.update(newstyles_update)
+ return update_
+
+ input_files_comp.upload(input_files_upload_fn,
+ inputs=[input_files_comp],
+ outputs=[style_acc, newstyles_acc, run_style_btn, clear_style_btn] + newstyle_comps
+ )
+
+ def input_file_clear_fn():
+ controller.clear_docs()
+ update_ = {
+ options_btn: gr.update(value=[]),
+ log_comp: gr.update(value="", visible=False),
+ output_styles_files_comp: gr.update(value=[], visible=False),
+ newstyles_acc: gr.update(open=False),
+ style_acc: gr.update(open=False),
+ gen_acc: gr.update(open=False),
+ output_files_comp: gr.update(visible=False),
+ run_style_btn: gr.update(visible=False),
+ clear_style_btn: gr.update(visible=False),
+ }
+ newstyles_update_ = newstyles_reset()
+ update_.update(newstyles_update_)
+ return update_
+
+ input_files_comp.clear(
+ input_file_clear_fn,
+ inputs=[],
+ outputs=[options_btn, output_styles_files_comp, output_files_comp, log_comp, newstyles_acc,
+ gen_acc, style_acc, run_style_btn, clear_style_btn] + newstyle_comps
+ )
+
+
+
+ def newstyles_fn():
+ different_styles, template_styles = controller.get_difference_with_template()
+ update_ = {}
+ get_label = lambda i: f"document: {different_styles[i]['doc'].name} style: {different_styles[i]['style']}"
+ newstyles_update_ = {
+ newstyle_comps[i]: gr.update(visible=i < len(different_styles),
+ #sort the styles using levenstein distance function
+ choices=sorted(template_styles, key=lambda x: distance(x, different_styles[i]['style'])),
+ value=None,
+ label=get_label(i)) if i < len(different_styles) else ''
+ for i in range(config['max_styles'])
+ }
+ update_.update(newstyles_update_)
+ return update_
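+
+        # Illustrative note: the template styles are offered closest-name-first, e.g. for a document
+        # style named '.CorpsDeTexte' a template style '.Corps de texte' ranks before 'Heading 1',
+        # since its Levenshtein distance is smaller (the style names here are hypothetical).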
+
+
+ def newstyles_reset():
+ update_ = {
+ newstyle_comps[i]: gr.update(visible=False,
+ choices=[],
+ value=None,
+ label='')
+ for i in range(config['max_styles'])
+ }
+ return update_
+
+ def templates_fn(templates_):
+ controller.set_template(templates_)
+ update_ = newstyles_fn()
+ return update_
+
+ templates_radio.change(templates_fn,
+ inputs=[templates_radio],
+ outputs=newstyle_comps)
+
+ def newstyle_fns(src_index: int):
+ def newstyle_fn(newstyle_):
+ controller.update_style(src_index, newstyle_)
+ return newstyle_fn
+
+ for src_index, newstyle_comp in enumerate(newstyle_comps):
+ newstyle_comp.input(newstyle_fns(src_index), inputs=[newstyle_comp], outputs=[])
+
+
+ def clear_style_fn(input_files_):
+ controller.clear_docs()
+ if input_files_:
+ controller.copy_docs(input_files_)
+ controller.set_template()
+ update_ = {
+ options_btn: gr.update(value=[]),
+ log_comp: gr.update(value="", visible=False),
+ output_styles_files_comp: gr.update(value=[], visible=False),
+ newstyles_acc: gr.update(open=False),
+ run_style_btn: gr.update(visible=True),
+ }
+ newstyles_update_ = newstyles_fn()
+ update_.update(newstyles_update_)
+ return update_
+
+ clear_style_btn.click(clear_style_fn,
+ inputs=[input_files_comp],
+ outputs=[options_btn, output_styles_files_comp, log_comp, newstyles_acc, run_style_btn]
+ + newstyle_comps
+ )
+
+ def run_style_fn(options_btn_):
+ print(f"options activated : {options_btn_}")
+ controller.apply_template(options_btn_)
+ log = controller.get_log()
+ new_docs_path = controller.generated_docs_path
+ output_paths = [f"{new_docs_path}/{f}" for f in os.listdir(new_docs_path)]
+ print(f"output_paths: {output_paths}")
+ update_ = {
+ log_comp: gr.update(value=log, visible=True),
+ output_styles_files_comp: gr.update(value=output_paths, visible=True),
+ run_style_btn: gr.update(visible=False),
+ }
+ return update_
+
+
+ run_style_btn.click(run_style_fn,
+ inputs=[options_btn],
+ outputs=[log_comp, output_styles_files_comp, run_style_btn] + newstyle_comps)
+
+ """
+ =====================================================
+ Generation functions
+ ====================
+ """
+
+ def generate_option_fn(db_collection_):
+ id_ = controller.get_or_create_collection(db_collection_)
+ update_ = {
+ db_collection_var: id_
+ }
+ return update_
+
+ def wiki_fetch1_fn():
+ """
+ fetch the wikifiles interesting for solving the tasks as defined in the input doc
+ """
+ update_ = {
+ wiki_list_comp: gr.update(visible=True),
+ }
+ return update_
+
+ async def wiki_fetch2_fn():
+ """
+ fetch the wikifiles interesting for solving the tasks as defined in the input doc
+ """
+ wiki_interesting_files = await controller.wiki_fetch()
+ wiki_files = wiki_interesting_files # [w for w in wiki_interesting_files if w not in wiki_db_files_]
+ update_ = {
+ wiki_list_comp: gr.update(visible=True, value=[], choices=wiki_files),
+ wiki_source_var: wiki_interesting_files,
+ wiki_add_to_db_btn: gr.update(visible=True),
+ # wiki_clear_btn: gr.update(visible=True), #Button to clear the choices that are by default all ticked
+ }
+ return update_
+
+ async def wiki_add_to_db_fn(wiki_list_, wiki_source_, wiki_db_, db_list_, db_collection_):
+ """
+ adds the wikipages to the db source
+ """
+ wiki_to_add = [wiki for wiki in wiki_list_ if wiki not in wiki_db_]
+ db_list_ += wiki_to_add
+ wiki_db_ += wiki_to_add
+ wiki_source_remaining = [wiki for wiki in wiki_source_ if wiki not in wiki_db_]
+        async_upload_and_store_tasks = [asyncio.create_task(controller.wiki_upload_and_store(wiki, db_collection_)) for wiki in wiki_to_add]  # to be moved into the controller
+ await asyncio.gather(*async_upload_and_store_tasks)
+ db_not_empty = 0 < len(db_list_)
+ wiki_to_add_not_empty = 0 < len(wiki_source_remaining)
+ update_ = {
+ wiki_db_var: wiki_db_,
+ wiki_list_comp: gr.update(value=[], choices=wiki_source_remaining),
+ wiki_add_to_db_btn: gr.update(visible=wiki_to_add_not_empty),
+ db_list_comp: gr.update(
+ visible=True,
+ value=db_list_,
+ choices=db_list_,
+ label="Database content"),
+ db_reset_btn: gr.update(visible=db_not_empty),
+ generate_btn: gr.update(visible=True, interactive=db_not_empty),
+ }
+ return update_
+
+ def generate_fn1():
+ update_ = {
+ output_files_comp: gr.update(visible=True)
+ }
+ return update_
+
+ async def generate_fn2(db_collection_, db_list_):
+ output_files = await controller.generate_doc_from_db(collection_name=db_collection_,
+ from_files=db_list_)
+ update_ = {
+ output_files_comp: gr.update(value=output_files, visible=True),
+ }
+ return update_
+
+
+ """
+ =====================================================
+ Generation listeners
+ ====================
+ """
+
+ wiki_fetch_btn \
+ .click(wiki_fetch1_fn, inputs=[], outputs=[wiki_list_comp]) \
+ .then(wiki_fetch2_fn,
+ inputs=[],
+ outputs=[wiki_list_comp, wiki_source_var, wiki_add_to_db_btn, wiki_clear_btn])
+
+ wiki_add_to_db_btn\
+ .click(generate_option_fn,
+ inputs=[db_collection_var],
+ outputs=[db_collection_var])\
+ .then(wiki_add_to_db_fn,
+ inputs=[wiki_list_comp, wiki_source_var, wiki_db_var, db_list_comp, db_collection_var],
+ outputs=[db_list_comp, wiki_list_comp, wiki_db_var,
+ generate_btn, wiki_add_to_db_btn, db_reset_btn])
+
+ generate_btn\
+ .click(generate_fn1,
+ inputs=[],
+ outputs=[output_files_comp])\
+ .then(generate_fn2,
+ inputs=[db_collection_var, db_list_comp],
+ outputs=[output_files_comp])
+
+
+ """
+ =====================================================
+ Clear and rerun functions and listeners
+ =======================================
+ """
+
+ def clear_fn():
+ update_ = {
+ input_files_comp: gr.update(value=None),
+ output_files_comp: gr.update(value=None, visible=False),
+ clear_btn: gr.update(visible=False),
+ rerun_btn: gr.update(visible=False),
+ }
+ return update_
+
+ clear_btn.click(clear_fn,
+ inputs=[],
+ outputs=[input_files_comp, output_files_comp, clear_btn, rerun_btn])
+
+ # wiki_clear_btn.click(clear_choices_fn, inputs=[], outputs=[wiki_list_comp]) #listener for the clear button of the wiki choices
+ return formatdoc
diff --git a/test_app.py b/test_app.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b2d7ab2d078c4a767e956dd442b17ecb9ad1eae
--- /dev/null
+++ b/test_app.py
@@ -0,0 +1,67 @@
+import docx
+from docx.enum.style import WD_STYLE_TYPE
+import os
+from config import config
+from typing import Dict
+import random
+import datetime
+import string
+
+from lxml import etree
+
+from src.domain.doc import Doc
+
+
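+# Manual smoke-test script: load the default template and an example document, copy the
+# document, then inspect and print some of its styles. Run directly with `python test_app.py`.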
+name = 'CorpTemplate.docx'
+
+template_path = config['templates_path'] + '/' + config['templates'][config['default_template_index']]
+template = Doc(template_path)
+doc_path = config['these_docs_path'] + name
+this_doc = Doc(path=doc_path)
+new_doc_path = config['new_docs_path'] + this_doc.name + '_.docx'
+new_doc = this_doc.copy(new_doc_path)
+
+
+new_styles = new_doc.styles.xstyles
+print(etree.tostring(new_styles['.Titre1'].element))
+names = new_doc.styles.names
+print(names)
+new_doc.save_as_docx()
+
+
+s = template.styles.xstyles['.BodyText']
+# new_styles.add_style(s.name, WD_STYLE_TYPE.PARAGRAPH)
+
+
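+# Collect (style object, style name) pairs for every LIST-type style defined in the template.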
+list_styles = [(s, s.name) for s in template.styles.xstyles if s.type == WD_STYLE_TYPE.LIST]
+
+
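+# Gather the names of the base styles that the new document's paragraph styles derive from.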
+base_styles_set = set()
+for s in new_styles:
+    if s.type == WD_STYLE_TYPE.PARAGRAPH:
+        if s.base_style:
+            try:
+                base_styles_set.add(s.base_style.name)
+            except Exception as exc:
+                print(f"failure for {s}: {exc}")
+
+
+base_styles = list(base_styles_set)
+
+
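+# Disabled scratch snippet: re-style the paragraphs that use '_newBody__2', then delete that style.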
+"""
+for p in new_doc.xdoc.paragraphs:
+    if p.style == new_styles['_newBody__2']:
+        p.style = s.name
+
+new_styles['_newBody__2'].delete()
+new_doc.save_as_docx()
+"""
+
+print(etree.tostring(list_styles[1][0].element))
\ No newline at end of file
diff --git a/unit_test.py b/unit_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..a14694441d3b845959f69116ec25b63b72bff960
--- /dev/null
+++ b/unit_test.py
@@ -0,0 +1,66 @@
+import unittest
+import os
+
+from docx.enum.table import WD_TABLE_ALIGNMENT
+
+from src.domain.doc import Doc
+from src.domain.styles import Styles
+from src.domain.container import Container
+from src.domain.paragraph import Paragraph
+
+
+def tables_are_centered(doc):
+    """Return True when every table in the document is center-aligned."""
+    for table in doc.xdoc.tables:
+        if table.alignment != WD_TABLE_ALIGNMENT.CENTER:
+            return False
+    return True
+
+
+class Test(unittest.TestCase):
+
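+ # The table tests copy their fixture document first and restore it from that copy at the
+ # end, so the files under test/files_to_test/ are left unchanged between runs.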
+ def test_centered_tables(self):
+ doctotest = Doc(path="test/files_to_test/tables/is_centered.docx")
+ tempdoc = doctotest.copy("test/files_to_test/tables/is_centered_copy.docx")
+ tempdoc.save_as_docx()
+ self.assertFalse(tables_are_centered(doctotest))
+ doctotest.center_tables()
+ doctotest.save_as_docx()
+ self.assertTrue(tables_are_centered(doctotest))
+ os.remove(doctotest.path)
+ tempdoc.save_as_docx("test/files_to_test/tables/is_centered.docx")
+
+ def test_centered_tables_within_text(self):
+ doctotest = Doc(path="test/files_to_test/tables/centered_within_text.docx")
+ tempdoc = doctotest.copy("test/files_to_test/tables/centered_within_text_copy.docx")
+ tempdoc.save_as_docx()
+ self.assertFalse(tables_are_centered(doctotest))
+ doctotest.center_tables()
+ doctotest.save_as_docx()
+ self.assertTrue(tables_are_centered(doctotest))
+ os.remove(doctotest.path)
+ tempdoc.save_as_docx("test/files_to_test/tables/centered_within_text.docx")
+
+ def test_noimage(self):
+ counter = 0
+ doctest = Doc(path="test/files_to_test/images/0_image.docx")
+ for p in doctest.get_paragraphs():
+ if p.contains_image():
+ counter += 1
+ self.assertEqual(counter, 0)
+
+ def test_containsimage(self):
+ counter = 0
+ doctest = Doc(path="test/files_to_test/images/1_image.docx")
+ for p in doctest.get_paragraphs():
+ if p.contains_image():
+ counter += 1
+ self.assertEqual(counter, 1)
+
+ def test_someimages(self):
+ counter = 0
+ doctest = Doc(path="test/files_to_test/images/2_images.docx")
+ for p in doctest.get_paragraphs():
+ if p.contains_image():
+ counter += 1
+ self.assertEqual(counter, 2)
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file