adrien.aribaut-gaudin
commited on
Commit
·
498db6b
1
Parent(s):
47ca6bf
feat: new public GenProp
Browse files- .gitattributes +1 -0
- .gitignore +6 -0
- app.py +14 -0
- config.py +28 -0
- data/doc.xml +46 -0
- data/templates/Template_presentation.docx +3 -0
- requirements.txt +0 -0
- src/control/controller.py +285 -0
- src/domain/block.py +71 -0
- src/domain/container.py +219 -0
- src/domain/container_requirements.py +140 -0
- src/domain/doc.py +473 -0
- src/domain/paragraph.py +140 -0
- src/domain/requirements_paragraphs.py +41 -0
- src/domain/styles.py +164 -0
- src/domain/wikidoc.py +128 -0
- src/llm/llm_tools.py +337 -0
- src/llm/llms.py +15 -0
- src/model/block.py +49 -0
- src/model/container.py +143 -0
- src/model/doc.py +54 -0
- src/model/paragraph.py +50 -0
- src/reader/reader_for_requirements.py +143 -0
- src/retriever/retriever.py +198 -0
- src/tools/doc_tools.py +73 -0
- src/tools/index_creation.py +72 -0
- src/tools/list_tool.py +17 -0
- src/tools/paragraph_tools.py +45 -0
- src/tools/pretty_print.py +12 -0
- src/tools/semantic_db.py +70 -0
- src/tools/wiki.py +61 -0
- src/view/log_msg.py +47 -0
- src/view/style_components.py +9 -0
- src/view/test_view.py +34 -0
- src/view/view.py +533 -0
- temp/generated_files/file.txt +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.docx filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
__pycache__
|
2 |
+
venv1
|
3 |
+
test/files_to_test/*
|
4 |
+
config_key.py
|
5 |
+
test
|
6 |
+
.env
|
app.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from config import config
|
2 |
+
from src.control.controller import Controller
|
3 |
+
import src.view.view as view
|
4 |
+
import chromadb
|
5 |
+
from src.retriever.retriever import Retriever
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
client_db = chromadb.Client()
|
10 |
+
|
11 |
+
ctrl = Controller(config, client_db, retriever=Retriever())
|
12 |
+
app = view.run(controller=ctrl, config=config)
|
13 |
+
|
14 |
+
app.queue().launch()
|
config.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
config = {
|
4 |
+
'templates_path': 'data/templates',
|
5 |
+
'these_docs_path': 'data/examples/',
|
6 |
+
'new_docs_path': 'data/examples/',
|
7 |
+
'default_template_index': 0,
|
8 |
+
'styled_docs_path': 'temp/styles_files',
|
9 |
+
'generated_docs_path': 'temp/generated_files',
|
10 |
+
'options': ["Recentrer les tableaux", "Justifier le texte (Normal)"],
|
11 |
+
'max_styles': 300,
|
12 |
+
'log_msg': {
|
13 |
+
'options_applied': 'Les options suivantes ont été appliquées : \n',
|
14 |
+
'suppressed_styles': 'Les styles suivants ont été supprimés : \n',
|
15 |
+
'modified_styles': 'Les styles suivants ont été modifiés : \n',
|
16 |
+
'added_styles': 'Les styles suivants ont été ajoutés :\n',
|
17 |
+
'modified_style': ' - ',
|
18 |
+
'color': ' la couleur,',
|
19 |
+
'font size': ' la taille de la fonte,',
|
20 |
+
'font': ' la fonte,',
|
21 |
+
'all_caps': ' les majuscules,',
|
22 |
+
'bold': 'le caractère gras',
|
23 |
+
'document': '\n============================\n Sur le document : ',
|
24 |
+
},
|
25 |
+
}
|
26 |
+
|
27 |
+
templates = [t for t in os.listdir(config['templates_path']) if t.endswith((".docx"))]
|
28 |
+
config.update({'templates': templates})
|
data/doc.xml
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<w:document xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:cx="http://schemas.microsoft.com/office/drawing/2014/chartex" xmlns:cx1="http://schemas.microsoft.com/office/drawing/2015/9/8/chartex" xmlns:cx2="http://schemas.microsoft.com/office/drawing/2015/10/21/chartex" xmlns:cx3="http://schemas.microsoft.com/office/drawing/2016/5/9/chartex" xmlns:cx4="http://schemas.microsoft.com/office/drawing/2016/5/10/chartex" xmlns:cx5="http://schemas.microsoft.com/office/drawing/2016/5/11/chartex" xmlns:cx6="http://schemas.microsoft.com/office/drawing/2016/5/12/chartex" xmlns:cx7="http://schemas.microsoft.com/office/drawing/2016/5/13/chartex" xmlns:cx8="http://schemas.microsoft.com/office/drawing/2016/5/14/chartex" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:aink="http://schemas.microsoft.com/office/drawing/2016/ink" xmlns:am3d="http://schemas.microsoft.com/office/drawing/2017/model3d" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:oel="http://schemas.microsoft.com/office/2019/extlst" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash" 
xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh wp14">
|
2 |
+
<w:body>
|
3 |
+
<w:tbl>
|
4 |
+
<w:tblPr>
|
5 |
+
<w:tblpPr w:leftFromText="141" w:rightFromText="141" w:vertAnchor="page" w:tblpY="3001"/>
|
6 |
+
<w:tblOverlap w:val="never"/>
|
7 |
+
<w:tblW w:w="7597" w:type="dxa"/>
|
8 |
+
<w:tblLayout w:type="fixed"/>
|
9 |
+
<w:tblCellMar><w:left w:w="0" w:type="dxa"/>
|
10 |
+
<w:right w:w="0" w:type="dxa"/>
|
11 |
+
</w:tblCellMar><w:tblLook w:val="0000" w:firstRow="0" w:lastRow="0" w:firstColumn="0" w:lastColumn="0" w:noHBand="0" w:noVBand="0"/>
|
12 |
+
</w:tblPr>
|
13 |
+
<w:tblGrid>
|
14 |
+
<w:gridCol w:w="7597"/>
|
15 |
+
</w:tblGrid>
|
16 |
+
<w:tr w:rsidR="008F20A4" w:rsidRPr="00C335CE" w14:paraId="58F81C33" w14:textId="77777777" w:rsidTr="00C4517C">
|
17 |
+
<w:trPr><w:cantSplit/>
|
18 |
+
<w:trHeight w:hRule="exact" w:val="397"/>
|
19 |
+
</w:trPr><w:tc>
|
20 |
+
<w:tcPr>
|
21 |
+
<w:tcW w:w="7597" w:type="dxa"/>
|
22 |
+
<w:shd w:val="clear" w:color="auto" w:fill="auto"/>
|
23 |
+
</w:tcPr><w:p w14:paraId="18CBAEA8" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00C4517C">
|
24 |
+
<w:pPr><w:pStyle w:val="BodyText"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Réponse à</w:t></w:r></w:p></w:tc>
|
25 |
+
</w:tr><w:tr w:rsidR="008F20A4" w:rsidRPr="00C335CE" w14:paraId="6B21A2A2" w14:textId="77777777" w:rsidTr="00C4517C">
|
26 |
+
<w:trPr><w:cantSplit/>
|
27 |
+
<w:trHeight w:hRule="exact" w:val="851"/>
|
28 |
+
</w:trPr><w:tc><w:tcPr><w:tcW w:w="7597" w:type="dxa"/>
|
29 |
+
<w:shd w:val="clear" w:color="auto" w:fill="auto"/><w:vAlign w:val="bottom"/></w:tcPr>
|
30 |
+
<w:p w14:paraId="415EC112" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00DE680A">
|
31 |
+
<w:pPr><w:pStyle w:val="CompanyName"/>
|
32 |
+
<w:framePr w:hSpace="0" w:wrap="auto" w:vAnchor="margin" w:hAnchor="text" w:xAlign="left" w:yAlign="inline"/>
|
33 |
+
<w:suppressOverlap w:val="0"/></w:pPr><w:r w:rsidRPr="00C335CE">
|
34 |
+
<w:t>Nom du Client</w:t>
|
35 |
+
</w:r>
|
36 |
+
</w:p>
|
37 |
+
</w:tc>
|
38 |
+
</w:tr>
|
39 |
+
<w:tr w:rsidR="008F20A4" w:rsidRPr="00C335CE" w14:paraId="4CECECE0" w14:textId="77777777" w:rsidTr="00C4517C">
|
40 |
+
<w:trPr><w:cantSplit/><w:trHeight w:hRule="exact" w:val="397"/>
|
41 |
+
</w:trPr>
|
42 |
+
<w:tc>
|
43 |
+
<w:tcPr>
|
44 |
+
<w:tcW w:w="7597" w:type="dxa"/>
|
45 |
+
<w:shd w:val="clear" w:color="auto" w:fill="auto"/>
|
46 |
+
</w:tcPr><w:p w14:paraId="04690B8E" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00C4517C"><w:pPr><w:pStyle w:val="BodyText"/></w:pPr><w:proofErr w:type="gramStart"/><w:r w:rsidRPr="00C335CE"><w:t>pour</w:t></w:r><w:proofErr w:type="gramEnd"/><w:r w:rsidRPr="00C335CE"><w:t xml:space="preserve"> le</w:t></w:r></w:p></w:tc></w:tr><w:tr w:rsidR="008F20A4" w:rsidRPr="00C335CE" w14:paraId="10E37A3B" w14:textId="77777777" w:rsidTr="00C4517C"><w:trPr><w:cantSplit/><w:trHeight w:hRule="exact" w:val="1871"/></w:trPr><w:tc><w:tcPr><w:tcW w:w="7597" w:type="dxa"/><w:shd w:val="clear" w:color="auto" w:fill="auto"/><w:vAlign w:val="bottom"/></w:tcPr><w:p w14:paraId="3848203F" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00011EBE"><w:pPr><w:pStyle w:val="ProjectNumber"/><w:framePr w:hSpace="0" w:wrap="auto" w:hAnchor="text" w:xAlign="left" w:yAlign="inline"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Style pour cette page seulement (non recopié en en-tête)</w:t></w:r></w:p><w:p w14:paraId="4209E6AE" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00011EBE"><w:pPr><w:pStyle w:val="ProjectName"/><w:framePr w:hSpace="0" w:wrap="auto" w:vAnchor="margin" w:hAnchor="text" w:xAlign="left" w:yAlign="inline"/><w:suppressOverlap w:val="0"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Nom du projet</w:t></w:r></w:p></w:tc></w:tr><w:tr w:rsidR="008F20A4" w:rsidRPr="00C335CE" w14:paraId="5CF53648" w14:textId="77777777" w:rsidTr="00C4517C"><w:trPr><w:cantSplit/><w:trHeight w:hRule="exact" w:val="397"/></w:trPr><w:tc><w:tcPr><w:tcW w:w="7597" w:type="dxa"/><w:shd w:val="clear" w:color="auto" w:fill="auto"/></w:tcPr><w:p w14:paraId="01D4D4D2" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00C4517C"><w:pPr><w:pStyle w:val="BodyText"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Date de 
remise</w:t></w:r></w:p></w:tc></w:tr><w:tr w:rsidR="008F20A4" w:rsidRPr="00C335CE" w14:paraId="47C4D541" w14:textId="77777777" w:rsidTr="00C4517C"><w:trPr><w:cantSplit/><w:trHeight w:hRule="exact" w:val="397"/></w:trPr><w:tc><w:tcPr><w:tcW w:w="7597" w:type="dxa"/><w:shd w:val="clear" w:color="auto" w:fill="auto"/><w:vAlign w:val="bottom"/></w:tcPr><w:p w14:paraId="463058A0" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00C4517C"><w:pPr><w:pStyle w:val="DateDue"/><w:framePr w:hSpace="0" w:wrap="auto" w:vAnchor="margin" w:hAnchor="text" w:xAlign="left" w:yAlign="inline"/><w:suppressOverlap w:val="0"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>JJ/MM/AAAA</w:t></w:r></w:p></w:tc></w:tr><w:tr w:rsidR="008F20A4" w:rsidRPr="00C335CE" w14:paraId="6452573F" w14:textId="77777777" w:rsidTr="00C4517C"><w:trPr><w:cantSplit/><w:trHeight w:hRule="exact" w:val="340"/></w:trPr><w:tc><w:tcPr><w:tcW w:w="7597" w:type="dxa"/><w:shd w:val="clear" w:color="auto" w:fill="auto"/><w:vAlign w:val="bottom"/></w:tcPr><w:p w14:paraId="6536045D" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00C4517C"><w:pPr><w:pStyle w:val="Classification"/><w:framePr w:hSpace="0" w:wrap="auto" w:vAnchor="margin" w:hAnchor="text" w:xAlign="left" w:yAlign="inline"/><w:suppressOverlap w:val="0"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:rPr><w:sz w:val="18"/></w:rPr><w:t>Strictement confidentiel</w:t></w:r></w:p></w:tc></w:tr></w:tbl><w:p w14:paraId="45EA0891" w14:textId="77777777" w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidRDefault="005A01BC" w:rsidP="00891B8F"><w:pPr><w:pStyle w:val="documentControl"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:rPr><w:noProof/><w:lang w:eastAsia="fr-FR"/></w:rPr><w:drawing><wp:anchor distT="0" distB="0" distL="114300" distR="114300" simplePos="0" relativeHeight="251659264" behindDoc="1" locked="0" layoutInCell="0" allowOverlap="0" wp14:anchorId="4A040FE9" 
wp14:editId="18E34965"><wp:simplePos x="0" y="0"/><wp:positionH relativeFrom="margin"><wp:align>right</wp:align></wp:positionH><wp:positionV relativeFrom="margin"><wp:align>bottom</wp:align></wp:positionV><wp:extent cx="6768000" cy="6786000"/><wp:effectExtent l="0" t="0" r="0" b="0"/><wp:wrapNone/><wp:docPr id="11" name="Image 11"/><wp:cNvGraphicFramePr><a:graphicFrameLocks xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" noChangeAspect="1"/></wp:cNvGraphicFramePr><a:graphic xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture"><pic:pic xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture"><pic:nvPicPr><pic:cNvPr id="0" name="Picture 2" descr="Page garde offre"/><pic:cNvPicPr><a:picLocks noChangeAspect="1" noChangeArrowheads="1"/></pic:cNvPicPr></pic:nvPicPr><pic:blipFill><a:blip r:embed="rId11"><a:extLst><a:ext uri="{28A0092B-C50C-407E-A947-70E740481C1C}"><a14:useLocalDpi xmlns:a14="http://schemas.microsoft.com/office/drawing/2010/main" val="0"/></a:ext></a:extLst></a:blip><a:stretch><a:fillRect/></a:stretch></pic:blipFill><pic:spPr bwMode="auto"><a:xfrm><a:off x="0" y="0"/><a:ext cx="6768000" cy="6786000"/></a:xfrm><a:prstGeom prst="rect"><a:avLst/></a:prstGeom><a:noFill/><a:ln><a:noFill/></a:ln></pic:spPr></pic:pic></a:graphicData></a:graphic><wp14:sizeRelH relativeFrom="page"><wp14:pctWidth>0</wp14:pctWidth></wp14:sizeRelH><wp14:sizeRelV relativeFrom="page"><wp14:pctHeight>0</wp14:pctHeight></wp14:sizeRelV></wp:anchor></w:drawing></w:r><w:r w:rsidRPr="00C335CE"><w:br w:type="page"/></w:r><w:proofErr w:type="gramStart"/><w:r w:rsidR="00B9348B" w:rsidRPr="00C335CE"><w:lastRenderedPageBreak/><w:t>clause</w:t></w:r><w:proofErr w:type="gramEnd"/><w:r w:rsidR="00B9348B" w:rsidRPr="00C335CE"><w:t xml:space="preserve"> de confidentialité</w:t></w:r></w:p><w:p w14:paraId="3E57861B" w14:textId="77777777" w:rsidR="007132BD" w:rsidRPr="00C335CE" 
w:rsidRDefault="00115837" w:rsidP="00891B8F"><w:pPr><w:pStyle w:val="ProprietaryNoticeText"/><w:rPr><w:color w:val="595959"/></w:rPr></w:pPr><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve">Toute </w:t></w:r><w:r w:rsidR="007132BD" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve">information </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve">contenue dans ce </w:t></w:r><w:r w:rsidR="007132BD" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve">document </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve">strictement confidentiel est fournie à </w:t></w:r><w:r w:rsidR="007132BD" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:fldChar w:fldCharType="begin"/></w:r><w:r w:rsidR="007132BD" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:instrText xml:space="preserve"> STYLEREF .CompanyName \\* MERGEFORMAT </w:instrText></w:r><w:r w:rsidR="007132BD" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:fldChar w:fldCharType="separate"/></w:r><w:r w:rsidR="000C19BE"><w:rPr><w:noProof/><w:color w:val="595959"/></w:rPr><w:t>Nom du Client</w:t></w:r><w:r w:rsidR="007132BD" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:fldChar w:fldCharType="end"/></w:r><w:r w:rsidR="007132BD" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>dans le seul but de répondre à ses demandes et ne peut être utilisé</w:t></w:r><w:r w:rsidR="007A159B" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>e</w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve"> à d’autres fins.</w:t></w:r></w:p><w:p w14:paraId="3EC09512" w14:textId="77777777" 
w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidRDefault="007132BD" w:rsidP="007132BD"><w:pPr><w:pStyle w:val="ProprietaryNoticeText"/><w:rPr><w:color w:val="595959"/></w:rPr></w:pPr><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:fldChar w:fldCharType="begin"/></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:instrText xml:space="preserve"> STYLEREF .CompanyName \\* MERGEFORMAT </w:instrText></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:fldChar w:fldCharType="separate"/></w:r><w:r w:rsidR="000C19BE"><w:rPr><w:noProof/><w:color w:val="595959"/></w:rPr><w:t>Nom du Client</w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:fldChar w:fldCharType="end"/></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidR="00115837" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>s’engage à ne pas publier ni faire connaître tout ou partie de ces informations à quelque tierce partie que ce soit sans l’autorisation préalable d’</w:t></w:r><w:r w:rsidR="00712552" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>Orange</w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>.</w:t></w:r></w:p><w:p w14:paraId="7D23B684" w14:textId="77777777" w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidRDefault="007132BD" w:rsidP="007132BD"><w:pPr><w:pStyle w:val="ProprietaryNoticeText"/><w:rPr><w:color w:val="595959"/></w:rPr></w:pPr><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>© copyright 201</w:t></w:r><w:r w:rsidR="00DA1A27"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>8</w:t></w:r></w:p><w:p w14:paraId="5E1DE421" w14:textId="77777777" w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidRDefault="00115837" w:rsidP="00C21D48"><w:pPr><w:pStyle w:val="ProprietaryNoticeText"/><w:spacing w:after="2800"/><w:rPr><w:color w:val="595959"/></w:rPr></w:pPr><w:r 
w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>Tous droits réservés</w:t></w:r></w:p><w:p w14:paraId="3BC5C1B2" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00DA1A27"><w:pPr><w:pStyle w:val="PointsContact"/><w:spacing w:before="6000"/></w:pPr><w:proofErr w:type="gramStart"/><w:r w:rsidRPr="00C335CE"><w:t>votre</w:t></w:r><w:proofErr w:type="gramEnd"/><w:r w:rsidRPr="00C335CE"><w:t xml:space="preserve"> contact</w:t></w:r></w:p><w:tbl><w:tblPr><w:tblW w:w="8505" w:type="dxa"/><w:tblInd w:w="85" w:type="dxa"/><w:tblBorders><w:top w:val="single" w:sz="4" w:space="0" w:color="808080"/><w:left w:val="single" w:sz="4" w:space="0" w:color="808080"/><w:bottom w:val="single" w:sz="4" w:space="0" w:color="808080"/><w:right w:val="single" w:sz="4" w:space="0" w:color="808080"/><w:insideH w:val="single" w:sz="4" w:space="0" w:color="808080"/><w:insideV w:val="single" w:sz="4" w:space="0" w:color="808080"/></w:tblBorders><w:tblLayout w:type="fixed"/><w:tblCellMar><w:left w:w="85" w:type="dxa"/><w:right w:w="85" w:type="dxa"/></w:tblCellMar><w:tblLook w:val="0000" w:firstRow="0" w:lastRow="0" w:firstColumn="0" w:lastColumn="0" w:noHBand="0" w:noVBand="0"/></w:tblPr><w:tblGrid><w:gridCol w:w="1095"/><w:gridCol w:w="3264"/><w:gridCol w:w="900"/><w:gridCol w:w="3246"/></w:tblGrid><w:tr w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w14:paraId="64609664" w14:textId="77777777" w:rsidTr="009564E8"><w:trPr><w:cantSplit/></w:trPr><w:tc><w:tcPr><w:tcW w:w="1095" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="2CA49F3C" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00FF5DD0"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Nom</w:t></w:r><w:r w:rsidR="00FF5DD0" w:rsidRPr="00C335CE"><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:t>:</w:t></w:r></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="7410" 
w:type="dxa"/><w:gridSpan w:val="3"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="427D84E8" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00AF69C7"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr></w:p></w:tc></w:tr><w:tr w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w14:paraId="04E8E560" w14:textId="77777777" w:rsidTr="009564E8"><w:tc><w:tcPr><w:tcW w:w="1095" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="6876598F" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00FF5DD0"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Titre</w:t></w:r><w:r w:rsidR="00FF5DD0" w:rsidRPr="00C335CE"><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:t>:</w:t></w:r></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="3264" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="1F56BDFC" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00AF69C7"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="900" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="5DC329C3" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00FF5DD0" w:rsidP="00AF69C7"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:proofErr w:type="gramStart"/><w:r w:rsidRPr="00C335CE"><w:t>Email</w:t></w:r><w:proofErr w:type="gramEnd"/><w:r w:rsidRPr="00C335CE"><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidR="00AF69C7" w:rsidRPr="00C335CE"><w:t>:</w:t></w:r></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="3246" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="2326070B" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00AF69C7"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>@orange.com</w:t></w:r></w:p></w:tc></w:tr><w:tr 
w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w14:paraId="470FE1CB" w14:textId="77777777" w:rsidTr="009564E8"><w:tc><w:tcPr><w:tcW w:w="1095" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="75B44613" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00FF5DD0"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Tél</w:t></w:r><w:r w:rsidR="00FF5DD0" w:rsidRPr="00C335CE"><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:t>:</w:t></w:r></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="3264" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="1FF13B91" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00AF69C7"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="900" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="0486FC7A" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00FF5DD0"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Mobile</w:t></w:r><w:r w:rsidR="00FF5DD0" w:rsidRPr="00C335CE"><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:t>:</w:t></w:r></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="3246" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="49907CC8" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00AF69C7"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr></w:p></w:tc></w:tr><w:tr w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w14:paraId="071A9502" w14:textId="77777777" w:rsidTr="009564E8"><w:trPr><w:cantSplit/></w:trPr><w:tc><w:tcPr><w:tcW w:w="1095" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="4755FA1E" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00D221F1" w:rsidP="00FF5DD0"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:r 
w:rsidRPr="00C335CE"><w:t>A</w:t></w:r><w:r w:rsidR="00AF69C7" w:rsidRPr="00C335CE"><w:t>dresse</w:t></w:r><w:r w:rsidR="00FF5DD0" w:rsidRPr="00C335CE"><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidR="00AF69C7" w:rsidRPr="00C335CE"><w:t>:</w:t></w:r></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="7410" w:type="dxa"/><w:gridSpan w:val="3"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="486934C7" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00AF69C7"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr></w:p></w:tc></w:tr><w:tr w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w14:paraId="74B2F971" w14:textId="77777777" w:rsidTr="009564E8"><w:trPr><w:cantSplit/></w:trPr><w:tc><w:tcPr><w:tcW w:w="1095" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="21D3119F" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00FF5DD0"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Site Web</w:t></w:r><w:r w:rsidR="00FF5DD0" w:rsidRPr="00C335CE"><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:t>:</w:t></w:r></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="7410" w:type="dxa"/><w:gridSpan w:val="3"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="3957B477" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00C168A5" w:rsidP="00C168A5"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>http://www.</w:t></w:r><w:r w:rsidR="00AF69C7" w:rsidRPr="00C335CE"><w:t>orange-business.com</w:t></w:r></w:p></w:tc></w:tr></w:tbl><w:p w14:paraId="11185772" w14:textId="77777777" w:rsidR="008A617E" w:rsidRPr="00C335CE" w:rsidRDefault="008A617E" w:rsidP="008A617E"><w:pPr><w:pStyle w:val="BodyText"/></w:pPr></w:p><w:p w14:paraId="6DDF5966" w14:textId="77777777" w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidRDefault="007132BD" w:rsidP="008A617E"><w:pPr><w:pStyle w:val="BodyText"/><w:sectPr 
w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidSect="002F63F5"><w:headerReference w:type="even" r:id="rId12"/><w:headerReference w:type="default" r:id="rId13"/><w:footerReference w:type="even" r:id="rId14"/><w:footerReference w:type="default" r:id="rId15"/><w:headerReference w:type="first" r:id="rId16"/><w:footerReference w:type="first" r:id="rId17"/><w:pgSz w:w="11906" w:h="16838" w:code="9"/><w:pgMar w:top="720" w:right="720" w:bottom="720" w:left="720" w:header="0" w:footer="0" w:gutter="0"/><w:cols w:space="708"/><w:titlePg/><w:docGrid w:linePitch="360"/></w:sectPr></w:pPr></w:p><w:p w14:paraId="2C75859B" w14:textId="77777777" w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidRDefault="007132BD" w:rsidP="00C168A5"><w:pPr><w:pStyle w:val="STitre1"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:lastRenderedPageBreak/><w:t xml:space="preserve">Table </w:t></w:r><w:r w:rsidR="00FA453C" w:rsidRPr="00C335CE"><w:t>des</w:t></w:r><w:r w:rsidRPr="00C335CE"><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidR="00FA453C" w:rsidRPr="00C335CE"><w:t>matières</w:t></w:r></w:p><w:p w14:paraId="33936659" w14:textId="77777777" w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidRDefault="00000000" w:rsidP="00F16138"><w:pPr><w:pStyle w:val="TM1"/></w:pPr><w:r><w:fldChar w:fldCharType="begin"/></w:r><w:r><w:instrText xml:space="preserve"> TOC \\o "1-3" \\h \\z \\u </w:instrText></w:r><w:r><w:fldChar w:fldCharType="separate"/></w:r><w:r w:rsidR="001F5250" w:rsidRPr="00C335CE"><w:rPr><w:noProof/></w:rPr><w:t xml:space="preserve">Aucune entrée de table des </w:t></w:r><w:r w:rsidR="001F5250" w:rsidRPr="00C335CE"><w:t>matières</w:t></w:r><w:r w:rsidR="001F5250" w:rsidRPr="00C335CE"><w:rPr><w:noProof/></w:rPr><w:t xml:space="preserve"> n\'a été trouvée.</w:t></w:r><w:r><w:rPr><w:noProof/></w:rPr><w:fldChar w:fldCharType="end"/></w:r></w:p><w:p w14:paraId="38CA3268" w14:textId="77777777" w:rsidR="009A37C6" w:rsidRPr="00C335CE" w:rsidRDefault="009A37C6" w:rsidP="0048016E"><w:pPr><w:pStyle 
w:val="STitre1"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Liste des tableaux</w:t></w:r></w:p><w:p w14:paraId="3FFCA3F6" w14:textId="77777777" w:rsidR="009A37C6" w:rsidRPr="00C335CE" w:rsidRDefault="009A37C6" w:rsidP="002F4EEA"><w:pPr><w:pStyle w:val="Tabledesillustrations"/><w:rPr><w:lang w:val="fr-FR"/></w:rPr></w:pPr><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:fldChar w:fldCharType="begin"/></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:instrText xml:space="preserve"> TOC \\h \\z \\c "Tableau" </w:instrText></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:fldChar w:fldCharType="separate"/></w:r><w:r w:rsidR="002F4EEA" w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:t>Aucune entrée de table d\'illustration n\'a été trouvée.</w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:fldChar w:fldCharType="end"/></w:r></w:p><w:p w14:paraId="5A2AE896" w14:textId="77777777" w:rsidR="009A37C6" w:rsidRPr="00C335CE" w:rsidRDefault="009A37C6" w:rsidP="0048016E"><w:pPr><w:pStyle w:val="STitre1"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Liste des figures</w:t></w:r></w:p><w:p w14:paraId="3B29A2A0" w14:textId="77777777" w:rsidR="009A37C6" w:rsidRPr="00C335CE" w:rsidRDefault="009A37C6" w:rsidP="002F4EEA"><w:pPr><w:pStyle w:val="Tabledesillustrations"/><w:rPr><w:lang w:val="fr-FR"/></w:rPr></w:pPr><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:fldChar w:fldCharType="begin"/></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:instrText xml:space="preserve"> TOC \\h \\z \\c "Figure" </w:instrText></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:fldChar w:fldCharType="separate"/></w:r><w:r w:rsidR="001F5250" w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:t>Aucune entrée de table d\'illustration n\'a été trouvée.</w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:fldChar 
w:fldCharType="end"/></w:r></w:p><w:p w14:paraId="4750AFB0" w14:textId="77777777" w:rsidR="00F36D8E" w:rsidRPr="00C335CE" w:rsidRDefault="00F36D8E" w:rsidP="00F36D8E"/><w:p w14:paraId="7D70CB6D" w14:textId="77777777" w:rsidR="000836AE" w:rsidRPr="00C335CE" w:rsidRDefault="000836AE" w:rsidP="000836AE"><w:pPr><w:pStyle w:val="BodyText"/><w:sectPr w:rsidR="000836AE" w:rsidRPr="00C335CE" w:rsidSect="005302A5"><w:headerReference w:type="even" r:id="rId18"/><w:headerReference w:type="default" r:id="rId19"/><w:footerReference w:type="even" r:id="rId20"/><w:footerReference w:type="default" r:id="rId21"/><w:headerReference w:type="first" r:id="rId22"/><w:footerReference w:type="first" r:id="rId23"/><w:pgSz w:w="11906" w:h="16838" w:code="9"/><w:pgMar w:top="720" w:right="720" w:bottom="720" w:left="720" w:header="0" w:footer="0" w:gutter="0"/><w:cols w:space="708"/><w:docGrid w:linePitch="360"/></w:sectPr></w:pPr></w:p><w:p w14:paraId="6B39024A" w14:textId="77777777" w:rsidR="001E4CDD" w:rsidRPr="00ED1502" w:rsidRDefault="001E4CDD" w:rsidP="00ED1502"><w:pPr><w:pStyle w:val="BodyText"/></w:pPr></w:p><w:p w14:paraId="0519CE18" w14:textId="3F897D6A" w:rsidR="0048016E" w:rsidRDefault="00ED038F" w:rsidP="00ED038F"><w:pPr><w:pStyle w:val="Titre10"/><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr></w:pPr><w:proofErr w:type="spellStart"/><w:r><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:lastRenderedPageBreak/><w:t>Ccc</w:t></w:r><w:proofErr w:type="spellEnd"/></w:p><w:p w14:paraId="3BC69584" w14:textId="48227D67" w:rsidR="00ED038F" w:rsidRDefault="00ED038F" w:rsidP="00ED038F"><w:pPr><w:pStyle w:val="Titre20"/></w:pPr><w:proofErr w:type="spellStart"/><w:r><w:t>Qsdd</w:t></w:r><w:proofErr w:type="spellEnd"/></w:p><w:p w14:paraId="747EE9A7" w14:textId="5DFB2DB0" w:rsidR="00ED038F" w:rsidRDefault="00947006" w:rsidP="00845F4B"><w:pPr><w:pStyle w:val="BodyText"/><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr></w:pPr><w:proofErr w:type="spellStart"/><w:r 
w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t>Dsbvbvn</w:t></w:r><w:proofErr w:type="spellEnd"/></w:p><w:p w14:paraId="21497A0D" w14:textId="77777777" w:rsidR="00947006" w:rsidRPr="00845F4B" w:rsidRDefault="00947006" w:rsidP="00845F4B"><w:pPr><w:pStyle w:val="BodyText"/><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr></w:pPr></w:p><w:p w14:paraId="383106B5" w14:textId="5ACDB52E" w:rsidR="00947006" w:rsidRDefault="00947006" w:rsidP="00947006"><w:pPr><w:pStyle w:val="Bullet1"/><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr></w:pPr><w:proofErr w:type="spellStart"/><w:r><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t>Fezjfz</w:t></w:r><w:proofErr w:type="spellEnd"/></w:p><w:p w14:paraId="2C331B80" w14:textId="3A7D6E23" w:rsidR="00947006" w:rsidRDefault="00947006" w:rsidP="00947006"><w:pPr><w:pStyle w:val="Bullet1"/><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr></w:pPr><w:proofErr w:type="spellStart"/><w:r><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t>Jzekkfjk</w:t></w:r><w:proofErr w:type="spellEnd"/></w:p><w:p w14:paraId="603BA9F8" w14:textId="32546CFA" w:rsidR="00947006" w:rsidRPr="00845F4B" w:rsidRDefault="00845F4B" w:rsidP="00845F4B"><w:pPr><w:pStyle w:val="BodyText"/><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr></w:pPr><w:r w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t xml:space="preserve"> </w:t></w:r><w:proofErr w:type="spellStart"/><w:r w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t>Nf</w:t></w:r><w:proofErr w:type="spellEnd"/><w:r w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t xml:space="preserve"> </w:t></w:r><w:proofErr w:type="spellStart"/><w:r w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t>nvf</w:t></w:r><w:proofErr w:type="spellEnd"/><w:r w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t xml:space="preserve"> </w:t></w:r><w:proofErr w:type="spellStart"/><w:proofErr w:type="gramStart"/><w:r 
w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t>z,v</w:t></w:r><w:proofErr w:type="spellEnd"/><w:proofErr w:type="gramEnd"/><w:r w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t>$</w:t></w:r></w:p><w:p w14:paraId="1E6020A0" w14:textId="77777777" w:rsidR="00845F4B" w:rsidRPr="00845F4B" w:rsidRDefault="00845F4B" w:rsidP="00845F4B"><w:pPr><w:pStyle w:val="BodyText"/><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr></w:pPr></w:p><w:sectPr w:rsidR="00845F4B" w:rsidRPr="00845F4B" w:rsidSect="00502252"><w:headerReference w:type="default" r:id="rId24"/><w:footerReference w:type="default" r:id="rId25"/><w:pgSz w:w="11906" w:h="16838" w:code="9"/><w:pgMar w:top="720" w:right="720" w:bottom="720" w:left="720" w:header="0" w:footer="0" w:gutter="0"/><w:cols w:space="708"/><w:docGrid w:linePitch="360"/></w:sectPr></w:body></w:document>
|
data/templates/Template_presentation.docx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93d264f72e69de63159803b9676a6d28e30946b478151f802f135798a2a71f71
|
3 |
+
size 146771
|
requirements.txt
ADDED
Binary file (5.38 kB). View file
|
|
src/control/controller.py
ADDED
@@ -0,0 +1,285 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
import os
|
3 |
+
from typing import Dict
|
4 |
+
import random
|
5 |
+
import datetime
|
6 |
+
import string
|
7 |
+
import docx
|
8 |
+
from src.tools.doc_tools import get_title
|
9 |
+
from src.domain.doc import Doc
|
10 |
+
from src.domain.wikidoc import WikiPage
|
11 |
+
from src.view.log_msg import create_msg_from
|
12 |
+
import src.tools.semantic_db as semantic_db
|
13 |
+
from src.tools.wiki import Wiki
|
14 |
+
from src.llm.llm_tools import get_wikilist, get_public_paragraph, get_private_paragraph
|
15 |
+
from src.tools.semantic_db import add_texts_to_collection, query_collection
|
16 |
+
import gradio as gr
|
17 |
+
from src.retriever.retriever import Retriever
|
18 |
+
|
19 |
+
class Controller:
|
20 |
+
|
21 |
+
def __init__(self, config: Dict, client_db, retriever):
|
22 |
+
self.templates_path = config['templates_path']
|
23 |
+
self.generated_docs_path = config['generated_docs_path']
|
24 |
+
self.styled_docs_path = config['styled_docs_path']
|
25 |
+
self.new_docs = []
|
26 |
+
self.gen_docs = []
|
27 |
+
self.input_csv = ""
|
28 |
+
template_path = config['templates_path'] + '/' + config['templates'][config['default_template_index']]
|
29 |
+
self.default_template = Doc(template_path)
|
30 |
+
self.template = self.default_template
|
31 |
+
self.log = []
|
32 |
+
self.differences = []
|
33 |
+
self.list_differences = []
|
34 |
+
self.client_db = client_db
|
35 |
+
self.retriever = retriever
|
36 |
+
|
37 |
+
def copy_docs(self, temp_docs: []):
|
38 |
+
"""
|
39 |
+
Initial copy of the incoming document
|
40 |
+
+
|
41 |
+
create collection for requirments retrieval
|
42 |
+
+
|
43 |
+
Initiate paths
|
44 |
+
|
45 |
+
TODO: Rename or refactor the function -> 1 mission / function
|
46 |
+
TODO: To be tested on several documents
|
47 |
+
TODO: Rename create_collection in create_requirement_collection
|
48 |
+
"""
|
49 |
+
doc_names = [doc.name for doc in temp_docs]
|
50 |
+
for i in range(len(doc_names)):
|
51 |
+
if '/' in doc_names[i]:
|
52 |
+
doc_names[i] = doc_names[i].split('/')[-1]
|
53 |
+
elif '\\' in doc_names[i]:
|
54 |
+
doc_names[i] = doc_names[i].split('\\')[-1]
|
55 |
+
doc_names[i] = doc_names[i].split('.')[0]
|
56 |
+
docs = [Doc(path=doc.name) for doc in temp_docs]
|
57 |
+
self.create_collection(docs)
|
58 |
+
style_paths = [f"{self.generated_docs_path}/{dn}_.docx" for dn in doc_names]
|
59 |
+
gen_paths = [f"{self.generated_docs_path}/{dn}_e.docx" for dn in doc_names]
|
60 |
+
for doc, style_path, gen_path in zip(docs, style_paths, gen_paths):
|
61 |
+
new_doc = doc.copy(style_path)
|
62 |
+
self.new_docs.append(new_doc)
|
63 |
+
|
64 |
+
def clear_docs(self):
|
65 |
+
for new_doc in self.new_docs:
|
66 |
+
if os.path.exists(new_doc.path):
|
67 |
+
new_doc.clear()
|
68 |
+
for gen_doc in self.gen_docs:
|
69 |
+
if os.path.exists(gen_doc.path):
|
70 |
+
gen_doc.clear()
|
71 |
+
self.new_docs = []
|
72 |
+
self.gen_docs = []
|
73 |
+
self.log = []
|
74 |
+
path_to_clear = os.path.abspath(self.generated_docs_path)
|
75 |
+
[os.remove(f"{path_to_clear}/{doc}") for doc in os.listdir(path_to_clear)]
|
76 |
+
|
77 |
+
def set_template(self, template_name: str = ""):
|
78 |
+
if not template_name:
|
79 |
+
self.template = self.default_template
|
80 |
+
else:
|
81 |
+
template_path = f"{self.templates_path}/{template_name}"
|
82 |
+
self.template = Doc(template_path)
|
83 |
+
|
84 |
+
def add_template(self, template_path: str):
|
85 |
+
"""
|
86 |
+
TODO: message to be but in config
|
87 |
+
"""
|
88 |
+
if not template_path:
|
89 |
+
return
|
90 |
+
elif not template_path.name.endswith(".docx"):
|
91 |
+
gr.Warning("Seuls les fichiers .docx sont acceptés")
|
92 |
+
return
|
93 |
+
doc = docx.Document(template_path.name)
|
94 |
+
doc.save(self.templates_path + '/' + get_title(template_path.name))
|
95 |
+
|
96 |
+
def delete_curr_template(self, template_name: str):
|
97 |
+
if not template_name:
|
98 |
+
return
|
99 |
+
os.remove(f"{self.templates_path}/{template_name}")
|
100 |
+
|
101 |
+
def retrieve_number_of_misapplied_styles(self):
|
102 |
+
"""
|
103 |
+
not used: buggy !!
|
104 |
+
"""
|
105 |
+
res = {}
|
106 |
+
for new_doc in self.new_docs:
|
107 |
+
res[new_doc] = new_doc.retrieve_number_of_misapplied_styles()
|
108 |
+
return res
|
109 |
+
|
110 |
+
def get_difference_with_template(self):
|
111 |
+
self.differences = []
|
112 |
+
for new_doc in self.new_docs:
|
113 |
+
diff_styles = new_doc.get_different_styles_with_template(template=self.template)
|
114 |
+
diff_dicts = [{'doc': new_doc, 'style': s} for s in diff_styles]
|
115 |
+
self.differences += diff_dicts
|
116 |
+
template_styles = self.template.xdoc.styles
|
117 |
+
template_styles = [style for style in template_styles if style.name in self.template.styles.names]
|
118 |
+
return self.differences, template_styles
|
119 |
+
|
120 |
+
def get_list_styles(self):
|
121 |
+
self.list_differences = []
|
122 |
+
for new_doc in self.new_docs:
|
123 |
+
list_styles = new_doc.get_list_styles()
|
124 |
+
all_lists_styles = [{'doc': new_doc, 'list_style': s} for s in list_styles]
|
125 |
+
self.list_differences += all_lists_styles
|
126 |
+
return self.list_differences
|
127 |
+
|
128 |
+
def map_style(self, this_style_index: int, template_style_name: str):
|
129 |
+
"""
|
130 |
+
maps a style from 'this' document into a style from the template
|
131 |
+
"""
|
132 |
+
#dont make any change if the style is already the same
|
133 |
+
diff_dict = self.differences[this_style_index]
|
134 |
+
doc = diff_dict['doc']
|
135 |
+
this_style_name = diff_dict['style']
|
136 |
+
log = doc.copy_one_style(this_style_name, template_style_name, self.template)
|
137 |
+
if log:
|
138 |
+
self.log.append({doc.name: log})
|
139 |
+
|
140 |
+
def update_list_style(self, this_style_index: int, template_style_name: str):
|
141 |
+
"""
|
142 |
+
maps a style from 'this' document into a style from the template
|
143 |
+
"""
|
144 |
+
#dont make any change if the style is already the same
|
145 |
+
diff_dict = self.list_differences[this_style_index]
|
146 |
+
doc = diff_dict['doc']
|
147 |
+
this_style_name = diff_dict['list_style']
|
148 |
+
log = doc.change_bullet_style(this_style_name, template_style_name, self.template)
|
149 |
+
if log:
|
150 |
+
self.log.append({doc.name: log})
|
151 |
+
|
152 |
+
def update_style(self,index,style_to_modify):
|
153 |
+
return self.map_style(index, style_to_modify) if style_to_modify else None
|
154 |
+
|
155 |
+
def apply_template(self, options_list):
|
156 |
+
for new_doc in self.new_docs:
|
157 |
+
log = new_doc.apply_template(template=self.template, options_list=options_list)
|
158 |
+
if log:
|
159 |
+
self.log.append({new_doc.name: log})
|
160 |
+
|
161 |
+
def reset(self):
|
162 |
+
for new_doc in self.new_docs:
|
163 |
+
new_doc.delete()
|
164 |
+
for gen_doc in self.gen_docs:
|
165 |
+
gen_doc.delete()
|
166 |
+
self.new_docs = []
|
167 |
+
self.gen_docs = []
|
168 |
+
|
169 |
+
|
170 |
+
def get_log(self):
|
171 |
+
msg_log = create_msg_from(self.log, self.new_docs)
|
172 |
+
return msg_log
|
173 |
+
|
174 |
+
"""
|
175 |
+
Source Control
|
176 |
+
"""
|
177 |
+
|
178 |
+
def get_or_create_collection(self, id_: str) -> str:
|
179 |
+
"""
|
180 |
+
generates a new id if needed
|
181 |
+
TODO: rename into get_or_create_generation_collection
|
182 |
+
TODO: have a single DB with separate collections, one for requirements, one for generation
|
183 |
+
"""
|
184 |
+
if id_ != '-1':
|
185 |
+
return id_
|
186 |
+
else:
|
187 |
+
now = datetime.datetime.now().strftime("%m%d%H%M")
|
188 |
+
letters = string.ascii_lowercase + string.digits
|
189 |
+
id_ = now + '-' + ''.join(random.choice(letters) for _ in range(10))
|
190 |
+
semantic_db.get_or_create_collection(id_)
|
191 |
+
return id_
|
192 |
+
|
193 |
+
async def wiki_fetch(self) -> [str]:
|
194 |
+
"""
|
195 |
+
returns the title of the wikipages corresponding to the tasks described in the input text
|
196 |
+
"""
|
197 |
+
all_tasks = []
|
198 |
+
for new_doc in self.new_docs:
|
199 |
+
all_tasks += new_doc.tasks
|
200 |
+
async_tasks = [asyncio.create_task(get_wikilist(task)) for task in all_tasks]
|
201 |
+
wiki_lists = await asyncio.gather(*async_tasks)
|
202 |
+
flatten_wiki_list = list(set().union(*[set(w) for w in wiki_lists]))
|
203 |
+
return flatten_wiki_list
|
204 |
+
|
205 |
+
async def wiki_upload_and_store(self, wiki_title: str, collection_name: str):
|
206 |
+
"""
|
207 |
+
uploads one wikipage and stores them into the right collection
|
208 |
+
"""
|
209 |
+
wikipage = Wiki().fetch(wiki_title)
|
210 |
+
wiki_title = wiki_title
|
211 |
+
if type(wikipage) != str:
|
212 |
+
texts = WikiPage(wikipage.page_content).get_paragraphs()
|
213 |
+
add_texts_to_collection(coll_name=collection_name, texts=texts, file=wiki_title, source='wiki')
|
214 |
+
else:
|
215 |
+
print(wikipage)
|
216 |
+
|
217 |
+
"""
|
218 |
+
Generate Control
|
219 |
+
"""
|
220 |
+
|
221 |
+
|
222 |
+
async def generate_doc_from_db(self, collection_name: str, from_files: [str]) -> [str]:
|
223 |
+
|
224 |
+
def query_from_task(task):
|
225 |
+
return get_public_paragraph(task)
|
226 |
+
|
227 |
+
async def retrieve_text_and_generate(t, collection_name: str, from_files: [str]):
|
228 |
+
"""
|
229 |
+
retreives the texts from the database and generates the documents
|
230 |
+
"""
|
231 |
+
# retreive the texts from the database
|
232 |
+
task_query = query_from_task(t)
|
233 |
+
texts = query_collection(coll_name=collection_name, query=task_query, from_files=from_files)
|
234 |
+
task_resolutions = get_private_paragraph(task=t, texts=texts)
|
235 |
+
return task_resolutions
|
236 |
+
|
237 |
+
async def real_doc_generation(new_doc):
|
238 |
+
async_task_resolutions = [asyncio.create_task(retrieve_text_and_generate(t=task, collection_name=collection_name, from_files=from_files))
|
239 |
+
for task in new_doc.tasks]
|
240 |
+
tasks_resolutions = await asyncio.gather(*async_task_resolutions) #A VOIR
|
241 |
+
gen_path = f"{self.generated_docs_path}/{new_doc.name}e.docx"
|
242 |
+
gen_doc = new_doc.copy(gen_path)
|
243 |
+
gen_doc.replace_tasks(tasks_resolutions)
|
244 |
+
gen_doc.save_as_docx()
|
245 |
+
gen_paths.append(gen_doc.path)
|
246 |
+
self.gen_docs.append(gen_doc)
|
247 |
+
return gen_paths
|
248 |
+
|
249 |
+
gen_paths = []
|
250 |
+
gen_paths = await asyncio.gather(*[asyncio.create_task(real_doc_generation(new_doc)) for new_doc in self.new_docs])
|
251 |
+
gen_paths = [path for sublist in gen_paths for path in sublist]
|
252 |
+
gen_paths = list(set(gen_paths))
|
253 |
+
return gen_paths
|
254 |
+
|
255 |
+
|
256 |
+
|
257 |
+
|
258 |
+
|
259 |
+
|
260 |
+
"""
|
261 |
+
Requirements
|
262 |
+
"""
|
263 |
+
|
264 |
+
def set_input_csv(self, csv_path: str):
|
265 |
+
"""
|
266 |
+
TODO: rename to set_requirements_file
|
267 |
+
"""
|
268 |
+
self.input_csv = csv_path
|
269 |
+
|
270 |
+
def create_collection(self, docs: [Doc]):
|
271 |
+
"""
|
272 |
+
TODO: rename to create_requirements_collection
|
273 |
+
TODO: merge with semantic tool to have only one DB Object
|
274 |
+
"""
|
275 |
+
coll_name = "collection_for_docs"
|
276 |
+
collection = self.client_db.get_or_create_collection(coll_name)
|
277 |
+
for doc in docs:
|
278 |
+
self.fill_collection(doc, collection)
|
279 |
+
self.retriever.collection = collection
|
280 |
+
|
281 |
+
def fill_collection(self, doc: Doc, collection: str):
|
282 |
+
"""
|
283 |
+
fills the collection with the blocks of the documents
|
284 |
+
"""
|
285 |
+
Retriever(doc=doc, collection=collection)
|
src/domain/block.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import math
|
2 |
+
|
3 |
+
class Block:
|
4 |
+
def __init__(self, doc: str = '', title: str = '', content: str = '', content_fr: str = '',
|
5 |
+
index: str = '', rank: int = 0, level: int = 0, distance: float = 99999):
|
6 |
+
self.doc = doc
|
7 |
+
self.title = title
|
8 |
+
self.title_fr = ""
|
9 |
+
self.content = content
|
10 |
+
self.content_fr = content_fr
|
11 |
+
self.specials = []
|
12 |
+
self.index = index
|
13 |
+
self.rank = rank
|
14 |
+
self.level = level
|
15 |
+
self.distance = distance
|
16 |
+
|
17 |
+
|
18 |
+
def separate_1_block_in_n(self, max_size=4500):
|
19 |
+
"""
|
20 |
+
Separate a block in n blocks of equal size
|
21 |
+
"""
|
22 |
+
content_length = len(self.content)
|
23 |
+
n = math.ceil(content_length / max_size)
|
24 |
+
block_size = content_length // n
|
25 |
+
new_blocks = []
|
26 |
+
for i in range(n):
|
27 |
+
start = i * block_size
|
28 |
+
end = (i + 1) * block_size if i < n - 1 else None
|
29 |
+
new_blocks.append(Block(doc=self.doc,
|
30 |
+
title=self.title + f"_part{i}",
|
31 |
+
content=self.content[start:end],
|
32 |
+
index=self.index + f"_{i}",
|
33 |
+
rank=self.rank,
|
34 |
+
level=self.level))
|
35 |
+
return new_blocks
|
36 |
+
|
37 |
+
def to_dict(self) -> {}:
|
38 |
+
block_dict = {'doc': self.doc,
|
39 |
+
'title': self.title,
|
40 |
+
'title_fr': self.title_fr,
|
41 |
+
'content': self.content,
|
42 |
+
'content_fr': self.content_fr,
|
43 |
+
'index': self.index,
|
44 |
+
'rank': self.rank,
|
45 |
+
'level': self.level,
|
46 |
+
'distance': self.distance}
|
47 |
+
for i, s in enumerate(self.specials):
|
48 |
+
special_key = 'special_'+str(i)
|
49 |
+
block_dict[special_key] = s
|
50 |
+
block_dict['specials_len'] = len(self.specials)
|
51 |
+
return block_dict
|
52 |
+
|
53 |
+
def from_dict(self, block_dict: {}):
|
54 |
+
self.doc = block_dict['doc']
|
55 |
+
self.title = block_dict['title']
|
56 |
+
self.title_fr = block_dict['title_fr']
|
57 |
+
self.content = block_dict['content']
|
58 |
+
self.content_fr = block_dict['content_fr']
|
59 |
+
self.index = block_dict['index']
|
60 |
+
self.rank = block_dict['rank']
|
61 |
+
self.level = block_dict['level']
|
62 |
+
self.distance = block_dict['distance']
|
63 |
+
self.specials = []
|
64 |
+
for i in range(block_dict['specials_len']):
|
65 |
+
special_key = 'special_' + str(i)
|
66 |
+
self.specials.append(block_dict[special_key])
|
67 |
+
return self
|
68 |
+
|
69 |
+
@property
|
70 |
+
def distance_str(self) -> str:
|
71 |
+
return format(self.distance, '.2f')
|
src/domain/container.py
ADDED
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.domain.paragraph import Paragraph
|
2 |
+
from src.domain.block import Block
|
3 |
+
|
4 |
+
INFINITE = 10000
|
5 |
+
|
6 |
+
|
7 |
+
class Container:
|
8 |
+
|
9 |
+
def __init__(self, paragraphs: [Paragraph], title: Paragraph = None, level: int = 0, index: [int] = None,
|
10 |
+
father=None, id_=0):
|
11 |
+
"""
|
12 |
+
should add some summary or infos on content (by a priori generation)
|
13 |
+
"""
|
14 |
+
if index is None:
|
15 |
+
index = []
|
16 |
+
self.level = level
|
17 |
+
if not self.level:
|
18 |
+
pass
|
19 |
+
self.title = title
|
20 |
+
self.paragraphs = []
|
21 |
+
self.all_paragraphs = paragraphs
|
22 |
+
self.children = []
|
23 |
+
self.index = index
|
24 |
+
self.father = father # if not father, then the container is at the top of the hierarchy
|
25 |
+
self.id_ = int(str(1) + str(father.id_) + str(id_))
|
26 |
+
if paragraphs:
|
27 |
+
self.paragraphs, self.children = self.create_children(paragraphs.copy(), level, index)
|
28 |
+
self.containers = [self]
|
29 |
+
for child in self.children:
|
30 |
+
self.containers += child.containers
|
31 |
+
self.blocks = self.get_blocks()
|
32 |
+
self.normal, self.comment, self.task, _ = self.sort_paragraphs()
|
33 |
+
|
34 |
+
self.one_liner = (self.title.text if self.title else '') + ' ' + self.comment
|
35 |
+
self.root_text = self.one_liner + ' ' + self.normal
|
36 |
+
|
37 |
+
|
38 |
+
@property
|
39 |
+
def text(self):
|
40 |
+
text = ""
|
41 |
+
if self.title:
|
42 |
+
text = "Titre " + str(self.level) + " : " + self.title.text + '\n'
|
43 |
+
for p in self.paragraphs:
|
44 |
+
text += p.text + '\n'
|
45 |
+
for child in self.children:
|
46 |
+
text += child.text
|
47 |
+
return text
|
48 |
+
|
49 |
+
@property
|
50 |
+
def table_of_contents(self):
|
51 |
+
"""
|
52 |
+
Not used
|
53 |
+
"""
|
54 |
+
toc = []
|
55 |
+
if self.title:
|
56 |
+
toc += [{str(self.level): self.title.text}]
|
57 |
+
if self.children:
|
58 |
+
for child in self.children:
|
59 |
+
toc += child.table_of_contents
|
60 |
+
return toc
|
61 |
+
|
62 |
+
def move(self, position: int, new_father=None):
|
63 |
+
"""
|
64 |
+
Not used
|
65 |
+
"""
|
66 |
+
current_father = self.father
|
67 |
+
current_father.children.remove(self)
|
68 |
+
|
69 |
+
self.rank = new_father.rank + 1 if new_father else 0
|
70 |
+
self.father = new_father
|
71 |
+
if position < len(new_father.children):
|
72 |
+
new_father.children.insert(position, self)
|
73 |
+
else:
|
74 |
+
new_father.children.append(self)
|
75 |
+
|
76 |
+
def create_children(self, paragraphs, level, rank) -> ([], []):
|
77 |
+
"""
|
78 |
+
creates children containers or directly attached content
|
79 |
+
and returns the list of containers and contents of level+1
|
80 |
+
:return:
|
81 |
+
[Content or Container]
|
82 |
+
"""
|
83 |
+
attached_paragraphs = []
|
84 |
+
container_paragraphs = []
|
85 |
+
container_title = None
|
86 |
+
children = []
|
87 |
+
in_children = False
|
88 |
+
level = INFINITE
|
89 |
+
child_id = 0
|
90 |
+
|
91 |
+
while paragraphs:
|
92 |
+
p = paragraphs.pop(0)
|
93 |
+
if not in_children and not p.is_structure:
|
94 |
+
attached_paragraphs.append(p)
|
95 |
+
else:
|
96 |
+
in_children = True
|
97 |
+
if p.is_structure and p.level <= level: # if p is higher or equal in hierarchy
|
98 |
+
if container_paragraphs or container_title:
|
99 |
+
children.append(Container(container_paragraphs, container_title, level, rank, self, child_id))
|
100 |
+
child_id += 1
|
101 |
+
container_paragraphs = []
|
102 |
+
container_title = p
|
103 |
+
level = p.level
|
104 |
+
|
105 |
+
else: # p is strictly lower in hierarchy
|
106 |
+
container_paragraphs.append(p)
|
107 |
+
|
108 |
+
if container_paragraphs or container_title:
|
109 |
+
children.append(Container(container_paragraphs, container_title, level, rank, self, child_id))
|
110 |
+
child_id += 1
|
111 |
+
|
112 |
+
return attached_paragraphs, children
|
113 |
+
|
114 |
+
@property
|
115 |
+
def structure(self):
|
116 |
+
|
117 |
+
self_structure = {str(self.id_): {
|
118 |
+
'index': str(self.id_),
|
119 |
+
'canMove': True,
|
120 |
+
'isFolder': True,
|
121 |
+
'children': [p.id_ for p in self.paragraphs] + [child.id_ for child in self.children],
|
122 |
+
'canRename': True,
|
123 |
+
'data': {},
|
124 |
+
'level': self.level,
|
125 |
+
'title': self.title.text if self.title else 'root'
|
126 |
+
}}
|
127 |
+
paragraphs_structure = [p.structure for p in self.paragraphs]
|
128 |
+
structure = [self_structure] + paragraphs_structure
|
129 |
+
for child in self.children:
|
130 |
+
structure += child.structure
|
131 |
+
return structure
|
132 |
+
|
133 |
+
def get_lang(self):
|
134 |
+
"""
|
135 |
+
returns the main language of the document
|
136 |
+
:return:
|
137 |
+
"""
|
138 |
+
|
139 |
+
def get_structure(self, level=2):
|
140 |
+
"""
|
141 |
+
returns the structure of the document
|
142 |
+
:return:
|
143 |
+
"""
|
144 |
+
|
145 |
+
def create_embeddings(self):
|
146 |
+
"""
|
147 |
+
|
148 |
+
:return:
|
149 |
+
"""
|
150 |
+
|
151 |
+
def get_blocks(self):
|
152 |
+
block = Block(level=self.level, index=self.index)
|
153 |
+
if self.title:
|
154 |
+
block.title = self.title.text
|
155 |
+
for p in self.paragraphs:
|
156 |
+
if not p.blank:
|
157 |
+
if p.text.startswith('##### '):
|
158 |
+
special_action = p.text.lstrip('##### ')
|
159 |
+
block.specials.append(special_action)
|
160 |
+
else:
|
161 |
+
block.content += p.text
|
162 |
+
blocks = [block] if block.content or block.specials else []
|
163 |
+
for child in self.children:
|
164 |
+
blocks += child.blocks
|
165 |
+
return blocks
|
166 |
+
|
167 |
+
def get_fulltask(self, doc_one_liner):
|
168 |
+
index = 0
|
169 |
+
siblings_ = []
|
170 |
+
if isinstance(self.father, Container):
|
171 |
+
siblings_ = self.father.children.copy()
|
172 |
+
index = siblings_.index(self)
|
173 |
+
siblings_before_context = [sibling.one_liner for idx, sibling in enumerate(siblings_) if idx < index]
|
174 |
+
siblings_after_context = [sibling.one_liner for idx, sibling in enumerate(siblings_) if index < idx]
|
175 |
+
|
176 |
+
fulltask = {'description': self.task,
|
177 |
+
'about': self.one_liner,
|
178 |
+
'doc_description': doc_one_liner,
|
179 |
+
'above': self.father.one_liner if isinstance(self.father, Container) else '',
|
180 |
+
'before': siblings_before_context,
|
181 |
+
'after': siblings_after_context}
|
182 |
+
return fulltask
|
183 |
+
|
184 |
+
def sort_paragraphs(self) -> (str, str, str, str):
|
185 |
+
mapping = {'normal': '', 'comment': '', 'task': '', 'title': ''}
|
186 |
+
for p in self.paragraphs:
|
187 |
+
mapping[p.type] += ' ' + p.parsed_text
|
188 |
+
return mapping['normal'], mapping['comment'], mapping['task'], mapping['title']
|
189 |
+
|
190 |
+
def get_all_styles_used_in_doc_except_list(self):
|
191 |
+
"""
|
192 |
+
loop in doc? rather thann in container? (since it applies only to container of level 0)
|
193 |
+
"""
|
194 |
+
styles = []
|
195 |
+
for p in self.all_paragraphs:
|
196 |
+
styles.append(p.get_styles_in_paragraph_except_list())
|
197 |
+
res = []
|
198 |
+
#flatten the list
|
199 |
+
temp = [item for sublist in styles for item in sublist]
|
200 |
+
names = [style.name for style in temp]
|
201 |
+
for s in temp:
|
202 |
+
if s.name in names:
|
203 |
+
res.append(s)
|
204 |
+
names.remove(s.name)
|
205 |
+
return res
|
206 |
+
|
207 |
+
def get_list_styles(self):
|
208 |
+
styles = []
|
209 |
+
for p in self.all_paragraphs:
|
210 |
+
styles.append(p.get_list_styles())
|
211 |
+
res = list(set().union(*styles))
|
212 |
+
return res
|
213 |
+
|
214 |
+
def retrieve_number_of_misapplied_styles(self):
|
215 |
+
res = 0
|
216 |
+
for p in self.all_paragraphs:
|
217 |
+
if p.style_misapplied:
|
218 |
+
res += 1
|
219 |
+
return res
|
src/domain/container_requirements.py
ADDED
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.domain.paragraph import Paragraph
|
2 |
+
from src.domain.block import Block
|
3 |
+
|
4 |
+
INFINITE = 10000
|
5 |
+
|
6 |
+
|
7 |
+
class Container_requirements:
|
8 |
+
|
9 |
+
def __init__(self, paragraphs: [Paragraph], title: Paragraph = None, level: int = 0, index: [int] = None,
|
10 |
+
father=None, id_=0):
|
11 |
+
if index is None:
|
12 |
+
index = []
|
13 |
+
self.level = level
|
14 |
+
if not self.level:
|
15 |
+
pass
|
16 |
+
self.title = title
|
17 |
+
self.paragraphs = []
|
18 |
+
self.all_paragraphs = paragraphs
|
19 |
+
self.children = []
|
20 |
+
self.index = index
|
21 |
+
self.father = father # if not father, then the container is at the top of the hierarchy
|
22 |
+
self.id_ = int(str(1) + str(father.id_) + str(id_))
|
23 |
+
if paragraphs:
|
24 |
+
self.paragraphs, self.children = self.create_children(paragraphs.copy(), level, index)
|
25 |
+
self.containers = [self]
|
26 |
+
for child in self.children:
|
27 |
+
self.containers += child.containers
|
28 |
+
self.blocks = self.get_blocks_requirements()
|
29 |
+
|
30 |
+
|
31 |
+
@property
|
32 |
+
def text(self):
|
33 |
+
text = ""
|
34 |
+
if self.title:
|
35 |
+
text = "Titre " + str(self.level) + " : " + self.title.text + '\n'
|
36 |
+
for p in self.paragraphs:
|
37 |
+
text += p.text + '\n'
|
38 |
+
for child in self.children:
|
39 |
+
text += child.text
|
40 |
+
return text
|
41 |
+
|
42 |
+
|
43 |
+
def move(self, position: int, new_father=None):
|
44 |
+
current_father = self.father # should be added in the domain
|
45 |
+
current_father.children.remove(self)
|
46 |
+
|
47 |
+
self.rank = new_father.rank + 1 if new_father else 0
|
48 |
+
self.father = new_father
|
49 |
+
if position < len(new_father.children):
|
50 |
+
new_father.children.insert(position, self)
|
51 |
+
else:
|
52 |
+
new_father.children.append(self)
|
53 |
+
|
54 |
+
def create_children(self, paragraphs, level, rank) -> ([], []):
|
55 |
+
"""
|
56 |
+
creates children containers or directly attached content
|
57 |
+
and returns the list of containers and contents of level+1
|
58 |
+
:return:
|
59 |
+
[Content or Container]
|
60 |
+
"""
|
61 |
+
attached_paragraphs = []
|
62 |
+
container_paragraphs = []
|
63 |
+
container_title = None
|
64 |
+
children = []
|
65 |
+
in_children = False
|
66 |
+
level = INFINITE
|
67 |
+
child_id = 0
|
68 |
+
|
69 |
+
while paragraphs:
|
70 |
+
p = paragraphs.pop(0)
|
71 |
+
if not in_children and not p.is_structure:
|
72 |
+
attached_paragraphs.append(p)
|
73 |
+
else:
|
74 |
+
in_children = True
|
75 |
+
if p.is_structure and p.level <= level: # if p is higher or equal in hierarchy
|
76 |
+
if container_paragraphs or container_title:
|
77 |
+
children.append(Container_requirements(container_paragraphs, container_title, level, rank, self, child_id))
|
78 |
+
child_id += 1
|
79 |
+
container_paragraphs = []
|
80 |
+
container_title = p
|
81 |
+
level = p.level
|
82 |
+
|
83 |
+
else: # p is strictly lower in hierarchy
|
84 |
+
container_paragraphs.append(p)
|
85 |
+
|
86 |
+
if container_paragraphs or container_title:
|
87 |
+
children.append(Container_requirements(container_paragraphs, container_title, level, rank, self, child_id))
|
88 |
+
child_id += 1
|
89 |
+
|
90 |
+
return attached_paragraphs, children
|
91 |
+
|
92 |
+
@property
|
93 |
+
def structure(self):
|
94 |
+
|
95 |
+
self_structure = {str(self.id_): {
|
96 |
+
'index': str(self.id_),
|
97 |
+
'canMove': True,
|
98 |
+
'isFolder': True,
|
99 |
+
'children': [p.id_ for p in self.paragraphs] + [child.id_ for child in self.children],
|
100 |
+
'canRename': True,
|
101 |
+
'data': {},
|
102 |
+
'level': self.level,
|
103 |
+
'title': self.title.text if self.title else 'root'
|
104 |
+
}}
|
105 |
+
paragraphs_structure = [p.structure for p in self.paragraphs]
|
106 |
+
structure = [self_structure] + paragraphs_structure
|
107 |
+
for child in self.children:
|
108 |
+
structure += child.structure
|
109 |
+
return structure
|
110 |
+
|
111 |
+
def get_blocks_requirements(self):
|
112 |
+
block = Block(level=self.level, index=self.index)
|
113 |
+
if self.title:
|
114 |
+
self.title.text = self.title.text.replace('\r', '').replace('\n', '')
|
115 |
+
block.title = self.title.text
|
116 |
+
block.content = self.title.text + '/'
|
117 |
+
temp_father = self.father
|
118 |
+
while temp_father and type(temp_father) == Container_requirements:
|
119 |
+
if temp_father.title:
|
120 |
+
temp_father.title.text = temp_father.title.text.replace('\r', '').replace('\n', '')
|
121 |
+
block.content = temp_father.title.text + '/' + block.content
|
122 |
+
temp_father = temp_father.father
|
123 |
+
block.content += " :\n\n"
|
124 |
+
i = 0
|
125 |
+
for p in self.paragraphs:
|
126 |
+
if not p.blank:
|
127 |
+
i = 1
|
128 |
+
if p.text.startswith('##### '):
|
129 |
+
special_action = p.text.lstrip('##### ')
|
130 |
+
block.specials.append(special_action)
|
131 |
+
else:
|
132 |
+
block.content += p.text
|
133 |
+
if i == 0:
|
134 |
+
blocks = []
|
135 |
+
else:
|
136 |
+
blocks = [block]
|
137 |
+
for child in self.children:
|
138 |
+
blocks += child.blocks
|
139 |
+
return blocks
|
140 |
+
|
src/domain/doc.py
ADDED
@@ -0,0 +1,473 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import docx
|
2 |
+
from src.tools.doc_tools import *
|
3 |
+
from docxcompose.composer import Composer
|
4 |
+
from docx import Document as Document_compose
|
5 |
+
from docx.enum.table import WD_TABLE_ALIGNMENT
|
6 |
+
from src.domain.container import Container
|
7 |
+
from src.domain.container_requirements import Container_requirements
|
8 |
+
from src.domain.paragraph import Paragraph
|
9 |
+
from src.domain.styles import Styles
|
10 |
+
import shutil
|
11 |
+
import os
|
12 |
+
from docx.oxml.ns import qn
|
13 |
+
from docx.oxml.shared import OxmlElement
|
14 |
+
from docx.shared import Inches
|
15 |
+
from src.tools.pretty_print import pretty_print_block_and_indexes, pretty_print_paragraphs
|
16 |
+
from src.tools.index_creation import set_indexes
|
17 |
+
from src.reader.reader_for_requirements import WordReader
|
18 |
+
|
19 |
+
class Doc:
    """Domain wrapper around a python-docx document.

    Loads the .docx at *path*, wraps its paragraphs and styles into domain
    objects, builds the hierarchical containers and extracts the indexed
    "blocks" consumed by the retrieval layer.

    TODO: prefix internal methods with an underscore.
    """

    def __init__(self, path='', id_=None):
        # Underlying python-docx document object.
        self.xdoc = docx.Document(path)
        self.title = get_title(path)
        self.name = self.title.split('.')[0]
        # NOTE(review): the id_ parameter is accepted but ignored; the object
        # identity is used instead — confirm no caller relies on id_.
        self.id_ = id(self)
        self.path = path
        self.paragraphs = [Paragraph(xp, self.id_, i, self) for (i, xp) in enumerate(self.xdoc.paragraphs)]
        # Requirement paragraphs are only extracted for user documents, never templates.
        self.requirements_paragraphs = WordReader(self.path).paragraphs if "data/templates" not in self.path else []
        self.handle_content_before_toc()
        self.container = Container(self.paragraphs, father=self)
        self.container_requirements = Container_requirements(self.requirements_paragraphs, father=self)
        set_indexes(self.container, self.path)
        set_indexes(self.container_requirements, self.path)
        self.styles = Styles(self.xdoc.styles)
        self.tasks = [c.get_fulltask(self.container.one_liner) for c in self.container.containers if c.task]
        self.blocks = self.get_blocks()
        self.blocks_requirements = self.get_blocks_requirements()

    def copy(self, new_doc_path):
        """Copy the underlying file to *new_doc_path* and return a new Doc over it."""
        shutil.copyfile(self.path, new_doc_path)
        new_doc = Doc(new_doc_path)
        new_doc.save_as_docx(new_doc_path)
        return new_doc

    def clear(self):
        """Delete the underlying file from disk."""
        os.remove(self.path)

    def apply_template(self, template, options_list):
        """Apply *template* (styles, TOC, layout) to this document.

        options_list: user-selected option labels (justify text, center tables).
        Returns a log describing what was changed.
        TODO: move the user-facing strings to a config file.
        """
        log = []
        j = 0
        if "Justifier le texte (Normal)" in options_list:
            log.append("Le contenu du document a été justifié")
            self.justify_content()
            self.save_as_docx()
        if "Recentrer les tableaux" in options_list:
            j = self.center_tables()
            log.append(f"{j} table{'s' if j>1 else ''} centrée{'s' if j>1 else ''}")
            self.save_as_docx()
        log.append(f"Le template {template.name} a été ajouté avant le document")
        self.rearrange_tables()
        self.save_as_docx()
        log = self.styles.apply_from(template.styles, log)
        self.save_as_docx()
        self.delete_toc(template)
        self.normal_style_for_empty_paragraphs()
        self.save_as_docx()
        self.append_doc_to_template_and_update_toc(template)
        return log

    def copy_one_style(self, src_style_name: str, dest_style_name: str, template):
        """Map doc style *src_style_name* onto template style *dest_style_name*.

        Returns a log dict, or None when the source style does not exist.
        """
        style_dest = template.styles.get_style_from_name(dest_style_name)
        src_style = self.styles.get_style_from_name(src_style_name)
        if src_style:
            log = self.styles.copy_one_style(src_style, style_dest)
            return log
        else:
            return None

    def get_different_styles_with_template(self, template):
        """Return the styles used in this document that differ from the template's."""
        styles_used_in_doc = self.get_all_styles_used_in_doc_except_list()
        different_styles = get_difference_with_template(styles_used_in_doc, template)
        return different_styles

    def save_as_docx(self, path: str = ''):
        """Save the document; an empty *path* means "save in place"."""
        path = path if path else self.path
        self.path = path
        self.xdoc.save(path)

    @staticmethod
    def _index_to_str(index_list):
        """Join a hierarchical index such as [1, 2, 3] into the string '1.2.3'."""
        return '.'.join(str(el) for el in index_list)

    def get_blocks(self):
        """Return the document's blocks with stringified indexes.

        Returns None for templates and generated files, which have no
        retrievable blocks.
        TODO: add a predicate telling whether the Doc is a template or a
        generated doc, and merge with get_blocks_requirements.
        """
        if "temp/generated_files" in self.path or "data/templates" in self.path:
            return
        blocks = self.container.blocks
        for block in blocks:
            block.doc = self.title
            block.index = self._index_to_str(block.index)
        return blocks

    def get_blocks_requirements(self):
        """Same as get_blocks, but over the requirements container."""
        if "temp/generated_files" in self.path or "data/templates" in self.path:
            return
        blocks = self.container_requirements.blocks
        for block in blocks:
            block.doc = self.title
            # Some indexes are already strings; only convert the list form.
            if not isinstance(block.index, str):
                block.index = self._index_to_str(block.index)
        return blocks

    @property
    def toc(self):
        """Paragraphs that belong to the table of contents."""
        return [p for p in self.paragraphs if p.toc]

    @property
    def structure(self):
        """Tree-view structure delegated to the root container."""
        return self.container.structure

    def replace_tasks(self, resolutions: list):
        """Replace each 'task' paragraph with the matching resolution text.

        Silently skips (with a console message) when counts do not match.
        """
        if len(resolutions) == len(self.tasks):  # exception to be handled
            p_tasks = [p for p in self.paragraphs if p.type == 'task']
            for p, r in zip(p_tasks, resolutions):
                p.set_text(r)
        else:
            print(f"résolutions : {len(resolutions)} != {len(self.tasks)} tasks")
        return self

    def get_paragraphs(self):
        return self.container.all_paragraphs

    def get_text_from_paragraphs(self):
        return [p.text for p in self.paragraphs]

    def check_document(self):
        """Debugging helper: pretty-print the doc's pictures, text and tables."""
        picCount = 0
        tabCount = 0
        for paragraph in self.xdoc.paragraphs:
            if picCount < len(self.xdoc.inline_shapes):
                print('\033[1mPicture \033[0m')
                picCount += 1
            elif paragraph.text:
                print(paragraph.text)
            elif tabCount < len(self.xdoc.tables):
                table = self.xdoc.tables[tabCount]
                data = []
                keys = None
                for i, row in enumerate(table.rows):
                    text = (cell.text for cell in row.cells)
                    if i == 0:
                        # First row is used as the header keys.
                        keys = tuple(text)
                        continue
                    row_data = dict(zip(keys, text))
                    data.append(row_data)
                print('\033[1mTable:\033[0m', data)
                tabCount += 1
            else:
                print('\033[1mEmpty paragraph\033[0m')

    def center_tables(self):
        """Center every table; returns the number of tables processed."""
        j = 0
        for table in self.xdoc.tables:
            j += 1
            table.alignment = WD_TABLE_ALIGNMENT.CENTER
        return j

    def rearrange_tables(self):
        """Hotfix for table autofit, applied directly on the underlying XML."""
        for table in self.xdoc.tables:
            table.autofit = True
            table.allow_autofit = True
            table._tblPr.xpath("./w:tblW")[0].attrib[
                "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type"] = "auto"
            for row in table.rows:
                for cell in row.cells:
                    cell._tc.tcPr.tcW.type = 'auto'
                    cell._tc.tcPr.tcW.w = 0

    def center_images(self):
        """Center paragraphs containing images (only works for images in runs)."""
        for paragraph in self.paragraphs:
            paragraph.center_paragraph()

    def justify_content(self):
        """Justify the text; applied only to 'Normal'-styled paragraphs."""
        for paragraph in self.paragraphs:
            paragraph.justify_paragraph()

    def number_images_in_doc(self):
        """Debug helper (unused): count the inline pictures of the document."""
        picCount = 0
        for _ in self.xdoc.paragraphs:
            if picCount < len(self.xdoc.inline_shapes):
                print('\033[1mPicture \033[0m')
                picCount += 1
        return picCount

    def get_all_styles_used_in_doc_except_list(self):
        return self.container.get_all_styles_used_in_doc_except_list()

    def get_list_styles(self):
        return self.container.get_list_styles()

    def retrieve_number_of_misapplied_styles(self):
        return self.container.retrieve_number_of_misapplied_styles()

    def normal_style_for_empty_paragraphs(self):
        """Reset blank, non-TOC paragraphs to the 'Normal' style, then save."""
        for p in self.paragraphs:
            if p.blank and not p.toc:
                p.set_style(self.styles.get_style_from_name("Normal"))
        self.save_as_docx()

    def append_doc_to_template_and_update_toc(self, template):
        """Prepend the template to this document and refresh the TOC.

        Document_compose is the plain Document class from python-docx;
        Composer (docxcompose) allows merging several documents.
        TODO: rename Document_compose into XDocument.
        """
        master = Document_compose(template.path)
        composer = Composer(master)
        doc = Document_compose(self.path)
        composer.append(doc)
        composer.save(self.path)
        new_doc = Doc(self.path)
        update_table_of_contents(new_doc.xdoc)
        new_doc.save_as_docx()

    def delete_content_before_toc(self):
        """Remove everything that appears before the table of contents.

        TODO: loop with our Paragraph objects instead of raw xdoc paragraphs.
        NOTE(review): paragraphs are deleted while iterating xdoc.paragraphs
        and self.paragraphs is popped from the front in lockstep — this relies
        on the pre-TOC paragraphs being exactly the first ones; confirm.
        """
        if self.contains_toc():
            for line in self.xdoc.paragraphs:
                if "toc" in line.style.name:
                    break
                if len(line.text) == 0:
                    self.delete_paragraph(line)
                    self.paragraphs.pop(0)
                    continue
                if 'toc' not in line.style.name:
                    self.delete_paragraph(line)
                    self.paragraphs.pop(0)
        self.save_as_docx()

    def delete_paragraph(self, paragraph):
        """Detach *paragraph* (a raw docx paragraph) from the XML tree.

        TODO: move to the Paragraph class.
        """
        p = paragraph._element
        p.getparent().remove(p)
        paragraph._p = paragraph._element = None

    def delete_toc(self, template):
        """Delete this document's table of contents.

        TODO: loop with our Paragraph objects instead of raw xdoc paragraphs.
        """
        index_to_insert = None
        for index, p in enumerate(template.paragraphs):
            index_to_insert = index
            # BUGFIX: the original condition `("table des matières" or
            # "table of contents") in ...` only ever tested the French string.
            text = p.text.lower()
            if "table des matières" in text or "table of contents" in text:
                index_to_insert += 1
                break
        # NOTE(review): index_to_insert is computed but never used — confirm
        # whether it was meant to feed insert_table_of_content().
        xparagraphs_toc = [p.xparagraph for p in self.toc]
        for p in xparagraphs_toc:
            self.delete_paragraph(p)
            self.paragraphs.pop(0)
        self.save_as_docx()

    def insert_table_of_content(self, index):
        """Insert a Word TOC field before paragraph *index* (not used here)."""
        paragraph = self.xdoc.paragraphs[index].insert_paragraph_before("", "Normal")
        paragraph.paragraph_format.space_before = Inches(0)
        paragraph.paragraph_format.space_after = Inches(0)
        run = paragraph.add_run()

        fldChar = OxmlElement('w:fldChar')  # creates a new element
        fldChar.set(qn('w:fldCharType'), 'begin')  # sets attribute on element

        instrText = OxmlElement('w:instrText')
        instrText.set(qn('xml:space'), 'preserve')
        instrText.text = 'TOC \\o "1-5" \\h \\z \\u'  # change "1-5" depending on the heading levels you need

        fldChar2 = OxmlElement('w:fldChar')
        fldChar2.set(qn('w:fldCharType'), 'separate')

        fldChar3 = OxmlElement('w:t')
        fldChar3.text = "Right-click to update field."
        # NOTE(review): fldChar3 is immediately rebound below, so the w:t
        # element above is discarded — confirm whether it should be appended.
        fldChar3 = OxmlElement('w:updateFields')
        fldChar3.set(qn('w:val'), 'true')
        fldChar2.append(fldChar3)

        fldChar4 = OxmlElement('w:fldChar')
        fldChar4.set(qn('w:fldCharType'), 'end')

        r_element = run._r
        r_element.append(fldChar)
        r_element.append(instrText)
        r_element.append(fldChar2)
        r_element.append(fldChar4)

        p_element = paragraph._p
        print(p_element.xml)

    def contains_toc(self):
        """Return True when the body contains hyperlink-styled runs, i.e. a
        generated table of contents."""
        body_elements = self.xdoc._body._body
        # Extract the runs wrapped in a <w:r> tag.
        rs = body_elements.xpath('.//w:r')
        # A run styled as a hyperlink belongs to the TOC.
        table_of_content = []
        for r in rs:
            if r.style:
                if "hyperlink" in r.style.lower() or "lienhypertexte" in r.style.lower():
                    table_of_content.append(r.text)
        return len(table_of_content) > 0

    def handle_content_before_toc(self):
        """Strip pre-TOC content for user documents (not templates nor
        generated files).

        TODO: use a function to determine the type of the doc.
        NOTE: known problem area (original comment: "PREMIER PROBLEME").
        """
        if "data/templates" not in self.path and "temp/generated_files" not in self.path:
            self.delete_content_before_toc()

    def delete_style(self, style_name):
        """Remove *style_name* from the document's styles and save."""
        self.styles.delete_style(style_name)
        self.save_as_docx()

    def change_bullet_style(self, style_name, template_style_name, template) -> dict:
        """Rewrite every bullet paragraph of *style_name* with the template
        style *template_style_name*.

        style_name has the form "<real_style_name> : indentation = <level>":
        real_style_name is the core style name with no indentation, level is
        the indentation level.
        TODO: recode to respect the OOP.
        """
        i = 0
        real_style_name = style_name.split(' : ')[0]
        level = int(style_name.split(' = ')[1])
        while i < len(self.xdoc.paragraphs):
            para = self.xdoc.paragraphs[i]
            if real_style_name == para.style.name and self.paragraphs[i].is_list and self.paragraphs[i].list_indentation == level:
                self.delete_paragraph(self.xdoc.paragraphs[i])
                self.paragraphs.pop(i)
                if i == len(self.xdoc.paragraphs):
                    # Deleted paragraph was the last one: append at the end.
                    paragraph_inserted = self.xdoc.add_paragraph(para.text, style=template.styles.get_style_from_name(template_style_name))
                else:
                    paragraph_inserted = self.xdoc.paragraphs[i].insert_paragraph_before(para.text, style=template.styles.get_style_from_name(template_style_name))
                self.paragraphs.insert(i, Paragraph(paragraph_inserted, self.id_, i, self))
            i += 1
        log_dict = self.change_bullet_style_in_tables(style_name, template_style_name, template)
        self.save_as_docx()
        return log_dict

    def change_bullet_style_in_tables(self, style_name, template_style_name, template) -> dict:
        """Same as change_bullet_style, but inside table cells.

        TODO: same refactoring as above.
        """
        real_style_name = style_name.split(' : ')[0]
        level = int(style_name.split(' = ')[1])
        for table in self.xdoc.tables:
            for row in table.rows:
                for cell in row.cells:
                    i = 0
                    for para in cell.paragraphs:
                        real_para = Paragraph(para, self.id_, i, self)
                        if real_style_name == para.style.name and real_para.is_list and real_para.list_indentation == level:
                            self.delete_paragraph(para)
                            if i == len(cell.paragraphs):
                                cell.add_paragraph(real_para.text, style=template.styles.get_style_from_name(template_style_name))
                            else:
                                cell.paragraphs[i].insert_paragraph_before(real_para.text, style=template.styles.get_style_from_name(template_style_name))
                        i += 1
        log = f"Le style {style_name} a été changé en {template_style_name}"
        log_dict = {'list_mapping': log}
        return log_dict

    def table_insertion(self, paragraph: Paragraph, content: dict):
        """Insert a table right after *paragraph* and return it.

        content has the format:
            {"headers": ["h1", "h2", "h3"],
             "rows": [["r1", "r1", "r1"], ["r2", "r2", "r2"]]}
        """
        self.xdoc.add_table(rows=len(content["rows"]) + 1, cols=len(content["headers"]))
        # 'Normal table' default style.
        table = self.xdoc.tables[-1]
        # Fill the header row.
        for i, header in enumerate(content["headers"]):
            table.cell(0, i).text = header
        # Fill the data rows.
        for i, row in enumerate(content["rows"]):
            for j, cell in enumerate(row):
                table.cell(i + 1, j).text = cell
        # Move the table right after the anchor paragraph.
        self.move_table_after(table, paragraph.xparagraph)
        self.rearrange_tables()
        self.save_as_docx()
        return table

    def delete_table(self, table):
        """Detach *table* from the XML tree and save."""
        table._element.getparent().remove(table._element)
        table._element = table._row = None
        self.save_as_docx()

    def move_table_after(self, table, paragraph):
        """Move *table* so it immediately follows *paragraph* in the XML tree."""
        tbl, p = table._tbl, paragraph._p
        p.addnext(tbl)

    def remove_all_but_last_section(self):
        """Remove every inline w:sectPr of the document (not used)."""
        sectPrs = self.xdoc._element.xpath(".//w:pPr/w:sectPr")
        for sectPr in sectPrs:
            print(sectPr)
            sectPr.getparent().remove(sectPr)
|
src/domain/paragraph.py
ADDED
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import string
|
2 |
+
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
3 |
+
from src.tools.paragraph_tools import find_list_indentation_level
|
4 |
+
|
5 |
+
|
6 |
+
# Sentinel level for paragraphs that are not headings.
INFINITE = 10000

class Paragraph:
    """Domain wrapper around a python-docx paragraph.

    Carries the paragraph's style, heading level, list information and a
    parsed classification ('structure' / 'task' / 'comment' / 'normal').
    """

    def __init__(self, xparagraph, doc_id: int, id_: int, doc):
        self.doc = doc
        self.xparagraph = xparagraph
        # Template paragraphs never carry list information.
        self.is_template_para = "data/templates" in self.doc.path
        # Composite id: literal '2' prefix + doc id + paragraph id.
        self.id_ = int(str(2) + str(doc_id) + str(id_))
        self.style_name = self.xparagraph.style.name
        self.is_list, self.list_indentation = find_list_indentation_level(self.xparagraph, self.doc) if not self.is_template_para else (False, 0)
        self.level = self.get_level_from_name(self.style_name)
        self.is_structure = self.level < INFINITE
        self.text = self.xparagraph.text
        self.type, self.parsed_text = self.parse_text()

    @property
    def style_misapplied(self):
        """Whether the first run's font deviates from its declared style.

        NOTE: known to be bugged; currently unused.
        """
        # Compare the actual first-run font against the declared style's font;
        # a mismatch means the style was manually overridden.
        first_run_style = [run.style.font for run in self.xparagraph.runs]
        first_run_style = first_run_style[0] if first_run_style else None
        if not first_run_style:
            return False
        doc_style = self.doc.styles.get_style_from_name(self.style_name)
        if first_run_style.size != doc_style.font.size:
            return True
        if first_run_style.name != doc_style.font.name:
            return True
        if first_run_style.bold != doc_style.font.bold:
            return True
        if first_run_style.italic != doc_style.font.italic:
            return True
        if first_run_style.underline != doc_style.font.underline:
            return True
        if first_run_style.all_caps != doc_style.font.all_caps:
            return True
        if first_run_style.color.rgb != doc_style.font.color.rgb:
            return True
        return False

    @property
    def structure(self):
        """Tree-view node describing this paragraph."""
        structure = {str(self.id_): {
            'index': str(self.id_),
            'canMove': True,
            'isFolder': False,
            'children': [],
            'title': self.text,
            'canRename': True,
            'data': {},
            'level': self.level,
        }}
        return structure

    @property
    def blank(self):
        """True when the paragraph brings no signal (no ascii letter at all)."""
        text = self.text.replace('\n', '')
        return set(text).isdisjoint(string.ascii_letters)

    @property
    def toc(self):
        """True when the paragraph is part of the table of contents."""
        return "toc" in self.style_name

    @staticmethod
    def get_level_from_name(style_name: str) -> int:
        """Heading level from the style name ('Heading 2' -> 2).

        Returns INFINITE for non-heading styles or when no trailing digit
        is present (e.g. 'Heading Char').
        """
        level = INFINITE
        if 'Titre' in style_name or 'Heading' in style_name:
            suffix = style_name[-1]
            try:
                level = int(suffix)
            except ValueError:
                # No trailing digit: keep the sentinel level.
                pass
        return level

    def parse_text(self) -> tuple:
        """Classify the paragraph; returns (type, parsed_text).

        Headings are 'structure'; text containing a '?? ' marker is a 'task',
        '++ ' a 'comment'; everything else is 'normal'.
        """
        if self.is_structure:
            return 'structure', self.text
        markers = {"?? ": "task", "++ ": "comment"}
        for marker, paragraph_type in markers.items():
            split = self.text.rsplit(marker)
            if 1 < len(split):
                return paragraph_type, split[1]
        return "normal", self.text

    def set_text(self, text: str):
        """Replace both the cached text and the underlying docx text."""
        self.text = text
        self.xparagraph.text = text
        return self

    def center_paragraph(self):
        """Center the paragraph when it contains an inline image."""
        if self.contains_image():
            self.xparagraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    def justify_paragraph(self):
        """Justify the paragraph, but only for the 'Normal' style."""
        if self.xparagraph.style.name == "Normal":
            self.xparagraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY

    def contains_image(self) -> bool:
        """True when any run of the paragraph embeds a picture element."""
        return any("pic:pic" in run.element.xml for run in self.xparagraph.runs)

    def get_styles_in_paragraph_except_list(self):
        """Paragraph style plus any run style overriding it (lists excluded)."""
        styles = [self.xparagraph.style] if not self.is_list else []
        for run in self.xparagraph.runs:
            if run.style.name != "Default Paragraph Font" and run.style.name != self.xparagraph.style.name:
                styles.append(run.style)
        return styles

    def get_list_styles(self):
        """For list paragraphs, the '<style> : indentation = <n>' descriptor."""
        styles = []
        if self.is_list:
            styles.append(self.xparagraph.style.name + " : indentation = " + str(self.list_indentation))
        return styles

    def set_style(self, style):
        """Apply *style* to the underlying docx paragraph."""
        self.xparagraph.style = style
        return self
|
138 |
+
|
139 |
+
|
140 |
+
|
src/domain/requirements_paragraphs.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import string
|
2 |
+
|
3 |
+
# Sentinel level for paragraphs that are not headings.
INFINITE = 10000

class Requirement_Paragraph:
    """A paragraph extracted by the requirements reader (text + font style)."""

    def __init__(self, text: str, font_style: str, id_: int, page_id: int):
        self.font_style = font_style
        # Composite id: literal '2' prefix + page id + paragraph id.
        self.id_ = int(str(2) + str(page_id) + str(id_))
        self.page_id = page_id
        self.level = self.get_level_from_name(font_style)
        self.is_structure = self.level < INFINITE
        self.text = text

    @property
    def blank(self):
        """True when the paragraph brings no signal (no ascii letter at all)."""
        text = self.text.replace('\n', '')
        return set(text).isdisjoint(string.ascii_letters)

    def rearrange_paragraph(self):
        """Wrap code/table paragraphs with explicit markers for later prompting."""
        if self.font_style == "code":
            self.text = "\n\nCode :```\n" + self.text + "\n```\n\n"
        elif self.font_style == "table":
            self.text = "\n\nTable :\n" + self.text + "\n\n"
        return self

    @staticmethod
    def get_level_from_name(style_name: str) -> int:
        """Heading level from the style name ('Titre 2' -> 2).

        Returns INFINITE for non-heading styles or when no trailing digit
        is present.
        """
        level = INFINITE
        if 'Titre' in style_name or 'Heading' in style_name:
            suffix = style_name[-1]
            try:
                level = int(suffix)
            except ValueError:
                # No trailing digit: keep the sentinel level.
                pass
        return level
|
src/domain/styles.py
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from docx.enum.style import WD_STYLE_TYPE
|
2 |
+
from docx.shared import RGBColor
|
3 |
+
import re
|
4 |
+
|
5 |
+
|
6 |
+
class Styles:
|
7 |
+
|
8 |
+
def __init__(self, xstyles, doc_id=0, id_=0):
|
9 |
+
|
10 |
+
self.id_ = int(str(doc_id)+str(id_))
|
11 |
+
self.xstyles = xstyles
|
12 |
+
self.names = [s.name for s in xstyles]
|
13 |
+
@staticmethod
|
14 |
+
def copy_style(src=None, dest=None) -> {}:
|
15 |
+
modified_style = set()
|
16 |
+
if src.type == WD_STYLE_TYPE.PARAGRAPH:
|
17 |
+
same_color = True
|
18 |
+
if src.font.color.rgb:
|
19 |
+
dest_rgb = RGBColor(src.font.color.rgb[0], src.font.color.rgb[1], src.font.color.rgb[2])
|
20 |
+
if dest.font.color.rgb:
|
21 |
+
for i in range(3):
|
22 |
+
same_color *= dest.font.color.rgb[i] == dest_rgb[i]
|
23 |
+
else:
|
24 |
+
same_color = False
|
25 |
+
dest.font.color.rgb = dest_rgb
|
26 |
+
else:
|
27 |
+
if dest.font.color.rgb:
|
28 |
+
same_color = False
|
29 |
+
if not same_color:
|
30 |
+
modified_style.add(('color', True))
|
31 |
+
|
32 |
+
if dest.font.size != src.font.size:
|
33 |
+
dest.font.size = src.font.size
|
34 |
+
modified_style.add(('font size', (src.font.size, dest.font.size)))
|
35 |
+
|
36 |
+
if dest.font.name != src.font.name:
|
37 |
+
dest.font.name = src.font.name
|
38 |
+
modified_style.add(('font', (src.font.name, dest.font.name)))
|
39 |
+
|
40 |
+
if dest.font.all_caps != src.font.all_caps:
|
41 |
+
dest.font.all_caps = src.font.all_caps
|
42 |
+
modified_style.add(('all_caps', (src.font.all_caps, dest.font.all_caps)))
|
43 |
+
|
44 |
+
if dest.font.bold != src.font.bold:
|
45 |
+
dest.font.bold = src.font.bold
|
46 |
+
modified_style.add(('bold', (src.font.bold, dest.font.bold)))
|
47 |
+
|
48 |
+
dest.font.complex_script = src.font.complex_script
|
49 |
+
dest.font.cs_bold = src.font.cs_bold
|
50 |
+
dest.font.cs_italic = src.font.cs_italic
|
51 |
+
dest.font.double_strike = src.font.double_strike
|
52 |
+
dest.font.emboss = src.font.emboss
|
53 |
+
dest.font.hidden = src.font.hidden
|
54 |
+
dest.font.highlight_color = src.font.highlight_color
|
55 |
+
dest.font.imprint = src.font.imprint
|
56 |
+
dest.font.italic = src.font.italic
|
57 |
+
dest.font.math = src.font.math
|
58 |
+
dest.font.no_proof = src.font.no_proof
|
59 |
+
dest.font.outline = src.font.outline
|
60 |
+
dest.font.rtl = src.font.rtl
|
61 |
+
dest.font.shadow = src.font.shadow
|
62 |
+
dest.font.small_caps = src.font.small_caps
|
63 |
+
dest.font.snap_to_grid = src.font.snap_to_grid
|
64 |
+
dest.font.spec_vanish = src.font.spec_vanish
|
65 |
+
dest.font.strike = src.font.strike
|
66 |
+
dest.font.subscript = src.font.subscript
|
67 |
+
dest.font.superscript = src.font.superscript
|
68 |
+
dest.font.underline = src.font.underline
|
69 |
+
dest.font.web_hidden = src.font.web_hidden
|
70 |
+
dest.base_style = src.base_style
|
71 |
+
dest.hidden = src.hidden
|
72 |
+
dest.locked = src.locked
|
73 |
+
dest.name = src.name
|
74 |
+
dest.priority = src.priority
|
75 |
+
dest.quick_style = src.quick_style
|
76 |
+
dest.unhide_when_used = src.unhide_when_used
|
77 |
+
|
78 |
+
if src.type == WD_STYLE_TYPE.LIST:
|
79 |
+
dest.hidden = src.hidden
|
80 |
+
dest.locked = src.locked
|
81 |
+
dest.name = src.name
|
82 |
+
dest.priority = src.priority
|
83 |
+
dest.quick_style = src.quick_style
|
84 |
+
dest.style_id = src.style_id
|
85 |
+
dest.unhide_when_used = src.unhide_when_used
|
86 |
+
|
87 |
+
if src.type == WD_STYLE_TYPE.TABLE:
|
88 |
+
dest.hidden = src.hidden
|
89 |
+
dest.locked = src.locked
|
90 |
+
dest.name = src.name
|
91 |
+
dest.priority = src.priority
|
92 |
+
dest.quick_style = src.quick_style
|
93 |
+
dest.unhide_when_used = src.unhide_when_used
|
94 |
+
return modified_style
|
95 |
+
|
96 |
+
|
97 |
+
def apply_from(self, template_styles, options_list):
|
98 |
+
|
99 |
+
if(options_list == []):
|
100 |
+
log = {'suppressed_styles': [], 'modified_styles': [], 'added_styles': []}
|
101 |
+
else:
|
102 |
+
log = {'options_applied': options_list,'suppressed_styles': [], 'modified_styles': [], 'added_styles': []}
|
103 |
+
|
104 |
+
for s in self.xstyles:
|
105 |
+
if s.name in template_styles.names:
|
106 |
+
src_style = template_styles.check_particular_styles(s.name)
|
107 |
+
log_s = self.copy_style(src=src_style, dest=s)
|
108 |
+
if log_s:
|
109 |
+
log['modified_styles'].append((s.name, log_s))
|
110 |
+
|
111 |
+
for s in template_styles.xstyles:
|
112 |
+
if not self.contains_style(s):
|
113 |
+
log['added_styles'].append(s.name)
|
114 |
+
self.xstyles.add_style(s.name, s.type)
|
115 |
+
self.copy_style(src=s, dest=self.xstyles[s.name])
|
116 |
+
return log
|
117 |
+
|
118 |
+
|
119 |
+
def copy_one_style(self, src_style, dest_style) -> {}:
|
120 |
+
log_msg = \
|
121 |
+
f"le style {src_style.name} a été mappé sur le style {dest_style.name} du template"
|
122 |
+
log_dict = {'style_mapping': log_msg}
|
123 |
+
self.copy_style(dest_style, src_style)
|
124 |
+
return log_dict
|
125 |
+
|
126 |
+
def get_style_from_name(self, name: str):
|
127 |
+
try:
|
128 |
+
s = self.xstyles[name]
|
129 |
+
except:
|
130 |
+
return None
|
131 |
+
return s
|
132 |
+
|
133 |
+
def contains_style(self, style):
|
134 |
+
resp = True
|
135 |
+
try:
|
136 |
+
s = self.xstyles[style.name]
|
137 |
+
except:
|
138 |
+
try:
|
139 |
+
s = self.xstyles[style.name[1:]]
|
140 |
+
except:
|
141 |
+
resp = False
|
142 |
+
return resp
|
143 |
+
|
144 |
+
def check_particular_styles(self,style_to_transform : str):
|
145 |
+
temp = style_to_transform
|
146 |
+
if re.search("^Heading [0-9]$", style_to_transform) or re.search("^Titre [0-9]$", style_to_transform):
|
147 |
+
style_to_transform = ".Titre" + style_to_transform[-1]
|
148 |
+
res = self.get_style_from_name(style_to_transform)
|
149 |
+
if res is None:
|
150 |
+
style_to_transform = ".Titre " + style_to_transform[-1]
|
151 |
+
res = self.get_style_from_name(style_to_transform)
|
152 |
+
else:
|
153 |
+
return res
|
154 |
+
if res:
|
155 |
+
return res
|
156 |
+
else:
|
157 |
+
return self.get_style_from_name(temp)
|
158 |
+
else:
|
159 |
+
return self.get_style_from_name(temp)
|
160 |
+
|
161 |
+
    def delete_style(self, style_name):
        """Remove *style_name* from the document and from the cached name list."""
        # Delete the underlying python-docx style first, then keep the
        # local `names` cache in sync with it.
        self.xstyles[style_name].delete()
        self.names.remove(style_name)
|
164 |
+
|
src/domain/wikidoc.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
the class works but lots of code could be reused
|
3 |
+
"""
|
4 |
+
|
5 |
+
|
6 |
+
class Doc:
    """Hierarchical view over a plain-text document.

    The full text is split into non-blank Line objects, a title is
    extracted (for 'input_text' documents) and the remaining lines are
    organised into a Container tree.
    """

    def __init__(self, fulltext: str = '', title: str = '', params: dict = None):
        # BUGFIX: the mutable default argument `params={}` was shared
        # between calls; use a None sentinel instead.
        self.params = params if params is not None else {}
        self.lines = [Line(text.strip(), self.params) for text in fulltext.split("\n") if text.strip()]
        self.title, self.lines = self._get_title(title)
        self.container = Container(lines=self.lines, title=self.title, father=self, params=self.params)
        self.fulltext = fulltext

    def _get_title(self, title):
        """Return (title, lines).

        For 'input_text' documents the title is taken from the first line
        when that line is typed as a title (and removed from the lines);
        otherwise a placeholder title is used.  Other document types keep
        the *title* argument untouched.
        """
        lines = self.lines
        if self.params['type'] == 'input_text':
            if self.lines and self.lines[0] and self.lines[0].type == 'title':
                title = self.lines[0].text
                lines = lines[1:]
            else:
                title = 'the title is missing'
        return title, lines
|
23 |
+
|
24 |
+
|
25 |
+
class WikiPage(Doc):
    """Doc specialisation for MediaWiki markup ('== Title ==' headings)."""

    def __init__(self, fulltext='', title=''):
        self.params = {
            'type': 'wiki',
            # '== x ==' is a level-1 heading, up to '======= x =======' for level 6.
            'startswith_':
                {'== ': '1', '=== ': '2', '==== ': '3', '===== ': '4', '====== ': '5', '======= ': '6'},
            'endswith_':
                # BUGFIX: the last suffix was ' ======' (six '='), duplicated
                # from level 5 and inconsistent with the seven-'=' starter
                # '======= ' it is paired with.
                [' ==', ' ===', ' ====', ' =====', ' ======', ' ======='],

            # Wikipedia boilerplate sections dropped from the tree.
            'discarded': ["See also", "Notes", "References", "Sources", "External links", "Bibliography",
                          "Cinematic adaptations", "Further reading", "Maps"]
        }
        super().__init__(fulltext=fulltext, title=title, params=self.params)

    def get_paragraphs(self, chunk=500):
        """Return the page's text split into paragraphs of roughly *chunk* chars."""
        return self.container.get_paragraphs(chunk)
|
42 |
+
|
43 |
+
|
44 |
+
class Container:
|
45 |
+
|
46 |
+
def __init__(self, lines=[], level=0, title='', father=None, params={}):
|
47 |
+
|
48 |
+
self.children = []
|
49 |
+
self.level = level
|
50 |
+
self.title = title
|
51 |
+
self.father = father
|
52 |
+
self.lines = []
|
53 |
+
self._expand(lines)
|
54 |
+
if params and 'discarded' in params.keys():
|
55 |
+
self.children = [child for child in self.children if child.title not in params['discarded']]
|
56 |
+
self.containers = [self]
|
57 |
+
for child in self.children:
|
58 |
+
self.containers += child.containers
|
59 |
+
self.text = ''
|
60 |
+
for child in self.children:
|
61 |
+
self.text += ' ' + child.text
|
62 |
+
|
63 |
+
def _expand(self, lines):
|
64 |
+
new_child = False
|
65 |
+
new_child_lines = []
|
66 |
+
new_child_title = []
|
67 |
+
for line in lines:
|
68 |
+
if not new_child:
|
69 |
+
if line.is_structure:
|
70 |
+
new_child = True
|
71 |
+
new_child_lines = []
|
72 |
+
new_child_title = line.text
|
73 |
+
line.level = self.level + 1
|
74 |
+
else:
|
75 |
+
self.lines.append(line)
|
76 |
+
|
77 |
+
else:
|
78 |
+
if self.level + 1 < line.level or not line.is_structure:
|
79 |
+
new_child_lines.append(line)
|
80 |
+
elif self.level + 1 == line.level:
|
81 |
+
self.children.append(Container(lines=new_child_lines,
|
82 |
+
level=self.level + 1,
|
83 |
+
title=new_child_title,
|
84 |
+
father=self))
|
85 |
+
new_child_lines = []
|
86 |
+
new_child_title = line.text
|
87 |
+
if new_child:
|
88 |
+
self.children.append(Container(lines=new_child_lines,
|
89 |
+
level=self.level + 1,
|
90 |
+
title=new_child_title,
|
91 |
+
father=self))
|
92 |
+
|
93 |
+
def get_paragraphs(self, chunk=500):
|
94 |
+
if len(self.text) < chunk:
|
95 |
+
paragraphs = [self.text]
|
96 |
+
else:
|
97 |
+
paragraphs = [self.root_text]
|
98 |
+
for child in self.children:
|
99 |
+
paragraphs += child.get_paragraphs(chunk)
|
100 |
+
return paragraphs
|
101 |
+
|
102 |
+
|
103 |
+
class Line:
    """One raw text line, classified via the markers given in `params`.

    `type` is the marker's tag (a digit string for headings, 'normal'
    otherwise), `level` the heading depth (-1 for plain text) and
    `is_structure` tells whether the line opens a new section.
    """

    def __init__(self, text, params):
        self.text = text
        self.params = params
        self.type, self.text = self._parse_text()
        self.level = int(self.type) if self.type.isdigit() else -1
        self.is_structure = self.level > 0

    def _parse_text(self):
        """Return (type, cleaned_text) by matching the configured markers."""

        def strip_text(raw, start, end):
            # Keep what follows the start marker, then cut at the end marker.
            remainder = raw.split(start)[1]
            if end != "":
                remainder = remainder.split(end)[0]
            return remainder.strip()

        markers = self.params['startswith_']
        # When no closing markers are configured, pair each opener with "".
        closers = self.params.get('endswith_', [""] * len(markers))

        matches = []
        for position, opener in enumerate(markers.keys()):
            if self.text.startswith(opener):
                matches.append((strip_text(self.text, opener, closers[position]),
                                markers[opener]))

        if matches:
            text, kind = matches[0]
        else:
            text, kind = self.text, 'normal'
        return kind, text.strip()
|
src/llm/llm_tools.py
ADDED
@@ -0,0 +1,337 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
TODO: add a boolean to switch llms
|
3 |
+
"""
|
4 |
+
|
5 |
+
|
6 |
+
import json
|
7 |
+
import string
|
8 |
+
import openai
|
9 |
+
|
10 |
+
import wikipedia
|
11 |
+
from langchain.text_splitter import CharacterTextSplitter
|
12 |
+
from langchain.prompts import PromptTemplate
|
13 |
+
from langchain.chains import LLMChain
|
14 |
+
from src.llm.llms import openai_llm
|
15 |
+
from src.tools.wiki import Wiki
|
16 |
+
|
17 |
+
|
18 |
+
# async def get_wikilist_open_source(task: {}) -> str:
|
19 |
+
# """
|
20 |
+
# get the titles of wiki pages interesting for solving the given task
|
21 |
+
# """
|
22 |
+
|
23 |
+
# template = ("<s>[INST] Your task consists in finding the list of wikipedia page titles which provide useful content "
|
24 |
+
# " for a paragraph whose description is delimited by triple backticks.\n"
|
25 |
+
# " Make sure that you provide no more than 10 elements and that the list is actually finished."
|
26 |
+
# " Format your response as a valid JSON list of strings separated by commas.[/INST]</s>"
|
27 |
+
# " Description: ```{description}```")
|
28 |
+
|
29 |
+
# prompt = PromptTemplate(template=template, input_variables=['description'])
|
30 |
+
# llm_chain = LLMChain(llm=opensource_llm, prompt=prompt)
|
31 |
+
# response = llm_chain.run({'description': task['description']})
|
32 |
+
# llm_list = response.choices[0].message.content
|
33 |
+
# try:
|
34 |
+
# wikilist = json.loads(llm_list)
|
35 |
+
# except:
|
36 |
+
# print("json loads failed with" + llm_list)
|
37 |
+
# wikilist = list(llm_list.split(','))
|
38 |
+
|
39 |
+
# expanded_wikilist = []
|
40 |
+
|
41 |
+
# expand_factor = 2
|
42 |
+
|
43 |
+
# for wikipage in wikilist:
|
44 |
+
# expanded_wikilist += wikipedia.search(wikipage, expand_factor)
|
45 |
+
|
46 |
+
# wikilist = list(set(expanded_wikilist))
|
47 |
+
|
48 |
+
# return wikilist
|
49 |
+
|
50 |
+
|
51 |
+
|
52 |
+
async def get_wikilist(task: {}) -> list:
    """Return wikipedia page titles useful for the task's paragraph.

    Asks the LLM for up to 10 candidate titles (as a JSON list), expands
    each candidate through a wikipedia search and deduplicates the result.
    Falls back to a naive comma split when the reply is not valid JSON.
    """

    llm = openai_llm
    template = (f"\n"
                f" Your task consists in finding the list of wikipedia page titles which provide useful content "
                f" for a paragraph whose description is delimited by triple backticks: ```{task['description']}```\n"
                f" "
                f" Make sure that you provide no more than 10 elements and that the list is actually finished."
                f" Format your response as a valid JSON list of strings separated by commas.\n"
                f" \n"
                f" ")

    llm_list = llm.invoke(template)
    try:
        wikilist = json.loads(llm_list)
    except (json.JSONDecodeError, TypeError):
        # BUGFIX: a bare `except:` also swallowed SystemExit/KeyboardInterrupt;
        # only a malformed reply should trigger the fallback.
        print("json loads failed with" + llm_list)
        wikilist = list(llm_list.split(','))

    # Expand each candidate into the top search hits to widen coverage.
    expand_factor = 2
    expanded_wikilist = []
    for wikipage in wikilist:
        expanded_wikilist += wikipedia.search(wikipage, expand_factor)

    # Deduplicate (order is not significant downstream).
    wikilist = list(set(expanded_wikilist))

    return wikilist
|
85 |
+
|
86 |
+
|
87 |
+
def extract_list(llm_list: str):
    """Parse the LLM's JSON-ish list reply into a list of wiki titles.

    The reply is split on double quotes after stripping the surrounding
    brackets; fragments that are too short or not mostly ASCII letters
    (e.g. the ', ' separators) are discarded.  Returns [] when the input
    cannot be processed at all.
    """

    def filter_(el: str):
        # Keep fragments longer than 2 chars whose content is >75% letters.
        resp = 2 < len(el)
        usable_length = len([c for c in el if c in string.ascii_letters])
        resp = resp and len(el) * 3 / 4 < usable_length
        return resp

    try:
        wikilist = llm_list[1:-1].split('"')
        wikilist = [el for el in wikilist if filter_(el)]
        print(wikilist)
    except (TypeError, AttributeError):
        # BUGFIX: narrowed from a bare `except:`; only a non-string input
        # can fail the slicing/splitting above.
        wikilist = []
        print('issues with the wikilist')
    return wikilist
|
103 |
+
|
104 |
+
|
105 |
+
# def get_public_paragraph_open_source(task: {}) -> str:
|
106 |
+
# """returns the task directly performed by chat GPT"""
|
107 |
+
|
108 |
+
# template = ("<s>[INST] Your task consists in generating a paragraph whose description is delimited by triple "
|
109 |
+
# "backticks.\n"
|
110 |
+
# " The paragraph belongs at the top level of the hierarchy to a document"
|
111 |
+
# " whose doc_description is delimited by triple backticks.\n"
|
112 |
+
# " Make sure that the paragraph relates the top level of the document\n"
|
113 |
+
# " The paragraph belongs to a higher paragraph in the hierarchy whose description (above) is delimited by "
|
114 |
+
# " triple backticks."
|
115 |
+
# " Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
|
116 |
+
# " The paragraphs comes after previous paragraphs whose description (before) is delimited by triple "
|
117 |
+
# " backticks.\n"
|
118 |
+
# " Make sure that the paragraph relates with previous paragraph without any repetition\n"
|
119 |
+
# " The paragraphs comes before next paragraphs whose description (after) is delimited by triple backticks.\n"
|
120 |
+
# " Make sure that the paragraph prepares the transition to the next paragraph without any "
|
121 |
+
# " repetition. [/INST]</s>"
|
122 |
+
# " Description: ```{description}```"
|
123 |
+
# " Doc description: ```{doc_description}```"
|
124 |
+
# " Above: ```{above}```"
|
125 |
+
# " Before: ```{before}```"
|
126 |
+
# " After: ```{after}```"
|
127 |
+
# )
|
128 |
+
|
129 |
+
# prompt = PromptTemplate(template=template, input_variables=['description', 'doc_description', 'above', 'before', 'after'])
|
130 |
+
# llm_chain = LLMChain(llm=opensource_llm, prompt=prompt)
|
131 |
+
# response = llm_chain.run({'description': task['description'], 'doc_description': task['doc_description'],
|
132 |
+
# 'above': task['above'], 'before': task['before'], 'after': task['after']})
|
133 |
+
# p = response.choices[0].message.content
|
134 |
+
# return p
|
135 |
+
|
136 |
+
def get_public_paragraph(task: {}) -> str:
    """Generate the task's paragraph from the LLM's own (public) knowledge.

    `task` must provide 'description', 'doc_description', 'above', 'before'
    and 'after' — the surrounding context used to keep the generated
    paragraph coherent with the rest of the document.
    """
    # Debug trace of the incoming task.
    print(task)
    llm = openai_llm
    template = (f"\n"
                f" Your task consists in generating a paragraph\\n"
                f" whose description is delimited by triple backticks: ```{task['description']}```\n"
                f"\n"
                f" The paragraph belongs at the top level of the hierarchy to a document \\n"
                f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
                f" Make sure that the paragraph relates the top level of the document\n"
                f" \n"
                f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
                f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
                f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
                f" \n"
                f" The paragraphs comes after previous paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
                f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
                f" \n"
                f" The paragraphs comes before next paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
                f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
                f" \n"
                f" \n"
                f"\n"
                f" ")

    p = llm.invoke(template)

    return p
|
167 |
+
|
168 |
+
|
169 |
+
def create_index(wikilist: [str]):
    """Build a semantic index from the given wikipedia page titles.

    Titles whose fetch fails (Wiki().fetch returns an error string) are
    skipped.  The fetched content is wrapped into WikiPage objects, split
    into overlapping ~800-char chunks and indexed.
    """
    fetch = Wiki().fetch

    # BUGFIX: fetch() was called twice per title (once in the filter and
    # once for the kept value), doubling the network traffic; fetch once.
    pages = []
    for title in wikilist:
        page = fetch(title)
        if type(page) != str:
            pages.append((title, page))

    texts = []
    chunk = 800
    for title, page in pages:
        texts.append(WikiPage(title=title, fulltext=page.page_content))

    doc_splitter = CharacterTextSplitter(
        separator=".",
        chunk_size=chunk,
        chunk_overlap=100,
        length_function=len,
    )

    # NOTE(review): only the FIRST fetched page is indexed here — confirm
    # whether every entry of `texts` should contribute.
    paragraphs = texts[0].get_paragraphs(chunk=800)

    split_texts = []
    for p in paragraphs:
        split_texts += doc_splitter.split_text(p)

    # Sanity checks on the splitter output.
    for split_text in split_texts:
        assert type(split_text) == str
        assert 0 < len(split_text) < 2 * 500

    # NOTE(review): `Chroma` is not imported in this module — this line
    # raises NameError as written; confirm the intended vector-store import.
    wiki_index = Chroma.from_texts(split_texts)

    return wiki_index
|
201 |
+
|
202 |
+
|
203 |
+
def get_wiki_paragraph(wiki_index, task: {}) -> str:
    """Generate the task's paragraph grounded in wikipedia search results.

    A first LLM pass (get_public_paragraph) produces a query used for the
    semantic search over `wiki_index`; the retrieved passages are then
    injected into the generation prompt as source material.
    """

    task_description = get_public_paragraph(task)
    wiki_paragraphs = semantic_search(wiki_index, task_description)
    # NOTE(review): the separator below is the literal "/n/n", not "\n\n" —
    # confirm whether real newlines were intended.
    text_content = ""
    for p in wiki_paragraphs:
        text_content += p.page_content + "/n/n"

    template = (f"\n"
                f" Your task consists in generating a paragraph\\n"
                f" whose description is delimited by triple backticks: ```{task['description']}```\n"
                f"\n"
                f" The text generation is based in the documents provided in these sections \n"
                f" delimited by by triple backticks: ``` {text_content}``` \n"
                f" The paragraph belongs at the top level of the hierarchy to a document \\n"
                f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
                f" Make sure that the paragraph relates the top level of the document\n"
                f" \n"
                f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
                f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
                f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
                f" \n"
                f" The paragraphs comes after previous paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
                f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
                f" \n"
                f" The paragraphs comes before next paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
                f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
                f" \n"
                f" \n"
                f"\n"
                f" ")

    llm = openai_llm
    # FIX: use `invoke` like the sibling helpers — calling the LLM object
    # directly (`llm(template)`) relies on the deprecated __call__ path.
    p = llm.invoke(template)

    return p
|
242 |
+
|
243 |
+
|
244 |
+
# def get_private_paragraph_open_source(texts, task: {}) -> str:
|
245 |
+
# """useful to get a summary in one line from wiki index"""
|
246 |
+
|
247 |
+
# text_content = ""
|
248 |
+
# for t in texts:
|
249 |
+
# text_content += t + "/n/n"
|
250 |
+
|
251 |
+
# template = ("\n"
|
252 |
+
# " Your task consists in generating a paragraph"
|
253 |
+
# " whose description is delimited by triple backticks\n"
|
254 |
+
# " The text generation is based in the documents provided in these sections \n"
|
255 |
+
# " delimited by by triple backticks (text_content)\n"
|
256 |
+
# " The paragraph belongs at the top level of the hierarchy to a document"
|
257 |
+
# " whose description is delimited by triple backticks (doc_decription)\n"
|
258 |
+
# " Make sure that the paragraph relates the top level of the document\n"
|
259 |
+
# " \n"
|
260 |
+
# " The paragraph belongs to a higher paragraph in the hierarchy"
|
261 |
+
# " whose description is delimited by triple backticks (above)\n"
|
262 |
+
# " Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
|
263 |
+
# " \n"
|
264 |
+
# " The paragraphs comes after previous paragraphs"
|
265 |
+
# " whose description is delimited by triple backticks (before)\n"
|
266 |
+
# " Make sure that the paragraph relates with previous paragraph without any repetition\n"
|
267 |
+
# " \n"
|
268 |
+
# " The paragraphs comes before next paragraphs"
|
269 |
+
# " whose description is delimited by triple backticks (after)\n"
|
270 |
+
# " Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
|
271 |
+
# " description: ```{description}```"
|
272 |
+
# " text_content: ```{text_content}```"
|
273 |
+
# " doc_description: ```{doc_description}```"
|
274 |
+
# " above: ```{above}```"
|
275 |
+
# " before: ```{before}```"
|
276 |
+
# " after: ```{after}```")
|
277 |
+
|
278 |
+
# prompt = PromptTemplate(template=template, input_variables=['description', 'text_content', 'doc_description', 'above', 'before', 'after'])
|
279 |
+
# llm_chain = LLMChain(llm=opensource_llm, prompt=prompt)
|
280 |
+
# response = llm_chain.run({'description': task['description'], 'text_content': text_content, 'doc_description': task['doc_description'],
|
281 |
+
# 'above': task['above'], 'before': task['before'], 'after': task['after']})
|
282 |
+
# p = response.choices[0].message.content
|
283 |
+
|
284 |
+
|
285 |
+
def get_private_paragraph(texts, task: {}) -> str:
    """Generate the task's paragraph grounded in the given private texts.

    `texts` is an iterable of source strings concatenated into the prompt;
    the surrounding-context fields of `task` ('description',
    'doc_description', 'above', 'before', 'after') keep the generated
    paragraph coherent with the rest of the document.
    """

    # Concatenate the source texts for the prompt.
    # NOTE(review): the separator is the literal "/n/n", not "\n\n" —
    # confirm whether real newlines were intended.
    text_content = ""
    for t in texts:
        text_content += t + "/n/n"

    template = (f"\n"
                f" Your task consists in generating a paragraph\\n"
                f" whose description is delimited by triple backticks: ```{task['description']}```\n"
                f"\n"
                f" The text generation is based in the documents provided in these sections \n"
                f" delimited by by triple backticks: ``` {text_content}``` \n"
                f" The paragraph belongs at the top level of the hierarchy to a document \\n"
                f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
                f" Make sure that the paragraph relates the top level of the document\n"
                f" \n"
                f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
                f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
                f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
                f" \n"
                f" The paragraphs comes after previous paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
                f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
                f" \n"
                f" The paragraphs comes before next paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
                f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
                f" \n"
                f" \n"
                f"\n"
                f" ")

    llm = openai_llm
    p = llm.invoke(template)

    return p
|
322 |
+
|
323 |
+
def summarize_paragraph_v2(prompt : str, title_doc : str = '', title_para : str = ''):
    """Summarize a paragraph in English with gpt-3.5-turbo-16k.

    `prompt` is expected to start with '<location> :' — the part before the
    first ' :' tells the model where the paragraph sits in the document.
    Returns the stripped summary text.
    """
    # Soft length limit communicated to the model (not enforced locally).
    max_tokens = 850
    # Everything before the first ' :' names the section of the document.
    location_of_the_paragraph = prompt.split(" :")[0]
    task = (f"Your task consists in summarizing in English the paragraph of the document untitled ```{title_doc}``` located in the ```{location_of_the_paragraph}``` section of the document."
            f"The paragraph title is ```{title_para}```."
            f"Your response shall be concise and shall respect the following format:"
            f"<summary>"
            f"If you see that the summary that you are creating will not respect ```{max_tokens}``` tokens, find a way to make it shorter.")
    generation = openai.chat.completions.create(model="gpt-3.5-turbo-16k", messages=[{"role":"system","content":task},{"role":"user","content":prompt}])
    res = generation.choices[0].message.content
    # Debug trace of the raw model output.
    print("****************")
    print(res)
    print("----")
    return str(res).strip()
|
src/llm/llms.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# LLM singletons shared by the rest of the application.
from langchain_openai import OpenAI
from transformers import AutoModelForCausalLM
import os


# Allow the HF tokenizers library to use parallel workers (silences its
# fork-related warning).
os.environ["TOKENIZERS_PARALLELISM"] = "true"

# Fall back to the local (git-ignored) config_key.py when the API key is
# not already provided through the environment.
if not "OPENAI_API_KEY" in os.environ:
    from config_key import OPENAI_API_KEY

    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

# Deterministic (temperature=0) completion model used across src/llm.
openai_llm = OpenAI(temperature=0, model="gpt-3.5-turbo-instruct")

# opensource_llm = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf") #LAMA MODEL
|
src/model/block.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class Block:
    """A chunk of document content plus its position metadata.

    Blocks are flat, serializable records (see to_dict/from_dict) used to
    move document fragments in and out of the retrieval store.
    """

    def __init__(self, doc: str = '', title: str = '', content: str = '', content_fr: str = '',
                 index: str = '', rank: int = 0, level: int = 0, distance: float = 99999):
        self.doc = doc
        self.title = title
        self.title_fr = ""
        self.content = content
        self.content_fr = content_fr
        self.specials = []        # special action strings ('##### ...' lines)
        self.index = index        # dotted section index, e.g. '1.2.3'
        self.rank = rank
        self.level = level
        self.distance = distance  # retrieval distance; 99999 means "unset"

    def to_dict(self) -> dict:
        """Serialize to a flat dict; specials are exploded into numbered keys."""
        # FIX: the return annotation was the dict literal `{}`; use `dict`.
        block_dict = {'doc': self.doc,
                      'title': self.title,
                      'title_fr': self.title_fr,
                      'content': self.content,
                      'content_fr': self.content_fr,
                      'index': self.index,
                      'rank': self.rank,
                      'level': self.level,
                      'distance': self.distance}
        for i, s in enumerate(self.specials):
            special_key = 'special_' + str(i)
            block_dict[special_key] = s
        block_dict['specials_len'] = len(self.specials)
        return block_dict

    def from_dict(self, block_dict: dict):
        """Restore the block from a dict produced by to_dict; returns self."""
        self.doc = block_dict['doc']
        self.title = block_dict['title']
        self.title_fr = block_dict['title_fr']
        self.content = block_dict['content']
        self.content_fr = block_dict['content_fr']
        self.index = block_dict['index']
        self.rank = block_dict['rank']
        self.level = block_dict['level']
        self.distance = block_dict['distance']
        self.specials = [block_dict['special_' + str(i)]
                         for i in range(block_dict['specials_len'])]
        return self

    @property
    def distance_str(self) -> str:
        """The distance formatted with two decimals, for display."""
        return format(self.distance, '.2f')
|
src/model/container.py
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.model.paragraph import Paragraph
|
2 |
+
from src.model.block import Block
|
3 |
+
|
4 |
+
INFINITE = 99999
|
5 |
+
|
6 |
+
|
7 |
+
class Container:
    """A docx section: its title paragraph, own paragraphs and sub-sections.

    Containers mirror the heading hierarchy of the document.  `index` holds
    the hierarchical section number (e.g. [1, 2]) and `id_` a unique tree id
    derived from the father's id, so nodes can be addressed from the UI.
    """

    # NOTE: annotations referencing Paragraph are stringified so the class
    # can be defined even when the import is resolved lazily.
    def __init__(self, paragraphs: "list[Paragraph]", title: "Paragraph" = None, level: int = 0,
                 index: "list[int]" = None, father=None, id_=0):
        if index is None:
            index = []
        self.level = level
        self.title = title
        self.paragraphs = []
        self.children = []
        self.index = index
        self.father = father  # if not father, then the container is at the top of the hierarchy
        # Unique id: '1' + father's id + local child rank, read as an int.
        self.id_ = int(str(1) + str(father.id_) + str(id_))
        if paragraphs:
            self.paragraphs, self.children = self.create_children(paragraphs, level, index)
            self.blocks = self.get_blocks()
            self.normals, self.comments, self.tasks = self.sort_paragraphs()

    @property
    def text(self):
        """Full text of the section: title line, own paragraphs, then children."""
        text = ""
        if self.title:
            text = "Titre " + str(self.level) + " : " + self.title.text + '\n'
        for p in self.paragraphs:
            text += p.text + '\n'
        for child in self.children:
            text += child.text
        return text

    @property
    def text_chunks(self, chunk=500):
        """Own + descendant text cut into chunks of at most ~`chunk` chars.

        NOTE(review): a property cannot receive arguments, so `chunk` is
        always 500 here; also the paragraph that overflows a chunk is dropped
        rather than carried into the next chunk — confirm both behaviours.
        """
        text_chunks = []
        text_chunk = ""
        for p in self.paragraphs:
            if chunk < len(text_chunk) + len(p.text):
                text_chunks.append(text_chunk)
                text_chunk = ""
            else:
                text_chunk += " " + p.text
        if text_chunk and not text_chunk.isspace():
            text_chunks.append(text_chunk)
        for child in self.children:
            text_chunks += child.text_chunks
        return text_chunks

    def get_blocks(self):
        """Flatten the subtree into Block records (one per non-empty section)."""
        block = Block(level=self.level, index=self.index)
        if self.title:
            block.title = self.title.text
        for p in self.paragraphs:
            if not p.blank:
                if p.text.startswith('##### '):
                    # BUGFIX: str.lstrip('##### ') strips a *character set*
                    # ('#' and ' '), not the prefix, and could eat leading
                    # characters of the action itself; slice the prefix off.
                    special_action = p.text[len('##### '):]
                    block.specials.append(special_action)
                else:
                    block.content += p.text
        blocks = [block] if block.content or block.specials else []
        for child in self.children:
            blocks += child.blocks
        return blocks

    def create_children(self, paragraphs: "list[Paragraph]", level: int, index: "list[int]") -> "tuple[list, list]":
        """
        creates children containers or directly attached content
        and returns the list of containers and contents of level+1
        :return:
            [Content or Container]
        """
        attached_paragraphs = []
        container_paragraphs = []
        container_title = None
        children = []
        in_children = False
        child_id = 0
        # Start "infinitely deep" so the first heading always closes nothing.
        level = INFINITE

        while paragraphs:
            p = paragraphs.pop(0)
            if not in_children and not p.is_structure:
                # Plain text before the first heading attaches to self.
                attached_paragraphs.append(p)
            else:
                in_children = True
                if p.is_structure and p.level <= level:  # if p is higher in hierarchy, then the child is completed
                    if container_paragraphs or container_title:
                        # Advance the hierarchical index for the closed child.
                        if level <= len(index):
                            index = index[:level]
                            index[-1] += 1
                        else:
                            for i in range(level - len(index)):
                                index.append(1)
                        children.append(Container(container_paragraphs, container_title, level, index, self, child_id))
                        child_id += 1
                    container_paragraphs = []
                    container_title = p
                    level = p.level

                else:  # p is normal text or strictly lower in hierarchy, then the child continues to grow
                    container_paragraphs.append(p)

        # Flush the last child being built.
        if container_paragraphs or container_title:
            if level <= len(index):
                index = index[:level]
                index[-1] += 1
            else:
                for i in range(level - len(index)):
                    index.append(1)
            children.append(Container(container_paragraphs, container_title, level, index, self, child_id))
            child_id += 1

        return attached_paragraphs, children

    @property
    def structure(self):
        """Tree description for the UI tree widget (self + paragraphs + children)."""
        # NOTE(review): `self.rank` is never assigned in this class, so this
        # property raises AttributeError as written — confirm where `rank`
        # is supposed to be set.
        self_structure = {str(self.id_): {
            'index': str(self.id_),
            'canMove': True,
            'isFolder': True,
            'children': [p.id_ for p in self.paragraphs] + [child.id_ for child in self.children],
            'canRename': True,
            'data': {},
            'level': self.level,
            'rank': self.rank,
            'title': self.title.text if self.title else 'root'
        }}
        paragraphs_structure = [p.structure for p in self.paragraphs]
        structure = [self_structure] + paragraphs_structure
        for child in self.children:
            structure += child.structure
        return structure

    def sort_paragraphs(self) -> "tuple[list[Paragraph], list[Paragraph], list[Paragraph]]":
        """Split own paragraphs into (normal, comment, task) lists by type."""
        mapping = {'normal': [], 'comment': [], 'task': []}
        for p in self.paragraphs:
            # BUGFIX: the original did `mapping(p.type)`, *calling* the dict
            # (TypeError on the first paragraph); subscript it instead.
            mapping[p.type].append(p)
        return mapping['normal'], mapping['comment'], mapping['task']
|
src/model/doc.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import docx
|
2 |
+
|
3 |
+
from src.model.container import Container
|
4 |
+
from src.model.paragraph import Paragraph
|
5 |
+
|
6 |
+
|
7 |
+
class Doc:
    """In-memory model of a .docx document: wraps the python-docx document,
    builds the paragraph/container tree, and exposes its blocks and tasks."""

    def __init__(self, path='', id_=None):
        """Load the document at *path* and build its container tree.

        Args:
            path: filesystem path to the .docx file.
            id_: unused; kept for backward compatibility with existing callers.
        """
        self.xdoc = docx.Document(path)
        self.title = path.split('/')[-1]
        # Identity comes from the object itself; the id_ parameter is ignored.
        self.id_ = id(self)
        self.path = path
        paragraphs = [Paragraph(xp, self.id_, i) for (i, xp) in enumerate(self.xdoc.paragraphs)]
        self.container = Container(paragraphs, father=self, level=0)
        self.blocks = self.get_blocks()
        self.tasks = [c.get_task(self.container.one_liner) for c in self.container.containers if c.task]

    @property
    def structure(self):
        """Tree-view structure of the whole document (delegates to the root container)."""
        return self.container.structure

    def get_blocks(self):
        """Return the document's blocks, tagging each with the doc title and a
        dotted string index; level-0 (root) blocks are dropped.

        BUG FIX: the original removed level-0 blocks from the list while
        iterating over it, which skips the element following each removal and
        leaves its index unconverted. Build a filtered list instead (the
        original self.container.blocks list is left untouched).
        """
        def from_list_to_str(index_list):
            # [1, 2, 3] -> "1.2.3" (empty list -> "").
            return '.'.join(str(el) for el in index_list)

        kept = []
        for block in self.container.blocks:
            block.doc = self.title
            if block.level == 0:
                # Level 0 is the untitled document root, not a real section.
                continue
            block.index = from_list_to_str(block.index)
            kept.append(block)
        return kept
|
54 |
+
|
src/model/paragraph.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import string
|
2 |
+
|
3 |
+
INFINITE = 10000


class Paragraph:
    """Wrapper around a python-docx paragraph with a composite id, a heading
    level derived from the style name, and convenience accessors."""

    def __init__(self, xparagraph, doc_id: int, id_: int):
        self.xparagraph = xparagraph
        # Composite id: literal "2" marker + owning doc id + paragraph index.
        # NOTE(review): plain digit concatenation can collide (doc 1/para 23
        # vs doc 12/para 3) — confirm uniqueness is guaranteed upstream.
        self.id_ = int(str(2) + str(doc_id) + str(id_))
        self.level = self.get_level_from_name()
        # A paragraph is "structure" when its style encodes a heading level.
        self.is_structure = self.level < INFINITE
        self.text = self.xparagraph.text
        # NOTE(review): get_type() is not defined on this class in this file;
        # it must be provided elsewhere or this constructor raises AttributeError.
        self.type = self.get_type()

    @property
    def structure(self):
        """Tree-view node dict for this paragraph (a leaf: no children)."""
        structure = {str(self.id_): {
            'index': str(self.id_),
            'canMove': True,
            'isFolder': False,
            'children': [],
            'title': self.text,
            'canRename': True,
            'data': {},
            'level': self.level,
        }}
        return structure

    @property
    def blank(self):
        """
        checks if the paragraph is blank: i.e. it brings some signal (it may otherwise be ignored)
        """
        text = self.text.replace('\n', '')
        return set(text).isdisjoint(string.ascii_letters)

    def get_level_from_name(self) -> int:
        """Derive the heading level from the style name's trailing digit.

        Styles containing '.Titre' followed by a digit are headings (level =
        that digit); everything else is INFINITE (not structure).
        """
        style_name = self.xparagraph.style.name
        level = INFINITE
        if '.Titre' in style_name:
            suffix = style_name[-1]
            try:
                level = int(suffix)
            except ValueError:
                # BUG FIX: was a bare `except:`; only a non-numeric suffix
                # (e.g. "X.TitreFoo") should be treated as "not a heading".
                pass
        return level
|
49 |
+
|
50 |
+
|
src/reader/reader_for_requirements.py
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import docx
|
2 |
+
import os
|
3 |
+
from docx.document import Document as _Document
|
4 |
+
from src.domain.requirements_paragraphs import Requirement_Paragraph
|
5 |
+
from docx.oxml.text.paragraph import CT_P
|
6 |
+
from docx.oxml.table import CT_Tbl
|
7 |
+
from docx.table import _Cell, Table
|
8 |
+
from docx.text.paragraph import Paragraph
|
9 |
+
|
10 |
+
class WordReader:
    """Reads a .docx file and flattens its paragraphs and tables into
    Requirement_Paragraph objects, estimating a page number for each from a
    running character count."""

    def __init__(self, path):
        self.path = path
        self.paragraphs = self.get_paragraphs()

    def iter_block_items(self, parent):
        """Yield the Paragraph and Table children of *parent* in document order.

        *parent* must be a python-docx Document or a table _Cell; anything
        else raises ValueError.
        """
        if isinstance(parent, _Document):
            parent_elm = parent.element.body
        elif isinstance(parent, _Cell):
            parent_elm = parent._tc
        else:
            raise ValueError("Unsupported parent type")

        for child in parent_elm.iterchildren():
            if isinstance(child, CT_P):
                yield Paragraph(child, parent)
            elif isinstance(child, CT_Tbl):
                yield Table(child, parent)

    def get_paragraphs(self):
        """Parse the document into a list of Requirement_Paragraph objects.

        Tables are collapsed into a single pseudo-paragraph whose style is the
        predominant style of their cells.

        Raises:
            FileNotFoundError: when the path does not exist.
            ValueError: when parsing fails (original error chained).
        """
        if not os.path.exists(self.path):
            raise FileNotFoundError(f"The file {self.path} does not exist.")
        try:
            doc = docx.Document(self.path)
            paragraph_objects = []
            paragraph_id = 0
            page_id = 1  # refreshed from the running character count below
            total_characters = 0
            for block in self.iter_block_items(doc):
                if isinstance(block, Paragraph):
                    paragraph_info = self.extract_paragraph_info(block)
                    if paragraph_info:  # Only append if paragraph is not empty
                        page_id = self.estimate_page_number(total_characters)
                        p_obj = Requirement_Paragraph(text=paragraph_info['text'], font_style=paragraph_info['style'], id_=paragraph_id, page_id=page_id)
                        paragraph_objects.append(p_obj)
                        paragraph_id += 1
                        total_characters += len(paragraph_info['text'])
                elif isinstance(block, Table):
                    table_paragraph, table_style = self.table_to_paragraph(block)
                    if table_paragraph.strip():  # Check if table paragraph is not empty
                        p_obj = Requirement_Paragraph(text=table_paragraph, font_style=table_style, id_=paragraph_id, page_id=page_id)
                        paragraph_objects.append(p_obj)
                        paragraph_id += 1
            return paragraph_objects
        except Exception as e:
            # BUG FIX: chain the original exception so the real traceback is
            # preserved for debugging.
            raise ValueError(f"Error reading the .docx file. Original error: {str(e)}") from e

    def determine_predominant_style(self, styles):
        """Return the most frequent style name in *styles* ("None" when empty)."""
        # Count the occurrences of each style
        style_counts = {}
        for style in styles:
            if style in style_counts:
                style_counts[style] += 1
            else:
                style_counts[style] = 1

        # Find the style with the highest count
        predominant_style = max(style_counts, key=style_counts.get, default="None")
        return predominant_style

    def estimate_page_number(self, total_characters):
        """Rough 1-based page estimate assuming ~2000 characters per page."""
        avg_chars_per_page = 2000
        return total_characters // avg_chars_per_page + 1

    def extract_paragraph_info(self, paragraph):
        """Return {'text', 'style', 'runs'} for *paragraph*, or None if empty."""
        # Check if paragraph is empty
        if not paragraph.text.strip():
            return None  # Return None for empty paragraphs

        paragraph_style = paragraph.style.name if paragraph.style else 'None'

        runs = []
        for run in paragraph.runs:
            run_details = {
                'text': run.text,
                'font_name': run.font.name,
                'font_size': run.font.size.pt if run.font.size else None,
                'bold': run.bold,
                'italic': run.italic,
                'underline': run.underline
            }
            runs.append(run_details)

        return {
            'text': paragraph.text,
            'style': paragraph_style,
            'runs': runs
        }

    def table_to_paragraph(self, table):
        """Flatten *table* into (text, predominant_style); cells are joined
        with ' | ' and rows with newlines."""
        table_text = ""
        table_styles = set()

        for row in table.rows:
            for cell in row.cells:
                cell_text = ""
                for paragraph in cell.paragraphs:
                    paragraph_style = paragraph.style.name if paragraph.style else 'None'
                    table_styles.add(paragraph_style)

                    for run in paragraph.runs:
                        cell_text += run.text

                    cell_text += " "
                table_text += cell_text.strip() + " | "  # Add a separator for cells
            table_text = table_text.strip() + "\n"  # Add a newline for rows

        predominant_style = self.determine_predominant_style(table_styles)

        return table_text.strip(), predominant_style

    def print_paragraphs_and_tables(self):
        """Debug helper: print the text of every extracted paragraph.

        BUG FIX: the original indexed the Requirement_Paragraph objects as if
        they were dicts ('paragraph' in item), which never matched, so nothing
        was ever printed; print the paragraph text directly instead.
        """
        try:
            print("start")
            for item in self.get_paragraphs():
                print("Paragraph:", item.text)
                print('-' * 40)  # separator for clarity

        except Exception as e:
            print(f"Error: {str(e)}")
|
src/retriever/retriever.py
ADDED
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.domain.block import Block
|
2 |
+
from src.domain.doc import Doc
|
3 |
+
from src.llm.llm_tools import summarize_paragraph_v2
|
4 |
+
import gradio as gr
|
5 |
+
|
6 |
+
class Retriever:
    """
    The Retriever class is responsible for processing and summarizing documents.
    It supports operations such as summarizing individual blocks of text, organizing
    text into a hierarchy, and conducting similarity searches within a collection of documents.

    Attributes:
        collection: A collection object where summaries and metadata are stored.
        llmagent: An instance of LlmAgent used for generating summaries.
    """


    def __init__(self, doc: Doc = None, collection=None):
        """
        Initializes the Retriever class with a document, a collection, and a language model agent.

        When *doc* is given, every block is summarized via the LLM and stored
        into *collection* (long blocks are split at 4500 characters first);
        otherwise the instance just wraps an existing collection for querying.

        Args:
            doc: A document object containing text blocks to be processed.
            collection: A collection object to store summaries and metadata.
            llmagent: An instance of LlmAgent for generating summaries.
        """

        if doc is not None:
            self.collection = collection
            blocks_good_format = doc.blocks_requirements  # List of Block objects from the document.
            gr.Info("Please wait while the database is being created")

            # Process each block in the document.
            for block in blocks_good_format:
                print(f"block index : {block.index}")
                # If block content is longer than 4500 characters, split and summarize separately.
                if len(block.content) > 4500:
                    new_blocks = block.separate_1_block_in_n(max_size=4500)
                    for new_block in new_blocks:
                        summary = summarize_paragraph_v2(prompt=new_block.content, title_doc=doc.title, title_para=block.title)
                        # Keep only the text after the <summary> tag when the LLM echoes it.
                        if "<summary>" in summary:
                            summary = summary.split("<summary>")[1]

                        self.collection.add(
                            documents=[summary],
                            ids=[new_block.index],
                            metadatas=[new_block.to_dict()]
                        )
                else:
                    # Summarize the block as is if it's shorter than 4500 characters.
                    summary = summarize_paragraph_v2(prompt=block.content, title_doc=doc.title, title_para=block.title)
                    if "<summary>" in summary:
                        summary = summary.split("<summary>")[1]
                    self.collection.add(
                        documents=[summary],
                        ids=[block.index],
                        metadatas=[block.to_dict()]
                    )

            # Summarize blocks by their hierarchy level after individual processing.
            self.summarize_by_hierarchy(blocks_good_format, doc.title)
            gr.Info(f"The collection {collection.name} has been added to the database")
        else:
            self.collection = collection




    def summarize_by_hierarchy(self, blocks, doc_title):
        """
        Summarizes blocks based on their hierarchical levels.

        Args:
            blocks: A list of Block objects to be summarized.
            llmagent: An instance of LlmAgent used for generating summaries.
            doc_title: The title of the document being processed.
        """
        hierarchy = self.create_hierarchy(blocks)
        deepest_blocks_indices = self.find_deepest_blocks(blocks)
        print("Hierarchy levels identified:", hierarchy.keys())
        # NOTE(review): deepest_blocks_indices holds index STRINGS, so
        # block.index here is str.index (a bound method) — this debug line
        # prints method reprs, not indices.
        print("Deepest block indices:", [block.index for block in deepest_blocks_indices])

        for level, level_blocks in hierarchy.items():
            # Summarize only if the level has more than one block and contains deepest blocks.
            print(level)
            print(level_blocks)
            print(deepest_blocks_indices)
            print(len(level_blocks))
            if len(level_blocks) > 1 and any(block.index in deepest_blocks_indices for block in level_blocks):
                level_content = " ".join(block.content for block in level_blocks)

                print(f"Summarizing level {level} with content from blocks: {[block.index for block in level_blocks]}")
                level_summary = summarize_paragraph_v2(prompt=level_content, title_doc=doc_title, title_para=f"Summary of section : {level}")

                level_summary_id = f"summary_{level}"
                # Initialize a new Block object with properties from the first block

                first_block = level_blocks[0]
                combined_block = Block(
                    doc=first_block.doc,
                    title=first_block.title,
                    # NOTE(review): recomputes the same join as level_content above.
                    content=" ".join(block.content for block in level_blocks),
                    index=first_block.index,
                    rank=first_block.rank,
                    level=first_block.level,
                    distance=first_block.distance
                )


                self.collection.add(
                    documents=[level_summary],
                    ids=[level_summary_id],
                    metadatas=[combined_block.to_dict()]  # Pass the combined block metadata
                )
                # List of dictionaries, each representing a block

                print(f"Added summary for level {level} to the collection.")
            else:
                # Skip summarization for levels that are deepest blocks.
                print(f"Skipping level {level} as it is deepest blocks.")


    def create_hierarchy(self, blocks):
        """
        Creates a hierarchical structure of the blocks based on their indices.

        Args:
            blocks: A list of Block objects to be organized into a hierarchy.

        Returns:
            A dictionary representing the hierarchy of blocks.
        """
        hierarchy = {}
        for block in blocks:
            # Every prefix level of the block's index gets a reference to it.
            levels = self.extract_levels(block.index)
            for level in levels:
                hierarchy.setdefault(level, []).append(block)
        return hierarchy


    def extract_levels(self, index):
        """
        Extracts all hierarchical levels from a block index.

        Args:
            index: The index string of a block.

        Returns:
            A list of levels extracted from the index.
        """
        # Splits the index string and creates a list of hierarchical levels.
        # e.g. "1.2.3" -> ["1", "1.2", "1.2.3"]
        parts = index.split('.')
        levels = ['.'.join(parts[:i]) for i in range(1, len(parts) + 1)]
        return levels


    def find_deepest_blocks(self, blocks):
        """
        Identifies the deepest blocks in the hierarchy.

        Args:
            blocks: A list of Block objects.

        Returns:
            A set of indices representing the deepest blocks.
        """
        deepest_blocks = set()
        block_indices = {block.index for block in blocks}
        for block in blocks:
            # A block is considered deepest if no other block's index extends it.
            if not any(b_index != block.index and b_index.startswith(block.index + '.') for b_index in block_indices):
                deepest_blocks.add(block.index)
        return deepest_blocks



    def similarity_search(self, queries: str) -> {}:
        """
        Performs a similarity search in the collection based on given queries.

        Args:
            queries: A string or list of strings representing the query or queries.

        Returns:
            A list of Block objects that are similar to the given queries.
        """
        # Query the collection and retrieve blocks based on similarity.
        res = self.collection.query(query_texts=queries, n_results=5)
        block_dict_sources = res['metadatas'][0]
        distances = res['distances'][0]
        blocks = []
        for bd, d in zip(block_dict_sources, distances):
            # Rehydrate a Block from its stored metadata and attach the distance.
            b = Block().from_dict(bd)
            b.distance = d
            blocks.append(b)

        return blocks
|
198 |
+
|
src/tools/doc_tools.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from PIL import Image
|
2 |
+
import os
|
3 |
+
import docx
|
4 |
+
import docx.oxml.ns as ns
|
5 |
+
|
6 |
+
def get_positions(xml_file):
    """Extract the first numeric cx (width) and cy (height) attribute values
    found in raw drawing XML.

    Returns the values as strings; when no numeric value follows a given
    attribute, the corresponding split-list is returned unchanged (this
    mirrors the historical fallthrough behaviour).
    """
    def first_numeric_value(chunks):
        # Each chunk after a split on `attr="` starts with the attribute
        # value, terminated by the closing quote.
        for chunk in chunks:
            candidate = chunk.split('"')[0]
            if candidate.isnumeric():
                return candidate
        return chunks

    width = first_numeric_value(xml_file.split('cx="'))
    height = first_numeric_value(xml_file.split('cy="'))
    return width, height
|
26 |
+
|
27 |
+
def convert_to_png(imageslist):
    # Convert every non-PNG image in *imageslist* to PNG on disk, replace its
    # entry in the list with the new path, and delete the original file.
    # Returns the (mutated) list.
    for image in imageslist:
        if(image.endswith('.png')):
            continue
        im = Image.open(image)
        # Save under the same basename with a .png extension.
        # NOTE(review): image.split('.')[0] truncates at the FIRST dot, so a
        # path like "dir.v2/pic.jpg" maps to a wrong name — confirm inputs
        # only ever contain one dot.
        im.save(image.split('.')[0]+'.png')
        # Replace the entry in place; the list length is unchanged, so the
        # ongoing iteration stays valid.
        imageslist[imageslist.index(image)] = image.split('.')[0]+'.png'
        os.remove(image)
    return imageslist
|
36 |
+
|
37 |
+
|
38 |
+
def get_difference_with_template(styles_used_in_doc, template):
    """Return the styles from *styles_used_in_doc* whose names appear neither
    in *template*'s style names nor earlier in the result (first occurrence
    of each name wins).

    Args:
        styles_used_in_doc: iterable of style objects (must expose .name).
        template: document whose template styles serve as the reference set.
    """
    styles_used_in_template = template.styles.names
    different_styles = []
    # Track collected names in a set instead of rescanning different_styles
    # for every candidate (the original was O(n^2)).
    seen_names = set()
    for style in styles_used_in_doc:
        if style.name not in styles_used_in_template and style.name not in seen_names:
            different_styles.append(style)
            seen_names.add(style.name)
    return different_styles
|
46 |
+
|
47 |
+
|
48 |
+
def update_table_of_contents(doc):
    # Mark the document so Word refreshes all fields (including the table of
    # contents) the next time it is opened: python-docx cannot recompute the
    # TOC itself, it can only set the w:updateFields flag in settings.xml.
    # Find the settings element in the document
    settings_element = doc.settings.element

    # Create an "updateFields" element and set its "val" attribute to "true"
    update_fields_element = docx.oxml.shared.OxmlElement('w:updateFields')
    update_fields_element.set(ns.qn('w:val'), 'true')

    # Add the "updateFields" element to the settings element
    settings_element.append(update_fields_element)
|
58 |
+
|
59 |
+
|
60 |
+
def left_part_until_number(s):
    """Return the prefix of *s* up to (but excluding) its first digit,
    or None when *s* contains no digit at all."""
    first_digit_pos = next((i for i, ch in enumerate(s) if ch.isdigit()), None)
    return None if first_digit_pos is None else s[:first_digit_pos]
|
65 |
+
|
66 |
+
def get_title(path) -> str:
    """Return the file-name component of *path*.

    Handles both '/' and '\\' separators; when both appear, the backslash
    split of the full path wins (matches the historical behaviour).
    """
    res = path
    if '/' in path:
        res = path.rsplit('/', 1)[-1]
    if '\\' in path:
        res = path.rsplit('\\', 1)[-1]
    return res
|
src/tools/index_creation.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.domain.container import Container
|
2 |
+
|
3 |
+
INFINITE = 99999

def create_dic_levels(c: "Container", dict_of_levels: dict = None):
    """Recursively collect, for every container level in the tree rooted at
    *c*, the starting index pattern for that level.

    Level 0 (the root) maps to [0]; a newly seen level N maps to N ones.
    The INFINITE pseudo-level (title-less containers) is normalised to [1].

    BUG FIX: the original used a mutable default argument ({}), so the dict
    was shared across independent top-level calls; default to None and create
    a fresh dict per call instead.
    """
    if dict_of_levels is None:
        dict_of_levels = {}
    if c.level == 0:
        dict_of_levels[c.level] = [0]
    for child in c.children:
        if child.level not in dict_of_levels:
            dict_of_levels[child.level] = [1 for _ in range(child.level)]
        create_dic_levels(child, dict_of_levels)
    if INFINITE in dict_of_levels.keys():
        dict_of_levels[INFINITE] = [1]
    return dict_of_levels
|
15 |
+
|
16 |
+
|
17 |
+
def create_good_indexes(c: "Container", dict_of_levels: dict):
    """Recursively assign a hierarchical index to *c* and all descendants.

    Mutates both c.index (set to a copy of the current pattern for its level)
    and *dict_of_levels* (the running counters per level).
    """
    actual_level = c.level
    c.index = dict_of_levels[actual_level].copy()
    actual_len = len(dict_of_levels[actual_level])
    temp_update = dict_of_levels[actual_level][-1]
    dict_of_levels[actual_level][-1] += 1
    # Keep the deeper level patterns in sync with this level's position.
    for i in dict_of_levels.values():
        if len(i) > actual_len:
            i[actual_len - 1] = temp_update
    for child in c.children:
        c_lvl = child.level
        # Entering a child resets the counters of all deeper levels to 1.
        for i in dict_of_levels.values():
            if len(i) > c_lvl:
                i[c_lvl:] = [1 for _ in range(len(i[c_lvl:]))]
        create_good_indexes(child, dict_of_levels)  # Apply the function recursively to all children


def create_good_indexes_not_ordered_titles(c: "Container", dict_of_levels: dict):
    """Entry point kept for callers handling out-of-order title levels.

    CONSOLIDATION: the original body was a line-for-line duplicate of
    create_good_indexes (it even recursed into create_good_indexes), so it
    now simply delegates; behaviour is unchanged.
    """
    create_good_indexes(c, dict_of_levels)
|
49 |
+
|
50 |
+
|
51 |
+
def set_good_block_indexes(c: "Container"):
    """Copy each container's index onto its blocks, first normalising any
    leftover 0 entries to 1.

    The container's index list is mutated in place and shared (by reference)
    with every block of that container.
    """
    for container in c.containers:
        for block in container.blocks:
            index = container.index
            for position, value in enumerate(index):
                if value == 0:
                    index[position] = 1
            block.index = index
|
59 |
+
|
60 |
+
def set_indexes(c: "Container", path: str):
    """Compute and assign hierarchical indexes for the container tree *c*,
    then propagate them onto the blocks.

    Generated and template files are skipped entirely.
    """
    if "temp/generated_files" in path or "data/templates" in path:
        return
    level_patterns = create_dic_levels(c)
    # Process levels in ascending order.
    level_patterns = {lvl: level_patterns[lvl] for lvl in sorted(level_patterns)}
    structure_levels = list(level_patterns.keys())[1:]
    if c.children and c.children[0] and (c.children[0].level > min(structure_levels)):
        # The first title sits deeper than the shallowest level present:
        # promote it so numbering still starts at the top level.
        c.children[0].level = min(structure_levels)
        create_good_indexes_not_ordered_titles(c, level_patterns)
    else:
        create_good_indexes(c, level_patterns)
    set_good_block_indexes(c)
|
src/tools/list_tool.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def keep_last_occurrences(lst, key):
    """Deduplicate *lst* in place by key(item), keeping only the LAST
    occurrence of each key (relative order otherwise preserved).

    Returns the same (mutated) list object.
    """
    seen_keys = set()
    # Walk backwards so deletions never shift the indices still to visit.
    for position in range(len(lst) - 1, -1, -1):
        item_key = key(lst[position])
        if item_key in seen_keys:
            del lst[position]
        else:
            seen_keys.add(item_key)
    return lst
|
11 |
+
|
12 |
+
def delete_duplicate_styles(list_styles_to_update, different_styles):
    """Remove from *different_styles* every style already scheduled for update
    in *list_styles_to_update* (matched by document name and style name).

    Mutates and returns *different_styles*.

    BUG FIX: the original removed items from different_styles while iterating
    over it, which skips the element that follows each removal; iterate over a
    snapshot instead.
    """
    for s in list_styles_to_update:
        for d in list(different_styles):
            # "list_style" is formatted "<style name> : <detail>".
            if s["doc"].name == d["doc"].name and s["list_style"].split(" : ")[0] == d["style"].name:
                different_styles.remove(d)
    return different_styles
|
src/tools/paragraph_tools.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from xml.etree import ElementTree as ET
|
2 |
+
|
3 |
+
def find_list_indentation_level(para, doc):
    """Determine whether *para* belongs to a (numbered/bulleted) list and, if
    so, its 1-based indentation level.

    Args:
        para: python-docx paragraph (its raw XML is inspected via para._p.xml).
        doc: wrapper exposing the python-docx document as doc.xdoc.

    Returns:
        (is_list, level): (False, 0) when the paragraph is not part of a list.
    """
    namespace = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"}
    w_ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
    xml_para = para._p.xml
    try:
        xml_numbering = doc.xdoc._part.numbering_part.element.xml
    except Exception:
        # No numbering part in the document: nothing can be a list.
        # (Narrowed from the original bare `except:`.)
        return False, 0
    root_para = ET.fromstring(xml_para)
    root_numbering = ET.fromstring(xml_numbering)
    # <w:num> elements map concrete numIds to abstract numbering definitions.
    abstract_num_reference = [item for item in root_numbering if item.tag == w_ns + "num"]
    is_numPr = root_para.find(".//w:numPr", namespaces=namespace)
    is_style = root_para.find(".//w:pStyle", namespaces=namespace)
    is_numId = root_para.find(".//w:numId", namespaces=namespace)
    is_lvl = root_para.find(".//w:ilvl", namespaces=namespace)
    if is_numPr is not None:
        if is_numId is not None and is_lvl is not None:
            # Direct numbering: the paragraph carries its own ilvl.
            return True, int(is_lvl.attrib[w_ns + "val"]) + 1
        elif is_numId is not None and is_lvl is None:
            # Indirect numbering: resolve numId -> abstractNum, then read the
            # level from the abstract definition.
            numId = int(is_numId.attrib[w_ns + "val"])
            is_abstractNumId = [item for item in abstract_num_reference if item.attrib[w_ns + "numId"] == str(numId)][0]
            numID_reference = is_abstractNumId.find(".//w:abstractNumId", namespaces=namespace).attrib[w_ns + "val"]
            real_numID = root_numbering.find(f".//w:abstractNum[@w:abstractNumId='{int(numID_reference)}']", namespaces=namespace)
            # BUG FIX: the original read style_Id BEFORE assigning it
            # (UnboundLocalError on this path); check the pStyle element first.
            if is_style is None:
                return False, 0
            lvl = real_numID.find(".//w:ilvl", namespaces=namespace)
            # NOTE(review): this reads the w:ilvl ATTRIBUTE of the <w:ilvl>
            # element; in standard OOXML the level normally lives in the
            # element's w:val attribute — confirm on real documents.
            return True, int(lvl.attrib[w_ns + "ilvl"]) + 1
        else:
            # numPr present but no numId: treat as not a list.
            # (The original fell through and implicitly returned None,
            # which would break callers unpacking two values.)
            return False, 0
    else:
        if is_style is None:
            return False, 0
        else:
            # Style-based list: look the style up in the numbering definitions.
            style_Id = is_style.attrib[w_ns + "val"]
            is_style_in_numbering = root_numbering.find(f".//w:pStyle[@w:val='{style_Id}']...", namespaces=namespace)
            if is_style_in_numbering is None:
                return False, 0
            else:
                ilvl = is_style_in_numbering.attrib[w_ns + "ilvl"]
                return True, int(ilvl) + 1
|
src/tools/pretty_print.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.domain.block import Block
|
2 |
+
from src.domain.requirements_paragraphs import Requirement_Paragraph
|
3 |
+
|
4 |
+
def pretty_print_block_and_indexes(b : [Block]):
    # Debug helper: dump each block's dotted index and title to stdout.
    for block in b:
        print(f"{block.index} {block.title if block.title else '___NO TITLE__'}")
        # NOTE(review): the separator's indentation is ambiguous in the
        # source rendering; assumed to print after every block — confirm.
        print(f"----------------------------------")
|
8 |
+
|
9 |
+
def pretty_print_paragraphs(para : [Requirement_Paragraph]):
    # Debug helper: dump each paragraph's level, font style, and text.
    for p in para:
        print(f"{p.level} --> {p.font_style} : {p.text}")
        # NOTE(review): the separator's indentation is ambiguous in the
        # source rendering; assumed to print after every paragraph — confirm.
        print("-------------------")
|
src/tools/semantic_db.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import chromadb
|
2 |
+
from datetime import datetime
|
3 |
+
|
4 |
+
chroma_client = chromadb.Client()
|
5 |
+
|
6 |
+
|
7 |
+
def get_or_create_collection(coll_name: str):
    # The first 6 characters of the collection name encode its creation
    # date stamp (MMDDHH) — delete_old_collections relies on this metadata.
    date = coll_name[:6]
    coll = chroma_client.get_or_create_collection(name=coll_name, metadata={"date": date})
    return coll
|
11 |
+
|
12 |
+
|
13 |
+
def get_collection(coll_name: str):
    # Thin wrapper: fetch an existing collection (raises in chroma if absent).
    coll = chroma_client.get_collection(name=coll_name)
    return coll
|
16 |
+
|
17 |
+
|
18 |
+
def reset_collection(coll_name: str):
    # Empty the named collection and return it.
    # NOTE(review): Collection.delete() with no ids removes documents from
    # the collection, not the collection itself — confirm this matches the
    # intended "reset" semantics.
    coll = chroma_client.get_collection(name=coll_name)
    coll.delete()
    return coll
|
22 |
+
|
23 |
+
|
24 |
+
def delete_old_collections(old=2):
    # Drop every collection whose MMDDHH date stamp is more than *old* hours old.
    collections = chroma_client.list_collections()
    current_hour = int(datetime.now().strftime("%m%d%H"))

    for coll in collections:
        coll_hour = int(coll.metadata['date'])
        # NOTE(review): comparing MMDDHH values as plain integers breaks
        # across day/month boundaries (e.g. 013123 vs 020100) — confirm this
        # imprecision is acceptable for cleanup purposes.
        if coll_hour < current_hour - old:
            chroma_client.delete_collection(coll.name)
|
32 |
+
|
33 |
+
|
34 |
+
def add_texts_to_collection(coll_name: str, texts: [str], file: str, source: str):
    """
    add texts to a collection : texts originate all from the same file
    """
    coll = chroma_client.get_collection(name=coll_name)
    # One metadata dict per text: marks the origin file and its source.
    filenames = [{file: 1, 'source': source} for _ in texts]
    # Deterministic ids ("<file>-<i>") so re-adding a file replaces its entries.
    ids = [file+'-'+str(i) for i in range(len(texts))]
    try:
        # Delete any previous entries for this file before re-adding.
        coll.delete(ids=ids)
        coll.add(documents=texts, metadatas=filenames, ids=ids)
    except Exception as e:
        # BUG FIX: was a bare `except:` (also swallowing SystemExit and
        # KeyboardInterrupt) and the underlying error was dropped entirely.
        print(f"exception raised for collection :{coll_name}, texts: {texts} from file {file} and source {source}: {e}")
|
46 |
+
|
47 |
+
|
48 |
+
def delete_collection(coll_name: str):
    # Permanently remove the named collection from the chroma client.
    chroma_client.delete_collection(name=coll_name)
|
50 |
+
|
51 |
+
|
52 |
+
def list_collections():
    # Return all collections known to the module-level chroma client.
    return chroma_client.list_collections()
|
54 |
+
|
55 |
+
|
56 |
+
def query_collection(coll_name: str, query: str, from_files: [str], n_results: int = 4):
    """Query *coll_name* for *query*, restricted to documents originating from
    *from_files* (must be non-empty).

    Returns the chroma query result, or "" when the query fails.
    """
    assert 0 < len(from_files)
    coll = chroma_client.get_collection(name=coll_name)
    # Chroma `where` filter: a single clause, or an $or across several files.
    where_ = [{file: 1} for file in from_files]
    where_ = where_[0] if len(where_) == 1 else {'$or': where_}
    # Never request more results than the collection holds.
    n_results_ = min(n_results, coll.count())

    ans = ""
    try:
        ans = coll.query(query_texts=query, n_results=n_results_, where=where_)
    except Exception as e:
        # BUG FIX: was a bare `except:`; narrowed to Exception and the
        # underlying error is now included in the log line.
        print(f"exception raised at query collection for collection {coll_name} and query {query} from files "
              f"{from_files}: {e}")

    return ans
|
src/tools/wiki.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Union
|
2 |
+
|
3 |
+
from langchain.docstore.base import Docstore
|
4 |
+
from langchain.docstore.document import Document
|
5 |
+
|
6 |
+
|
7 |
+
|
8 |
+
class Wiki(Docstore):
    """
    Wrapper around the wikipedia API.
    """

    def __init__(self) -> None:
        """Check that the wikipedia package is installed."""
        try:
            import wikipedia  # noqa: F401
        except ImportError:
            raise ValueError(
                "Could not import wikipedia python package. "
                "Please install it with `pip install wikipedia`."
            )

    @staticmethod
    def fetch(searched_page: str) -> Union[str, Document]:
        """
        Try to fetch the wiki page.

        If the page exists, return a Document with its content and url.
        If the page does not exist (or is ambiguous), return a string
        listing similar entries.
        """
        import wikipedia

        try:
            # wikipedia.set_lang("fr")
            # Fetch the page once instead of twice (was: two network calls,
            # one for .content and one for .url).
            page = wikipedia.page(searched_page)
            result: Union[str, Document] = Document(
                page_content=page.content, metadata={"page": page.url}
            )
        except (wikipedia.PageError, wikipedia.DisambiguationError):
            # Both failure modes produced the same message; merged clauses.
            result = f"Could not find [{searched_page}]. Similar: {wikipedia.search(searched_page)}"
        return result

    @staticmethod
    def search(searched_context: str) -> [str]:
        """
        Find wiki page titles related to the given context.

        Fixed: the original had neither `self` nor @staticmethod, so calling
        it on a Wiki instance raised TypeError. @staticmethod keeps the
        existing `Wiki.search(...)` call form working and fixes instance calls.
        """
        import wikipedia

        try:
            # wikipedia.set_lang("fr")
            page_title_list = wikipedia.search(searched_context)
            result = page_title_list
        except wikipedia.PageError:
            result = f"Could not find [{searched_context}]."
        return result
|
60 |
+
|
61 |
+
|
src/view/log_msg.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict
|
2 |
+
|
3 |
+
from config import config
|
4 |
+
|
5 |
+
|
6 |
+
def create_msg_from(logs: [Dict], docs) -> str:
    """
    Build a human-readable change report from per-document logs.

    :param logs: list of dicts, each possibly keyed by a document name and
        holding that document's modification log
    :param docs: documents (objects with a .name attribute) to report on
    :return: the concatenated report, one section per (doc, log) match
    """
    log_messages = []
    # Message templates/prefixes come from the app configuration.
    log_msg = config['log_msg']
    docs_seen = []
    msg = ''
    for doc in docs:
        for log in logs:
            if doc.name in log.keys():
                # Rebind the loop variable to this document's sub-log.
                log = log[doc.name]
                if 'options_applied' in log.keys():
                    msg += log_msg['options_applied']
                    for option in log['options_applied']:
                        msg += " - " + option + "\n"
                if 'suppressed_styles' in log.keys():
                    if log['suppressed_styles']:
                        msg += log_msg['suppressed_styles']
                        for style_name in log['suppressed_styles']:
                            msg += " - " + style_name + "\n"
                    # NOTE(review): modified/added styles are only reported when
                    # 'suppressed_styles' is present — presumably these keys
                    # always travel together; confirm against the log producer.
                    if log['modified_styles']:
                        msg += log_msg['modified_styles']
                        for style, log_s in log['modified_styles']:
                            msg += log_msg['modified_style'] + style + "\n"
                            for modif, _ in log_s:
                                msg += log_msg[modif] + ' '
                            msg += '\n'
                    if log['added_styles']:
                        msg += log_msg['added_styles']
                        for style_name in log['added_styles']:
                            msg += " - " + style_name + "\n"
                if 'style_mapping' in log.keys():
                    # NOTE(review): assignment (not +=) discards everything
                    # accumulated above for this log — verify this is intended.
                    msg = log['style_mapping']
                if 'list_mapping' in log.keys():
                    # NOTE(review): same overwrite behavior as style_mapping.
                    msg = log['list_mapping']
                if msg:
                    # Prefix the first section of each document with its name.
                    if doc not in docs_seen:
                        msg = log_msg['document'] + doc.name + '\n' + msg
                        docs_seen.append(doc)
                    log_messages.append(msg)
                    msg = ''
    log_messages_str = '\n'.join(log_messages)
    return log_messages_str
|
47 |
+
|
src/view/style_components.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
|
4 |
+
|
5 |
+
def input_files_fn1(input_files_):
    """Reveal the output-files component once input files are present.

    Returns a gradio update dict, or an empty dict when no files were given.
    NOTE(review): relies on a module-level ``output_files_comp`` — confirm it
    is defined by the importing module.
    """
    if not input_files_:
        return {}
    return {output_files_comp: gr.update(visible=True)}
|
src/view/test_view.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
with gr.Blocks() as test:
|
4 |
+
list_2 = ["choix21", "choix 22", "et choix 23"]
|
5 |
+
with gr.Row():
|
6 |
+
with gr.Accordion("See Details") as grac:
|
7 |
+
gr.Markdown("lorem ipsum")
|
8 |
+
hide_btn = gr.Button("hide")
|
9 |
+
show_btn = gr.Button("show")
|
10 |
+
|
11 |
+
def hide_fn():
|
12 |
+
update_ = {
|
13 |
+
grac: gr.update(open=False)
|
14 |
+
}
|
15 |
+
return update_
|
16 |
+
|
17 |
+
def show_fn():
|
18 |
+
update_ = {
|
19 |
+
grac: gr.update(open=True)
|
20 |
+
}
|
21 |
+
return update_
|
22 |
+
|
23 |
+
hide_btn.click(hide_fn,
|
24 |
+
inputs=[],
|
25 |
+
outputs=[grac])
|
26 |
+
show_btn.click(show_fn,
|
27 |
+
inputs=[],
|
28 |
+
outputs=[grac])
|
29 |
+
|
30 |
+
|
31 |
+
|
32 |
+
|
33 |
+
if __name__ == "__main__":
|
34 |
+
test.launch()
|
src/view/view.py
ADDED
@@ -0,0 +1,533 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from typing import Dict
|
3 |
+
import asyncio
|
4 |
+
import os
|
5 |
+
from src.control.controller import Controller
|
6 |
+
from Levenshtein import distance
|
7 |
+
from src.tools.list_tool import delete_duplicate_styles
|
8 |
+
|
9 |
+
|
10 |
+
def run(config: Dict, controller: Controller):
    """
    Build and return the GenProp gradio UI (a gr.Blocks).

    :param config: app configuration (templates, options, max_styles,
        templates_path, default_template_index, ...)
    :param controller: application controller performing all document work
    :return: the assembled gr.Blocks instance
    """

    """
    =====================================================
    Global variables
    ================
    """
    controller.clear_docs()
    title = "<h1 style=text-align:center;display:block;font-size:4.5em;color:#08a2d2;font-weight:bold;margin-top:4%;padding-bottom:1%>GenProp</h1>"
    with gr.Blocks(theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.orange)) as formatdoc:
        gr.Markdown(title)
        gr.Markdown("<p style=color:#08a2d2;font-size:1.5em;padding-bottom:2%;text-align:center>Par Hexamind</p>")
        gr.Markdown("")
        with gr.Row():
            # Empty side columns center the main (scale=10) column.
            with gr.Column():
                pass
            with gr.Column(scale=10):
                """
                =====================================================
                Input and style components
                ==========================
                """

                gr.Markdown("<p style=font-size:1em;>Vous êtes chargé de produire une proposition commerciale</p>")

                with gr.Accordion("Charger votre proposition", open=True) as input_acc:
                    input_files_comp = gr.File(file_count="multiple", file_types=[".docx"], label="Document")

                with gr.Accordion("Appliquer les styles", open=False) as style_acc:
                    templates_radio = gr.Radio(
                        label="Templates",
                        choices=config['templates'],
                        value=config['templates'][config['default_template_index']],
                    )
                    with gr.Row():
                        options_btn = gr.CheckboxGroup(choices=config['options'],
                                                       label="Options",
                                                       interactive=True)
                    with gr.Accordion("Mapper les styles de liste", open=False) \
                            as list_acc:
                        with gr.Column(scale=2):
                            # One hidden dropdown per possible style; revealed on demand.
                            list_style_comps = [gr.Dropdown(visible=False, interactive=True)
                                                for _ in range(config['max_styles'])]
                    with gr.Accordion("Mapper les autres styles non présents dans le template", open=False) \
                            as newstyles_acc:
                        with gr.Column(scale=2):
                            newstyle_comps = [gr.Dropdown(visible=False, interactive=True)
                                              for _ in range(config['max_styles'])]

                    log_comp = gr.Textbox(label="Journal des modifications", visible=False)

                    output_styles_files_comp = gr.File(file_count="multiple", file_types=[".docx"], visible=False)

                    with gr.Row():
                        run_style_btn = gr.Button("Appliquer le template et les modifications de style", visible=False)
                        clear_style_btn = gr.Button("Annuler les modifications de style", visible=False)

                """
                ===============================================
                Generation components
                ======================
                """
                with gr.Accordion("Compléter automatiquement la proposition", open=False) as gen_acc:

                    generate_option_btn = gr.Radio(
                        label="Automatically generate a draft based on your own database",
                        choices=["Auto generation", "No generation"],
                        value="No generation",
                        interactive=True,
                        visible=False,
                    )

                    db_list_comp = gr.CheckboxGroup(
                        label="Base de connaissance",
                        # NOTE(review): trailing "F" in "automatiqueF" looks like a typo
                        # in the user-facing text — fix in a behavior change, not here.
                        info="Ces documents constituent la source de référence. Désélectionner pour qu'ils ne soient "
                             "pas pris en compte lors de la génération automatiqueF",
                        visible=True,
                        interactive=True,
                    )
                    # NOTE(review): stray trailing backslash (line continuation into a
                    # blank line) — harmless but should be removed.
                    db_reset_btn = gr.Button("Effacer la base de connaissance", visible=False, size="sm") \

                    with gr.Column(visible=True):
                        gr.Markdown("<p style=font-size:1em;text-align:center;>A des fins de démonstrations, la base de connaissance est alimentée depuis Wikipedia</p>")
                        wiki_fetch_btn = gr.Button("Rechercher les pages Wikipedia", visible=True, size="sm")
                        wiki_list_comp = gr.CheckboxGroup(
                            label="Sélectionner les pages à ajouter dans la base de connaissance",
                            visible=False,
                            interactive=True,
                        )

                        with gr.Column():
                            wiki_add_to_db_btn = \
                                gr.Button("Ajouter les documents sélectionnés à la base de connaissance",
                                          visible=False, size="sm")

                        # wiki_clear_btn = gr.Button("Effacer les choix de documents", visible=False, size="sm") \

                        # with gr.Tab("Depuis le disque local (en cours de développement)"):
                        #     my_files_list_comp = gr.Files(
                        #         label="Charger ses documents",
                        #         visible=True,
                        #     )
                        #     my_files_add_to_db_btn = gr.Button("Add files to sources", visible=False, size="sm")

                    add_close_btn = gr.Button("Close", visible=False, size="sm")
                    with gr.Row():
                        # NOTE(review): another stray trailing backslash.
                        db_add_doc_btn = gr.Button("Ajouter de nouveaux documents", visible=False, size="sm")\

                    output_files_comp = gr.Files(file_count="multiple", visible=False)

                    generate_btn = gr.Button("Générer", interactive=True)

                    clear_btn = gr.Button('Nettoyer', visible=False)
                    rerun_btn = gr.Button('Relancer', visible=False)


                """
                ===============================================
                Verification requirements components
                ======================
                """

                with gr.Accordion("Générer la réponse aux exigences (en cours de développement)", open=False, visible=True) as exigences_acc:
                    input_csv_comp = gr.File(file_count="single", file_types=[".csv", "xlsx"], visible=True, label="Fichiers d'exigences (csv, xlsx only)")
                    with gr.Row():
                        verif_btn = gr.Button("Générer la réponse aux exigences (en cours de développement)", visible=False)
                    output_csv_comp = gr.File(file_count="single", file_types=[".csv", "xlsx"], visible=False)

                gr.Markdown("")
                gr.Markdown("")
                gr.Markdown("<p style=font-size:1em;>Vous êtes administrateur de GenProp</p>")

                with gr.Accordion("Gérer les templates", open=False) as gestions_templates_acc:
                    templates_radio_modif = gr.Radio(
                        interactive=True,
                        label="Templates",
                        choices=config['templates'],
                        value=config['templates'][config['default_template_index']],
                    )
                    with gr.Row():
                        add_template_btn = gr.UploadButton("Ajouter un template",file_count="single", file_types=[".docx"])
                        delete_curr_template_btn = gr.Button("Supprimer le template sélectionné")
                with gr.Accordion("Gérer la base de connaissances (en cours de développement)", open=False):
                    pass

            with gr.Column():
                pass

        """
        ===================================================
        state variables
        ===============
        """
        wiki_source_var: [str] = gr.State([])  # list of wikipage titles of interest for the input text tasks
        wiki_db_var: [str] = gr.State([])  # list of wiki document titles in the db (as seen from the UI)
        my_files_db_var: [str] = gr.State([])  # list of titles of the files uploaded in the db (as seen from the UI)
        db_collection_var: str = gr.State("-1")  # name of the collection of documents sources in the db

        """
        ===================================================
        Input and styles functions and listeners
        ========================================
        """

        def input_csv_fn(input_csv_):
            # Reject anything that is not a csv/xlsx file; otherwise hand the
            # file to the controller and reveal the "generate answers" button.
            if not input_csv_.name.endswith('.csv') and not input_csv_.name.endswith('.xlsx'):
                raise gr.Error(f'File {input_csv_.name} is not a csv or xlsx file, please upload only csv or xlsx files')
            else:
                controller.set_input_csv(input_csv_)
            update_ = {
                verif_btn: gr.update(visible=True),
            }
            return update_


        input_csv_comp.upload(input_csv_fn,
                              inputs=[input_csv_comp],
                              outputs=[verif_btn],
                              )

        def input_files_upload_fn(input_files_):
            # Validate every uploaded file is a .docx before ingesting any.
            for files in input_files_:
                if(not files.name.endswith('.docx')):
                    raise gr.Error(f'File {files.name} is not a docx file, please upload only docx files')
                else:
                    continue
            controller.clear_docs()
            controller.copy_docs(input_files_)
            update_ = {
                newstyles_acc: gr.update(open=True),
                style_acc: gr.update(visible=True),
                run_style_btn: gr.update(visible=True),
                clear_style_btn: gr.update(visible=True),
                list_acc: gr.update(open=True),
            }
            newstyles_update = newstyles_fn()
            # misapplied_styles = misapplied_styles_fn()
            # for val in misapplied_styles.values():
            #     if val > 0:
            #         doc = list(misapplied_styles.keys())[list(misapplied_styles.values()).index(val)]
            #         gr.Warning(f"{val} paragraphs were detected in the document {doc.name} because their styles are not well applied. Please review your document for better results.")
            update_.update(newstyles_update)
            return update_

        input_files_comp.upload(input_files_upload_fn,
                                inputs=[input_files_comp],
                                outputs=[style_acc, newstyles_acc, run_style_btn, clear_style_btn, list_acc] + newstyle_comps + list_style_comps
                                )

        def input_file_clear_fn():
            # Reset the whole styles/generation UI when the inputs are cleared.
            controller.clear_docs()
            update_ = {
                options_btn: gr.update(value=[]),
                log_comp: gr.update(value="", visible=False),
                output_styles_files_comp: gr.update(value=[], visible=False),
                newstyles_acc: gr.update(open=False),
                style_acc: gr.update(open=False),
                gen_acc: gr.update(open=False),
                output_files_comp: gr.update(visible=False),
                run_style_btn: gr.update(visible=False),
                clear_style_btn: gr.update(visible=False),
                list_acc: gr.update(open=False),
                exigences_acc: gr.update(value=""),
            }
            newstyles_update_ = newstyles_reset()
            list_style_update_ = newliststyle_reset()
            update_.update(newstyles_update_)
            update_.update(list_style_update_)
            return update_

        input_files_comp.clear(
            input_file_clear_fn,
            inputs=[],
            outputs=[options_btn, output_styles_files_comp, output_files_comp, log_comp, newstyles_acc, list_acc,
                     gen_acc, style_acc, run_style_btn, clear_style_btn, exigences_acc] + newstyle_comps + list_style_comps
        )

        def misapplied_styles_fn():
            # Ask the controller how many paragraphs have misapplied styles.
            res = controller.retrieve_number_of_misapplied_styles()
            return res

        def newstyles_fn():
            # Recompute the style-mapping dropdowns: one set for list styles,
            # one for other styles absent from the selected template.
            update_ = {}
            update_.update(newliststyle_reset())
            update_.update(newstyles_reset())
            different_styles, all_template_styles = controller.get_difference_with_template()
            all_template_styles_names = [style.name for style in all_template_styles]
            list_styles_to_update = controller.get_list_styles()
            get_label_list = lambda i: f"style: {list_styles_to_update[i]['list_style']}"
            list_style_update_ = {
                list_style_comps[i]: gr.update(visible=i < len(list_styles_to_update),
                                               choices=sorted(all_template_styles_names, key=lambda x: distance(x, list_styles_to_update[i]['list_style'])),
                                               value=None,
                                               label=get_label_list(i)) if i < len(list_styles_to_update) else ''
                for i in range(config['max_styles'])
            }
            update_.update(list_style_update_)
            # delete styles in different_styles that are already in list_styles_to_update
            different_styles = delete_duplicate_styles(list_styles_to_update, different_styles)
            adapted_template_styles = []
            for i in range(len(different_styles)):
                adapted_template_styles.append([style.name for style in all_template_styles if style.type == different_styles[i]['style'].type])
            get_label = lambda i: f"style: {different_styles[i]['style'].name}"
            newstyles_update_ = {
                newstyle_comps[i]: gr.update(visible=i < len(different_styles),
                                             # sort the styles using levenstein distance function
                                             choices=sorted(adapted_template_styles[i], key=lambda x: distance(x, different_styles[i]['style'].name)),
                                             value=None,
                                             label=get_label(i)) if i < len(different_styles) else ''
                for i in range(len(different_styles))
            }
            update_.update(newstyles_update_)
            return update_

        def newliststyle_reset():
            # Hide and empty every list-style dropdown.
            update_ = {
                list_style_comps[i]: gr.update(visible=False,
                                               choices=[],
                                               value=None,
                                               label='')
                for i in range(config['max_styles'])
            }
            return update_

        def newstyles_reset():
            # Hide and empty every new-style dropdown.
            update_ = {
                newstyle_comps[i]: gr.update(visible=False,
                                             choices=[],
                                             value=None,
                                             label='')
                for i in range(config['max_styles'])
            }
            return update_

        def templates_fn(templates_):
            # Switch the active template, then refresh the mapping dropdowns.
            controller.set_template(templates_)
            update_ = newstyles_fn()
            return update_

        templates_radio.change(templates_fn,
                               inputs=[templates_radio],
                               outputs=[newstyles_acc, list_acc] + newstyle_comps + list_style_comps)

        def newstyle_fns(src_index: int):
            # Factory binding the dropdown index into its change callback.
            def newstyle_fn(newstyle_):
                controller.update_style(src_index, newstyle_)
            return newstyle_fn

        def change_list_style_fn(src_index: int):
            # Factory binding the dropdown index into its change callback.
            # NOTE(review): the inner function shadows the outer name — works,
            # but renaming one of them would be clearer.
            def change_list_style_fn(list_style_):
                controller.update_list_style(src_index, list_style_)
            return change_list_style_fn

        def add_template_fn(template):
            # Register a new template and refresh both template selectors
            # from the templates directory.
            controller.add_template(template)
            update_ = {
                templates_radio: gr.update(choices=[t for t in os.listdir(config['templates_path']) if t.endswith((".docx"))]),
                templates_radio_modif: gr.update(choices=[t for t in os.listdir(config['templates_path']) if t.endswith((".docx"))]),
            }
            return update_

        def delete_curr_template_fn(template):
            # Delete the selected template and reset the dependent UI.
            controller.delete_curr_template(template)
            update_ = {
                templates_radio: gr.update(choices=[t for t in os.listdir(config['templates_path']) if t.endswith((".docx"))]),
                templates_radio_modif: gr.update(choices=[t for t in os.listdir(config['templates_path']) if t.endswith((".docx"))]),
                options_btn: gr.update(value=[]),
                log_comp: gr.update(value="", visible=False),
                output_styles_files_comp: gr.update(value=[], visible=False),
                newstyles_acc: gr.update(open=False),
                run_style_btn: gr.update(visible=True),
                list_acc: gr.update(open=False),
            }
            return update_

        add_template_btn.upload(add_template_fn,
                                inputs=[add_template_btn],
                                outputs=[templates_radio,templates_radio_modif])

        delete_curr_template_btn.click(delete_curr_template_fn,
                                       inputs=[templates_radio],
                                       outputs=[templates_radio, options_btn, log_comp, output_styles_files_comp, newstyles_acc, run_style_btn, list_acc, templates_radio_modif])

        # Wire each mapping dropdown to its index-bound callback.
        for src_index, newstyle_comp in enumerate(newstyle_comps):
            newstyle_comp.input(newstyle_fns(src_index), inputs=[newstyle_comp], outputs=[],show_progress="full")

        for src_index, list_style_comp in enumerate(list_style_comps):
            list_style_comp.input(change_list_style_fn(src_index), inputs=[list_style_comp], outputs=[],show_progress="full")

        def clear_style_fn(input_files_):
            # Undo style changes: re-copy the source docs and restore the
            # default template.
            controller.clear_docs()
            if input_files_:
                controller.copy_docs(input_files_)
            controller.set_template()
            update_ = {
                options_btn: gr.update(value=[]),
                log_comp: gr.update(value="", visible=False),
                output_styles_files_comp: gr.update(value=[], visible=False),
                newstyles_acc: gr.update(open=False),
                run_style_btn: gr.update(visible=True),
                list_acc: gr.update(open=False),
                templates_radio: gr.update(value=config['templates'][config['default_template_index']]),
            }
            newstyles_update_ = newstyles_fn()
            update_.update(newstyles_update_)
            return update_

        clear_style_btn.click(clear_style_fn,
                              inputs=[input_files_comp],
                              outputs=[options_btn, output_styles_files_comp, log_comp, newstyles_acc, list_acc, run_style_btn, templates_radio]
                              + newstyle_comps + list_style_comps
                              )

        def run_style_fn(options_btn_):
            # Apply the template with the ticked options, then expose the log
            # and the generated files.
            print(f"options activated : {options_btn_}")
            controller.apply_template(options_btn_)
            log = controller.get_log()
            new_docs_path = controller.generated_docs_path
            output_paths = [f"{new_docs_path}/{f}" for f in os.listdir(new_docs_path)]
            print(f"output_paths: {output_paths}")
            update_ = {
                log_comp: gr.update(value=log, visible=True),
                output_styles_files_comp: gr.update(value=output_paths, visible=True),
                run_style_btn: gr.update(visible=False),
            }
            return update_


        run_style_btn.click(run_style_fn,
                            inputs=[options_btn],
                            outputs=[log_comp, output_styles_files_comp, run_style_btn] + newstyle_comps, show_progress="full")

        """
        =====================================================
        Generation functions
        ====================
        """

        def generate_option_fn(db_collection_):
            # Resolve (or lazily create) the db collection backing generation.
            id_ = controller.get_or_create_collection(db_collection_)
            update_ = {
                db_collection_var: id_,
            }
            return update_

        def wiki_fetch1_fn():
            """
            fetch the wikifiles interesting for solving the tasks as defined in the input doc
            (first step: just reveal the checkbox list)
            """
            update_ = {
                wiki_list_comp: gr.update(visible=True),
            }
            return update_

        async def wiki_fetch2_fn():
            """
            fetch the wikifiles interesting for solving the tasks as defined in the input doc
            """
            wiki_interesting_files = await controller.wiki_fetch()
            print(f"wiki_interesting_files: {wiki_interesting_files}")
            wiki_files = wiki_interesting_files  # [w for w in wiki_interesting_files if w not in wiki_db_files_]
            update_ = {
                wiki_list_comp: gr.update(visible=True, value=[], choices=wiki_files),
                wiki_source_var: wiki_interesting_files,
                wiki_add_to_db_btn: gr.update(visible=True),
                # wiki_clear_btn: gr.update(visible=True), #Button to clear the choices that are by default all ticked
            }
            return update_

        async def wiki_add_to_db_fn(wiki_list_, wiki_source_, wiki_db_, db_list_, db_collection_):
            """
            adds the wikipages to the db source
            """
            wiki_to_add = [wiki for wiki in wiki_list_ if wiki not in wiki_db_]
            db_list_ += wiki_to_add
            wiki_db_ += wiki_to_add
            wiki_source_remaining = [wiki for wiki in wiki_source_ if wiki not in wiki_db_]
            # Upload/store all selected pages concurrently.
            async_upload_and_store_tasks = [asyncio.create_task(controller.wiki_upload_and_store(wiki, db_collection_)) for wiki in wiki_to_add]  # A DEPLACER DANS LE CONTROLLER
            await asyncio.gather(*async_upload_and_store_tasks)
            db_not_empty = 0 < len(db_list_)
            wiki_to_add_not_empty = 0 < len(wiki_source_remaining)
            update_ = {
                wiki_db_var: wiki_db_,
                wiki_list_comp: gr.update(value=[], choices=wiki_source_remaining),
                wiki_add_to_db_btn: gr.update(visible=wiki_to_add_not_empty),
                db_list_comp: gr.update(
                    visible=True,
                    value=db_list_,
                    choices=db_list_,
                    label="Database content"),
                db_reset_btn: gr.update(visible=db_not_empty),
                generate_btn: gr.update(visible=True, interactive=db_not_empty),
            }
            return update_

        def generate_fn1():
            # First step of generation: reveal the output-files area.
            update_ = {
                output_files_comp: gr.update(visible=True)
            }
            return update_

        async def generate_fn2(db_collection_, db_list_):
            # Second step: run the actual document generation from the db.
            output_files = await controller.generate_doc_from_db(collection_name=db_collection_,
                                                                 from_files=db_list_)
            update_ = {
                output_files_comp: gr.update(value=output_files, visible=True),
            }
            return update_


        """
        =====================================================
        Generation listeners
        ====================
        """

        wiki_fetch_btn \
            .click(wiki_fetch1_fn, inputs=[], outputs=[wiki_list_comp]) \
            .then(wiki_fetch2_fn,
                  inputs=[],
                  outputs=[wiki_list_comp, wiki_source_var, wiki_add_to_db_btn])

        wiki_add_to_db_btn\
            .click(generate_option_fn,
                   inputs=[db_collection_var],
                   outputs=[db_collection_var])\
            .then(wiki_add_to_db_fn,
                  inputs=[wiki_list_comp, wiki_source_var, wiki_db_var, db_list_comp, db_collection_var],
                  outputs=[db_list_comp, wiki_list_comp, wiki_db_var,
                           generate_btn, wiki_add_to_db_btn, db_reset_btn])

        generate_btn\
            .click(generate_fn1,
                   inputs=[],
                   outputs=[output_files_comp])\
            .then(generate_fn2,
                  inputs=[db_collection_var, db_list_comp],
                  outputs=[output_files_comp])


        """
        =====================================================
        Clear and rerun functions and listeners
        =======================================
        """

        def clear_fn():
            # Reset the input/output file areas and hide the action buttons.
            update_ = {
                input_files_comp: gr.update(value=None),
                output_files_comp: gr.update(value=None, visible=False),
                clear_btn: gr.update(visible=False),
                rerun_btn: gr.update(visible=False),
            }
            return update_

        clear_btn.click(clear_fn,
                        inputs=[],
                        outputs=[input_files_comp, output_files_comp, clear_btn, rerun_btn])

        # wiki_clear_btn.click(clear_choices_fn, inputs=[], outputs=[wiki_list_comp]) #listener for the clear button of the wiki choices
    return formatdoc
|
temp/generated_files/file.txt
ADDED
File without changes
|